diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8f7fb911b2cc..e0f51ebdb225 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -374,6 +374,11 @@
 			selects a performance level in this range and appropriate
 			to the current workload.
 
+	amd_prefcore=
+			[X86]
+			disable
+			  Disable amd-pstate preferred core.
+
 	amijoy.map=	[HW,JOY] Amiga joystick support
 			Map of devices attached to JOY0DAT and JOY1DAT
 			Format: <a>,<b>
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 9eb26014d34b..1e0d101b020a 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -300,8 +300,8 @@ platforms. The AMD P-States mechanism is the more performance and energy
 efficiency frequency management method on AMD processors.
 
-AMD Pstate Driver Operation Modes
-=================================
+``amd-pstate`` Driver Operation Modes
+======================================
 
 ``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode,
 non-autonomous (passive) mode and guided autonomous (guided) mode.
@@ -353,6 +353,48 @@ is activated. In this mode, driver requests minimum and maximum performance
 level and the platform autonomously selects a performance level in this range
 and appropriate to the current workload.
 
+``amd-pstate`` Preferred Core
+=================================
+
+Core frequency is subject to process variation in semiconductors, so not all
+cores are able to reach the maximum frequency while respecting the
+infrastructure limits. Consequently, AMD has redefined the concept of the
+maximum frequency of a part: only a fraction of the cores can reach it. To
+find the best process scheduling policy for a given scenario, the OS needs to
+know the core ordering reported by the platform through the highest
+performance capability register of the CPPC interface.
+
+``amd-pstate`` preferred core enables the scheduler to prefer scheduling on
+cores that can achieve a higher frequency with lower voltage. The preferred
+core rankings can dynamically change based on the workload, platform
+conditions, thermals and ageing.
+
+The priority metric is initialized by the ``amd-pstate`` driver, which also
+determines whether ``amd-pstate`` preferred core is supported by the
+platform.
+
+The ``amd-pstate`` driver provides an initial core ordering when the system
+boots. The platform uses the CPPC interfaces to communicate the core ranking
+to the operating system and scheduler, so that the OS schedules processes on
+the highest-performance cores first. When the ``amd-pstate`` driver receives
+a highest-performance-changed notification, it updates the core ranking and
+sets the CPU's priority accordingly.
+
+``amd-pstate`` Preferred Core Switch
+=====================================
+Kernel Parameters
+-----------------
+
+``amd-pstate`` preferred core has two states, enabled and disabled, selected
+by a kernel parameter. Preferred core is enabled by default.
+
+``amd_prefcore=disable``
+
+For systems that support ``amd-pstate`` preferred core, the core rankings are
+always advertised by the platform, but the OS can choose to ignore them via
+the kernel parameter ``amd_prefcore=disable``.
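[Editor's note] The ranking flow described above maps onto two kernel interfaces used later in this patch: the ``cppc_get_highest_perf()`` helper added to the CPPC library and the existing ITMT priority hooks. As a rough, illustrative sketch (not part of the patch, omitting the MSR fast path and the ``CPPC_MAX_PERF`` validity check the real driver performs), a per-CPU ranking could be handed to the scheduler like this::

  /* Illustrative only: feed a CPPC-reported ranking to the scheduler. */
  static void example_update_core_priority(int cpu)
  {
  	u64 highest_perf;

  	/* The ranking is advertised via the CPPC highest performance register. */
  	if (cppc_get_highest_perf(cpu, &highest_perf))
  		return;

  	/* A larger value means a higher scheduling priority for this core. */
  	sched_set_itmt_core_prio((int)highest_perf, cpu);
  }

The driver code below applies the same idea, but only treats values below 255 (``CPPC_MAX_PERF``) as a valid ranking and defers ``sched_set_itmt_support()`` to a workqueue because it cannot be called directly from the cpufreq callbacks due to locking.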
+ User Space Interface in ``sysfs`` - General =========================================== @@ -385,6 +427,19 @@ control its functionality at the system level. They are located in the to the operation mode represented by that string - or to be unregistered in the "disable" case. +``prefcore`` + Preferred core state of the driver: "enabled" or "disabled". + + "enabled" + Enable the ``amd-pstate`` preferred core. + + "disabled" + Disable the ``amd-pstate`` preferred core + + + This attribute is read-only to check the state of preferred core set + by the kernel parameter. + ``cpupower`` tool support for ``amd-pstate`` =============================================== diff --git a/Documentation/power/opp.rst b/Documentation/power/opp.rst index a7c03c470980..1b7f1d854f14 100644 --- a/Documentation/power/opp.rst +++ b/Documentation/power/opp.rst @@ -305,7 +305,7 @@ dev_pm_opp_get_opp_count { /* Do things */ num_available = dev_pm_opp_get_opp_count(dev); - speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL); + speeds = kcalloc(num_available, sizeof(u32), GFP_KERNEL); /* populate the table in increasing order */ freq = 0; while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) { diff --git a/Documentation/translations/zh_CN/power/opp.rst b/Documentation/translations/zh_CN/power/opp.rst index 8d6e3f6f6202..7470fa2d4c43 100644 --- a/Documentation/translations/zh_CN/power/opp.rst +++ b/Documentation/translations/zh_CN/power/opp.rst @@ -274,7 +274,7 @@ dev_pm_opp_get_opp_count { /* 做一些事情 */ num_available = dev_pm_opp_get_opp_count(dev); - speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL); + speeds = kcalloc(num_available, sizeof(u32), GFP_KERNEL); /* 按升序填充表 */ freq = 0; while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5edec175b9bf..29d110285438 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1054,8 +1054,9 @@ config SCHED_MC config SCHED_MC_PRIO bool "CPU core priorities scheduler support" - depends on SCHED_MC && CPU_SUP_INTEL - select X86_INTEL_PSTATE + depends on SCHED_MC + select X86_INTEL_PSTATE if CPU_SUP_INTEL + select X86_AMD_PSTATE if CPU_SUP_AMD && ACPI select CPU_FREQ default y help diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index d155a86a8614..a50e70abdf19 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1157,6 +1157,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf); } +/** + * cppc_get_highest_perf - Get the highest performance register value. + * @cpunum: CPU from which to get highest performance. + * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_highest_perf(int cpunum, u64 *highest_perf) +{ + return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf); +} +EXPORT_SYMBOL_GPL(cppc_get_highest_perf); + /** * cppc_get_epp_perf - Get the epp register value. * @cpunum: CPU from which to get epp preference value. 
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 4bd16b3f0781..67db60eda370 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -27,6 +27,7 @@ #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 +#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85 MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI Processor Driver"); @@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data) acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; + case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED: + cpufreq_update_limits(pr->id); + acpi_bus_generate_netlink_event(device->pnp.device_class, + dev_name(&device->dev), event, 0); + break; default: acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event); break; diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index f911606897b8..a0ebad77666e 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -173,6 +173,7 @@ config ARM_QCOM_CPUFREQ_NVMEM config ARM_QCOM_CPUFREQ_HW tristate "QCOM CPUFreq HW driver" depends on ARCH_QCOM || COMPILE_TEST + depends on COMMON_CLK help Support for the CPUFreq HW driver. Some QCOM chipsets have a HW engine to offload the steps diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1791d37fbc53..2015c9fcc3c9 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,7 @@ #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 +#define AMD_PSTATE_PREFCORE_THRESHOLD 166 /* * TODO: We need more time to fine tune processors with shared memory solution @@ -64,6 +66,7 @@ static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; static bool cppc_enabled; +static bool amd_pstate_prefcore = true; /* * AMD Energy Preference Performance (EPP) @@ -297,13 +300,14 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - /* - * TODO: Introduce AMD specific power feature. - * - * CPPC entry doesn't indicate the highest performance in some ASICs. + /* For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as + * the default max perf. 
*/ - highest_perf = amd_get_highest_perf(); - if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) + if (cpudata->hw_prefcore) + highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; + else highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -311,6 +315,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); return 0; } @@ -324,8 +329,9 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - highest_perf = amd_get_highest_perf(); - if (highest_perf > cppc_perf.highest_perf) + if (cpudata->hw_prefcore) + highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; + else highest_perf = cppc_perf.highest_perf; WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -334,6 +340,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) @@ -477,12 +484,19 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf; + u32 max_limit_perf, min_limit_perf, lowest_perf; struct amd_cpudata *cpudata = policy->driver_data; max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + lowest_perf = READ_ONCE(cpudata->lowest_perf); + if (min_limit_perf < lowest_perf) + min_limit_perf = lowest_perf; + + if (max_limit_perf < min_limit_perf) + max_limit_perf = min_limit_perf; + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); WRITE_ONCE(cpudata->max_limit_freq, policy->max); @@ -570,7 +584,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (target_perf < capacity) des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); - min_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); if (_min_perf < capacity) min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); @@ -706,6 +720,114 @@ static void amd_perf_ctl_reset(unsigned int cpu) wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } +/* + * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks + * due to locking, so queue the work for later. + */ +static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) +{ + sched_set_itmt_support(); +} +static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); + +/* + * Get the highest performance register value. + * @cpu: CPU from which to get highest performance. + * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. 
+ */ +static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) +{ + int ret; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 cap1; + + ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + } else { + u64 cppc_highest_perf; + + ret = cppc_get_highest_perf(cpu, &cppc_highest_perf); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, cppc_highest_perf); + } + + return (ret); +} + +#define CPPC_MAX_PERF U8_MAX + +static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) +{ + int ret, prio; + u32 highest_perf; + + ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); + if (ret) + return; + + cpudata->hw_prefcore = true; + /* check if CPPC preferred core feature is enabled*/ + if (highest_perf < CPPC_MAX_PERF) + prio = (int)highest_perf; + else { + pr_debug("AMD CPPC preferred core is unsupported!\n"); + cpudata->hw_prefcore = false; + return; + } + + if (!amd_pstate_prefcore) + return; + + /* + * The priorities can be set regardless of whether or not + * sched_set_itmt_support(true) has been called and it is valid to + * update them at any time after it has been called. + */ + sched_set_itmt_core_prio(prio, cpudata->cpu); + + schedule_work(&sched_prefcore_work); +} + +static void amd_pstate_update_limits(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; + u32 prev_high = 0, cur_high = 0; + int ret; + bool highest_perf_changed = false; + + mutex_lock(&amd_pstate_driver_lock); + if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) + goto free_cpufreq_put; + + ret = amd_pstate_get_highest_perf(cpu, &cur_high); + if (ret) + goto free_cpufreq_put; + + prev_high = READ_ONCE(cpudata->prefcore_ranking); + if (prev_high != cur_high) { + highest_perf_changed = true; + WRITE_ONCE(cpudata->prefcore_ranking, cur_high); + + if (cur_high < CPPC_MAX_PERF) + sched_set_itmt_core_prio((int)cur_high, cpu); + } + +free_cpufreq_put: + cpufreq_cpu_put(policy); + + if (!highest_perf_changed) + cpufreq_update_policy(cpu); + + mutex_unlock(&amd_pstate_driver_lock); +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; @@ -727,6 +849,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; @@ -877,6 +1001,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, return sysfs_emit(buf, "%u\n", perf); } +static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, + char *buf) +{ + u32 perf; + struct amd_cpudata *cpudata = policy->driver_data; + + perf = READ_ONCE(cpudata->prefcore_ranking); + + return sysfs_emit(buf, "%u\n", perf); +} + +static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, + char *buf) +{ + bool hw_prefcore; + struct amd_cpudata *cpudata = policy->driver_data; + + hw_prefcore = READ_ONCE(cpudata->hw_prefcore); + + return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore)); +} + static ssize_t show_energy_performance_available_preferences( struct cpufreq_policy *policy, char *buf) { @@ -1074,18 +1220,29 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, return ret < 0 ? 
ret : count; } +static ssize_t prefcore_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore)); +} + cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); +cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking); +cpufreq_freq_attr_ro(amd_pstate_hw_prefcore); cpufreq_freq_attr_rw(energy_performance_preference); cpufreq_freq_attr_ro(energy_performance_available_preferences); static DEVICE_ATTR_RW(status); +static DEVICE_ATTR_RO(prefcore); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, NULL, }; @@ -1093,6 +1250,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, &energy_performance_preference, &energy_performance_available_preferences, NULL, @@ -1100,6 +1259,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = { static struct attribute *pstate_global_attributes[] = { &dev_attr_status.attr, + &dev_attr_prefcore.attr, NULL }; @@ -1151,6 +1311,8 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; cpudata->epp_policy = 0; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; @@ -1232,6 +1394,12 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + if (min_limit_perf < min_perf) + min_limit_perf = min_perf; + + if (max_limit_perf < min_limit_perf) + max_limit_perf = min_limit_perf; + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); @@ -1432,6 +1600,7 @@ static struct cpufreq_driver amd_pstate_driver = { .suspend = amd_pstate_cpu_suspend, .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate", .attr = amd_pstate_attr, }; @@ -1446,6 +1615,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .online = amd_pstate_epp_cpu_online, .suspend = amd_pstate_epp_suspend, .resume = amd_pstate_epp_resume, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; @@ -1567,7 +1737,17 @@ static int __init amd_pstate_param(char *str) return amd_pstate_set_driver(mode_idx); } + +static int __init amd_prefcore_param(char *str) +{ + if (!strcmp(str, "disable")) + amd_pstate_prefcore = false; + + return 0; +} + early_param("amd_pstate", amd_pstate_param); +early_param("amd_prefcore", amd_prefcore_param); MODULE_AUTHOR("Huang Rui "); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 35fb3a559ea9..1a1857b0a6f4 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -481,6 +481,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + if (!policy) + return 0; struct private_data *priv = policy->driver_data; 
cpufreq_cpu_put(policy); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index bd1e1357cef8..b993a498084b 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -156,6 +156,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sc7280", }, { .compatible = "qcom,sc8180x", }, { .compatible = "qcom,sc8280xp", }, + { .compatible = "qcom,sdm670", }, { .compatible = "qcom,sdm845", }, { .compatible = "qcom,sdx75", }, { .compatible = "qcom,sm6115", }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 44db4f59c4cc..f6f8d7f450e7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -576,17 +576,26 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy) latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC; if (latency) { + unsigned int max_delay_us = 2 * MSEC_PER_SEC; + /* - * For platforms that can change the frequency very fast (< 10 + * If the platform already has high transition_latency, use it + * as-is. + */ + if (latency > max_delay_us) + return latency; + + /* + * For platforms that can change the frequency very fast (< 2 * us), the above formula gives a decent transition delay. But * for platforms where transition_latency is in milliseconds, it * ends up giving unrealistic values. * - * Cap the default transition delay to 10 ms, which seems to be + * Cap the default transition delay to 2 ms, which seems to be * a reasonable amount of time after which we should reevaluate * the frequency. */ - return min(latency * LATENCY_MULTIPLIER, (unsigned int)10000); + return min(latency * LATENCY_MULTIPLIER, max_delay_us); } return LATENCY_MULTIPLIER; @@ -1571,7 +1580,8 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->ready) cpufreq_driver->ready(policy); - if (cpufreq_thermal_control_enabled(cpufreq_driver)) + /* Register cpufreq cooling only for a new policy */ + if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); pr_debug("initialization complete\n"); @@ -1655,11 +1665,6 @@ static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy) else policy->last_policy = policy->policy; - if (cpufreq_thermal_control_enabled(cpufreq_driver)) { - cpufreq_cooling_unregister(policy->cdev); - policy->cdev = NULL; - } - if (has_target()) cpufreq_exit_governor(policy); @@ -1720,6 +1725,15 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return; } + /* + * Unregister cpufreq cooling once all the CPUs of the policy are + * removed. 
+ */ + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { + cpufreq_cooling_unregister(policy->cdev); + policy->cdev = NULL; + } + /* We did light-weight exit earlier, do full tear down now */ if (cpufreq_driver->offline) cpufreq_driver->exit(policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c52d19d67557..a7c38b8b3e78 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -22,7 +22,6 @@ #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_UP_THRESHOLD (95) -#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (1) #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 33728c242f66..c20d3ecc5a81 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #define PU_SOC_VOLTAGE_NORMAL 1250000 #define PU_SOC_VOLTAGE_HIGH 1275000 @@ -225,8 +227,6 @@ static void imx6x_disable_freq_in_opp(struct device *dev, unsigned long freq) static int imx6q_opp_check_speed_grading(struct device *dev) { - struct device_node *np; - void __iomem *base; u32 val; int ret; @@ -235,16 +235,11 @@ static int imx6q_opp_check_speed_grading(struct device *dev) if (ret) return ret; } else { - np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-ocotp"); - if (!np) - return -ENOENT; + struct regmap *ocotp; - base = of_iomap(np, 0); - of_node_put(np); - if (!base) { - dev_err(dev, "failed to map ocotp\n"); - return -EFAULT; - } + ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6q-ocotp"); + if (IS_ERR(ocotp)) + return -ENOENT; /* * SPEED_GRADING[1:0] defines the max speed of ARM: @@ -254,8 +249,7 @@ static int imx6q_opp_check_speed_grading(struct device *dev) * 2b'00: 792000000Hz; * We need to set the max speed of ARM according to fuse map. 
*/ - val = readl_relaxed(base + OCOTP_CFG3); - iounmap(base); + regmap_read(ocotp, OCOTP_CFG3, &val); } val >>= OCOTP_CFG3_SPEED_SHIFT; @@ -290,25 +284,16 @@ static int imx6ul_opp_check_speed_grading(struct device *dev) if (ret) return ret; } else { - struct device_node *np; - void __iomem *base; + struct regmap *ocotp; - np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp"); - if (!np) - np = of_find_compatible_node(NULL, NULL, - "fsl,imx6ull-ocotp"); - if (!np) + ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ul-ocotp"); + if (IS_ERR(ocotp)) + ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ull-ocotp"); + + if (IS_ERR(ocotp)) return -ENOENT; - base = of_iomap(np, 0); - of_node_put(np); - if (!base) { - dev_err(dev, "failed to map ocotp\n"); - return -EFAULT; - } - - val = readl_relaxed(base + OCOTP_CFG3); - iounmap(base); + regmap_read(ocotp, OCOTP_CFG3, &val); } /* diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 79619227ea51..dbbf299f4219 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -201,8 +202,6 @@ struct global_params { * @prev_aperf: Last APERF value read from APERF MSR * @prev_mperf: Last MPERF value read from MPERF MSR * @prev_tsc: Last timestamp counter (TSC) value - * @prev_cummulative_iowait: IO Wait time difference from last and - * current sample * @sample: Storage for storing last Sample data * @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios * @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios @@ -241,7 +240,6 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; - u64 prev_cummulative_iowait; struct sample sample; int32_t min_perf_ratio; int32_t max_perf_ratio; @@ -3407,14 +3405,31 @@ static bool intel_pstate_hwp_is_enabled(void) return !!(value & 0x1); } -static const struct x86_cpu_id intel_epp_balance_perf[] = { +#define POWERSAVE_MASK GENMASK(7, 0) +#define BALANCE_POWER_MASK GENMASK(15, 8) +#define BALANCE_PERFORMANCE_MASK GENMASK(23, 16) +#define PERFORMANCE_MASK GENMASK(31, 24) + +#define HWP_SET_EPP_VALUES(powersave, balance_power, balance_perf, performance) \ + (FIELD_PREP_CONST(POWERSAVE_MASK, powersave) |\ + FIELD_PREP_CONST(BALANCE_POWER_MASK, balance_power) |\ + FIELD_PREP_CONST(BALANCE_PERFORMANCE_MASK, balance_perf) |\ + FIELD_PREP_CONST(PERFORMANCE_MASK, performance)) + +#define HWP_SET_DEF_BALANCE_PERF_EPP(balance_perf) \ + (HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, HWP_EPP_BALANCE_POWERSAVE,\ + balance_perf, HWP_EPP_PERFORMANCE)) + +static const struct x86_cpu_id intel_epp_default[] = { /* * Set EPP value as 102, this is the max suggested EPP * which can result in one core turbo frequency for * AlderLake Mobile CPUs. 
*/ - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 32), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, + HWP_EPP_BALANCE_POWERSAVE, 115, 16)), {} }; @@ -3512,11 +3527,24 @@ static int __init intel_pstate_init(void) intel_pstate_sysfs_expose_params(); if (hwp_active) { - const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf); + const struct x86_cpu_id *id = x86_match_cpu(intel_epp_default); const struct x86_cpu_id *hybrid_id = x86_match_cpu(intel_hybrid_scaling_factor); - if (id) - epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data; + if (id) { + epp_values[EPP_INDEX_POWERSAVE] = + FIELD_GET(POWERSAVE_MASK, id->driver_data); + epp_values[EPP_INDEX_BALANCE_POWERSAVE] = + FIELD_GET(BALANCE_POWER_MASK, id->driver_data); + epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = + FIELD_GET(BALANCE_PERFORMANCE_MASK, id->driver_data); + epp_values[EPP_INDEX_PERFORMANCE] = + FIELD_GET(PERFORMANCE_MASK, id->driver_data); + pr_debug("Updated EPPs powersave:%x balanced power:%x balanced perf:%x performance:%x\n", + epp_values[EPP_INDEX_POWERSAVE], + epp_values[EPP_INDEX_BALANCE_POWERSAVE], + epp_values[EPP_INDEX_BALANCE_PERFORMANCE], + epp_values[EPP_INDEX_PERFORMANCE]); + } if (hybrid_id) { hybrid_scaling_factor = hybrid_id->driver_data; diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index d46afb3c0092..8d097dcddda4 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #define LUT_MAX_ENTRIES 32U @@ -300,7 +301,23 @@ static struct cpufreq_driver cpufreq_mtk_hw_driver = { static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) { const void *data; - int ret; + int ret, cpu; + struct device *cpu_dev; + struct regulator *cpu_reg; + + /* Make sure that all CPU supplies are available before proceeding. */ + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return dev_err_probe(&pdev->dev, -EPROBE_DEFER, + "Failed to get cpu%d device\n", cpu); + + cpu_reg = devm_regulator_get(cpu_dev, "cpu"); + if (IS_ERR(cpu_reg)) + return dev_err_probe(&pdev->dev, PTR_ERR(cpu_reg), + "CPU%d regulator get failed\n", cpu); + } + data = of_device_get_match_data(&pdev->dev); if (!data) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 4ee23f4ebf4a..0b483bd0d3ca 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -144,6 +144,29 @@ scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power, return 0; } +static int +scmi_get_rate_limit(u32 domain, bool has_fast_switch) +{ + int ret, rate_limit; + + if (has_fast_switch) { + /* + * Fast channels are used whenever available, + * so use their rate_limit value if populated. 
+ */ + ret = perf_ops->fast_switch_rate_limit(ph, domain, + &rate_limit); + if (!ret && rate_limit) + return rate_limit; + } + + ret = perf_ops->rate_limit_get(ph, domain, &rate_limit); + if (ret) + return 0; + + return rate_limit; +} + static int scmi_cpufreq_init(struct cpufreq_policy *policy) { int ret, nr_opp, domain; @@ -250,6 +273,9 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = perf_ops->fast_switch_possible(ph, domain); + policy->transition_delay_us = + scmi_get_rate_limit(domain, policy->fast_switch_possible); + return 0; out_free_opp: diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 3ea64b22cf0d..1d38ecfafc59 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1617,7 +1617,7 @@ static void scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph, u8 describe_id, u32 message_id, u32 valid_size, u32 domain, void __iomem **p_addr, - struct scmi_fc_db_info **p_db) + struct scmi_fc_db_info **p_db, u32 *rate_limit) { int ret; u32 flags; @@ -1661,6 +1661,9 @@ scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph, goto err_xfer; } + if (rate_limit) + *rate_limit = le32_to_cpu(resp->rate_limit) & GENMASK(19, 0); + phys_addr = le32_to_cpu(resp->chan_addr_low); phys_addr |= (u64)le32_to_cpu(resp->chan_addr_high) << 32; addr = devm_ioremap(ph->dev, phys_addr, size); diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 211e8e0aef2c..fbcbd703198a 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -153,6 +153,7 @@ struct perf_dom_info { bool perf_fastchannels; bool level_indexing_mode; u32 opp_count; + u32 rate_limit_us; u32 sustained_freq_khz; u32 sustained_perf_level; unsigned long mult_factor; @@ -266,6 +267,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, if (PROTOCOL_REV_MAJOR(version) >= 0x4) dom_info->level_indexing_mode = SUPPORTS_LEVEL_INDEXING(flags); + dom_info->rate_limit_us = le32_to_cpu(attr->rate_limit_us) & + GENMASK(19, 0); dom_info->sustained_freq_khz = le32_to_cpu(attr->sustained_freq_khz); dom_info->sustained_perf_level = @@ -786,23 +789,27 @@ static void scmi_perf_domain_init_fc(const struct scmi_protocol_handle *ph, ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL, PERF_LEVEL_GET, 4, dom->id, - &fc[PERF_FC_LEVEL].get_addr, NULL); + &fc[PERF_FC_LEVEL].get_addr, NULL, + &fc[PERF_FC_LEVEL].rate_limit); ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL, PERF_LIMITS_GET, 8, dom->id, - &fc[PERF_FC_LIMIT].get_addr, NULL); + &fc[PERF_FC_LIMIT].get_addr, NULL, + &fc[PERF_FC_LIMIT].rate_limit); if (dom->info.set_perf) ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL, PERF_LEVEL_SET, 4, dom->id, &fc[PERF_FC_LEVEL].set_addr, - &fc[PERF_FC_LEVEL].set_db); + &fc[PERF_FC_LEVEL].set_db, + &fc[PERF_FC_LEVEL].rate_limit); if (dom->set_limits) ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL, PERF_LIMITS_SET, 8, dom->id, &fc[PERF_FC_LIMIT].set_addr, - &fc[PERF_FC_LIMIT].set_db); + &fc[PERF_FC_LIMIT].set_db, + &fc[PERF_FC_LIMIT].rate_limit); dom->fc_info = fc; } @@ -855,6 +862,23 @@ scmi_dvfs_transition_latency_get(const struct scmi_protocol_handle *ph, return dom->opp[dom->opp_count - 1].trans_latency_us * 1000; } +static int +scmi_dvfs_rate_limit_get(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit) +{ + struct perf_dom_info *dom; + + if (!rate_limit) + return -EINVAL; + + dom = 
scmi_perf_domain_lookup(ph, domain); + if (IS_ERR(dom)) + return PTR_ERR(dom); + + *rate_limit = dom->rate_limit_us; + return 0; +} + static int scmi_dvfs_freq_set(const struct scmi_protocol_handle *ph, u32 domain, unsigned long freq, bool poll) { @@ -954,6 +978,25 @@ static bool scmi_fast_switch_possible(const struct scmi_protocol_handle *ph, return dom->fc_info && dom->fc_info[PERF_FC_LEVEL].set_addr; } +static int scmi_fast_switch_rate_limit(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit) +{ + struct perf_dom_info *dom; + + if (!rate_limit) + return -EINVAL; + + dom = scmi_perf_domain_lookup(ph, domain); + if (IS_ERR(dom)) + return PTR_ERR(dom); + + if (!dom->fc_info) + return -EINVAL; + + *rate_limit = dom->fc_info[PERF_FC_LEVEL].rate_limit; + return 0; +} + static enum scmi_power_scale scmi_power_scale_get(const struct scmi_protocol_handle *ph) { @@ -970,11 +1013,13 @@ static const struct scmi_perf_proto_ops perf_proto_ops = { .level_set = scmi_perf_level_set, .level_get = scmi_perf_level_get, .transition_latency_get = scmi_dvfs_transition_latency_get, + .rate_limit_get = scmi_dvfs_rate_limit_get, .device_opps_add = scmi_dvfs_device_opps_add, .freq_set = scmi_dvfs_freq_set, .freq_get = scmi_dvfs_freq_get, .est_power_get = scmi_dvfs_est_power_get, .fast_switch_possible = scmi_fast_switch_possible, + .fast_switch_rate_limit = scmi_fast_switch_rate_limit, .power_scale_get = scmi_power_scale_get, }; diff --git a/drivers/firmware/arm_scmi/powercap.c b/drivers/firmware/arm_scmi/powercap.c index a4c6cd4716fe..604184c044ff 100644 --- a/drivers/firmware/arm_scmi/powercap.c +++ b/drivers/firmware/arm_scmi/powercap.c @@ -703,20 +703,24 @@ static void scmi_powercap_domain_init_fc(const struct scmi_protocol_handle *ph, ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL, POWERCAP_CAP_SET, 4, domain, &fc[POWERCAP_FC_CAP].set_addr, - &fc[POWERCAP_FC_CAP].set_db); + &fc[POWERCAP_FC_CAP].set_db, + &fc[POWERCAP_FC_CAP].rate_limit); ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL, POWERCAP_CAP_GET, 4, domain, - &fc[POWERCAP_FC_CAP].get_addr, NULL); + &fc[POWERCAP_FC_CAP].get_addr, NULL, + &fc[POWERCAP_FC_CAP].rate_limit); ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL, POWERCAP_PAI_SET, 4, domain, &fc[POWERCAP_FC_PAI].set_addr, - &fc[POWERCAP_FC_PAI].set_db); + &fc[POWERCAP_FC_PAI].set_db, + &fc[POWERCAP_FC_PAI].rate_limit); ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL, POWERCAP_PAI_GET, 4, domain, - &fc[POWERCAP_FC_PAI].get_addr, NULL); + &fc[POWERCAP_FC_PAI].get_addr, NULL, + &fc[POWERCAP_PAI_GET].rate_limit); *p_fc = fc; } diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h index e683c26f24eb..8b5d9ce4a33a 100644 --- a/drivers/firmware/arm_scmi/protocols.h +++ b/drivers/firmware/arm_scmi/protocols.h @@ -234,6 +234,7 @@ struct scmi_fc_info { void __iomem *set_addr; void __iomem *get_addr; struct scmi_fc_db_info *set_db; + u32 rate_limit; }; /** @@ -268,7 +269,8 @@ struct scmi_proto_helpers_ops { u8 describe_id, u32 message_id, u32 valid_size, u32 domain, void __iomem **p_addr, - struct scmi_fc_db_info **p_db); + struct scmi_fc_db_info **p_db, + u32 *rate_limit); void (*fastchannel_db_ring)(struct scmi_fc_db_info *db); }; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 3a0995f8bce8..930b6afba6f4 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -139,6 +139,7 @@ struct cppc_cpudata { #ifdef CONFIG_ACPI_CPPC_LIB extern int cppc_get_desired_perf(int 
cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); +extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); @@ -167,6 +168,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) { return -ENOTSUPP; } +static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf) +{ + return -ENOTSUPP; +} static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { return -ENOTSUPP; diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 6ad02ad9c7b4..d21838835abd 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -39,11 +39,16 @@ struct amd_aperf_mperf { * @cppc_req_cached: cached performance request hints * @highest_perf: the maximum performance an individual processor may reach, * assuming ideal conditions + * For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the fixed value as the highest_perf. * @nominal_perf: the maximum sustained performance level of the processor, * assuming ideal operating conditions * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power * savings are achieved * @lowest_perf: the absolute lowest performance level of the processor + * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher + * priority. * @max_freq: the frequency that mapped to highest_perf * @min_freq: the frequency that mapped to lowest_perf * @nominal_freq: the frequency that mapped to nominal_perf @@ -52,6 +57,9 @@ struct amd_aperf_mperf { * @prev: Last Aperf/Mperf/tsc count value read from register * @freq: current cpu frequency value * @boost_supported: check whether the Processor or SBIOS supports boost mode + * @hw_prefcore: check whether HW supports preferred core featue. + * Only when hw_prefcore and early prefcore param are true, + * AMD P-State driver supports preferred core featue. * @epp_policy: Last saved policy used to set energy-performance preference * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value @@ -70,6 +78,7 @@ struct amd_cpudata { u32 nominal_perf; u32 lowest_nonlinear_perf; u32 lowest_perf; + u32 prefcore_ranking; u32 min_limit_perf; u32 max_limit_perf; u32 min_limit_freq; @@ -85,6 +94,7 @@ struct amd_cpudata { u64 freq; bool boost_supported; + bool hw_prefcore; /* EPP feature related attributes*/ s16 epp_policy; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index afda5f24d3dd..692ea6e55129 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -263,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void) return false; } static inline void disable_cpufreq(void) { } +static inline void cpufreq_update_limits(unsigned int cpu) { } #endif #ifdef CONFIG_CPU_FREQ_STAT @@ -568,9 +569,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, /* * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. The - * ondemand governor will work on any processor with transition latency <= 10ms, - * using appropriate sampling rate. 
+ * polling frequency is 1000 times the transition latency of the processor. */ #define LATENCY_MULTIPLIER (1000) @@ -1021,6 +1020,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +{ + unsigned int freq; + + if (idx < 0) + return false; + + freq = policy->freq_table[idx].frequency; + + return freq == clamp_val(freq, policy->min, policy->max); +} + static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -1054,7 +1065,8 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, return 0; } - if (idx < 0 && efficiencies) { + /* Limit frequency index to honor policy->min/max */ + if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { efficiencies = false; goto retry; } diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index f2f05fb42d28..fafedb3b6604 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -128,6 +128,8 @@ struct scmi_perf_domain_info { * @level_set: sets the performance level of a domain * @level_get: gets the performance level of a domain * @transition_latency_get: gets the DVFS transition latency for a given device + * @rate_limit_get: gets the minimum time (us) required between successive + * requests * @device_opps_add: adds all the OPPs for a given device * @freq_set: sets the frequency for a given device using sustained frequency * to sustained performance level mapping @@ -137,6 +139,8 @@ struct scmi_perf_domain_info { * at a given frequency * @fast_switch_possible: indicates if fast DVFS switching is possible or not * for a given device + * @fast_switch_rate_limit: gets the minimum time (us) required between + * successive fast_switching requests * @power_scale_mw_get: indicates if the power values provided are in milliWatts * or in some other (abstract) scale */ @@ -154,6 +158,8 @@ struct scmi_perf_proto_ops { u32 *level, bool poll); int (*transition_latency_get)(const struct scmi_protocol_handle *ph, u32 domain); + int (*rate_limit_get)(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit); int (*device_opps_add)(const struct scmi_protocol_handle *ph, struct device *dev, u32 domain); int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain, @@ -164,6 +170,8 @@ struct scmi_perf_proto_ops { unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, u32 domain); + int (*fast_switch_rate_limit)(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit); enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); };
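[Editor's note] Taken together, the SCMI changes at the end of this series let a cpufreq consumer derive a reasonable ``transition_delay_us`` from firmware-provided rate limits, preferring the per-fast-channel value when fast switching is in use and falling back to the domain attribute otherwise, mirroring ``scmi_get_rate_limit()`` in ``scmi-cpufreq.c``. A hedged sketch of that consumer-side logic, assuming ``ph`` and ``perf_ops`` are the file-scope handles the driver already sets up::

  /*
   * Illustrative only: pick a transition delay from the SCMI rate-limit
   * hints.  A zero return means "no hint, fall back to the latency-based
   * cpufreq default".
   */
  static unsigned int example_scmi_transition_delay_us(u32 domain, bool fast_switch)
  {
  	u32 rate_limit = 0;

  	/* Fast channels can carry their own rate limit (bits [19:0]). */
  	if (fast_switch &&
  	    !perf_ops->fast_switch_rate_limit(ph, domain, &rate_limit) &&
  	    rate_limit)
  		return rate_limit;

  	/* Otherwise use the per-domain rate_limit_us attribute, if any. */
  	if (!perf_ops->rate_limit_get(ph, domain, &rate_limit))
  		return rate_limit;

  	return 0;
  }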