From d7de5d8e403a1293e10f8f59a1f2db320232c831 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 17 May 2023 21:10:20 +0200 Subject: [PATCH 01/74] cpupower: Bump soname version Several functions in the libcpupower API are renamed or removed in Linux 4.7. This is an backward-incompatible ABI change, so the library soname should change from libcpupower.so.0 to libcpupower.so.1. Fixes: ac5a181d065d ("cpupower: Add cpuidle parts into library") Signed-off-by: Ben Hutchings Signed-off-by: Salvatore Bonaccorso Signed-off-by: Shuah Khan --- tools/power/cpupower/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 59bfa05dec5d..dc531805a570 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -53,7 +53,7 @@ DESTDIR ?= VERSION:= $(shell ./utils/version-gen.sh) LIB_MAJ= 0.0.1 -LIB_MIN= 0 +LIB_MIN= 1 PACKAGE = cpupower PACKAGE_BUGREPORT = linux-pm@vger.kernel.org From a1cf97c2d43ae51f3de091d51adff6b70a5cd55c Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Mon, 19 Jun 2023 19:04:59 +0000 Subject: [PATCH 02/74] cpupower: Recognise amd-pstate active mode driver amd-pstate active mode driver name is "amd-pstate-epp". Use common prefix for string matching condition to recognise amd-pstate active mode driver. Reviewed-by: Gautham R. Shenoy Signed-off-by: Wyes Karny Reviewed-by: Mario Limonciello Tested-by: Perry Yuan Acked-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 9547b29254a7..0c56fc77f93b 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -95,7 +95,7 @@ bool cpupower_amd_pstate_enabled(void) if (!driver) return ret; - if (!strcmp(driver, "amd-pstate")) + if (!strncmp(driver, "amd", 3)) ret = true; cpufreq_put_driver(driver); From 1ce5ab7c1dbfa82f0d382a48cccb51178cbf5416 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Mon, 19 Jun 2023 19:05:00 +0000 Subject: [PATCH 03/74] cpupower: Add is_valid_path API Add is_valid_path API to check whether the sysfs file is present or not. Suggested-by: Gautham R. Shenoy Reviewed-by: Mario Limonciello Signed-off-by: Wyes Karny Tested-by: Perry Yuan Acked-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/cpupower.c | 7 +++++++ tools/power/cpupower/lib/cpupower_intern.h | 1 + 2 files changed, 8 insertions(+) diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index 3f7d0c0c5067..7a2ef691b20e 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -14,6 +14,13 @@ #include "cpupower.h" #include "cpupower_intern.h" +int is_valid_path(const char *path) +{ + if (access(path, F_OK) == -1) + return 0; + return 1; +} + unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen) { ssize_t numread; diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h index ac1112b956ec..5fdb8620d41b 100644 --- a/tools/power/cpupower/lib/cpupower_intern.h +++ b/tools/power/cpupower/lib/cpupower_intern.h @@ -7,5 +7,6 @@ #define SYSFS_PATH_MAX 255 +int is_valid_path(const char *path); unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen); unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen); From f2ab5557119a5ccd0ceaf7ecdca00ab782cd76c5 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Mon, 19 Jun 2023 19:05:01 +0000 Subject: [PATCH 04/74] cpupower: Add EPP value change support amd_pstate and intel_pstate active mode drivers support energy performance preference feature. Through this user can convey it's energy/performance preference to platform. Add this value change capability to cpupower. To change the EPP value use below command: cpupower set --epp performance Reviewed-by: Mario Limonciello Signed-off-by: Wyes Karny Tested-by: Perry Yuan Acked-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpupower-set.c | 23 +++++++++++++++++++- tools/power/cpupower/utils/helpers/helpers.h | 5 +++++ tools/power/cpupower/utils/helpers/misc.c | 19 ++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 180d5ba877e6..a789b123dbd4 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -18,6 +18,7 @@ static struct option set_opts[] = { {"perf-bias", required_argument, NULL, 'b'}, + {"epp", required_argument, NULL, 'e'}, { }, }; @@ -37,11 +38,13 @@ int cmd_set(int argc, char **argv) union { struct { int perf_bias:1; + int epp:1; }; int params; } params; int perf_bias = 0; int ret = 0; + char epp[30]; ret = uname(&uts); if (!ret && (!strcmp(uts.machine, "ppc64le") || @@ -55,7 +58,7 @@ int cmd_set(int argc, char **argv) params.params = 0; /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "b:", + while ((ret = getopt_long(argc, argv, "b:e:", set_opts, NULL)) != -1) { switch (ret) { case 'b': @@ -69,6 +72,15 @@ int cmd_set(int argc, char **argv) } params.perf_bias = 1; break; + case 'e': + if (params.epp) + print_wrong_arg_exit(); + if (sscanf(optarg, "%29s", epp) != 1) { + print_wrong_arg_exit(); + return -EINVAL; + } + params.epp = 1; + break; default: print_wrong_arg_exit(); } @@ -102,6 +114,15 @@ int cmd_set(int argc, char **argv) break; } } + + if (params.epp) { + ret = cpupower_set_epp(cpu, epp); + if (ret) { + fprintf(stderr, + "Error setting epp value on CPU %d\n", cpu); + break; + } + } } return ret; } diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 96e4bede078b..5d998de2d291 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -116,6 +116,8 @@ extern int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val); extern int cpupower_intel_get_perf_bias(unsigned int cpu); extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); +extern int cpupower_set_epp(unsigned int cpu, char *epp); + /* Read/Write msr ****************************/ /* PCI stuff ****************************/ @@ -173,6 +175,9 @@ static inline int cpupower_intel_get_perf_bias(unsigned int cpu) static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu) { return 0; }; +static inline int cpupower_set_epp(unsigned int cpu, char *epp) +{ return -1; }; + /* Read/Write msr ****************************/ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 0c56fc77f93b..583df38ab13c 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -87,6 +87,25 @@ int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val) return 0; } +int cpupower_set_epp(unsigned int cpu, char *epp) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[30] = {}; + + snprintf(path, sizeof(path), + PATH_TO_CPU "cpu%u/cpufreq/energy_performance_preference", cpu); + + if (!is_valid_path(path)) + return -1; + + snprintf(linebuf, sizeof(linebuf), "%s", epp); + + if (cpupower_write_sysfs(path, linebuf, 30) <= 0) + return -1; + + return 0; +} + bool cpupower_amd_pstate_enabled(void) { char *driver = cpufreq_get_driver(0); From df8776b03689987e3239f3c010b6dc6ab4185d30 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Mon, 19 Jun 2023 19:05:02 +0000 Subject: [PATCH 05/74] cpupower: Add support for amd_pstate mode change amd_pstate supports changing of its mode dynamically via `status` sysfs file. Add the same capability in cpupower. To change the mode to active mode use below command: cpupower set --amd-pstate-mode active Acked-by: Huang Rui Reviewed-by: Gautham R. Shenoy Reviewed-by: Mario Limonciello Signed-off-by: Wyes Karny Tested-by: Perry Yuan Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpupower-set.c | 24 ++++++++++++++++++-- tools/power/cpupower/utils/helpers/helpers.h | 3 +++ tools/power/cpupower/utils/helpers/misc.c | 18 +++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index a789b123dbd4..c2ba69b7ea54 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -19,6 +19,7 @@ static struct option set_opts[] = { {"perf-bias", required_argument, NULL, 'b'}, {"epp", required_argument, NULL, 'e'}, + {"amd-pstate-mode", required_argument, NULL, 'm'}, { }, }; @@ -39,12 +40,13 @@ int cmd_set(int argc, char **argv) struct { int perf_bias:1; int epp:1; + int mode:1; }; int params; } params; int perf_bias = 0; int ret = 0; - char epp[30]; + char epp[30], mode[20]; ret = uname(&uts); if (!ret && (!strcmp(uts.machine, "ppc64le") || @@ -58,7 +60,7 @@ int cmd_set(int argc, char **argv) params.params = 0; /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "b:e:", + while ((ret = getopt_long(argc, argv, "b:e:m:", set_opts, NULL)) != -1) { switch (ret) { case 'b': @@ -81,6 +83,17 @@ int cmd_set(int argc, char **argv) } params.epp = 1; break; + case 'm': + if (cpupower_cpu_info.vendor != X86_VENDOR_AMD) + print_wrong_arg_exit(); + if (params.mode) + print_wrong_arg_exit(); + if (sscanf(optarg, "%19s", mode) != 1) { + print_wrong_arg_exit(); + return -EINVAL; + } + params.mode = 1; + break; default: print_wrong_arg_exit(); } @@ -89,6 +102,12 @@ int cmd_set(int argc, char **argv) if (!params.params) print_wrong_arg_exit(); + if (params.mode) { + ret = cpupower_set_amd_pstate_mode(mode); + if (ret) + fprintf(stderr, "Error setting mode\n"); + } + /* Default is: set all CPUs */ if (bitmask_isallclear(cpus_chosen)) bitmask_setall(cpus_chosen); @@ -123,6 +142,7 @@ int cmd_set(int argc, char **argv) break; } } + } return ret; } diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 5d998de2d291..d35596631eef 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -117,6 +117,7 @@ extern int cpupower_intel_get_perf_bias(unsigned int cpu); extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); extern int cpupower_set_epp(unsigned int cpu, char *epp); +extern int cpupower_set_amd_pstate_mode(char *mode); /* Read/Write msr ****************************/ @@ -177,6 +178,8 @@ static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu) static inline int cpupower_set_epp(unsigned int cpu, char *epp) { return -1; }; +static inline int cpupower_set_amd_pstate_mode(char *mode) +{ return -1; }; /* Read/Write msr ****************************/ diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 583df38ab13c..075c136a100c 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -106,6 +106,24 @@ int cpupower_set_epp(unsigned int cpu, char *epp) return 0; } +int cpupower_set_amd_pstate_mode(char *mode) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[20] = {}; + + snprintf(path, sizeof(path), PATH_TO_CPU "amd_pstate/status"); + + if (!is_valid_path(path)) + return -1; + + snprintf(linebuf, sizeof(linebuf), "%s\n", mode); + + if (cpupower_write_sysfs(path, linebuf, 20) <= 0) + return -1; + + return 0; +} + bool cpupower_amd_pstate_enabled(void) { char *driver = cpufreq_get_driver(0); From eb426fc6bdd6b731cbc0900f24a8c5fba10a7631 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Mon, 19 Jun 2023 19:05:03 +0000 Subject: [PATCH 06/74] cpupower: Add turbo-boost support in cpupower If boost sysfs (/sys/devices/system/cpu/cpufreq/boost) file is present turbo-boost is feature is supported in the hardware. By default this feature should be enabled. But to disable/enable it write to the sysfs file. Use the same to control this feature via cpupower. To enable: cpupower set --turbo-boost 1 To disable: cpupower set --turbo-boost 0 Acked-by: Huang Rui Reviewed-by: Gautham R. Shenoy Reviewed-by: Mario Limonciello Signed-off-by: Wyes Karny Tested-by: Perry Yuan Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpupower-set.c | 22 +++++++++++++++++++- tools/power/cpupower/utils/helpers/helpers.h | 3 +++ tools/power/cpupower/utils/helpers/misc.c | 18 ++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index c2ba69b7ea54..0677b58374ab 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -20,6 +20,7 @@ static struct option set_opts[] = { {"perf-bias", required_argument, NULL, 'b'}, {"epp", required_argument, NULL, 'e'}, {"amd-pstate-mode", required_argument, NULL, 'm'}, + {"turbo-boost", required_argument, NULL, 't'}, { }, }; @@ -41,10 +42,11 @@ int cmd_set(int argc, char **argv) int perf_bias:1; int epp:1; int mode:1; + int turbo_boost:1; }; int params; } params; - int perf_bias = 0; + int perf_bias = 0, turbo_boost = 1; int ret = 0; char epp[30], mode[20]; @@ -94,6 +96,18 @@ int cmd_set(int argc, char **argv) } params.mode = 1; break; + case 't': + if (params.turbo_boost) + print_wrong_arg_exit(); + turbo_boost = atoi(optarg); + if (turbo_boost < 0 || turbo_boost > 1) { + printf("--turbo-boost param out of range [0-1]\n"); + print_wrong_arg_exit(); + } + params.turbo_boost = 1; + break; + + default: print_wrong_arg_exit(); } @@ -108,6 +122,12 @@ int cmd_set(int argc, char **argv) fprintf(stderr, "Error setting mode\n"); } + if (params.turbo_boost) { + ret = cpupower_set_turbo_boost(turbo_boost); + if (ret) + fprintf(stderr, "Error setting turbo-boost\n"); + } + /* Default is: set all CPUs */ if (bitmask_isallclear(cpus_chosen)) bitmask_setall(cpus_chosen); diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index d35596631eef..95749b8ee475 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -118,6 +118,7 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); extern int cpupower_set_epp(unsigned int cpu, char *epp); extern int cpupower_set_amd_pstate_mode(char *mode); +extern int cpupower_set_turbo_boost(int turbo_boost); /* Read/Write msr ****************************/ @@ -180,6 +181,8 @@ static inline int cpupower_set_epp(unsigned int cpu, char *epp) { return -1; }; static inline int cpupower_set_amd_pstate_mode(char *mode) { return -1; }; +static inline int cpupower_set_turbo_boost(int turbo_boost) +{ return -1; }; /* Read/Write msr ****************************/ diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 075c136a100c..76e461ff4f74 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -124,6 +124,24 @@ int cpupower_set_amd_pstate_mode(char *mode) return 0; } +int cpupower_set_turbo_boost(int turbo_boost) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); + + if (!is_valid_path(path)) + return -1; + + snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost); + + if (cpupower_write_sysfs(path, linebuf, 2) <= 0) + return -1; + + return 0; +} + bool cpupower_amd_pstate_enabled(void) { char *driver = cpufreq_get_driver(0); From 99481d2195bfd13a663904e6014887abf46b57c7 Mon Sep 17 00:00:00 2001 From: Likhitha Korrapati Date: Fri, 14 Jul 2023 15:10:21 +0530 Subject: [PATCH 07/74] cpupower: Fix cpuidle_set to accept only numeric values for idle-set operation. For both the d and e options in 'cpupower idle_set' command, an atoi() conversion is done without checking if the input argument is all numeric. So, an atoi conversion is done on any character provided as input and the CPU idle_set operation continues with that integer value, which may not be what is intended or entirely correct. The output of cpuidle-set before patch is as follows: [root@xxx cpupower]# cpupower idle-set -e 1$ Idlestate 1 enabled on CPU 0 [snip] Idlestate 1 enabled on CPU 47 [root@xxx cpupower]# cpupower idle-set -e 11 Idlestate 11 not available on CPU 0 [snip] Idlestate 11 not available on CPU 47 [root@xxx cpupower]# cpupower idle-set -d 12 Idlestate 12 not available on CPU 0 [snip] Idlestate 12 not available on CPU 47 [root@xxx cpupower]# cpupower idle-set -d qw Idlestate 0 disabled on CPU 0 [snip] Idlestate 0 disabled on CPU 47 This patch adds a check for both d and e options in cpuidle-set.c to see that the idle_set value is all numeric before doing a string-to-int conversion using strtol(). The output of cpuidle-set after the patch is as below: [root@xxx cpupower]# ./cpupower idle-set -e 1$ Bad idle_set value: 1$. Integer expected [root@xxx cpupower]# ./cpupower idle-set -e 11 Idlestate 11 not available on CPU 0 [snip] Idlestate 11 not available on CPU 47 [root@xxx cpupower]# ./cpupower idle-set -d 12 Idlestate 12 not available on CPU 0 [snip] Idlestate 12 not available on CPU 47 [root@xxx cpupower]# ./cpupower idle-set -d qw Bad idle_set value: qw. Integer expected Signed-off-by: Brahadambal Srinivasan Signed-off-by: Likhitha Korrapati Tested-by: Pavithra Prakash Reviewed-by: Rick Lindsley Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpuidle-set.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index 46158928f9ad..a551d1d4ac51 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -41,14 +41,6 @@ int cmd_idle_set(int argc, char **argv) cont = 0; break; case 'd': - if (param) { - param = -1; - cont = 0; - break; - } - param = ret; - idlestate = atoi(optarg); - break; case 'e': if (param) { param = -1; @@ -56,7 +48,13 @@ int cmd_idle_set(int argc, char **argv) break; } param = ret; - idlestate = atoi(optarg); + strtol(optarg, &endptr, 10); + if (*endptr != '\0') { + printf(_("Bad value: %s, Integer expected\n"), optarg); + exit(EXIT_FAILURE); + } else { + idlestate = atoi(optarg); + } break; case 'D': if (param) { From a70eb93a2477371638ef481aaae7bb7b760d3004 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 14 Jul 2023 11:44:13 -0600 Subject: [PATCH 08/74] cpufreq: Explicitly include correct DT includes The DT of_device.h and of_platform.h date back to the separate of_platform_bus_type before it as merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to detangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes. Signed-off-by: Rob Herring Acked-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-37xx-cpufreq.c | 4 +--- drivers/cpufreq/mediatek-cpufreq-hw.c | 3 ++- drivers/cpufreq/ppc_cbe_cpufreq.c | 2 +- drivers/cpufreq/ppc_cbe_cpufreq_pmi.c | 1 - drivers/cpufreq/qcom-cpufreq-nvmem.c | 1 - drivers/cpufreq/scpi-cpufreq.c | 2 +- drivers/cpufreq/sti-cpufreq.c | 2 +- drivers/cpufreq/ti-cpufreq.c | 2 +- drivers/cpufreq/vexpress-spc-cpufreq.c | 1 - 9 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index b74289a95a17..bea41ccabf1f 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -14,10 +14,8 @@ #include #include #include +#include #include -#include -#include -#include #include #include #include diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index b22f5cc8a463..c93e14eb6221 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -10,8 +10,9 @@ #include #include #include -#include +#include #include +#include #include #define LUT_MAX_ENTRIES 32U diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index e3313ce63b38..88afc49941b7 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 4fba3637b115..6f0c32592416 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index a88b6fe5db50..4590c2570086 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index fd2c16821d54..ac719aca49b7 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 1a63aeea8711..9c542e723a15 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index d5cd2fd25cad..3c37d7899660 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index d295f405c4bb..865e50164803 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include From a7fb1727537253460466b840df0ce0f860f29947 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:04 +0800 Subject: [PATCH 09/74] cpufreq: sun50i: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Acked-by: Jernej Škrabec Signed-off-by: Viresh Kumar --- drivers/cpufreq/sun50i-cpufreq-nvmem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 4321d7bbe769..32a9c88f8ff6 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -137,7 +137,7 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev) return ret; } -static int sun50i_cpufreq_nvmem_remove(struct platform_device *pdev) +static void sun50i_cpufreq_nvmem_remove(struct platform_device *pdev) { int *opp_tokens = platform_get_drvdata(pdev); unsigned int cpu; @@ -148,13 +148,11 @@ static int sun50i_cpufreq_nvmem_remove(struct platform_device *pdev) dev_pm_opp_put_prop_name(opp_tokens[cpu]); kfree(opp_tokens); - - return 0; } static struct platform_driver sun50i_cpufreq_driver = { .probe = sun50i_cpufreq_nvmem_probe, - .remove = sun50i_cpufreq_nvmem_remove, + .remove_new = sun50i_cpufreq_nvmem_remove, .driver = { .name = "sun50i-cpufreq-nvmem", }, From 18da417686b3db681b1b77a925e4b60d73612e17 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:05 +0800 Subject: [PATCH 10/74] cpufreq: dt: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 4aec4b2a5225..8bd6e5e8f121 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -349,11 +349,10 @@ static int dt_cpufreq_probe(struct platform_device *pdev) return ret; } -static int dt_cpufreq_remove(struct platform_device *pdev) +static void dt_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&dt_cpufreq_driver); dt_cpufreq_release(); - return 0; } static struct platform_driver dt_cpufreq_platdrv = { @@ -361,7 +360,7 @@ static struct platform_driver dt_cpufreq_platdrv = { .name = "cpufreq-dt", }, .probe = dt_cpufreq_probe, - .remove = dt_cpufreq_remove, + .remove_new = dt_cpufreq_remove, }; module_platform_driver(dt_cpufreq_platdrv); From 573d54dba2825bf7b5447fbaef4d95a3bf3582f4 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:06 +0800 Subject: [PATCH 11/74] cpufreq: qcom-cpufreq-hw: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index f2830371d25f..f24cf2eddf1e 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -730,16 +730,14 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) return ret; } -static int qcom_cpufreq_hw_driver_remove(struct platform_device *pdev) +static void qcom_cpufreq_hw_driver_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&cpufreq_qcom_hw_driver); - - return 0; } static struct platform_driver qcom_cpufreq_hw_driver = { .probe = qcom_cpufreq_hw_driver_probe, - .remove = qcom_cpufreq_hw_driver_remove, + .remove_new = qcom_cpufreq_hw_driver_remove, .driver = { .name = "qcom-cpufreq-hw", .of_match_table = qcom_cpufreq_hw_match, From 9ffb053dc51058cef3d49aca9881380282257797 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:07 +0800 Subject: [PATCH 12/74] cpufreq: vexpress: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Reviewed-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/vexpress-spc-cpufreq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index 865e50164803..9ac4ea50b874 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -551,7 +551,7 @@ static int ve_spc_cpufreq_probe(struct platform_device *pdev) return ret; } -static int ve_spc_cpufreq_remove(struct platform_device *pdev) +static void ve_spc_cpufreq_remove(struct platform_device *pdev) { bL_switcher_get_enabled(); __bLs_unregister_notifier(); @@ -559,7 +559,6 @@ static int ve_spc_cpufreq_remove(struct platform_device *pdev) bL_switcher_put_enabled(); pr_info("%s: Un-registered platform driver: %s\n", __func__, ve_spc_cpufreq_driver.name); - return 0; } static struct platform_driver ve_spc_cpufreq_platdrv = { @@ -567,7 +566,7 @@ static struct platform_driver ve_spc_cpufreq_platdrv = { .name = "vexpress-spc-cpufreq", }, .probe = ve_spc_cpufreq_probe, - .remove = ve_spc_cpufreq_remove, + .remove_new = ve_spc_cpufreq_remove, }; module_platform_driver(ve_spc_cpufreq_platdrv); From 1d61b32e500de0223eb42bf5642718a8801d9bc1 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:08 +0800 Subject: [PATCH 13/74] cpufreq: imx6q: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/imx6q-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 9fb1501033bb..494d044b9e72 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -519,7 +519,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) return ret; } -static int imx6q_cpufreq_remove(struct platform_device *pdev) +static void imx6q_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&imx6q_cpufreq_driver); dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); @@ -530,8 +530,6 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) regulator_put(soc_reg); clk_bulk_put(num_clks, clks); - - return 0; } static struct platform_driver imx6q_cpufreq_platdrv = { @@ -539,7 +537,7 @@ static struct platform_driver imx6q_cpufreq_platdrv = { .name = "imx6q-cpufreq", }, .probe = imx6q_cpufreq_probe, - .remove = imx6q_cpufreq_remove, + .remove_new = imx6q_cpufreq_remove, }; module_platform_driver(imx6q_cpufreq_platdrv); From b68ea4c2fb89f690c6d5b34002e3ea40cbecb8a2 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:09 +0800 Subject: [PATCH 14/74] cpufreq: mediatek-hw: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq-hw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index c93e14eb6221..d46afb3c0092 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -316,11 +316,9 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) return ret; } -static int mtk_cpufreq_hw_driver_remove(struct platform_device *pdev) +static void mtk_cpufreq_hw_driver_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&cpufreq_mtk_hw_driver); - - return 0; } static const struct of_device_id mtk_cpufreq_hw_match[] = { @@ -331,7 +329,7 @@ MODULE_DEVICE_TABLE(of, mtk_cpufreq_hw_match); static struct platform_driver mtk_cpufreq_hw_driver = { .probe = mtk_cpufreq_hw_driver_probe, - .remove = mtk_cpufreq_hw_driver_remove, + .remove_new = mtk_cpufreq_hw_driver_remove, .driver = { .name = "mtk-cpufreq-hw", .of_match_table = mtk_cpufreq_hw_match, From d5aa35fcf3ee0dc7319d12132ed14e61f38007ed Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:10 +0800 Subject: [PATCH 15/74] cpufreq: scpi: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Reviewed-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/scpi-cpufreq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index ac719aca49b7..d33be56983ed 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -208,11 +208,10 @@ static int scpi_cpufreq_probe(struct platform_device *pdev) return ret; } -static int scpi_cpufreq_remove(struct platform_device *pdev) +static void scpi_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&scpi_cpufreq_driver); scpi_ops = NULL; - return 0; } static struct platform_driver scpi_cpufreq_platdrv = { @@ -220,7 +219,7 @@ static struct platform_driver scpi_cpufreq_platdrv = { .name = "scpi-cpufreq", }, .probe = scpi_cpufreq_probe, - .remove = scpi_cpufreq_remove, + .remove_new = scpi_cpufreq_remove, }; module_platform_driver(scpi_cpufreq_platdrv); From 552f8df83e685c666d0df59559253fe02679f966 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:11 +0800 Subject: [PATCH 16/74] cpufreq: tegra194: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra194-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index 36dad5ea5947..c90b30469165 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -708,12 +708,10 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) return err; } -static int tegra194_cpufreq_remove(struct platform_device *pdev) +static void tegra194_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&tegra194_cpufreq_driver); tegra194_cpufreq_free_resources(); - - return 0; } static const struct of_device_id tegra194_cpufreq_of_match[] = { @@ -730,7 +728,7 @@ static struct platform_driver tegra194_ccplex_driver = { .of_match_table = tegra194_cpufreq_of_match, }, .probe = tegra194_cpufreq_probe, - .remove = tegra194_cpufreq_remove, + .remove_new = tegra194_cpufreq_remove, }; module_platform_driver(tegra194_ccplex_driver); From f1154d65d12feda9ddba36bad4b91984e6a8db36 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:12 +0800 Subject: [PATCH 17/74] cpufreq: brcmstb-avs-cpufreq: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Reviewed-by: Florian Fainelli Signed-off-by: Viresh Kumar --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index ffea6402189d..1bdd513bcd19 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -749,13 +749,11 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev) return ret; } -static int brcm_avs_cpufreq_remove(struct platform_device *pdev) +static void brcm_avs_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&brcm_avs_driver); brcm_avs_prepare_uninit(pdev); - - return 0; } static const struct of_device_id brcm_avs_cpufreq_match[] = { @@ -770,7 +768,7 @@ static struct platform_driver brcm_avs_cpufreq_platdrv = { .of_match_table = brcm_avs_cpufreq_match, }, .probe = brcm_avs_cpufreq_probe, - .remove = brcm_avs_cpufreq_remove, + .remove_new = brcm_avs_cpufreq_remove, }; module_platform_driver(brcm_avs_cpufreq_platdrv); From 8d09c46a494af814e059c77f4a1a3319776594e0 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:13 +0800 Subject: [PATCH 18/74] cpufreq: imx-cpufreq-dt: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/imx-cpufreq-dt.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c index 535867a7dfdd..577bb9e2f112 100644 --- a/drivers/cpufreq/imx-cpufreq-dt.c +++ b/drivers/cpufreq/imx-cpufreq-dt.c @@ -172,20 +172,18 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev) return 0; } -static int imx_cpufreq_dt_remove(struct platform_device *pdev) +static void imx_cpufreq_dt_remove(struct platform_device *pdev) { platform_device_unregister(cpufreq_dt_pdev); if (!of_machine_is_compatible("fsl,imx7ulp")) dev_pm_opp_put_supported_hw(cpufreq_opp_token); else clk_bulk_put(ARRAY_SIZE(imx7ulp_clks), imx7ulp_clks); - - return 0; } static struct platform_driver imx_cpufreq_dt_driver = { .probe = imx_cpufreq_dt_probe, - .remove = imx_cpufreq_dt_remove, + .remove_new = imx_cpufreq_dt_remove, .driver = { .name = "imx-cpufreq-dt", }, From 463ff6d4d42d5672b0835e10a8144ba1832c497f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:14 +0800 Subject: [PATCH 19/74] cpufreq: davinci: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/davinci-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index ebb3a8102681..7d2754411d8c 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -131,7 +131,7 @@ static int __init davinci_cpufreq_probe(struct platform_device *pdev) return cpufreq_register_driver(&davinci_driver); } -static int __exit davinci_cpufreq_remove(struct platform_device *pdev) +static void __exit davinci_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&davinci_driver); @@ -139,15 +139,13 @@ static int __exit davinci_cpufreq_remove(struct platform_device *pdev) if (cpufreq.asyncclk) clk_put(cpufreq.asyncclk); - - return 0; } static struct platform_driver davinci_cpufreq_driver = { .driver = { .name = "cpufreq-davinci", }, - .remove = __exit_p(davinci_cpufreq_remove), + .remove_new = __exit_p(davinci_cpufreq_remove), }; int __init davinci_cpufreq_init(void) From 46ebd4d865f4383ce3c45dc97d581614f84a6ac2 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:15 +0800 Subject: [PATCH 20/74] cpufreq: raspberrypi: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Reviewed-by: Florian Fainelli Signed-off-by: Viresh Kumar --- drivers/cpufreq/raspberrypi-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/raspberrypi-cpufreq.c b/drivers/cpufreq/raspberrypi-cpufreq.c index 2bc7d9734272..e0705cc9a57d 100644 --- a/drivers/cpufreq/raspberrypi-cpufreq.c +++ b/drivers/cpufreq/raspberrypi-cpufreq.c @@ -65,7 +65,7 @@ static int raspberrypi_cpufreq_probe(struct platform_device *pdev) return ret; } -static int raspberrypi_cpufreq_remove(struct platform_device *pdev) +static void raspberrypi_cpufreq_remove(struct platform_device *pdev) { struct device *cpu_dev; @@ -74,8 +74,6 @@ static int raspberrypi_cpufreq_remove(struct platform_device *pdev) dev_pm_opp_remove_all_dynamic(cpu_dev); platform_device_unregister(cpufreq_dt); - - return 0; } /* @@ -87,7 +85,7 @@ static struct platform_driver raspberrypi_cpufreq_driver = { .name = "raspberrypi-cpufreq", }, .probe = raspberrypi_cpufreq_probe, - .remove = raspberrypi_cpufreq_remove, + .remove_new = raspberrypi_cpufreq_remove, }; module_platform_driver(raspberrypi_cpufreq_driver); From d0988eaaa31d29c6bff1d079b42ea6c2ef9cab5e Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:16 +0800 Subject: [PATCH 21/74] cpufreq: pcc-cpufreq: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Acked-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar --- drivers/cpufreq/pcc-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 73efbcf5513b..84fe37def0f1 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -608,22 +608,20 @@ static int __init pcc_cpufreq_probe(struct platform_device *pdev) return ret; } -static int pcc_cpufreq_remove(struct platform_device *pdev) +static void pcc_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&pcc_cpufreq_driver); pcc_clear_mapping(); free_percpu(pcc_cpu_info); - - return 0; } static struct platform_driver pcc_cpufreq_platdrv = { .driver = { .name = "pcc-cpufreq", }, - .remove = pcc_cpufreq_remove, + .remove_new = pcc_cpufreq_remove, }; static int __init pcc_cpufreq_init(void) From cc35f433543f3db25da37e6a663f07f970a03caf Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:17 +0800 Subject: [PATCH 22/74] cpufreq: kirkwood: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/kirkwood-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index 95588101efbd..fd20b986d1f2 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -178,20 +178,18 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) return err; } -static int kirkwood_cpufreq_remove(struct platform_device *pdev) +static void kirkwood_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&kirkwood_cpufreq_driver); clk_disable_unprepare(priv.powersave_clk); clk_disable_unprepare(priv.ddr_clk); clk_disable_unprepare(priv.cpu_clk); - - return 0; } static struct platform_driver kirkwood_cpufreq_platform_driver = { .probe = kirkwood_cpufreq_probe, - .remove = kirkwood_cpufreq_remove, + .remove_new = kirkwood_cpufreq_remove, .driver = { .name = "kirkwood-cpufreq", }, From 402732324b17a31f7e5dce9659d1b1f049fd65d3 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:18 +0800 Subject: [PATCH 23/74] cpufreq: qcom-nvmem: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 4590c2570086..84d7033e5efe 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -333,7 +333,7 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) return ret; } -static int qcom_cpufreq_remove(struct platform_device *pdev) +static void qcom_cpufreq_remove(struct platform_device *pdev) { struct qcom_cpufreq_drv *drv = platform_get_drvdata(pdev); unsigned int cpu; @@ -345,13 +345,11 @@ static int qcom_cpufreq_remove(struct platform_device *pdev) kfree(drv->opp_tokens); kfree(drv); - - return 0; } static struct platform_driver qcom_cpufreq_driver = { .probe = qcom_cpufreq_probe, - .remove = qcom_cpufreq_remove, + .remove_new = qcom_cpufreq_remove, .driver = { .name = "qcom-cpufreq-nvmem", }, From b196622358c2dcdd70e0984eacf7105615a245be Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:19 +0800 Subject: [PATCH 24/74] cpufreq: tegra186: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra186-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index f98f53bf1011..7b8fcfa55038 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -259,11 +259,9 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev) return err; } -static int tegra186_cpufreq_remove(struct platform_device *pdev) +static void tegra186_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&tegra186_cpufreq_driver); - - return 0; } static const struct of_device_id tegra186_cpufreq_of_match[] = { @@ -278,7 +276,7 @@ static struct platform_driver tegra186_cpufreq_platform_driver = { .of_match_table = tegra186_cpufreq_of_match, }, .probe = tegra186_cpufreq_probe, - .remove = tegra186_cpufreq_remove, + .remove_new = tegra186_cpufreq_remove, }; module_platform_driver(tegra186_cpufreq_platform_driver); From 1cd04adf97e9e7da5872d4535d25b6ec04c19d4f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:20 +0800 Subject: [PATCH 25/74] cpufreq: acpi: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Acked-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar --- drivers/cpufreq/acpi-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index b2f05d27167e..37f1cdf46d29 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -1011,22 +1011,20 @@ static int __init acpi_cpufreq_probe(struct platform_device *pdev) return ret; } -static int acpi_cpufreq_remove(struct platform_device *pdev) +static void acpi_cpufreq_remove(struct platform_device *pdev) { pr_debug("%s\n", __func__); cpufreq_unregister_driver(&acpi_cpufreq_driver); free_acpi_perf_data(); - - return 0; } static struct platform_driver acpi_cpufreq_platdrv = { .driver = { .name = "acpi-cpufreq", }, - .remove = acpi_cpufreq_remove, + .remove_new = acpi_cpufreq_remove, }; static int __init acpi_cpufreq_init(void) From a8cf9284a794abdaadc91e409e9f233621ea07f8 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:21 +0800 Subject: [PATCH 26/74] cpufreq: qoriq: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/qoriq-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 573b417e1483..0aecaecbb0e6 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -288,11 +288,9 @@ static int qoriq_cpufreq_probe(struct platform_device *pdev) return 0; } -static int qoriq_cpufreq_remove(struct platform_device *pdev) +static void qoriq_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&qoriq_cpufreq_driver); - - return 0; } static struct platform_driver qoriq_cpufreq_platform_driver = { @@ -300,7 +298,7 @@ static struct platform_driver qoriq_cpufreq_platform_driver = { .name = "qoriq-cpufreq", }, .probe = qoriq_cpufreq_probe, - .remove = qoriq_cpufreq_remove, + .remove_new = qoriq_cpufreq_remove, }; module_platform_driver(qoriq_cpufreq_platform_driver); From fd459406b99d4a0c641a02aaaf56b585571566cf Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:22 +0800 Subject: [PATCH 27/74] cpufreq: omap: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/omap-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 81649a1969b6..895690856665 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -182,11 +182,9 @@ static int omap_cpufreq_probe(struct platform_device *pdev) return cpufreq_register_driver(&omap_driver); } -static int omap_cpufreq_remove(struct platform_device *pdev) +static void omap_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&omap_driver); - - return 0; } static struct platform_driver omap_cpufreq_platdrv = { @@ -194,7 +192,7 @@ static struct platform_driver omap_cpufreq_platdrv = { .name = "omap-cpufreq", }, .probe = omap_cpufreq_probe, - .remove = omap_cpufreq_remove, + .remove_new = omap_cpufreq_remove, }; module_platform_driver(omap_cpufreq_platdrv); From 3e767d6850f867cc33ac16ca097350a1d2417982 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 18 Jul 2023 11:17:26 +0100 Subject: [PATCH 28/74] powercap: arm_scmi: Remove recursion while parsing zones Powercap zones can be defined as arranged in a hierarchy of trees and when registering a zone with powercap_register_zone(), the kernel powercap subsystem expects this to happen starting from the root zones down to the leaves; on the other side, de-registration by powercap_deregister_zone() must begin from the leaf zones. Available SCMI powercap zones are retrieved dynamically from the platform at probe time and, while any defined hierarchy between the zones is described properly in the zones descriptor, the platform returns the availables zones with no particular well-defined order: as a consequence, the trees possibly composing the hierarchy of zones have to be somehow walked properly to register the retrieved zones from the root. Currently the ARM SCMI Powercap driver walks the zones using a recursive algorithm; this approach, even though correct and tested can lead to kernel stack overflow when processing a returned hierarchy of zones composed by particularly high trees. Avoid possible kernel stack overflow by substituting the recursive approach with an iterative one supported by a dynamically allocated stack-like data structure. Fixes: b55eef5226b7 ("powercap: arm_scmi: Add SCMI Powercap based driver") Signed-off-by: Cristian Marussi Acked-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/powercap/arm_scmi_powercap.c | 159 ++++++++++++++++----------- 1 file changed, 92 insertions(+), 67 deletions(-) diff --git a/drivers/powercap/arm_scmi_powercap.c b/drivers/powercap/arm_scmi_powercap.c index 5231f6d52ae3..a081f177e702 100644 --- a/drivers/powercap/arm_scmi_powercap.c +++ b/drivers/powercap/arm_scmi_powercap.c @@ -12,6 +12,7 @@ #include #include #include +#include #define to_scmi_powercap_zone(z) \ container_of(z, struct scmi_powercap_zone, zone) @@ -19,6 +20,8 @@ static const struct scmi_powercap_proto_ops *powercap_ops; struct scmi_powercap_zone { + bool registered; + bool invalid; unsigned int height; struct device *dev; struct scmi_protocol_handle *ph; @@ -32,6 +35,7 @@ struct scmi_powercap_root { unsigned int num_zones; struct scmi_powercap_zone *spzones; struct list_head *registered_zones; + struct list_head scmi_zones; }; static struct powercap_control_type *scmi_top_pcntrl; @@ -271,12 +275,6 @@ static void scmi_powercap_unregister_all_zones(struct scmi_powercap_root *pr) } } -static inline bool -scmi_powercap_is_zone_registered(struct scmi_powercap_zone *spz) -{ - return !list_empty(&spz->node); -} - static inline unsigned int scmi_powercap_get_zone_height(struct scmi_powercap_zone *spz) { @@ -295,11 +293,46 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz) return &spz->spzones[spz->info->parent_id]; } +static int scmi_powercap_register_zone(struct scmi_powercap_root *pr, + struct scmi_powercap_zone *spz, + struct scmi_powercap_zone *parent) +{ + int ret = 0; + struct powercap_zone *z; + + if (spz->invalid) { + list_del(&spz->node); + return -EINVAL; + } + + z = powercap_register_zone(&spz->zone, scmi_top_pcntrl, spz->info->name, + parent ? &parent->zone : NULL, + &zone_ops, 1, &constraint_ops); + if (!IS_ERR(z)) { + spz->height = scmi_powercap_get_zone_height(spz); + spz->registered = true; + list_move(&spz->node, &pr->registered_zones[spz->height]); + dev_dbg(spz->dev, "Registered node %s - parent %s - height:%d\n", + spz->info->name, parent ? parent->info->name : "ROOT", + spz->height); + } else { + list_del(&spz->node); + ret = PTR_ERR(z); + dev_err(spz->dev, + "Error registering node:%s - parent:%s - h:%d - ret:%d\n", + spz->info->name, + parent ? parent->info->name : "ROOT", + spz->height, ret); + } + + return ret; +} + /** - * scmi_powercap_register_zone - Register an SCMI powercap zone recursively + * scmi_zones_register- Register SCMI powercap zones starting from parent zones * + * @dev: A reference to the SCMI device * @pr: A reference to the root powercap zones descriptors - * @spz: A reference to the SCMI powercap zone to register * * When registering SCMI powercap zones with the powercap framework we should * take care to always register zones starting from the root ones and to @@ -309,10 +342,10 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz) * zones provided by the SCMI platform firmware is built to comply with such * requirement. * - * This function, given an SCMI powercap zone to register, takes care to walk - * the SCMI powercap zones tree up to the root looking recursively for - * unregistered parent zones before registering the provided zone; at the same - * time each registered zone height in such a tree is accounted for and each + * This function, given the set of SCMI powercap zones to register, takes care + * to walk the SCMI powercap zones trees up to the root registering any + * unregistered parent zone before registering the child zones; at the same + * time each registered-zone height in such a tree is accounted for and each * zone, once registered, is stored in the @registered_zones array that is * indexed by zone height: this way will be trivial, at unregister time, to walk * the @registered_zones array backward and unregister all the zones starting @@ -330,57 +363,55 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz) * * Return: 0 on Success */ -static int scmi_powercap_register_zone(struct scmi_powercap_root *pr, - struct scmi_powercap_zone *spz) +static int scmi_zones_register(struct device *dev, + struct scmi_powercap_root *pr) { int ret = 0; - struct scmi_powercap_zone *parent; + unsigned int sp = 0, reg_zones = 0; + struct scmi_powercap_zone *spz, **zones_stack; - if (!spz->info) - return ret; + zones_stack = kcalloc(pr->num_zones, sizeof(spz), GFP_KERNEL); + if (!zones_stack) + return -ENOMEM; - parent = scmi_powercap_get_parent_zone(spz); - if (parent && !scmi_powercap_is_zone_registered(parent)) { - /* - * Bail out if a parent domain was marked as unsupported: - * only domains participating as leaves can be skipped. - */ - if (!parent->info) - return -ENODEV; + spz = list_first_entry_or_null(&pr->scmi_zones, + struct scmi_powercap_zone, node); + while (spz) { + struct scmi_powercap_zone *parent; - ret = scmi_powercap_register_zone(pr, parent); - if (ret) - return ret; - } - - if (!scmi_powercap_is_zone_registered(spz)) { - struct powercap_zone *z; - - z = powercap_register_zone(&spz->zone, - scmi_top_pcntrl, - spz->info->name, - parent ? &parent->zone : NULL, - &zone_ops, 1, &constraint_ops); - if (!IS_ERR(z)) { - spz->height = scmi_powercap_get_zone_height(spz); - list_add(&spz->node, - &pr->registered_zones[spz->height]); - dev_dbg(spz->dev, - "Registered node %s - parent %s - height:%d\n", - spz->info->name, - parent ? parent->info->name : "ROOT", - spz->height); - ret = 0; + parent = scmi_powercap_get_parent_zone(spz); + if (parent && !parent->registered) { + zones_stack[sp++] = spz; + spz = parent; } else { - ret = PTR_ERR(z); - dev_err(spz->dev, - "Error registering node:%s - parent:%s - h:%d - ret:%d\n", - spz->info->name, - parent ? parent->info->name : "ROOT", - spz->height, ret); + ret = scmi_powercap_register_zone(pr, spz, parent); + if (!ret) { + reg_zones++; + } else if (sp) { + /* Failed to register a non-leaf zone. + * Bail-out. + */ + dev_err(dev, + "Failed to register non-leaf zone - ret:%d\n", + ret); + scmi_powercap_unregister_all_zones(pr); + reg_zones = 0; + goto out; + } + /* Pick next zone to process */ + if (sp) + spz = zones_stack[--sp]; + else + spz = list_first_entry_or_null(&pr->scmi_zones, + struct scmi_powercap_zone, + node); } } +out: + kfree(zones_stack); + dev_info(dev, "Registered %d SCMI Powercap domains !\n", reg_zones); + return ret; } @@ -424,6 +455,8 @@ static int scmi_powercap_probe(struct scmi_device *sdev) if (!pr->registered_zones) return -ENOMEM; + INIT_LIST_HEAD(&pr->scmi_zones); + for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) { /* * Powercap domains are validate by the protocol layer, i.e. @@ -438,6 +471,7 @@ static int scmi_powercap_probe(struct scmi_device *sdev) INIT_LIST_HEAD(&spz->node); INIT_LIST_HEAD(&pr->registered_zones[i]); + list_add_tail(&spz->node, &pr->scmi_zones); /* * Forcibly skip powercap domains using an abstract scale. * Note that only leaves domains can be skipped, so this could @@ -448,7 +482,7 @@ static int scmi_powercap_probe(struct scmi_device *sdev) dev_warn(dev, "Abstract power scale not supported. Skip %s.\n", spz->info->name); - spz->info = NULL; + spz->invalid = true; continue; } } @@ -457,21 +491,12 @@ static int scmi_powercap_probe(struct scmi_device *sdev) * Scan array of retrieved SCMI powercap domains and register them * recursively starting from the root domains. */ - for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) { - ret = scmi_powercap_register_zone(pr, spz); - if (ret) { - dev_err(dev, - "Failed to register powercap zone %s - ret:%d\n", - spz->info->name, ret); - scmi_powercap_unregister_all_zones(pr); - return ret; - } - } + ret = scmi_zones_register(dev, pr); + if (ret) + return ret; dev_set_drvdata(dev, pr); - dev_info(dev, "Registered %d SCMI Powercap domains !\n", pr->num_zones); - return ret; } From 754833b3194c30dad5af0145e25192a8e29521ab Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 21 Jul 2023 13:34:54 +0530 Subject: [PATCH 29/74] OPP: Rearrange entries in pm_opp.h Rearrange the helper function declarations / definitions to keep them in order of freq, level and then bw. Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index dc1fb5890792..3821f50b9b89 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -121,17 +121,19 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); + struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); -struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, - unsigned int level); -struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, - unsigned int *level); - struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, + unsigned int level); + +struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, + unsigned int *level); + struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index); @@ -247,18 +249,6 @@ static inline unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev) return 0; } -static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, - unsigned int level) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, - unsigned int *level) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available) { @@ -277,6 +267,18 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, + unsigned int level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, + unsigned int *level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index) { From 142e17c1c2b48e3fb4f024e62ab6dee18f268694 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Jul 2023 11:10:52 +0530 Subject: [PATCH 30/74] OPP: Introduce dev_pm_opp_find_freq_{ceil/floor}_indexed() APIs In the case of devices with multiple clocks, drivers need to specify the clock index for the OPP framework to find the OPP corresponding to the floor/ceil of the supplied frequency. So let's introduce the two new APIs accepting the clock index as an argument. These APIs use the exising _find_key_ceil() helper by supplying the clock index to it. Signed-off-by: Manivannan Sadhasivam [ Viresh: Rearranged definitions in pm_opp.h ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 56 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 18 ++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 3f46e499d615..cb4f47b341f9 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -658,6 +658,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil); +/** + * dev_pm_opp_find_freq_ceil_indexed() - Search for a rounded ceil freq for the + * clock corresponding to the index + * @dev: Device for which we do this operation + * @freq: Start frequency + * @index: Clock index + * + * Search for the matching ceil *available* OPP for the clock corresponding to + * the specified index from a starting freq for a device. + * + * Return: matching *opp and refreshes *freq accordingly, else returns + * ERR_PTR in case of error and should be handled using IS_ERR. Error return + * values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp * +dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq, + u32 index) +{ + return _find_key_ceil(dev, freq, index, true, _read_freq, NULL); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_indexed); + /** * dev_pm_opp_find_freq_floor() - Search for a rounded floor freq * @dev: device for which we do this operation @@ -683,6 +711,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); +/** + * dev_pm_opp_find_freq_floor_indexed() - Search for a rounded floor freq for the + * clock corresponding to the index + * @dev: Device for which we do this operation + * @freq: Start frequency + * @index: Clock index + * + * Search for the matching floor *available* OPP for the clock corresponding to + * the specified index from a starting freq for a device. + * + * Return: matching *opp and refreshes *freq accordingly, else returns + * ERR_PTR in case of error and should be handled using IS_ERR. Error return + * values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp * +dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq, + u32 index) +{ + return _find_key_floor(dev, freq, index, true, _read_freq, NULL); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor_indexed); + /** * dev_pm_opp_find_level_exact() - search for an exact level * @dev: device for which we do this operation diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 3821f50b9b89..2617f2c51f29 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -125,9 +125,15 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_freq_floor_indexed(struct device *dev, + unsigned long *freq, u32 index); + struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_indexed(struct device *dev, + unsigned long *freq, u32 index); + struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, unsigned int level); @@ -261,12 +267,24 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp * +dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq, u32 index) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp * +dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq, u32 index) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, unsigned int level) { From 9027f2e79788ddee716674ad9af3cdc076de18cc Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 14 Jul 2023 11:44:25 -0600 Subject: [PATCH 31/74] PM / devfreq: Explicitly include correct DT includes The DT of_device.h and of_platform.h date back to the separate of_platform_bus_type before it as merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to detangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes. Signed-off-by: Rob Herring Signed-off-by: Chanwoo Choi --- drivers/devfreq/imx-bus.c | 2 +- drivers/devfreq/imx8m-ddrc.c | 2 +- drivers/devfreq/mtk-cci-devfreq.c | 1 - drivers/devfreq/tegra30-devfreq.c | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c index a727067980fb..86850b7dea09 100644 --- a/drivers/devfreq/imx-bus.c +++ b/drivers/devfreq/imx-bus.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/devfreq/imx8m-ddrc.c b/drivers/devfreq/imx8m-ddrc.c index 16636973eb10..e1348490c8aa 100644 --- a/drivers/devfreq/imx8m-ddrc.c +++ b/drivers/devfreq/imx8m-ddrc.c @@ -3,9 +3,9 @@ * Copyright 2019 NXP */ +#include #include #include -#include #include #include #include diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c index 6354622eda65..83a73f0ccd80 100644 --- a/drivers/devfreq/mtk-cci-devfreq.c +++ b/drivers/devfreq/mtk-cci-devfreq.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index 503376b894b6..4a4f0106ab9d 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include From c46de2fb4ad9e2e5d9041953e87d71ee762fc160 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Jul 2023 14:18:54 +0530 Subject: [PATCH 32/74] PM / devfreq: Reword the kernel-doc comment for devfreq_monitor_start() API Current kernel-doc comment doesn't specify the default timer used for the load monitoring. Also, it uses the term "default delayed work" which could be misunderstood as "default delayer timer". So reword the comment to clearly specify the default timer and also reword the last sentence to make it more understandable. Signed-off-by: Manivannan Sadhasivam Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index e36cbb920ec8..db50c93c0ac3 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -472,10 +472,11 @@ static void devfreq_monitor(struct work_struct *work) * devfreq_monitor_start() - Start load monitoring of devfreq instance * @devfreq: the devfreq instance. * - * Helper function for starting devfreq device load monitoring. By - * default delayed work based monitoring is supported. Function - * to be called from governor in response to DEVFREQ_GOV_START - * event when device is added to devfreq framework. + * Helper function for starting devfreq device load monitoring. By default, + * deferrable timer is used for load monitoring. But the users can change this + * behavior using the "timer" type in devfreq_dev_profile. This function will be + * called by devfreq governor in response to the DEVFREQ_GOV_START event + * generated while adding a device to the devfreq framework. */ void devfreq_monitor_start(struct devfreq *devfreq) { From 005e8dddd4978f6243820d031987056a1a88a2dd Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Fri, 14 Jul 2023 13:55:05 -0400 Subject: [PATCH 33/74] PM: hibernate: don't store zero pages in the image file On ChromeOS we've observed a considerable number of in-use pages filled with zeros. Today with hibernate it's entirely possible that saveable pages are just zero filled. Since we're already copying pages word-by-word in do_copy_page it becomes almost free to determine if a page was completely filled with zeros. This change introduces a new bitmap which will track these zero pages. If a page is zero it will not be included in the saved image, instead to track these zero pages in the image file we will introduce a new flag which we will set on the packed PFN list. When reading back in the image file we will detect these zero page PFNs and rebuild the zero page bitmap. When the image is being loaded through calls to write_next_page if we encounter a zero page we will silently memset it to 0 and then continue on to the next page. Given the implementation in snapshot_read_next/snapshot_write_next this change will be transparent to non-compressed/compressed and swsusp modes of operation. To provide some concrete numbers from simple ad-hoc testing, on a device which was lightly in use we saw that: PM: hibernation: Image created (964408 pages copied, 548304 zero pages) Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled and could be tracked entirely within the packed PFN list. The savings would obviously be much lower for lzo compressed images, but even in the case of compression not copying pages across to the compression threads will still speed things up. It's also possible that we would see better overall compression ratios as larger regions of "real data" would improve the compressibility. Finally, such an approach could dramatically improve swsusp performance as each one of those zero pages requires a write syscall to reload, by handling it as part of the packed PFN list we're able to fully avoid that. Signed-off-by: Brian Geffon [ rjw: Whitespace adjustments, removal of redundant parentheses ] Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 187 ++++++++++++++++++++++++++++++++-------- 1 file changed, 149 insertions(+), 38 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0415d5ecb977..87e9f7e2bdc0 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -404,6 +404,7 @@ struct bm_position { struct mem_zone_bm_rtree *zone; struct rtree_node *node; unsigned long node_pfn; + unsigned long cur_pfn; int node_bit; }; @@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm) bm->cur.node = list_entry(bm->cur.zone->leaves.next, struct rtree_node, list); bm->cur.node_pfn = 0; + bm->cur.cur_pfn = BM_END_OF_MAP; bm->cur.node_bit = 0; } @@ -799,6 +801,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, bm->cur.zone = zone; bm->cur.node = node; bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; + bm->cur.cur_pfn = pfn; /* Set return values */ *addr = node->data; @@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm) clear_bit(bit, bm->cur.node->data); } +static unsigned long memory_bm_get_current(struct memory_bitmap *bm) +{ + return bm->cur.cur_pfn; +} + static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; @@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) if (bit < bits) { pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; bm->cur.node_bit = bit + 1; + bm->cur.cur_pfn = pfn; return pfn; } } while (rtree_next_node(bm)); + bm->cur.cur_pfn = BM_END_OF_MAP; return BM_END_OF_MAP; } @@ -1423,14 +1433,19 @@ static unsigned int count_data_pages(void) /* * This is needed, because copy_page and memcpy are not usable for copying - * task structs. + * task structs. Returns true if the page was filled with only zeros, + * otherwise false. */ -static inline void do_copy_page(long *dst, long *src) +static inline bool do_copy_page(long *dst, long *src) { + long z = 0; int n; - for (n = PAGE_SIZE / sizeof(long); n; n--) + for (n = PAGE_SIZE / sizeof(long); n; n--) { + z |= *src; *dst++ = *src++; + } + return !z; } /** @@ -1439,17 +1454,21 @@ static inline void do_copy_page(long *dst, long *src) * Check if the page we are going to copy is marked as present in the kernel * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present() - * always returns 'true'. + * always returns 'true'. Returns true if the page was entirely composed of + * zeros, otherwise it will return false. */ -static void safe_copy_page(void *dst, struct page *s_page) +static bool safe_copy_page(void *dst, struct page *s_page) { + bool zeros_only; + if (kernel_page_present(s_page)) { - do_copy_page(dst, page_address(s_page)); + zeros_only = do_copy_page(dst, page_address(s_page)); } else { hibernate_map_page(s_page); - do_copy_page(dst, page_address(s_page)); + zeros_only = do_copy_page(dst, page_address(s_page)); hibernate_unmap_page(s_page); } + return zeros_only; } #ifdef CONFIG_HIGHMEM @@ -1459,17 +1478,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); } -static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) { struct page *s_page, *d_page; void *src, *dst; + bool zeros_only; s_page = pfn_to_page(src_pfn); d_page = pfn_to_page(dst_pfn); if (PageHighMem(s_page)) { src = kmap_atomic(s_page); dst = kmap_atomic(d_page); - do_copy_page(dst, src); + zeros_only = do_copy_page(dst, src); kunmap_atomic(dst); kunmap_atomic(src); } else { @@ -1478,30 +1498,39 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) * The page pointed to by src may contain some kernel * data modified by kmap_atomic() */ - safe_copy_page(buffer, s_page); + zeros_only = safe_copy_page(buffer, s_page); dst = kmap_atomic(d_page); copy_page(dst, buffer); kunmap_atomic(dst); } else { - safe_copy_page(page_address(d_page), s_page); + zeros_only = safe_copy_page(page_address(d_page), s_page); } } + return zeros_only; } #else #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) -static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) { - safe_copy_page(page_address(pfn_to_page(dst_pfn)), + return safe_copy_page(page_address(pfn_to_page(dst_pfn)), pfn_to_page(src_pfn)); } #endif /* CONFIG_HIGHMEM */ -static void copy_data_pages(struct memory_bitmap *copy_bm, - struct memory_bitmap *orig_bm) +/* + * Copy data pages will copy all pages into pages pulled from the copy_bm. + * If a page was entirely filled with zeros it will be marked in the zero_bm. + * + * Returns the number of pages copied. + */ +static unsigned long copy_data_pages(struct memory_bitmap *copy_bm, + struct memory_bitmap *orig_bm, + struct memory_bitmap *zero_bm) { + unsigned long copied_pages = 0; struct zone *zone; - unsigned long pfn; + unsigned long pfn, copy_pfn; for_each_populated_zone(zone) { unsigned long max_zone_pfn; @@ -1514,18 +1543,29 @@ static void copy_data_pages(struct memory_bitmap *copy_bm, } memory_bm_position_reset(orig_bm); memory_bm_position_reset(copy_bm); + copy_pfn = memory_bm_next_pfn(copy_bm); for(;;) { pfn = memory_bm_next_pfn(orig_bm); if (unlikely(pfn == BM_END_OF_MAP)) break; - copy_data_page(memory_bm_next_pfn(copy_bm), pfn); + if (copy_data_page(copy_pfn, pfn)) { + memory_bm_set_bit(zero_bm, pfn); + /* Use this copy_pfn for a page that is not full of zeros */ + continue; + } + copied_pages++; + copy_pfn = memory_bm_next_pfn(copy_bm); } + return copied_pages; } /* Total number of image pages */ static unsigned int nr_copy_pages; /* Number of pages needed for saving the original pfns of the image pages */ static unsigned int nr_meta_pages; +/* Number of zero pages */ +static unsigned int nr_zero_pages; + /* * Numbers of normal and highmem page frames allocated for hibernation image * before suspending devices. @@ -1546,6 +1586,9 @@ static struct memory_bitmap orig_bm; */ static struct memory_bitmap copy_bm; +/* Memory bitmap which tracks which saveable pages were zero filled. */ +static struct memory_bitmap zero_bm; + /** * swsusp_free - Free pages allocated for hibernation image. * @@ -1590,6 +1633,7 @@ void swsusp_free(void) out: nr_copy_pages = 0; nr_meta_pages = 0; + nr_zero_pages = 0; restore_pblist = NULL; buffer = NULL; alloc_normal = 0; @@ -1808,8 +1852,15 @@ int hibernate_preallocate_memory(void) goto err_out; } + error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY); + if (error) { + pr_err("Cannot allocate zero bitmap\n"); + goto err_out; + } + alloc_normal = 0; alloc_highmem = 0; + nr_zero_pages = 0; /* Count the number of saveable data pages. */ save_highmem = count_highmem_pages(); @@ -2089,19 +2140,19 @@ asmlinkage __visible int swsusp_save(void) * Kill them. */ drain_local_pages(NULL); - copy_data_pages(©_bm, &orig_bm); + nr_copy_pages = copy_data_pages(©_bm, &orig_bm, &zero_bm); /* * End of critical section. From now on, we can write to memory, * but we should not touch disk. This specially means we must _not_ * touch swap space! Except we must write out our image of course. */ - nr_pages += nr_highmem; - nr_copy_pages = nr_pages; + /* We don't actually copy the zero pages */ + nr_zero_pages = nr_pages - nr_copy_pages; nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); - pr_info("Image created (%d pages copied)\n", nr_pages); + pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages); return 0; } @@ -2146,15 +2197,22 @@ static int init_header(struct swsusp_info *info) return init_header_complete(info); } +#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1)) +#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG) + /** * pack_pfns - Prepare PFNs for saving. * @bm: Memory bitmap. * @buf: Memory buffer to store the PFNs in. + * @zero_bm: Memory bitmap containing PFNs of zero pages. * * PFNs corresponding to set bits in @bm are stored in the area of memory - * pointed to by @buf (1 page at a time). + * pointed to by @buf (1 page at a time). Pages which were filled with only + * zeros will have the highest bit set in the packed format to distinguish + * them from PFNs which will be contained in the image file. */ -static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) +static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm, + struct memory_bitmap *zero_bm) { int j; @@ -2162,6 +2220,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) buf[j] = memory_bm_next_pfn(bm); if (unlikely(buf[j] == BM_END_OF_MAP)) break; + if (memory_bm_test_bit(zero_bm, buf[j])) + buf[j] |= ENCODED_PFN_ZERO_FLAG; } } @@ -2203,7 +2263,7 @@ int snapshot_read_next(struct snapshot_handle *handle) memory_bm_position_reset(©_bm); } else if (handle->cur <= nr_meta_pages) { clear_page(buffer); - pack_pfns(buffer, &orig_bm); + pack_pfns(buffer, &orig_bm, &zero_bm); } else { struct page *page; @@ -2299,24 +2359,35 @@ static int load_header(struct swsusp_info *info) * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap. * @bm: Memory bitmap. * @buf: Area of memory containing the PFNs. + * @zero_bm: Memory bitmap with the zero PFNs marked. * * For each element of the array pointed to by @buf (1 page at a time), set the - * corresponding bit in @bm. + * corresponding bit in @bm. If the page was originally populated with only + * zeros then a corresponding bit will also be set in @zero_bm. */ -static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) +static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm, + struct memory_bitmap *zero_bm) { + unsigned long decoded_pfn; + bool zero; int j; for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { if (unlikely(buf[j] == BM_END_OF_MAP)) break; - if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) { - memory_bm_set_bit(bm, buf[j]); + zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG); + decoded_pfn = buf[j] & ENCODED_PFN_MASK; + if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) { + memory_bm_set_bit(bm, decoded_pfn); + if (zero) { + memory_bm_set_bit(zero_bm, decoded_pfn); + nr_zero_pages++; + } } else { - if (!pfn_valid(buf[j])) + if (!pfn_valid(decoded_pfn)) pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n", - (unsigned long long)PFN_PHYS(buf[j])); + (unsigned long long)PFN_PHYS(decoded_pfn)); return -EFAULT; } } @@ -2538,6 +2609,7 @@ static inline void free_highmem_data(void) {} * prepare_image - Make room for loading hibernation image. * @new_bm: Uninitialized memory bitmap structure. * @bm: Memory bitmap with unsafe pages marked. + * @zero_bm: Memory bitmap containing the zero pages. * * Use @bm to mark the pages that will be overwritten in the process of * restoring the system memory state from the suspend image ("unsafe" pages) @@ -2548,10 +2620,15 @@ static inline void free_highmem_data(void) {} * pages will be used for just yet. Instead, we mark them all as allocated and * create a lists of "safe" pages to be used later. On systems with high * memory a list of "safe" highmem pages is created too. + * + * Because it was not known which pages were unsafe when @zero_bm was created, + * make a copy of it and recreate it within safe pages. */ -static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) +static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm, + struct memory_bitmap *zero_bm) { unsigned int nr_pages, nr_highmem; + struct memory_bitmap tmp; struct linked_page *lp; int error; @@ -2568,6 +2645,24 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) duplicate_memory_bitmap(new_bm, bm); memory_bm_free(bm, PG_UNSAFE_KEEP); + + /* Make a copy of zero_bm so it can be created in safe pages */ + error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY); + if (error) + goto Free; + + duplicate_memory_bitmap(&tmp, zero_bm); + memory_bm_free(zero_bm, PG_UNSAFE_KEEP); + + /* Recreate zero_bm in safe pages */ + error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE); + if (error) + goto Free; + + duplicate_memory_bitmap(zero_bm, &tmp); + memory_bm_free(&tmp, PG_UNSAFE_KEEP); + /* At this point zero_bm is in safe pages and it can be used for restoring. */ + if (nr_highmem > 0) { error = prepare_highmem_image(bm, &nr_highmem); if (error) @@ -2582,7 +2677,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) * * nr_copy_pages cannot be less than allocated_unsafe_pages too. */ - nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; + nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages; nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); while (nr_pages > 0) { lp = get_image_page(GFP_ATOMIC, PG_SAFE); @@ -2595,7 +2690,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) nr_pages--; } /* Preallocate memory for the image */ - nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; + nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages; while (nr_pages > 0) { lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); if (!lp) { @@ -2683,8 +2778,9 @@ int snapshot_write_next(struct snapshot_handle *handle) static struct chain_allocator ca; int error = 0; +next: /* Check if we have already loaded the entire image */ - if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) + if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) return 0; handle->sync_read = 1; @@ -2709,19 +2805,26 @@ int snapshot_write_next(struct snapshot_handle *handle) if (error) return error; + error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY); + if (error) + return error; + + nr_zero_pages = 0; + hibernate_restore_protection_begin(); } else if (handle->cur <= nr_meta_pages + 1) { - error = unpack_orig_pfns(buffer, ©_bm); + error = unpack_orig_pfns(buffer, ©_bm, &zero_bm); if (error) return error; if (handle->cur == nr_meta_pages + 1) { - error = prepare_image(&orig_bm, ©_bm); + error = prepare_image(&orig_bm, ©_bm, &zero_bm); if (error) return error; chain_init(&ca, GFP_ATOMIC, PG_SAFE); memory_bm_position_reset(&orig_bm); + memory_bm_position_reset(&zero_bm); restore_pblist = NULL; handle->buffer = get_buffer(&orig_bm, &ca); handle->sync_read = 0; @@ -2738,6 +2841,14 @@ int snapshot_write_next(struct snapshot_handle *handle) handle->sync_read = 0; } handle->cur++; + + /* Zero pages were not included in the image, memset it and move on. */ + if (handle->cur > nr_meta_pages + 1 && + memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) { + memset(handle->buffer, 0, PAGE_SIZE); + goto next; + } + return PAGE_SIZE; } @@ -2754,7 +2865,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle) copy_last_highmem_page(); hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */ - if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { + if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) { memory_bm_recycle(&orig_bm); free_highmem_data(); } @@ -2763,7 +2874,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle) int snapshot_image_loaded(struct snapshot_handle *handle) { return !(!nr_copy_pages || !last_highmem_page_copied() || - handle->cur <= nr_meta_pages + nr_copy_pages); + handle->cur <= nr_meta_pages + nr_copy_pages + nr_zero_pages); } #ifdef CONFIG_HIGHMEM From 5f756d03e2c7db63c1df7148d7b1739f29ff1532 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Jul 2023 11:10:53 +0530 Subject: [PATCH 34/74] OPP: Introduce dev_pm_opp_get_freq_indexed() API In the case of devices with multiple clocks, drivers need to specify the frequency index for the OPP framework to get the specific frequency within the required OPP. So let's introduce the dev_pm_opp_get_freq_indexed() API accepting the frequency index as an argument. Signed-off-by: Manivannan Sadhasivam [ Viresh: Fixed potential access to NULL opp pointer ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 20 ++++++++++++++++++++ include/linux/pm_opp.h | 7 +++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index cb4f47b341f9..07341050d57f 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -197,6 +197,26 @@ unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq); +/** + * dev_pm_opp_get_freq_indexed() - Gets the frequency corresponding to an + * available opp with specified index + * @opp: opp for which frequency has to be returned for + * @index: index of the frequency within the required opp + * + * Return: frequency in hertz corresponding to the opp with specified index, + * else return 0 + */ +unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index) +{ + if (IS_ERR_OR_NULL(opp) || index >= opp->opp_table->clk_count) { + pr_err("%s: Invalid parameters\n", __func__); + return 0; + } + + return opp->rates[index]; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq_indexed); + /** * dev_pm_opp_get_level() - Gets the level corresponding to an available opp * @opp: opp for which level value has to be returned for diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 2617f2c51f29..a13a1705df57 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -105,6 +105,8 @@ unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp); unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); +unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index); + unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp); unsigned int dev_pm_opp_get_required_pstate(struct dev_pm_opp *opp, @@ -213,6 +215,11 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) return 0; } +static inline unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index) +{ + return 0; +} + static inline unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp) { return 0; From a5893928bb179d67ca1d44a8f66c990480ba541d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 21 Jul 2023 13:41:20 +0530 Subject: [PATCH 35/74] OPP: Add dev_pm_opp_find_freq_exact_indexed() The indexed version of the API is added for other floor and ceil, add the same for exact as well for completeness. Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 28 ++++++++++++++++++++++++++++ include/linux/pm_opp.h | 11 +++++++++++ 2 files changed, 39 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 07341050d57f..2af958dc17ba 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -646,6 +646,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); +/** + * dev_pm_opp_find_freq_exact_indexed() - Search for an exact freq for the + * clock corresponding to the index + * @dev: Device for which we do this operation + * @freq: frequency to search for + * @index: Clock index + * @available: true/false - match for available opp + * + * Search for the matching exact OPP for the clock corresponding to the + * specified index from a starting freq for a device. + * + * Return: matching *opp , else returns ERR_PTR in case of error and should be + * handled using IS_ERR. Error return values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp * +dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, + u32 index, bool available) +{ + return _find_key_exact(dev, freq, index, available, _read_freq, NULL); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact_indexed); + static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table, unsigned long *freq) { diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index a13a1705df57..23e4e4eaaa42 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -124,6 +124,10 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); +struct dev_pm_opp * +dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, + u32 index, bool available); + struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); @@ -268,6 +272,13 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp * +dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, + u32 index, bool available) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq) { From 034d6aac2160b732d58d82e34aaf2c058a8f72fa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 21 Jul 2023 14:35:08 +0530 Subject: [PATCH 36/74] OPP: Update _read_freq() to return the correct frequency Now that we support finding indexed frequencies, lets update _read_freq() to return the right one. Signed-off-by: Viresh Kumar Acked-by: Manivannan Sadhasivam --- drivers/opp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 2af958dc17ba..b83f651680e7 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -470,7 +470,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count); /* Helpers to read keys */ static unsigned long _read_freq(struct dev_pm_opp *opp, int index) { - return opp->rates[0]; + return opp->rates[index]; } static unsigned long _read_level(struct dev_pm_opp *opp, int index) From 746de8255076c9924ffa51baad9822adddccb94e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 21 Jul 2023 14:55:07 +0530 Subject: [PATCH 37/74] OPP: Reuse dev_pm_opp_get_freq_indexed() Reuse dev_pm_opp_get_freq_indexed() from dev_pm_opp_get_freq(). Signed-off-by: Viresh Kumar Acked-by: Manivannan Sadhasivam --- drivers/opp/core.c | 21 --------------------- include/linux/pm_opp.h | 12 +++++------- 2 files changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index b83f651680e7..0ad8664d2423 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -176,27 +176,6 @@ unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_power); -/** - * dev_pm_opp_get_freq() - Gets the frequency corresponding to an available opp - * @opp: opp for which frequency has to be returned for - * - * Return: frequency in hertz corresponding to the opp, else - * return 0 - */ -unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) -{ - if (IS_ERR_OR_NULL(opp)) { - pr_err("%s: Invalid parameters\n", __func__); - return 0; - } - - if (!assert_single_clk(opp->opp_table)) - return 0; - - return opp->rates[0]; -} -EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq); - /** * dev_pm_opp_get_freq_indexed() - Gets the frequency corresponding to an * available opp with specified index diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 23e4e4eaaa42..91f87d7e807c 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -103,8 +103,6 @@ int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *su unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp); -unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); - unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index); unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp); @@ -214,11 +212,6 @@ static inline unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp) return 0; } -static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) -{ - return 0; -} - static inline unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index) { return 0; @@ -669,4 +662,9 @@ static inline void dev_pm_opp_put_prop_name(int token) dev_pm_opp_clear_config(token); } +static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) +{ + return dev_pm_opp_get_freq_indexed(opp, 0); +} + #endif /* __LINUX_OPP_H__ */ From 7ddd8deb1c3c0363a7e14fafb5df26e2089a69a5 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 21 Jul 2023 18:16:33 +0530 Subject: [PATCH 38/74] OPP: Fix potential null ptr dereference in dev_pm_opp_get_required_pstate() "opp" pointer is dereferenced before the IS_ERR_OR_NULL() check. Fix it by removing the dereference to cache opp_table and dereference it directly where opp_table is used. This fixes the following smatch warning: drivers/opp/core.c:232 dev_pm_opp_get_required_pstate() warn: variable dereferenced before IS_ERR check 'opp' (see line 230) Fixes: 84cb7ff35fcf ("OPP: pstate is only valid for genpd OPP tables") Signed-off-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0ad8664d2423..991c2e894b1a 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -226,20 +226,18 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_level); unsigned int dev_pm_opp_get_required_pstate(struct dev_pm_opp *opp, unsigned int index) { - struct opp_table *opp_table = opp->opp_table; - if (IS_ERR_OR_NULL(opp) || !opp->available || - index >= opp_table->required_opp_count) { + index >= opp->opp_table->required_opp_count) { pr_err("%s: Invalid parameters\n", __func__); return 0; } /* required-opps not fully initialized yet */ - if (lazy_linking_pending(opp_table)) + if (lazy_linking_pending(opp->opp_table)) return 0; /* The required OPP table must belong to a genpd */ - if (unlikely(!opp_table->required_opp_tables[index]->is_genpd)) { + if (unlikely(!opp->opp_table->required_opp_tables[index]->is_genpd)) { pr_err("%s: Performance state is only valid for genpds.\n", __func__); return 0; } From d920920f85a82c1c806a4143871a0e8f534732f2 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 21 Jul 2023 18:16:34 +0530 Subject: [PATCH 39/74] OPP: Fix passing 0 to PTR_ERR in _opp_attach_genpd() If dev_pm_domain_attach_by_name() returns NULL, then 0 will be passed to PTR_ERR() as reported by the smatch warning below: drivers/opp/core.c:2456 _opp_attach_genpd() warn: passing zero to 'PTR_ERR' Fix it by checking for the non-NULL virt_dev pointer before passing it to PTR_ERR. Otherwise return -ENODEV. Fixes: 4ea9496cbc95 ("opp: Fix error check in dev_pm_opp_attach_genpd()") Signed-off-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 991c2e894b1a..919cc53bc02e 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2460,7 +2460,7 @@ static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev, virt_dev = dev_pm_domain_attach_by_name(dev, *name); if (IS_ERR_OR_NULL(virt_dev)) { - ret = PTR_ERR(virt_dev) ? : -ENODEV; + ret = virt_dev ? PTR_ERR(virt_dev) : -ENODEV; dev_err(dev, "Couldn't attach to pm_domain: %d\n", ret); goto err; } From c2add32ce452e216f5b80269fe73d11be1e2c9a5 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Jul 2023 11:10:47 +0530 Subject: [PATCH 40/74] dt-bindings: opp: Increase maxItems for opp-hz property Current limit of 16 will be exhausted by platforms specifying the frequency for 9 clocks using opp-hz, like Qcom SDM845 SoC. For instance, specifying the frequency for 9 clocks with 64bit specifier as below would consume (9 * 2 = 18) items. opp-50000000 { opp-hz = /bits/ 64 <50000000>, /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <37500000>, /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <75000000>; }; So let's increase the limit to 32 which should be enough for most platforms (hopefully). Signed-off-by: Manivannan Sadhasivam Acked-by: Rob Herring Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp-v2-base.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml index 47e6f36b7637..e2f8f7af3cf4 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml @@ -56,7 +56,7 @@ patternProperties: need to be configured and that is left for the implementation specific binding. minItems: 1 - maxItems: 16 + maxItems: 32 items: maxItems: 1 From 2fa00769b1e4bcf20cf0c967cb45f0cd1cea2b5c Mon Sep 17 00:00:00 2001 From: xiongxin Date: Fri, 7 Apr 2023 10:47:59 +0800 Subject: [PATCH 41/74] powercap: intel_rapl: Optimize rp->domains memory allocation In the memory allocation of rp->domains in rapl_detect_domains(), there is an additional memory of struct rapl_domain allocated, optimize the code here to save sizeof(struct rapl_domain) bytes of memory. Test in Intel NUC (i5-1135G7). Signed-off-by: xiongxin Tested-by: xiongxin Reviewed-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 4e646e5e48f6..5e2a1c376a64 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1485,7 +1485,7 @@ static int rapl_detect_domains(struct rapl_package *rp) } pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name); - rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain), + rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain), GFP_KERNEL); if (!rp->domains) return -ENOMEM; From 6b6349d0d685fc3032455b43fce57f374819fb54 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 2 Aug 2023 14:37:41 +0200 Subject: [PATCH 42/74] cpufreq: blocklist MSM8998 in cpufreq-dt-platdev Add the MSM8998 to the blocklist since the CPU scaling on this platform is handled by a separate driver. Signed-off-by: AngeloGioacchino Del Regno Tested-by: Jeffrey Hugo Signed-off-by: Konrad Dybcio Reviewed-by: Caleb Connolly Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index e2b20080de3a..adb3579a1fee 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -143,6 +143,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,apq8096", }, { .compatible = "qcom,msm8996", }, + { .compatible = "qcom,msm8998", }, { .compatible = "qcom,qcs404", }, { .compatible = "qcom,sa8155p" }, { .compatible = "qcom,sa8540p" }, From e520d0b6be950ce3738cf4b9bd3b392be818f1dc Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Jul 2023 21:15:48 -0600 Subject: [PATCH 43/74] cpufreq: brcmstb-avs-cpufreq: Fix -Warray-bounds bug Allocate extra space for terminating element at: drivers/cpufreq/brcmstb-avs-cpufreq.c: 449 table[i].frequency = CPUFREQ_TABLE_END; and add code comment to make this clear. This fixes the following -Warray-bounds warning seen after building ARM with multi_v7_defconfig (GCC 13): In function 'brcm_avs_get_freq_table', inlined from 'brcm_avs_cpufreq_init' at drivers/cpufreq/brcmstb-avs-cpufreq.c:623:15: drivers/cpufreq/brcmstb-avs-cpufreq.c:449:28: warning: array subscript 5 is outside array bounds of 'void[60]' [-Warray-bounds=] 449 | table[i].frequency = CPUFREQ_TABLE_END; In file included from include/linux/node.h:18, from include/linux/cpu.h:17, from include/linux/cpufreq.h:12, from drivers/cpufreq/brcmstb-avs-cpufreq.c:44: In function 'devm_kmalloc_array', inlined from 'devm_kcalloc' at include/linux/device.h:328:9, inlined from 'brcm_avs_get_freq_table' at drivers/cpufreq/brcmstb-avs-cpufreq.c:437:10, inlined from 'brcm_avs_cpufreq_init' at drivers/cpufreq/brcmstb-avs-cpufreq.c:623:15: include/linux/device.h:323:16: note: at offset 60 into object of size 60 allocated by 'devm_kmalloc' 323 | return devm_kmalloc(dev, bytes, flags); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -Warray-bounds. Link: https://github.com/KSPP/linux/issues/324 Fixes: de322e085995 ("cpufreq: brcmstb-avs-cpufreq: AVS CPUfreq driver for Broadcom STB SoCs") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Florian Fainelli Signed-off-by: Viresh Kumar --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 1bdd513bcd19..35fb3a559ea9 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -434,7 +434,11 @@ brcm_avs_get_freq_table(struct device *dev, struct private_data *priv) if (ret) return ERR_PTR(ret); - table = devm_kcalloc(dev, AVS_PSTATE_MAX + 1, sizeof(*table), + /* + * We allocate space for the 5 different P-STATES AVS, + * plus extra space for a terminating element. + */ + table = devm_kcalloc(dev, AVS_PSTATE_MAX + 1 + 1, sizeof(*table), GFP_KERNEL); if (!table) return ERR_PTR(-ENOMEM); From 3f0b0966b30982e843950b170b7a9ddfd8094428 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jul 2023 20:56:35 +0200 Subject: [PATCH 44/74] cpuidle: teo: Update idle duration estimate when choosing shallower state The TEO governor takes CPU utilization into account by refining idle state selection when the utilization is above a certain threshold. This is done by choosing an idle state shallower than the previously selected one. However, when doing this, the idle duration estimate needs to be adjusted so as to prevent the scheduler tick from being stopped when the candidate idle state is shallow, which may lead to excessive energy usage if the CPU is not woken up quickly enough going forward. Moreover, if the scheduler tick has been stopped already and the new idle duration estimate is too small, the replacement candidate state cannot be used. Modify the relevant code to take the above observations into account. Fixes: 9ce0f7c4bc64 ("cpuidle: teo: Introduce util-awareness") Link: https://lore.kernel.org/linux-pm/CAJZ5v0jJxHj65r2HXBTd3wfbZtsg=_StzwO1kA5STDnaPe_dWA@mail.gmail.com Signed-off-by: Rafael J. Wysocki Reviewed-and-tested-by: Kajetan Puchalski --- drivers/cpuidle/governors/teo.c | 40 ++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 987fc5f3997d..2cdc711679a5 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -397,13 +397,23 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * the shallowest non-polling state and exit. */ if (drv->state_count < 3 && cpu_data->utilized) { - for (i = 0; i < drv->state_count; ++i) { - if (!dev->states_usage[i].disable && - !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) { - idx = i; - goto end; - } - } + /* The CPU is utilized, so assume a short idle duration. */ + duration_ns = teo_middle_of_bin(0, drv); + /* + * If state 0 is enabled and it is not a polling one, select it + * right away unless the scheduler tick has been stopped, in + * which case care needs to be taken to leave the CPU in a deep + * enough state in case it is not woken up any time soon after + * all. If state 1 is disabled, though, state 0 must be used + * anyway. + */ + if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) && + teo_time_ok(duration_ns)) || dev->states_usage[1].disable) + idx = 0; + else /* Assume that state 1 is not a polling one and use it. */ + idx = 1; + + goto end; } /* @@ -539,10 +549,20 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* * If the CPU is being utilized over the threshold, choose a shallower - * non-polling state to improve latency + * non-polling state to improve latency, unless the scheduler tick has + * been stopped already and the shallower state's target residency is + * not sufficiently large. */ - if (cpu_data->utilized) - idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true); + if (cpu_data->utilized) { + s64 span_ns; + + i = teo_find_shallower_state(drv, dev, idx, duration_ns, true); + span_ns = teo_middle_of_bin(i, drv); + if (teo_time_ok(span_ns)) { + idx = i; + duration_ns = span_ns; + } + } end: /* From 04bae4e2267d49eb9cfec403acd0462b8d00d637 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jul 2023 21:03:09 +0200 Subject: [PATCH 45/74] cpuidle: teo: Avoid stopping the tick unnecessarily when bailing out When teo_select() is going to return early in some special cases, make it avoid stopping the tick if the idle state to be returned is shallow. In particular, never stop the tick if state 0 is to be returned. Link: https://lore.kernel.org/linux-pm/CAJZ5v0jJxHj65r2HXBTd3wfbZtsg=_StzwO1kA5STDnaPe_dWA@mail.gmail.com Signed-off-by: Rafael J. Wysocki Reviewed-and-tested-by: Kajetan Puchalski --- drivers/cpuidle/governors/teo.c | 56 +++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 2cdc711679a5..1b76db3689d0 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -382,12 +382,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* Check if there is any choice in the first place. */ if (drv->state_count < 2) { idx = 0; - goto end; + goto out_tick; } + if (!dev->states_usage[0].disable) { idx = 0; if (drv->states[1].target_residency_ns > duration_ns) - goto end; + goto out_tick; } cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); @@ -408,11 +409,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * anyway. */ if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) && - teo_time_ok(duration_ns)) || dev->states_usage[1].disable) + teo_time_ok(duration_ns)) || dev->states_usage[1].disable) { idx = 0; - else /* Assume that state 1 is not a polling one and use it. */ - idx = 1; - + goto out_tick; + } + /* Assume that state 1 is not a polling one and use it. */ + idx = 1; goto end; } @@ -459,8 +461,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* Avoid unnecessary overhead. */ if (idx < 0) { idx = 0; /* No states enabled, must use 0. */ - goto end; - } else if (idx == idx0) { + goto out_tick; + } + + if (idx == idx0) { + /* + * This is the first enabled idle state, so use it, but do not + * allow the tick to be stopped it is shallow enough. + */ + duration_ns = drv->states[idx].target_residency_ns; goto end; } @@ -566,24 +575,25 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, end: /* - * Don't stop the tick if the selected state is a polling one or if the - * expected idle duration is shorter than the tick period length. + * Allow the tick to be stopped unless the selected state is a polling + * one or the expected idle duration is shorter than the tick period + * length. */ - if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || - duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) { - *stop_tick = false; + if ((!(drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && + duration_ns >= TICK_NSEC) || tick_nohz_tick_stopped()) + return idx; - /* - * The tick is not going to be stopped, so if the target - * residency of the state to be returned is not within the time - * till the closest timer including the tick, try to correct - * that. - */ - if (idx > idx0 && - drv->states[idx].target_residency_ns > delta_tick) - idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false); - } + /* + * The tick is not going to be stopped, so if the target residency of + * the state to be returned is not within the time till the closest + * timer including the tick, try to correct that. + */ + if (idx > idx0 && + drv->states[idx].target_residency_ns > delta_tick) + idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false); +out_tick: + *stop_tick = false; return idx; } From 9a41e16f11103c37263271ff2a433f2f3885fced Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jul 2023 21:04:41 +0200 Subject: [PATCH 46/74] cpuidle: teo: Drop utilized from struct teo_cpu Because the utilized field in struct teo_cpu is only used locally in teo_select(), replace it with a local variable in that function. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-and-tested-by: Kajetan Puchalski --- drivers/cpuidle/governors/teo.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 1b76db3689d0..3915e5639790 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -187,7 +187,6 @@ struct teo_bin { * @next_recent_idx: Index of the next @recent_idx entry to update. * @recent_idx: Indices of bins corresponding to recent "intercepts". * @util_threshold: Threshold above which the CPU is considered utilized - * @utilized: Whether the last sleep on the CPU happened while utilized */ struct teo_cpu { s64 time_span_ns; @@ -197,7 +196,6 @@ struct teo_cpu { int next_recent_idx; int recent_idx[NR_RECENT]; unsigned long util_threshold; - bool utilized; }; static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); @@ -366,6 +364,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int idx0 = 0, idx = -1; bool alt_intercepts, alt_recent; ktime_t delta_tick; + bool cpu_utilized; s64 duration_ns; int i; @@ -391,13 +390,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto out_tick; } - cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); + cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); /* * If the CPU is being utilized over the threshold and there are only 2 * states to choose from, the metrics need not be considered, so choose * the shallowest non-polling state and exit. */ - if (drv->state_count < 3 && cpu_data->utilized) { + if (drv->state_count < 3 && cpu_utilized) { /* The CPU is utilized, so assume a short idle duration. */ duration_ns = teo_middle_of_bin(0, drv); /* @@ -562,7 +561,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * been stopped already and the shallower state's target residency is * not sufficiently large. */ - if (cpu_data->utilized) { + if (cpu_utilized) { s64 span_ns; i = teo_find_shallower_state(drv, dev, idx, duration_ns, true); From 0aea7a2f88a551d9dd38a2b405063cf5f97bfc29 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 9 Aug 2023 02:07:59 +0200 Subject: [PATCH 47/74] cpufreq: blocklist more Qualcomm platforms in cpufreq-dt-platdev All Qualcomm platforms utilizing the qcom-cpufreq-hw driver have no business in using cpufreq-dt. Prevent that from happening. Signed-off-by: Konrad Dybcio Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index adb3579a1fee..fb2875ce1fdd 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -144,14 +144,18 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,apq8096", }, { .compatible = "qcom,msm8996", }, { .compatible = "qcom,msm8998", }, + { .compatible = "qcom,qcm2290", }, { .compatible = "qcom,qcs404", }, + { .compatible = "qcom,qdu1000", }, { .compatible = "qcom,sa8155p" }, { .compatible = "qcom,sa8540p" }, + { .compatible = "qcom,sa8775p" }, { .compatible = "qcom,sc7180", }, { .compatible = "qcom,sc7280", }, { .compatible = "qcom,sc8180x", }, { .compatible = "qcom,sc8280xp", }, { .compatible = "qcom,sdm845", }, + { .compatible = "qcom,sdx75", }, { .compatible = "qcom,sm6115", }, { .compatible = "qcom,sm6350", }, { .compatible = "qcom,sm6375", }, @@ -159,6 +163,8 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sm8150", }, { .compatible = "qcom,sm8250", }, { .compatible = "qcom,sm8350", }, + { .compatible = "qcom,sm8450", }, + { .compatible = "qcom,sm8550", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, From 21d28cd2fa5fc01ee83e64df838ecd72112c09b7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 3 Aug 2023 23:07:41 +0200 Subject: [PATCH 48/74] cpuidle: teo: Do not call tick_nohz_get_sleep_length() upfront Because the cost of calling tick_nohz_get_sleep_length() may increase in the future, reorder the code in teo_select() so it first uses the statistics to pick up a candidate idle state and applies the utilization heuristic to it and only then calls tick_nohz_get_sleep_length() to obtain the sleep length value and refine the selection if necessary. This change by itself does not cause tick_nohz_get_sleep_length() to be called less often, but it prepares the code for subsequent changes that will do so. Signed-off-by: Rafael J. Wysocki Tested-by: Kajetan Puchalski Tested-by: Anna-Maria Behnsen --- drivers/cpuidle/governors/teo.c | 107 ++++++++++++++------------------ 1 file changed, 45 insertions(+), 62 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 3915e5639790..4ef76cdfc5c8 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -306,15 +306,10 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) cpu_data->total += PULSE; } -static bool teo_time_ok(u64 interval_ns) +static bool teo_state_ok(int i, struct cpuidle_driver *drv) { - return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC; -} - -static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv) -{ - return (drv->states[idx].target_residency_ns + - drv->states[idx+1].target_residency_ns) / 2; + return !tick_nohz_tick_stopped() || + drv->states[i].target_residency_ns >= TICK_NSEC; } /** @@ -354,6 +349,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); + ktime_t delta_tick = TICK_NSEC / 2; unsigned int idx_intercept_sum = 0; unsigned int intercept_sum = 0; unsigned int idx_recent_sum = 0; @@ -363,7 +359,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int constraint_idx = 0; int idx0 = 0, idx = -1; bool alt_intercepts, alt_recent; - ktime_t delta_tick; bool cpu_utilized; s64 duration_ns; int i; @@ -374,9 +369,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, } cpu_data->time_span_ns = local_clock(); - - duration_ns = tick_nohz_get_sleep_length(&delta_tick); - cpu_data->sleep_length_ns = duration_ns; + /* + * Set the expected sleep length to infinity in case of an early + * return. + */ + cpu_data->sleep_length_ns = KTIME_MAX; /* Check if there is any choice in the first place. */ if (drv->state_count < 2) { @@ -384,11 +381,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto out_tick; } - if (!dev->states_usage[0].disable) { + if (!dev->states_usage[0].disable) idx = 0; - if (drv->states[1].target_residency_ns > duration_ns) - goto out_tick; - } cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); /* @@ -397,8 +391,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * the shallowest non-polling state and exit. */ if (drv->state_count < 3 && cpu_utilized) { - /* The CPU is utilized, so assume a short idle duration. */ - duration_ns = teo_middle_of_bin(0, drv); /* * If state 0 is enabled and it is not a polling one, select it * right away unless the scheduler tick has been stopped, in @@ -408,22 +400,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * anyway. */ if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) && - teo_time_ok(duration_ns)) || dev->states_usage[1].disable) { + teo_state_ok(0, drv)) || dev->states_usage[1].disable) { idx = 0; goto out_tick; } /* Assume that state 1 is not a polling one and use it. */ idx = 1; + duration_ns = drv->states[1].target_residency_ns; goto end; } - /* - * Find the deepest idle state whose target residency does not exceed - * the current sleep length and the deepest idle state not deeper than - * the former whose exit latency does not exceed the current latency - * constraint. Compute the sums of metrics for early wakeup pattern - * detection. - */ + /* Compute the sums of metrics for early wakeup pattern detection. */ for (i = 1; i < drv->state_count; i++) { struct teo_bin *prev_bin = &cpu_data->state_bins[i-1]; struct cpuidle_state *s = &drv->states[i]; @@ -439,19 +426,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (dev->states_usage[i].disable) continue; - if (idx < 0) { - idx = i; /* first enabled state */ - idx0 = i; - } - - if (s->target_residency_ns > duration_ns) - break; + if (idx < 0) + idx0 = i; /* first enabled state */ idx = i; if (s->exit_latency_ns <= latency_req) constraint_idx = i; + /* Save the sums for the current state. */ idx_intercept_sum = intercept_sum; idx_hit_sum = hit_sum; idx_recent_sum = recent_sum; @@ -465,7 +448,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (idx == idx0) { /* - * This is the first enabled idle state, so use it, but do not + * Only one idle state is enabled, so use it, but do not * allow the tick to be stopped it is shallow enough. */ duration_ns = drv->states[idx].target_residency_ns; @@ -479,13 +462,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * all of the deeper states, or the sum of the numbers of recent * intercepts over all of the states shallower than the candidate one * is greater than a half of the number of recent events taken into - * account, the CPU is likely to wake up early, so find an alternative - * idle state to select. + * account, a shallower idle state is likely to be a better choice. */ alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum; alt_recent = idx_recent_sum > NR_RECENT / 2; if (alt_recent || alt_intercepts) { - s64 first_suitable_span_ns = duration_ns; int first_suitable_idx = idx; /* @@ -494,44 +475,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * cases (both with respect to intercepts overall and with * respect to the recent intercepts only) in the past. * - * Take the possible latency constraint and duration limitation - * present if the tick has been stopped already into account. + * Take the possible duration limitation present if the tick + * has been stopped already into account. */ intercept_sum = 0; recent_sum = 0; for (i = idx - 1; i >= 0; i--) { struct teo_bin *bin = &cpu_data->state_bins[i]; - s64 span_ns; intercept_sum += bin->intercepts; recent_sum += bin->recent; - span_ns = teo_middle_of_bin(i, drv); - if ((!alt_recent || 2 * recent_sum > idx_recent_sum) && (!alt_intercepts || 2 * intercept_sum > idx_intercept_sum)) { - if (teo_time_ok(span_ns) && - !dev->states_usage[i].disable) { + /* + * Use the current state unless it is too + * shallow or disabled, in which case take the + * first enabled state that is deep enough. + */ + if (teo_state_ok(i, drv) && + !dev->states_usage[i].disable) idx = i; - duration_ns = span_ns; - } else { - /* - * The current state is too shallow or - * disabled, so take the first enabled - * deeper state with suitable time span. - */ + else idx = first_suitable_idx; - duration_ns = first_suitable_span_ns; - } + break; } if (dev->states_usage[i].disable) continue; - if (!teo_time_ok(span_ns)) { + if (!teo_state_ok(i, drv)) { /* * The current state is too shallow, but if an * alternative candidate state has been found, @@ -543,7 +519,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, break; } - first_suitable_span_ns = span_ns; first_suitable_idx = i; } } @@ -562,14 +537,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * not sufficiently large. */ if (cpu_utilized) { - s64 span_ns; - - i = teo_find_shallower_state(drv, dev, idx, duration_ns, true); - span_ns = teo_middle_of_bin(i, drv); - if (teo_time_ok(span_ns)) { + i = teo_find_shallower_state(drv, dev, idx, KTIME_MAX, true); + if (teo_state_ok(i, drv)) + idx = i; + } + + duration_ns = tick_nohz_get_sleep_length(&delta_tick); + cpu_data->sleep_length_ns = duration_ns; + + /* + * If the closest expected timer is before the terget residency of the + * candidate state, a shallower one needs to be found. + */ + if (drv->states[idx].target_residency_ns > duration_ns) { + i = teo_find_shallower_state(drv, dev, idx, duration_ns, false); + if (teo_state_ok(i, drv)) idx = i; - duration_ns = span_ns; - } } end: From 6da8f9ba5a87f8aecf2cd4441cc6024a77e5b645 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 3 Aug 2023 23:09:18 +0200 Subject: [PATCH 49/74] cpuidle: teo: Skip tick_nohz_get_sleep_length() call in some cases Make teo_select() avoid calling tick_nohz_get_sleep_length() if the candidate idle state to return is state 0 or if state 0 is a polling one and the target residency of the current candidate one is below a certain threshold, in which cases it may be assumed that the CPU will be woken up immediately by a non-timer wakeup source and the timers are not likely to matter. Signed-off-by: Rafael J. Wysocki Tested-by: Kajetan Puchalski Tested-by: Anna-Maria Behnsen --- drivers/cpuidle/governors/teo.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 4ef76cdfc5c8..8248492cad3a 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -166,6 +166,12 @@ */ #define NR_RECENT 9 +/* + * Idle state target residency threshold used for deciding whether or not to + * check the time till the closest expected timer event. + */ +#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) + /** * struct teo_bin - Metrics used by the TEO cpuidle governor. * @intercepts: The "intercepts" metric. @@ -542,6 +548,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = i; } + /* + * Skip the timers check if state 0 is the current candidate one, + * because an immediate non-timer wakeup is expected in that case. + */ + if (!idx) + goto out_tick; + + /* + * If state 0 is a polling one, check if the target residency of + * the current candidate state is low enough and skip the timers + * check in that case too. + */ + if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) && + drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) + goto out_tick; + duration_ns = tick_nohz_get_sleep_length(&delta_tick); cpu_data->sleep_length_ns = duration_ns; From 2662342079f54b8a940f7094c197c99458caeb0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 3 Aug 2023 23:11:40 +0200 Subject: [PATCH 50/74] cpuidle: teo: Gather statistics regarding whether or not to stop the tick Currently, if the target residency of the deepest idle state is less than the tick period length, which is quite likely for HZ=100, and the deepest idle state is about to be selected by the TEO idle governor, the decision on whether or not to stop the scheduler tick is based entirely on the time till the closest timer. This is often insufficient, because timers may not be in heavy use and there may be a plenty of other CPU wakeup events between the deepest idle state's target residency and the closest tick. Allow the governor to count those events by making the deepest idle state's bin effectively end at TICK_NSEC and introducing an additional "bin" for collecting "hit" events (ie. the ones in which the measured idle duration falls into the same bin as the time till the closest timer) with idle duration values past TICK_NSEC. This way the "intercepts" metric for the deepest idle state's bin becomes nonzero in general, and so it can influence the decision on whether or not to stop the tick possibly increasing the governor's accuracy in that respect. Signed-off-by: Rafael J. Wysocki Tested-by: Kajetan Puchalski Tested-by: Anna-Maria Behnsen --- drivers/cpuidle/governors/teo.c | 41 ++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 8248492cad3a..70846b669255 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -192,6 +192,7 @@ struct teo_bin { * @total: Grand total of the "intercepts" and "hits" metrics for all bins. * @next_recent_idx: Index of the next @recent_idx entry to update. * @recent_idx: Indices of bins corresponding to recent "intercepts". + * @tick_hits: Number of "hits" after TICK_NSEC. * @util_threshold: Threshold above which the CPU is considered utilized */ struct teo_cpu { @@ -201,6 +202,7 @@ struct teo_cpu { unsigned int total; int next_recent_idx; int recent_idx[NR_RECENT]; + unsigned int tick_hits; unsigned long util_threshold; }; @@ -232,6 +234,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); int i, idx_timer = 0, idx_duration = 0; + s64 target_residency_ns; u64 measured_ns; if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { @@ -272,7 +275,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * fall into. */ for (i = 0; i < drv->state_count; i++) { - s64 target_residency_ns = drv->states[i].target_residency_ns; struct teo_bin *bin = &cpu_data->state_bins[i]; bin->hits -= bin->hits >> DECAY_SHIFT; @@ -280,6 +282,8 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) cpu_data->total += bin->hits + bin->intercepts; + target_residency_ns = drv->states[i].target_residency_ns; + if (target_residency_ns <= cpu_data->sleep_length_ns) { idx_timer = i; if (target_residency_ns <= measured_ns) @@ -294,6 +298,26 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (cpu_data->recent_idx[i] >= 0) cpu_data->state_bins[cpu_data->recent_idx[i]].recent--; + /* + * If the deepest state's target residency is below the tick length, + * make a record of it to help teo_select() decide whether or not + * to stop the tick. This effectively adds an extra hits-only bin + * beyond the last state-related one. + */ + if (target_residency_ns < TICK_NSEC) { + cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT; + + cpu_data->total += cpu_data->tick_hits; + + if (TICK_NSEC <= cpu_data->sleep_length_ns) { + idx_timer = drv->state_count; + if (TICK_NSEC <= measured_ns) { + cpu_data->tick_hits += PULSE; + goto end; + } + } + } + /* * If the measured idle duration falls into the same bin as the sleep * length, this is a "hit", so update the "hits" metric for that bin. @@ -309,6 +333,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) cpu_data->recent_idx[i] = idx_duration; } +end: cpu_data->total += PULSE; } @@ -356,6 +381,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); ktime_t delta_tick = TICK_NSEC / 2; + unsigned int tick_intercept_sum = 0; unsigned int idx_intercept_sum = 0; unsigned int intercept_sum = 0; unsigned int idx_recent_sum = 0; @@ -429,6 +455,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, hit_sum += prev_bin->hits; recent_sum += prev_bin->recent; + tick_intercept_sum = intercept_sum; + if (dev->states_usage[i].disable) continue; @@ -461,6 +489,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto end; } + tick_intercept_sum += cpu_data->state_bins[drv->state_count-1].intercepts; + /* * If the sum of the intercepts metric for all of the idle states * shallower than the current candidate one (idx) is greater than the @@ -577,6 +607,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = i; } + /* + * If the selected state's target residency is below the tick length + * and intercepts occurring before the tick length are the majority of + * total wakeup events, do not stop the tick. + */ + if (drv->states[idx].target_residency_ns < TICK_NSEC && + tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8) + duration_ns = TICK_NSEC / 2; + end: /* * Allow the tick to be stopped unless the selected state is a polling From 5693d077595de721f9ddbf9d37f40e5409707dfe Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 9 Aug 2023 13:31:08 +0200 Subject: [PATCH 51/74] PM / devfreq: Fix leak in devfreq_dev_release() srcu_init_notifier_head() allocates resources that need to be released with a srcu_cleanup_notifier_head() call. Reported by kmemleak. Fixes: 0fe3a66410a3 ("PM / devfreq: Add new DEVFREQ_TRANSITION_NOTIFIER notifier") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Dhruva Gole Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index db50c93c0ac3..474d81831ad3 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -764,6 +764,7 @@ static void devfreq_dev_release(struct device *dev) dev_pm_opp_put_opp_table(devfreq->opp_table); mutex_destroy(&devfreq->lock); + srcu_cleanup_notifier_head(&devfreq->transition_notifier_list); kfree(devfreq); } From 216710a492dde71fc7674e9962727bccb08dfaeb Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 10 Aug 2023 20:12:03 +0200 Subject: [PATCH 52/74] cpufreq: mediatek-hw: Remove unused define DYNAMIC_POWER does not seem to be used anywhere in the tree, remove it. Signed-off-by: Konrad Dybcio Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index fef68cb2b38f..a0a61919bc4c 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -313,8 +313,6 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } -#define DYNAMIC_POWER "dynamic-power-coefficient" - static int mtk_cpufreq_opp_notifier(struct notifier_block *nb, unsigned long event, void *data) { From d3dec5bb61ce113cc84146bf4a72f752256d3767 Mon Sep 17 00:00:00 2001 From: Meng Li Date: Thu, 3 Aug 2023 09:43:54 +0800 Subject: [PATCH 53/74] cpufreq: amd-pstate-ut: Modify the function to get the highest_perf value The previous function amd_get_highest_perf() will be deprecated. It can only return 166 or 255 by cpuinfo. For platforms that support preferred core, the value of highest perf can be between 166 and 255. Therefore, it will cause amd-pstate-ut to fail when run amd_pstate_ut_check_perf(). Signed-off-by: Meng Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/amd-pstate-ut.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 7f3fe2048981..9c889a4a0177 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -145,8 +145,6 @@ static void amd_pstate_ut_check_perf(u32 index) struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata = NULL; - highest_perf = amd_get_highest_perf(); - for_each_possible_cpu(cpu) { policy = cpufreq_cpu_get(cpu); if (!policy) @@ -161,6 +159,7 @@ static void amd_pstate_ut_check_perf(u32 index) return; } + highest_perf = cppc_perf.highest_perf; nominal_perf = cppc_perf.nominal_perf; lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; lowest_perf = cppc_perf.lowest_perf; @@ -172,6 +171,7 @@ static void amd_pstate_ut_check_perf(u32 index) return; } + highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1); lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); From ba6ea77d0e75113b52fdae6e01473476cb48a83c Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Tue, 15 Aug 2023 09:40:02 +0800 Subject: [PATCH 54/74] cpufreq: Prefer to print cpuid in MIN/MAX QoS register error message When a cpufreq_policy is allocated, the cpus, related_cpus and real_cpus of policy are still unset. Therefore, it is preferable to print the passed 'cpu' parameter instead of a empty 'cpus' cpumask in error message when registering MIN/MAX QoS notifier fails. Signed-off-by: Liao Chang Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 50bbc969ffe5..a757f90aa9d6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1234,16 +1234,16 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MIN, &policy->nb_min); if (ret) { - dev_err(dev, "Failed to register MIN QoS notifier: %d (%*pbl)\n", - ret, cpumask_pr_args(policy->cpus)); + dev_err(dev, "Failed to register MIN QoS notifier: %d (CPU%u)\n", + ret, cpu); goto err_kobj_remove; } ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MAX, &policy->nb_max); if (ret) { - dev_err(dev, "Failed to register MAX QoS notifier: %d (%*pbl)\n", - ret, cpumask_pr_args(policy->cpus)); + dev_err(dev, "Failed to register MAX QoS notifier: %d (CPU%u)\n", + ret, cpu); goto err_min_qos_notifier; } From 6a4fec4f6d30a325a1b27be70729145484e6fe9f Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Tue, 15 Aug 2023 09:40:17 +0800 Subject: [PATCH 55/74] cpufreq: cppc: cppc_cpufreq_get_rate() returns zero in all error cases. The cpufreq framework used to use the zero of return value to reflect the cppc_cpufreq_get_rate() had failed to get current frequecy and treat all positive integer to be succeed. Since cppc_get_perf_ctrs() returns a negative integer in error case, so it is better to convert the value to zero as the return value of cppc_cpufreq_get_rate(). Signed-off-by: Liao Chang Signed-off-by: Viresh Kumar --- drivers/cpufreq/cppc_cpufreq.c | 4 ++-- include/linux/cpufreq.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 022e3555407c..05642759faee 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -849,13 +849,13 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0); if (ret) - return ret; + return 0; udelay(2); /* 2usec delay between sampling */ ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1); if (ret) - return ret; + return 0; delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0, &fb_ctrs_t1); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 172ff51c1b2a..99da27466b8f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -370,7 +370,7 @@ struct cpufreq_driver { int (*target_intermediate)(struct cpufreq_policy *policy, unsigned int index); - /* should be defined, if possible */ + /* should be defined, if possible, return 0 on error */ unsigned int (*get)(unsigned int cpu); /* Called to update policy limits on firmware notifications. */ From e613d8cff54736d4fa73730ccaa0ecbe39140782 Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Thu, 17 Aug 2023 07:47:56 +0000 Subject: [PATCH 56/74] cpufreq: cppc: Set fie_disabled to FIE_DISABLED if fails to create kworker_fie The function cppc_freq_invariance_init() may failed to create kworker_fie, make it more robust by setting fie_disabled to FIE_DISBALED to prevent an invalid pointer dereference in kthread_destroy_worker(), which called from cppc_freq_invariance_exit(). Signed-off-by: Liao Chang Signed-off-by: Viresh Kumar --- drivers/cpufreq/cppc_cpufreq.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 05642759faee..fe08ca419b3d 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -249,15 +249,19 @@ static void __init cppc_freq_invariance_init(void) return; kworker_fie = kthread_create_worker(0, "cppc_fie"); - if (IS_ERR(kworker_fie)) + if (IS_ERR(kworker_fie)) { + pr_warn("%s: failed to create kworker_fie: %ld\n", __func__, + PTR_ERR(kworker_fie)); + fie_disabled = FIE_DISABLED; return; + } ret = sched_setattr_nocheck(kworker_fie->task, &attr); if (ret) { pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__, ret); kthread_destroy_worker(kworker_fie); - return; + fie_disabled = FIE_DISABLED; } } @@ -267,7 +271,6 @@ static void cppc_freq_invariance_exit(void) return; kthread_destroy_worker(kworker_fie); - kworker_fie = NULL; } #else From 5484e31bbbff285f9505c4766373f840ffb746e5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Aug 2023 20:36:36 +0200 Subject: [PATCH 57/74] cpuidle: menu: Skip tick_nohz_get_sleep_length() call in some cases Because the cost of calling tick_nohz_get_sleep_length() may increase in the future, reorder the code in menu_select() so it first uses the statistics to determine the expected idle duration. If that value is higher than RESIDENCY_THRESHOLD_NS, tick_nohz_get_sleep_length() will be called to obtain the time till the closest timer and refine the idle duration prediction if necessary. This causes the governor to always take the full overhead of get_typical_interval() with the assumption that the cost will be amortized by skipping the tick_nohz_get_sleep_length() call in the cases when the predicted idle duration is relatively very small. Signed-off-by: Rafael J. Wysocki Tested-by: Doug Smythies --- drivers/cpuidle/governors/gov.h | 14 +++++++ drivers/cpuidle/governors/menu.c | 65 +++++++++++++++++++------------- drivers/cpuidle/governors/teo.c | 9 +---- 3 files changed, 54 insertions(+), 34 deletions(-) create mode 100644 drivers/cpuidle/governors/gov.h diff --git a/drivers/cpuidle/governors/gov.h b/drivers/cpuidle/governors/gov.h new file mode 100644 index 000000000000..99e067d9668c --- /dev/null +++ b/drivers/cpuidle/governors/gov.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Common definitions for cpuidle governors. */ + +#ifndef __CPUIDLE_GOVERNOR_H +#define __CPUIDLE_GOVERNOR_H + +/* + * Idle state target residency threshold used for deciding whether or not to + * check the time till the closest expected timer event. + */ +#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) + +#endif /* __CPUIDLE_GOVERNOR_H */ diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index c4922684f305..b96e3da0fedd 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -19,6 +19,8 @@ #include #include +#include "gov.h" + #define BUCKETS 12 #define INTERVAL_SHIFT 3 #define INTERVALS (1UL << INTERVAL_SHIFT) @@ -166,8 +168,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static unsigned int get_typical_interval(struct menu_device *data, - unsigned int predicted_us) +static unsigned int get_typical_interval(struct menu_device *data) { int i, divisor; unsigned int min, max, thresh, avg; @@ -195,11 +196,7 @@ static unsigned int get_typical_interval(struct menu_device *data, } } - /* - * If the result of the computation is going to be discarded anyway, - * avoid the computation altogether. - */ - if (min >= predicted_us) + if (!max) return UINT_MAX; if (divisor == INTERVALS) @@ -267,7 +264,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, { struct menu_device *data = this_cpu_ptr(&menu_devices); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); - unsigned int predicted_us; u64 predicted_ns; u64 interactivity_req; unsigned int nr_iowaiters; @@ -279,16 +275,41 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, data->needs_update = 0; } - /* determine the expected residency time, round up */ - delta = tick_nohz_get_sleep_length(&delta_tick); - if (unlikely(delta < 0)) { - delta = 0; - delta_tick = 0; - } - data->next_timer_ns = delta; - nr_iowaiters = nr_iowait_cpu(dev->cpu); - data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters); + + /* Find the shortest expected idle interval. */ + predicted_ns = get_typical_interval(data) * NSEC_PER_USEC; + if (predicted_ns > RESIDENCY_THRESHOLD_NS) { + unsigned int timer_us; + + /* Determine the time till the closest timer. */ + delta = tick_nohz_get_sleep_length(&delta_tick); + if (unlikely(delta < 0)) { + delta = 0; + delta_tick = 0; + } + + data->next_timer_ns = delta; + data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters); + + /* Round up the result for half microseconds. */ + timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 + + data->next_timer_ns * + data->correction_factor[data->bucket], + RESOLUTION * DECAY * NSEC_PER_USEC); + /* Use the lowest expected idle interval to pick the idle state. */ + predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns); + } else { + /* + * Because the next timer event is not going to be determined + * in this case, assume that without the tick the closest timer + * will be in distant future and that the closest tick will occur + * after 1/2 of the tick period. + */ + data->next_timer_ns = KTIME_MAX; + delta_tick = TICK_NSEC / 2; + data->bucket = which_bucket(KTIME_MAX, nr_iowaiters); + } if (unlikely(drv->state_count <= 1 || latency_req == 0) || ((data->next_timer_ns < drv->states[1].target_residency_ns || @@ -303,16 +324,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, return 0; } - /* Round up the result for half microseconds. */ - predicted_us = div_u64(data->next_timer_ns * - data->correction_factor[data->bucket] + - (RESOLUTION * DECAY * NSEC_PER_USEC) / 2, - RESOLUTION * DECAY * NSEC_PER_USEC); - /* Use the lowest expected idle interval to pick the idle state. */ - predicted_ns = (u64)min(predicted_us, - get_typical_interval(data, predicted_us)) * - NSEC_PER_USEC; - if (tick_nohz_tick_stopped()) { /* * If the tick is already stopped, the cost of possible short diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 70846b669255..31050fdc633f 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -140,6 +140,8 @@ #include #include +#include "gov.h" + /* * The number of bits to shift the CPU's capacity by in order to determine * the utilized threshold. @@ -152,7 +154,6 @@ */ #define UTIL_THRESHOLD_SHIFT 6 - /* * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value * is used for decreasing metrics on a regular basis. @@ -166,12 +167,6 @@ */ #define NR_RECENT 9 -/* - * Idle state target residency threshold used for deciding whether or not to - * check the time till the closest expected timer event. - */ -#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) - /** * struct teo_bin - Metrics used by the TEO cpuidle governor. * @intercepts: The "intercepts" metric. From a5a297918abba9e35b4b1683b14542d6b7f31ade Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 18 Aug 2023 10:55:49 +0530 Subject: [PATCH 58/74] OPP: Fix argument name in doc comment The name of the argument is "opp_table" and not "table", fix the comment. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308172310.FzcidE4c-lkp@intel.com/ Signed-off-by: Viresh Kumar --- drivers/opp/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c index 3c3506021501..12c429b407ca 100644 --- a/drivers/opp/cpu.c +++ b/drivers/opp/cpu.c @@ -24,7 +24,7 @@ /** * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device * @dev: device for which we do this operation - * @table: Cpufreq table returned back to caller + * @opp_table: Cpufreq table returned back to caller * * Generate a cpufreq table for a provided device- this assumes that the * opp table is already initialized and ready for usage. @@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table); /** * dev_pm_opp_free_cpufreq_table() - free the cpufreq table * @dev: device for which we do this operation - * @table: table to free + * @opp_table: table to free * * Free up the table allocated by dev_pm_opp_init_cpufreq_table */ From bbc2bf13886846cfaf8b13aac6e43afdbafa189c Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 18 Aug 2023 07:45:03 -0500 Subject: [PATCH 59/74] dt-bindings: opp: Convert ti-omap5-opp-supply to json schema Rename ti-omap5-opp-supply to be bit more generic omap-opp-supply and convert the free text binding to json-schema. Reviewed-by: Krzysztof Kozlowski Reviewed-by: Dhruva Gole Signed-off-by: Nishanth Menon Signed-off-by: Viresh Kumar --- .../bindings/opp/ti,omap-opp-supply.yaml | 101 ++++++++++++++++++ .../bindings/opp/ti-omap5-opp-supply.txt | 63 ----------- 2 files changed, 101 insertions(+), 63 deletions(-) create mode 100644 Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml delete mode 100644 Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt diff --git a/Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml b/Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml new file mode 100644 index 000000000000..693f22539606 --- /dev/null +++ b/Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/opp/ti,omap-opp-supply.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments OMAP compatible OPP supply + +description: + OMAP5, DRA7, and AM57 families of SoCs have Class 0 AVS eFuse + registers, which contain OPP-specific voltage information tailored + for the specific device. This binding provides the information + needed to describe such a hardware values and relate them to program + the primary regulator during an OPP transition. + + Also, some supplies may have an associated vbb-supply, an Adaptive + Body Bias regulator, which must transition in a specific sequence + w.r.t the vdd-supply and clk when making an OPP transition. By + supplying two regulators to the device that will undergo OPP + transitions, we can use the multi-regulator support implemented by + the OPP core to describe both regulators the platform needs. The + OPP core binding Documentation/devicetree/bindings/opp/opp-v2.yaml + provides further information (refer to Example 4 Handling multiple + regulators). + +maintainers: + - Nishanth Menon + +properties: + $nodename: + pattern: '^opp-supply(@[0-9a-f]+)?$' + + compatible: + oneOf: + - description: Basic OPP supply controlling VDD and VBB + const: ti,omap-opp-supply + - description: OMAP5+ optimized voltages in efuse(Class 0) VDD along with + VBB. + const: ti,omap5-opp-supply + - description: OMAP5+ optimized voltages in efuse(class0) VDD but no VBB + const: ti,omap5-core-opp-supply + + reg: + maxItems: 1 + + ti,absolute-max-voltage-uv: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Absolute maximum voltage for the OPP supply in micro-volts. + minimum: 750000 + maximum: 1500000 + + ti,efuse-settings: + description: An array of u32 tuple items providing information about + optimized efuse configuration. + minItems: 1 + $ref: /schemas/types.yaml#/definitions/uint32-matrix + items: + items: + - description: Reference voltage in micro-volts (OPP Voltage) + minimum: 750000 + maximum: 1500000 + multipleOf: 10000 + - description: efuse offset where the optimized voltage is located + multipleOf: 4 + maximum: 256 + +required: + - compatible + - ti,absolute-max-voltage-uv + +allOf: + - if: + not: + properties: + compatible: + contains: + const: ti,omap-opp-supply + then: + required: + - reg + - ti,efuse-settings + +additionalProperties: false + +examples: + - | + opp-supply { + compatible = "ti,omap-opp-supply"; + ti,absolute-max-voltage-uv = <1375000>; + }; + - | + opp-supply@4a003b20 { + compatible = "ti,omap5-opp-supply"; + reg = <0x4a003b20 0x8>; + ti,efuse-settings = + /* uV offset */ + <1060000 0x0>, + <1160000 0x4>, + <1210000 0x8>; + ti,absolute-max-voltage-uv = <1500000>; + }; diff --git a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt deleted file mode 100644 index b70d326117cd..000000000000 --- a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt +++ /dev/null @@ -1,63 +0,0 @@ -Texas Instruments OMAP compatible OPP supply description - -OMAP5, DRA7, and AM57 family of SoCs have Class0 AVS eFuse registers which -contain data that can be used to adjust voltages programmed for some of their -supplies for more efficient operation. This binding provides the information -needed to read these values and use them to program the main regulator during -an OPP transitions. - -Also, some supplies may have an associated vbb-supply which is an Adaptive Body -Bias regulator which much be transitioned in a specific sequence with regards -to the vdd-supply and clk when making an OPP transition. By supplying two -regulators to the device that will undergo OPP transitions we can make use -of the multi regulator binding that is part of the OPP core described here [1] -to describe both regulators needed by the platform. - -[1] Documentation/devicetree/bindings/opp/opp-v2.yaml - -Required Properties for Device Node: -- vdd-supply: phandle to regulator controlling VDD supply -- vbb-supply: phandle to regulator controlling Body Bias supply - (Usually Adaptive Body Bias regulator) - -Required Properties for opp-supply node: -- compatible: Should be one of: - "ti,omap-opp-supply" - basic OPP supply controlling VDD and VBB - "ti,omap5-opp-supply" - OMAP5+ optimized voltages in efuse(class0)VDD - along with VBB - "ti,omap5-core-opp-supply" - OMAP5+ optimized voltages in efuse(class0) VDD - but no VBB. -- reg: Address and length of the efuse register set for the device (mandatory - only for "ti,omap5-opp-supply") -- ti,efuse-settings: An array of u32 tuple items providing information about - optimized efuse configuration. Each item consists of the following: - volt: voltage in uV - reference voltage (OPP voltage) - efuse_offseet: efuse offset from reg where the optimized voltage is stored. -- ti,absolute-max-voltage-uv: absolute maximum voltage for the OPP supply. - -Example: - -/* Device Node (CPU) */ -cpus { - cpu0: cpu@0 { - device_type = "cpu"; - - ... - - vdd-supply = <&vcc>; - vbb-supply = <&abb_mpu>; - }; -}; - -/* OMAP OPP Supply with Class0 registers */ -opp_supply_mpu: opp_supply@4a003b20 { - compatible = "ti,omap5-opp-supply"; - reg = <0x4a003b20 0x8>; - ti,efuse-settings = < - /* uV offset */ - 1060000 0x0 - 1160000 0x4 - 1210000 0x8 - >; - ti,absolute-max-voltage-uv = <1500000>; -}; From e576a9a8603f7c6f8fed5159e2fe33f6d67a49e7 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 18 Aug 2023 07:45:04 -0500 Subject: [PATCH 60/74] dt-bindings: cpufreq: Convert ti-cpufreq to json schema Move the ti-cpufreq binding over to opp and convert the free text binding to json-schema. Reviewed-by: Dhruva Gole Signed-off-by: Nishanth Menon Reviewed-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/ti-cpufreq.txt | 132 ------------------ .../opp/operating-points-v2-ti-cpu.yaml | 92 ++++++++++++ 2 files changed, 92 insertions(+), 132 deletions(-) delete mode 100644 Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt create mode 100644 Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml diff --git a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt deleted file mode 100644 index 1758051798fe..000000000000 --- a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt +++ /dev/null @@ -1,132 +0,0 @@ -TI CPUFreq and OPP bindings -================================ - -Certain TI SoCs, like those in the am335x, am437x, am57xx, and dra7xx -families support different OPPs depending on the silicon variant in use. -The ti-cpufreq driver can use revision and an efuse value from the SoC to -provide the OPP framework with supported hardware information. This is -used to determine which OPPs from the operating-points-v2 table get enabled -when it is parsed by the OPP framework. - -Required properties: --------------------- -In 'cpus' nodes: -- operating-points-v2: Phandle to the operating-points-v2 table to use. - -In 'operating-points-v2' table: -- compatible: Should be - - 'operating-points-v2-ti-cpu' for am335x, am43xx, and dra7xx/am57xx, - omap34xx, omap36xx and am3517 SoCs -- syscon: A phandle pointing to a syscon node representing the control module - register space of the SoC. - -Optional properties: --------------------- -- "vdd-supply", "vbb-supply": to define two regulators for dra7xx -- "cpu0-supply", "vbb-supply": to define two regulators for omap36xx - -For each opp entry in 'operating-points-v2' table: -- opp-supported-hw: Two bitfields indicating: - 1. Which revision of the SoC the OPP is supported by - 2. Which eFuse bits indicate this OPP is available - - A bitwise AND is performed against these values and if any bit - matches, the OPP gets enabled. - -Example: --------- - -/* From arch/arm/boot/dts/am33xx.dtsi */ -cpus { - #address-cells = <1>; - #size-cells = <0>; - cpu@0 { - compatible = "arm,cortex-a8"; - device_type = "cpu"; - reg = <0>; - - operating-points-v2 = <&cpu0_opp_table>; - - clocks = <&dpll_mpu_ck>; - clock-names = "cpu"; - - clock-latency = <300000>; /* From omap-cpufreq driver */ - }; -}; - -/* - * cpu0 has different OPPs depending on SoC revision and some on revisions - * 0x2 and 0x4 have eFuse bits that indicate if they are available or not - */ -cpu0_opp_table: opp-table { - compatible = "operating-points-v2-ti-cpu"; - syscon = <&scm_conf>; - - /* - * The three following nodes are marked with opp-suspend - * because they can not be enabled simultaneously on a - * single SoC. - */ - opp50-300000000 { - opp-hz = /bits/ 64 <300000000>; - opp-microvolt = <950000 931000 969000>; - opp-supported-hw = <0x06 0x0010>; - opp-suspend; - }; - - opp100-275000000 { - opp-hz = /bits/ 64 <275000000>; - opp-microvolt = <1100000 1078000 1122000>; - opp-supported-hw = <0x01 0x00FF>; - opp-suspend; - }; - - opp100-300000000 { - opp-hz = /bits/ 64 <300000000>; - opp-microvolt = <1100000 1078000 1122000>; - opp-supported-hw = <0x06 0x0020>; - opp-suspend; - }; - - opp100-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <1100000 1078000 1122000>; - opp-supported-hw = <0x01 0xFFFF>; - }; - - opp100-600000000 { - opp-hz = /bits/ 64 <600000000>; - opp-microvolt = <1100000 1078000 1122000>; - opp-supported-hw = <0x06 0x0040>; - }; - - opp120-600000000 { - opp-hz = /bits/ 64 <600000000>; - opp-microvolt = <1200000 1176000 1224000>; - opp-supported-hw = <0x01 0xFFFF>; - }; - - opp120-720000000 { - opp-hz = /bits/ 64 <720000000>; - opp-microvolt = <1200000 1176000 1224000>; - opp-supported-hw = <0x06 0x0080>; - }; - - oppturbo-720000000 { - opp-hz = /bits/ 64 <720000000>; - opp-microvolt = <1260000 1234800 1285200>; - opp-supported-hw = <0x01 0xFFFF>; - }; - - oppturbo-800000000 { - opp-hz = /bits/ 64 <800000000>; - opp-microvolt = <1260000 1234800 1285200>; - opp-supported-hw = <0x06 0x0100>; - }; - - oppnitro-1000000000 { - opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <1325000 1298500 1351500>; - opp-supported-hw = <0x04 0x0200>; - }; -}; diff --git a/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml b/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml new file mode 100644 index 000000000000..02d1d2c17129 --- /dev/null +++ b/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/opp/operating-points-v2-ti-cpu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: TI CPU OPP (Operating Performance Points) + +description: + TI SoCs, like those in the AM335x, AM437x, AM57xx, AM62x, and DRA7xx + families, the CPU frequencies subset and the voltage value of each + OPP vary based on the silicon variant used. The data sheet sections + corresponding to "Operating Performance Points" describe the frequency + and voltage values based on device type and speed bin information + blown in corresponding eFuse bits as referred to by the Technical + Reference Manual. + + This document extends the operating-points-v2 binding by providing + the hardware description for the scheme mentioned above. + +maintainers: + - Nishanth Menon + +allOf: + - $ref: opp-v2-base.yaml# + +properties: + compatible: + const: operating-points-v2-ti-cpu + + syscon: + $ref: /schemas/types.yaml#/definitions/phandle + description: | + points to syscon node representing the control module + register space of the SoC. + + opp-shared: true + +patternProperties: + '^opp(-?[0-9]+)*$': + type: object + additionalProperties: false + + properties: + clock-latency-ns: true + opp-hz: true + opp-microvolt: true + opp-supported-hw: true + opp-suspend: true + turbo-mode: true + + required: + - opp-hz + - opp-supported-hw + +required: + - compatible + - syscon + +additionalProperties: false + +examples: + - | + opp-table { + compatible = "operating-points-v2-ti-cpu"; + syscon = <&scm_conf>; + + opp-300000000 { + opp-hz = /bits/ 64 <300000000>; + opp-microvolt = <1100000 1078000 1122000>; + opp-supported-hw = <0x06 0x0020>; + opp-suspend; + }; + + opp-500000000 { + opp-hz = /bits/ 64 <500000000>; + opp-microvolt = <1100000 1078000 1122000>; + opp-supported-hw = <0x01 0xFFFF>; + }; + + opp-600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <1100000 1078000 1122000>; + opp-supported-hw = <0x06 0x0040>; + }; + + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt = <1325000 1298500 1351500>; + opp-supported-hw = <0x04 0x0200>; + }; + }; From ed9571647eff3dd7a53068f83338530959c7ace4 Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Fri, 18 Aug 2023 09:50:00 +0000 Subject: [PATCH 61/74] cpufreq: stats: Improve the performance of cpufreq_stats_create_table() In the worst case, the freq_table of policy data is not sorted and contains duplicate frequencies, this means that it needs to iterate through the entire freq_table of policy to ensure each frequency is unique in the freq_table of stats data, this has a time complexity of O(N^2), where N is the number of frequencies in the freq_table of policy. However, if the policy.freq_table is already sorted and contains no duplicate frequencies, it can reduce the time complexity of creating stats.freq_table to O(N), the 'freq_table_sorted' field of policy data can be used to indicate whether the policy.freq_table is sorted. Signed-off-by: Liao Chang Acked-by: Viresh Kumar Reviewed-by: Dhruva Gole [ rjw: Fix typo in changelog, remove redundant parens ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 55c7ffd37d1c..a33df3c66c88 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -243,7 +243,8 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) /* Find valid-unique entries */ cpufreq_for_each_valid_entry(pos, policy->freq_table) - if (freq_table_get_index(stats, pos->frequency) == -1) + if (policy->freq_table_sorted != CPUFREQ_TABLE_UNSORTED || + freq_table_get_index(stats, pos->frequency) == -1) stats->freq_table[i++] = pos->frequency; stats->state_num = i; From d51847acb018d83186e4af67bc93f9a00a8644f7 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Sun, 20 Aug 2023 13:46:49 -0700 Subject: [PATCH 62/74] cpufreq: intel_pstate: set stale CPU frequency to minimum The intel_pstate CPU frequency scaling driver does not use policy->cur and it is 0. When the CPU frequency is outdated arch_freq_get_on_cpu() will default to the nominal clock frequency when its call to cpufreq_quick_getpolicy_cur returns the never updated 0. Thus, the listed frequency might be outside of currently set limits. Some users are complaining about the high reported frequency, albeit stale, when their system is idle and/or it is above the reduced maximum they have set. This patch will maintain policy_cur for the intel_pstate driver at the current minimum CPU frequency. Reported-by: Yang Jie Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217597 Signed-off-by: Doug Smythies [ rjw: White space damage fixes and comment adjustment ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8ca2bce4341a..dc50c9fb488d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2609,6 +2609,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_clear_update_util_hook(policy->cpu); intel_pstate_hwp_set(policy->cpu); } + /* + * policy->cur is never updated with the intel_pstate driver, but it + * is used as a stale frequency value. So, keep it within limits. + */ + policy->cur = policy->min; mutex_unlock(&intel_pstate_limits_lock); From e26a99dd1522caefcde023aabc0f79d7e6a016bf Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Jul 2023 20:12:16 +0800 Subject: [PATCH 63/74] PM: runtime: Remove unsued extern declaration of pm_runtime_update_max_time_suspended() Commit 76e267d822f2 ("PM / Runtime: Remove device fields related to suspend time, v2") left behind this. Signed-off-by: YueHaibing Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 9a8151a2bdea..7c9b35448563 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -85,8 +85,6 @@ extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern u64 pm_runtime_autosuspend_expiration(struct device *dev); -extern void pm_runtime_update_max_time_suspended(struct device *dev, - s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); From 5f55836ab41671e005038255fdd60482718d2ca2 Mon Sep 17 00:00:00 2001 From: Clive Lin Date: Mon, 31 Jul 2023 14:03:54 +0800 Subject: [PATCH 64/74] PM: QoS: Add check to make sure CPU latency is non-negative CPU latency should never be negative, which will be incorrectly high when converted to unsigned data type. Commit 8d36694245f2 ("PM: QoS: Add check to make sure CPU freq is non-negative") makes sure CPU frequency is non-negative to fix incorrect behavior in freqency QoS. Add an analogous check to make sure CPU latency is non-negative so as to prevent this problem from happening in CPU latency QoS. Signed-off-by: Clive Lin [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- kernel/power/qos.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 782d3b41c1f3..4244b069442e 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -220,6 +220,11 @@ static struct pm_qos_constraints cpu_latency_constraints = { .type = PM_QOS_MIN, }; +static inline bool cpu_latency_qos_value_invalid(s32 value) +{ + return value < 0 && value != PM_QOS_DEFAULT_VALUE; +} + /** * cpu_latency_qos_limit - Return current system-wide CPU latency QoS limit. */ @@ -263,7 +268,7 @@ static void cpu_latency_qos_apply(struct pm_qos_request *req, */ void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value) { - if (!req) + if (!req || cpu_latency_qos_value_invalid(value)) return; if (cpu_latency_qos_request_active(req)) { @@ -289,7 +294,7 @@ EXPORT_SYMBOL_GPL(cpu_latency_qos_add_request); */ void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value) { - if (!req) + if (!req || cpu_latency_qos_value_invalid(new_value)) return; if (!cpu_latency_qos_request_active(req)) { From 10bb4e4ab7dd3898f413b5b4a3b81f6e9b1f6bf5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 10 Aug 2023 18:21:19 +0200 Subject: [PATCH 65/74] PM: sleep: Add helpers to allow a device to remain powered-on On some platforms a device and its corresponding PM domain, may need to remain powered-on during system wide suspend, to support various use cases. For example, when the console_suspend_enabled flag is unset for a serial controller, the corresponding device may need to remain powered on. Other use cases exists too. In fact, we already have the mechanism in the PM core to deal with these kind of use cases. However, the current naming of the corresponding functions/flags clearly suggests these should be use for system wakeup. See device_wakeup_path(), device_set_wakeup_path and dev->power.wakeup_path. As a way to extend the use of the existing mechanism, let's introduce two new helpers functions, device_awake_path() and device_set_awake_path(). At this point, let them act as wrappers of the existing functions. Ideally, when all users have been converted to use the new helpers, we may decide to drop the old ones and rename the flag. Signed-off-by: Ulf Hansson Reviewed-by: Peng Fan Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 77f4849e3418..6eb9adaef52b 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -194,6 +194,16 @@ static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec, #endif /* !CONFIG_PM_SLEEP */ +static inline bool device_awake_path(struct device *dev) +{ + return device_wakeup_path(dev); +} + +static inline void device_set_awake_path(struct device *dev) +{ + device_set_wakeup_path(dev); +} + static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) { return pm_wakeup_ws_event(ws, msec, false); From a436ae9434ec491eefb4ed4c77bdb9c762925976 Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Wed, 16 Aug 2023 01:58:53 +0000 Subject: [PATCH 66/74] cpufreq: Use clamp() helper macro to improve the code readability The valid values of policy.{min, max} should be between 'min' and 'max', so use clamp() helper macro to makes cpufreq_verify_within_limits() easier to follow. Signed-off-by: Liao Chang [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 172ff51c1b2a..9bf94ae08158 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -19,6 +19,7 @@ #include #include #include +#include /********************************************************************* * CPUFREQ INTERFACE * @@ -467,17 +468,8 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *poli unsigned int min, unsigned int max) { - if (policy->min < min) - policy->min = min; - if (policy->max < min) - policy->max = min; - if (policy->min > max) - policy->min = max; - if (policy->max > max) - policy->max = max; - if (policy->min > policy->max) - policy->min = policy->max; - return; + policy->max = clamp(policy->max, min, max); + policy->min = clamp(policy->min, min, policy->max); } static inline void From 8d6e5e8268e89979d86501dbb8385ce2e6154de1 Mon Sep 17 00:00:00 2001 From: Swapnil Sapkal Date: Fri, 18 Aug 2023 11:44:51 +0000 Subject: [PATCH 67/74] cpufreq: amd-pstate-ut: Remove module parameter access In amd-pstate-ut, shared memory-based systems call get_shared_mem() as part of amd_pstate_ut_check_enabled() function. This function was written when CONFIG_X86_AMD_PSTATE was tristate config and amd_pstate can be built as a module. Currently CONFIG_X86_AMD_PSTATE is a boolean config and module parameter shared_mem is removed. But amd-pstate-ut code still accesses this module parameter. Remove those accesses. Fixes: 456ca88d8a52 ("cpufreq: amd-pstate: change amd-pstate driver to be built-in type") Reviewed-by: Mario Limonciello Reviewed-by: Meng Li Reviewed-by: Wyes Karny Suggested-by: Wyes Karny Signed-off-by: Swapnil Sapkal [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate-ut.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 7f3fe2048981..cf07ee77d3cc 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -64,27 +64,9 @@ static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = { static bool get_shared_mem(void) { bool result = false; - char path[] = "/sys/module/amd_pstate/parameters/shared_mem"; - char buf[5] = {0}; - struct file *filp = NULL; - loff_t pos = 0; - ssize_t ret; - if (!boot_cpu_has(X86_FEATURE_CPPC)) { - filp = filp_open(path, O_RDONLY, 0); - if (IS_ERR(filp)) - pr_err("%s unable to open %s file!\n", __func__, path); - else { - ret = kernel_read(filp, &buf, sizeof(buf), &pos); - if (ret < 0) - pr_err("%s read %s file fail ret=%ld!\n", - __func__, path, (long)ret); - filp_close(filp, NULL); - } - - if ('Y' == *buf) - result = true; - } + if (!boot_cpu_has(X86_FEATURE_CPPC)) + result = true; return result; } From 60dd283804479c4a52f995b713f448e2cd65b8c8 Mon Sep 17 00:00:00 2001 From: Swapnil Sapkal Date: Fri, 18 Aug 2023 11:44:52 +0000 Subject: [PATCH 68/74] cpufreq: amd-pstate-ut: Fix kernel panic when loading the driver After loading the amd-pstate-ut driver, amd_pstate_ut_check_perf() and amd_pstate_ut_check_freq() use cpufreq_cpu_get() to get the policy of the CPU and mark it as busy. In these functions, cpufreq_cpu_put() should be used to release the policy, but it is not, so any other entity trying to access the policy is blocked indefinitely. One such scenario is when amd_pstate mode is changed, leading to the following splat: [ 1332.103727] INFO: task bash:2929 blocked for more than 120 seconds. [ 1332.110001] Not tainted 6.5.0-rc2-amd-pstate-ut #5 [ 1332.115315] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1332.123140] task:bash state:D stack:0 pid:2929 ppid:2873 flags:0x00004006 [ 1332.123143] Call Trace: [ 1332.123145] [ 1332.123148] __schedule+0x3c1/0x16a0 [ 1332.123154] ? _raw_read_lock_irqsave+0x2d/0x70 [ 1332.123157] schedule+0x6f/0x110 [ 1332.123160] schedule_timeout+0x14f/0x160 [ 1332.123162] ? preempt_count_add+0x86/0xd0 [ 1332.123165] __wait_for_common+0x92/0x190 [ 1332.123168] ? __pfx_schedule_timeout+0x10/0x10 [ 1332.123170] wait_for_completion+0x28/0x30 [ 1332.123173] cpufreq_policy_put_kobj+0x4d/0x90 [ 1332.123177] cpufreq_policy_free+0x157/0x1d0 [ 1332.123178] ? preempt_count_add+0x58/0xd0 [ 1332.123180] cpufreq_remove_dev+0xb6/0x100 [ 1332.123182] subsys_interface_unregister+0x114/0x120 [ 1332.123185] ? preempt_count_add+0x58/0xd0 [ 1332.123187] ? __pfx_amd_pstate_change_driver_mode+0x10/0x10 [ 1332.123190] cpufreq_unregister_driver+0x3b/0xd0 [ 1332.123192] amd_pstate_change_driver_mode+0x1e/0x50 [ 1332.123194] store_status+0xe9/0x180 [ 1332.123197] dev_attr_store+0x1b/0x30 [ 1332.123199] sysfs_kf_write+0x42/0x50 [ 1332.123202] kernfs_fop_write_iter+0x143/0x1d0 [ 1332.123204] vfs_write+0x2df/0x400 [ 1332.123208] ksys_write+0x6b/0xf0 [ 1332.123210] __x64_sys_write+0x1d/0x30 [ 1332.123213] do_syscall_64+0x60/0x90 [ 1332.123216] ? fpregs_assert_state_consistent+0x2e/0x50 [ 1332.123219] ? exit_to_user_mode_prepare+0x49/0x1a0 [ 1332.123223] ? irqentry_exit_to_user_mode+0xd/0x20 [ 1332.123225] ? irqentry_exit+0x3f/0x50 [ 1332.123226] ? exc_page_fault+0x8e/0x190 [ 1332.123228] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 1332.123232] RIP: 0033:0x7fa74c514a37 [ 1332.123234] RSP: 002b:00007ffe31dd0788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1332.123238] RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007fa74c514a37 [ 1332.123239] RDX: 0000000000000008 RSI: 000055e27c447aa0 RDI: 0000000000000001 [ 1332.123241] RBP: 000055e27c447aa0 R08: 00007fa74c5d1460 R09: 000000007fffffff [ 1332.123242] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000008 [ 1332.123244] R13: 00007fa74c61a780 R14: 00007fa74c616600 R15: 00007fa74c615a00 [ 1332.123247] Fix this by calling cpufreq_cpu_put() wherever necessary. Fixes: 14eb1c96e3a3 ("cpufreq: amd-pstate: Add test module for amd-pstate driver") Reviewed-by: Mario Limonciello Reviewed-by: Meng Li Reviewed-by: Wyes Karny Suggested-by: Wyes Karny Signed-off-by: Swapnil Sapkal [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate-ut.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index cf07ee77d3cc..502d494499ae 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -140,7 +140,7 @@ static void amd_pstate_ut_check_perf(u32 index) if (ret) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret); - return; + goto skip_test; } nominal_perf = cppc_perf.nominal_perf; @@ -151,7 +151,7 @@ static void amd_pstate_ut_check_perf(u32 index) if (ret) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret); - return; + goto skip_test; } nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); @@ -169,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32 index) nominal_perf, cpudata->nominal_perf, lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf, lowest_perf, cpudata->lowest_perf); - return; + goto skip_test; } if (!((highest_perf >= nominal_perf) && @@ -180,11 +180,15 @@ static void amd_pstate_ut_check_perf(u32 index) pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", __func__, cpu, highest_perf, nominal_perf, lowest_nonlinear_perf, lowest_perf); - return; + goto skip_test; } + cpufreq_cpu_put(policy); } amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + return; +skip_test: + cpufreq_cpu_put(policy); } /* @@ -212,14 +216,14 @@ static void amd_pstate_ut_check_freq(u32 index) pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, cpudata->lowest_nonlinear_freq, cpudata->min_freq); - return; + goto skip_test; } if (cpudata->min_freq != policy->min) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n", __func__, cpu, cpudata->min_freq, policy->min); - return; + goto skip_test; } if (cpudata->boost_supported) { @@ -231,16 +235,20 @@ static void amd_pstate_ut_check_freq(u32 index) pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", __func__, cpu, policy->max, cpudata->max_freq, cpudata->nominal_freq); - return; + goto skip_test; } } else { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d must support boost!\n", __func__, cpu); - return; + goto skip_test; } + cpufreq_cpu_put(policy); } amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + return; +skip_test: + cpufreq_cpu_put(policy); } static int __init amd_pstate_ut_init(void) From 862c74a56d56a31cb75a15bdd1b6c82e752d4c51 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 21 Aug 2023 09:39:13 +0200 Subject: [PATCH 69/74] dt-bindings: cpufreq: qcom-hw: add a 4th frequency domain On new platforms, a 4th frequency domain is used, document it. Signed-off-by: Neil Armstrong Acked-by: Manivannan Sadhasivam Acked-by: Rob Herring Signed-off-by: Viresh Kumar --- .../devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index a6b3bb8fdf33..c1d225fcf2d5 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -49,6 +49,7 @@ properties: - description: Frequency domain 0 register region - description: Frequency domain 1 register region - description: Frequency domain 2 register region + - description: Frequency domain 3 register region reg-names: minItems: 1 @@ -56,6 +57,7 @@ properties: - const: freq-domain0 - const: freq-domain1 - const: freq-domain2 + - const: freq-domain3 clocks: items: @@ -69,7 +71,7 @@ properties: interrupts: minItems: 1 - maxItems: 3 + maxItems: 4 interrupt-names: minItems: 1 @@ -77,6 +79,7 @@ properties: - const: dcvsh-irq-0 - const: dcvsh-irq-1 - const: dcvsh-irq-2 + - const: dcvsh-irq-3 '#freq-domain-cells': const: 1 From 5f19d0969a9a29cff7fbbb0f758c9246e3f6beaf Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 21 Aug 2023 09:39:14 +0200 Subject: [PATCH 70/74] cpufreq: qcom-cpufreq-hw: add support for 4 freq domains Add support for up to 4 frequency domains as used on new platforms. Signed-off-by: Neil Armstrong Acked-by: Konrad Dybcio Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index f24cf2eddf1e..70b0f21968a0 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -28,7 +28,7 @@ #define GT_IRQ_STATUS BIT(2) -#define MAX_FREQ_DOMAINS 3 +#define MAX_FREQ_DOMAINS 4 struct qcom_cpufreq_soc_data { u32 reg_enable; From 78aabcb3211aa4b7e8f8a1d5d4bdad699a42d4ba Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Aug 2023 13:28:02 +0200 Subject: [PATCH 71/74] cpuidle: teo: Avoid unnecessary variable assignments Notice that it is not necessary to assign tick_intercept_sum in every iteration of the first loop over idle states in teo_select(), because the intercept_sum value does not change after the assignment in a given iteration of the loop, so its value after the last iteration of the loop can be used for computing the tick_intercept_sum value directly. Modify the code accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/teo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 31050fdc633f..7244f71c59c5 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -450,8 +450,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, hit_sum += prev_bin->hits; recent_sum += prev_bin->recent; - tick_intercept_sum = intercept_sum; - if (dev->states_usage[i].disable) continue; @@ -484,7 +482,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto end; } - tick_intercept_sum += cpu_data->state_bins[drv->state_count-1].intercepts; + tick_intercept_sum = intercept_sum + + cpu_data->state_bins[drv->state_count-1].intercepts; /* * If the sum of the intercepts metric for all of the idle states From a3aa97be69a7cc14ddc2bb0add0b9c51cb74bf83 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 25 Aug 2023 16:49:20 +0530 Subject: [PATCH 72/74] cpufreq: tegra194: add online/offline hooks Implement the light-weight tear down and bring up helpers to reduce the amount of work to do on CPU offline/online operation. This change helps to make the hotplugging paths much faster. Suggested-by: Viresh Kumar Signed-off-by: Sumit Gupta Link: https://lore.kernel.org/lkml/20230816033402.3abmugb5goypvllm@vireshk-i7/ [ Viresh: Fixed rebase conflict ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra194-cpufreq.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index c90b30469165..88df7e5a0573 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -508,6 +508,21 @@ static int tegra194_cpufreq_init(struct cpufreq_policy *policy) return 0; } +static int tegra194_cpufreq_online(struct cpufreq_policy *policy) +{ + /* We did light-weight tear down earlier, nothing to do here */ + return 0; +} + +static int tegra194_cpufreq_offline(struct cpufreq_policy *policy) +{ + /* + * Preserve policy->driver_data and don't free resources on light-weight + * tear down. + */ + return 0; +} + static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { @@ -535,6 +550,8 @@ static struct cpufreq_driver tegra194_cpufreq_driver = { .target_index = tegra194_cpufreq_set_target, .get = tegra194_get_speed, .init = tegra194_cpufreq_init, + .online = tegra194_cpufreq_online, + .offline = tegra194_cpufreq_offline, .attr = cpufreq_generic_attr, }; From 03997da042dac73c69e60d91942c727c76828b65 Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Sat, 26 Aug 2023 09:51:13 +0000 Subject: [PATCH 73/74] cpufreq: powernow-k8: Use related_cpus instead of cpus in driver.exit() Since the 'cpus' field of policy structure will become empty in the cpufreq core API, it is better to use 'related_cpus' in the exit() callback of driver. Fixes: c3274763bfc3 ("cpufreq: powernow-k8: Initialize per-cpu data-structures properly") Signed-off-by: Liao Chang Signed-off-by: Viresh Kumar --- drivers/cpufreq/powernow-k8.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index d289036beff2..b10f7a1b77f1 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1101,7 +1101,8 @@ static int powernowk8_cpu_exit(struct cpufreq_policy *pol) kfree(data->powernow_table); kfree(data); - for_each_cpu(cpu, pol->cpus) + /* pol->cpus will be empty here, use related_cpus instead. */ + for_each_cpu(cpu, pol->related_cpus) per_cpu(powernow_data, cpu) = NULL; return 0; From de0e85b29edfc68046d587c7d67bbd2bdc31b73f Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 25 Aug 2023 16:46:17 +0530 Subject: [PATCH 74/74] cpufreq: tegra194: remove opp table in exit hook Add exit hook and remove OPP table when the device gets unregistered. This will fix the error messages when the CPU FREQ driver module is removed and then re-inserted. It also fixes these messages while onlining the first CPU from a policy whose all CPU's were previously offlined. debugfs: File 'cpu5' in directory 'opp' already present! debugfs: File 'cpu6' in directory 'opp' already present! debugfs: File 'cpu7' in directory 'opp' already present! Fixes: f41e1442ac5b ("cpufreq: tegra194: add OPP support and set bandwidth") Signed-off-by: Sumit Gupta [ Viresh: Dropped irrelevant change from it ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra194-cpufreq.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index 88df7e5a0573..88ef5e57ccd0 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -520,6 +520,17 @@ static int tegra194_cpufreq_offline(struct cpufreq_policy *policy) * Preserve policy->driver_data and don't free resources on light-weight * tear down. */ + + return 0; +} + +static int tegra194_cpufreq_exit(struct cpufreq_policy *policy) +{ + struct device *cpu_dev = get_cpu_device(policy->cpu); + + dev_pm_opp_remove_all_dynamic(cpu_dev); + dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); + return 0; } @@ -550,6 +561,7 @@ static struct cpufreq_driver tegra194_cpufreq_driver = { .target_index = tegra194_cpufreq_set_target, .get = tegra194_get_speed, .init = tegra194_cpufreq_init, + .exit = tegra194_cpufreq_exit, .online = tegra194_cpufreq_online, .offline = tegra194_cpufreq_offline, .attr = cpufreq_generic_attr,