From 52352558d288c3dd5aa40f3bbf736fc005e17f7c Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 1 May 2015 10:34:00 +0200 Subject: [PATCH 01/37] cpufreq: pxa: replace typedef pxa_freqs_t by structure typedef is not really useful here. Replace it by structure to improve readability. typedef should only be used in some cases. (See Documentation/CodingStyle Chapter 5 for details). Signed-off-by: Fabian Frederick Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/pxa2xx-cpufreq.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c index e24269ab4e9b..fcf6e342da4c 100644 --- a/drivers/cpufreq/pxa2xx-cpufreq.c +++ b/drivers/cpufreq/pxa2xx-cpufreq.c @@ -56,7 +56,7 @@ module_param(pxa27x_maxfreq, uint, 0); MODULE_PARM_DESC(pxa27x_maxfreq, "Set the pxa27x maxfreq in MHz" "(typically 624=>pxa270, 416=>pxa271, 520=>pxa272)"); -typedef struct { +struct pxa_freqs { unsigned int khz; unsigned int membus; unsigned int cccr; @@ -64,7 +64,7 @@ typedef struct { unsigned int cclkcfg; int vmin; int vmax; -} pxa_freqs_t; +}; /* Define the refresh period in mSec for the SDRAM and the number of rows */ #define SDRAM_TREF 64 /* standard 64ms SDRAM */ @@ -86,7 +86,7 @@ static unsigned int sdram_rows; /* Use the run mode frequencies for the CPUFREQ_POLICY_PERFORMANCE policy */ #define CCLKCFG CCLKCFG_TURBO | CCLKCFG_FCS -static pxa_freqs_t pxa255_run_freqs[] = +static struct pxa_freqs pxa255_run_freqs[] = { /* CPU MEMBUS CCCR DIV2 CCLKCFG run turbo PXbus SDRAM */ { 99500, 99500, 0x121, 1, CCLKCFG, -1, -1}, /* 99, 99, 50, 50 */ @@ -98,7 +98,7 @@ static pxa_freqs_t pxa255_run_freqs[] = }; /* Use the turbo mode frequencies for the CPUFREQ_POLICY_POWERSAVE policy */ -static pxa_freqs_t pxa255_turbo_freqs[] = +static struct pxa_freqs pxa255_turbo_freqs[] = { /* CPU MEMBUS CCCR DIV2 CCLKCFG run turbo PXbus SDRAM */ { 99500, 99500, 0x121, 1, CCLKCFG, -1, -1}, /* 99, 99, 50, 50 */ @@ -153,7 +153,7 @@ MODULE_PARM_DESC(pxa255_turbo_table, "Selects the frequency table (0 = run table ((HT) ? CCLKCFG_HALFTURBO : 0) | \ ((T) ? CCLKCFG_TURBO : 0)) -static pxa_freqs_t pxa27x_freqs[] = { +static struct pxa_freqs pxa27x_freqs[] = { {104000, 104000, PXA27x_CCCR(1, 8, 2), 0, CCLKCFG2(1, 0, 1), 900000, 1705000 }, {156000, 104000, PXA27x_CCCR(1, 8, 3), 0, CCLKCFG2(1, 0, 1), 1000000, 1705000 }, {208000, 208000, PXA27x_CCCR(0, 16, 2), 1, CCLKCFG2(0, 0, 1), 1180000, 1705000 }, @@ -171,7 +171,7 @@ extern unsigned get_clk_frequency_khz(int info); #ifdef CONFIG_REGULATOR -static int pxa_cpufreq_change_voltage(pxa_freqs_t *pxa_freq) +static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq) { int ret = 0; int vmin, vmax; @@ -202,7 +202,7 @@ static void __init pxa_cpufreq_init_voltages(void) } } #else -static int pxa_cpufreq_change_voltage(pxa_freqs_t *pxa_freq) +static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq) { return 0; } @@ -211,7 +211,7 @@ static void __init pxa_cpufreq_init_voltages(void) { } #endif static void find_freq_tables(struct cpufreq_frequency_table **freq_table, - pxa_freqs_t **pxa_freqs) + struct pxa_freqs **pxa_freqs) { if (cpu_is_pxa25x()) { if (!pxa255_turbo_table) { @@ -270,7 +270,7 @@ static unsigned int pxa_cpufreq_get(unsigned int cpu) static int pxa_set_target(struct cpufreq_policy *policy, unsigned int idx) { struct cpufreq_frequency_table *pxa_freqs_table; - pxa_freqs_t *pxa_freq_settings; + struct pxa_freqs *pxa_freq_settings; unsigned long flags; unsigned int new_freq_cpu, new_freq_mem; unsigned int unused, preset_mdrefr, postset_mdrefr, cclkcfg; @@ -361,7 +361,7 @@ static int pxa_cpufreq_init(struct cpufreq_policy *policy) int i; unsigned int freq; struct cpufreq_frequency_table *pxa255_freq_table; - pxa_freqs_t *pxa255_freqs; + struct pxa_freqs *pxa255_freqs; /* try to guess pxa27x cpu */ if (cpu_is_pxa27x()) From 03c229906311f3b7232ce134fdd6405288780ed3 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 1 May 2015 10:34:01 +0200 Subject: [PATCH 02/37] cpufreq: pxa: make pxa_freqs arrays const pxa255_run_freqs and pxa255_turbo_freqs are only read. This patch updates arrays declaration, find_freq_tables() and its callsites. Suggested-by: Joe Perches Signed-off-by: Fabian Frederick Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/pxa2xx-cpufreq.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c index fcf6e342da4c..1d99c97defa9 100644 --- a/drivers/cpufreq/pxa2xx-cpufreq.c +++ b/drivers/cpufreq/pxa2xx-cpufreq.c @@ -86,7 +86,7 @@ static unsigned int sdram_rows; /* Use the run mode frequencies for the CPUFREQ_POLICY_PERFORMANCE policy */ #define CCLKCFG CCLKCFG_TURBO | CCLKCFG_FCS -static struct pxa_freqs pxa255_run_freqs[] = +static const struct pxa_freqs pxa255_run_freqs[] = { /* CPU MEMBUS CCCR DIV2 CCLKCFG run turbo PXbus SDRAM */ { 99500, 99500, 0x121, 1, CCLKCFG, -1, -1}, /* 99, 99, 50, 50 */ @@ -98,7 +98,7 @@ static struct pxa_freqs pxa255_run_freqs[] = }; /* Use the turbo mode frequencies for the CPUFREQ_POLICY_POWERSAVE policy */ -static struct pxa_freqs pxa255_turbo_freqs[] = +static const struct pxa_freqs pxa255_turbo_freqs[] = { /* CPU MEMBUS CCCR DIV2 CCLKCFG run turbo PXbus SDRAM */ { 99500, 99500, 0x121, 1, CCLKCFG, -1, -1}, /* 99, 99, 50, 50 */ @@ -171,7 +171,7 @@ extern unsigned get_clk_frequency_khz(int info); #ifdef CONFIG_REGULATOR -static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq) +static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq) { int ret = 0; int vmin, vmax; @@ -211,7 +211,7 @@ static void __init pxa_cpufreq_init_voltages(void) { } #endif static void find_freq_tables(struct cpufreq_frequency_table **freq_table, - struct pxa_freqs **pxa_freqs) + const struct pxa_freqs **pxa_freqs) { if (cpu_is_pxa25x()) { if (!pxa255_turbo_table) { @@ -270,7 +270,7 @@ static unsigned int pxa_cpufreq_get(unsigned int cpu) static int pxa_set_target(struct cpufreq_policy *policy, unsigned int idx) { struct cpufreq_frequency_table *pxa_freqs_table; - struct pxa_freqs *pxa_freq_settings; + const struct pxa_freqs *pxa_freq_settings; unsigned long flags; unsigned int new_freq_cpu, new_freq_mem; unsigned int unused, preset_mdrefr, postset_mdrefr, cclkcfg; @@ -361,7 +361,7 @@ static int pxa_cpufreq_init(struct cpufreq_policy *policy) int i; unsigned int freq; struct cpufreq_frequency_table *pxa255_freq_table; - struct pxa_freqs *pxa255_freqs; + const struct pxa_freqs *pxa255_freqs; /* try to guess pxa27x cpu */ if (cpu_is_pxa27x()) From 0a95e630b49a30c176daeff39ac2e90f1231604b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 27 Apr 2015 10:51:05 +0100 Subject: [PATCH 03/37] cpufreq: arm_big_little: check if the frequency is set correctly The actual frequency is set through "clk_change_rate" which is void function. If the underlying hardware fails and returns error, the error is lost in the clk layer. In order to track such failures, we need to read back the frequency(just the cached value as clk_recalc called after clk->ops->set_rate gets the frequency) This patch adds check to see if the frequency is set correctly or if they were any hardware failures and sends the appropriate errors to the cpufreq core. Reviewed-by: Michael Turquette Signed-off-by: Sudeep Holla Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/arm_big_little.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index e1a6ba66a7f5..f65e19f340d0 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -186,6 +186,15 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) mutex_unlock(&cluster_lock[old_cluster]); } + /* + * FIXME: clk_set_rate has to handle the case where clk_change_rate + * can fail due to hardware or firmware issues. Until the clk core + * layer is fixed, we can check here. In most of the cases we will + * be reading only the cached value anyway. This needs to be removed + * once clk core is fixed. + */ + if (bL_cpufreq_get_rate(cpu) != new_rate) + return -EIO; return 0; } From b904f5cce1aeb9a9ee5ca7f1a31c32e1f3487c8b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 27 Apr 2015 10:51:06 +0100 Subject: [PATCH 04/37] cpufreq: arm_big_little: remove unused cpu-cluster. clock name The "cpu-cluster." used to get the cluster clock is not used by any platform. Moreover __of_clk_get_by_name used in clk_get return error if the "clock-names" in the DT doesn't match this string. When using DT, it's not compulsory to specify the clock name unless there are multiple clock input entries in the consumer. This patch removes the unused clock string from the driver. Acked-by: Viresh Kumar Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/arm_big_little.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index f65e19f340d0..e4d75ca9f3b0 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -331,7 +331,6 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev) static int _get_cluster_clk_and_freq_table(struct device *cpu_dev) { u32 cluster = raw_cpu_to_cluster(cpu_dev->id); - char name[14] = "cpu-cluster."; int ret; if (freq_table[cluster]) @@ -351,8 +350,7 @@ static int _get_cluster_clk_and_freq_table(struct device *cpu_dev) goto free_opp_table; } - name[12] = cluster + '0'; - clk[cluster] = clk_get(cpu_dev, name); + clk[cluster] = clk_get(cpu_dev, NULL); if (!IS_ERR(clk[cluster])) { dev_dbg(cpu_dev, "%s: clk: %p & freq table: %p, cluster: %d\n", __func__, clk[cluster], freq_table[cluster], From 4055fad34086dcf5229c43846e0a3cf0fb3692e3 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Sat, 11 Apr 2015 21:10:26 -0700 Subject: [PATCH 05/37] intel_pstate: Add tsc collection and keep previous target pstate The intel_pstate driver is difficult to debug and investigate without tsc. Also, it is likely use of tsc, and some version of C0 percentage, will be re-introdcued in futute. There have also been occasions where it is desirebale to know, and confirm, the previous target pstate. This patch brings back tsc, adds previous target pstate, and adds both to the trace data collection. Signed-off-by: Doug Smythies Acked-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 31 +++++++++++++++++++++---------- include/trace/events/power.h | 25 +++++++++++++++++-------- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6414661ac1c4..e833be4fd903 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -68,6 +68,7 @@ struct sample { int32_t core_pct_busy; u64 aperf; u64 mperf; + u64 tsc; int freq; ktime_t time; }; @@ -109,6 +110,7 @@ struct cpudata { ktime_t last_sample_time; u64 prev_aperf; u64 prev_mperf; + u64 prev_tsc; struct sample sample; }; @@ -756,23 +758,28 @@ static inline void intel_pstate_sample(struct cpudata *cpu) { u64 aperf, mperf; unsigned long flags; + u64 tsc; local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); + tsc = native_read_tsc(); local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; cpu->sample.time = ktime_get(); cpu->sample.aperf = aperf; cpu->sample.mperf = mperf; + cpu->sample.tsc = tsc; cpu->sample.aperf -= cpu->prev_aperf; cpu->sample.mperf -= cpu->prev_mperf; + cpu->sample.tsc -= cpu->prev_tsc; intel_pstate_calc_busy(cpu); cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; + cpu->prev_tsc = tsc; } static inline void intel_hwp_set_sample_time(struct cpudata *cpu) @@ -837,6 +844,10 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) int32_t busy_scaled; struct _pid *pid; signed int ctl; + int from; + struct sample *sample; + + from = cpu->pstate.current_pstate; pid = &cpu->pid; busy_scaled = intel_pstate_get_scaled_busy(cpu); @@ -845,6 +856,16 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) /* Negative values of ctl increase the pstate and vice versa */ intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl); + + sample = &cpu->sample; + trace_pstate_sample(fp_toint(sample->core_pct_busy), + fp_toint(busy_scaled), + from, + cpu->pstate.current_pstate, + sample->mperf, + sample->aperf, + sample->tsc, + sample->freq); } static void intel_hwp_timer_func(unsigned long __data) @@ -858,21 +879,11 @@ static void intel_hwp_timer_func(unsigned long __data) static void intel_pstate_timer_func(unsigned long __data) { struct cpudata *cpu = (struct cpudata *) __data; - struct sample *sample; intel_pstate_sample(cpu); - sample = &cpu->sample; - intel_pstate_adjust_busy_pstate(cpu); - trace_pstate_sample(fp_toint(sample->core_pct_busy), - fp_toint(intel_pstate_get_scaled_busy(cpu)), - cpu->pstate.current_pstate, - sample->mperf, - sample->aperf, - sample->freq); - intel_pstate_set_sample_time(cpu); } diff --git a/include/trace/events/power.h b/include/trace/events/power.h index d19840b0cac8..630d1e5e4de0 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -42,45 +42,54 @@ TRACE_EVENT(pstate_sample, TP_PROTO(u32 core_busy, u32 scaled_busy, - u32 state, + u32 from, + u32 to, u64 mperf, u64 aperf, + u64 tsc, u32 freq ), TP_ARGS(core_busy, scaled_busy, - state, + from, + to, mperf, aperf, + tsc, freq ), TP_STRUCT__entry( __field(u32, core_busy) __field(u32, scaled_busy) - __field(u32, state) + __field(u32, from) + __field(u32, to) __field(u64, mperf) __field(u64, aperf) + __field(u64, tsc) __field(u32, freq) - - ), + ), TP_fast_assign( __entry->core_busy = core_busy; __entry->scaled_busy = scaled_busy; - __entry->state = state; + __entry->from = from; + __entry->to = to; __entry->mperf = mperf; __entry->aperf = aperf; + __entry->tsc = tsc; __entry->freq = freq; ), - TP_printk("core_busy=%lu scaled=%lu state=%lu mperf=%llu aperf=%llu freq=%lu ", + TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ", (unsigned long)__entry->core_busy, (unsigned long)__entry->scaled_busy, - (unsigned long)__entry->state, + (unsigned long)__entry->from, + (unsigned long)__entry->to, (unsigned long long)__entry->mperf, (unsigned long long)__entry->aperf, + (unsigned long long)__entry->tsc, (unsigned long)__entry->freq ) From f133d08a3986efeff24069616ac739fc83c8ec9f Mon Sep 17 00:00:00 2001 From: Wang Long Date: Tue, 5 May 2015 01:22:26 +0000 Subject: [PATCH 06/37] Documentation: cpufreq: delete duplicate description of sysfs interface 'scaling_driver' The file 'Documentation/cpu-freq/user-guide.txt' has duplicate description of sysfs interface 'scaling_driver'. [first] scaling_driver : this file shows what cpufreq driver is used to set the frequency on this CPU [second] scaling_driver : Hardware driver for cpufreq. Although this does not affect anything, I think we should only have one. so delete the second one because the first one is described in more detail. Signed-off-by: Wang Long Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/user-guide.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index ff2f28332cc4..109e97bbab77 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -196,8 +196,6 @@ affected_cpus : List of Online CPUs that require software related_cpus : List of Online + Offline CPUs that need software coordination of frequency. -scaling_driver : Hardware driver for cpufreq. - scaling_cur_freq : Current frequency of the CPU as determined by the governor and cpufreq core, in KHz. This is the frequency the kernel thinks the CPU runs From 50e9c852130e86b10e9098227d4153b2bf997611 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 Feb 2015 17:02:03 +0530 Subject: [PATCH 07/37] cpufreq: Add doc style comment about cpufreq_cpu_{get|put}() This clearly states what the code inside these routines is doing and how these must be used. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8ae655c364f4..5ff57b0c2f47 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -207,6 +207,23 @@ struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) return per_cpu(cpufreq_cpu_data, cpu); } +/** + * cpufreq_cpu_get: returns policy for a cpu and marks it busy. + * + * @cpu: cpu to find policy for. + * + * This returns policy for 'cpu', returns NULL if it doesn't exist. + * It also increments the kobject reference count to mark it busy and so would + * require a corresponding call to cpufreq_cpu_put() to decrement it back. + * If corresponding call cpufreq_cpu_put() isn't made, the policy wouldn't be + * freed as that depends on the kobj count. + * + * It also takes a read-lock of 'cpufreq_rwsem' and doesn't put it back if a + * valid policy is found. This is done to make sure the driver doesn't get + * unregistered while the policy is being used. + * + * Return: A valid policy on success, otherwise NULL on failure. + */ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) { struct cpufreq_policy *policy = NULL; @@ -237,6 +254,16 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) } EXPORT_SYMBOL_GPL(cpufreq_cpu_get); +/** + * cpufreq_cpu_put: Decrements the usage count of a policy + * + * @policy: policy earlier returned by cpufreq_cpu_get(). + * + * This decrements the kobject reference count incremented earlier by calling + * cpufreq_cpu_get(). + * + * It also drops the read-lock of 'cpufreq_rwsem' taken at cpufreq_cpu_get(). + */ void cpufreq_cpu_put(struct cpufreq_policy *policy) { kobject_put(&policy->kobj); From 23faf0b7430f1f19cf033f4ee1588ab86a026b60 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 Feb 2015 17:02:04 +0530 Subject: [PATCH 08/37] cpufreq: Merge __cpufreq_add_dev() and cpufreq_add_dev() cpufreq_add_dev() is an unnecessary wrapper over __cpufreq_add_dev(). Merge them. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5ff57b0c2f47..ee3d920ae207 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1125,7 +1125,16 @@ static int update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu, return 0; } -static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) +/** + * cpufreq_add_dev - add a CPU device + * + * Adds the cpufreq interface for a CPU device. + * + * The Oracle says: try running cpufreq registration/unregistration concurrently + * with with cpu hotplugging and all hell will break loose. Tried to clean this + * mess up, but more thorough testing is needed. - Mathieu + */ +static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { unsigned int j, cpu = dev->id; int ret = -ENOMEM; @@ -1336,20 +1345,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return ret; } -/** - * cpufreq_add_dev - add a CPU device - * - * Adds the cpufreq interface for a CPU device. - * - * The Oracle says: try running cpufreq registration/unregistration concurrently - * with with cpu hotplugging and all hell will break loose. Tried to clean this - * mess up, but more thorough testing is needed. - Mathieu - */ -static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) -{ - return __cpufreq_add_dev(dev, sif); -} - static int __cpufreq_remove_dev_prepare(struct device *dev, struct subsys_interface *sif) { @@ -2331,7 +2326,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, if (dev) { switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - __cpufreq_add_dev(dev, NULL); + cpufreq_add_dev(dev, NULL); break; case CPU_DOWN_PREPARE: @@ -2343,7 +2338,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_FAILED: - __cpufreq_add_dev(dev, NULL); + cpufreq_add_dev(dev, NULL); break; } } From 1b947c904c4831d787c1f17a6a6cd40c7144ba85 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 Feb 2015 17:02:05 +0530 Subject: [PATCH 09/37] cpufreq: Throw warning when we try to get policy for an invalid CPU Simply returning here with an error is not enough. It shouldn't be allowed at all to try calling cpufreq_cpu_get() for an invalid CPU. Add a WARN here to make it clear that it wouldn't be acceptable at all. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ee3d920ae207..48ca0764eb52 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -229,7 +229,7 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) struct cpufreq_policy *policy = NULL; unsigned long flags; - if (cpu >= nr_cpu_ids) + if (WARN_ON(cpu >= nr_cpu_ids)) return NULL; if (!down_read_trylock(&cpufreq_rwsem)) From bb29ae152e3b9d79ca35e41945a55b616e91b5b9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 Feb 2015 17:02:06 +0530 Subject: [PATCH 10/37] cpufreq: Keep a single path for adding managed CPUs There are two cases when we may try to add CPUs we're already handling: - On boot, the first cpu has marked all policy->cpus managed and so we will find policy for all other policy->cpus later on. - When a managed cpu is hotplugged out and later brought back in. Currently, separate paths and checks take care of the two. While the first one is detected by testing cpu against 'policy->cpus', the other one is detected by testing cpu against 'policy->related_cpus'. We can handle them both via a single path and there is no need to do special checking for the first one. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan [ rjw: Changelog, comments ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 48ca0764eb52..497935a93614 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -992,6 +992,10 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, int ret = 0; unsigned long flags; + /* Has this CPU been taken care of already? */ + if (cpumask_test_cpu(cpu, policy->cpus)) + return 0; + if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) { @@ -1147,16 +1151,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) pr_debug("adding CPU %u\n", cpu); - /* check whether a different CPU already registered this - * CPU because it is in the same boat. */ - policy = cpufreq_cpu_get_raw(cpu); - if (unlikely(policy)) - return 0; - if (!down_read_trylock(&cpufreq_rwsem)) return 0; - /* Check if this cpu was hot-unplugged earlier and has siblings */ + /* Check if this CPU already has a policy to manage it */ read_lock_irqsave(&cpufreq_driver_lock, flags); for_each_policy(policy) { if (cpumask_test_cpu(cpu, policy->related_cpus)) { From 303ae7230751219f43115615a2b79669f8ba53cf Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 Feb 2015 17:02:07 +0530 Subject: [PATCH 11/37] cpufreq: Clear policy->cpus even for the last CPU We clear policy->cpus mask while CPUs are hotplugged out. We do it for all CPUs except the last CPU of the policy. I don't remember what the rationale behind that was, but I couldn't think of anything that will break if we remove this conditional clearing and always clear policy->cpus. The benefit we get out of it is, we can know if a policy is active or not by checking if this field is empty or not. That will be used by later commits. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 497935a93614..8cf0c0e7aea8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1430,9 +1430,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev, down_write(&policy->rwsem); cpus = cpumask_weight(policy->cpus); - - if (cpus > 1) - cpumask_clear_cpu(cpu, policy->cpus); + cpumask_clear_cpu(cpu, policy->cpus); up_write(&policy->rwsem); /* If cpu is last user of policy, free policy */ From 0dd23f94251f49da99a6cbfb22418b2d757d77d6 Mon Sep 17 00:00:00 2001 From: Joe Konno Date: Tue, 12 May 2015 07:59:42 -0700 Subject: [PATCH 12/37] intel_pstate: set BYT MSR with wrmsrl_on_cpu() Commit 007bea098b86 (intel_pstate: Add setting voltage value for baytrail P states.) introduced byt_set_pstate() with the assumption that it would always be run by the CPU whose MSR is to be written by it. It turns out, however, that is not always the case in practice, so modify byt_set_pstate() to enforce the MSR write done by it to always happen on the right CPU. Fixes: 007bea098b86 (intel_pstate: Add setting voltage value for baytrail P states.) Signed-off-by: Joe Konno Acked-by: Kristen Carlson Accardi Cc: 3.14+ # 3.14+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e833be4fd903..2f329b45eacd 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -537,7 +537,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) val |= vid; - wrmsrl(MSR_IA32_PERF_CTL, val); + wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); } #define BYT_BCLK_FREQS 5 From 147301450248d51c5913aff8476cd326f1de557e Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 13 May 2015 13:35:52 +0100 Subject: [PATCH 13/37] cpufreq: arm_big_little: remove compile-time dependency on BIG_LITTLE With the addition of switcher code, there's compile-time dependency on BIG_LITTLE to get arm_big_little driver compiling on ARM64. Since ARM64 will never add support for bL switcher, it's better to remove the dependency so that the driver can be reused on ARM64 platforms. This patch adds stubs to remove BIG_LITTLE dependency in the driver. Signed-off-by: Sudeep Holla Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 2 +- drivers/cpufreq/arm_big_little.c | 27 ++++++++++++++++++++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 4f3dbc8cf729..611cb09239eb 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -5,7 +5,7 @@ # big LITTLE core layer and glue drivers config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" - depends on ARM && BIG_LITTLE && ARM_CPU_TOPOLOGY && HAVE_CLK + depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK select PM_OPP help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index e4d75ca9f3b0..f1e42f8ce0fc 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -31,7 +31,6 @@ #include #include #include -#include #include "arm_big_little.h" @@ -41,12 +40,16 @@ #define MAX_CLUSTERS 2 #ifdef CONFIG_BL_SWITCHER +#include static bool bL_switching_enabled; #define is_bL_switching_enabled() bL_switching_enabled #define set_switching_enabled(x) (bL_switching_enabled = (x)) #else #define is_bL_switching_enabled() false #define set_switching_enabled(x) do { } while (0) +#define bL_switch_request(...) do { } while (0) +#define bL_switcher_put_enabled() do { } while (0) +#define bL_switcher_get_enabled() do { } while (0) #endif #define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq) @@ -513,6 +516,7 @@ static struct cpufreq_driver bL_cpufreq_driver = { .attr = cpufreq_generic_attr, }; +#ifdef CONFIG_BL_SWITCHER static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb, unsigned long action, void *_arg) { @@ -545,6 +549,20 @@ static struct notifier_block bL_switcher_notifier = { .notifier_call = bL_cpufreq_switcher_notifier, }; +static int __bLs_register_notifier(void) +{ + return bL_switcher_register_notifier(&bL_switcher_notifier); +} + +static int __bLs_unregister_notifier(void) +{ + return bL_switcher_unregister_notifier(&bL_switcher_notifier); +} +#else +static int __bLs_register_notifier(void) { return 0; } +static int __bLs_unregister_notifier(void) { return 0; } +#endif + int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) { int ret, i; @@ -562,8 +580,7 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) arm_bL_ops = ops; - ret = bL_switcher_get_enabled(); - set_switching_enabled(ret); + set_switching_enabled(bL_switcher_get_enabled()); for (i = 0; i < MAX_CLUSTERS; i++) mutex_init(&cluster_lock[i]); @@ -574,7 +591,7 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) __func__, ops->name, ret); arm_bL_ops = NULL; } else { - ret = bL_switcher_register_notifier(&bL_switcher_notifier); + ret = __bLs_register_notifier(); if (ret) { cpufreq_unregister_driver(&bL_cpufreq_driver); arm_bL_ops = NULL; @@ -598,7 +615,7 @@ void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops) } bL_switcher_get_enabled(); - bL_switcher_unregister_notifier(&bL_switcher_notifier); + __bLs_unregister_notifier(); cpufreq_unregister_driver(&bL_cpufreq_driver); bL_switcher_put_enabled(); pr_info("%s: Un-registered platform driver: %s\n", __func__, From f963735a3ca388da4893fc2d463eca6b58667add Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 12 May 2015 12:20:11 +0530 Subject: [PATCH 14/37] cpufreq: Create for_each_{in}active_policy() policy->cpus is cleared unconditionally now on hotplug-out of a CPU and it can be checked to know if a policy is active or not. Create helper routines to iterate over all active/inactive policies, based on policy->cpus field. Replace all instances of for_each_policy() with for_each_active_policy() to make them iterate only for active policies. (We haven't made changes yet to keep inactive policies in the same list, but that will be followed in a later patch). Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 66 ++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8cf0c0e7aea8..74d9fcbbe4f9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -31,10 +31,62 @@ #include #include -/* Macros to iterate over lists */ -/* Iterate over online CPUs policies */ static LIST_HEAD(cpufreq_policy_list); -#define for_each_policy(__policy) \ + +static inline bool policy_is_inactive(struct cpufreq_policy *policy) +{ + return cpumask_empty(policy->cpus); +} + +static bool suitable_policy(struct cpufreq_policy *policy, bool active) +{ + return active == !policy_is_inactive(policy); +} + +/* Finds Next Acive/Inactive policy */ +static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy, + bool active) +{ + do { + policy = list_next_entry(policy, policy_list); + + /* No more policies in the list */ + if (&policy->policy_list == &cpufreq_policy_list) + return NULL; + } while (!suitable_policy(policy, active)); + + return policy; +} + +static struct cpufreq_policy *first_policy(bool active) +{ + struct cpufreq_policy *policy; + + /* No policies in the list */ + if (list_empty(&cpufreq_policy_list)) + return NULL; + + policy = list_first_entry(&cpufreq_policy_list, typeof(*policy), + policy_list); + + if (!suitable_policy(policy, active)) + policy = next_policy(policy, active); + + return policy; +} + +/* Macros to iterate over CPU policies */ +#define for_each_suitable_policy(__policy, __active) \ + for (__policy = first_policy(__active); \ + __policy; \ + __policy = next_policy(__policy, __active)) + +#define for_each_active_policy(__policy) \ + for_each_suitable_policy(__policy, true) +#define for_each_inactive_policy(__policy) \ + for_each_suitable_policy(__policy, false) + +#define for_each_policy(__policy) \ list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) /* Iterate over governors */ @@ -1156,7 +1208,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Check if this CPU already has a policy to manage it */ read_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_policy(policy) { + for_each_active_policy(policy) { if (cpumask_test_cpu(cpu, policy->related_cpus)) { read_unlock_irqrestore(&cpufreq_driver_lock, flags); ret = cpufreq_add_policy_cpu(policy, cpu, dev); @@ -1674,7 +1726,7 @@ void cpufreq_suspend(void) pr_debug("%s: Suspending Governors\n", __func__); - for_each_policy(policy) { + for_each_active_policy(policy) { if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP)) pr_err("%s: Failed to stop governor for policy: %p\n", __func__, policy); @@ -1708,7 +1760,7 @@ void cpufreq_resume(void) pr_debug("%s: Resuming Governors\n", __func__); - for_each_policy(policy) { + for_each_active_policy(policy) { if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) pr_err("%s: Failed to resume driver: %p\n", __func__, policy); @@ -2354,7 +2406,7 @@ static int cpufreq_boost_set_sw(int state) struct cpufreq_policy *policy; int ret = -EINVAL; - for_each_policy(policy) { + for_each_active_policy(policy) { freq_table = cpufreq_frequency_get_table(policy->cpu); if (freq_table) { ret = cpufreq_frequency_table_cpuinfo(policy, From 988bed09d35a8533eb59868692b827dfae7b66fe Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 8 May 2015 11:53:45 +0530 Subject: [PATCH 15/37] cpufreq: Don't clear cpufreq_cpu_data and policy list for inactive policies Now that we can check policy->cpus to find if policy is active or not, we don't need to clean cpufreq_cpu_data and delete policy from the list on light weight tear down of policies (like in suspend). To make it consistent and clean, set cpufreq_cpu_data for all related CPUs when the policy is first created and clean it only while it is freed. Also update cpufreq_cpu_get_raw() to check if cpu is part of policy->cpus mask, so that we don't end up getting policies for offline CPUs. In order to make sure that no users of 'policy' are using an inactive policy, use cpufreq_cpu_get_raw() instead of directly accessing cpufreq_cpu_data. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 79 +++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 74d9fcbbe4f9..e899a5446d0e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -239,10 +239,18 @@ int cpufreq_generic_init(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_generic_init); -unsigned int cpufreq_generic_get(unsigned int cpu) +/* Only for cpufreq core internal use */ +struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) { struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + return policy && cpumask_test_cpu(cpu, policy->cpus) ? policy : NULL; +} + +unsigned int cpufreq_generic_get(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); + if (!policy || IS_ERR(policy->clk)) { pr_err("%s: No %s associated to cpu: %d\n", __func__, policy ? "clk" : "policy", cpu); @@ -253,12 +261,6 @@ unsigned int cpufreq_generic_get(unsigned int cpu) } EXPORT_SYMBOL_GPL(cpufreq_generic_get); -/* Only for cpufreq core internal use */ -struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) -{ - return per_cpu(cpufreq_cpu_data, cpu); -} - /** * cpufreq_cpu_get: returns policy for a cpu and marks it busy. * @@ -292,7 +294,7 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) if (cpufreq_driver) { /* get the CPU */ - policy = per_cpu(cpufreq_cpu_data, cpu); + policy = cpufreq_cpu_get_raw(cpu); if (policy) kobject_get(&policy->kobj); } @@ -1042,7 +1044,6 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu, struct device *dev) { int ret = 0; - unsigned long flags; /* Has this CPU been taken care of already? */ if (cpumask_test_cpu(cpu, policy->cpus)) @@ -1057,13 +1058,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, } down_write(&policy->rwsem); - - write_lock_irqsave(&cpufreq_driver_lock, flags); - cpumask_set_cpu(cpu, policy->cpus); - per_cpu(cpufreq_cpu_data, cpu) = policy; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); - up_write(&policy->rwsem); if (has_target()) { @@ -1154,6 +1149,17 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy) static void cpufreq_policy_free(struct cpufreq_policy *policy) { + unsigned long flags; + int cpu; + + /* Remove policy from list */ + write_lock_irqsave(&cpufreq_driver_lock, flags); + list_del(&policy->policy_list); + + for_each_cpu(cpu, policy->related_cpus) + per_cpu(cpufreq_cpu_data, cpu) = NULL; + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + free_cpumask_var(policy->related_cpus); free_cpumask_var(policy->cpus); kfree(policy); @@ -1275,12 +1281,12 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) __func__, ret); goto err_init_policy_kobj; } - } - write_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu(j, policy->cpus) - per_cpu(cpufreq_cpu_data, j) = policy; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + write_lock_irqsave(&cpufreq_driver_lock, flags); + for_each_cpu(j, policy->related_cpus) + per_cpu(cpufreq_cpu_data, j) = policy; + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + } if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { policy->cur = cpufreq_driver->get(policy->cpu); @@ -1339,11 +1345,11 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) goto err_out_unregister; blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); - } - write_lock_irqsave(&cpufreq_driver_lock, flags); - list_add(&policy->policy_list, &cpufreq_policy_list); - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + write_lock_irqsave(&cpufreq_driver_lock, flags); + list_add(&policy->policy_list, &cpufreq_policy_list); + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + } cpufreq_init_policy(policy); @@ -1367,11 +1373,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) err_out_unregister: err_get_freq: - write_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu(j, policy->cpus) - per_cpu(cpufreq_cpu_data, j) = NULL; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (!recover_policy) { kobject_put(&policy->kobj); wait_for_completion(&policy->kobj_unregister); @@ -1407,7 +1408,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, write_lock_irqsave(&cpufreq_driver_lock, flags); - policy = per_cpu(cpufreq_cpu_data, cpu); + policy = cpufreq_cpu_get_raw(cpu); /* Save the policy somewhere when doing a light-weight tear-down */ if (cpufreq_suspended) @@ -1465,15 +1466,9 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, static int __cpufreq_remove_dev_finish(struct device *dev, struct subsys_interface *sif) { - unsigned int cpu = dev->id, cpus; + unsigned int cpu = dev->id; int ret; - unsigned long flags; - struct cpufreq_policy *policy; - - write_lock_irqsave(&cpufreq_driver_lock, flags); - policy = per_cpu(cpufreq_cpu_data, cpu); - per_cpu(cpufreq_cpu_data, cpu) = NULL; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); @@ -1481,12 +1476,11 @@ static int __cpufreq_remove_dev_finish(struct device *dev, } down_write(&policy->rwsem); - cpus = cpumask_weight(policy->cpus); cpumask_clear_cpu(cpu, policy->cpus); up_write(&policy->rwsem); /* If cpu is last user of policy, free policy */ - if (cpus == 1) { + if (policy_is_inactive(policy)) { if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); @@ -1508,11 +1502,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev, if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - /* Remove policy from list of active policies */ - write_lock_irqsave(&cpufreq_driver_lock, flags); - list_del(&policy->policy_list); - write_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (!cpufreq_suspended) cpufreq_policy_free(policy); } else if (has_target()) { From 3914d37910af2cd0cc992ae546b8308e05759d2b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 8 May 2015 11:53:46 +0530 Subject: [PATCH 16/37] cpufreq: Get rid of cpufreq_cpu_data_fallback We can extract the same information from cpufreq_cpu_data as it is also available for inactive policies now. And so don't need cpufreq_cpu_data_fallback anymore. Also add a WARN_ON() for the case where we try to restore from an active policy. Acked-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e899a5446d0e..eb0c3a802b14 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -101,7 +101,6 @@ static LIST_HEAD(cpufreq_governor_list); */ static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); -static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback); static DEFINE_RWLOCK(cpufreq_driver_lock); DEFINE_MUTEX(cpufreq_governor_lock); @@ -1081,13 +1080,14 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) unsigned long flags; read_lock_irqsave(&cpufreq_driver_lock, flags); - - policy = per_cpu(cpufreq_cpu_data_fallback, cpu); - + policy = per_cpu(cpufreq_cpu_data, cpu); read_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (policy) + if (likely(policy)) { + /* Policy should be inactive here */ + WARN_ON(!policy_is_inactive(policy)); policy->governor = NULL; + } return policy; } @@ -1383,11 +1383,8 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) if (cpufreq_driver->exit) cpufreq_driver->exit(policy); err_set_policy_cpu: - if (recover_policy) { - /* Do not leave stale fallback data behind. */ - per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL; + if (recover_policy) cpufreq_policy_put_kobj(policy); - } cpufreq_policy_free(policy); nomem_out: @@ -1401,21 +1398,11 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, { unsigned int cpu = dev->id, cpus; int ret; - unsigned long flags; struct cpufreq_policy *policy; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); - write_lock_irqsave(&cpufreq_driver_lock, flags); - policy = cpufreq_cpu_get_raw(cpu); - - /* Save the policy somewhere when doing a light-weight tear-down */ - if (cpufreq_suspended) - per_cpu(cpufreq_cpu_data_fallback, cpu) = policy; - - write_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); return -EINVAL; From 9104bb26c740cd4b2c9ee927f3caabbde0414558 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 12 May 2015 12:22:12 +0530 Subject: [PATCH 17/37] cpufreq: Don't traverse all active policies to find policy for a cpu We reach here while adding policy for a CPU and enter into the 'if' block only if a policy already exists for the CPU. As cpufreq_cpu_data is set for all policy->related_cpus now, when the policy is first added, we can use that to find the CPU's policy instead of traversing the list of all active policies. Acked-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index eb0c3a802b14..e6a63d6ba6f1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1213,16 +1213,13 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return 0; /* Check if this CPU already has a policy to manage it */ - read_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_active_policy(policy) { - if (cpumask_test_cpu(cpu, policy->related_cpus)) { - read_unlock_irqrestore(&cpufreq_driver_lock, flags); - ret = cpufreq_add_policy_cpu(policy, cpu, dev); - up_read(&cpufreq_rwsem); - return ret; - } + policy = per_cpu(cpufreq_cpu_data, cpu); + if (policy && !policy_is_inactive(policy)) { + WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); + ret = cpufreq_add_policy_cpu(policy, cpu, dev); + up_read(&cpufreq_rwsem); + return ret; } - read_unlock_irqrestore(&cpufreq_driver_lock, flags); /* * Restore the saved policy when doing light-weight init and fall back From 4573237b01221881702fbe6655f3ae5135be1c18 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 12 May 2015 12:22:34 +0530 Subject: [PATCH 18/37] cpufreq: Manage governor usage history with 'policy->last_governor' History of which governor was used last is common to all CPUs within a policy and maintaining it per-cpu isn't the best approach for sure. Apart from wasting memory, this also increases the complexity of managing this data structure as it has to be updated for all CPUs. To make that somewhat simpler, lets store this information in a new field 'last_governor' in struct cpufreq_policy and update it on removal of last cpu of a policy. As a side-effect it also solves an old problem, consider a system with two clusters 0 & 1. And there is one policy per cluster. Cluster 0: CPU0 and 1. Cluster 1: CPU2 and 3. - CPU2 is first brought online, and governor is set to performance (default as cpufreq_cpu_governor wasn't set). - Governor is changed to ondemand. - CPU2 is taken offline and cpufreq_cpu_governor is updated for CPU2. - CPU3 is brought online. - Because cpufreq_cpu_governor wasn't set for CPU3, the default governor performance is picked for CPU3. This patch fixes the bug as we now have a single variable to update for policy. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 30 +++++++++++++++--------------- include/linux/cpufreq.h | 1 + 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e6a63d6ba6f1..16275ba6428e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -104,9 +104,6 @@ static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_RWLOCK(cpufreq_driver_lock); DEFINE_MUTEX(cpufreq_governor_lock); -/* This one keeps track of the previously set governor of a removed CPU */ -static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor); - /* Flag to suspend/resume CPUFreq governors */ static bool cpufreq_suspended; @@ -1017,7 +1014,7 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy) memcpy(&new_policy, policy, sizeof(*policy)); /* Update governor of new_policy to the governor used before hotplug */ - gov = find_governor(per_cpu(cpufreq_cpu_governor, policy->cpu)); + gov = find_governor(policy->last_governor); if (gov) pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, policy->cpu); @@ -1411,14 +1408,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, pr_err("%s: Failed to stop governor\n", __func__); return ret; } - - strncpy(per_cpu(cpufreq_cpu_governor, cpu), - policy->governor->name, CPUFREQ_NAME_LEN); } - down_read(&policy->rwsem); + down_write(&policy->rwsem); cpus = cpumask_weight(policy->cpus); - up_read(&policy->rwsem); + + if (has_target() && cpus == 1) + strncpy(policy->last_governor, policy->governor->name, + CPUFREQ_NAME_LEN); + up_write(&policy->rwsem); if (cpu != policy->cpu) { sysfs_remove_link(&dev->kobj, "cpufreq"); @@ -2135,7 +2133,8 @@ EXPORT_SYMBOL_GPL(cpufreq_register_governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor) { - int cpu; + struct cpufreq_policy *policy; + unsigned long flags; if (!governor) return; @@ -2143,12 +2142,13 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor) if (cpufreq_disabled()) return; - for_each_present_cpu(cpu) { - if (cpu_online(cpu)) - continue; - if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name)) - strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0"); + /* clear last_governor for all inactive policies */ + read_lock_irqsave(&cpufreq_driver_lock, flags); + for_each_inactive_policy(policy) { + if (!strcmp(policy->last_governor, governor->name)) + strcpy(policy->last_governor, "\0"); } + read_unlock_irqrestore(&cpufreq_driver_lock, flags); mutex_lock(&cpufreq_governor_mutex); list_del(&governor->governor_list); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2ee4888c1f47..48e37c07eb84 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,6 +80,7 @@ struct cpufreq_policy { struct cpufreq_governor *governor; /* see below */ void *governor_data; bool governor_enabled; /* governor start/stop flag */ + char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ struct work_struct update; /* if update_policy() needs to be * called, but you're in IRQ context */ From 18bf3a124ef87fe43045cbf13dff7ea7e3a94aa3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 12 May 2015 12:22:51 +0530 Subject: [PATCH 19/37] cpufreq: Mark policy->governor = NULL for inactive policies Later commits would change the way policies are managed today. Policies wouldn't be freed on cpu hotplug (currently they aren't freed on suspend), and while the CPU is offline, the sysfs cpufreq files would still be present. Because we don't mark policy->governor as NULL, it still contains pointer of the last used governor. And if the governor is removed, while all the CPUs of a policy are hotplugged out, this pointer wouldn't be valid anymore. And if we try to read the 'scaling_governor', etc. from sysfs, it will result in kernel OOPs. To prevent this, mark policy->governor as NULL for all inactive policies while the governor is removed from kernel. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 16275ba6428e..c08de5e985c7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1083,7 +1083,6 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) if (likely(policy)) { /* Policy should be inactive here */ WARN_ON(!policy_is_inactive(policy)); - policy->governor = NULL; } return policy; @@ -2145,8 +2144,10 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor) /* clear last_governor for all inactive policies */ read_lock_irqsave(&cpufreq_driver_lock, flags); for_each_inactive_policy(policy) { - if (!strcmp(policy->last_governor, governor->name)) + if (!strcmp(policy->last_governor, governor->name)) { + policy->governor = NULL; strcpy(policy->last_governor, "\0"); + } } read_unlock_irqrestore(&cpufreq_driver_lock, flags); From 58405af6321a6d69de838f7a2d3115b85c987416 Mon Sep 17 00:00:00 2001 From: Shailendra Verma Date: Fri, 22 May 2015 22:48:22 +0530 Subject: [PATCH 20/37] cpufreq: Fix for typos in two comments Signed-off-by: Shailendra Verma Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c08de5e985c7..870df9400d3f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -226,7 +226,7 @@ int cpufreq_generic_init(struct cpufreq_policy *policy, policy->cpuinfo.transition_latency = transition_latency; /* - * The driver only supports the SMP configuartion where all processors + * The driver only supports the SMP configuration where all processors * share the clock and voltage and clock. */ cpumask_setall(policy->cpus); @@ -1931,7 +1931,7 @@ static int __target_index(struct cpufreq_policy *policy, * Failed after setting to intermediate freq? Driver should have * reverted back to initial frequency and so should we. Check * here for intermediate_freq instead of get_intermediate, in - * case we have't switched to intermediate freq at all. + * case we haven't switched to intermediate freq at all. */ if (unlikely(retval && intermediate_freq)) { freqs.old = intermediate_freq; From 9d16f207112f77711600fb0770182a06e056e5de Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 18 May 2015 10:43:31 +0530 Subject: [PATCH 21/37] cpufreq: Track cpu managing sysfs kobjects separately In order to prepare for the next few commits, that will stop migrating sysfs files on cpu hotplug, this patch starts managing sysfs-cpu separately. The behavior is still the same as we are still migrating sysfs files on hotplug, later commits would change that. Signed-off-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 11 +++++++---- include/linux/cpufreq.h | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 870df9400d3f..5d780ff5a10a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -959,7 +959,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) for_each_cpu(j, policy->cpus) { struct device *cpu_dev; - if (j == policy->cpu) + if (j == policy->kobj_cpu) continue; pr_debug("Adding link for CPU: %u\n", j); @@ -1178,6 +1178,7 @@ static int update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu, down_write(&policy->rwsem); policy->cpu = cpu; + policy->kobj_cpu = cpu; up_write(&policy->rwsem); return 0; @@ -1235,10 +1236,12 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) * the creation of a brand new one. So we need to perform this update * by invoking update_policy_cpu(). */ - if (recover_policy && cpu != policy->cpu) + if (recover_policy && cpu != policy->cpu) { WARN_ON(update_policy_cpu(policy, cpu, dev)); - else + } else { policy->cpu = cpu; + policy->kobj_cpu = cpu; + } cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1417,7 +1420,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, CPUFREQ_NAME_LEN); up_write(&policy->rwsem); - if (cpu != policy->cpu) { + if (cpu != policy->kobj_cpu) { sysfs_remove_link(&dev->kobj, "cpufreq"); } else if (cpus > 1) { /* Nominate new CPU */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 48e37c07eb84..29ad97c34fd5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,7 +65,9 @@ struct cpufreq_policy { unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ - unsigned int cpu; /* cpu nr of CPU managing this policy */ + unsigned int cpu; /* cpu managing this policy, must be online */ + unsigned int kobj_cpu; /* cpu managing sysfs files, can be offline */ + struct clk *clk; struct cpufreq_cpuinfo cpuinfo;/* see above */ From f16255eb930173f386db0ce78ed41401aa8a94a6 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Sun, 31 May 2015 07:46:47 -0700 Subject: [PATCH 22/37] intel_pstate: change some inconsistent debug information Commit ce717613f3fb (intel_pstate: Turn per cpu printk into pr_debug) turned per cpu printk into pr_debug. However, only half of the change was done, introducing an inconsistency between entry and exit from driver pstate control. This patch changes the exit message to pr_debug also. The various messages are inconsistent with respect to any identifier text that can be used to help isolate the desired information from a huge log. This patch makes a consistent identifier portion of the string. Amends: ce717613f3fb (intel_pstate: Turn per cpu printk into pr_debug) Signed-off-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2f329b45eacd..2050796afd77 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -398,7 +398,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, update_turbo_state(); if (limits.turbo_disabled) { - pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); + pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n"); return -EPERM; } @@ -486,7 +486,7 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(void) { hwp_active++; - pr_info("intel_pstate HWP enabled\n"); + pr_info("intel_pstate: HWP enabled\n"); wrmsrl( MSR_PM_ENABLE, 0x1); } @@ -946,7 +946,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) add_timer_on(&cpu->timer, cpunum); - pr_debug("Intel pstate controlling: cpu %d\n", cpunum); + pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); return 0; } @@ -1012,7 +1012,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) int cpu_num = policy->cpu; struct cpudata *cpu = all_cpu_data[cpu_num]; - pr_info("intel_pstate CPU %d exiting\n", cpu_num); + pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); del_timer_sync(&all_cpu_data[cpu_num]->timer); if (hwp_active) From 6c1e45917dec5e7c99ba8125fd8cc50f6e482a21 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Mon, 1 Jun 2015 21:12:34 -0700 Subject: [PATCH 23/37] intel_pstate: Force setting target pstate when required During initialization and exit it is possible that the target pstate might not actually be set. Furthermore, the result can be that the driver and the processor are out of synch and, under some conditions, the driver might never send the processor the proper target pstate. This patch adds a bypass or do_checks flag to the call to intel_pstate_set_pstate. If bypass, then specifically bypass clamp checks and the do not send if it is the same as last time check. If do_checks, then, and as before, do the current policy clamp checks, and do not do actual send if the new target is the same as the old. Signed-off-by: Doug Smythies Reported-by: Marien Zwart Reported-by: Alex Lochmann Reported-by: Piotr Ko?aczkowski Reported-by: Clemens Eisserer Tested-by: Marien Zwart Tested-by: Doug Smythies [ rjw: Dropped pointless symbol definitions, rebased ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2050796afd77..aef357203593 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -706,19 +706,20 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } -static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) +static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force) { int max_perf, min_perf; - update_turbo_state(); + if (force) { + update_turbo_state(); - intel_pstate_get_min_max(cpu, &min_perf, &max_perf); + intel_pstate_get_min_max(cpu, &min_perf, &max_perf); - pstate = clamp_t(int, pstate, min_perf, max_perf); - - if (pstate == cpu->pstate.current_pstate) - return; + pstate = clamp_t(int, pstate, min_perf, max_perf); + if (pstate == cpu->pstate.current_pstate) + return; + } trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); cpu->pstate.current_pstate = pstate; @@ -735,7 +736,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) if (pstate_funcs.get_vid) pstate_funcs.get_vid(cpu); - intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); + intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false); } static inline void intel_pstate_calc_busy(struct cpudata *cpu) @@ -855,7 +856,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) ctl = pid_calc(pid, busy_scaled); /* Negative values of ctl increase the pstate and vice versa */ - intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl); + intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true); sample = &cpu->sample; trace_pstate_sample(fp_toint(sample->core_pct_busy), @@ -1018,7 +1019,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) if (hwp_active) return; - intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); + intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false); } static int intel_pstate_cpu_init(struct cpufreq_policy *policy) From 11e584cfb8a9d2226151fd39bfa74d09e575f72d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 10 Jun 2015 02:11:45 +0200 Subject: [PATCH 24/37] cpufreq: Don't allow updating inactive policies from sysfs Later commits would change the way policies are managed today. Policies wouldn't be freed on cpu hotplug (currently they aren't freed only for suspend), and while the CPU is offline, the sysfs cpufreq files would still be present. User may accidentally try to update the sysfs files in following directory: '/sys/devices/system/cpu/cpuX/cpufreq/'. And that would result in undefined behavior as policy wouldn't be active then. Apart from updating the store() routine, we also update __cpufreq_get() which can call cpufreq_out_of_sync(). The later routine tries to update policy->cur and starts notifying kernel about it. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5d780ff5a10a..16214dfe78c5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -875,11 +875,18 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, down_write(&policy->rwsem); + /* Updating inactive policies is invalid, so avoid doing that. */ + if (unlikely(policy_is_inactive(policy))) { + ret = -EBUSY; + goto unlock_policy_rwsem; + } + if (fattr->store) ret = fattr->store(policy, buf, count); else ret = -EIO; +unlock_policy_rwsem: up_write(&policy->rwsem); up_read(&cpufreq_rwsem); @@ -1610,6 +1617,10 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) ret_freq = cpufreq_driver->get(policy->cpu); + /* Updating inactive policies is invalid, so avoid doing that. */ + if (unlikely(policy_is_inactive(policy))) + return ret_freq; + if (ret_freq && policy->cur && !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { /* verify no discrepancy between actual and From 87549141d516aee71d511138e27117c41e8aef68 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 10 Jun 2015 02:13:21 +0200 Subject: [PATCH 25/37] cpufreq: Stop migrating sysfs files on hotplug When we hot-unplug a cpu, we remove its sysfs cpufreq directory and if the outgoing cpu was the owner of policy->kobj earlier then we migrate the sysfs directory to under another online cpu. There are few disadvantages this brings: - Code Complexity - Slower hotplug/suspend/resume - sysfs file permissions are reset after all policy->cpus are offlined - CPUFreq stats history lost after all policy->cpus are offlined - Special management of sysfs stuff during suspend/resume To overcome these, this patch modifies the way sysfs directories are managed: - Select sysfs kobjects owner while initializing policy and don't change it during hotplugs. Track it with kobj_cpu created earlier. - Create symlinks for all related CPUs (can be offline) instead of affected CPUs on policy initialization and remove them only when the policy is freed. - Free policy structure only on the removal of cpufreq-driver and not during hotplug/suspend/resume, detected by checking 'struct subsys_interface *' (Valid only when called from subsys_interface_unregister() while unregistering driver). Apart from this, special care is taken to handle physical hoplug of CPUs as we wouldn't remove sysfs links or remove policies on logical hotplugs. Physical hotplug happens in the following sequence. Hot removal: - CPU is offlined first, ~ 'echo 0 > /sys/devices/system/cpu/cpuX/online' - Then its device is removed along with all sysfs files, cpufreq core notified with cpufreq_remove_dev() callback from subsys-interface.. Hot addition: - First the device along with its sysfs files is added, cpufreq core notified with cpufreq_add_dev() callback from subsys-interface.. - CPU is onlined, ~ 'echo 1 > /sys/devices/system/cpu/cpuX/online' We call the same routines with both hotplug and subsys callbacks, and we sense physical hotplug with cpu_offline() check in subsys callback. We can handle most of the stuff with regular hotplug callback paths and add/remove cpufreq sysfs links or free policy from subsys callbacks. Original-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 220 +++++++++++++++++++++++--------------- 1 file changed, 135 insertions(+), 85 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 16214dfe78c5..4bfe0a53502f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -957,28 +957,67 @@ void cpufreq_sysfs_remove_file(const struct attribute *attr) } EXPORT_SYMBOL(cpufreq_sysfs_remove_file); -/* symlink affected CPUs */ +static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu) +{ + struct device *cpu_dev; + + pr_debug("%s: Adding symlink for CPU: %u\n", __func__, cpu); + + if (!policy) + return 0; + + cpu_dev = get_cpu_device(cpu); + if (WARN_ON(!cpu_dev)) + return 0; + + return sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq"); +} + +static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu) +{ + struct device *cpu_dev; + + pr_debug("%s: Removing symlink for CPU: %u\n", __func__, cpu); + + cpu_dev = get_cpu_device(cpu); + if (WARN_ON(!cpu_dev)) + return; + + sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); +} + +/* Add/remove symlinks for all related CPUs */ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) { unsigned int j; int ret = 0; - for_each_cpu(j, policy->cpus) { - struct device *cpu_dev; - + /* Some related CPUs might not be present (physically hotplugged) */ + for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { if (j == policy->kobj_cpu) continue; - pr_debug("Adding link for CPU: %u\n", j); - cpu_dev = get_cpu_device(j); - ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj, - "cpufreq"); + ret = add_cpu_dev_symlink(policy, j); if (ret) break; } + return ret; } +static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) +{ + unsigned int j; + + /* Some related CPUs might not be present (physically hotplugged) */ + for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { + if (j == policy->kobj_cpu) + continue; + + remove_cpu_dev_symlink(policy, j); + } +} + static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, struct device *dev) { @@ -1075,7 +1114,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, } } - return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); + return 0; } static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) @@ -1095,7 +1134,7 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) return policy; } -static struct cpufreq_policy *cpufreq_policy_alloc(void) +static struct cpufreq_policy *cpufreq_policy_alloc(int cpu) { struct cpufreq_policy *policy; @@ -1116,6 +1155,11 @@ static struct cpufreq_policy *cpufreq_policy_alloc(void) init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); + policy->cpu = cpu; + + /* Set this once on allocation */ + policy->kobj_cpu = cpu; + return policy; err_free_cpumask: @@ -1134,10 +1178,11 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_REMOVE_POLICY, policy); - down_read(&policy->rwsem); + down_write(&policy->rwsem); + cpufreq_remove_dev_symlink(policy); kobj = &policy->kobj; cmp = &policy->kobj_unregister; - up_read(&policy->rwsem); + up_write(&policy->rwsem); kobject_put(kobj); /* @@ -1168,27 +1213,14 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) kfree(policy); } -static int update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu, - struct device *cpu_dev) +static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) { - int ret; - if (WARN_ON(cpu == policy->cpu)) - return 0; - - /* Move kobject to the new policy->cpu */ - ret = kobject_move(&policy->kobj, &cpu_dev->kobj); - if (ret) { - pr_err("%s: Failed to move kobj: %d\n", __func__, ret); - return ret; - } + return; down_write(&policy->rwsem); policy->cpu = cpu; - policy->kobj_cpu = cpu; up_write(&policy->rwsem); - - return 0; } /** @@ -1206,13 +1238,19 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) int ret = -ENOMEM; struct cpufreq_policy *policy; unsigned long flags; - bool recover_policy = cpufreq_suspended; - - if (cpu_is_offline(cpu)) - return 0; + bool recover_policy = !sif; pr_debug("adding CPU %u\n", cpu); + /* + * Only possible if 'cpu' wasn't physically present earlier and we are + * here from subsys_interface add callback. A hotplug notifier will + * follow and we will handle it like logical CPU hotplug then. For now, + * just create the sysfs link. + */ + if (cpu_is_offline(cpu)) + return add_cpu_dev_symlink(per_cpu(cpufreq_cpu_data, cpu), cpu); + if (!down_read_trylock(&cpufreq_rwsem)) return 0; @@ -1232,7 +1270,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL; if (!policy) { recover_policy = false; - policy = cpufreq_policy_alloc(); + policy = cpufreq_policy_alloc(cpu); if (!policy) goto nomem_out; } @@ -1243,12 +1281,8 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) * the creation of a brand new one. So we need to perform this update * by invoking update_policy_cpu(). */ - if (recover_policy && cpu != policy->cpu) { - WARN_ON(update_policy_cpu(policy, cpu, dev)); - } else { - policy->cpu = cpu; - policy->kobj_cpu = cpu; - } + if (recover_policy && cpu != policy->cpu) + update_policy_cpu(policy, cpu); cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1427,29 +1461,14 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, CPUFREQ_NAME_LEN); up_write(&policy->rwsem); - if (cpu != policy->kobj_cpu) { - sysfs_remove_link(&dev->kobj, "cpufreq"); - } else if (cpus > 1) { + if (cpu != policy->cpu) + return 0; + + if (cpus > 1) /* Nominate new CPU */ - int new_cpu = cpumask_any_but(policy->cpus, cpu); - struct device *cpu_dev = get_cpu_device(new_cpu); - - sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); - ret = update_policy_cpu(policy, new_cpu, cpu_dev); - if (ret) { - if (sysfs_create_link(&cpu_dev->kobj, &policy->kobj, - "cpufreq")) - pr_err("%s: Failed to restore kobj link to cpu:%d\n", - __func__, cpu_dev->id); - return ret; - } - - if (!cpufreq_suspended) - pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n", - __func__, new_cpu, cpu); - } else if (cpufreq_driver->stop_cpu) { + update_policy_cpu(policy, cpumask_any_but(policy->cpus, cpu)); + else if (cpufreq_driver->stop_cpu) cpufreq_driver->stop_cpu(policy); - } return 0; } @@ -1470,32 +1489,11 @@ static int __cpufreq_remove_dev_finish(struct device *dev, cpumask_clear_cpu(cpu, policy->cpus); up_write(&policy->rwsem); - /* If cpu is last user of policy, free policy */ - if (policy_is_inactive(policy)) { - if (has_target()) { - ret = __cpufreq_governor(policy, - CPUFREQ_GOV_POLICY_EXIT); - if (ret) { - pr_err("%s: Failed to exit governor\n", - __func__); - return ret; - } - } + /* Not the last cpu of policy, start governor again ? */ + if (!policy_is_inactive(policy)) { + if (!has_target()) + return 0; - if (!cpufreq_suspended) - cpufreq_policy_put_kobj(policy); - - /* - * Perform the ->exit() even during light-weight tear-down, - * since this is a core component, and is essential for the - * subsequent light-weight ->init() to succeed. - */ - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - - if (!cpufreq_suspended) - cpufreq_policy_free(policy); - } else if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); @@ -1504,8 +1502,34 @@ static int __cpufreq_remove_dev_finish(struct device *dev, pr_err("%s: Failed to start governor\n", __func__); return ret; } + + return 0; } + /* If cpu is last user of policy, free policy */ + if (has_target()) { + ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + if (ret) { + pr_err("%s: Failed to exit governor\n", __func__); + return ret; + } + } + + /* Free the policy kobjects only if the driver is getting removed. */ + if (sif) + cpufreq_policy_put_kobj(policy); + + /* + * Perform the ->exit() even during light-weight tear-down, + * since this is a core component, and is essential for the + * subsequent light-weight ->init() to succeed. + */ + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + + if (sif) + cpufreq_policy_free(policy); + return 0; } @@ -1519,8 +1543,34 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) unsigned int cpu = dev->id; int ret; - if (cpu_is_offline(cpu)) + /* + * Only possible if 'cpu' is getting physically removed now. A hotplug + * notifier should have already been called and we just need to remove + * link or free policy here. + */ + if (cpu_is_offline(cpu)) { + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + struct cpumask mask; + + if (!policy) + return 0; + + cpumask_copy(&mask, policy->related_cpus); + cpumask_clear_cpu(cpu, &mask); + + /* + * Free policy only if all policy->related_cpus are removed + * physically. + */ + if (cpumask_intersects(&mask, cpu_present_mask)) { + remove_cpu_dev_symlink(policy, cpu); + return 0; + } + + cpufreq_policy_put_kobj(policy); + cpufreq_policy_free(policy); return 0; + } ret = __cpufreq_remove_dev_prepare(dev, sif); From 2fc3384dc75bf7333384c7a16d12c796f61c3f56 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 8 Jun 2015 18:25:29 +0530 Subject: [PATCH 26/37] cpufreq: Initialize policy->kobj while allocating policy policy->kobj is required to be initialized once in the lifetime of a policy. Currently we are initializing it from __cpufreq_add_dev() and that doesn't look to be the best place for doing so as we have to do this on special cases (like: !recover_policy). We can initialize it from a more obvious place cpufreq_policy_alloc() and that will make code look cleaner, specially the error handling part. The error handling part of __cpufreq_add_dev() was doing almost the same thing while recover_policy is true or false. Fix that as well by always calling cpufreq_policy_put_kobj() with an additional parameter to skip notification part of it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 46 ++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 4bfe0a53502f..dbb1bd6c57eb 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1134,9 +1134,10 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) return policy; } -static struct cpufreq_policy *cpufreq_policy_alloc(int cpu) +static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) { struct cpufreq_policy *policy; + int ret; policy = kzalloc(sizeof(*policy), GFP_KERNEL); if (!policy) @@ -1148,6 +1149,13 @@ static struct cpufreq_policy *cpufreq_policy_alloc(int cpu) if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) goto err_free_cpumask; + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &dev->kobj, + "cpufreq"); + if (ret) { + pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); + goto err_free_rcpumask; + } + INIT_LIST_HEAD(&policy->policy_list); init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); @@ -1155,13 +1163,15 @@ static struct cpufreq_policy *cpufreq_policy_alloc(int cpu) init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); - policy->cpu = cpu; + policy->cpu = dev->id; /* Set this once on allocation */ - policy->kobj_cpu = cpu; + policy->kobj_cpu = dev->id; return policy; +err_free_rcpumask: + free_cpumask_var(policy->related_cpus); err_free_cpumask: free_cpumask_var(policy->cpus); err_free_policy: @@ -1170,13 +1180,14 @@ static struct cpufreq_policy *cpufreq_policy_alloc(int cpu) return NULL; } -static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy) +static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify) { struct kobject *kobj; struct completion *cmp; - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_REMOVE_POLICY, policy); + if (notify) + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_REMOVE_POLICY, policy); down_write(&policy->rwsem); cpufreq_remove_dev_symlink(policy); @@ -1270,7 +1281,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL; if (!policy) { recover_policy = false; - policy = cpufreq_policy_alloc(cpu); + policy = cpufreq_policy_alloc(dev); if (!policy) goto nomem_out; } @@ -1310,15 +1321,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; - /* prepare interface data */ - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, - &dev->kobj, "cpufreq"); - if (ret) { - pr_err("%s: failed to init policy->kobj: %d\n", - __func__, ret); - goto err_init_policy_kobj; - } - write_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->related_cpus) per_cpu(cpufreq_cpu_data, j) = policy; @@ -1410,18 +1412,12 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) err_out_unregister: err_get_freq: - if (!recover_policy) { - kobject_put(&policy->kobj); - wait_for_completion(&policy->kobj_unregister); - } -err_init_policy_kobj: up_write(&policy->rwsem); if (cpufreq_driver->exit) cpufreq_driver->exit(policy); err_set_policy_cpu: - if (recover_policy) - cpufreq_policy_put_kobj(policy); + cpufreq_policy_put_kobj(policy, recover_policy); cpufreq_policy_free(policy); nomem_out: @@ -1517,7 +1513,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev, /* Free the policy kobjects only if the driver is getting removed. */ if (sif) - cpufreq_policy_put_kobj(policy); + cpufreq_policy_put_kobj(policy, true); /* * Perform the ->exit() even during light-weight tear-down, @@ -1567,7 +1563,7 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return 0; } - cpufreq_policy_put_kobj(policy); + cpufreq_policy_put_kobj(policy, true); cpufreq_policy_free(policy); return 0; } From 3654c5cc810e9b1f7eadf19780d1bc5f37e1ae6e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 8 Jun 2015 18:25:30 +0530 Subject: [PATCH 27/37] cpufreq: Call cpufreq_policy_put_kobj() from cpufreq_policy_free() cpufreq_policy_put_kobj() is actually part of freeing the policy and can be called from cpufreq_policy_free() directly instead of a separate call. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index dbb1bd6c57eb..8b810071ddd2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1206,7 +1206,7 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify) pr_debug("wait complete\n"); } -static void cpufreq_policy_free(struct cpufreq_policy *policy) +static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) { unsigned long flags; int cpu; @@ -1219,6 +1219,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) per_cpu(cpufreq_cpu_data, cpu) = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); + cpufreq_policy_put_kobj(policy, notify); free_cpumask_var(policy->related_cpus); free_cpumask_var(policy->cpus); kfree(policy); @@ -1417,9 +1418,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) if (cpufreq_driver->exit) cpufreq_driver->exit(policy); err_set_policy_cpu: - cpufreq_policy_put_kobj(policy, recover_policy); - cpufreq_policy_free(policy); - + cpufreq_policy_free(policy, recover_policy); nomem_out: up_read(&cpufreq_rwsem); @@ -1511,10 +1510,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev, } } - /* Free the policy kobjects only if the driver is getting removed. */ - if (sif) - cpufreq_policy_put_kobj(policy, true); - /* * Perform the ->exit() even during light-weight tear-down, * since this is a core component, and is essential for the @@ -1523,8 +1518,9 @@ static int __cpufreq_remove_dev_finish(struct device *dev, if (cpufreq_driver->exit) cpufreq_driver->exit(policy); + /* Free the policy only if the driver is getting removed. */ if (sif) - cpufreq_policy_free(policy); + cpufreq_policy_free(policy, true); return 0; } @@ -1563,8 +1559,7 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return 0; } - cpufreq_policy_put_kobj(policy, true); - cpufreq_policy_free(policy); + cpufreq_policy_free(policy, true); return 0; } From 9591becbf226e3aa0f6c73494736e2c5ab14cc8d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 10 Jun 2015 02:20:23 +0200 Subject: [PATCH 28/37] cpufreq: Restart governor as soon as possible __cpufreq_remove_dev_finish() is doing two things today: - Restarts the governor if some CPUs from concerned policy are still online. - Frees the policy if all CPUs are offline. The first task of restarting the governor can be moved to __cpufreq_remove_dev_prepare() to restart the governor early. There is no race between _prepare() and _finish() as they would be handling completely different cases. _finish() will only be required if we are going to free the policy and that has nothing to do with restarting the governor. Original-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 59 +++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8b810071ddd2..c355ab656dfb 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1428,8 +1428,8 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) static int __cpufreq_remove_dev_prepare(struct device *dev, struct subsys_interface *sif) { - unsigned int cpu = dev->id, cpus; - int ret; + unsigned int cpu = dev->id; + int ret = 0; struct cpufreq_policy *policy; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); @@ -1449,23 +1449,33 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, } down_write(&policy->rwsem); - cpus = cpumask_weight(policy->cpus); + cpumask_clear_cpu(cpu, policy->cpus); - if (has_target() && cpus == 1) - strncpy(policy->last_governor, policy->governor->name, - CPUFREQ_NAME_LEN); + if (policy_is_inactive(policy)) { + if (has_target()) + strncpy(policy->last_governor, policy->governor->name, + CPUFREQ_NAME_LEN); + } else if (cpu == policy->cpu) { + /* Nominate new CPU */ + policy->cpu = cpumask_any(policy->cpus); + } up_write(&policy->rwsem); - if (cpu != policy->cpu) - return 0; + /* Start governor again for active policy */ + if (!policy_is_inactive(policy)) { + if (has_target()) { + ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + if (!ret) + ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); - if (cpus > 1) - /* Nominate new CPU */ - update_policy_cpu(policy, cpumask_any_but(policy->cpus, cpu)); - else if (cpufreq_driver->stop_cpu) + if (ret) + pr_err("%s: Failed to start governor\n", __func__); + } + } else if (cpufreq_driver->stop_cpu) { cpufreq_driver->stop_cpu(policy); + } - return 0; + return ret; } static int __cpufreq_remove_dev_finish(struct device *dev, @@ -1473,33 +1483,16 @@ static int __cpufreq_remove_dev_finish(struct device *dev, { unsigned int cpu = dev->id; int ret; - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); return -EINVAL; } - down_write(&policy->rwsem); - cpumask_clear_cpu(cpu, policy->cpus); - up_write(&policy->rwsem); - - /* Not the last cpu of policy, start governor again ? */ - if (!policy_is_inactive(policy)) { - if (!has_target()) - return 0; - - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); - if (!ret) - ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); - - if (ret) { - pr_err("%s: Failed to start governor\n", __func__); - return ret; - } - + /* Only proceed for inactive policies */ + if (!policy_is_inactive(policy)) return 0; - } /* If cpu is last user of policy, free policy */ if (has_target()) { From 37829029837b2f653fd407cbd6796dcf124c1ed8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 8 Jun 2015 18:25:32 +0530 Subject: [PATCH 29/37] cpufreq: Remove cpufreq_update_policy() cpufreq_update_policy() was kept as a separate routine earlier as it was handling migration of sysfs directories, which isn't the case anymore. It is only updating policy->cpu now and is called by a single caller. The WARN_ON() isn't really required anymore, as we are just updating the cpu now, not moving the sysfs directories. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c355ab656dfb..b612411655f9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1129,6 +1129,10 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) if (likely(policy)) { /* Policy should be inactive here */ WARN_ON(!policy_is_inactive(policy)); + + down_write(&policy->rwsem); + policy->cpu = cpu; + up_write(&policy->rwsem); } return policy; @@ -1225,16 +1229,6 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) kfree(policy); } -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) -{ - if (WARN_ON(cpu == policy->cpu)) - return; - - down_write(&policy->rwsem); - policy->cpu = cpu; - up_write(&policy->rwsem); -} - /** * cpufreq_add_dev - add a CPU device * @@ -1287,15 +1281,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) goto nomem_out; } - /* - * In the resume path, since we restore a saved policy, the assignment - * to policy->cpu is like an update of the existing policy, rather than - * the creation of a brand new one. So we need to perform this update - * by invoking update_policy_cpu(). - */ - if (recover_policy && cpu != policy->cpu) - update_policy_cpu(policy, cpu); - cpumask_copy(policy->cpus, cpumask_of(cpu)); /* call driver. From then on the cpufreq must be able From 8e0484d2b38aeb2bcce0a7b32e6b33d72c11ad85 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 3 Jun 2015 15:57:11 +0530 Subject: [PATCH 30/37] cpufreq: governor: register notifier from cs_init() Notifiers are required only for conservative governor and the common governor code is unnecessarily polluted with that. Handle that from cs_init/exit() instead of cpufreq_governor_dbs(). Signed-off-by: Viresh Kumar Reviewed-by: Preeti U Murthy Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 26 +++++++++++++++----------- drivers/cpufreq/cpufreq_governor.c | 22 +++------------------- drivers/cpufreq/cpufreq_governor.h | 8 ++------ drivers/cpufreq/cpufreq_ondemand.c | 4 ++-- 4 files changed, 22 insertions(+), 38 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 25a70d06c5bf..75f875bb155e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -148,6 +148,10 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, return 0; } +static struct notifier_block cs_cpufreq_notifier_block = { + .notifier_call = dbs_cpufreq_notifier, +}; + /************************** sysfs interface ************************/ static struct common_dbs_data cs_dbs_cdata; @@ -317,7 +321,7 @@ static struct attribute_group cs_attr_group_gov_pol = { /************************** sysfs end ************************/ -static int cs_init(struct dbs_data *dbs_data) +static int cs_init(struct dbs_data *dbs_data, bool notify) { struct cs_dbs_tuners *tuners; @@ -336,25 +340,26 @@ static int cs_init(struct dbs_data *dbs_data) dbs_data->tuners = tuners; dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + + if (notify) + cpufreq_register_notifier(&cs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + mutex_init(&dbs_data->mutex); return 0; } -static void cs_exit(struct dbs_data *dbs_data) +static void cs_exit(struct dbs_data *dbs_data, bool notify) { + if (notify) + cpufreq_unregister_notifier(&cs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + kfree(dbs_data->tuners); } define_get_cpu_dbs_routines(cs_cpu_dbs_info); -static struct notifier_block cs_cpufreq_notifier_block = { - .notifier_call = dbs_cpufreq_notifier, -}; - -static struct cs_ops cs_ops = { - .notifier_block = &cs_cpufreq_notifier_block, -}; - static struct common_dbs_data cs_dbs_cdata = { .governor = GOV_CONSERVATIVE, .attr_group_gov_sys = &cs_attr_group_gov_sys, @@ -363,7 +368,6 @@ static struct common_dbs_data cs_dbs_cdata = { .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = cs_dbs_timer, .gov_check_cpu = cs_check_cpu, - .gov_ops = &cs_ops, .init = cs_init, .exit = cs_exit, }; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 1b44496b2d2b..d64a82e6481a 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -278,7 +278,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_data->cdata = cdata; dbs_data->usage_count = 1; - rc = cdata->init(dbs_data); + rc = cdata->init(dbs_data, !policy->governor->initialized); if (rc) { pr_err("%s: POLICY_INIT: init() failed\n", __func__); kfree(dbs_data); @@ -291,7 +291,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr(dbs_data)); if (rc) { - cdata->exit(dbs_data); + cdata->exit(dbs_data, !policy->governor->initialized); kfree(dbs_data); return rc; } @@ -309,14 +309,6 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, latency * LATENCY_MULTIPLIER)); - if ((cdata->governor == GOV_CONSERVATIVE) && - (!policy->governor->initialized)) { - struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; - - cpufreq_register_notifier(cs_ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - } - if (!have_governor_per_policy()) cdata->gdbs_data = dbs_data; @@ -329,15 +321,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (!have_governor_per_policy()) cpufreq_put_global_kobject(); - if ((dbs_data->cdata->governor == GOV_CONSERVATIVE) && - (policy->governor->initialized == 1)) { - struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; - - cpufreq_unregister_notifier(cs_ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - } - - cdata->exit(dbs_data); + cdata->exit(dbs_data, policy->governor->initialized == 1); kfree(dbs_data); cdata->gdbs_data = NULL; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index cc401d147e72..1690120df487 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -208,8 +208,8 @@ struct common_dbs_data { void *(*get_cpu_dbs_info_s)(int cpu); void (*gov_dbs_timer)(struct work_struct *work); void (*gov_check_cpu)(int cpu, unsigned int load); - int (*init)(struct dbs_data *dbs_data); - void (*exit)(struct dbs_data *dbs_data); + int (*init)(struct dbs_data *dbs_data, bool notify); + void (*exit)(struct dbs_data *dbs_data, bool notify); /* Governor specific ops, see below */ void *gov_ops; @@ -234,10 +234,6 @@ struct od_ops { void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq); }; -struct cs_ops { - struct notifier_block *notifier_block; -}; - static inline int delay_for_sampling_rate(unsigned int sampling_rate) { int delay = usecs_to_jiffies(sampling_rate); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index ad3f38fd3eb9..4fe78a9caa04 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -475,7 +475,7 @@ static struct attribute_group od_attr_group_gov_pol = { /************************** sysfs end ************************/ -static int od_init(struct dbs_data *dbs_data) +static int od_init(struct dbs_data *dbs_data, bool notify) { struct od_dbs_tuners *tuners; u64 idle_time; @@ -517,7 +517,7 @@ static int od_init(struct dbs_data *dbs_data) return 0; } -static void od_exit(struct dbs_data *dbs_data) +static void od_exit(struct dbs_data *dbs_data, bool notify) { kfree(dbs_data->tuners); } From 714a2d9c8792919090f256c16286ac3cff4cb489 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 4 Jun 2015 16:43:27 +0530 Subject: [PATCH 31/37] cpufreq: governor: split cpufreq_governor_dbs() cpufreq_governor_dbs() is hardly readable, it is just too big and complicated. Lets make it more readable by splitting out event specific routines. Order of statements is changed at few places, but that shouldn't bring any functional change. Signed-off-by: Viresh Kumar Reviewed-by: Preeti U Murthy Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 387 ++++++++++++++++------------- 1 file changed, 218 insertions(+), 169 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index d64a82e6481a..ccf6ce7e5983 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -239,19 +239,218 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } } +static int cpufreq_governor_init(struct cpufreq_policy *policy, + struct dbs_data *dbs_data, + struct common_dbs_data *cdata) +{ + unsigned int latency; + int ret; + + if (dbs_data) { + if (WARN_ON(have_governor_per_policy())) + return -EINVAL; + dbs_data->usage_count++; + policy->governor_data = dbs_data; + return 0; + } + + dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); + if (!dbs_data) + return -ENOMEM; + + dbs_data->cdata = cdata; + dbs_data->usage_count = 1; + + ret = cdata->init(dbs_data, !policy->governor->initialized); + if (ret) + goto free_dbs_data; + + /* policy latency is in ns. Convert it to us first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + + /* Bring kernel and HW constraints together */ + dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, + latency * LATENCY_MULTIPLIER)); + + if (!have_governor_per_policy()) { + if (WARN_ON(cpufreq_get_global_kobject())) { + ret = -EINVAL; + goto cdata_exit; + } + cdata->gdbs_data = dbs_data; + } + + ret = sysfs_create_group(get_governor_parent_kobj(policy), + get_sysfs_attr(dbs_data)); + if (ret) + goto put_kobj; + + policy->governor_data = dbs_data; + + return 0; + +put_kobj: + if (!have_governor_per_policy()) { + cdata->gdbs_data = NULL; + cpufreq_put_global_kobject(); + } +cdata_exit: + cdata->exit(dbs_data, !policy->governor->initialized); +free_dbs_data: + kfree(dbs_data); + return ret; +} + +static void cpufreq_governor_exit(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) +{ + struct common_dbs_data *cdata = dbs_data->cdata; + + policy->governor_data = NULL; + if (!--dbs_data->usage_count) { + sysfs_remove_group(get_governor_parent_kobj(policy), + get_sysfs_attr(dbs_data)); + + if (!have_governor_per_policy()) { + cdata->gdbs_data = NULL; + cpufreq_put_global_kobject(); + } + + cdata->exit(dbs_data, policy->governor->initialized == 1); + kfree(dbs_data); + } +} + +static int cpufreq_governor_start(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) +{ + struct common_dbs_data *cdata = dbs_data->cdata; + unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; + struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + int io_busy = 0; + + if (!policy->cur) + return -EINVAL; + + if (cdata->governor == GOV_CONSERVATIVE) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + + sampling_rate = cs_tuners->sampling_rate; + ignore_nice = cs_tuners->ignore_nice_load; + } else { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + sampling_rate = od_tuners->sampling_rate; + ignore_nice = od_tuners->ignore_nice_load; + io_busy = od_tuners->io_is_busy; + } + + mutex_lock(&dbs_data->mutex); + + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_common_info *j_cdbs = cdata->get_cpu_cdbs(j); + unsigned int prev_load; + + j_cdbs->cpu = j; + j_cdbs->cur_policy = policy; + j_cdbs->prev_cpu_idle = + get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); + + prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - + j_cdbs->prev_cpu_idle); + j_cdbs->prev_load = 100 * prev_load / + (unsigned int)j_cdbs->prev_cpu_wall; + + if (ignore_nice) + j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + + mutex_init(&j_cdbs->timer_mutex); + INIT_DEFERRABLE_WORK(&j_cdbs->work, cdata->gov_dbs_timer); + } + + if (cdata->governor == GOV_CONSERVATIVE) { + struct cs_cpu_dbs_info_s *cs_dbs_info = + cdata->get_cpu_dbs_info_s(cpu); + + cs_dbs_info->down_skip = 0; + cs_dbs_info->enable = 1; + cs_dbs_info->requested_freq = policy->cur; + } else { + struct od_ops *od_ops = cdata->gov_ops; + struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); + + od_dbs_info->rate_mult = 1; + od_dbs_info->sample_type = OD_NORMAL_SAMPLE; + od_ops->powersave_bias_init_cpu(cpu); + } + + mutex_unlock(&dbs_data->mutex); + + /* Initiate timer time stamp */ + cpu_cdbs->time_stamp = ktime_get(); + + gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate), + true); + return 0; +} + +static void cpufreq_governor_stop(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) +{ + struct common_dbs_data *cdata = dbs_data->cdata; + unsigned int cpu = policy->cpu; + struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + + if (cdata->governor == GOV_CONSERVATIVE) { + struct cs_cpu_dbs_info_s *cs_dbs_info = + cdata->get_cpu_dbs_info_s(cpu); + + cs_dbs_info->enable = 0; + } + + gov_cancel_work(dbs_data, policy); + + mutex_lock(&dbs_data->mutex); + mutex_destroy(&cpu_cdbs->timer_mutex); + cpu_cdbs->cur_policy = NULL; + mutex_unlock(&dbs_data->mutex); +} + +static void cpufreq_governor_limits(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) +{ + struct common_dbs_data *cdata = dbs_data->cdata; + unsigned int cpu = policy->cpu; + struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + + mutex_lock(&dbs_data->mutex); + if (!cpu_cdbs->cur_policy) { + mutex_unlock(&dbs_data->mutex); + return; + } + + mutex_lock(&cpu_cdbs->timer_mutex); + if (policy->max < cpu_cdbs->cur_policy->cur) + __cpufreq_driver_target(cpu_cdbs->cur_policy, policy->max, + CPUFREQ_RELATION_H); + else if (policy->min > cpu_cdbs->cur_policy->cur) + __cpufreq_driver_target(cpu_cdbs->cur_policy, policy->min, + CPUFREQ_RELATION_L); + dbs_check_cpu(dbs_data, cpu); + mutex_unlock(&cpu_cdbs->timer_mutex); + + mutex_unlock(&dbs_data->mutex); +} + int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct common_dbs_data *cdata, unsigned int event) + struct common_dbs_data *cdata, unsigned int event) { struct dbs_data *dbs_data; - struct od_cpu_dbs_info_s *od_dbs_info = NULL; - struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; - struct od_ops *od_ops = NULL; - struct od_dbs_tuners *od_tuners = NULL; - struct cs_dbs_tuners *cs_tuners = NULL; - struct cpu_dbs_common_info *cpu_cdbs; - unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; - int io_busy = 0; - int rc; + int ret = 0; if (have_governor_per_policy()) dbs_data = policy->governor_data; @@ -262,172 +461,22 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, switch (event) { case CPUFREQ_GOV_POLICY_INIT: - if (have_governor_per_policy()) { - WARN_ON(dbs_data); - } else if (dbs_data) { - dbs_data->usage_count++; - policy->governor_data = dbs_data; - return 0; - } - - dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); - if (!dbs_data) { - pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); - return -ENOMEM; - } - - dbs_data->cdata = cdata; - dbs_data->usage_count = 1; - rc = cdata->init(dbs_data, !policy->governor->initialized); - if (rc) { - pr_err("%s: POLICY_INIT: init() failed\n", __func__); - kfree(dbs_data); - return rc; - } - - if (!have_governor_per_policy()) - WARN_ON(cpufreq_get_global_kobject()); - - rc = sysfs_create_group(get_governor_parent_kobj(policy), - get_sysfs_attr(dbs_data)); - if (rc) { - cdata->exit(dbs_data, !policy->governor->initialized); - kfree(dbs_data); - return rc; - } - - policy->governor_data = dbs_data; - - /* policy latency is in ns. Convert it to us first */ - latency = policy->cpuinfo.transition_latency / 1000; - if (latency == 0) - latency = 1; - - /* Bring kernel and HW constraints together */ - dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, - MIN_LATENCY_MULTIPLIER * latency); - set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, - latency * LATENCY_MULTIPLIER)); - - if (!have_governor_per_policy()) - cdata->gdbs_data = dbs_data; - - return 0; + ret = cpufreq_governor_init(policy, dbs_data, cdata); + break; case CPUFREQ_GOV_POLICY_EXIT: - if (!--dbs_data->usage_count) { - sysfs_remove_group(get_governor_parent_kobj(policy), - get_sysfs_attr(dbs_data)); - - if (!have_governor_per_policy()) - cpufreq_put_global_kobject(); - - cdata->exit(dbs_data, policy->governor->initialized == 1); - kfree(dbs_data); - cdata->gdbs_data = NULL; - } - - policy->governor_data = NULL; - return 0; - } - - cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { - cs_tuners = dbs_data->tuners; - cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); - sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice_load; - } else { - od_tuners = dbs_data->tuners; - od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); - sampling_rate = od_tuners->sampling_rate; - ignore_nice = od_tuners->ignore_nice_load; - od_ops = dbs_data->cdata->gov_ops; - io_busy = od_tuners->io_is_busy; - } - - switch (event) { + cpufreq_governor_exit(policy, dbs_data); + break; case CPUFREQ_GOV_START: - if (!policy->cur) - return -EINVAL; - - mutex_lock(&dbs_data->mutex); - - for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs = - dbs_data->cdata->get_cpu_cdbs(j); - unsigned int prev_load; - - j_cdbs->cpu = j; - j_cdbs->cur_policy = policy; - j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, - &j_cdbs->prev_cpu_wall, io_busy); - - prev_load = (unsigned int) - (j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle); - j_cdbs->prev_load = 100 * prev_load / - (unsigned int) j_cdbs->prev_cpu_wall; - - if (ignore_nice) - j_cdbs->prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - - mutex_init(&j_cdbs->timer_mutex); - INIT_DEFERRABLE_WORK(&j_cdbs->work, - dbs_data->cdata->gov_dbs_timer); - } - - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { - cs_dbs_info->down_skip = 0; - cs_dbs_info->enable = 1; - cs_dbs_info->requested_freq = policy->cur; - } else { - od_dbs_info->rate_mult = 1; - od_dbs_info->sample_type = OD_NORMAL_SAMPLE; - od_ops->powersave_bias_init_cpu(cpu); - } - - mutex_unlock(&dbs_data->mutex); - - /* Initiate timer time stamp */ - cpu_cdbs->time_stamp = ktime_get(); - - gov_queue_work(dbs_data, policy, - delay_for_sampling_rate(sampling_rate), true); + ret = cpufreq_governor_start(policy, dbs_data); break; - case CPUFREQ_GOV_STOP: - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) - cs_dbs_info->enable = 0; - - gov_cancel_work(dbs_data, policy); - - mutex_lock(&dbs_data->mutex); - mutex_destroy(&cpu_cdbs->timer_mutex); - cpu_cdbs->cur_policy = NULL; - - mutex_unlock(&dbs_data->mutex); - + cpufreq_governor_stop(policy, dbs_data); break; - case CPUFREQ_GOV_LIMITS: - mutex_lock(&dbs_data->mutex); - if (!cpu_cdbs->cur_policy) { - mutex_unlock(&dbs_data->mutex); - break; - } - mutex_lock(&cpu_cdbs->timer_mutex); - if (policy->max < cpu_cdbs->cur_policy->cur) - __cpufreq_driver_target(cpu_cdbs->cur_policy, - policy->max, CPUFREQ_RELATION_H); - else if (policy->min > cpu_cdbs->cur_policy->cur) - __cpufreq_driver_target(cpu_cdbs->cur_policy, - policy->min, CPUFREQ_RELATION_L); - dbs_check_cpu(dbs_data, cpu); - mutex_unlock(&cpu_cdbs->timer_mutex); - mutex_unlock(&dbs_data->mutex); + cpufreq_governor_limits(policy, dbs_data); break; } - return 0; + + return ret; } EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); From 732b6d617a4cfd8363d1f70a06bff38b8c1a19e9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 3 Jun 2015 15:57:13 +0530 Subject: [PATCH 32/37] cpufreq: governor: Serialize governor callbacks There are several races reported in cpufreq core around governors (only ondemand and conservative) by different people. There are at least two race scenarios present in governor code: (a) Concurrent access/updates of governor internal structures. It is possible that fields such as 'dbs_data->usage_count', etc. are accessed simultaneously for different policies using same governor structure (i.e. CPUFREQ_HAVE_GOVERNOR_PER_POLICY flag unset). And because of this we can dereference bad pointers. For example consider a system with two CPUs with separate 'struct cpufreq_policy' instances. CPU0 governor: ondemand and CPU1: powersave. CPU0 switching to powersave and CPU1 to ondemand: CPU0 CPU1 store* store* cpufreq_governor_exit() cpufreq_governor_init() dbs_data = cdata->gdbs_data; if (!--dbs_data->usage_count) kfree(dbs_data); dbs_data->usage_count++; *Bad pointer dereference* There are other races possible between EXIT and START/STOP/LIMIT as well. Its really complicated. (b) Switching governor state in bad sequence: For example trying to switch a governor to START state, when the governor is in EXIT state. There are some checks present in __cpufreq_governor() but they aren't sufficient as they compare events against 'policy->governor_enabled', where as we need to take governor's state into account, which can be used by multiple policies. These two issues need to be solved separately and the responsibility should be properly divided between cpufreq and governor core. The first problem is more about the governor core, as it needs to protect its structures properly. And the second problem should be fixed in cpufreq core instead of governor, as its all about sequence of events. This patch is trying to solve only the first problem. There are two types of data we need to protect, - 'struct common_dbs_data': No matter what, there is going to be a single copy of this per governor. - 'struct dbs_data': With CPUFREQ_HAVE_GOVERNOR_PER_POLICY flag set, we will have per-policy copy of this data, otherwise a single copy. Because of such complexities, the mutex present in 'struct dbs_data' is insufficient to solve our problem. For example we need to protect fetching of 'dbs_data' from different structures at the beginning of cpufreq_governor_dbs(), to make sure it isn't currently being updated. This can be fixed if we can guarantee serialization of event parsing code for an individual governor. This is best solved with a mutex per governor, and the placeholder for that is 'struct common_dbs_data'. And so this patch moves the mutex from 'struct dbs_data' to 'struct common_dbs_data' and takes it at the beginning and drops it at the end of cpufreq_governor_dbs(). Tested with and without following configuration options: CONFIG_LOCKDEP_SUPPORT=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_PI_LIST=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y CONFIG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y Signed-off-by: Viresh Kumar Reviewed-by: Preeti U Murthy Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 24 +++++++++++------------- drivers/cpufreq/cpufreq_governor.h | 8 +++++--- drivers/cpufreq/cpufreq_ondemand.c | 2 +- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 75f875bb155e..c86a10c30912 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -345,7 +345,6 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) cpufreq_register_notifier(&cs_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); - mutex_init(&dbs_data->mutex); return 0; } @@ -370,6 +369,7 @@ static struct common_dbs_data cs_dbs_cdata = { .gov_check_cpu = cs_check_cpu, .init = cs_init, .exit = cs_exit, + .mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex), }; static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ccf6ce7e5983..57a39f8a92b7 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -349,8 +349,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, io_busy = od_tuners->io_is_busy; } - mutex_lock(&dbs_data->mutex); - for_each_cpu(j, policy->cpus) { struct cpu_dbs_common_info *j_cdbs = cdata->get_cpu_cdbs(j); unsigned int prev_load; @@ -388,8 +386,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, od_ops->powersave_bias_init_cpu(cpu); } - mutex_unlock(&dbs_data->mutex); - /* Initiate timer time stamp */ cpu_cdbs->time_stamp = ktime_get(); @@ -414,10 +410,8 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, gov_cancel_work(dbs_data, policy); - mutex_lock(&dbs_data->mutex); mutex_destroy(&cpu_cdbs->timer_mutex); cpu_cdbs->cur_policy = NULL; - mutex_unlock(&dbs_data->mutex); } static void cpufreq_governor_limits(struct cpufreq_policy *policy, @@ -427,11 +421,8 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy, unsigned int cpu = policy->cpu; struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); - mutex_lock(&dbs_data->mutex); - if (!cpu_cdbs->cur_policy) { - mutex_unlock(&dbs_data->mutex); + if (!cpu_cdbs->cur_policy) return; - } mutex_lock(&cpu_cdbs->timer_mutex); if (policy->max < cpu_cdbs->cur_policy->cur) @@ -442,8 +433,6 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy, CPUFREQ_RELATION_L); dbs_check_cpu(dbs_data, cpu); mutex_unlock(&cpu_cdbs->timer_mutex); - - mutex_unlock(&dbs_data->mutex); } int cpufreq_governor_dbs(struct cpufreq_policy *policy, @@ -452,12 +441,18 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct dbs_data *dbs_data; int ret = 0; + /* Lock governor to block concurrent initialization of governor */ + mutex_lock(&cdata->mutex); + if (have_governor_per_policy()) dbs_data = policy->governor_data; else dbs_data = cdata->gdbs_data; - WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)); + if (WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT))) { + ret = -EINVAL; + goto unlock; + } switch (event) { case CPUFREQ_GOV_POLICY_INIT: @@ -477,6 +472,9 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, break; } +unlock: + mutex_unlock(&cdata->mutex); + return ret; } EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 1690120df487..34736f5e869d 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -213,6 +213,11 @@ struct common_dbs_data { /* Governor specific ops, see below */ void *gov_ops; + + /* + * Protects governor's data (struct dbs_data and struct common_dbs_data) + */ + struct mutex mutex; }; /* Governor Per policy data */ @@ -221,9 +226,6 @@ struct dbs_data { unsigned int min_sampling_rate; int usage_count; void *tuners; - - /* dbs_mutex protects dbs_enable in governor start/stop */ - struct mutex mutex; }; /* Governor specific ops, will be passed to dbs_data->gov_ops */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 4fe78a9caa04..3c1e10f2304c 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -513,7 +513,6 @@ static int od_init(struct dbs_data *dbs_data, bool notify) tuners->io_is_busy = should_io_be_busy(); dbs_data->tuners = tuners; - mutex_init(&dbs_data->mutex); return 0; } @@ -541,6 +540,7 @@ static struct common_dbs_data od_dbs_cdata = { .gov_ops = &od_ops, .init = od_init, .exit = od_exit, + .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), }; static void od_set_powersave_bias(unsigned int powersave_bias) From 97155e033662d0e9059ed2a007b0eb5080339349 Mon Sep 17 00:00:00 2001 From: Shailendra Verma Date: Sat, 23 May 2015 10:36:18 +0530 Subject: [PATCH 33/37] cpufreq: nforce2: Fix typo in comment to function nforce2_init() Signed-off-by: Shailendra Verma Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-nforce2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c index a2258090b58b..db69eeb501a7 100644 --- a/drivers/cpufreq/cpufreq-nforce2.c +++ b/drivers/cpufreq/cpufreq-nforce2.c @@ -414,7 +414,7 @@ static int nforce2_detect_chipset(void) * nforce2_init - initializes the nForce2 CPUFreq driver * * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported - * devices, -EINVAL on problems during initiatization, and zero on + * devices, -EINVAL on problems during initialization, and zero on * success. */ static int __init nforce2_init(void) From 431920edfd675ba74949415aace0a4eae07073e3 Mon Sep 17 00:00:00 2001 From: Shailendra Verma Date: Sat, 23 May 2015 10:36:49 +0530 Subject: [PATCH 34/37] cpufreq: gx-suspmod: Fix two typos in two comments Signed-off-by: Shailendra Verma Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/gx-suspmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c index 1d723dc8880c..3488c9c175eb 100644 --- a/drivers/cpufreq/gx-suspmod.c +++ b/drivers/cpufreq/gx-suspmod.c @@ -144,7 +144,7 @@ module_param(max_duration, int, 0444); /** - * we can detect a core multipiler from dir0_lsb + * we can detect a core multiplier from dir0_lsb * from GX1 datasheet p.56, * MULT[3:0]: * 0000 = SYSCLK multiplied by 4 (test only) @@ -346,7 +346,7 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) /* it needs to be assured that at least one supported frequency is * within policy->min and policy->max. If it is not, policy->max - * needs to be increased until one freuqency is supported. + * needs to be increased until one frequency is supported. * policy->min may not be decreased, though. This way we guarantee a * specific processing capacity. */ From 8a95c1441c799bb0f0d31cdb11523d91923d51a7 Mon Sep 17 00:00:00 2001 From: Tang Yuantian Date: Thu, 4 Jun 2015 14:25:42 +0800 Subject: [PATCH 35/37] cpufreq: qoriq: optimize the CPU frequency switching time Each time the CPU switches its frequency, the clock nodes in DTS are walked through to find proper clock source. This is very time-consuming, for example, it is up to 500+ us on T4240. Besides, switching time varies from clock to clock. To optimize this, each input clock of CPU is buffered, so that it can be picked up instantly when needed. Since for each CPU each input clock is stored in a pointer which takes 4 or 8 bytes memory and normally there are several input clocks per CPU, that will not take much memory as well. Signed-off-by: Tang Yuantian Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/qoriq-cpufreq.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 88b21ae0d6b0..358f0752c31e 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -27,11 +27,11 @@ /** * struct cpu_data - * @parent: the parent node of cpu clock + * @pclk: the parent clock of cpu * @table: frequency table */ struct cpu_data { - struct device_node *parent; + struct clk **pclk; struct cpufreq_frequency_table *table; }; @@ -196,7 +196,7 @@ static void freq_table_sort(struct cpufreq_frequency_table *freq_table, static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy) { - struct device_node *np; + struct device_node *np, *pnode; int i, count, ret; u32 freq, mask; struct clk *clk; @@ -219,17 +219,23 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_nomem2; } - data->parent = of_parse_phandle(np, "clocks", 0); - if (!data->parent) { + pnode = of_parse_phandle(np, "clocks", 0); + if (!pnode) { pr_err("%s: could not get clock information\n", __func__); goto err_nomem2; } - count = of_property_count_strings(data->parent, "clock-names"); + count = of_property_count_strings(pnode, "clock-names"); + data->pclk = kcalloc(count, sizeof(struct clk *), GFP_KERNEL); + if (!data->pclk) { + pr_err("%s: no memory\n", __func__); + goto err_node; + } + table = kcalloc(count + 1, sizeof(*table), GFP_KERNEL); if (!table) { pr_err("%s: no memory\n", __func__); - goto err_node; + goto err_pclk; } if (fmask) @@ -238,7 +244,8 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy) mask = 0x0; for (i = 0; i < count; i++) { - clk = of_clk_get(data->parent, i); + clk = of_clk_get(pnode, i); + data->pclk[i] = clk; freq = clk_get_rate(clk); /* * the clock is valid if its frequency is not masked @@ -273,13 +280,16 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = u64temp + 1; of_node_put(np); + of_node_put(pnode); return 0; err_nomem1: kfree(table); +err_pclk: + kfree(data->pclk); err_node: - of_node_put(data->parent); + of_node_put(pnode); err_nomem2: policy->driver_data = NULL; kfree(data); @@ -293,7 +303,7 @@ static int __exit qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy) { struct cpu_data *data = policy->driver_data; - of_node_put(data->parent); + kfree(data->pclk); kfree(data->table); kfree(data); policy->driver_data = NULL; @@ -307,7 +317,7 @@ static int qoriq_cpufreq_target(struct cpufreq_policy *policy, struct clk *parent; struct cpu_data *data = policy->driver_data; - parent = of_clk_get(data->parent, data->table[index].driver_data); + parent = data->pclk[data->table[index].driver_data]; return clk_set_parent(policy->clk, parent); } From 7180dddf7c32c49975c7e7babf2b60ed450cb760 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 15 Jun 2015 13:43:29 -0400 Subject: [PATCH 36/37] intel_pstate: Fix overflow in busy_scaled due to long delay The kernel may delay interrupts for a long time which can result in timers being delayed. If this occurs the intel_pstate driver will crash with a divide by zero error: divide error: 0000 [#1] SMP Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod CPU: 113 PID: 0 Comm: swapper/113 Tainted: G W -------------- 3.10.0-229.1.2.el7.x86_64 #1 Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014 task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000 RIP: 0010:[] [] intel_pstate_timer_func+0x179/0x3d0 RSP: 0018:ffff883fff4e3db8 EFLAGS: 00010206 RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001 R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5 R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246 FS: 0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff883fff4e3e58 ffffffff81099dc1 0000000000000086 0000000000000071 ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100 Call Trace: [] ? run_posix_cpu_timers+0x51/0x840 [] ? trigger_load_balance+0x5d/0x200 [] ? pid_param_set+0x130/0x130 [] call_timer_fn+0x36/0x110 [] ? pid_param_set+0x130/0x130 [] run_timer_softirq+0x21f/0x320 [] __do_softirq+0xef/0x280 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x115/0x120 [] smp_apic_timer_interrupt+0x45/0x60 [] apic_timer_interrupt+0x6d/0x80 [] ? cpuidle_enter_state+0x52/0xc0 [] ? cpuidle_enter_state+0x48/0xc0 [] cpuidle_idle_call+0xc5/0x200 [] arch_cpu_idle+0xe/0x30 [] cpu_startup_entry+0xf1/0x290 [] start_secondary+0x1ba/0x230 Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29 RIP [] intel_pstate_timer_func+0x179/0x3d0 RSP The kernel values for cpudata for CPU 113 were: struct cpudata { cpu = 113, timer = { entry = { next = 0x0, prev = 0xdead000000200200 }, expires = 8357799745, base = 0xffff883fe84ec001, function = 0xffffffff814a9100 , data = 18446612406765768960, i_gain = 0, d_gain = 0, deadband = 0, last_err = 22489 }, last_sample_time = { tv64 = 4063132438017305 }, prev_aperf = 287326796397463, prev_mperf = 251427432090198, sample = { core_pct_busy = 23081, aperf = 2937407, mperf = 3257884, freq = 2524484, time = { tv64 = 4063149215234118 } } } which results in the time between samples = last_sample_time - sample.time = 4063149215234118 - 4063132438017305 = 16777216813 which is 16.777 seconds. The duration between reads of the APERF and MPERF registers overflowed a s32 sized integer in intel_pstate_get_scaled_busy()'s call to div_fp(). The result is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0. While the kernel shouldn't be delaying for a long time, it can and does happen and the intel_pstate driver should not panic in this situation. This patch changes the div_fp() function to use div64_s64() to allow for "long" division. This will avoid the overflow condition on long delays. [v2]: use div64_s64() in div_fp() Signed-off-by: Prarit Bhargava Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index aef357203593..1da3197425c5 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -48,9 +48,9 @@ static inline int32_t mul_fp(int32_t x, int32_t y) return ((int64_t)x * (int64_t)y) >> FRAC_BITS; } -static inline int32_t div_fp(int32_t x, int32_t y) +static inline int32_t div_fp(s64 x, s64 y) { - return div_s64((int64_t)x << FRAC_BITS, y); + return div64_s64((int64_t)x << FRAC_BITS, y); } static inline int ceiling_fp(int32_t x) @@ -802,7 +802,7 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) { int32_t core_busy, max_pstate, current_pstate, sample_ratio; - u32 duration_us; + s64 duration_us; u32 sample_time; /* @@ -829,8 +829,8 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) * to adjust our busyness. */ sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; - duration_us = (u32) ktime_us_delta(cpu->sample.time, - cpu->last_sample_time); + duration_us = ktime_us_delta(cpu->sample.time, + cpu->last_sample_time); if (duration_us > sample_time * 3) { sample_ratio = div_fp(int_tofp(sample_time), int_tofp(duration_us)); From 07949bf9c63c9a80027fe8452d5fe8b9ba9b3c23 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 8 May 2015 14:57:30 -0500 Subject: [PATCH 37/37] cpufreq: dt: allow driver to boot automatically by adding the missing MODULE_ALIAS(), cpufreq-dt can be autoloaded by udev/systemd. Signed-off-by: Felipe Balbi Acked-by: Nishanth Menon Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index bab67db54b7e..528a82bf5038 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -416,6 +416,7 @@ static struct platform_driver dt_cpufreq_platdrv = { }; module_platform_driver(dt_cpufreq_platdrv); +MODULE_ALIAS("platform:cpufreq-dt"); MODULE_AUTHOR("Viresh Kumar "); MODULE_AUTHOR("Shawn Guo "); MODULE_DESCRIPTION("Generic cpufreq driver");