From 2abfa876f1117b0ab45f191fb1f82c41b1cbc8fe Mon Sep 17 00:00:00 2001 From: Rickard Andersson Date: Thu, 27 Dec 2012 14:55:38 +0000 Subject: [PATCH 01/55] cpufreq: handle SW coordinated CPUs This patch fixes a bug that occurred when we had load on a secondary CPU and the primary CPU was sleeping. Only one sampling timer was spawned and it was spawned as a deferred timer on the primary CPU, so when a secondary CPU had a change in load this was not detected by the cpufreq governor (both ondemand and conservative). This patch make sure that deferred timers are run on all CPUs in the case of software controlled CPUs that run on the same frequency. Signed-off-by: Rickard Andersson Signed-off-by: Fabio Baltieri Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 3 +- drivers/cpufreq/cpufreq_governor.c | 44 ++++++++++++++++++++++---- drivers/cpufreq/cpufreq_governor.h | 1 + drivers/cpufreq/cpufreq_ondemand.c | 3 +- 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 64ef737e7e72..b9d7f14d7d3d 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -122,7 +122,8 @@ static void cs_dbs_timer(struct work_struct *work) dbs_check_cpu(&cs_dbs_data, cpu); - schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay); + schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work, + delay); mutex_unlock(&dbs_info->cdbs.timer_mutex); } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6c5f1d383cdc..b0e4506f2cae 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -161,13 +161,23 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) } EXPORT_SYMBOL_GPL(dbs_check_cpu); +bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs) +{ + struct cpufreq_policy *policy = cdbs->cur_policy; + + return cpumask_weight(policy->cpus) > 1; +} +EXPORT_SYMBOL_GPL(dbs_sw_coordinated_cpus); + static inline void dbs_timer_init(struct dbs_data *dbs_data, - struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate) + struct cpu_dbs_common_info *cdbs, + unsigned int sampling_rate, + int cpu) { int delay = delay_for_sampling_rate(sampling_rate); + struct cpu_dbs_common_info *cdbs_local = dbs_data->get_cpu_cdbs(cpu); - INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer); - schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay); + schedule_delayed_work_on(cpu, &cdbs_local->work, delay); } static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs) @@ -217,6 +227,10 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + + mutex_init(&j_cdbs->timer_mutex); + INIT_DEFERRABLE_WORK(&j_cdbs->work, + dbs_data->gov_dbs_timer); } /* @@ -275,15 +289,33 @@ second_time: } mutex_unlock(&dbs_data->mutex); - mutex_init(&cpu_cdbs->timer_mutex); - dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate); + if (dbs_sw_coordinated_cpus(cpu_cdbs)) { + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_common_info *j_cdbs; + + j_cdbs = dbs_data->get_cpu_cdbs(j); + dbs_timer_init(dbs_data, j_cdbs, + *sampling_rate, j); + } + } else { + dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate, cpu); + } break; case CPUFREQ_GOV_STOP: if (dbs_data->governor == GOV_CONSERVATIVE) cs_dbs_info->enable = 0; - dbs_timer_exit(cpu_cdbs); + if (dbs_sw_coordinated_cpus(cpu_cdbs)) { + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_common_info *j_cdbs; + + j_cdbs = dbs_data->get_cpu_cdbs(j); + dbs_timer_exit(j_cdbs); + } + } else { + dbs_timer_exit(cpu_cdbs); + } mutex_lock(&dbs_data->mutex); mutex_destroy(&cpu_cdbs->timer_mutex); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index f6616540c53d..5bf6fb8023ef 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -171,6 +171,7 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) u64 get_cpu_idle_time(unsigned int cpu, u64 *wall); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); +bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs); int cpufreq_governor_dbs(struct dbs_data *dbs_data, struct cpufreq_policy *policy, unsigned int event); #endif /* _CPUFREQ_GOVERNER_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 7731f7c7e79a..93bb56d14cfa 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -243,7 +243,8 @@ static void od_dbs_timer(struct work_struct *work) } } - schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay); + schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work, + delay); mutex_unlock(&dbs_info->cdbs.timer_mutex); } From da53d61e21a5869b2e44247bb37deb8be387e063 Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Thu, 27 Dec 2012 14:55:40 +0000 Subject: [PATCH 02/55] cpufreq: ondemand: call dbs_check_cpu only when necessary Modify ondemand timer to not resample CPU utilization if recently sampled from another SW coordinated core. Signed-off-by: Fabio Baltieri Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 3 ++ drivers/cpufreq/cpufreq_governor.h | 1 + drivers/cpufreq/cpufreq_ondemand.c | 58 ++++++++++++++++++++++++------ 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index b0e4506f2cae..ee8b7cac11f3 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -290,6 +290,9 @@ second_time: mutex_unlock(&dbs_data->mutex); if (dbs_sw_coordinated_cpus(cpu_cdbs)) { + /* Initiate timer time stamp */ + cpu_cdbs->time_stamp = ktime_get(); + for_each_cpu(j, policy->cpus) { struct cpu_dbs_common_info *j_cdbs; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 5bf6fb8023ef..aaf073daa9fb 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -82,6 +82,7 @@ struct cpu_dbs_common_info { * the governor or limits. */ struct mutex timer_mutex; + ktime_t time_stamp; }; struct od_cpu_dbs_info_s { diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 93bb56d14cfa..13ceb3c69b64 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -216,23 +216,23 @@ static void od_check_cpu(int cpu, unsigned int load_freq) } } -static void od_dbs_timer(struct work_struct *work) +static void od_timer_update(struct od_cpu_dbs_info_s *dbs_info, bool sample, + struct delayed_work *dw) { - struct od_cpu_dbs_info_s *dbs_info = - container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); unsigned int cpu = dbs_info->cdbs.cpu; int delay, sample_type = dbs_info->sample_type; - mutex_lock(&dbs_info->cdbs.timer_mutex); - /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; if (sample_type == OD_SUB_SAMPLE) { delay = dbs_info->freq_lo_jiffies; - __cpufreq_driver_target(dbs_info->cdbs.cur_policy, - dbs_info->freq_lo, CPUFREQ_RELATION_H); + if (sample) + __cpufreq_driver_target(dbs_info->cdbs.cur_policy, + dbs_info->freq_lo, + CPUFREQ_RELATION_H); } else { - dbs_check_cpu(&od_dbs_data, cpu); + if (sample) + dbs_check_cpu(&od_dbs_data, cpu); if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; @@ -243,11 +243,49 @@ static void od_dbs_timer(struct work_struct *work) } } - schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work, - delay); + schedule_delayed_work_on(smp_processor_id(), dw, delay); +} + +static void od_timer_coordinated(struct od_cpu_dbs_info_s *dbs_info_local, + struct delayed_work *dw) +{ + struct od_cpu_dbs_info_s *dbs_info; + ktime_t time_now; + s64 delta_us; + bool sample = true; + + /* use leader CPU's dbs_info */ + dbs_info = &per_cpu(od_cpu_dbs_info, dbs_info_local->cdbs.cpu); + mutex_lock(&dbs_info->cdbs.timer_mutex); + + time_now = ktime_get(); + delta_us = ktime_us_delta(time_now, dbs_info->cdbs.time_stamp); + + /* Do nothing if we recently have sampled */ + if (delta_us < (s64)(od_tuners.sampling_rate / 2)) + sample = false; + else + dbs_info->cdbs.time_stamp = time_now; + + od_timer_update(dbs_info, sample, dw); mutex_unlock(&dbs_info->cdbs.timer_mutex); } +static void od_dbs_timer(struct work_struct *work) +{ + struct delayed_work *dw = to_delayed_work(work); + struct od_cpu_dbs_info_s *dbs_info = + container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); + + if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) { + od_timer_coordinated(dbs_info, dw); + } else { + mutex_lock(&dbs_info->cdbs.timer_mutex); + od_timer_update(dbs_info, true, dw); + mutex_unlock(&dbs_info->cdbs.timer_mutex); + } +} + /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_min(struct kobject *kobj, From 66df2a01dfd715636f5c86f7afd05362e7e3fddd Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Thu, 27 Dec 2012 14:55:41 +0000 Subject: [PATCH 03/55] cpufreq: conservative: call dbs_check_cpu only when necessary Modify conservative timer to not resample CPU utilization if recently sampled from another SW coordinated core. Signed-off-by: Fabio Baltieri Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 47 ++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index b9d7f14d7d3d..5d8e8942ec97 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -111,22 +111,57 @@ static void cs_check_cpu(int cpu, unsigned int load) } } -static void cs_dbs_timer(struct work_struct *work) +static void cs_timer_update(struct cs_cpu_dbs_info_s *dbs_info, bool sample, + struct delayed_work *dw) { - struct cs_cpu_dbs_info_s *dbs_info = container_of(work, - struct cs_cpu_dbs_info_s, cdbs.work.work); unsigned int cpu = dbs_info->cdbs.cpu; int delay = delay_for_sampling_rate(cs_tuners.sampling_rate); + if (sample) + dbs_check_cpu(&cs_dbs_data, cpu); + + schedule_delayed_work_on(smp_processor_id(), dw, delay); +} + +static void cs_timer_coordinated(struct cs_cpu_dbs_info_s *dbs_info_local, + struct delayed_work *dw) +{ + struct cs_cpu_dbs_info_s *dbs_info; + ktime_t time_now; + s64 delta_us; + bool sample = true; + + /* use leader CPU's dbs_info */ + dbs_info = &per_cpu(cs_cpu_dbs_info, dbs_info_local->cdbs.cpu); mutex_lock(&dbs_info->cdbs.timer_mutex); - dbs_check_cpu(&cs_dbs_data, cpu); + time_now = ktime_get(); + delta_us = ktime_us_delta(time_now, dbs_info->cdbs.time_stamp); - schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work, - delay); + /* Do nothing if we recently have sampled */ + if (delta_us < (s64)(cs_tuners.sampling_rate / 2)) + sample = false; + else + dbs_info->cdbs.time_stamp = time_now; + + cs_timer_update(dbs_info, sample, dw); mutex_unlock(&dbs_info->cdbs.timer_mutex); } +static void cs_dbs_timer(struct work_struct *work) +{ + struct delayed_work *dw = to_delayed_work(work); + struct cs_cpu_dbs_info_s *dbs_info = container_of(work, + struct cs_cpu_dbs_info_s, cdbs.work.work); + + if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) { + cs_timer_coordinated(dbs_info, dw); + } else { + mutex_lock(&dbs_info->cdbs.timer_mutex); + cs_timer_update(dbs_info, true, dw); + mutex_unlock(&dbs_info->cdbs.timer_mutex); + } +} static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { From 8ee2ec51d0916b8c8c4387fb6da5904900ef6b98 Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Thu, 27 Dec 2012 14:55:42 +0000 Subject: [PATCH 04/55] cpufreq: ondemand: use all CPUs in update_sampling_rate Modify update_sampling_rate() to check, and eventually immediately schedule, all CPU's do_dbs_timer delayed work. This is required in case of software coordinated CPUs, as we now have a separate delayed work for each CPU. Signed-off-by: Fabio Baltieri Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_ondemand.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 13ceb3c69b64..1017b90b902e 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -326,7 +326,7 @@ static void update_sampling_rate(unsigned int new_rate) cpufreq_cpu_put(policy); continue; } - dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); + dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cpufreq_cpu_put(policy); mutex_lock(&dbs_info->cdbs.timer_mutex); @@ -345,8 +345,7 @@ static void update_sampling_rate(unsigned int new_rate) cancel_delayed_work_sync(&dbs_info->cdbs.work); mutex_lock(&dbs_info->cdbs.timer_mutex); - schedule_delayed_work_on(dbs_info->cdbs.cpu, - &dbs_info->cdbs.work, + schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, usecs_to_jiffies(new_rate)); } From 643ae6e81dd65b333a13259852405fc9f764ac76 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 12 Jan 2013 05:14:38 +0000 Subject: [PATCH 05/55] cpufreq: Manage only online cpus cpufreq core doesn't manage offline cpus and if driver->init() has returned mask including offline cpus, it may result in unwanted behavior by cpufreq core or governors. We need to get only online cpus in this mask. There are two places to fix this mask, cpufreq core and cpufreq driver. It makes sense to do this at common place and hence is done in core. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1f93dbd72355..de9951766dd8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -970,6 +970,13 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) pr_debug("initialization failed\n"); goto err_unlock_policy; } + + /* + * affected cpus must always be the one, which are online. We aren't + * managing offline cpus here. + */ + cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); + policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; From f6a7409cab3b525c5e55540e7cd08d23e198352f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 12 Jan 2013 05:14:39 +0000 Subject: [PATCH 06/55] cpufreq: Notify governors when cpus are hot-[un]plugged Because cpufreq core and governors worry only about the online cpus, if a cpu is hot [un]plugged, we must notify governors about it, otherwise be ready to expect something unexpected. We already have notifiers in the form of CPUFREQ_GOV_START/CPUFREQ_GOV_STOP, we just need to call them now. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index de9951766dd8..a0a33bdb4533 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -751,11 +751,16 @@ static int cpufreq_add_dev_policy(unsigned int cpu, return -EBUSY; } + __cpufreq_governor(managed_policy, CPUFREQ_GOV_STOP); + spin_lock_irqsave(&cpufreq_driver_lock, flags); cpumask_copy(managed_policy->cpus, policy->cpus); per_cpu(cpufreq_cpu_data, cpu) = managed_policy; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + __cpufreq_governor(managed_policy, CPUFREQ_GOV_START); + __cpufreq_governor(managed_policy, CPUFREQ_GOV_LIMITS); + pr_debug("CPU already managed, adding link\n"); ret = sysfs_create_link(&dev->kobj, &managed_policy->kobj, @@ -1066,8 +1071,13 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif */ if (unlikely(cpu != data->cpu)) { pr_debug("removing link\n"); + __cpufreq_governor(data, CPUFREQ_GOV_STOP); cpumask_clear_cpu(cpu, data->cpus); spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + __cpufreq_governor(data, CPUFREQ_GOV_START); + __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); + kobj = &dev->kobj; cpufreq_cpu_put(data); unlock_policy_rwsem_write(cpu); From 6954ca9c8b5cbaf45c3e45a5a5a7450c6b55ab27 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 12 Jan 2013 05:14:40 +0000 Subject: [PATCH 07/55] cpufreq: Don't use cpu removed during cpufreq_driver_unregister This is how the core works: cpufreq_driver_unregister() - subsys_interface_unregister() - for_each_cpu() call cpufreq_remove_dev(), i.e. 0,1,2,3,4 when we unregister. cpufreq_remove_dev(): - Remove policy node - Call cpufreq_add_dev() for next cpu, sharing mask with removed cpu. i.e. When cpu 0 is removed, we call it for cpu 1. And when called for cpu 2, we call it for cpu 3. - cpufreq_add_dev() would call cpufreq_driver->init() - init would return mask as AND of 2, 3 and 4 for cluster A7. - cpufreq core would do online_cpu && policy->cpus Here is the BUG(). Because cpu hasn't died but we have just unregistered the cpufreq driver, online cpu would still have cpu 2 in it. And so thing go bad again. Solution: Keep cpumask of cpus that are registered with cpufreq core and clear cpus when we get a call from subsys_interface_unregister() via cpufreq_remove_dev(). Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a0a33bdb4533..034d1836884b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -47,6 +47,9 @@ static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor); #endif static DEFINE_SPINLOCK(cpufreq_driver_lock); +/* Used when we unregister cpufreq driver */ +static struct cpumask cpufreq_online_mask; + /* * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure * all cpufreq/hotplug/workqueue/etc related lock issues. @@ -981,6 +984,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) * managing offline cpus here. */ cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); + cpumask_and(policy->cpus, policy->cpus, &cpufreq_online_mask); policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; @@ -1064,7 +1068,6 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif } per_cpu(cpufreq_cpu_data, cpu) = NULL; - #ifdef CONFIG_SMP /* if this isn't the CPU which is the parent of the kobj, we * only need to unlink, put and exit @@ -1185,6 +1188,7 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (unlikely(lock_policy_rwsem_write(cpu))) BUG(); + cpumask_clear_cpu(cpu, &cpufreq_online_mask); retval = __cpufreq_remove_dev(dev, sif); return retval; } @@ -1903,6 +1907,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) cpufreq_driver = driver_data; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + cpumask_setall(&cpufreq_online_mask); + ret = subsys_interface_register(&cpufreq_interface); if (ret) goto err_null_driver; From f85178048c083520bd920921744dd2c4a797fbc5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 12 Jan 2013 05:12:09 +0000 Subject: [PATCH 08/55] cpufreq: SPEAr: Fix sparse warning for cpufreq driver This patch fixes following sparse warning: drivers/cpufreq/spear-cpufreq.c:33:5: warning: symbol 'spear_cpufreq_verify' was not declared. Should it be static? Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/spear-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 4575cfe41755..8ff26af622ea 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -30,7 +30,7 @@ static struct { u32 cnt; } spear_cpufreq; -int spear_cpufreq_verify(struct cpufreq_policy *policy) +static int spear_cpufreq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, spear_cpufreq.freq_tbl); } From b8eed8af94f9203e0cc39245ea335f4b8dc1ed31 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 14 Jan 2013 13:23:03 +0000 Subject: [PATCH 09/55] cpufreq: Simplify __cpufreq_remove_dev() __cpufreq_remove_dev() is called on multiple occasions: cpufreq_driver unregister and cpu removals. Current implementation of this routine is overly complex without much need. If the cpu to be removed is the policy->cpu, we remove the policy first and add all other cpus again from policy->cpus and then finally call __cpufreq_remove_dev() again to remove the cpu to be deleted. Haahhhh.. There exist a simple solution to removal of a cpu: - Simply use the old policy structure - update its fields like: policy->cpu, etc. - notify any users of cpufreq, which depend on changing policy->cpu Hence this patch, which tries to implement the above theory. It is tested well by myself on ARM big.LITTLE TC2 SoC, which has 5 cores (2 A15 and 3 A7). Both A15's share same struct policy and all A7's share same policy structure. Signed-off-by: Viresh Kumar Tested-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 175 ++++++++++++++------------------ drivers/cpufreq/cpufreq_stats.c | 27 ++++- drivers/cpufreq/freq_table.c | 9 ++ include/linux/cpufreq.h | 14 ++- 4 files changed, 120 insertions(+), 105 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 034d1836884b..9af14a8bbcdb 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1036,6 +1036,25 @@ module_out: return ret; } +static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) +{ + int j; + + policy->last_cpu = policy->cpu; + policy->cpu = cpu; + + for_each_cpu(j, policy->cpus) { + if (!cpu_online(j)) + continue; + per_cpu(cpufreq_policy_cpu, j) = cpu; + } + +#ifdef CONFIG_CPU_FREQ_TABLE + cpufreq_frequency_table_update_policy_cpu(policy); +#endif + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_UPDATE_POLICY_CPU, policy); +} /** * __cpufreq_remove_dev - remove a CPU device @@ -1046,132 +1065,92 @@ module_out: */ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { - unsigned int cpu = dev->id; + unsigned int cpu = dev->id, ret, cpus; unsigned long flags; struct cpufreq_policy *data; struct kobject *kobj; struct completion *cmp; -#ifdef CONFIG_SMP struct device *cpu_dev; - unsigned int j; -#endif - pr_debug("unregistering CPU %u\n", cpu); + pr_debug("%s: unregistering CPU %u\n", __func__, cpu); spin_lock_irqsave(&cpufreq_driver_lock, flags); data = per_cpu(cpufreq_cpu_data, cpu); if (!data) { + pr_debug("%s: No cpu_data found\n", __func__); spin_unlock_irqrestore(&cpufreq_driver_lock, flags); unlock_policy_rwsem_write(cpu); return -EINVAL; } - per_cpu(cpufreq_cpu_data, cpu) = NULL; -#ifdef CONFIG_SMP - /* if this isn't the CPU which is the parent of the kobj, we - * only need to unlink, put and exit - */ - if (unlikely(cpu != data->cpu)) { - pr_debug("removing link\n"); + if (cpufreq_driver->target) __cpufreq_governor(data, CPUFREQ_GOV_STOP); - cpumask_clear_cpu(cpu, data->cpus); - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - __cpufreq_governor(data, CPUFREQ_GOV_START); - __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); - - kobj = &dev->kobj; - cpufreq_cpu_put(data); - unlock_policy_rwsem_write(cpu); - sysfs_remove_link(kobj, "cpufreq"); - return 0; - } -#endif - -#ifdef CONFIG_SMP #ifdef CONFIG_HOTPLUG_CPU strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name, CPUFREQ_NAME_LEN); #endif - /* if we have other CPUs still registered, we need to unlink them, - * or else wait_for_completion below will lock up. Clean the - * per_cpu(cpufreq_cpu_data) while holding the lock, and remove - * the sysfs links afterwards. - */ - if (unlikely(cpumask_weight(data->cpus) > 1)) { - for_each_cpu(j, data->cpus) { - if (j == cpu) - continue; - per_cpu(cpufreq_cpu_data, j) = NULL; - } - } + per_cpu(cpufreq_cpu_data, cpu) = NULL; + cpus = cpumask_weight(data->cpus); + cpumask_clear_cpu(cpu, data->cpus); - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - if (unlikely(cpumask_weight(data->cpus) > 1)) { - for_each_cpu(j, data->cpus) { - if (j == cpu) - continue; - pr_debug("removing link for cpu %u\n", j); -#ifdef CONFIG_HOTPLUG_CPU - strncpy(per_cpu(cpufreq_cpu_governor, j), - data->governor->name, CPUFREQ_NAME_LEN); -#endif - cpu_dev = get_cpu_device(j); - kobj = &cpu_dev->kobj; - unlock_policy_rwsem_write(cpu); - sysfs_remove_link(kobj, "cpufreq"); - lock_policy_rwsem_write(cpu); - cpufreq_cpu_put(data); - } - } -#else - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); -#endif - - if (cpufreq_driver->target) - __cpufreq_governor(data, CPUFREQ_GOV_STOP); - - kobj = &data->kobj; - cmp = &data->kobj_unregister; - unlock_policy_rwsem_write(cpu); - kobject_put(kobj); - - /* we need to make sure that the underlying kobj is actually - * not referenced anymore by anybody before we proceed with - * unloading. - */ - pr_debug("waiting for dropping of refcount\n"); - wait_for_completion(cmp); - pr_debug("wait complete\n"); - - lock_policy_rwsem_write(cpu); - if (cpufreq_driver->exit) - cpufreq_driver->exit(data); - unlock_policy_rwsem_write(cpu); - -#ifdef CONFIG_HOTPLUG_CPU - /* when the CPU which is the parent of the kobj is hotplugged - * offline, check for siblings, and create cpufreq sysfs interface - * and symlinks - */ - if (unlikely(cpumask_weight(data->cpus) > 1)) { + if (unlikely((cpu == data->cpu) && (cpus > 1))) { /* first sibling now owns the new sysfs dir */ - cpumask_clear_cpu(cpu, data->cpus); - cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL); + cpu_dev = get_cpu_device(cpumask_first(data->cpus)); + sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); + ret = kobject_move(&data->kobj, &cpu_dev->kobj); + if (ret) { + pr_err("%s: Failed to move kobj: %d", __func__, ret); + cpumask_set_cpu(cpu, data->cpus); + ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj, + "cpufreq"); + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + unlock_policy_rwsem_write(cpu); + return -EINVAL; + } - /* finally remove our own symlink */ - lock_policy_rwsem_write(cpu); - __cpufreq_remove_dev(dev, sif); + update_policy_cpu(data, cpu_dev->id); + pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n", + __func__, cpu_dev->id, cpu); } -#endif - free_cpumask_var(data->related_cpus); - free_cpumask_var(data->cpus); - kfree(data); + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); + cpufreq_cpu_put(data); + unlock_policy_rwsem_write(cpu); + sysfs_remove_link(&dev->kobj, "cpufreq"); + + /* If cpu is last user of policy, free policy */ + if (cpus == 1) { + lock_policy_rwsem_write(cpu); + kobj = &data->kobj; + cmp = &data->kobj_unregister; + unlock_policy_rwsem_write(cpu); + kobject_put(kobj); + + /* we need to make sure that the underlying kobj is actually + * not referenced anymore by anybody before we proceed with + * unloading. + */ + pr_debug("waiting for dropping of refcount\n"); + wait_for_completion(cmp); + pr_debug("wait complete\n"); + + lock_policy_rwsem_write(cpu); + if (cpufreq_driver->exit) + cpufreq_driver->exit(data); + unlock_policy_rwsem_write(cpu); + + free_cpumask_var(data->related_cpus); + free_cpumask_var(data->cpus); + kfree(data); + } else if (cpufreq_driver->target) { + __cpufreq_governor(data, CPUFREQ_GOV_START); + __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); + } return 0; } diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 9d7732b81044..beef6b54382b 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -170,11 +170,13 @@ static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq) static void cpufreq_stats_free_table(unsigned int cpu) { struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, cpu); + if (stat) { + pr_debug("%s: Free stat table\n", __func__); kfree(stat->time_in_state); kfree(stat); + per_cpu(cpufreq_stats_table, cpu) = NULL; } - per_cpu(cpufreq_stats_table, cpu) = NULL; } /* must be called early in the CPU removal sequence (before @@ -183,8 +185,10 @@ static void cpufreq_stats_free_table(unsigned int cpu) static void cpufreq_stats_free_sysfs(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - if (policy && policy->cpu == cpu) + if (policy && (cpumask_weight(policy->cpus) == 1)) { + pr_debug("%s: Free sysfs stat\n", __func__); sysfs_remove_group(&policy->kobj, &stats_attr_group); + } if (policy) cpufreq_cpu_put(policy); } @@ -262,6 +266,19 @@ error_get_fail: return ret; } +static void cpufreq_stats_update_policy_cpu(struct cpufreq_policy *policy) +{ + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, + policy->last_cpu); + + pr_debug("Updating stats_table for new_cpu %u from last_cpu %u\n", + policy->cpu, policy->last_cpu); + per_cpu(cpufreq_stats_table, policy->cpu) = per_cpu(cpufreq_stats_table, + policy->last_cpu); + per_cpu(cpufreq_stats_table, policy->last_cpu) = NULL; + stat->cpu = policy->cpu; +} + static int cpufreq_stat_notifier_policy(struct notifier_block *nb, unsigned long val, void *data) { @@ -269,6 +286,12 @@ static int cpufreq_stat_notifier_policy(struct notifier_block *nb, struct cpufreq_policy *policy = data; struct cpufreq_frequency_table *table; unsigned int cpu = policy->cpu; + + if (val == CPUFREQ_UPDATE_POLICY_CPU) { + cpufreq_stats_update_policy_cpu(policy); + return 0; + } + if (val != CPUFREQ_NOTIFY) return 0; table = cpufreq_frequency_get_table(cpu); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 49cda256efb2..aa5bd39d129e 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -227,6 +227,15 @@ void cpufreq_frequency_table_put_attr(unsigned int cpu) } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr); +void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy) +{ + pr_debug("Updating show_table for new_cpu %u from last_cpu %u\n", + policy->cpu, policy->last_cpu); + per_cpu(cpufreq_show_table, policy->cpu) = per_cpu(cpufreq_show_table, + policy->last_cpu); + per_cpu(cpufreq_show_table, policy->last_cpu) = NULL; +} + struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) { return per_cpu(cpufreq_show_table, cpu); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a55b88eaf96a..52be2d0c994a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -93,7 +93,9 @@ struct cpufreq_policy { cpumask_var_t related_cpus; /* CPUs with any coordination */ unsigned int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ - unsigned int cpu; /* cpu nr of registered CPU */ + unsigned int cpu; /* cpu nr of CPU managing this policy */ + unsigned int last_cpu; /* cpu nr of previous CPU that managed + * this policy */ struct cpufreq_cpuinfo cpuinfo;/* see above */ unsigned int min; /* in kHz */ @@ -112,10 +114,11 @@ struct cpufreq_policy { struct completion kobj_unregister; }; -#define CPUFREQ_ADJUST (0) -#define CPUFREQ_INCOMPATIBLE (1) -#define CPUFREQ_NOTIFY (2) -#define CPUFREQ_START (3) +#define CPUFREQ_ADJUST (0) +#define CPUFREQ_INCOMPATIBLE (1) +#define CPUFREQ_NOTIFY (2) +#define CPUFREQ_START (3) +#define CPUFREQ_UPDATE_POLICY_CPU (4) #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ @@ -405,6 +408,7 @@ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu); +void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy); void cpufreq_frequency_table_put_attr(unsigned int cpu); #endif /* _LINUX_CPUFREQ_H */ From d5aaffa9dd531c978c6f3fea06a2972653bd7fc8 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Thu, 17 Jan 2013 16:22:21 +0000 Subject: [PATCH 10/55] cpufreq: handle cpufreq being disabled for all exported function. When disable_cpufreq() is called some exported functions are still being used that do not have a check for cpufreq being disabled. Add a disabled check into cpufreq_cpu_get() to return NULL if cpufreq is disabled this covers most of the exported functions. For the exported functions that do not call cpufreq_cpu_get() add an explicit check. Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9af14a8bbcdb..2417576393a6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -183,6 +183,9 @@ err_out: struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) { + if (cpufreq_disabled()) + return NULL; + return __cpufreq_cpu_get(cpu, false); } EXPORT_SYMBOL_GPL(cpufreq_cpu_get); @@ -201,6 +204,9 @@ static void __cpufreq_cpu_put(struct cpufreq_policy *data, bool sysfs) void cpufreq_cpu_put(struct cpufreq_policy *data) { + if (cpufreq_disabled()) + return; + __cpufreq_cpu_put(data, false); } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); @@ -267,6 +273,9 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) BUG_ON(irqs_disabled()); + if (cpufreq_disabled()) + return; + freqs->flags = cpufreq_driver->flags; pr_debug("notification %u of frequency transition to %u kHz\n", state, freqs->new); @@ -1408,6 +1417,9 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) { int ret; + if (cpufreq_disabled()) + return -EINVAL; + WARN_ON(!init_cpufreq_transition_notifier_list_called); switch (list) { @@ -1442,6 +1454,9 @@ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) { int ret; + if (cpufreq_disabled()) + return -EINVAL; + switch (list) { case CPUFREQ_TRANSITION_NOTIFIER: ret = srcu_notifier_chain_unregister( @@ -1522,6 +1537,9 @@ int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) { int ret = 0; + if (cpufreq_disabled()) + return ret; + if (!(cpu_online(cpu) && cpufreq_driver->getavg)) return 0; From c40a4518166b6c500c844579ec7069d4a0419f30 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Sun, 20 Jan 2013 10:24:26 +0000 Subject: [PATCH 11/55] acpi-cpufreq: Do not load on K8 de3ed81d746d ("[CPUFREQ] Change link order of x86 cpufreq modules") changed cpufreq drivers link order so that powernow-k8 gets loaded first due to earlier K8s having BIOS bugs. However, now that acpi-cpufreq supports both AMD and Intel CPUs with HW P-states, we want to load it first, so that cases where acpi-cpufreq and powernow-k8 are both built-in and powernow-k8 initializing first, can be addressed. So, make sure that even if acpi-cpufreq gets loaded first, it errors out on K8s and powernow-k8 can be loaded then successfully. Signed-off-by: Matthew Garrett References: http://lkml.kernel.org/r/20130118162347.GA31499@srcf.ucam.org Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 7b0d49d78c61..22f9b47c5c74 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -762,6 +762,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 == 0xf) { + pr_debug("AMD K8 systems must use native drivers.\n"); + result = -ENODEV; + goto err_unreg; + } pr_debug("SYSTEM IO addr space\n"); data->cpu_feature = SYSTEM_IO_CAPABLE; break; From 741220ea86709717e1d392b5a9617dfe90cd3802 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 20 Jan 2013 10:24:27 +0000 Subject: [PATCH 12/55] cpufreq: Make acpi-cpufreq link first Now that the majority of x86 CPUs out there are supported by acpi-cpufreq, we want it to load first and, in the AMD case, drop to powernow-k8 only on K8s. If, however, both powernow-k8 and acpi-cpufreq are built-in, the link order matters. Correct that. Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index fadc4d496e2f..24d4a63fd6cf 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -19,11 +19,12 @@ obj-$(CONFIG_GENERIC_CPUFREQ_CPU0) += cpufreq-cpu0.o ################################################################################## # x86 drivers. # Link order matters. K8 is preferred to ACPI because of firmware bugs in early -# K8 systems. ACPI is preferred to all other hardware-specific drivers. +# K8 systems. This is still the case but acpi-cpufreq errors out so that +# powernow-k8 can load then. ACPI is preferred to all other hardware-specific drivers. # speedstep-* is preferred over p4-clockmod. -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o From 9d95046e5d6afd6d7ae86fb71ab59c6faf0db8de Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 20 Jan 2013 10:24:28 +0000 Subject: [PATCH 13/55] cpufreq: Add a get_current_driver helper Add a helper function to return cpufreq_driver->name. Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 14 ++++++++++++++ include/linux/cpufreq.h | 1 + 2 files changed, 15 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2417576393a6..216c104d51ad 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1395,6 +1395,20 @@ static struct syscore_ops cpufreq_syscore_ops = { .resume = cpufreq_bp_resume, }; +/** + * cpufreq_get_current_driver - return current driver's name + * + * Return the name string of the currently loaded cpufreq driver + * or NULL, if none. + */ +const char *cpufreq_get_current_driver(void) +{ + if (cpufreq_driver) + return cpufreq_driver->name; + + return NULL; +} +EXPORT_SYMBOL_GPL(cpufreq_get_current_driver); /********************************************************************* * NOTIFIER LISTS INTERFACE * diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 52be2d0c994a..1f3a726640e8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -411,4 +411,5 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy); void cpufreq_frequency_table_put_attr(unsigned int cpu); +const char *cpufreq_get_current_driver(void); #endif /* _LINUX_CPUFREQ_H */ From 4827ea6ec9ca1e873a6d387a3ee287f78ea5ee83 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 20 Jan 2013 10:24:29 +0000 Subject: [PATCH 14/55] powernow-k8: Cleanup module request Check whether we've actually already loaded acpi-cpufreq before requesting it. Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernow-k8.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 056faf6af1a9..0234d0c6d68c 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1249,6 +1249,24 @@ static struct cpufreq_driver cpufreq_amd64_driver = { .attr = powernow_k8_attr, }; +static void __request_acpi_cpufreq(void) +{ + const char *cur_drv, *drv = "acpi-cpufreq"; + + cur_drv = cpufreq_get_current_driver(); + if (!cur_drv) + goto request; + + if (strncmp(cur_drv, drv, min_t(size_t, strlen(cur_drv), strlen(drv)))) + pr_warn(PFX "WTF driver: %s\n", cur_drv); + + return; + + request: + pr_warn(PFX "This CPU is not supported anymore, using acpi-cpufreq instead.\n"); + request_module(drv); +} + /* driver entry point for init */ static int __cpuinit powernowk8_init(void) { @@ -1256,8 +1274,7 @@ static int __cpuinit powernowk8_init(void) int rv; if (static_cpu_has(X86_FEATURE_HW_PSTATE)) { - pr_warn(PFX "this CPU is not supported anymore, using acpi-cpufreq instead.\n"); - request_module("acpi-cpufreq"); + __request_acpi_cpufreq(); return -ENODEV; } From c0939e46a84c6af89d6f093a34c1c9341dfe1d6e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 20 Jan 2013 10:24:30 +0000 Subject: [PATCH 15/55] powernow-k8: Cleanup init function Make it hotplug-safe and cleanup formatting. Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernow-k8.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 0234d0c6d68c..d13a13678b5f 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1271,7 +1271,7 @@ static void __request_acpi_cpufreq(void) static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; - int rv; + int ret; if (static_cpu_has(X86_FEATURE_HW_PSTATE)) { __request_acpi_cpufreq(); @@ -1281,24 +1281,27 @@ static int __cpuinit powernowk8_init(void) if (!x86_match_cpu(powernow_k8_ids)) return -ENODEV; + get_online_cpus(); for_each_online_cpu(i) { - int rc; - smp_call_function_single(i, check_supported_cpu, &rc, 1); - if (rc == 0) + smp_call_function_single(i, check_supported_cpu, &ret, 1); + if (!ret) supported_cpus++; } - if (supported_cpus != num_online_cpus()) + if (supported_cpus != num_online_cpus()) { + put_online_cpus(); return -ENODEV; + } + put_online_cpus(); - rv = cpufreq_register_driver(&cpufreq_amd64_driver); + ret = cpufreq_register_driver(&cpufreq_amd64_driver); + if (ret) + return ret; - if (!rv) - pr_info(PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", - num_online_nodes(), boot_cpu_data.x86_model_id, - supported_cpus); + pr_info(PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", + num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); - return rv; + return ret; } /* driver entry point for term */ From ab45bd9bed36ad6c471b090bb8846ab7228fdf11 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 28 Jan 2013 14:50:39 +0100 Subject: [PATCH 16/55] cpufreq: Sort function prototypes properly Move function prototypes to a place where they logically fit better. Signed-off-by: Borislav Petkov Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1f3a726640e8..5fdc6c6e3f8a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -311,6 +311,9 @@ __ATTR(_name, 0444, show_##_name, NULL) static struct global_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) +struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); +void cpufreq_cpu_put(struct cpufreq_policy *data); +const char *cpufreq_get_current_driver(void); /********************************************************************* * CPUFREQ 2.6. INTERFACE * @@ -400,8 +403,6 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, /* the following 3 funtions are for cpufreq core use only */ struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu); -struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); -void cpufreq_cpu_put(struct cpufreq_policy *data); /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; @@ -411,5 +412,4 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy); void cpufreq_frequency_table_put_attr(unsigned int cpu); -const char *cpufreq_get_current_driver(void); #endif /* _LINUX_CPUFREQ_H */ From bd603455f366bd66a5e1870bc285c05c9cb6a72d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 28 Jan 2013 16:13:12 +0000 Subject: [PATCH 17/55] ARM: use device tree to get smp_twd clock Move clk setup to twd_local_timer_common_register and rely on twd_timer_rate being 0 to force calibration if there is no clock. Remove common_setup_called as it is no longer needed. Signed-off-by: Rob Herring Signed-off-by: Mark Langsdorf Acked-by: Russell King Signed-off-by: Rafael J. Wysocki --- arch/arm/kernel/smp_twd.c | 53 ++++++++++++++------------------------- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 49f335d301ba..ae0c7bb39ae8 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -31,7 +31,6 @@ static void __iomem *twd_base; static struct clk *twd_clk; static unsigned long twd_timer_rate; -static bool common_setup_called; static DEFINE_PER_CPU(bool, percpu_setup_called); static struct clock_event_device __percpu **twd_evt; @@ -239,25 +238,28 @@ static irqreturn_t twd_handler(int irq, void *dev_id) return IRQ_NONE; } -static struct clk *twd_get_clock(void) +static void twd_get_clock(struct device_node *np) { - struct clk *clk; int err; - clk = clk_get_sys("smp_twd", NULL); - if (IS_ERR(clk)) { - pr_err("smp_twd: clock not found: %d\n", (int)PTR_ERR(clk)); - return clk; + if (np) + twd_clk = of_clk_get(np, 0); + else + twd_clk = clk_get_sys("smp_twd", NULL); + + if (IS_ERR(twd_clk)) { + pr_err("smp_twd: clock not found %d\n", (int) PTR_ERR(twd_clk)); + return; } - err = clk_prepare_enable(clk); + err = clk_prepare_enable(twd_clk); if (err) { pr_err("smp_twd: clock failed to prepare+enable: %d\n", err); - clk_put(clk); - return ERR_PTR(err); + clk_put(twd_clk); + return; } - return clk; + twd_timer_rate = clk_get_rate(twd_clk); } /* @@ -280,26 +282,7 @@ static int __cpuinit twd_timer_setup(struct clock_event_device *clk) } per_cpu(percpu_setup_called, cpu) = true; - /* - * This stuff only need to be done once for the entire TWD cluster - * during the runtime of the system. - */ - if (!common_setup_called) { - twd_clk = twd_get_clock(); - - /* - * We use IS_ERR_OR_NULL() here, because if the clock stubs - * are active we will get a valid clk reference which is - * however NULL and will return the rate 0. In that case we - * need to calibrate the rate instead. - */ - if (!IS_ERR_OR_NULL(twd_clk)) - twd_timer_rate = clk_get_rate(twd_clk); - else - twd_calibrate_rate(); - - common_setup_called = true; - } + twd_calibrate_rate(); /* * The following is done once per CPU the first time .setup() is @@ -330,7 +313,7 @@ static struct local_timer_ops twd_lt_ops __cpuinitdata = { .stop = twd_timer_stop, }; -static int __init twd_local_timer_common_register(void) +static int __init twd_local_timer_common_register(struct device_node *np) { int err; @@ -350,6 +333,8 @@ static int __init twd_local_timer_common_register(void) if (err) goto out_irq; + twd_get_clock(np); + return 0; out_irq: @@ -373,7 +358,7 @@ int __init twd_local_timer_register(struct twd_local_timer *tlt) if (!twd_base) return -ENOMEM; - return twd_local_timer_common_register(); + return twd_local_timer_common_register(NULL); } #ifdef CONFIG_OF @@ -405,7 +390,7 @@ void __init twd_local_timer_of_register(void) goto out; } - err = twd_local_timer_common_register(); + err = twd_local_timer_common_register(np); out: WARN(err, "twd_local_timer_of_register failed (%d)\n", err); From b5964708532f4713e9cfb1b8b1a6ac8544fc66af Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Mon, 28 Jan 2013 16:13:13 +0000 Subject: [PATCH 18/55] clk / highbank: Prevent glitches in non-bypass reset mode The highbank clock will glitch with the current code if the clock rate is reset without relocking the PLL. Program the PLL correctly to prevent glitches. Signed-off-by: Mark Langsdorf Signed-off-by: Rob Herring Acked-by: Mike Turquette Signed-off-by: Rafael J. Wysocki --- drivers/clk/clk-highbank.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/clk-highbank.c b/drivers/clk/clk-highbank.c index 52fecadf004a..3a0b723da2bc 100644 --- a/drivers/clk/clk-highbank.c +++ b/drivers/clk/clk-highbank.c @@ -182,8 +182,10 @@ static int clk_pll_set_rate(struct clk_hw *hwclk, unsigned long rate, reg |= HB_PLL_EXT_ENA; reg &= ~HB_PLL_EXT_BYPASS; } else { + writel(reg | HB_PLL_EXT_BYPASS, hbclk->reg); reg &= ~HB_PLL_DIVQ_MASK; reg |= divq << HB_PLL_DIVQ_SHIFT; + writel(reg | HB_PLL_EXT_BYPASS, hbclk->reg); } writel(reg, hbclk->reg); From 300586778d405f0a4d1f6dc51fcfb4fed567d020 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 28 Jan 2013 16:13:14 +0000 Subject: [PATCH 19/55] ARM / highbank: add support for pl320 IPC The pl320 IPC allows for interprocessor communication between the highbank A9 and the EnergyCore Management Engine. The pl320 implements a straightforward mailbox protocol. Signed-off-by: Mark Langsdorf Signed-off-by: Rob Herring Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-highbank/Kconfig | 2 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/mailbox/Kconfig | 19 ++++ drivers/mailbox/Makefile | 1 + drivers/mailbox/pl320-ipc.c | 199 +++++++++++++++++++++++++++++++++ include/linux/mailbox.h | 17 +++ 7 files changed, 241 insertions(+) create mode 100644 drivers/mailbox/Kconfig create mode 100644 drivers/mailbox/Makefile create mode 100644 drivers/mailbox/pl320-ipc.c create mode 100644 include/linux/mailbox.h diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index 551c97e87a78..2388085beaa2 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -11,5 +11,7 @@ config ARCH_HIGHBANK select GENERIC_CLOCKEVENTS select HAVE_ARM_SCU select HAVE_SMP + select MAILBOX + select PL320_MBOX select SPARSE_IRQ select USE_OF diff --git a/drivers/Kconfig b/drivers/Kconfig index f5fb0722a63a..2b4e89ba15ad 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -134,6 +134,8 @@ source "drivers/hwspinlock/Kconfig" source "drivers/clocksource/Kconfig" +source "drivers/mailbox/Kconfig" + source "drivers/iommu/Kconfig" source "drivers/remoteproc/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 7863b9fee50b..a8d32f1094b4 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -130,6 +130,7 @@ obj-y += platform/ #common clk code obj-y += clk/ +obj-$(CONFIG_MAILBOX) += mailbox/ obj-$(CONFIG_HWSPINLOCK) += hwspinlock/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_IOMMU_SUPPORT) += iommu/ diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig new file mode 100644 index 000000000000..9545c9f03809 --- /dev/null +++ b/drivers/mailbox/Kconfig @@ -0,0 +1,19 @@ +menuconfig MAILBOX + bool "Mailbox Hardware Support" + help + Mailbox is a framework to control hardware communication between + on-chip processors through queued messages and interrupt driven + signals. Say Y if your platform supports hardware mailboxes. + +if MAILBOX +config PL320_MBOX + bool "ARM PL320 Mailbox" + depends on ARM_AMBA + help + An implementation of the ARM PL320 Interprocessor Communication + Mailbox (IPCM), tailored for the Calxeda Highbank. It is used to + send short messages between Highbank's A9 cores and the EnergyCore + Management Engine, primarily for cpufreq. Say Y here if you want + to use the PL320 IPCM support. + +endif diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile new file mode 100644 index 000000000000..543ad6a79505 --- /dev/null +++ b/drivers/mailbox/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_PL320_MBOX) += pl320-ipc.o diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c new file mode 100644 index 000000000000..c45b3aedafba --- /dev/null +++ b/drivers/mailbox/pl320-ipc.c @@ -0,0 +1,199 @@ +/* + * Copyright 2012 Calxeda, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define IPCMxSOURCE(m) ((m) * 0x40) +#define IPCMxDSET(m) (((m) * 0x40) + 0x004) +#define IPCMxDCLEAR(m) (((m) * 0x40) + 0x008) +#define IPCMxDSTATUS(m) (((m) * 0x40) + 0x00C) +#define IPCMxMODE(m) (((m) * 0x40) + 0x010) +#define IPCMxMSET(m) (((m) * 0x40) + 0x014) +#define IPCMxMCLEAR(m) (((m) * 0x40) + 0x018) +#define IPCMxMSTATUS(m) (((m) * 0x40) + 0x01C) +#define IPCMxSEND(m) (((m) * 0x40) + 0x020) +#define IPCMxDR(m, dr) (((m) * 0x40) + ((dr) * 4) + 0x024) + +#define IPCMMIS(irq) (((irq) * 8) + 0x800) +#define IPCMRIS(irq) (((irq) * 8) + 0x804) + +#define MBOX_MASK(n) (1 << (n)) +#define IPC_TX_MBOX 1 +#define IPC_RX_MBOX 2 + +#define CHAN_MASK(n) (1 << (n)) +#define A9_SOURCE 1 +#define M3_SOURCE 0 + +static void __iomem *ipc_base; +static int ipc_irq; +static DEFINE_MUTEX(ipc_m1_lock); +static DECLARE_COMPLETION(ipc_completion); +static ATOMIC_NOTIFIER_HEAD(ipc_notifier); + +static inline void set_destination(int source, int mbox) +{ + __raw_writel(CHAN_MASK(source), ipc_base + IPCMxDSET(mbox)); + __raw_writel(CHAN_MASK(source), ipc_base + IPCMxMSET(mbox)); +} + +static inline void clear_destination(int source, int mbox) +{ + __raw_writel(CHAN_MASK(source), ipc_base + IPCMxDCLEAR(mbox)); + __raw_writel(CHAN_MASK(source), ipc_base + IPCMxMCLEAR(mbox)); +} + +static void __ipc_send(int mbox, u32 *data) +{ + int i; + for (i = 0; i < 7; i++) + __raw_writel(data[i], ipc_base + IPCMxDR(mbox, i)); + __raw_writel(0x1, ipc_base + IPCMxSEND(mbox)); +} + +static u32 __ipc_rcv(int mbox, u32 *data) +{ + int i; + for (i = 0; i < 7; i++) + data[i] = __raw_readl(ipc_base + IPCMxDR(mbox, i)); + return data[1]; +} + +/* blocking implmentation from the A9 side, not usuable in interrupts! */ +int pl320_ipc_transmit(u32 *data) +{ + int ret; + + mutex_lock(&ipc_m1_lock); + + init_completion(&ipc_completion); + __ipc_send(IPC_TX_MBOX, data); + ret = wait_for_completion_timeout(&ipc_completion, + msecs_to_jiffies(1000)); + if (ret == 0) { + ret = -ETIMEDOUT; + goto out; + } + + ret = __ipc_rcv(IPC_TX_MBOX, data); +out: + mutex_unlock(&ipc_m1_lock); + return ret; +} +EXPORT_SYMBOL_GPL(pl320_ipc_transmit); + +static irqreturn_t ipc_handler(int irq, void *dev) +{ + u32 irq_stat; + u32 data[7]; + + irq_stat = __raw_readl(ipc_base + IPCMMIS(1)); + if (irq_stat & MBOX_MASK(IPC_TX_MBOX)) { + __raw_writel(0, ipc_base + IPCMxSEND(IPC_TX_MBOX)); + complete(&ipc_completion); + } + if (irq_stat & MBOX_MASK(IPC_RX_MBOX)) { + __ipc_rcv(IPC_RX_MBOX, data); + atomic_notifier_call_chain(&ipc_notifier, data[0], data + 1); + __raw_writel(2, ipc_base + IPCMxSEND(IPC_RX_MBOX)); + } + + return IRQ_HANDLED; +} + +int pl320_ipc_register_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&ipc_notifier, nb); +} +EXPORT_SYMBOL_GPL(pl320_ipc_register_notifier); + +int pl320_ipc_unregister_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&ipc_notifier, nb); +} +EXPORT_SYMBOL_GPL(pl320_ipc_unregister_notifier); + +static int __init pl320_probe(struct amba_device *adev, + const struct amba_id *id) +{ + int ret; + + ipc_base = ioremap(adev->res.start, resource_size(&adev->res)); + if (ipc_base == NULL) + return -ENOMEM; + + __raw_writel(0, ipc_base + IPCMxSEND(IPC_TX_MBOX)); + + ipc_irq = adev->irq[0]; + ret = request_irq(ipc_irq, ipc_handler, 0, dev_name(&adev->dev), NULL); + if (ret < 0) + goto err; + + /* Init slow mailbox */ + __raw_writel(CHAN_MASK(A9_SOURCE), + ipc_base + IPCMxSOURCE(IPC_TX_MBOX)); + __raw_writel(CHAN_MASK(M3_SOURCE), + ipc_base + IPCMxDSET(IPC_TX_MBOX)); + __raw_writel(CHAN_MASK(M3_SOURCE) | CHAN_MASK(A9_SOURCE), + ipc_base + IPCMxMSET(IPC_TX_MBOX)); + + /* Init receive mailbox */ + __raw_writel(CHAN_MASK(M3_SOURCE), + ipc_base + IPCMxSOURCE(IPC_RX_MBOX)); + __raw_writel(CHAN_MASK(A9_SOURCE), + ipc_base + IPCMxDSET(IPC_RX_MBOX)); + __raw_writel(CHAN_MASK(M3_SOURCE) | CHAN_MASK(A9_SOURCE), + ipc_base + IPCMxMSET(IPC_RX_MBOX)); + + return 0; +err: + iounmap(ipc_base); + return ret; +} + +static struct amba_id pl320_ids[] = { + { + .id = 0x00041320, + .mask = 0x000fffff, + }, + { 0, 0 }, +}; + +static struct amba_driver pl320_driver = { + .drv = { + .name = "pl320", + }, + .id_table = pl320_ids, + .probe = pl320_probe, +}; + +static int __init ipc_init(void) +{ + return amba_driver_register(&pl320_driver); +} +module_init(ipc_init); diff --git a/include/linux/mailbox.h b/include/linux/mailbox.h new file mode 100644 index 000000000000..5161f63ec1c8 --- /dev/null +++ b/include/linux/mailbox.h @@ -0,0 +1,17 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +int pl320_ipc_transmit(u32 *data); +int pl320_ipc_register_notifier(struct notifier_block *nb); +int pl320_ipc_unregister_notifier(struct notifier_block *nb); From 6754f556103be5bd172263b1075ddbb7157afbad Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Mon, 28 Jan 2013 16:13:15 +0000 Subject: [PATCH 20/55] cpufreq / highbank: add support for highbank cpufreq Highbank processors depend on the external ECME to perform voltage management based on a requested frequency. Communication between the A9 cores and the ECME happens over the pl320 IPC channel. Signed-off-by: Mark Langsdorf Reviewed-by: Shawn Guo Reviewed-by: Mike Turquette Signed-off-by: Rafael J. Wysocki --- arch/arm/boot/dts/highbank.dts | 10 +++ arch/arm/mach-highbank/Kconfig | 2 + drivers/cpufreq/Kconfig.arm | 15 ++++ drivers/cpufreq/Makefile | 3 +- drivers/cpufreq/cpufreq-cpu0.c | 6 +- drivers/cpufreq/highbank-cpufreq.c | 115 +++++++++++++++++++++++++++++ 6 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 drivers/cpufreq/highbank-cpufreq.c diff --git a/arch/arm/boot/dts/highbank.dts b/arch/arm/boot/dts/highbank.dts index 5927a8df5625..6aad34ad9517 100644 --- a/arch/arm/boot/dts/highbank.dts +++ b/arch/arm/boot/dts/highbank.dts @@ -37,6 +37,16 @@ next-level-cache = <&L2>; clocks = <&a9pll>; clock-names = "cpu"; + operating-points = < + /* kHz ignored */ + 1300000 1000000 + 1200000 1000000 + 1100000 1000000 + 800000 1000000 + 400000 1000000 + 200000 1000000 + >; + clock-latency = <100000>; }; cpu@901 { diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index 2388085beaa2..44b12f9c1584 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -1,5 +1,7 @@ config ARCH_HIGHBANK bool "Calxeda ECX-1000/2000 (Highbank/Midway)" if ARCH_MULTI_V7 + select ARCH_HAS_CPUFREQ + select ARCH_HAS_OPP select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_AMBA select ARM_GIC diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index a0b3661d90b0..ffe55b8a98cc 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -83,3 +83,18 @@ config ARM_SPEAR_CPUFREQ default y help This adds the CPUFreq driver support for SPEAr SOCs. + +config ARM_HIGHBANK_CPUFREQ + tristate "Calxeda Highbank-based" + depends on ARCH_HIGHBANK + select CPU_FREQ_TABLE + select GENERIC_CPUFREQ_CPU0 + select PM_OPP + select REGULATOR + + default m + help + This adds the CPUFreq driver for Calxeda Highbank SoC + based boards. + + If in doubt, say N. diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 24d4a63fd6cf..e2a5da77370f 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -51,8 +51,9 @@ obj-$(CONFIG_ARM_EXYNOS_CPUFREQ) += exynos-cpufreq.o obj-$(CONFIG_ARM_EXYNOS4210_CPUFREQ) += exynos4210-cpufreq.o obj-$(CONFIG_ARM_EXYNOS4X12_CPUFREQ) += exynos4x12-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5250_CPUFREQ) += exynos5250-cpufreq.o -obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o +obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o +obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o ################################################################################## # PowerPC platform drivers diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index debc5a7c8db6..38ae178ce4ef 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -182,7 +182,11 @@ static int cpu0_cpufreq_driver_init(void) struct device_node *np; int ret; - np = of_find_node_by_path("/cpus/cpu@0"); + for_each_child_of_node(of_find_node_by_path("/cpus"), np) { + if (of_get_property(np, "operating-points", NULL)) + break; + } + if (!np) { pr_err("failed to find cpu0 node\n"); return -ENOENT; diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c new file mode 100644 index 000000000000..53f25e5aa6b7 --- /dev/null +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2012 Calxeda, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This driver provides the clk notifier callbacks that are used when + * the cpufreq-cpu0 driver changes to frequency to alert the highbank + * EnergyCore Management Engine (ECME) about the need to change + * voltage. The ECME interfaces with the actual voltage regulators. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#define HB_CPUFREQ_CHANGE_NOTE 0x80000001 +#define HB_CPUFREQ_IPC_LEN 7 +#define HB_CPUFREQ_VOLT_RETRIES 15 + +static int hb_voltage_change(unsigned int freq) +{ + int i; + u32 msg[HB_CPUFREQ_IPC_LEN]; + + msg[0] = HB_CPUFREQ_CHANGE_NOTE; + msg[1] = freq / 1000000; + for (i = 2; i < HB_CPUFREQ_IPC_LEN; i++) + msg[i] = 0; + + return pl320_ipc_transmit(msg); +} + +static int hb_cpufreq_clk_notify(struct notifier_block *nb, + unsigned long action, void *hclk) +{ + struct clk_notifier_data *clk_data = hclk; + int i = 0; + + if (action == PRE_RATE_CHANGE) { + if (clk_data->new_rate > clk_data->old_rate) + while (hb_voltage_change(clk_data->new_rate)) + if (i++ > HB_CPUFREQ_VOLT_RETRIES) + return NOTIFY_BAD; + } else if (action == POST_RATE_CHANGE) { + if (clk_data->new_rate < clk_data->old_rate) + while (hb_voltage_change(clk_data->new_rate)) + if (i++ > HB_CPUFREQ_VOLT_RETRIES) + return NOTIFY_BAD; + } + + return NOTIFY_DONE; +} + +static struct notifier_block hb_cpufreq_clk_nb = { + .notifier_call = hb_cpufreq_clk_notify, +}; + +static int hb_cpufreq_driver_init(void) +{ + struct device *cpu_dev; + struct clk *cpu_clk; + struct device_node *np; + int ret; + + if (!of_machine_is_compatible("calxeda,highbank")) + return -ENODEV; + + for_each_child_of_node(of_find_node_by_path("/cpus"), np) + if (of_get_property(np, "operating-points", NULL)) + break; + + if (!np) { + pr_err("failed to find highbank cpufreq node\n"); + return -ENOENT; + } + + cpu_dev = get_cpu_device(0); + if (!cpu_dev) { + pr_err("failed to get highbank cpufreq device\n"); + ret = -ENODEV; + goto out_put_node; + } + + cpu_dev->of_node = np; + + cpu_clk = clk_get(cpu_dev, NULL); + if (IS_ERR(cpu_clk)) { + ret = PTR_ERR(cpu_clk); + pr_err("failed to get cpu0 clock: %d\n", ret); + goto out_put_node; + } + + ret = clk_notifier_register(cpu_clk, &hb_cpufreq_clk_nb); + if (ret) { + pr_err("failed to register clk notifier: %d\n", ret); + goto out_put_node; + } + +out_put_node: + of_node_put(np); + return ret; +} +module_init(hb_cpufreq_driver_init); + +MODULE_AUTHOR("Mark Langsdorf "); +MODULE_DESCRIPTION("Calxeda Highbank cpufreq driver"); +MODULE_LICENSE("GPL"); From b2ccd7632939b8c8d53ea1c4aa21ebf8d0add7cf Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 28 Jan 2013 18:26:15 +0000 Subject: [PATCH 21/55] PM / OPP: switch exported symbols to GPL variant We are GPLV2 library, so be clear in the symbols exported as well. Signed-off-by: Nishanth Menon Acked-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 50b2831e027d..0dbe270b85f8 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -162,7 +162,7 @@ unsigned long opp_get_voltage(struct opp *opp) return v; } -EXPORT_SYMBOL(opp_get_voltage); +EXPORT_SYMBOL_GPL(opp_get_voltage); /** * opp_get_freq() - Gets the frequency corresponding to an available opp @@ -192,7 +192,7 @@ unsigned long opp_get_freq(struct opp *opp) return f; } -EXPORT_SYMBOL(opp_get_freq); +EXPORT_SYMBOL_GPL(opp_get_freq); /** * opp_get_opp_count() - Get number of opps available in the opp list @@ -225,7 +225,7 @@ int opp_get_opp_count(struct device *dev) return count; } -EXPORT_SYMBOL(opp_get_opp_count); +EXPORT_SYMBOL_GPL(opp_get_opp_count); /** * opp_find_freq_exact() - search for an exact frequency @@ -276,7 +276,7 @@ struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, return opp; } -EXPORT_SYMBOL(opp_find_freq_exact); +EXPORT_SYMBOL_GPL(opp_find_freq_exact); /** * opp_find_freq_ceil() - Search for an rounded ceil freq @@ -323,7 +323,7 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq) return opp; } -EXPORT_SYMBOL(opp_find_freq_ceil); +EXPORT_SYMBOL_GPL(opp_find_freq_ceil); /** * opp_find_freq_floor() - Search for a rounded floor freq @@ -374,7 +374,7 @@ struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq) return opp; } -EXPORT_SYMBOL(opp_find_freq_floor); +EXPORT_SYMBOL_GPL(opp_find_freq_floor); /** * opp_add() - Add an OPP table from a table definitions @@ -568,7 +568,7 @@ int opp_enable(struct device *dev, unsigned long freq) { return opp_set_availability(dev, freq, true); } -EXPORT_SYMBOL(opp_enable); +EXPORT_SYMBOL_GPL(opp_enable); /** * opp_disable() - Disable a specific OPP @@ -590,7 +590,7 @@ int opp_disable(struct device *dev, unsigned long freq) { return opp_set_availability(dev, freq, false); } -EXPORT_SYMBOL(opp_disable); +EXPORT_SYMBOL_GPL(opp_disable); #ifdef CONFIG_CPU_FREQ /** From 74c46c6eaf9724edbfc12cc67e62773b708eb2ed Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Mon, 28 Jan 2013 18:26:16 +0000 Subject: [PATCH 22/55] PM / OPP: Export more symbols for module usage Export cpufreq helpers in OPP to make the cpufreq-core0 and highbank-cpufreq drivers loadable as modules. Signed-off-by: Mark Langsdorf Signed-off-by: Nishanth Menon Acked-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 0dbe270b85f8..32ee0fc7ea54 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -661,6 +661,7 @@ int opp_init_cpufreq_table(struct device *dev, return 0; } +EXPORT_SYMBOL_GPL(opp_init_cpufreq_table); /** * opp_free_cpufreq_table() - free the cpufreq table @@ -678,6 +679,7 @@ void opp_free_cpufreq_table(struct device *dev, kfree(*table); *table = NULL; } +EXPORT_SYMBOL_GPL(opp_free_cpufreq_table); #endif /* CONFIG_CPU_FREQ */ /** @@ -738,4 +740,5 @@ int of_init_opp_table(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(of_init_opp_table); #endif From b26f72042e433642787e51fb3f40dbdd9969f6e1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 29 Jan 2013 04:40:00 +0000 Subject: [PATCH 23/55] cpufreq: Revert "cpufreq: Don't use cpu removed during cpufreq_driver_unregister" This reverts commit 956f339 "cpufreq: Don't use cpu removed during cpufreq_driver_unregister". With the addition of the following commit, this change/variable is not required any more: commit b9ba2725343ae57add3f324dfa5074167f48de96 Author: Viresh Kumar Date: Mon Jan 14 13:23:03 2013 +0000 cpufreq: Simplify __cpufreq_remove_dev() [rjw: Subject and changelog] Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 216c104d51ad..d474421d219b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -47,9 +47,6 @@ static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor); #endif static DEFINE_SPINLOCK(cpufreq_driver_lock); -/* Used when we unregister cpufreq driver */ -static struct cpumask cpufreq_online_mask; - /* * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure * all cpufreq/hotplug/workqueue/etc related lock issues. @@ -993,7 +990,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) * managing offline cpus here. */ cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); - cpumask_and(policy->cpus, policy->cpus, &cpufreq_online_mask); policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; @@ -1176,7 +1172,6 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (unlikely(lock_policy_rwsem_write(cpu))) BUG(); - cpumask_clear_cpu(cpu, &cpufreq_online_mask); retval = __cpufreq_remove_dev(dev, sif); return retval; } @@ -1918,8 +1913,6 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) cpufreq_driver = driver_data; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - cpumask_setall(&cpufreq_online_mask); - ret = subsys_interface_register(&cpufreq_interface); if (ret) goto err_null_driver; From fcf8058296edbc3de43adf095824fc32b067b9f8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 29 Jan 2013 14:39:08 +0000 Subject: [PATCH 24/55] cpufreq: Simplify cpufreq_add_dev() Currently cpufreq_add_dev() firsts allocates policy, calls driver->init() and then checks if this CPU is already managed or not. And if it is already managed, its policy is freed. We can save all this if we somehow know that CPU is managed or not in advance. policy->related_cpus contains the list of all valid sibling CPUs of policy->cpu. We can check this to see if the current CPU is already managed. From now on, platforms don't really need to set related_cpus from their init() routines, as the same work is done by core too. If a platform driver needs to set the related_cpus mask with some additional CPUs, other than CPUs present in policy->cpus, they are free to do it, though, as we don't override anything. [rjw: Changelog] Signed-off-by: Viresh Kumar Tested-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 158 ++++++++++++-------------------- drivers/cpufreq/spear-cpufreq.c | 1 - 2 files changed, 58 insertions(+), 101 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d474421d219b..1cea7a1eac13 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -552,8 +552,6 @@ static ssize_t show_cpus(const struct cpumask *mask, char *buf) */ static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) { - if (cpumask_empty(policy->related_cpus)) - return show_cpus(policy->cpus, buf); return show_cpus(policy->related_cpus, buf); } @@ -709,92 +707,6 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -/* - * Returns: - * Negative: Failure - * 0: Success - * Positive: When we have a managed CPU and the sysfs got symlinked - */ -static int cpufreq_add_dev_policy(unsigned int cpu, - struct cpufreq_policy *policy, - struct device *dev) -{ - int ret = 0; -#ifdef CONFIG_SMP - unsigned long flags; - unsigned int j; -#ifdef CONFIG_HOTPLUG_CPU - struct cpufreq_governor *gov; - - gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu)); - if (gov) { - policy->governor = gov; - pr_debug("Restoring governor %s for cpu %d\n", - policy->governor->name, cpu); - } -#endif - - for_each_cpu(j, policy->cpus) { - struct cpufreq_policy *managed_policy; - - if (cpu == j) - continue; - - /* Check for existing affected CPUs. - * They may not be aware of it due to CPU Hotplug. - * cpufreq_cpu_put is called when the device is removed - * in __cpufreq_remove_dev() - */ - managed_policy = cpufreq_cpu_get(j); - if (unlikely(managed_policy)) { - - /* Set proper policy_cpu */ - unlock_policy_rwsem_write(cpu); - per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu; - - if (lock_policy_rwsem_write(cpu) < 0) { - /* Should not go through policy unlock path */ - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - cpufreq_cpu_put(managed_policy); - return -EBUSY; - } - - __cpufreq_governor(managed_policy, CPUFREQ_GOV_STOP); - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - cpumask_copy(managed_policy->cpus, policy->cpus); - per_cpu(cpufreq_cpu_data, cpu) = managed_policy; - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - __cpufreq_governor(managed_policy, CPUFREQ_GOV_START); - __cpufreq_governor(managed_policy, CPUFREQ_GOV_LIMITS); - - pr_debug("CPU already managed, adding link\n"); - ret = sysfs_create_link(&dev->kobj, - &managed_policy->kobj, - "cpufreq"); - if (ret) - cpufreq_cpu_put(managed_policy); - /* - * Success. We only needed to be added to the mask. - * Call driver->exit() because only the cpu parent of - * the kobj needed to call init(). - */ - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - - if (!ret) - return 1; - else - return ret; - } - } -#endif - return ret; -} - - /* symlink affected CPUs */ static int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) @@ -899,6 +811,42 @@ err_out_kobj_put: return ret; } +#ifdef CONFIG_HOTPLUG_CPU +static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling, + struct device *dev) +{ + struct cpufreq_policy *policy; + int ret = 0; + unsigned long flags; + + policy = cpufreq_cpu_get(sibling); + WARN_ON(!policy); + + per_cpu(cpufreq_policy_cpu, cpu) = policy->cpu; + + lock_policy_rwsem_write(cpu); + + __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + cpumask_set_cpu(cpu, policy->cpus); + per_cpu(cpufreq_cpu_data, cpu) = policy; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + __cpufreq_governor(policy, CPUFREQ_GOV_START); + __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + + unlock_policy_rwsem_write(cpu); + + ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); + if (ret) { + cpufreq_cpu_put(policy); + return ret; + } + + return 0; +} +#endif /** * cpufreq_add_dev - add a CPU device @@ -911,12 +859,12 @@ err_out_kobj_put: */ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { - unsigned int cpu = dev->id; - int ret = 0, found = 0; + unsigned int j, cpu = dev->id; + int ret = -ENOMEM, found = 0; struct cpufreq_policy *policy; unsigned long flags; - unsigned int j; #ifdef CONFIG_HOTPLUG_CPU + struct cpufreq_governor *gov; int sibling; #endif @@ -933,6 +881,15 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) cpufreq_cpu_put(policy); return 0; } + +#ifdef CONFIG_HOTPLUG_CPU + /* Check if this cpu was hot-unplugged earlier and has siblings */ + for_each_online_cpu(sibling) { + struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling); + if (cp && cpumask_test_cpu(cpu, cp->related_cpus)) + return cpufreq_add_policy_cpu(cpu, sibling, dev); + } +#endif #endif if (!try_module_get(cpufreq_driver->owner)) { @@ -940,7 +897,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) goto module_out; } - ret = -ENOMEM; policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); if (!policy) goto nomem_out; @@ -985,6 +941,9 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) goto err_unlock_policy; } + /* related cpus should atleast have policy->cpus */ + cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + /* * affected cpus must always be the one, which are online. We aren't * managing offline cpus here. @@ -997,14 +956,14 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); - ret = cpufreq_add_dev_policy(cpu, policy, dev); - if (ret) { - if (ret > 0) - /* This is a managed cpu, symlink created, - exit with 0 */ - ret = 0; - goto err_unlock_policy; +#ifdef CONFIG_HOTPLUG_CPU + gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu)); + if (gov) { + policy->governor = gov; + pr_debug("Restoring governor %s for cpu %d\n", + policy->governor->name, cpu); } +#endif ret = cpufreq_add_dev_interface(cpu, policy, dev); if (ret) @@ -1018,7 +977,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return 0; - err_out_unregister: spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 8ff26af622ea..fc714a65fa15 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -189,7 +189,6 @@ static int spear_cpufreq_init(struct cpufreq_policy *policy) policy->cur = spear_cpufreq_get(0); cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); - cpumask_copy(policy->related_cpus, policy->cpus); return 0; } From 58ddcead4f163a01cef96aa5ba88f374011d8aea Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Wed, 30 Jan 2013 13:53:37 +0000 Subject: [PATCH 25/55] cpufreq: governors: clean timer init and exit code Drop unused arguments from dbs_timer_init and clean dbs_timer_exit and cpufreq_governor_dbs to remove non necessary special cases. Reported-by: Viresh Kumar Signed-off-by: Fabio Baltieri Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 43 +++++++++--------------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ee8b7cac11f3..46f96a4cebf9 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -169,19 +169,19 @@ bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs) } EXPORT_SYMBOL_GPL(dbs_sw_coordinated_cpus); -static inline void dbs_timer_init(struct dbs_data *dbs_data, - struct cpu_dbs_common_info *cdbs, - unsigned int sampling_rate, - int cpu) +static inline void dbs_timer_init(struct dbs_data *dbs_data, int cpu, + unsigned int sampling_rate) { int delay = delay_for_sampling_rate(sampling_rate); - struct cpu_dbs_common_info *cdbs_local = dbs_data->get_cpu_cdbs(cpu); + struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); - schedule_delayed_work_on(cpu, &cdbs_local->work, delay); + schedule_delayed_work_on(cpu, &cdbs->work, delay); } -static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs) +static inline void dbs_timer_exit(struct dbs_data *dbs_data, int cpu) { + struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); + cancel_delayed_work_sync(&cdbs->work); } @@ -289,36 +289,19 @@ second_time: } mutex_unlock(&dbs_data->mutex); - if (dbs_sw_coordinated_cpus(cpu_cdbs)) { - /* Initiate timer time stamp */ - cpu_cdbs->time_stamp = ktime_get(); + /* Initiate timer time stamp */ + cpu_cdbs->time_stamp = ktime_get(); - for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs; - - j_cdbs = dbs_data->get_cpu_cdbs(j); - dbs_timer_init(dbs_data, j_cdbs, - *sampling_rate, j); - } - } else { - dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate, cpu); - } + for_each_cpu(j, policy->cpus) + dbs_timer_init(dbs_data, j, *sampling_rate); break; case CPUFREQ_GOV_STOP: if (dbs_data->governor == GOV_CONSERVATIVE) cs_dbs_info->enable = 0; - if (dbs_sw_coordinated_cpus(cpu_cdbs)) { - for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs; - - j_cdbs = dbs_data->get_cpu_cdbs(j); - dbs_timer_exit(j_cdbs); - } - } else { - dbs_timer_exit(cpu_cdbs); - } + for_each_cpu(j, policy->cpus) + dbs_timer_exit(dbs_data, j); mutex_lock(&dbs_data->mutex); mutex_destroy(&cpu_cdbs->timer_mutex); From 5553f9e26f6f49a93ba732fd222eac6973a4cf35 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 30 Jan 2013 14:27:49 +0000 Subject: [PATCH 26/55] cpufreq: instantiate cpufreq-cpu0 as a platform_driver As multiplatform build is being adopted by more and more ARM platforms, initcall function should be used very carefully. For example, when GENERIC_CPUFREQ_CPU0 is built in the kernel, cpu0_cpufreq_driver_init() will be called on all the platforms to initialize cpufreq-cpu0 driver. To eliminate this undesired the effect, the patch changes cpufreq-cpu0 driver to have it instantiated as a platform_driver. Then it will only run on platforms that create the platform_device "cpufreq-cpu0". Along with the change, it also changes cpu_dev to be &pdev->dev, so that managed functions can start working, and module build gets supported too. The highbank-cpufreq driver is also updated accordingly to adapt the changes on cpufreq-cpu0. Signed-off-by: Shawn Guo Reviewed-by: Viresh Kumar Acked-by: Mark Langsdorf Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 2 +- drivers/cpufreq/cpufreq-cpu0.c | 35 ++++++++++++++++++++---------- drivers/cpufreq/highbank-cpufreq.c | 5 +++++ 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index e0a899f25e37..cbcb21e32771 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -185,7 +185,7 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. config GENERIC_CPUFREQ_CPU0 - bool "Generic CPU0 cpufreq driver" + tristate "Generic CPU0 cpufreq driver" depends on HAVE_CLK && REGULATOR && PM_OPP && OF select CPU_FREQ_TABLE help diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index 38ae178ce4ef..a108f66271ee 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -12,12 +12,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include #include #include +#include #include #include @@ -177,7 +177,7 @@ static struct cpufreq_driver cpu0_cpufreq_driver = { .attr = cpu0_cpufreq_attr, }; -static int cpu0_cpufreq_driver_init(void) +static int cpu0_cpufreq_probe(struct platform_device *pdev) { struct device_node *np; int ret; @@ -192,23 +192,17 @@ static int cpu0_cpufreq_driver_init(void) return -ENOENT; } - cpu_dev = get_cpu_device(0); - if (!cpu_dev) { - pr_err("failed to get cpu0 device\n"); - ret = -ENODEV; - goto out_put_node; - } - + cpu_dev = &pdev->dev; cpu_dev->of_node = np; - cpu_clk = clk_get(cpu_dev, NULL); + cpu_clk = devm_clk_get(cpu_dev, NULL); if (IS_ERR(cpu_clk)) { ret = PTR_ERR(cpu_clk); pr_err("failed to get cpu0 clock: %d\n", ret); goto out_put_node; } - cpu_reg = regulator_get(cpu_dev, "cpu0"); + cpu_reg = devm_regulator_get(cpu_dev, "cpu0"); if (IS_ERR(cpu_reg)) { pr_warn("failed to get cpu0 regulator\n"); cpu_reg = NULL; @@ -271,7 +265,24 @@ out_put_node: of_node_put(np); return ret; } -late_initcall(cpu0_cpufreq_driver_init); + +static int cpu0_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&cpu0_cpufreq_driver); + opp_free_cpufreq_table(cpu_dev, &freq_table); + + return 0; +} + +static struct platform_driver cpu0_cpufreq_platdrv = { + .driver = { + .name = "cpufreq-cpu0", + .owner = THIS_MODULE, + }, + .probe = cpu0_cpufreq_probe, + .remove = cpu0_cpufreq_remove, +}; +module_platform_driver(cpu0_cpufreq_platdrv); MODULE_AUTHOR("Shawn Guo "); MODULE_DESCRIPTION("Generic CPU0 cpufreq driver"); diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index 53f25e5aa6b7..66e3a71b81a3 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -20,6 +20,7 @@ #include #include #include +#include #define HB_CPUFREQ_CHANGE_NOTE 0x80000001 #define HB_CPUFREQ_IPC_LEN 7 @@ -65,6 +66,7 @@ static struct notifier_block hb_cpufreq_clk_nb = { static int hb_cpufreq_driver_init(void) { + struct platform_device_info devinfo = { .name = "cpufreq-cpu0", }; struct device *cpu_dev; struct clk *cpu_clk; struct device_node *np; @@ -104,6 +106,9 @@ static int hb_cpufreq_driver_init(void) goto out_put_node; } + /* Instantiate cpufreq-cpu0 */ + platform_device_register_full(&devinfo); + out_put_node: of_node_put(np); return ret; From 951fc5f45836988c7df1d05c7f4658f331e7a920 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 31 Jan 2013 02:03:53 +0000 Subject: [PATCH 27/55] cpufreq: Update Documentation for cpus and related_cpus Documentation related to cpus and related_cpus is confusing and not very clear. Over that CPUFreq core has seen much changes recently. Lets update documentation and comments for cpus and related_cpus. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpu-drivers.txt | 6 ++++++ Documentation/cpu-freq/user-guide.txt | 8 ++++---- include/linux/cpufreq.h | 6 ++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index c436096351f8..72f70b16d299 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -111,6 +111,12 @@ policy->governor must contain the "default policy" for For setting some of these values, the frequency table helpers might be helpful. See the section 2 for more information on them. +SMP systems normally have same clock source for a group of cpus. For these the +.init() would be called only once for the first online cpu. Here the .init() +routine must initialize policy->cpus with mask of all possible cpus (Online + +Offline) that share the clock. Then the core would copy this mask onto +policy->related_cpus and will reset policy->cpus to carry only online cpus. + 1.3 verify ------------ diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 04f6b32993e6..ff2f28332cc4 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -190,11 +190,11 @@ scaling_max_freq show the current "policy limits" (in first set scaling_max_freq, then scaling_min_freq. -affected_cpus : List of CPUs that require software coordination - of frequency. +affected_cpus : List of Online CPUs that require software + coordination of frequency. -related_cpus : List of CPUs that need some sort of frequency - coordination, whether software or hardware. +related_cpus : List of Online + Offline CPUs that need software + coordination of frequency. scaling_driver : Hardware driver for cpufreq. diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 5fdc6c6e3f8a..753b198750cf 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -89,8 +89,10 @@ struct cpufreq_real_policy { }; struct cpufreq_policy { - cpumask_var_t cpus; /* CPUs requiring sw coordination */ - cpumask_var_t related_cpus; /* CPUs with any coordination */ + /* CPUs sharing clock, require sw coordination */ + cpumask_var_t cpus; /* Online CPUs only */ + cpumask_var_t related_cpus; /* Online + Offline CPUs */ + unsigned int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of CPU managing this policy */ From 6f35a65fbb570086428596d907df6300abffd948 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 31 Jan 2013 04:53:56 +0000 Subject: [PATCH 28/55] cpufreq: SPEAr: Notify all policy->cpus of frequency change SPEAr cpufreq driver supports dual core Cortex-A9 SoC's, where cpus share policy structure. Whenever we update frequency of a cpu, we must notify all policy->cpus. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/spear-cpufreq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index fc714a65fa15..a0265353cb45 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -157,7 +157,9 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy, freqs.new = newfreq / 1000; freqs.new /= mult; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + for_each_cpu(freqs.cpu, policy->cpus) + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); if (mult == 2) ret = spear1340_set_cpu_rate(srcclk, newfreq); @@ -170,7 +172,8 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy, freqs.new = clk_get_rate(spear_cpufreq.clk) / 1000; } - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + for_each_cpu(freqs.cpu, policy->cpus) + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); return ret; } From 2624f90c16413990ecb0414400174a066319a9f5 Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Thu, 31 Jan 2013 09:44:40 +0000 Subject: [PATCH 29/55] cpufreq: governors: implement generic policy_is_shared Implement a generic helper function policy_is_shared() to replace the current dbs_sw_coordinated_cpus() at cpufreq level, so that it can be used by code other than cpufreq governors. Suggested-by: Viresh Kumar Signed-off-by: Fabio Baltieri Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 2 +- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 8 -------- drivers/cpufreq/cpufreq_governor.h | 1 - drivers/cpufreq/cpufreq_ondemand.c | 2 +- drivers/cpufreq/cpufreq_stats.c | 2 +- include/linux/cpufreq.h | 5 +++++ 7 files changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 22f9b47c5c74..937bc286591f 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -734,7 +734,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); - if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) { + if (bios_with_sw_any_bug && !policy_is_shared(policy)) { policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; cpumask_copy(policy->cpus, cpu_core_mask(cpu)); } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 5d8e8942ec97..653fb0652412 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -154,7 +154,7 @@ static void cs_dbs_timer(struct work_struct *work) struct cs_cpu_dbs_info_s *dbs_info = container_of(work, struct cs_cpu_dbs_info_s, cdbs.work.work); - if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) { + if (policy_is_shared(dbs_info->cdbs.cur_policy)) { cs_timer_coordinated(dbs_info, dw); } else { mutex_lock(&dbs_info->cdbs.timer_mutex); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 46f96a4cebf9..67e235acf43b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -161,14 +161,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) } EXPORT_SYMBOL_GPL(dbs_check_cpu); -bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs) -{ - struct cpufreq_policy *policy = cdbs->cur_policy; - - return cpumask_weight(policy->cpus) > 1; -} -EXPORT_SYMBOL_GPL(dbs_sw_coordinated_cpus); - static inline void dbs_timer_init(struct dbs_data *dbs_data, int cpu, unsigned int sampling_rate) { diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index aaf073daa9fb..b72e628e7ed6 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -172,7 +172,6 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) u64 get_cpu_idle_time(unsigned int cpu, u64 *wall); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); -bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs); int cpufreq_governor_dbs(struct dbs_data *dbs_data, struct cpufreq_policy *policy, unsigned int event); #endif /* _CPUFREQ_GOVERNER_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 1017b90b902e..5ae84ffeafb3 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -277,7 +277,7 @@ static void od_dbs_timer(struct work_struct *work) struct od_cpu_dbs_info_s *dbs_info = container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); - if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) { + if (policy_is_shared(dbs_info->cdbs.cur_policy)) { od_timer_coordinated(dbs_info, dw); } else { mutex_lock(&dbs_info->cdbs.timer_mutex); diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index beef6b54382b..572124c6e36b 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -185,7 +185,7 @@ static void cpufreq_stats_free_table(unsigned int cpu) static void cpufreq_stats_free_sysfs(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - if (policy && (cpumask_weight(policy->cpus) == 1)) { + if (policy && !policy_is_shared(policy)) { pr_debug("%s: Free sysfs stat\n", __func__); sysfs_remove_group(&policy->kobj, &stats_attr_group); } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 753b198750cf..feb360c8aa88 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -127,6 +127,11 @@ struct cpufreq_policy { #define CPUFREQ_SHARED_TYPE_ALL (2) /* All dependent CPUs should set freq */ #define CPUFREQ_SHARED_TYPE_ANY (3) /* Freq can be set from any dependent CPU*/ +static inline bool policy_is_shared(struct cpufreq_policy *policy) +{ + return cpumask_weight(policy->cpus) > 1; +} + /******************** cpufreq transition notifiers *******************/ #define CPUFREQ_PRECHANGE (0) From 09dca5ae7531c9df379a2c2484a17438b9e947bc Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Thu, 31 Jan 2013 10:39:19 +0000 Subject: [PATCH 30/55] cpufreq: governors: fix misuse of cdbs.cpu Fix governors code to set all cpu's cdbs->cpu to the the actual cpu id and use cur_policy->cpu istead of cdbs->cpu to track current governor's leader cpu. Reported-by: Viresh Kumar Signed-off-by: Fabio Baltieri Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 5 +++-- drivers/cpufreq/cpufreq_governor.c | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 653fb0652412..c18a304b3a38 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -114,7 +114,7 @@ static void cs_check_cpu(int cpu, unsigned int load) static void cs_timer_update(struct cs_cpu_dbs_info_s *dbs_info, bool sample, struct delayed_work *dw) { - unsigned int cpu = dbs_info->cdbs.cpu; + unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; int delay = delay_for_sampling_rate(cs_tuners.sampling_rate); if (sample) @@ -132,7 +132,8 @@ static void cs_timer_coordinated(struct cs_cpu_dbs_info_s *dbs_info_local, bool sample = true; /* use leader CPU's dbs_info */ - dbs_info = &per_cpu(cs_cpu_dbs_info, dbs_info_local->cdbs.cpu); + dbs_info = &per_cpu(cs_cpu_dbs_info, + dbs_info_local->cdbs.cur_policy->cpu); mutex_lock(&dbs_info->cdbs.timer_mutex); time_now = ktime_get(); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 67e235acf43b..46f1c78bd16f 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -208,11 +208,11 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, mutex_lock(&dbs_data->mutex); dbs_data->enable++; - cpu_cdbs->cpu = cpu; for_each_cpu(j, policy->cpus) { struct cpu_dbs_common_info *j_cdbs; j_cdbs = dbs_data->get_cpu_cdbs(j); + j_cdbs->cpu = j; j_cdbs->cur_policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 5ae84ffeafb3..75efd5ee00f8 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -219,7 +219,7 @@ static void od_check_cpu(int cpu, unsigned int load_freq) static void od_timer_update(struct od_cpu_dbs_info_s *dbs_info, bool sample, struct delayed_work *dw) { - unsigned int cpu = dbs_info->cdbs.cpu; + unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; int delay, sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ @@ -255,7 +255,8 @@ static void od_timer_coordinated(struct od_cpu_dbs_info_s *dbs_info_local, bool sample = true; /* use leader CPU's dbs_info */ - dbs_info = &per_cpu(od_cpu_dbs_info, dbs_info_local->cdbs.cpu); + dbs_info = &per_cpu(od_cpu_dbs_info, + dbs_info_local->cdbs.cur_policy->cpu); mutex_lock(&dbs_info->cdbs.timer_mutex); time_now = ktime_get(); From 8eeed0956615294200be783bb67d851280b5b1b9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 31 Jan 2013 17:28:01 +0000 Subject: [PATCH 31/55] cpufreq: governors: Get rid of dbs_data->enable field CPUFREQ_GOV_START/STOP are called only once for all policy->cpus and hence we don't need to adapt cpufreq_governor_dbs() routine for multiple calls. So, this patch removes dbs_data->enable field entirely. And rearrange code a bit. Signed-off-by: Viresh Kumar Tested-by: Fabio Baltieri Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 56 ++++++++++-------------------- drivers/cpufreq/cpufreq_governor.h | 1 - 2 files changed, 19 insertions(+), 38 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 46f1c78bd16f..29d6a59b1a15 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -182,6 +182,8 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, { struct od_cpu_dbs_info_s *od_dbs_info = NULL; struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; + struct cs_ops *cs_ops = NULL; + struct od_ops *od_ops = NULL; struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cpu_dbs_common_info *cpu_cdbs; @@ -194,10 +196,12 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); sampling_rate = &cs_tuners->sampling_rate; ignore_nice = cs_tuners->ignore_nice; + cs_ops = dbs_data->gov_ops; } else { od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); sampling_rate = &od_tuners->sampling_rate; ignore_nice = od_tuners->ignore_nice; + od_ops = dbs_data->gov_ops; } switch (event) { @@ -207,10 +211,9 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, mutex_lock(&dbs_data->mutex); - dbs_data->enable++; for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs; - j_cdbs = dbs_data->get_cpu_cdbs(j); + struct cpu_dbs_common_info *j_cdbs = + dbs_data->get_cpu_cdbs(j); j_cdbs->cpu = j; j_cdbs->cur_policy = policy; @@ -225,13 +228,6 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, dbs_data->gov_dbs_timer); } - /* - * Start the timerschedule work, when this governor is used for - * first time - */ - if (dbs_data->enable != 1) - goto second_time; - rc = sysfs_create_group(cpufreq_global_kobject, dbs_data->attr_group); if (rc) { @@ -249,17 +245,19 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, * governor, thus we are bound to jiffes/HZ */ if (dbs_data->governor == GOV_CONSERVATIVE) { - struct cs_ops *ops = dbs_data->gov_ops; - - cpufreq_register_notifier(ops->notifier_block, + cs_dbs_info->down_skip = 0; + cs_dbs_info->enable = 1; + cs_dbs_info->requested_freq = policy->cur; + cpufreq_register_notifier(cs_ops->notifier_block, CPUFREQ_TRANSITION_NOTIFIER); dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); } else { - struct od_ops *ops = dbs_data->gov_ops; - - od_tuners->io_is_busy = ops->io_busy(); + od_dbs_info->rate_mult = 1; + od_dbs_info->sample_type = OD_NORMAL_SAMPLE; + od_ops->powersave_bias_init_cpu(cpu); + od_tuners->io_is_busy = od_ops->io_busy(); } /* Bring kernel and HW constraints together */ @@ -267,18 +265,6 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, MIN_LATENCY_MULTIPLIER * latency); *sampling_rate = max(dbs_data->min_sampling_rate, latency * LATENCY_MULTIPLIER); - -second_time: - if (dbs_data->governor == GOV_CONSERVATIVE) { - cs_dbs_info->down_skip = 0; - cs_dbs_info->enable = 1; - cs_dbs_info->requested_freq = policy->cur; - } else { - struct od_ops *ops = dbs_data->gov_ops; - od_dbs_info->rate_mult = 1; - od_dbs_info->sample_type = OD_NORMAL_SAMPLE; - ops->powersave_bias_init_cpu(cpu); - } mutex_unlock(&dbs_data->mutex); /* Initiate timer time stamp */ @@ -297,16 +283,12 @@ second_time: mutex_lock(&dbs_data->mutex); mutex_destroy(&cpu_cdbs->timer_mutex); - dbs_data->enable--; - if (!dbs_data->enable) { - struct cs_ops *ops = dbs_data->gov_ops; - sysfs_remove_group(cpufreq_global_kobject, - dbs_data->attr_group); - if (dbs_data->governor == GOV_CONSERVATIVE) - cpufreq_unregister_notifier(ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - } + sysfs_remove_group(cpufreq_global_kobject, + dbs_data->attr_group); + if (dbs_data->governor == GOV_CONSERVATIVE) + cpufreq_unregister_notifier(cs_ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); mutex_unlock(&dbs_data->mutex); break; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index b72e628e7ed6..c19a16c34361 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -130,7 +130,6 @@ struct dbs_data { #define GOV_CONSERVATIVE 1 int governor; unsigned int min_sampling_rate; - unsigned int enable; /* number of CPUs using this policy */ struct attribute_group *attr_group; void *tuners; From 4447266b842d27f77b017a59eb9dc38ad7b299f1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 31 Jan 2013 17:28:02 +0000 Subject: [PATCH 32/55] cpufreq: governors: Remove code redundancy between governors With the inclusion of following patches: 9f4eb10 cpufreq: conservative: call dbs_check_cpu only when necessary 772b4b1 cpufreq: ondemand: call dbs_check_cpu only when necessary code redundancy between the conservative and ondemand governors is introduced again, so get rid of it. [rjw: Changelog] Signed-off-by: Viresh Kumar Tested-by: Fabio Baltieri Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 56 ++++------------ drivers/cpufreq/cpufreq_governor.c | 19 ++++++ drivers/cpufreq/cpufreq_governor.h | 2 + drivers/cpufreq/cpufreq_ondemand.c | 91 +++++++++----------------- 4 files changed, 62 insertions(+), 106 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index c18a304b3a38..e8bb91571672 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -111,58 +111,24 @@ static void cs_check_cpu(int cpu, unsigned int load) } } -static void cs_timer_update(struct cs_cpu_dbs_info_s *dbs_info, bool sample, - struct delayed_work *dw) -{ - unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; - int delay = delay_for_sampling_rate(cs_tuners.sampling_rate); - - if (sample) - dbs_check_cpu(&cs_dbs_data, cpu); - - schedule_delayed_work_on(smp_processor_id(), dw, delay); -} - -static void cs_timer_coordinated(struct cs_cpu_dbs_info_s *dbs_info_local, - struct delayed_work *dw) -{ - struct cs_cpu_dbs_info_s *dbs_info; - ktime_t time_now; - s64 delta_us; - bool sample = true; - - /* use leader CPU's dbs_info */ - dbs_info = &per_cpu(cs_cpu_dbs_info, - dbs_info_local->cdbs.cur_policy->cpu); - mutex_lock(&dbs_info->cdbs.timer_mutex); - - time_now = ktime_get(); - delta_us = ktime_us_delta(time_now, dbs_info->cdbs.time_stamp); - - /* Do nothing if we recently have sampled */ - if (delta_us < (s64)(cs_tuners.sampling_rate / 2)) - sample = false; - else - dbs_info->cdbs.time_stamp = time_now; - - cs_timer_update(dbs_info, sample, dw); - mutex_unlock(&dbs_info->cdbs.timer_mutex); -} - static void cs_dbs_timer(struct work_struct *work) { struct delayed_work *dw = to_delayed_work(work); struct cs_cpu_dbs_info_s *dbs_info = container_of(work, struct cs_cpu_dbs_info_s, cdbs.work.work); + unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; + struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, + cpu); + int delay = delay_for_sampling_rate(cs_tuners.sampling_rate); - if (policy_is_shared(dbs_info->cdbs.cur_policy)) { - cs_timer_coordinated(dbs_info, dw); - } else { - mutex_lock(&dbs_info->cdbs.timer_mutex); - cs_timer_update(dbs_info, true, dw); - mutex_unlock(&dbs_info->cdbs.timer_mutex); - } + mutex_lock(&core_dbs_info->cdbs.timer_mutex); + if (need_load_eval(&core_dbs_info->cdbs, cs_tuners.sampling_rate)) + dbs_check_cpu(&cs_dbs_data, cpu); + + schedule_delayed_work_on(smp_processor_id(), dw, delay); + mutex_unlock(&core_dbs_info->cdbs.timer_mutex); } + static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 29d6a59b1a15..7aaa9b151940 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -177,6 +177,25 @@ static inline void dbs_timer_exit(struct dbs_data *dbs_data, int cpu) cancel_delayed_work_sync(&cdbs->work); } +/* Will return if we need to evaluate cpu load again or not */ +bool need_load_eval(struct cpu_dbs_common_info *cdbs, + unsigned int sampling_rate) +{ + if (policy_is_shared(cdbs->cur_policy)) { + ktime_t time_now = ktime_get(); + s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp); + + /* Do nothing if we recently have sampled */ + if (delta_us < (s64)(sampling_rate / 2)) + return false; + else + cdbs->time_stamp = time_now; + } + + return true; +} +EXPORT_SYMBOL_GPL(need_load_eval); + int cpufreq_governor_dbs(struct dbs_data *dbs_data, struct cpufreq_policy *policy, unsigned int event) { diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index c19a16c34361..16314b65ca67 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -171,6 +171,8 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) u64 get_cpu_idle_time(unsigned int cpu, u64 *wall); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); +bool need_load_eval(struct cpu_dbs_common_info *cdbs, + unsigned int sampling_rate); int cpufreq_governor_dbs(struct dbs_data *dbs_data, struct cpufreq_policy *policy, unsigned int event); #endif /* _CPUFREQ_GOVERNER_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 75efd5ee00f8..f38b8da60128 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -216,75 +216,44 @@ static void od_check_cpu(int cpu, unsigned int load_freq) } } -static void od_timer_update(struct od_cpu_dbs_info_s *dbs_info, bool sample, - struct delayed_work *dw) -{ - unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; - int delay, sample_type = dbs_info->sample_type; - - /* Common NORMAL_SAMPLE setup */ - dbs_info->sample_type = OD_NORMAL_SAMPLE; - if (sample_type == OD_SUB_SAMPLE) { - delay = dbs_info->freq_lo_jiffies; - if (sample) - __cpufreq_driver_target(dbs_info->cdbs.cur_policy, - dbs_info->freq_lo, - CPUFREQ_RELATION_H); - } else { - if (sample) - dbs_check_cpu(&od_dbs_data, cpu); - if (dbs_info->freq_lo) { - /* Setup timer for SUB_SAMPLE */ - dbs_info->sample_type = OD_SUB_SAMPLE; - delay = dbs_info->freq_hi_jiffies; - } else { - delay = delay_for_sampling_rate(od_tuners.sampling_rate - * dbs_info->rate_mult); - } - } - - schedule_delayed_work_on(smp_processor_id(), dw, delay); -} - -static void od_timer_coordinated(struct od_cpu_dbs_info_s *dbs_info_local, - struct delayed_work *dw) -{ - struct od_cpu_dbs_info_s *dbs_info; - ktime_t time_now; - s64 delta_us; - bool sample = true; - - /* use leader CPU's dbs_info */ - dbs_info = &per_cpu(od_cpu_dbs_info, - dbs_info_local->cdbs.cur_policy->cpu); - mutex_lock(&dbs_info->cdbs.timer_mutex); - - time_now = ktime_get(); - delta_us = ktime_us_delta(time_now, dbs_info->cdbs.time_stamp); - - /* Do nothing if we recently have sampled */ - if (delta_us < (s64)(od_tuners.sampling_rate / 2)) - sample = false; - else - dbs_info->cdbs.time_stamp = time_now; - - od_timer_update(dbs_info, sample, dw); - mutex_unlock(&dbs_info->cdbs.timer_mutex); -} - static void od_dbs_timer(struct work_struct *work) { struct delayed_work *dw = to_delayed_work(work); struct od_cpu_dbs_info_s *dbs_info = container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); + unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; + struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, + cpu); + int delay, sample_type = core_dbs_info->sample_type; + bool eval_load; - if (policy_is_shared(dbs_info->cdbs.cur_policy)) { - od_timer_coordinated(dbs_info, dw); + mutex_lock(&core_dbs_info->cdbs.timer_mutex); + eval_load = need_load_eval(&core_dbs_info->cdbs, + od_tuners.sampling_rate); + + /* Common NORMAL_SAMPLE setup */ + core_dbs_info->sample_type = OD_NORMAL_SAMPLE; + if (sample_type == OD_SUB_SAMPLE) { + delay = core_dbs_info->freq_lo_jiffies; + if (eval_load) + __cpufreq_driver_target(core_dbs_info->cdbs.cur_policy, + core_dbs_info->freq_lo, + CPUFREQ_RELATION_H); } else { - mutex_lock(&dbs_info->cdbs.timer_mutex); - od_timer_update(dbs_info, true, dw); - mutex_unlock(&dbs_info->cdbs.timer_mutex); + if (eval_load) + dbs_check_cpu(&od_dbs_data, cpu); + if (core_dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + core_dbs_info->sample_type = OD_SUB_SAMPLE; + delay = core_dbs_info->freq_hi_jiffies; + } else { + delay = delay_for_sampling_rate(od_tuners.sampling_rate + * core_dbs_info->rate_mult); + } } + + schedule_delayed_work_on(smp_processor_id(), dw, delay); + mutex_unlock(&core_dbs_info->cdbs.timer_mutex); } /************************** sysfs interface ************************/ From b394058f064848deac7a7cd6942b6521d7b3fe1d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 1 Feb 2013 05:42:58 +0000 Subject: [PATCH 33/55] cpufreq: governors: Reset tunables only for cpufreq_unregister_governor() Currently, whenever governor->governor() is called for CPUFRREQ_GOV_START event we reset few tunables of governor. Which isn't correct, as this routine is called for every cpu hot-[un]plugging event. We should actually be resetting these only when the governor module is removed and re-installed. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 4 ++++ drivers/cpufreq/cpufreq_governor.c | 24 ++++++++++++++++-------- include/linux/cpufreq.h | 1 + 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1cea7a1eac13..0b4be4481433 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1562,6 +1562,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, policy->cpu, event); ret = policy->governor->governor(policy, event); + if (!policy->governor->initialized && (event == CPUFREQ_GOV_START)) + policy->governor->initialized = 1; + /* we keep one module reference alive for each CPU governed by this CPU */ if ((event != CPUFREQ_GOV_START) || ret) @@ -1585,6 +1588,7 @@ int cpufreq_register_governor(struct cpufreq_governor *governor) mutex_lock(&cpufreq_governor_mutex); + governor->initialized = 0; err = -EBUSY; if (__find_governor(governor->name) == NULL) { err = 0; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 7aaa9b151940..79795c4bf611 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -254,11 +254,6 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, return rc; } - /* policy latency is in nS. Convert it to uS first */ - latency = policy->cpuinfo.transition_latency / 1000; - if (latency == 0) - latency = 1; - /* * conservative does not implement micro like ondemand * governor, thus we are bound to jiffes/HZ @@ -270,20 +265,33 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, cpufreq_register_notifier(cs_ops->notifier_block, CPUFREQ_TRANSITION_NOTIFIER); - dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * - jiffies_to_usecs(10); + if (!policy->governor->initialized) + dbs_data->min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); } else { od_dbs_info->rate_mult = 1; od_dbs_info->sample_type = OD_NORMAL_SAMPLE; od_ops->powersave_bias_init_cpu(cpu); - od_tuners->io_is_busy = od_ops->io_busy(); + + if (!policy->governor->initialized) + od_tuners->io_is_busy = od_ops->io_busy(); } + if (policy->governor->initialized) + goto unlock; + + /* policy latency is in nS. Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + /* Bring kernel and HW constraints together */ dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, MIN_LATENCY_MULTIPLIER * latency); *sampling_rate = max(dbs_data->min_sampling_rate, latency * LATENCY_MULTIPLIER); +unlock: mutex_unlock(&dbs_data->mutex); /* Initiate timer time stamp */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index feb360c8aa88..6bf3f2d12c90 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -183,6 +183,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; + int initialized; int (*governor) (struct cpufreq_policy *policy, unsigned int event); ssize_t (*show_setspeed) (struct cpufreq_policy *policy, From 16a44f82674ddd8d5b5b2527979ac22a25c55c50 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 1 Feb 2013 06:40:00 +0000 Subject: [PATCH 34/55] cpufreq: TEGRA: Set policy->cpus from driver->init() For multicore SoC's, with cores sharing clock line, we are required to set policy->cpus and policy->related_cpus with mask of cpus. With following patch, we need to set policy->cpus with mask of all possible cpus and policy->related_cpus would be filled automatically by the cpufreq core. commit 4948b355e90080cd5ec1e91189f65a01e4186ef2 Author: Viresh Kumar Date: Tue Jan 29 14:39:08 2013 +0000 cpufreq: Simplify cpufreq_add_dev() Current Tegra driver fills only ->related_cpus and not ->cpus, which looks to be incorrect. Lets fix it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-tegra/cpu-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index a74d3c7d2e26..e7ddcb2b5261 100644 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -244,7 +244,7 @@ static int tegra_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = 300 * 1000; policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - cpumask_copy(policy->related_cpus, cpu_possible_mask); + cpumask_copy(policy->cpus, cpu_possible_mask); if (policy->cpu == 0) register_pm_notifier(&tegra_cpu_pm_notifier); From 4c738d00cf5866a0b114ceb41d736a4a803f0c46 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 1 Feb 2013 06:40:01 +0000 Subject: [PATCH 35/55] cpufreq: Set all cpus in policy->cpus for single cluster SoCs With following patch, we need to set policy->cpus with mask of all possible cpus and policy->related_cpus would be filled automatically by the core. commit 4948b355e90080cd5ec1e91189f65a01e4186ef2 Author: Viresh Kumar Date: Tue Jan 29 14:39:08 2013 +0000 cpufreq: Simplify cpufreq_add_dev() Lets fix it for all single cluster SoCs. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/db8500-cpufreq.c | 2 +- drivers/cpufreq/maple-cpufreq.c | 2 +- drivers/cpufreq/spear-cpufreq.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/db8500-cpufreq.c b/drivers/cpufreq/db8500-cpufreq.c index 4f154bc0ebe4..e12dff601b8a 100644 --- a/drivers/cpufreq/db8500-cpufreq.c +++ b/drivers/cpufreq/db8500-cpufreq.c @@ -128,7 +128,7 @@ static int __cpuinit db8500_cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = 20 * 1000; /* in ns */ /* policy sharing between dual CPUs */ - cpumask_copy(policy->cpus, cpu_present_mask); + cpumask_setall(policy->cpus); policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index 89b178a3f849..d4c4989823dc 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -181,7 +181,7 @@ static int maple_cpufreq_cpu_init(struct cpufreq_policy *policy) /* secondary CPUs are tied to the primary one by the * cpufreq core if in the secondary policy we tell it that * it actually must be one policy together with all others. */ - cpumask_copy(policy->cpus, cpu_online_mask); + cpumask_setall(policy->cpus); cpufreq_frequency_table_get_attr(maple_cpu_freqs, policy->cpu); return cpufreq_frequency_table_cpuinfo(policy, diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index a0265353cb45..7e4d77327957 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -191,7 +191,7 @@ static int spear_cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = spear_cpufreq.transition_latency; policy->cur = spear_cpufreq_get(0); - cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); + cpumask_setall(policy->cpus); return 0; } From 62b36cc1c83aca1cd252772e82cbc5d9ef8ff25b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 1 Feb 2013 06:40:02 +0000 Subject: [PATCH 36/55] cpufreq: Remove unnecessary use of policy->shared_type policy->shared_type field was added only for SoCs with ACPI support: commit 3b2d99429e3386b6e2ac949fc72486509c8bbe36 Author: Venkatesh Pallipadi Date: Wed Dec 14 15:05:00 2005 -0500 P-state software coordination for ACPI core http://bugzilla.kernel.org/show_bug.cgi?id=5737 Many non-ACPI systems are filling this field by mistake, which makes its usage confusing. Lets clean it. Signed-off-by: Viresh Kumar Acked-by: Santosh Shilimkar Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-tegra/cpu-tegra.c | 1 - drivers/cpufreq/cpufreq-cpu0.c | 1 - drivers/cpufreq/db8500-cpufreq.c | 2 -- drivers/cpufreq/omap-cpufreq.c | 4 +--- include/linux/cpufreq.h | 3 ++- 5 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index e7ddcb2b5261..a36a03d3c9a0 100644 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -243,7 +243,6 @@ static int tegra_cpu_init(struct cpufreq_policy *policy) /* FIXME: what's the actual transition time? */ policy->cpuinfo.transition_latency = 300 * 1000; - policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; cpumask_copy(policy->cpus, cpu_possible_mask); if (policy->cpu == 0) diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index a108f66271ee..4e5b7fb8927c 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -146,7 +146,6 @@ static int cpu0_cpufreq_init(struct cpufreq_policy *policy) * share the clock and voltage and clock. Use cpufreq affected_cpus * interface to have all CPUs scaled together. */ - policy->shared_type = CPUFREQ_SHARED_TYPE_ANY; cpumask_setall(policy->cpus); cpufreq_frequency_table_get_attr(freq_table, policy->cpu); diff --git a/drivers/cpufreq/db8500-cpufreq.c b/drivers/cpufreq/db8500-cpufreq.c index e12dff601b8a..79a84860ea56 100644 --- a/drivers/cpufreq/db8500-cpufreq.c +++ b/drivers/cpufreq/db8500-cpufreq.c @@ -130,8 +130,6 @@ static int __cpuinit db8500_cpufreq_init(struct cpufreq_policy *policy) /* policy sharing between dual CPUs */ cpumask_setall(policy->cpus); - policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - return 0; } diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 97102b05843f..9128c07bafba 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -214,10 +214,8 @@ static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy) * interface to handle this scenario. Additional is_smp() check * is to keep SMP_ON_UP build working. */ - if (is_smp()) { - policy->shared_type = CPUFREQ_SHARED_TYPE_ANY; + if (is_smp()) cpumask_setall(policy->cpus); - } /* FIXME: what's the actual transition time? */ policy->cpuinfo.transition_latency = 300 * 1000; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6bf3f2d12c90..a22944ca0526 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -93,7 +93,7 @@ struct cpufreq_policy { cpumask_var_t cpus; /* Online CPUs only */ cpumask_var_t related_cpus; /* Online + Offline CPUs */ - unsigned int shared_type; /* ANY or ALL affected CPUs + unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of CPU managing this policy */ unsigned int last_cpu; /* cpu nr of previous CPU that managed @@ -122,6 +122,7 @@ struct cpufreq_policy { #define CPUFREQ_START (3) #define CPUFREQ_UPDATE_POLICY_CPU (4) +/* Only for ACPI */ #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ #define CPUFREQ_SHARED_TYPE_ALL (2) /* All dependent CPUs should set freq */ From 73bf0fc2b03d1f4fdada0ec430dc20bfb089cfd5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 5 Feb 2013 22:21:14 +0100 Subject: [PATCH 37/55] cpufreq: Don't remove sysfs link for policy->cpu "cpufreq" directory in policy->cpu is never created using sysfs_create_link(), but using kobject_init_and_add(). And so we shouldn't call sysfs_remove_link() for policy->cpu(). sysfs stuff for policy->cpu is automatically removed when we call kobject_put() for dying policy. Signed-off-by: Viresh Kumar Tested-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0b4be4481433..b63b3cbfe2c4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1059,7 +1059,9 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif cpus = cpumask_weight(data->cpus); cpumask_clear_cpu(cpu, data->cpus); - if (unlikely((cpu == data->cpu) && (cpus > 1))) { + if (cpu != data->cpu) { + sysfs_remove_link(&dev->kobj, "cpufreq"); + } else if (cpus > 1) { /* first sibling now owns the new sysfs dir */ cpu_dev = get_cpu_device(cpumask_first(data->cpus)); sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); @@ -1084,7 +1086,6 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); cpufreq_cpu_put(data); unlock_policy_rwsem_write(cpu); - sysfs_remove_link(&dev->kobj, "cpufreq"); /* If cpu is last user of policy, free policy */ if (cpus == 1) { From 1dd538f072f0b7ba327613253d41ebb329c6d490 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 4 Feb 2013 05:46:29 +0000 Subject: [PATCH 38/55] cpufreq: add imx6q-cpufreq driver Add an imx6q-cpufreq driver for Freescale i.MX6Q SoC to handle the hardware specific frequency and voltage scaling requirements. The driver supports module build and is instantiated by the platform device/driver mechanism, so that it will not be instantiated on other platforms, as IMX is built with multiplatform support. Signed-off-by: Shawn Guo Reviewed-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 9 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/imx6q-cpufreq.c | 336 ++++++++++++++++++++++++++++++++ 3 files changed, 346 insertions(+) create mode 100644 drivers/cpufreq/imx6q-cpufreq.c diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index ffe55b8a98cc..c65226c4717d 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -77,6 +77,15 @@ config ARM_EXYNOS5250_CPUFREQ This adds the CPUFreq driver for Samsung EXYNOS5250 SoC. +config ARM_IMX6Q_CPUFREQ + tristate "Freescale i.MX6Q cpufreq support" + depends on SOC_IMX6Q + depends on REGULATOR_ANATOP + help + This adds cpufreq driver support for Freescale i.MX6Q SOC. + + If in doubt, say N. + config ARM_SPEAR_CPUFREQ bool "SPEAr CPUFreq support" depends on PLAT_SPEAR diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index e2a5da77370f..93610b284b35 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_ARM_EXYNOS5250_CPUFREQ) += exynos5250-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o +obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o ################################################################################## # PowerPC platform drivers diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c new file mode 100644 index 000000000000..d6b6ef350cb6 --- /dev/null +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -0,0 +1,336 @@ +/* + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PU_SOC_VOLTAGE_NORMAL 1250000 +#define PU_SOC_VOLTAGE_HIGH 1275000 +#define FREQ_1P2_GHZ 1200000000 + +static struct regulator *arm_reg; +static struct regulator *pu_reg; +static struct regulator *soc_reg; + +static struct clk *arm_clk; +static struct clk *pll1_sys_clk; +static struct clk *pll1_sw_clk; +static struct clk *step_clk; +static struct clk *pll2_pfd2_396m_clk; + +static struct device *cpu_dev; +static struct cpufreq_frequency_table *freq_table; +static unsigned int transition_latency; + +static int imx6q_verify_speed(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, freq_table); +} + +static unsigned int imx6q_get_speed(unsigned int cpu) +{ + return clk_get_rate(arm_clk) / 1000; +} + +static int imx6q_set_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + struct cpufreq_freqs freqs; + struct opp *opp; + unsigned long freq_hz, volt, volt_old; + unsigned int index, cpu; + int ret; + + ret = cpufreq_frequency_table_target(policy, freq_table, target_freq, + relation, &index); + if (ret) { + dev_err(cpu_dev, "failed to match target frequency %d: %d\n", + target_freq, ret); + return ret; + } + + freqs.new = freq_table[index].frequency; + freq_hz = freqs.new * 1000; + freqs.old = clk_get_rate(arm_clk) / 1000; + + if (freqs.old == freqs.new) + return 0; + + for_each_online_cpu(cpu) { + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + rcu_read_lock(); + opp = opp_find_freq_ceil(cpu_dev, &freq_hz); + if (IS_ERR(opp)) { + rcu_read_unlock(); + dev_err(cpu_dev, "failed to find OPP for %ld\n", freq_hz); + return PTR_ERR(opp); + } + + volt = opp_get_voltage(opp); + rcu_read_unlock(); + volt_old = regulator_get_voltage(arm_reg); + + dev_dbg(cpu_dev, "%u MHz, %ld mV --> %u MHz, %ld mV\n", + freqs.old / 1000, volt_old / 1000, + freqs.new / 1000, volt / 1000); + + /* scaling up? scale voltage before frequency */ + if (freqs.new > freqs.old) { + ret = regulator_set_voltage_tol(arm_reg, volt, 0); + if (ret) { + dev_err(cpu_dev, + "failed to scale vddarm up: %d\n", ret); + return ret; + } + + /* + * Need to increase vddpu and vddsoc for safety + * if we are about to run at 1.2 GHz. + */ + if (freqs.new == FREQ_1P2_GHZ / 1000) { + regulator_set_voltage_tol(pu_reg, + PU_SOC_VOLTAGE_HIGH, 0); + regulator_set_voltage_tol(soc_reg, + PU_SOC_VOLTAGE_HIGH, 0); + } + } + + /* + * The setpoints are selected per PLL/PDF frequencies, so we need to + * reprogram PLL for frequency scaling. The procedure of reprogramming + * PLL1 is as below. + * + * - Enable pll2_pfd2_396m_clk and reparent pll1_sw_clk to it + * - Reprogram pll1_sys_clk and reparent pll1_sw_clk back to it + * - Disable pll2_pfd2_396m_clk + */ + clk_prepare_enable(pll2_pfd2_396m_clk); + clk_set_parent(step_clk, pll2_pfd2_396m_clk); + clk_set_parent(pll1_sw_clk, step_clk); + if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) { + clk_set_rate(pll1_sys_clk, freqs.new * 1000); + /* + * If we are leaving 396 MHz set-point, we need to enable + * pll1_sys_clk and disable pll2_pfd2_396m_clk to keep + * their use count correct. + */ + if (freqs.old * 1000 <= clk_get_rate(pll2_pfd2_396m_clk)) { + clk_prepare_enable(pll1_sys_clk); + clk_disable_unprepare(pll2_pfd2_396m_clk); + } + clk_set_parent(pll1_sw_clk, pll1_sys_clk); + clk_disable_unprepare(pll2_pfd2_396m_clk); + } else { + /* + * Disable pll1_sys_clk if pll2_pfd2_396m_clk is sufficient + * to provide the frequency. + */ + clk_disable_unprepare(pll1_sys_clk); + } + + /* Ensure the arm clock divider is what we expect */ + ret = clk_set_rate(arm_clk, freqs.new * 1000); + if (ret) { + dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); + regulator_set_voltage_tol(arm_reg, volt_old, 0); + return ret; + } + + /* scaling down? scale voltage after frequency */ + if (freqs.new < freqs.old) { + ret = regulator_set_voltage_tol(arm_reg, volt, 0); + if (ret) + dev_warn(cpu_dev, + "failed to scale vddarm down: %d\n", ret); + + if (freqs.old == FREQ_1P2_GHZ / 1000) { + regulator_set_voltage_tol(pu_reg, + PU_SOC_VOLTAGE_NORMAL, 0); + regulator_set_voltage_tol(soc_reg, + PU_SOC_VOLTAGE_NORMAL, 0); + } + } + + for_each_online_cpu(cpu) { + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + return 0; +} + +static int imx6q_cpufreq_init(struct cpufreq_policy *policy) +{ + int ret; + + ret = cpufreq_frequency_table_cpuinfo(policy, freq_table); + if (ret) { + dev_err(cpu_dev, "invalid frequency table: %d\n", ret); + return ret; + } + + policy->cpuinfo.transition_latency = transition_latency; + policy->cur = clk_get_rate(arm_clk) / 1000; + cpumask_setall(policy->cpus); + cpufreq_frequency_table_get_attr(freq_table, policy->cpu); + + return 0; +} + +static int imx6q_cpufreq_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr *imx6q_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver imx6q_cpufreq_driver = { + .verify = imx6q_verify_speed, + .target = imx6q_set_target, + .get = imx6q_get_speed, + .init = imx6q_cpufreq_init, + .exit = imx6q_cpufreq_exit, + .name = "imx6q-cpufreq", + .attr = imx6q_cpufreq_attr, +}; + +static int imx6q_cpufreq_probe(struct platform_device *pdev) +{ + struct device_node *np; + struct opp *opp; + unsigned long min_volt, max_volt; + int num, ret; + + cpu_dev = &pdev->dev; + + np = of_find_node_by_path("/cpus/cpu@0"); + if (!np) { + dev_err(cpu_dev, "failed to find cpu0 node\n"); + return -ENOENT; + } + + cpu_dev->of_node = np; + + arm_clk = devm_clk_get(cpu_dev, "arm"); + pll1_sys_clk = devm_clk_get(cpu_dev, "pll1_sys"); + pll1_sw_clk = devm_clk_get(cpu_dev, "pll1_sw"); + step_clk = devm_clk_get(cpu_dev, "step"); + pll2_pfd2_396m_clk = devm_clk_get(cpu_dev, "pll2_pfd2_396m"); + if (IS_ERR(arm_clk) || IS_ERR(pll1_sys_clk) || IS_ERR(pll1_sw_clk) || + IS_ERR(step_clk) || IS_ERR(pll2_pfd2_396m_clk)) { + dev_err(cpu_dev, "failed to get clocks\n"); + ret = -ENOENT; + goto put_node; + } + + arm_reg = devm_regulator_get(cpu_dev, "arm"); + pu_reg = devm_regulator_get(cpu_dev, "pu"); + soc_reg = devm_regulator_get(cpu_dev, "soc"); + if (!arm_reg || !pu_reg || !soc_reg) { + dev_err(cpu_dev, "failed to get regulators\n"); + ret = -ENOENT; + goto put_node; + } + + /* We expect an OPP table supplied by platform */ + num = opp_get_opp_count(cpu_dev); + if (num < 0) { + ret = num; + dev_err(cpu_dev, "no OPP table is found: %d\n", ret); + goto put_node; + } + + ret = opp_init_cpufreq_table(cpu_dev, &freq_table); + if (ret) { + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); + goto put_node; + } + + if (of_property_read_u32(np, "clock-latency", &transition_latency)) + transition_latency = CPUFREQ_ETERNAL; + + /* + * OPP is maintained in order of increasing frequency, and + * freq_table initialised from OPP is therefore sorted in the + * same order. + */ + rcu_read_lock(); + opp = opp_find_freq_exact(cpu_dev, + freq_table[0].frequency * 1000, true); + min_volt = opp_get_voltage(opp); + opp = opp_find_freq_exact(cpu_dev, + freq_table[--num].frequency * 1000, true); + max_volt = opp_get_voltage(opp); + rcu_read_unlock(); + ret = regulator_set_voltage_time(arm_reg, min_volt, max_volt); + if (ret > 0) + transition_latency += ret * 1000; + + /* Count vddpu and vddsoc latency in for 1.2 GHz support */ + if (freq_table[num].frequency == FREQ_1P2_GHZ / 1000) { + ret = regulator_set_voltage_time(pu_reg, PU_SOC_VOLTAGE_NORMAL, + PU_SOC_VOLTAGE_HIGH); + if (ret > 0) + transition_latency += ret * 1000; + ret = regulator_set_voltage_time(soc_reg, PU_SOC_VOLTAGE_NORMAL, + PU_SOC_VOLTAGE_HIGH); + if (ret > 0) + transition_latency += ret * 1000; + } + + ret = cpufreq_register_driver(&imx6q_cpufreq_driver); + if (ret) { + dev_err(cpu_dev, "failed register driver: %d\n", ret); + goto free_freq_table; + } + + of_node_put(np); + return 0; + +free_freq_table: + opp_free_cpufreq_table(cpu_dev, &freq_table); +put_node: + of_node_put(np); + return ret; +} + +static int imx6q_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&imx6q_cpufreq_driver); + opp_free_cpufreq_table(cpu_dev, &freq_table); + + return 0; +} + +static struct platform_driver imx6q_cpufreq_platdrv = { + .driver = { + .name = "imx6q-cpufreq", + .owner = THIS_MODULE, + }, + .probe = imx6q_cpufreq_probe, + .remove = imx6q_cpufreq_remove, +}; +module_platform_driver(imx6q_cpufreq_platdrv); + +MODULE_AUTHOR("Shawn Guo "); +MODULE_DESCRIPTION("Freescale i.MX6Q cpufreq driver"); +MODULE_LICENSE("GPL"); From 3361b7b173341fdaa85153e1b322099949c9f8c8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 4 Feb 2013 11:38:51 +0000 Subject: [PATCH 39/55] cpufreq: Don't check cpu_online(policy->cpu) policy->cpu or cpus in policy->cpus can't be offline anymore. And so we don't need to check if they are online or not. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 17 +++-------------- drivers/cpufreq/cpufreq_governor.c | 2 +- drivers/cpufreq/cpufreq_userspace.c | 2 -- drivers/cpufreq/freq_table.c | 6 ------ 4 files changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b63b3cbfe2c4..0dc9933069c5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -76,10 +76,6 @@ static int lock_policy_rwsem_##mode \ int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ BUG_ON(policy_cpu == -1); \ down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ - if (unlikely(!cpu_online(cpu))) { \ - up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ - return -1; \ - } \ \ return 0; \ } @@ -720,8 +716,6 @@ static int cpufreq_add_dev_symlink(unsigned int cpu, if (j == cpu) continue; - if (!cpu_online(j)) - continue; pr_debug("CPU %u already managed, adding link\n", j); managed_policy = cpufreq_cpu_get(cpu); @@ -778,8 +772,6 @@ static int cpufreq_add_dev_interface(unsigned int cpu, spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) { - if (!cpu_online(j)) - continue; per_cpu(cpufreq_cpu_data, j) = policy; per_cpu(cpufreq_policy_cpu, j) = policy->cpu; } @@ -1006,11 +998,8 @@ static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) policy->last_cpu = policy->cpu; policy->cpu = cpu; - for_each_cpu(j, policy->cpus) { - if (!cpu_online(j)) - continue; + for_each_cpu(j, policy->cpus) per_cpu(cpufreq_policy_cpu, j) = cpu; - } #ifdef CONFIG_CPU_FREQ_TABLE cpufreq_frequency_table_update_policy_cpu(policy); @@ -1470,7 +1459,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (target_freq == policy->cur) return 0; - if (cpu_online(policy->cpu) && cpufreq_driver->target) + if (cpufreq_driver->target) retval = cpufreq_driver->target(policy, target_freq, relation); return retval; @@ -1508,7 +1497,7 @@ int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) if (cpufreq_disabled()) return ret; - if (!(cpu_online(cpu) && cpufreq_driver->getavg)) + if (!cpufreq_driver->getavg) return 0; policy = cpufreq_cpu_get(policy->cpu); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 79795c4bf611..e4a306c8ff1b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -225,7 +225,7 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, switch (event) { case CPUFREQ_GOV_START: - if ((!cpu_online(cpu)) || (!policy->cur)) + if (!policy->cur) return -EINVAL; mutex_lock(&dbs_data->mutex); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index c8c3d293cc57..bbeb9c0720a6 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -118,8 +118,6 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, switch (event) { case CPUFREQ_GOV_START: - if (!cpu_online(cpu)) - return -EINVAL; BUG_ON(!policy->cur); mutex_lock(&userspace_mutex); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index aa5bd39d129e..d7a79662e24c 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -63,9 +63,6 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, pr_debug("request for verification of policy (%u - %u kHz) for cpu %u\n", policy->min, policy->max, policy->cpu); - if (!cpu_online(policy->cpu)) - return -EINVAL; - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, policy->cpuinfo.max_freq); @@ -121,9 +118,6 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, break; } - if (!cpu_online(policy->cpu)) - return -EINVAL; - for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { unsigned int freq = table[i].frequency; if (freq == CPUFREQ_ENTRY_INVALID) From df18e504aa5db8e7263c91e153c27c6f895da3c7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 4 Feb 2013 11:38:52 +0000 Subject: [PATCH 40/55] cpufreq / stats: Get rid of CPUFREQ_STATDEVICE_ATTR Macro "CPUFREQ_STATDEVICE_ATTR" is defined local to cpufreq_stats.c file and is almost a copy of the generic version present in cpufreq.h file. Lets use the generic version instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 572124c6e36b..a2dee4cedf41 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -24,12 +24,6 @@ static spinlock_t cpufreq_stats_lock; -#define CPUFREQ_STATDEVICE_ATTR(_name, _mode, _show) \ -static struct freq_attr _attr_##_name = {\ - .attr = {.name = __stringify(_name), .mode = _mode, }, \ - .show = _show,\ -}; - struct cpufreq_stats { unsigned int cpu; unsigned int total_trans; @@ -136,17 +130,17 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) return PAGE_SIZE; return len; } -CPUFREQ_STATDEVICE_ATTR(trans_table, 0444, show_trans_table); +cpufreq_freq_attr_ro(trans_table); #endif -CPUFREQ_STATDEVICE_ATTR(total_trans, 0444, show_total_trans); -CPUFREQ_STATDEVICE_ATTR(time_in_state, 0444, show_time_in_state); +cpufreq_freq_attr_ro(total_trans); +cpufreq_freq_attr_ro(time_in_state); static struct attribute *default_attrs[] = { - &_attr_total_trans.attr, - &_attr_time_in_state.attr, + &total_trans.attr, + &time_in_state.attr, #ifdef CONFIG_CPU_FREQ_STAT_DETAILS - &_attr_trans_table.attr, + &trans_table.attr, #endif NULL }; From 4bd4e42819c7b5b2d608b353f4d9a7717e586479 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Wed, 6 Feb 2013 13:34:00 +0100 Subject: [PATCH 41/55] cpufreq: ondemand: Replace down_differential tuner with adj_up_threshold In order to avoid the calculation of up_threshold - down_differential every time that the frequency must be decreased, we replace the down_differential tuner with the adj_up_threshold which keeps the difference across multiple checks. Update the adj_up_threshold only when the up_theshold is also updated. Signed-off-by: Stratos Karafotis Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.h | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 16314b65ca67..d2ac91150600 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -109,7 +109,7 @@ struct od_dbs_tuners { unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; - unsigned int down_differential; + unsigned int adj_up_threshold; unsigned int powersave_bias; unsigned int io_is_busy; }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index f38b8da60128..09b27ae6576f 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -47,7 +47,8 @@ static struct cpufreq_governor cpufreq_gov_ondemand; static struct od_dbs_tuners od_tuners = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, - .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .adj_up_threshold = DEF_FREQUENCY_UP_THRESHOLD - + DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, }; @@ -192,11 +193,9 @@ static void od_check_cpu(int cpu, unsigned int load_freq) * support the current CPU usage without triggering the up policy. To be * safe, we focus 10 points under the threshold. */ - if (load_freq < (od_tuners.up_threshold - od_tuners.down_differential) * - policy->cur) { + if (load_freq < od_tuners.adj_up_threshold * policy->cur) { unsigned int freq_next; - freq_next = load_freq / (od_tuners.up_threshold - - od_tuners.down_differential); + freq_next = load_freq / od_tuners.adj_up_threshold; /* No longer fully busy, reset rate_mult */ dbs_info->rate_mult = 1; @@ -359,6 +358,10 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, input < MIN_FREQUENCY_UP_THRESHOLD) { return -EINVAL; } + /* Calculate the new adj_up_threshold */ + od_tuners.adj_up_threshold += input; + od_tuners.adj_up_threshold -= od_tuners.up_threshold; + od_tuners.up_threshold = input; return count; } @@ -515,7 +518,8 @@ static int __init cpufreq_gov_dbs_init(void) if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ od_tuners.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; - od_tuners.down_differential = MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + od_tuners.adj_up_threshold = MICRO_FREQUENCY_UP_THRESHOLD - + MICRO_FREQUENCY_DOWN_DIFFERENTIAL; /* * In nohz/micro accounting case we set the minimum frequency * not depending on HZ, but fixed (very low). The deferred From 8e53695f7f1d005fd1fcd3b099cd1bd73683a9f5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Feb 2013 12:51:27 +0530 Subject: [PATCH 42/55] cpufreq: governors: Fix WARN_ON() for multi-policy platforms On multi-policy systems there is a single instance of governor for both the policies (if same governor is chosen for both policies). With the code update from following patches: 8eeed09 cpufreq: governors: Get rid of dbs_data->enable field b394058 cpufreq: governors: Reset tunables only for cpufreq_unregister_governor() We are creating/removing sysfs directory of governor for for every call to GOV_START and STOP. This would fail for multi-policy system as there is a per-policy call to START/STOP. This patch reuses the governor->initialized variable to detect total users of governor. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 ++++-- drivers/cpufreq/cpufreq_governor.c | 32 ++++++++++++++++++------------ 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0dc9933069c5..cd7644554a62 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1552,8 +1552,10 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, policy->cpu, event); ret = policy->governor->governor(policy, event); - if (!policy->governor->initialized && (event == CPUFREQ_GOV_START)) - policy->governor->initialized = 1; + if (event == CPUFREQ_GOV_START) + policy->governor->initialized++; + else if (event == CPUFREQ_GOV_STOP) + policy->governor->initialized--; /* we keep one module reference alive for each CPU governed by this CPU */ diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e4a306c8ff1b..5a76086ff09b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -247,11 +247,13 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, dbs_data->gov_dbs_timer); } - rc = sysfs_create_group(cpufreq_global_kobject, - dbs_data->attr_group); - if (rc) { - mutex_unlock(&dbs_data->mutex); - return rc; + if (!policy->governor->initialized) { + rc = sysfs_create_group(cpufreq_global_kobject, + dbs_data->attr_group); + if (rc) { + mutex_unlock(&dbs_data->mutex); + return rc; + } } /* @@ -262,13 +264,15 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, cs_dbs_info->down_skip = 0; cs_dbs_info->enable = 1; cs_dbs_info->requested_freq = policy->cur; - cpufreq_register_notifier(cs_ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (!policy->governor->initialized) + if (!policy->governor->initialized) { + cpufreq_register_notifier(cs_ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + } } else { od_dbs_info->rate_mult = 1; od_dbs_info->sample_type = OD_NORMAL_SAMPLE; @@ -311,11 +315,13 @@ unlock: mutex_lock(&dbs_data->mutex); mutex_destroy(&cpu_cdbs->timer_mutex); - sysfs_remove_group(cpufreq_global_kobject, - dbs_data->attr_group); - if (dbs_data->governor == GOV_CONSERVATIVE) - cpufreq_unregister_notifier(cs_ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); + if (policy->governor->initialized == 1) { + sysfs_remove_group(cpufreq_global_kobject, + dbs_data->attr_group); + if (dbs_data->governor == GOV_CONSERVATIVE) + cpufreq_unregister_notifier(cs_ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } mutex_unlock(&dbs_data->mutex); break; From 65922465b5bc76b11181a7d3a6c936bb475775a3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Feb 2013 10:56:03 +0530 Subject: [PATCH 43/55] cpufreq: Remove unused HOTPLUG_CPU code Because the sibling cpu of any online cpu is identified very early in cpufreq_add_dev(), below code is never executed. And so can be removed. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cd7644554a62..41d72c3aa2b2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -852,7 +852,7 @@ static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling, static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { unsigned int j, cpu = dev->id; - int ret = -ENOMEM, found = 0; + int ret = -ENOMEM; struct cpufreq_policy *policy; unsigned long flags; #ifdef CONFIG_HOTPLUG_CPU @@ -900,6 +900,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) goto err_free_cpumask; policy->cpu = cpu; + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; cpumask_copy(policy->cpus, cpumask_of(cpu)); /* Initially set CPU itself as the policy_cpu */ @@ -910,20 +911,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); - /* Set governor before ->init, so that driver could check it */ -#ifdef CONFIG_HOTPLUG_CPU - for_each_online_cpu(sibling) { - struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling); - if (cp && cp->governor && - (cpumask_test_cpu(cpu, cp->related_cpus))) { - policy->governor = cp->governor; - found = 1; - break; - } - } -#endif - if (!found) - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; /* call driver. From then on the cpufreq must be able * to accept all calls to ->verify and ->setpolicy for this CPU */ From fa1d8af47f42671fa82779582ca60948f876a73e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Feb 2013 15:38:42 +0530 Subject: [PATCH 44/55] cpufreq: Create a macro for unlock_policy_rwsem{read,write} On the lines of macro: lock_policy_rwsem, we can create another macro for unlock_policy_rwsem. Lets do it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 41d72c3aa2b2..7cbf1d53804f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -70,8 +70,7 @@ static DEFINE_PER_CPU(int, cpufreq_policy_cpu); static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); #define lock_policy_rwsem(mode, cpu) \ -static int lock_policy_rwsem_##mode \ -(int cpu) \ +static int lock_policy_rwsem_##mode(int cpu) \ { \ int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ BUG_ON(policy_cpu == -1); \ @@ -81,23 +80,18 @@ static int lock_policy_rwsem_##mode \ } lock_policy_rwsem(read, cpu); - lock_policy_rwsem(write, cpu); -static void unlock_policy_rwsem_read(int cpu) -{ - int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); - BUG_ON(policy_cpu == -1); - up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); -} - -static void unlock_policy_rwsem_write(int cpu) -{ - int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); - BUG_ON(policy_cpu == -1); - up_write(&per_cpu(cpu_policy_rwsem, policy_cpu)); +#define unlock_policy_rwsem(mode, cpu) \ +static void unlock_policy_rwsem_##mode(int cpu) \ +{ \ + int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ + BUG_ON(policy_cpu == -1); \ + up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ } +unlock_policy_rwsem(read, cpu); +unlock_policy_rwsem(write, cpu); /* internal prototypes */ static int __cpufreq_governor(struct cpufreq_policy *policy, From 2eaa3e2df185997e92596ab14a2a67dde3876d2e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Feb 2013 10:55:00 +0530 Subject: [PATCH 45/55] cpufreq: Fix locking issues cpufreq core uses two locks: - cpufreq_driver_lock: General lock for driver and cpufreq_cpu_data array. - cpu_policy_rwsemfix locking: per CPU reader-writer semaphore designed to cure all cpufreq/hotplug/workqueue/etc related lock issues. These locks were not used properly and are placed against their principle (present before their definition) at various places. This patch is an attempt to fix their use. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 72 +++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7cbf1d53804f..e000f3691661 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -59,8 +59,6 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock); * mode before doing so. * * Additional rules: - * - All holders of the lock should check to make sure that the CPU they - * are concerned with are online after they get the lock. * - Governor routines that can be called in cpufreq hotplug path should not * take this sem as top level hotplug notifier handler takes this. * - Lock should not be held across @@ -257,6 +255,7 @@ static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) { struct cpufreq_policy *policy; + unsigned long flags; BUG_ON(irqs_disabled()); @@ -267,7 +266,10 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) pr_debug("notification %u of frequency transition to %u kHz\n", state, freqs->new); + spin_lock_irqsave(&cpufreq_driver_lock, flags); policy = per_cpu(cpufreq_cpu_data, freqs->cpu); + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + switch (state) { case CPUFREQ_PRECHANGE: @@ -808,22 +810,22 @@ static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling, policy = cpufreq_cpu_get(sibling); WARN_ON(!policy); - per_cpu(cpufreq_policy_cpu, cpu) = policy->cpu; - - lock_policy_rwsem_write(cpu); - __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + lock_policy_rwsem_write(sibling); + spin_lock_irqsave(&cpufreq_driver_lock, flags); + cpumask_set_cpu(cpu, policy->cpus); + per_cpu(cpufreq_policy_cpu, cpu) = policy->cpu; per_cpu(cpufreq_cpu_data, cpu) = policy; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + unlock_policy_rwsem_write(sibling); + __cpufreq_governor(policy, CPUFREQ_GOV_START); __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); - unlock_policy_rwsem_write(cpu); - ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); if (ret) { cpufreq_cpu_put(policy); @@ -870,11 +872,15 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) #ifdef CONFIG_HOTPLUG_CPU /* Check if this cpu was hot-unplugged earlier and has siblings */ + spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_online_cpu(sibling) { struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling); - if (cp && cpumask_test_cpu(cpu, cp->related_cpus)) + if (cp && cpumask_test_cpu(cpu, cp->related_cpus)) { + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); return cpufreq_add_policy_cpu(cpu, sibling, dev); + } } + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); #endif #endif @@ -899,8 +905,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Initially set CPU itself as the policy_cpu */ per_cpu(cpufreq_policy_cpu, cpu) = cpu; - ret = (lock_policy_rwsem_write(cpu) < 0); - WARN_ON(ret); init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); @@ -911,7 +915,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) ret = cpufreq_driver->init(policy); if (ret) { pr_debug("initialization failed\n"); - goto err_unlock_policy; + goto err_set_policy_cpu; } /* related cpus should atleast have policy->cpus */ @@ -942,8 +946,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) if (ret) goto err_out_unregister; - unlock_policy_rwsem_write(cpu); - kobject_uevent(&policy->kobj, KOBJ_ADD); module_put(cpufreq_driver->owner); pr_debug("initialization complete\n"); @@ -959,8 +961,8 @@ err_out_unregister: kobject_put(&policy->kobj); wait_for_completion(&policy->kobj_unregister); -err_unlock_policy: - unlock_policy_rwsem_write(cpu); +err_set_policy_cpu: + per_cpu(cpufreq_policy_cpu, cpu) = -1; free_cpumask_var(policy->related_cpus); err_free_cpumask: free_cpumask_var(policy->cpus); @@ -1008,12 +1010,14 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif pr_debug("%s: unregistering CPU %u\n", __func__, cpu); spin_lock_irqsave(&cpufreq_driver_lock, flags); + data = per_cpu(cpufreq_cpu_data, cpu); + per_cpu(cpufreq_cpu_data, cpu) = NULL; + + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); if (!data) { pr_debug("%s: No cpu_data found\n", __func__); - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - unlock_policy_rwsem_write(cpu); return -EINVAL; } @@ -1025,9 +1029,10 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif CPUFREQ_NAME_LEN); #endif - per_cpu(cpufreq_cpu_data, cpu) = NULL; + WARN_ON(lock_policy_rwsem_write(cpu)); cpus = cpumask_weight(data->cpus); cpumask_clear_cpu(cpu, data->cpus); + unlock_policy_rwsem_write(cpu); if (cpu != data->cpu) { sysfs_remove_link(&dev->kobj, "cpufreq"); @@ -1038,31 +1043,37 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif ret = kobject_move(&data->kobj, &cpu_dev->kobj); if (ret) { pr_err("%s: Failed to move kobj: %d", __func__, ret); + + WARN_ON(lock_policy_rwsem_write(cpu)); cpumask_set_cpu(cpu, data->cpus); + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + per_cpu(cpufreq_cpu_data, cpu) = data; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + unlock_policy_rwsem_write(cpu); + ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj, "cpufreq"); - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - unlock_policy_rwsem_write(cpu); return -EINVAL; } + WARN_ON(lock_policy_rwsem_write(cpu)); update_policy_cpu(data, cpu_dev->id); + unlock_policy_rwsem_write(cpu); pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n", __func__, cpu_dev->id, cpu); } - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); cpufreq_cpu_put(data); - unlock_policy_rwsem_write(cpu); /* If cpu is last user of policy, free policy */ if (cpus == 1) { - lock_policy_rwsem_write(cpu); + lock_policy_rwsem_read(cpu); kobj = &data->kobj; cmp = &data->kobj_unregister; - unlock_policy_rwsem_write(cpu); + unlock_policy_rwsem_read(cpu); kobject_put(kobj); /* we need to make sure that the underlying kobj is actually @@ -1073,10 +1084,8 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif wait_for_completion(cmp); pr_debug("wait complete\n"); - lock_policy_rwsem_write(cpu); if (cpufreq_driver->exit) cpufreq_driver->exit(data); - unlock_policy_rwsem_write(cpu); free_cpumask_var(data->related_cpus); free_cpumask_var(data->cpus); @@ -1086,6 +1095,7 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); } + per_cpu(cpufreq_policy_cpu, cpu) = -1; return 0; } @@ -1098,9 +1108,6 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (cpu_is_offline(cpu)) return 0; - if (unlikely(lock_policy_rwsem_write(cpu))) - BUG(); - retval = __cpufreq_remove_dev(dev, sif); return retval; } @@ -1791,9 +1798,6 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: - if (unlikely(lock_policy_rwsem_write(cpu))) - BUG(); - __cpufreq_remove_dev(dev, NULL); break; case CPU_DOWN_FAILED: From 9e21ba8bd849251c8ba22ddf39308a5227134fdc Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Wed, 6 Feb 2013 09:02:08 -0800 Subject: [PATCH 46/55] cpufreq: Retrieve current frequency from scaling drivers with internal governors Scaling drivers that implement the cpufreq_driver.setpolicy() versus the cpufreq_driver.target() interface do not set policy->cur. Normally policy->cur is set during the call to cpufreq_driver.target() when the frequnecy request is made by the governor. If the scaling driver implements cpufreq_driver.setpolicy() and cpufreq_driver.get() interfaces use cpufreq_driver.get() to retrieve the current frequency. Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e000f3691661..e98035dc2265 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1156,9 +1156,13 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, */ unsigned int cpufreq_quick_get(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy; unsigned int ret_freq = 0; + if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) + return cpufreq_driver->get(cpu); + + policy = cpufreq_cpu_get(cpu); if (policy) { ret_freq = policy->cur; cpufreq_cpu_put(policy); From f6b0515b078f641d00ca531a9f0ae34f3b05ec20 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Wed, 6 Feb 2013 09:02:09 -0800 Subject: [PATCH 47/55] cpufreq: Only call cpufreq_out_of_sync() for driver that implement cpufreq_driver.target() Scaling drivers that implement cpufreq_driver.setpolicy() have internal governors that do not signal changes via cpufreq_notify_transition() so the frequncy in the policy will almost certainly be different than the current frequncy. Only call cpufreq_out_of_sync() when the underlying driver implements cpufreq_driver.target() Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e98035dc2265..480c49c71afe 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1770,7 +1770,7 @@ int cpufreq_update_policy(unsigned int cpu) pr_debug("Driver did not initialize current freq"); data->cur = policy.cur; } else { - if (data->cur != policy.cur) + if (data->cur != policy.cur && cpufreq_driver->target) cpufreq_out_of_sync(cpu, data->cur, policy.cur); } From fa69e33f7d38e658583ff910642da7ea4515ad74 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Wed, 6 Feb 2013 09:02:11 -0800 Subject: [PATCH 48/55] cpufreq: Do not track governor name for scaling drivers with internal governors. Scaling drivers that implement internal governors do not have governor structures assocaited with them. Only track the name of the governor associated with the CPU if the driver does not implement cpufreq_driver.setpolicy() Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 480c49c71afe..94117a7f219b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1025,8 +1025,9 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif __cpufreq_governor(data, CPUFREQ_GOV_STOP); #ifdef CONFIG_HOTPLUG_CPU - strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name, - CPUFREQ_NAME_LEN); + if (!cpufreq_driver->setpolicy) + strncpy(per_cpu(cpufreq_cpu_governor, cpu), + data->governor->name, CPUFREQ_NAME_LEN); #endif WARN_ON(lock_policy_rwsem_write(cpu)); From 633d47d653f3a717fb68293d24ab7059c480f71b Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Wed, 6 Feb 2013 09:02:12 -0800 Subject: [PATCH 49/55] cpufreq_stats: do not remove sysfs files if frequency table is not present The sysfs files for cpufreq_stats are created in cpufreq_stats_create_table() called from cpufreq_stat_notifier_policy() when a policy is added to the cpu. cpufreq_stats_create_table() will not be called if the scaling driver does not export a frequency table to cpufreq. Use the same fence on tear down. Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index a2dee4cedf41..2fd779eb1ed1 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -179,6 +179,10 @@ static void cpufreq_stats_free_table(unsigned int cpu) static void cpufreq_stats_free_sysfs(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + + if (!cpufreq_frequency_get_table(cpu)) + return; + if (policy && !policy_is_shared(policy)) { pr_debug("%s: Free sysfs stat\n", __func__); sysfs_remove_group(&policy->kobj, &stats_attr_group); From 93f0822dff5dae2f0a2645f16300c14af41ca777 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Wed, 6 Feb 2013 09:02:13 -0800 Subject: [PATCH 50/55] cpufreq/x86: Add P-state driver for sandy bridge. Add a P-state driver for the Intel Sandy bridge processor. In cpufreq terminology this driver implements a scaling driver with an internal governor. When built into the the kernel this driver will be the preferred scaling driver for Sandy bridge processors. In addition to the interfaces provided by the cpufreq subsystem for controlling scaling drivers. The user may control the behavior of the driver via three sysfs files located in "/sys/devices/system/cpu/intel_pstate". max_perf_pct: limits the maximum P state that will be requested by the driver stated as a percentage of the avail performance. min_perf_pct: limits the minimum P state that will be requested by the driver stated as a percentage of the avail performance. no_turbo: limits the driver to selecting P states below the turbo frequency range. Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.x86 | 18 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/intel_pstate.c | 806 +++++++++++++++++++++++++++++++++ 3 files changed, 825 insertions(+) create mode 100644 drivers/cpufreq/intel_pstate.c diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 7227cd734042..6aa7053ce2ef 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -2,6 +2,24 @@ # x86 CPU Frequency scaling drivers # +config X86_INTEL_PSTATE + tristate "Intel P state control" + depends on X86 + help + This driver provides a P state for Intel core processors. + The driver implements an internal governor and will become + the scaling driver and governor for Sandy bridge processors. + + When this driver is enabled it will become the perferred + scaling driver for Sandy bridge processors. + + Note: This driver should be built with the same settings as + the other scaling drivers configured into the system + (module/built-in) in order for the driver to register itself + as the scaling driver on the system. + + If in doubt, say N. + config X86_PCC_CPUFREQ tristate "Processor Clocking Control interface driver" depends on ACPI && ACPI_PROCESSOR diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 93610b284b35..cd3d4ffe9626 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o +obj-$(CONFIG_X86_INTEL_PSTATE) += intel_pstate.o ################################################################################## # ARM SoC drivers diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c new file mode 100644 index 000000000000..86ad4822305d --- /dev/null +++ b/drivers/cpufreq/intel_pstate.c @@ -0,0 +1,806 @@ +/* + * cpufreq_snb.c: Native P state management for Intel processors + * + * (C) Copyright 2012 Intel Corporation + * Author: Dirk Brandewie + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define SAMPLE_COUNT 3 + +#define FRAC_BITS 8 +#define int_tofp(X) ((int64_t)(X) << FRAC_BITS) +#define fp_toint(X) ((X) >> FRAC_BITS) + +static inline int32_t mul_fp(int32_t x, int32_t y) +{ + return ((int64_t)x * (int64_t)y) >> FRAC_BITS; +} + +static inline int32_t div_fp(int32_t x, int32_t y) +{ + return div_s64((int64_t)x << FRAC_BITS, (int64_t)y); +} + +struct sample { + ktime_t start_time; + ktime_t end_time; + int core_pct_busy; + int pstate_pct_busy; + u64 duration_us; + u64 idletime_us; + u64 aperf; + u64 mperf; + int freq; +}; + +struct pstate_data { + int current_pstate; + int min_pstate; + int max_pstate; + int turbo_pstate; +}; + +struct _pid { + int setpoint; + int32_t integral; + int32_t p_gain; + int32_t i_gain; + int32_t d_gain; + int deadband; + int last_err; +}; + +struct cpudata { + int cpu; + + char name[64]; + + struct timer_list timer; + + struct pstate_adjust_policy *pstate_policy; + struct pstate_data pstate; + struct _pid pid; + struct _pid idle_pid; + + int min_pstate_count; + int idle_mode; + + ktime_t prev_sample; + u64 prev_idle_time_us; + u64 prev_aperf; + u64 prev_mperf; + int sample_ptr; + struct sample samples[SAMPLE_COUNT]; +}; + +static struct cpudata **all_cpu_data; +struct pstate_adjust_policy { + int sample_rate_ms; + int deadband; + int setpoint; + int p_gain_pct; + int d_gain_pct; + int i_gain_pct; +}; + +static struct pstate_adjust_policy default_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 109, + .p_gain_pct = 17, + .d_gain_pct = 0, + .i_gain_pct = 4, +}; + +struct perf_limits { + int no_turbo; + int max_perf_pct; + int min_perf_pct; + int32_t max_perf; + int32_t min_perf; +}; + +static struct perf_limits limits = { + .no_turbo = 0, + .max_perf_pct = 100, + .max_perf = int_tofp(1), + .min_perf_pct = 0, + .min_perf = 0, +}; + +static inline void pid_reset(struct _pid *pid, int setpoint, int busy, + int deadband, int integral) { + pid->setpoint = setpoint; + pid->deadband = deadband; + pid->integral = int_tofp(integral); + pid->last_err = setpoint - busy; +} + +static inline void pid_p_gain_set(struct _pid *pid, int percent) +{ + pid->p_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static inline void pid_i_gain_set(struct _pid *pid, int percent) +{ + pid->i_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static inline void pid_d_gain_set(struct _pid *pid, int percent) +{ + + pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static signed int pid_calc(struct _pid *pid, int busy) +{ + signed int err, result; + int32_t pterm, dterm, fp_error; + int32_t integral_limit; + + err = pid->setpoint - busy; + fp_error = int_tofp(err); + + if (abs(err) <= pid->deadband) + return 0; + + pterm = mul_fp(pid->p_gain, fp_error); + + pid->integral += fp_error; + + /* limit the integral term */ + integral_limit = int_tofp(30); + if (pid->integral > integral_limit) + pid->integral = integral_limit; + if (pid->integral < -integral_limit) + pid->integral = -integral_limit; + + dterm = mul_fp(pid->d_gain, (err - pid->last_err)); + pid->last_err = err; + + result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; + + return (signed int)fp_toint(result); +} + +static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) +{ + pid_p_gain_set(&cpu->pid, cpu->pstate_policy->p_gain_pct); + pid_d_gain_set(&cpu->pid, cpu->pstate_policy->d_gain_pct); + pid_i_gain_set(&cpu->pid, cpu->pstate_policy->i_gain_pct); + + pid_reset(&cpu->pid, + cpu->pstate_policy->setpoint, + 100, + cpu->pstate_policy->deadband, + 0); +} + +static inline void intel_pstate_idle_pid_reset(struct cpudata *cpu) +{ + pid_p_gain_set(&cpu->idle_pid, cpu->pstate_policy->p_gain_pct); + pid_d_gain_set(&cpu->idle_pid, cpu->pstate_policy->d_gain_pct); + pid_i_gain_set(&cpu->idle_pid, cpu->pstate_policy->i_gain_pct); + + pid_reset(&cpu->idle_pid, + 75, + 50, + cpu->pstate_policy->deadband, + 0); +} + +static inline void intel_pstate_reset_all_pid(void) +{ + unsigned int cpu; + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) + intel_pstate_busy_pid_reset(all_cpu_data[cpu]); + } +} + +/************************** debugfs begin ************************/ +static int pid_param_set(void *data, u64 val) +{ + *(u32 *)data = val; + intel_pstate_reset_all_pid(); + return 0; +} +static int pid_param_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, + pid_param_set, "%llu\n"); + +struct pid_param { + char *name; + void *value; +}; + +static struct pid_param pid_files[] = { + {"sample_rate_ms", &default_policy.sample_rate_ms}, + {"d_gain_pct", &default_policy.d_gain_pct}, + {"i_gain_pct", &default_policy.i_gain_pct}, + {"deadband", &default_policy.deadband}, + {"setpoint", &default_policy.setpoint}, + {"p_gain_pct", &default_policy.p_gain_pct}, + {NULL, NULL} +}; + +static struct dentry *debugfs_parent; +static void intel_pstate_debug_expose_params(void) +{ + int i = 0; + + debugfs_parent = debugfs_create_dir("pstate_snb", NULL); + if (IS_ERR_OR_NULL(debugfs_parent)) + return; + while (pid_files[i].name) { + debugfs_create_file(pid_files[i].name, 0660, + debugfs_parent, pid_files[i].value, + &fops_pid_param); + i++; + } +} + +/************************** debugfs end ************************/ + +/************************** sysfs begin ************************/ +#define show_one(file_name, object) \ + static ssize_t show_##file_name \ + (struct kobject *kobj, struct attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%u\n", limits.object); \ + } + +static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + limits.no_turbo = clamp_t(int, input, 0 , 1); + + return count; +} + +static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + limits.max_perf_pct = clamp_t(int, input, 0 , 100); + limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + return count; +} + +static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + limits.min_perf_pct = clamp_t(int, input, 0 , 100); + limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); + + return count; +} + +show_one(no_turbo, no_turbo); +show_one(max_perf_pct, max_perf_pct); +show_one(min_perf_pct, min_perf_pct); + +define_one_global_rw(no_turbo); +define_one_global_rw(max_perf_pct); +define_one_global_rw(min_perf_pct); + +static struct attribute *intel_pstate_attributes[] = { + &no_turbo.attr, + &max_perf_pct.attr, + &min_perf_pct.attr, + NULL +}; + +static struct attribute_group intel_pstate_attr_group = { + .attrs = intel_pstate_attributes, +}; +static struct kobject *intel_pstate_kobject; + +static void intel_pstate_sysfs_expose_params(void) +{ + int rc; + + intel_pstate_kobject = kobject_create_and_add("intel_pstate", + &cpu_subsys.dev_root->kobj); + BUG_ON(!intel_pstate_kobject); + rc = sysfs_create_group(intel_pstate_kobject, + &intel_pstate_attr_group); + BUG_ON(rc); +} + +/************************** sysfs end ************************/ + +static int intel_pstate_min_pstate(void) +{ + u64 value; + rdmsrl(0xCE, value); + return (value >> 40) & 0xFF; +} + +static int intel_pstate_max_pstate(void) +{ + u64 value; + rdmsrl(0xCE, value); + return (value >> 8) & 0xFF; +} + +static int intel_pstate_turbo_pstate(void) +{ + u64 value; + int nont, ret; + rdmsrl(0x1AD, value); + nont = intel_pstate_max_pstate(); + ret = ((value) & 255); + if (ret <= nont) + ret = nont; + return ret; +} + +static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) +{ + int max_perf = cpu->pstate.turbo_pstate; + int min_perf; + if (limits.no_turbo) + max_perf = cpu->pstate.max_pstate; + + max_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); + *max = clamp_t(int, max_perf, + cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); + + min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf)); + *min = clamp_t(int, min_perf, + cpu->pstate.min_pstate, max_perf); +} + +static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) +{ + int max_perf, min_perf; + + intel_pstate_get_min_max(cpu, &min_perf, &max_perf); + + pstate = clamp_t(int, pstate, min_perf, max_perf); + + if (pstate == cpu->pstate.current_pstate) + return; + +#ifndef MODULE + trace_cpu_frequency(pstate * 100000, cpu->cpu); +#endif + cpu->pstate.current_pstate = pstate; + wrmsrl(MSR_IA32_PERF_CTL, pstate << 8); + +} + +static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps) +{ + int target; + target = cpu->pstate.current_pstate + steps; + + intel_pstate_set_pstate(cpu, target); +} + +static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps) +{ + int target; + target = cpu->pstate.current_pstate - steps; + intel_pstate_set_pstate(cpu, target); +} + +static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) +{ + sprintf(cpu->name, "Intel 2nd generation core"); + + cpu->pstate.min_pstate = intel_pstate_min_pstate(); + cpu->pstate.max_pstate = intel_pstate_max_pstate(); + cpu->pstate.turbo_pstate = intel_pstate_turbo_pstate(); + + /* + * goto max pstate so we don't slow up boot if we are built-in if we are + * a module we will take care of it during normal operation + */ + intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); +} + +static inline void intel_pstate_calc_busy(struct cpudata *cpu, + struct sample *sample) +{ + u64 core_pct; + sample->pstate_pct_busy = 100 - div64_u64( + sample->idletime_us * 100, + sample->duration_us); + core_pct = div64_u64(sample->aperf * 100, sample->mperf); + sample->freq = cpu->pstate.turbo_pstate * core_pct * 1000; + + sample->core_pct_busy = sample->pstate_pct_busy * core_pct / 100; +} + +static inline void intel_pstate_sample(struct cpudata *cpu) +{ + ktime_t now; + u64 idle_time_us; + u64 aperf, mperf; + + now = ktime_get(); + idle_time_us = get_cpu_idle_time_us(cpu->cpu, NULL); + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + /* for the first sample, don't actually record a sample, just + * set the baseline */ + if (cpu->prev_idle_time_us > 0) { + cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; + cpu->samples[cpu->sample_ptr].start_time = cpu->prev_sample; + cpu->samples[cpu->sample_ptr].end_time = now; + cpu->samples[cpu->sample_ptr].duration_us = + ktime_us_delta(now, cpu->prev_sample); + cpu->samples[cpu->sample_ptr].idletime_us = + idle_time_us - cpu->prev_idle_time_us; + + cpu->samples[cpu->sample_ptr].aperf = aperf; + cpu->samples[cpu->sample_ptr].mperf = mperf; + cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; + cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; + + intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); + } + + cpu->prev_sample = now; + cpu->prev_idle_time_us = idle_time_us; + cpu->prev_aperf = aperf; + cpu->prev_mperf = mperf; +} + +static inline void intel_pstate_set_sample_time(struct cpudata *cpu) +{ + int sample_time, delay; + + sample_time = cpu->pstate_policy->sample_rate_ms; + delay = msecs_to_jiffies(sample_time); + delay -= jiffies % delay; + mod_timer_pinned(&cpu->timer, jiffies + delay); +} + +static inline void intel_pstate_idle_mode(struct cpudata *cpu) +{ + cpu->idle_mode = 1; +} + +static inline void intel_pstate_normal_mode(struct cpudata *cpu) +{ + cpu->idle_mode = 0; +} + +static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu) +{ + int32_t busy_scaled; + int32_t core_busy, turbo_pstate, current_pstate; + + core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy); + turbo_pstate = int_tofp(cpu->pstate.turbo_pstate); + current_pstate = int_tofp(cpu->pstate.current_pstate); + busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate)); + + return fp_toint(busy_scaled); +} + +static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) +{ + int busy_scaled; + struct _pid *pid; + signed int ctl = 0; + int steps; + + pid = &cpu->pid; + busy_scaled = intel_pstate_get_scaled_busy(cpu); + + ctl = pid_calc(pid, busy_scaled); + + steps = abs(ctl); + if (ctl < 0) + intel_pstate_pstate_increase(cpu, steps); + else + intel_pstate_pstate_decrease(cpu, steps); +} + +static inline void intel_pstate_adjust_idle_pstate(struct cpudata *cpu) +{ + int busy_scaled; + struct _pid *pid; + int ctl = 0; + int steps; + + pid = &cpu->idle_pid; + + busy_scaled = intel_pstate_get_scaled_busy(cpu); + + ctl = pid_calc(pid, 100 - busy_scaled); + + steps = abs(ctl); + if (ctl < 0) + intel_pstate_pstate_decrease(cpu, steps); + else + intel_pstate_pstate_increase(cpu, steps); + + if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) + intel_pstate_normal_mode(cpu); +} + +static void intel_pstate_timer_func(unsigned long __data) +{ + struct cpudata *cpu = (struct cpudata *) __data; + + intel_pstate_sample(cpu); + + if (!cpu->idle_mode) + intel_pstate_adjust_busy_pstate(cpu); + else + intel_pstate_adjust_idle_pstate(cpu); + +#if defined(XPERF_FIX) + if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) { + cpu->min_pstate_count++; + if (!(cpu->min_pstate_count % 5)) { + intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); + intel_pstate_idle_mode(cpu); + } + } else + cpu->min_pstate_count = 0; +#endif + intel_pstate_set_sample_time(cpu); +} + +#define ICPU(model, policy) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&policy } + +static const struct x86_cpu_id intel_pstate_cpu_ids[] = { + ICPU(0x2a, default_policy), + ICPU(0x2d, default_policy), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); + +static int intel_pstate_init_cpu(unsigned int cpunum) +{ + + const struct x86_cpu_id *id; + struct cpudata *cpu; + + id = x86_match_cpu(intel_pstate_cpu_ids); + if (!id) + return -ENODEV; + + all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL); + if (!all_cpu_data[cpunum]) + return -ENOMEM; + + cpu = all_cpu_data[cpunum]; + + intel_pstate_get_cpu_pstates(cpu); + + cpu->cpu = cpunum; + cpu->pstate_policy = + (struct pstate_adjust_policy *)id->driver_data; + init_timer_deferrable(&cpu->timer); + cpu->timer.function = intel_pstate_timer_func; + cpu->timer.data = + (unsigned long)cpu; + cpu->timer.expires = jiffies + HZ/100; + intel_pstate_busy_pid_reset(cpu); + intel_pstate_idle_pid_reset(cpu); + intel_pstate_sample(cpu); + intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); + + add_timer_on(&cpu->timer, cpunum); + + pr_info("Intel pstate controlling: cpu %d\n", cpunum); + + return 0; +} + +static unsigned int intel_pstate_get(unsigned int cpu_num) +{ + struct sample *sample; + struct cpudata *cpu; + + cpu = all_cpu_data[cpu_num]; + if (!cpu) + return 0; + sample = &cpu->samples[cpu->sample_ptr]; + return sample->freq; +} + +static int intel_pstate_set_policy(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + int min, max; + + cpu = all_cpu_data[policy->cpu]; + + intel_pstate_get_min_max(cpu, &min, &max); + + limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; + limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); + limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); + + limits.max_perf_pct = policy->max * 100 / policy->cpuinfo.max_freq; + limits.max_perf_pct = clamp_t(int, limits.max_perf_pct, 0 , 100); + limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + limits.min_perf_pct = 100; + limits.min_perf = int_tofp(1); + limits.max_perf_pct = 100; + limits.max_perf = int_tofp(1); + limits.no_turbo = 0; + } + + return 0; +} + +static int intel_pstate_verify_policy(struct cpufreq_policy *policy) +{ + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && + (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) + return -EINVAL; + + return 0; +} + +static int __cpuinit intel_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + int cpu = policy->cpu; + + del_timer(&all_cpu_data[cpu]->timer); + kfree(all_cpu_data[cpu]); + all_cpu_data[cpu] = NULL; + return 0; +} + +static int __cpuinit intel_pstate_cpu_init(struct cpufreq_policy *policy) +{ + int rc, min_pstate, max_pstate; + struct cpudata *cpu; + + rc = intel_pstate_init_cpu(policy->cpu); + if (rc) + return rc; + + cpu = all_cpu_data[policy->cpu]; + + if (!limits.no_turbo && + limits.min_perf_pct == 100 && limits.max_perf_pct == 100) + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + else + policy->policy = CPUFREQ_POLICY_POWERSAVE; + + intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate); + policy->min = min_pstate * 100000; + policy->max = max_pstate * 100000; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000; + policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + cpumask_set_cpu(policy->cpu, policy->cpus); + + return 0; +} + +static struct cpufreq_driver intel_pstate_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = intel_pstate_verify_policy, + .setpolicy = intel_pstate_set_policy, + .get = intel_pstate_get, + .init = intel_pstate_cpu_init, + .exit = intel_pstate_cpu_exit, + .name = "intel_pstate", + .owner = THIS_MODULE, +}; + +static void intel_pstate_exit(void) +{ + int cpu; + + sysfs_remove_group(intel_pstate_kobject, + &intel_pstate_attr_group); + debugfs_remove_recursive(debugfs_parent); + + cpufreq_unregister_driver(&intel_pstate_driver); + + if (!all_cpu_data) + return; + + get_online_cpus(); + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) { + del_timer_sync(&all_cpu_data[cpu]->timer); + kfree(all_cpu_data[cpu]); + } + } + + put_online_cpus(); + vfree(all_cpu_data); +} +module_exit(intel_pstate_exit); + +static int __init intel_pstate_init(void) +{ + int rc = 0; + const struct x86_cpu_id *id; + + id = x86_match_cpu(intel_pstate_cpu_ids); + if (!id) + return -ENODEV; + + pr_info("Intel P-state driver initializing.\n"); + + all_cpu_data = vmalloc(sizeof(void *) * num_possible_cpus()); + if (!all_cpu_data) + return -ENOMEM; + memset(all_cpu_data, 0, sizeof(void *) * num_possible_cpus()); + + rc = cpufreq_register_driver(&intel_pstate_driver); + if (rc) + goto out; + + intel_pstate_debug_expose_params(); + intel_pstate_sysfs_expose_params(); + return rc; +out: + intel_pstate_exit(); + return -ENODEV; +} +device_initcall(intel_pstate_init); + +MODULE_AUTHOR("Dirk Brandewie "); +MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors"); +MODULE_LICENSE("GPL"); From 2a4bd9f0db24ba14c8b38777d77add2682233c79 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 5 Feb 2013 22:52:51 +0100 Subject: [PATCH 51/55] cpufreq: kirkwood: Add a cpufreq driver for Marvell Kirkwood SoCs The Marvell Kirkwood SoCs have simple cpufreq support in hardware. The CPU can either use the a high speed cpu clock, or the slower DDR clock. Add a driver to swap between these two clock sources. Signed-off-by: Andrew Lunn Acked-by: Jason Cooper Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- .../devicetree/bindings/arm/kirkwood.txt | 27 ++ drivers/clk/mvebu/clk-gating-ctrl.c | 1 + drivers/cpufreq/Kconfig.arm | 6 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/kirkwood-cpufreq.c | 259 ++++++++++++++++++ 5 files changed, 294 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/kirkwood.txt create mode 100644 drivers/cpufreq/kirkwood-cpufreq.c diff --git a/Documentation/devicetree/bindings/arm/kirkwood.txt b/Documentation/devicetree/bindings/arm/kirkwood.txt new file mode 100644 index 000000000000..98cce9a653eb --- /dev/null +++ b/Documentation/devicetree/bindings/arm/kirkwood.txt @@ -0,0 +1,27 @@ +Marvell Kirkwood Platforms Device Tree Bindings +----------------------------------------------- + +Boards with a SoC of the Marvell Kirkwood +shall have the following property: + +Required root node property: + +compatible: must contain "marvell,kirkwood"; + +In order to support the kirkwood cpufreq driver, there must be a node +cpus/cpu@0 with three clocks, "cpu_clk", "ddrclk" and "powersave", +where the "powersave" clock is a gating clock used to switch the CPU +between the "cpu_clk" and the "ddrclk". + +Example: + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "marvell,sheeva-88SV131"; + clocks = <&core_clk 1>, <&core_clk 3>, <&gate_clk 11>; + clock-names = "cpu_clk", "ddrclk", "powersave"; + }; diff --git a/drivers/clk/mvebu/clk-gating-ctrl.c b/drivers/clk/mvebu/clk-gating-ctrl.c index 8fa5408b6c7d..ebf141d4374b 100644 --- a/drivers/clk/mvebu/clk-gating-ctrl.c +++ b/drivers/clk/mvebu/clk-gating-ctrl.c @@ -193,6 +193,7 @@ static const struct mvebu_soc_descr __initconst kirkwood_gating_descr[] = { { "runit", NULL, 7 }, { "xor0", NULL, 8 }, { "audio", NULL, 9 }, + { "powersave", "cpuclk", 11 }, { "sata0", NULL, 14 }, { "sata1", NULL, 15 }, { "xor1", NULL, 16 }, diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index c65226c4717d..7f333af1c059 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -77,6 +77,12 @@ config ARM_EXYNOS5250_CPUFREQ This adds the CPUFreq driver for Samsung EXYNOS5250 SoC. +config ARM_KIRKWOOD_CPUFREQ + def_bool ARCH_KIRKWOOD && OF + help + This adds the CPUFreq driver for Marvell Kirkwood + SoCs. + config ARM_IMX6Q_CPUFREQ tristate "Freescale i.MX6Q cpufreq support" depends on SOC_IMX6Q diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index cd3d4ffe9626..5399c45ac311 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_ARM_EXYNOS_CPUFREQ) += exynos-cpufreq.o obj-$(CONFIG_ARM_EXYNOS4210_CPUFREQ) += exynos4210-cpufreq.o obj-$(CONFIG_ARM_EXYNOS4X12_CPUFREQ) += exynos4x12-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5250_CPUFREQ) += exynos5250-cpufreq.o +obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c new file mode 100644 index 000000000000..0e83e3c24f5b --- /dev/null +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -0,0 +1,259 @@ +/* + * kirkwood_freq.c: cpufreq driver for the Marvell kirkwood + * + * Copyright (C) 2013 Andrew Lunn + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CPU_SW_INT_BLK BIT(28) + +static struct priv +{ + struct clk *cpu_clk; + struct clk *ddr_clk; + struct clk *powersave_clk; + struct device *dev; + void __iomem *base; +} priv; + +#define STATE_CPU_FREQ 0x01 +#define STATE_DDR_FREQ 0x02 + +/* + * Kirkwood can swap the clock to the CPU between two clocks: + * + * - cpu clk + * - ddr clk + * + * The frequencies are set at runtime before registering this * + * table. + */ +static struct cpufreq_frequency_table kirkwood_freq_table[] = { + {STATE_CPU_FREQ, 0}, /* CPU uses cpuclk */ + {STATE_DDR_FREQ, 0}, /* CPU uses ddrclk */ + {0, CPUFREQ_TABLE_END}, +}; + +static unsigned int kirkwood_cpufreq_get_cpu_frequency(unsigned int cpu) +{ + if (__clk_is_enabled(priv.powersave_clk)) + return kirkwood_freq_table[1].frequency; + return kirkwood_freq_table[0].frequency; +} + +static void kirkwood_cpufreq_set_cpu_state(unsigned int index) +{ + struct cpufreq_freqs freqs; + unsigned int state = kirkwood_freq_table[index].index; + unsigned long reg; + + freqs.old = kirkwood_cpufreq_get_cpu_frequency(0); + freqs.new = kirkwood_freq_table[index].frequency; + freqs.cpu = 0; /* Kirkwood is UP */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dev_dbg(priv.dev, "Attempting to set frequency to %i KHz\n", + kirkwood_freq_table[index].frequency); + dev_dbg(priv.dev, "old frequency was %i KHz\n", + kirkwood_cpufreq_get_cpu_frequency(0)); + + if (freqs.old != freqs.new) { + local_irq_disable(); + + /* Disable interrupts to the CPU */ + reg = readl_relaxed(priv.base); + reg |= CPU_SW_INT_BLK; + writel_relaxed(reg, priv.base); + + switch (state) { + case STATE_CPU_FREQ: + clk_disable(priv.powersave_clk); + break; + case STATE_DDR_FREQ: + clk_enable(priv.powersave_clk); + break; + } + + /* Wait-for-Interrupt, while the hardware changes frequency */ + cpu_do_idle(); + + /* Enable interrupts to the CPU */ + reg = readl_relaxed(priv.base); + reg &= ~CPU_SW_INT_BLK; + writel_relaxed(reg, priv.base); + + local_irq_enable(); + } + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + +static int kirkwood_cpufreq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, kirkwood_freq_table); +} + +static int kirkwood_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int index = 0; + + if (cpufreq_frequency_table_target(policy, kirkwood_freq_table, + target_freq, relation, &index)) + return -EINVAL; + + kirkwood_cpufreq_set_cpu_state(index); + + return 0; +} + +/* Module init and exit code */ +static int kirkwood_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + int result; + + /* cpuinfo and default policy values */ + policy->cpuinfo.transition_latency = 5000; /* 5uS */ + policy->cur = kirkwood_cpufreq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, kirkwood_freq_table); + if (result) + return result; + + cpufreq_frequency_table_get_attr(kirkwood_freq_table, policy->cpu); + + return 0; +} + +static int kirkwood_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr *kirkwood_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver kirkwood_cpufreq_driver = { + .get = kirkwood_cpufreq_get_cpu_frequency, + .verify = kirkwood_cpufreq_verify, + .target = kirkwood_cpufreq_target, + .init = kirkwood_cpufreq_cpu_init, + .exit = kirkwood_cpufreq_cpu_exit, + .name = "kirkwood-cpufreq", + .owner = THIS_MODULE, + .attr = kirkwood_cpufreq_attr, +}; + +static int kirkwood_cpufreq_probe(struct platform_device *pdev) +{ + struct device_node *np; + struct resource *res; + int err; + + priv.dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Cannot get memory resource\n"); + return -ENODEV; + } + priv.base = devm_request_and_ioremap(&pdev->dev, res); + if (!priv.base) { + dev_err(&pdev->dev, "Cannot ioremap\n"); + return -EADDRNOTAVAIL; + } + + np = of_find_node_by_path("/cpus/cpu@0"); + if (!np) + return -ENODEV; + + priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); + if (IS_ERR(priv.cpu_clk)) { + dev_err(priv.dev, "Unable to get cpuclk"); + return PTR_ERR(priv.cpu_clk); + } + + clk_prepare_enable(priv.cpu_clk); + kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000; + + priv.ddr_clk = of_clk_get_by_name(np, "ddrclk"); + if (IS_ERR(priv.ddr_clk)) { + dev_err(priv.dev, "Unable to get ddrclk"); + err = PTR_ERR(priv.ddr_clk); + goto out_cpu; + } + + clk_prepare_enable(priv.ddr_clk); + kirkwood_freq_table[1].frequency = clk_get_rate(priv.ddr_clk) / 1000; + + priv.powersave_clk = of_clk_get_by_name(np, "powersave"); + if (IS_ERR(priv.powersave_clk)) { + dev_err(priv.dev, "Unable to get powersave"); + err = PTR_ERR(priv.powersave_clk); + goto out_ddr; + } + clk_prepare(priv.powersave_clk); + + of_node_put(np); + np = NULL; + + err = cpufreq_register_driver(&kirkwood_cpufreq_driver); + if (!err) + return 0; + + dev_err(priv.dev, "Failed to register cpufreq driver"); + + clk_disable_unprepare(priv.powersave_clk); +out_ddr: + clk_disable_unprepare(priv.ddr_clk); +out_cpu: + clk_disable_unprepare(priv.cpu_clk); + of_node_put(np); + + return err; +} + +static int kirkwood_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&kirkwood_cpufreq_driver); + + clk_disable_unprepare(priv.powersave_clk); + clk_disable_unprepare(priv.ddr_clk); + clk_disable_unprepare(priv.cpu_clk); + + return 0; +} + +static struct platform_driver kirkwood_cpufreq_platform_driver = { + .probe = kirkwood_cpufreq_probe, + .remove = kirkwood_cpufreq_remove, + .driver = { + .name = "kirkwood-cpufreq", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(kirkwood_cpufreq_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Andrew Lunn Date: Thu, 31 Jan 2013 07:56:04 +0530 Subject: [PATCH 52/55] cpufreq: exynos: simplify .init() for setting policy->cpus With the recent changes in cpufreq core, we just need to set mask of all possible cpus into policy->cpus. Rest would be done by core. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/exynos-cpufreq.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c index 7012ea8bf1e7..81eb84a24fa7 100644 --- a/drivers/cpufreq/exynos-cpufreq.c +++ b/drivers/cpufreq/exynos-cpufreq.c @@ -227,19 +227,7 @@ static int exynos_cpufreq_cpu_init(struct cpufreq_policy *policy) /* set the transition latency value */ policy->cpuinfo.transition_latency = 100000; - /* - * EXYNOS4 multi-core processors has 2 cores - * that the frequency cannot be set independently. - * Each cpu is bound to the same speed. - * So the affected cpu is all of the cpus. - */ - if (num_online_cpus() == 1) { - cpumask_copy(policy->related_cpus, cpu_possible_mask); - cpumask_copy(policy->cpus, cpu_online_mask); - } else { - policy->shared_type = CPUFREQ_SHARED_TYPE_ANY; - cpumask_setall(policy->cpus); - } + cpumask_setall(policy->cpus); return cpufreq_frequency_table_cpuinfo(policy, exynos_info->freq_table); } From 06eb09d17c9092c96e55a4ab886018c5c844d312 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 8 Feb 2013 17:24:18 +0000 Subject: [PATCH 53/55] cpufreq: ondemand: Fix typos in comments Fix some typos in comments. Signed-off-by: Stratos Karafotis Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_ondemand.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 09b27ae6576f..f3eb26cd848f 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -26,7 +26,7 @@ #include "cpufreq_governor.h" -/* On-demand governor macors */ +/* On-demand governor macros */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_SAMPLING_DOWN_FACTOR (1) @@ -66,7 +66,7 @@ static void ondemand_powersave_bias_init_cpu(int cpu) * efficient idling at a higher frequency/voltage is. * Pavel Machek says this is not so for various generations of AMD and old * Intel systems. - * Mike Chan (androidlcom) calis this is also not true for ARM. + * Mike Chan (android.com) claims this is also not true for ARM. * Because of this, whitelist specific known (series) of CPUs by default, and * leave all others up to the user. */ @@ -74,7 +74,7 @@ static int should_io_be_busy(void) { #if defined(CONFIG_X86) /* - * For Intel, Core 2 (model 15) andl later have an efficient idle. + * For Intel, Core 2 (model 15) and later have an efficient idle. */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6 && @@ -159,8 +159,8 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) /* * Every sampling_rate, we check, if current idle time is less than 20% - * (default), then we try to increase frequency Every sampling_rate, we look for - * a the lowest frequency which can sustain the load while keeping idle time + * (default), then we try to increase frequency. Every sampling_rate, we look + * for the lowest frequency which can sustain the load while keeping idle time * over 30%. If such a frequency exist, we try to decrease to this frequency. * * Any frequency increase takes it to the maximum frequency. Frequency reduction @@ -267,7 +267,7 @@ static ssize_t show_sampling_rate_min(struct kobject *kobj, * update_sampling_rate - update sampling rate effective immediately if needed. * @new_rate: new sampling rate * - * If new rate is smaller than the old, simply updaing + * If new rate is smaller than the old, simply updating * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the * original sampling_rate was 1 second and the requested new sampling rate is 10 * ms because the user needs immediate reaction from ondemand governor, but not From c88883cd545ea2a0baafe7308618de9cbf420bdd Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 8 Feb 2013 17:24:24 +0000 Subject: [PATCH 54/55] cpufreq: conservative: Fix typos in comments Fix a couple of typos in comments. Signed-off-by: Stratos Karafotis Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index e8bb91571672..4fd0006b1291 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -25,7 +25,7 @@ #include "cpufreq_governor.h" -/* Conservative governor macors */ +/* Conservative governor macros */ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) #define DEF_SAMPLING_DOWN_FACTOR (1) @@ -144,7 +144,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, /* * we only care if our internally tracked freq moves outside the 'valid' - * ranges of freqency available to us otherwise we do not change it + * ranges of frequency available to us otherwise we do not change it */ if (dbs_info->requested_freq > policy->max || dbs_info->requested_freq < policy->min) From 191e5edf96dc4939f5db0605cc65de9f4d88d155 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Mon, 11 Feb 2013 20:33:34 +0100 Subject: [PATCH 55/55] cpufreq / intel_pstate: Fix 32 bit build Fixes 32 bit build. on i386: drivers/built-in.o: In function `intel_pstate_timer_func': intel_pstate.c:(.text+0x4ce97e): undefined reference to `__udivdi3' drivers/built-in.o: In function `intel_pstate_cpu_init': intel_pstate.c:(.cpuinit.text+0x974): undefined reference to `__udivdi3' Reported-by: Randy Dunlap Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 86ad4822305d..e87996355da0 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -456,7 +456,8 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, core_pct = div64_u64(sample->aperf * 100, sample->mperf); sample->freq = cpu->pstate.turbo_pstate * core_pct * 1000; - sample->core_pct_busy = sample->pstate_pct_busy * core_pct / 100; + sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct), + 100); } static inline void intel_pstate_sample(struct cpudata *cpu)