diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index c436096351f8..72f70b16d299 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -111,6 +111,12 @@ policy->governor must contain the "default policy" for For setting some of these values, the frequency table helpers might be helpful. See the section 2 for more information on them. +SMP systems normally have same clock source for a group of cpus. For these the +.init() would be called only once for the first online cpu. Here the .init() +routine must initialize policy->cpus with mask of all possible cpus (Online + +Offline) that share the clock. Then the core would copy this mask onto +policy->related_cpus and will reset policy->cpus to carry only online cpus. + 1.3 verify ------------ diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 04f6b32993e6..ff2f28332cc4 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -190,11 +190,11 @@ scaling_max_freq show the current "policy limits" (in first set scaling_max_freq, then scaling_min_freq. -affected_cpus : List of CPUs that require software coordination - of frequency. +affected_cpus : List of Online CPUs that require software + coordination of frequency. -related_cpus : List of CPUs that need some sort of frequency - coordination, whether software or hardware. +related_cpus : List of Online + Offline CPUs that need software + coordination of frequency. scaling_driver : Hardware driver for cpufreq. diff --git a/Documentation/devicetree/bindings/arm/kirkwood.txt b/Documentation/devicetree/bindings/arm/kirkwood.txt new file mode 100644 index 000000000000..98cce9a653eb --- /dev/null +++ b/Documentation/devicetree/bindings/arm/kirkwood.txt @@ -0,0 +1,27 @@ +Marvell Kirkwood Platforms Device Tree Bindings +----------------------------------------------- + +Boards with a SoC of the Marvell Kirkwood +shall have the following property: + +Required root node property: + +compatible: must contain "marvell,kirkwood"; + +In order to support the kirkwood cpufreq driver, there must be a node +cpus/cpu@0 with three clocks, "cpu_clk", "ddrclk" and "powersave", +where the "powersave" clock is a gating clock used to switch the CPU +between the "cpu_clk" and the "ddrclk". + +Example: + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "marvell,sheeva-88SV131"; + clocks = <&core_clk 1>, <&core_clk 3>, <&gate_clk 11>; + clock-names = "cpu_clk", "ddrclk", "powersave"; + }; diff --git a/arch/arm/boot/dts/highbank.dts b/arch/arm/boot/dts/highbank.dts index 5927a8df5625..6aad34ad9517 100644 --- a/arch/arm/boot/dts/highbank.dts +++ b/arch/arm/boot/dts/highbank.dts @@ -37,6 +37,16 @@ next-level-cache = <&L2>; clocks = <&a9pll>; clock-names = "cpu"; + operating-points = < + /* kHz ignored */ + 1300000 1000000 + 1200000 1000000 + 1100000 1000000 + 800000 1000000 + 400000 1000000 + 200000 1000000 + >; + clock-latency = <100000>; }; cpu@901 { diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 49f335d301ba..ae0c7bb39ae8 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -31,7 +31,6 @@ static void __iomem *twd_base; static struct clk *twd_clk; static unsigned long twd_timer_rate; -static bool common_setup_called; static DEFINE_PER_CPU(bool, percpu_setup_called); static struct clock_event_device __percpu **twd_evt; @@ -239,25 +238,28 @@ static irqreturn_t twd_handler(int irq, void *dev_id) return IRQ_NONE; } -static struct clk *twd_get_clock(void) +static void twd_get_clock(struct device_node *np) { - struct clk *clk; int err; - clk = clk_get_sys("smp_twd", NULL); - if (IS_ERR(clk)) { - pr_err("smp_twd: clock not found: %d\n", (int)PTR_ERR(clk)); - return clk; + if (np) + twd_clk = of_clk_get(np, 0); + else + twd_clk = clk_get_sys("smp_twd", NULL); + + if (IS_ERR(twd_clk)) { + pr_err("smp_twd: clock not found %d\n", (int) PTR_ERR(twd_clk)); + return; } - err = clk_prepare_enable(clk); + err = clk_prepare_enable(twd_clk); if (err) { pr_err("smp_twd: clock failed to prepare+enable: %d\n", err); - clk_put(clk); - return ERR_PTR(err); + clk_put(twd_clk); + return; } - return clk; + twd_timer_rate = clk_get_rate(twd_clk); } /* @@ -280,26 +282,7 @@ static int __cpuinit twd_timer_setup(struct clock_event_device *clk) } per_cpu(percpu_setup_called, cpu) = true; - /* - * This stuff only need to be done once for the entire TWD cluster - * during the runtime of the system. - */ - if (!common_setup_called) { - twd_clk = twd_get_clock(); - - /* - * We use IS_ERR_OR_NULL() here, because if the clock stubs - * are active we will get a valid clk reference which is - * however NULL and will return the rate 0. In that case we - * need to calibrate the rate instead. - */ - if (!IS_ERR_OR_NULL(twd_clk)) - twd_timer_rate = clk_get_rate(twd_clk); - else - twd_calibrate_rate(); - - common_setup_called = true; - } + twd_calibrate_rate(); /* * The following is done once per CPU the first time .setup() is @@ -330,7 +313,7 @@ static struct local_timer_ops twd_lt_ops __cpuinitdata = { .stop = twd_timer_stop, }; -static int __init twd_local_timer_common_register(void) +static int __init twd_local_timer_common_register(struct device_node *np) { int err; @@ -350,6 +333,8 @@ static int __init twd_local_timer_common_register(void) if (err) goto out_irq; + twd_get_clock(np); + return 0; out_irq: @@ -373,7 +358,7 @@ int __init twd_local_timer_register(struct twd_local_timer *tlt) if (!twd_base) return -ENOMEM; - return twd_local_timer_common_register(); + return twd_local_timer_common_register(NULL); } #ifdef CONFIG_OF @@ -405,7 +390,7 @@ void __init twd_local_timer_of_register(void) goto out; } - err = twd_local_timer_common_register(); + err = twd_local_timer_common_register(np); out: WARN(err, "twd_local_timer_of_register failed (%d)\n", err); diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index 551c97e87a78..44b12f9c1584 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -1,5 +1,7 @@ config ARCH_HIGHBANK bool "Calxeda ECX-1000/2000 (Highbank/Midway)" if ARCH_MULTI_V7 + select ARCH_HAS_CPUFREQ + select ARCH_HAS_OPP select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_AMBA select ARM_GIC @@ -11,5 +13,7 @@ config ARCH_HIGHBANK select GENERIC_CLOCKEVENTS select HAVE_ARM_SCU select HAVE_SMP + select MAILBOX + select PL320_MBOX select SPARSE_IRQ select USE_OF diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index a74d3c7d2e26..a36a03d3c9a0 100644 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -243,8 +243,7 @@ static int tegra_cpu_init(struct cpufreq_policy *policy) /* FIXME: what's the actual transition time? */ policy->cpuinfo.transition_latency = 300 * 1000; - policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - cpumask_copy(policy->related_cpus, cpu_possible_mask); + cpumask_copy(policy->cpus, cpu_possible_mask); if (policy->cpu == 0) register_pm_notifier(&tegra_cpu_pm_notifier); diff --git a/drivers/Kconfig b/drivers/Kconfig index f5fb0722a63a..2b4e89ba15ad 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -134,6 +134,8 @@ source "drivers/hwspinlock/Kconfig" source "drivers/clocksource/Kconfig" +source "drivers/mailbox/Kconfig" + source "drivers/iommu/Kconfig" source "drivers/remoteproc/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 7863b9fee50b..a8d32f1094b4 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -130,6 +130,7 @@ obj-y += platform/ #common clk code obj-y += clk/ +obj-$(CONFIG_MAILBOX) += mailbox/ obj-$(CONFIG_HWSPINLOCK) += hwspinlock/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_IOMMU_SUPPORT) += iommu/ diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 50b2831e027d..32ee0fc7ea54 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -162,7 +162,7 @@ unsigned long opp_get_voltage(struct opp *opp) return v; } -EXPORT_SYMBOL(opp_get_voltage); +EXPORT_SYMBOL_GPL(opp_get_voltage); /** * opp_get_freq() - Gets the frequency corresponding to an available opp @@ -192,7 +192,7 @@ unsigned long opp_get_freq(struct opp *opp) return f; } -EXPORT_SYMBOL(opp_get_freq); +EXPORT_SYMBOL_GPL(opp_get_freq); /** * opp_get_opp_count() - Get number of opps available in the opp list @@ -225,7 +225,7 @@ int opp_get_opp_count(struct device *dev) return count; } -EXPORT_SYMBOL(opp_get_opp_count); +EXPORT_SYMBOL_GPL(opp_get_opp_count); /** * opp_find_freq_exact() - search for an exact frequency @@ -276,7 +276,7 @@ struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, return opp; } -EXPORT_SYMBOL(opp_find_freq_exact); +EXPORT_SYMBOL_GPL(opp_find_freq_exact); /** * opp_find_freq_ceil() - Search for an rounded ceil freq @@ -323,7 +323,7 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq) return opp; } -EXPORT_SYMBOL(opp_find_freq_ceil); +EXPORT_SYMBOL_GPL(opp_find_freq_ceil); /** * opp_find_freq_floor() - Search for a rounded floor freq @@ -374,7 +374,7 @@ struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq) return opp; } -EXPORT_SYMBOL(opp_find_freq_floor); +EXPORT_SYMBOL_GPL(opp_find_freq_floor); /** * opp_add() - Add an OPP table from a table definitions @@ -568,7 +568,7 @@ int opp_enable(struct device *dev, unsigned long freq) { return opp_set_availability(dev, freq, true); } -EXPORT_SYMBOL(opp_enable); +EXPORT_SYMBOL_GPL(opp_enable); /** * opp_disable() - Disable a specific OPP @@ -590,7 +590,7 @@ int opp_disable(struct device *dev, unsigned long freq) { return opp_set_availability(dev, freq, false); } -EXPORT_SYMBOL(opp_disable); +EXPORT_SYMBOL_GPL(opp_disable); #ifdef CONFIG_CPU_FREQ /** @@ -661,6 +661,7 @@ int opp_init_cpufreq_table(struct device *dev, return 0; } +EXPORT_SYMBOL_GPL(opp_init_cpufreq_table); /** * opp_free_cpufreq_table() - free the cpufreq table @@ -678,6 +679,7 @@ void opp_free_cpufreq_table(struct device *dev, kfree(*table); *table = NULL; } +EXPORT_SYMBOL_GPL(opp_free_cpufreq_table); #endif /* CONFIG_CPU_FREQ */ /** @@ -738,4 +740,5 @@ int of_init_opp_table(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(of_init_opp_table); #endif diff --git a/drivers/clk/clk-highbank.c b/drivers/clk/clk-highbank.c index 52fecadf004a..3a0b723da2bc 100644 --- a/drivers/clk/clk-highbank.c +++ b/drivers/clk/clk-highbank.c @@ -182,8 +182,10 @@ static int clk_pll_set_rate(struct clk_hw *hwclk, unsigned long rate, reg |= HB_PLL_EXT_ENA; reg &= ~HB_PLL_EXT_BYPASS; } else { + writel(reg | HB_PLL_EXT_BYPASS, hbclk->reg); reg &= ~HB_PLL_DIVQ_MASK; reg |= divq << HB_PLL_DIVQ_SHIFT; + writel(reg | HB_PLL_EXT_BYPASS, hbclk->reg); } writel(reg, hbclk->reg); diff --git a/drivers/clk/mvebu/clk-gating-ctrl.c b/drivers/clk/mvebu/clk-gating-ctrl.c index 8fa5408b6c7d..ebf141d4374b 100644 --- a/drivers/clk/mvebu/clk-gating-ctrl.c +++ b/drivers/clk/mvebu/clk-gating-ctrl.c @@ -193,6 +193,7 @@ static const struct mvebu_soc_descr __initconst kirkwood_gating_descr[] = { { "runit", NULL, 7 }, { "xor0", NULL, 8 }, { "audio", NULL, 9 }, + { "powersave", "cpuclk", 11 }, { "sata0", NULL, 14 }, { "sata1", NULL, 15 }, { "xor1", NULL, 16 }, diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index e0a899f25e37..cbcb21e32771 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -185,7 +185,7 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. config GENERIC_CPUFREQ_CPU0 - bool "Generic CPU0 cpufreq driver" + tristate "Generic CPU0 cpufreq driver" depends on HAVE_CLK && REGULATOR && PM_OPP && OF select CPU_FREQ_TABLE help diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index a0b3661d90b0..7f333af1c059 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -77,9 +77,39 @@ config ARM_EXYNOS5250_CPUFREQ This adds the CPUFreq driver for Samsung EXYNOS5250 SoC. +config ARM_KIRKWOOD_CPUFREQ + def_bool ARCH_KIRKWOOD && OF + help + This adds the CPUFreq driver for Marvell Kirkwood + SoCs. + +config ARM_IMX6Q_CPUFREQ + tristate "Freescale i.MX6Q cpufreq support" + depends on SOC_IMX6Q + depends on REGULATOR_ANATOP + help + This adds cpufreq driver support for Freescale i.MX6Q SOC. + + If in doubt, say N. + config ARM_SPEAR_CPUFREQ bool "SPEAr CPUFreq support" depends on PLAT_SPEAR default y help This adds the CPUFreq driver support for SPEAr SOCs. + +config ARM_HIGHBANK_CPUFREQ + tristate "Calxeda Highbank-based" + depends on ARCH_HIGHBANK + select CPU_FREQ_TABLE + select GENERIC_CPUFREQ_CPU0 + select PM_OPP + select REGULATOR + + default m + help + This adds the CPUFreq driver for Calxeda Highbank SoC + based boards. + + If in doubt, say N. diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 7227cd734042..6aa7053ce2ef 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -2,6 +2,24 @@ # x86 CPU Frequency scaling drivers # +config X86_INTEL_PSTATE + tristate "Intel P state control" + depends on X86 + help + This driver provides a P state for Intel core processors. + The driver implements an internal governor and will become + the scaling driver and governor for Sandy bridge processors. + + When this driver is enabled it will become the perferred + scaling driver for Sandy bridge processors. + + Note: This driver should be built with the same settings as + the other scaling drivers configured into the system + (module/built-in) in order for the driver to register itself + as the scaling driver on the system. + + If in doubt, say N. + config X86_PCC_CPUFREQ tristate "Processor Clocking Control interface driver" depends on ACPI && ACPI_PROCESSOR diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index fadc4d496e2f..5399c45ac311 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -19,11 +19,12 @@ obj-$(CONFIG_GENERIC_CPUFREQ_CPU0) += cpufreq-cpu0.o ################################################################################## # x86 drivers. # Link order matters. K8 is preferred to ACPI because of firmware bugs in early -# K8 systems. ACPI is preferred to all other hardware-specific drivers. +# K8 systems. This is still the case but acpi-cpufreq errors out so that +# powernow-k8 can load then. ACPI is preferred to all other hardware-specific drivers. # speedstep-* is preferred over p4-clockmod. -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o @@ -39,6 +40,7 @@ obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o +obj-$(CONFIG_X86_INTEL_PSTATE) += intel_pstate.o ################################################################################## # ARM SoC drivers @@ -50,8 +52,11 @@ obj-$(CONFIG_ARM_EXYNOS_CPUFREQ) += exynos-cpufreq.o obj-$(CONFIG_ARM_EXYNOS4210_CPUFREQ) += exynos4210-cpufreq.o obj-$(CONFIG_ARM_EXYNOS4X12_CPUFREQ) += exynos4x12-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5250_CPUFREQ) += exynos5250-cpufreq.o -obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o +obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o +obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o +obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o +obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o ################################################################################## # PowerPC platform drivers diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 7b0d49d78c61..937bc286591f 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -734,7 +734,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); - if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) { + if (bios_with_sw_any_bug && !policy_is_shared(policy)) { policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; cpumask_copy(policy->cpus, cpu_core_mask(cpu)); } @@ -762,6 +762,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 == 0xf) { + pr_debug("AMD K8 systems must use native drivers.\n"); + result = -ENODEV; + goto err_unreg; + } pr_debug("SYSTEM IO addr space\n"); data->cpu_feature = SYSTEM_IO_CAPABLE; break; diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index debc5a7c8db6..4e5b7fb8927c 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -12,12 +12,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include #include #include +#include #include #include @@ -146,7 +146,6 @@ static int cpu0_cpufreq_init(struct cpufreq_policy *policy) * share the clock and voltage and clock. Use cpufreq affected_cpus * interface to have all CPUs scaled together. */ - policy->shared_type = CPUFREQ_SHARED_TYPE_ANY; cpumask_setall(policy->cpus); cpufreq_frequency_table_get_attr(freq_table, policy->cpu); @@ -177,34 +176,32 @@ static struct cpufreq_driver cpu0_cpufreq_driver = { .attr = cpu0_cpufreq_attr, }; -static int cpu0_cpufreq_driver_init(void) +static int cpu0_cpufreq_probe(struct platform_device *pdev) { struct device_node *np; int ret; - np = of_find_node_by_path("/cpus/cpu@0"); + for_each_child_of_node(of_find_node_by_path("/cpus"), np) { + if (of_get_property(np, "operating-points", NULL)) + break; + } + if (!np) { pr_err("failed to find cpu0 node\n"); return -ENOENT; } - cpu_dev = get_cpu_device(0); - if (!cpu_dev) { - pr_err("failed to get cpu0 device\n"); - ret = -ENODEV; - goto out_put_node; - } - + cpu_dev = &pdev->dev; cpu_dev->of_node = np; - cpu_clk = clk_get(cpu_dev, NULL); + cpu_clk = devm_clk_get(cpu_dev, NULL); if (IS_ERR(cpu_clk)) { ret = PTR_ERR(cpu_clk); pr_err("failed to get cpu0 clock: %d\n", ret); goto out_put_node; } - cpu_reg = regulator_get(cpu_dev, "cpu0"); + cpu_reg = devm_regulator_get(cpu_dev, "cpu0"); if (IS_ERR(cpu_reg)) { pr_warn("failed to get cpu0 regulator\n"); cpu_reg = NULL; @@ -267,7 +264,24 @@ out_put_node: of_node_put(np); return ret; } -late_initcall(cpu0_cpufreq_driver_init); + +static int cpu0_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&cpu0_cpufreq_driver); + opp_free_cpufreq_table(cpu_dev, &freq_table); + + return 0; +} + +static struct platform_driver cpu0_cpufreq_platdrv = { + .driver = { + .name = "cpufreq-cpu0", + .owner = THIS_MODULE, + }, + .probe = cpu0_cpufreq_probe, + .remove = cpu0_cpufreq_remove, +}; +module_platform_driver(cpu0_cpufreq_platdrv); MODULE_AUTHOR("Shawn Guo "); MODULE_DESCRIPTION("Generic CPU0 cpufreq driver"); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 99faadf454ec..b02824d092e7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -59,8 +59,6 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock); * mode before doing so. * * Additional rules: - * - All holders of the lock should check to make sure that the CPU they - * are concerned with are online after they get the lock. * - Governor routines that can be called in cpufreq hotplug path should not * take this sem as top level hotplug notifier handler takes this. * - Lock should not be held across @@ -70,38 +68,28 @@ static DEFINE_PER_CPU(int, cpufreq_policy_cpu); static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); #define lock_policy_rwsem(mode, cpu) \ -static int lock_policy_rwsem_##mode \ -(int cpu) \ +static int lock_policy_rwsem_##mode(int cpu) \ { \ int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ BUG_ON(policy_cpu == -1); \ down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ - if (unlikely(!cpu_online(cpu))) { \ - up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ - return -1; \ - } \ \ return 0; \ } lock_policy_rwsem(read, cpu); - lock_policy_rwsem(write, cpu); -static void unlock_policy_rwsem_read(int cpu) -{ - int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); - BUG_ON(policy_cpu == -1); - up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); -} - -static void unlock_policy_rwsem_write(int cpu) -{ - int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); - BUG_ON(policy_cpu == -1); - up_write(&per_cpu(cpu_policy_rwsem, policy_cpu)); +#define unlock_policy_rwsem(mode, cpu) \ +static void unlock_policy_rwsem_##mode(int cpu) \ +{ \ + int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ + BUG_ON(policy_cpu == -1); \ + up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ } +unlock_policy_rwsem(read, cpu); +unlock_policy_rwsem(write, cpu); /* internal prototypes */ static int __cpufreq_governor(struct cpufreq_policy *policy, @@ -180,6 +168,9 @@ err_out: struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) { + if (cpufreq_disabled()) + return NULL; + return __cpufreq_cpu_get(cpu, false); } EXPORT_SYMBOL_GPL(cpufreq_cpu_get); @@ -198,6 +189,9 @@ static void __cpufreq_cpu_put(struct cpufreq_policy *data, bool sysfs) void cpufreq_cpu_put(struct cpufreq_policy *data) { + if (cpufreq_disabled()) + return; + __cpufreq_cpu_put(data, false); } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); @@ -261,14 +255,21 @@ static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) { struct cpufreq_policy *policy; + unsigned long flags; BUG_ON(irqs_disabled()); + if (cpufreq_disabled()) + return; + freqs->flags = cpufreq_driver->flags; pr_debug("notification %u of frequency transition to %u kHz\n", state, freqs->new); + spin_lock_irqsave(&cpufreq_driver_lock, flags); policy = per_cpu(cpufreq_cpu_data, freqs->cpu); + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + switch (state) { case CPUFREQ_PRECHANGE: @@ -542,8 +543,6 @@ static ssize_t show_cpus(const struct cpumask *mask, char *buf) */ static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) { - if (cpumask_empty(policy->related_cpus)) - return show_cpus(policy->cpus, buf); return show_cpus(policy->related_cpus, buf); } @@ -699,87 +698,6 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -/* - * Returns: - * Negative: Failure - * 0: Success - * Positive: When we have a managed CPU and the sysfs got symlinked - */ -static int cpufreq_add_dev_policy(unsigned int cpu, - struct cpufreq_policy *policy, - struct device *dev) -{ - int ret = 0; -#ifdef CONFIG_SMP - unsigned long flags; - unsigned int j; -#ifdef CONFIG_HOTPLUG_CPU - struct cpufreq_governor *gov; - - gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu)); - if (gov) { - policy->governor = gov; - pr_debug("Restoring governor %s for cpu %d\n", - policy->governor->name, cpu); - } -#endif - - for_each_cpu(j, policy->cpus) { - struct cpufreq_policy *managed_policy; - - if (cpu == j) - continue; - - /* Check for existing affected CPUs. - * They may not be aware of it due to CPU Hotplug. - * cpufreq_cpu_put is called when the device is removed - * in __cpufreq_remove_dev() - */ - managed_policy = cpufreq_cpu_get(j); - if (unlikely(managed_policy)) { - - /* Set proper policy_cpu */ - unlock_policy_rwsem_write(cpu); - per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu; - - if (lock_policy_rwsem_write(cpu) < 0) { - /* Should not go through policy unlock path */ - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - cpufreq_cpu_put(managed_policy); - return -EBUSY; - } - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - cpumask_copy(managed_policy->cpus, policy->cpus); - per_cpu(cpufreq_cpu_data, cpu) = managed_policy; - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - pr_debug("CPU already managed, adding link\n"); - ret = sysfs_create_link(&dev->kobj, - &managed_policy->kobj, - "cpufreq"); - if (ret) - cpufreq_cpu_put(managed_policy); - /* - * Success. We only needed to be added to the mask. - * Call driver->exit() because only the cpu parent of - * the kobj needed to call init(). - */ - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - - if (!ret) - return 1; - else - return ret; - } - } -#endif - return ret; -} - - /* symlink affected CPUs */ static int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) @@ -793,8 +711,6 @@ static int cpufreq_add_dev_symlink(unsigned int cpu, if (j == cpu) continue; - if (!cpu_online(j)) - continue; pr_debug("CPU %u already managed, adding link\n", j); managed_policy = cpufreq_cpu_get(cpu); @@ -851,8 +767,6 @@ static int cpufreq_add_dev_interface(unsigned int cpu, spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) { - if (!cpu_online(j)) - continue; per_cpu(cpufreq_cpu_data, j) = policy; per_cpu(cpufreq_policy_cpu, j) = policy->cpu; } @@ -884,6 +798,42 @@ err_out_kobj_put: return ret; } +#ifdef CONFIG_HOTPLUG_CPU +static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling, + struct device *dev) +{ + struct cpufreq_policy *policy; + int ret = 0; + unsigned long flags; + + policy = cpufreq_cpu_get(sibling); + WARN_ON(!policy); + + __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + + lock_policy_rwsem_write(sibling); + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + + cpumask_set_cpu(cpu, policy->cpus); + per_cpu(cpufreq_policy_cpu, cpu) = policy->cpu; + per_cpu(cpufreq_cpu_data, cpu) = policy; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + unlock_policy_rwsem_write(sibling); + + __cpufreq_governor(policy, CPUFREQ_GOV_START); + __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + + ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); + if (ret) { + cpufreq_cpu_put(policy); + return ret; + } + + return 0; +} +#endif /** * cpufreq_add_dev - add a CPU device @@ -896,12 +846,12 @@ err_out_kobj_put: */ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { - unsigned int cpu = dev->id; - int ret = 0, found = 0; + unsigned int j, cpu = dev->id; + int ret = -ENOMEM; struct cpufreq_policy *policy; unsigned long flags; - unsigned int j; #ifdef CONFIG_HOTPLUG_CPU + struct cpufreq_governor *gov; int sibling; #endif @@ -918,6 +868,19 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) cpufreq_cpu_put(policy); return 0; } + +#ifdef CONFIG_HOTPLUG_CPU + /* Check if this cpu was hot-unplugged earlier and has siblings */ + spin_lock_irqsave(&cpufreq_driver_lock, flags); + for_each_online_cpu(sibling) { + struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling); + if (cp && cpumask_test_cpu(cpu, cp->related_cpus)) { + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + return cpufreq_add_policy_cpu(cpu, sibling, dev); + } + } + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); +#endif #endif if (!try_module_get(cpufreq_driver->owner)) { @@ -925,7 +888,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) goto module_out; } - ret = -ENOMEM; policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); if (!policy) goto nomem_out; @@ -937,66 +899,58 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) goto err_free_cpumask; policy->cpu = cpu; + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; cpumask_copy(policy->cpus, cpumask_of(cpu)); /* Initially set CPU itself as the policy_cpu */ per_cpu(cpufreq_policy_cpu, cpu) = cpu; - ret = (lock_policy_rwsem_write(cpu) < 0); - WARN_ON(ret); init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); - /* Set governor before ->init, so that driver could check it */ -#ifdef CONFIG_HOTPLUG_CPU - for_each_online_cpu(sibling) { - struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling); - if (cp && cp->governor && - (cpumask_test_cpu(cpu, cp->related_cpus))) { - policy->governor = cp->governor; - found = 1; - break; - } - } -#endif - if (!found) - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; /* call driver. From then on the cpufreq must be able * to accept all calls to ->verify and ->setpolicy for this CPU */ ret = cpufreq_driver->init(policy); if (ret) { pr_debug("initialization failed\n"); - goto err_unlock_policy; + goto err_set_policy_cpu; } + + /* related cpus should atleast have policy->cpus */ + cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + + /* + * affected cpus must always be the one, which are online. We aren't + * managing offline cpus here. + */ + cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); + policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); - ret = cpufreq_add_dev_policy(cpu, policy, dev); - if (ret) { - if (ret > 0) - /* This is a managed cpu, symlink created, - exit with 0 */ - ret = 0; - goto err_unlock_policy; +#ifdef CONFIG_HOTPLUG_CPU + gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu)); + if (gov) { + policy->governor = gov; + pr_debug("Restoring governor %s for cpu %d\n", + policy->governor->name, cpu); } +#endif ret = cpufreq_add_dev_interface(cpu, policy, dev); if (ret) goto err_out_unregister; - unlock_policy_rwsem_write(cpu); - kobject_uevent(&policy->kobj, KOBJ_ADD); module_put(cpufreq_driver->owner); pr_debug("initialization complete\n"); return 0; - err_out_unregister: spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) @@ -1006,8 +960,8 @@ err_out_unregister: kobject_put(&policy->kobj); wait_for_completion(&policy->kobj_unregister); -err_unlock_policy: - unlock_policy_rwsem_write(cpu); +err_set_policy_cpu: + per_cpu(cpufreq_policy_cpu, cpu) = -1; free_cpumask_var(policy->related_cpus); err_free_cpumask: free_cpumask_var(policy->cpus); @@ -1019,6 +973,22 @@ module_out: return ret; } +static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) +{ + int j; + + policy->last_cpu = policy->cpu; + policy->cpu = cpu; + + for_each_cpu(j, policy->cpus) + per_cpu(cpufreq_policy_cpu, j) = cpu; + +#ifdef CONFIG_CPU_FREQ_TABLE + cpufreq_frequency_table_update_policy_cpu(policy); +#endif + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_UPDATE_POLICY_CPU, policy); +} /** * __cpufreq_remove_dev - remove a CPU device @@ -1029,129 +999,103 @@ module_out: */ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { - unsigned int cpu = dev->id; + unsigned int cpu = dev->id, ret, cpus; unsigned long flags; struct cpufreq_policy *data; struct kobject *kobj; struct completion *cmp; -#ifdef CONFIG_SMP struct device *cpu_dev; - unsigned int j; -#endif - pr_debug("unregistering CPU %u\n", cpu); + pr_debug("%s: unregistering CPU %u\n", __func__, cpu); spin_lock_irqsave(&cpufreq_driver_lock, flags); - data = per_cpu(cpufreq_cpu_data, cpu); - if (!data) { - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - unlock_policy_rwsem_write(cpu); - return -EINVAL; - } + data = per_cpu(cpufreq_cpu_data, cpu); per_cpu(cpufreq_cpu_data, cpu) = NULL; - -#ifdef CONFIG_SMP - /* if this isn't the CPU which is the parent of the kobj, we - * only need to unlink, put and exit - */ - if (unlikely(cpu != data->cpu)) { - pr_debug("removing link\n"); - cpumask_clear_cpu(cpu, data->cpus); - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - kobj = &dev->kobj; - cpufreq_cpu_put(data); - unlock_policy_rwsem_write(cpu); - sysfs_remove_link(kobj, "cpufreq"); - return 0; - } -#endif - -#ifdef CONFIG_SMP - -#ifdef CONFIG_HOTPLUG_CPU - strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name, - CPUFREQ_NAME_LEN); -#endif - - /* if we have other CPUs still registered, we need to unlink them, - * or else wait_for_completion below will lock up. Clean the - * per_cpu(cpufreq_cpu_data) while holding the lock, and remove - * the sysfs links afterwards. - */ - if (unlikely(cpumask_weight(data->cpus) > 1)) { - for_each_cpu(j, data->cpus) { - if (j == cpu) - continue; - per_cpu(cpufreq_cpu_data, j) = NULL; - } - } - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (unlikely(cpumask_weight(data->cpus) > 1)) { - for_each_cpu(j, data->cpus) { - if (j == cpu) - continue; - pr_debug("removing link for cpu %u\n", j); -#ifdef CONFIG_HOTPLUG_CPU - strncpy(per_cpu(cpufreq_cpu_governor, j), - data->governor->name, CPUFREQ_NAME_LEN); -#endif - cpu_dev = get_cpu_device(j); - kobj = &cpu_dev->kobj; - unlock_policy_rwsem_write(cpu); - sysfs_remove_link(kobj, "cpufreq"); - lock_policy_rwsem_write(cpu); - cpufreq_cpu_put(data); - } + if (!data) { + pr_debug("%s: No cpu_data found\n", __func__); + return -EINVAL; } -#else - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); -#endif if (cpufreq_driver->target) __cpufreq_governor(data, CPUFREQ_GOV_STOP); - kobj = &data->kobj; - cmp = &data->kobj_unregister; - unlock_policy_rwsem_write(cpu); - kobject_put(kobj); - - /* we need to make sure that the underlying kobj is actually - * not referenced anymore by anybody before we proceed with - * unloading. - */ - pr_debug("waiting for dropping of refcount\n"); - wait_for_completion(cmp); - pr_debug("wait complete\n"); - - lock_policy_rwsem_write(cpu); - if (cpufreq_driver->exit) - cpufreq_driver->exit(data); - unlock_policy_rwsem_write(cpu); - #ifdef CONFIG_HOTPLUG_CPU - /* when the CPU which is the parent of the kobj is hotplugged - * offline, check for siblings, and create cpufreq sysfs interface - * and symlinks - */ - if (unlikely(cpumask_weight(data->cpus) > 1)) { - /* first sibling now owns the new sysfs dir */ - cpumask_clear_cpu(cpu, data->cpus); - cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL); - - /* finally remove our own symlink */ - lock_policy_rwsem_write(cpu); - __cpufreq_remove_dev(dev, sif); - } + if (!cpufreq_driver->setpolicy) + strncpy(per_cpu(cpufreq_cpu_governor, cpu), + data->governor->name, CPUFREQ_NAME_LEN); #endif - free_cpumask_var(data->related_cpus); - free_cpumask_var(data->cpus); - kfree(data); + WARN_ON(lock_policy_rwsem_write(cpu)); + cpus = cpumask_weight(data->cpus); + cpumask_clear_cpu(cpu, data->cpus); + unlock_policy_rwsem_write(cpu); + if (cpu != data->cpu) { + sysfs_remove_link(&dev->kobj, "cpufreq"); + } else if (cpus > 1) { + /* first sibling now owns the new sysfs dir */ + cpu_dev = get_cpu_device(cpumask_first(data->cpus)); + sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); + ret = kobject_move(&data->kobj, &cpu_dev->kobj); + if (ret) { + pr_err("%s: Failed to move kobj: %d", __func__, ret); + + WARN_ON(lock_policy_rwsem_write(cpu)); + cpumask_set_cpu(cpu, data->cpus); + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + per_cpu(cpufreq_cpu_data, cpu) = data; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + unlock_policy_rwsem_write(cpu); + + ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj, + "cpufreq"); + return -EINVAL; + } + + WARN_ON(lock_policy_rwsem_write(cpu)); + update_policy_cpu(data, cpu_dev->id); + unlock_policy_rwsem_write(cpu); + pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n", + __func__, cpu_dev->id, cpu); + } + + pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); + cpufreq_cpu_put(data); + + /* If cpu is last user of policy, free policy */ + if (cpus == 1) { + lock_policy_rwsem_read(cpu); + kobj = &data->kobj; + cmp = &data->kobj_unregister; + unlock_policy_rwsem_read(cpu); + kobject_put(kobj); + + /* we need to make sure that the underlying kobj is actually + * not referenced anymore by anybody before we proceed with + * unloading. + */ + pr_debug("waiting for dropping of refcount\n"); + wait_for_completion(cmp); + pr_debug("wait complete\n"); + + if (cpufreq_driver->exit) + cpufreq_driver->exit(data); + + free_cpumask_var(data->related_cpus); + free_cpumask_var(data->cpus); + kfree(data); + } else if (cpufreq_driver->target) { + __cpufreq_governor(data, CPUFREQ_GOV_START); + __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); + } + + per_cpu(cpufreq_policy_cpu, cpu) = -1; return 0; } @@ -1164,9 +1108,6 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (cpu_is_offline(cpu)) return 0; - if (unlikely(lock_policy_rwsem_write(cpu))) - BUG(); - retval = __cpufreq_remove_dev(dev, sif); return retval; } @@ -1215,9 +1156,13 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, */ unsigned int cpufreq_quick_get(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy; unsigned int ret_freq = 0; + if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) + return cpufreq_driver->get(cpu); + + policy = cpufreq_cpu_get(cpu); if (policy) { ret_freq = policy->cur; cpufreq_cpu_put(policy); @@ -1385,6 +1330,20 @@ static struct syscore_ops cpufreq_syscore_ops = { .resume = cpufreq_bp_resume, }; +/** + * cpufreq_get_current_driver - return current driver's name + * + * Return the name string of the currently loaded cpufreq driver + * or NULL, if none. + */ +const char *cpufreq_get_current_driver(void) +{ + if (cpufreq_driver) + return cpufreq_driver->name; + + return NULL; +} +EXPORT_SYMBOL_GPL(cpufreq_get_current_driver); /********************************************************************* * NOTIFIER LISTS INTERFACE * @@ -1407,6 +1366,9 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) { int ret; + if (cpufreq_disabled()) + return -EINVAL; + WARN_ON(!init_cpufreq_transition_notifier_list_called); switch (list) { @@ -1441,6 +1403,9 @@ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) { int ret; + if (cpufreq_disabled()) + return -EINVAL; + switch (list) { case CPUFREQ_TRANSITION_NOTIFIER: ret = srcu_notifier_chain_unregister( @@ -1486,7 +1451,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (target_freq == policy->cur) return 0; - if (cpu_online(policy->cpu) && cpufreq_driver->target) + if (cpufreq_driver->target) retval = cpufreq_driver->target(policy, target_freq, relation); return retval; @@ -1521,7 +1486,10 @@ int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) { int ret = 0; - if (!(cpu_online(cpu) && cpufreq_driver->getavg)) + if (cpufreq_disabled()) + return ret; + + if (!cpufreq_driver->getavg) return 0; policy = cpufreq_cpu_get(policy->cpu); @@ -1576,6 +1544,11 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, policy->cpu, event); ret = policy->governor->governor(policy, event); + if (event == CPUFREQ_GOV_START) + policy->governor->initialized++; + else if (event == CPUFREQ_GOV_STOP) + policy->governor->initialized--; + /* we keep one module reference alive for each CPU governed by this CPU */ if ((event != CPUFREQ_GOV_START) || ret) @@ -1599,6 +1572,7 @@ int cpufreq_register_governor(struct cpufreq_governor *governor) mutex_lock(&cpufreq_governor_mutex); + governor->initialized = 0; err = -EBUSY; if (__find_governor(governor->name) == NULL) { err = 0; @@ -1796,7 +1770,7 @@ int cpufreq_update_policy(unsigned int cpu) pr_debug("Driver did not initialize current freq"); data->cur = policy.cur; } else { - if (data->cur != policy.cur) + if (data->cur != policy.cur && cpufreq_driver->target) cpufreq_out_of_sync(cpu, data->cur, policy.cur); } @@ -1828,9 +1802,6 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: - if (unlikely(lock_policy_rwsem_write(cpu))) - BUG(); - __cpufreq_remove_dev(dev, NULL); break; case CPU_DOWN_FAILED: diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 64ef737e7e72..4fd0006b1291 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -25,7 +25,7 @@ #include "cpufreq_governor.h" -/* Conservative governor macors */ +/* Conservative governor macros */ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) #define DEF_SAMPLING_DOWN_FACTOR (1) @@ -113,17 +113,20 @@ static void cs_check_cpu(int cpu, unsigned int load) static void cs_dbs_timer(struct work_struct *work) { + struct delayed_work *dw = to_delayed_work(work); struct cs_cpu_dbs_info_s *dbs_info = container_of(work, struct cs_cpu_dbs_info_s, cdbs.work.work); - unsigned int cpu = dbs_info->cdbs.cpu; + unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; + struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, + cpu); int delay = delay_for_sampling_rate(cs_tuners.sampling_rate); - mutex_lock(&dbs_info->cdbs.timer_mutex); + mutex_lock(&core_dbs_info->cdbs.timer_mutex); + if (need_load_eval(&core_dbs_info->cdbs, cs_tuners.sampling_rate)) + dbs_check_cpu(&cs_dbs_data, cpu); - dbs_check_cpu(&cs_dbs_data, cpu); - - schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay); - mutex_unlock(&dbs_info->cdbs.timer_mutex); + schedule_delayed_work_on(smp_processor_id(), dw, delay); + mutex_unlock(&core_dbs_info->cdbs.timer_mutex); } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -141,7 +144,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, /* * we only care if our internally tracked freq moves outside the 'valid' - * ranges of freqency available to us otherwise we do not change it + * ranges of frequency available to us otherwise we do not change it */ if (dbs_info->requested_freq > policy->max || dbs_info->requested_freq < policy->min) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6c5f1d383cdc..5a76086ff09b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -161,25 +161,48 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) } EXPORT_SYMBOL_GPL(dbs_check_cpu); -static inline void dbs_timer_init(struct dbs_data *dbs_data, - struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate) +static inline void dbs_timer_init(struct dbs_data *dbs_data, int cpu, + unsigned int sampling_rate) { int delay = delay_for_sampling_rate(sampling_rate); + struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); - INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer); - schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay); + schedule_delayed_work_on(cpu, &cdbs->work, delay); } -static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs) +static inline void dbs_timer_exit(struct dbs_data *dbs_data, int cpu) { + struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); + cancel_delayed_work_sync(&cdbs->work); } +/* Will return if we need to evaluate cpu load again or not */ +bool need_load_eval(struct cpu_dbs_common_info *cdbs, + unsigned int sampling_rate) +{ + if (policy_is_shared(cdbs->cur_policy)) { + ktime_t time_now = ktime_get(); + s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp); + + /* Do nothing if we recently have sampled */ + if (delta_us < (s64)(sampling_rate / 2)) + return false; + else + cdbs->time_stamp = time_now; + } + + return true; +} +EXPORT_SYMBOL_GPL(need_load_eval); + int cpufreq_governor_dbs(struct dbs_data *dbs_data, struct cpufreq_policy *policy, unsigned int event) { struct od_cpu_dbs_info_s *od_dbs_info = NULL; struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; + struct cs_ops *cs_ops = NULL; + struct od_ops *od_ops = NULL; struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cpu_dbs_common_info *cpu_cdbs; @@ -192,109 +215,111 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); sampling_rate = &cs_tuners->sampling_rate; ignore_nice = cs_tuners->ignore_nice; + cs_ops = dbs_data->gov_ops; } else { od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); sampling_rate = &od_tuners->sampling_rate; ignore_nice = od_tuners->ignore_nice; + od_ops = dbs_data->gov_ops; } switch (event) { case CPUFREQ_GOV_START: - if ((!cpu_online(cpu)) || (!policy->cur)) + if (!policy->cur) return -EINVAL; mutex_lock(&dbs_data->mutex); - dbs_data->enable++; - cpu_cdbs->cpu = cpu; for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs; - j_cdbs = dbs_data->get_cpu_cdbs(j); + struct cpu_dbs_common_info *j_cdbs = + dbs_data->get_cpu_cdbs(j); + j_cdbs->cpu = j; j_cdbs->cur_policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall); if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + + mutex_init(&j_cdbs->timer_mutex); + INIT_DEFERRABLE_WORK(&j_cdbs->work, + dbs_data->gov_dbs_timer); } - /* - * Start the timerschedule work, when this governor is used for - * first time - */ - if (dbs_data->enable != 1) - goto second_time; - - rc = sysfs_create_group(cpufreq_global_kobject, - dbs_data->attr_group); - if (rc) { - mutex_unlock(&dbs_data->mutex); - return rc; + if (!policy->governor->initialized) { + rc = sysfs_create_group(cpufreq_global_kobject, + dbs_data->attr_group); + if (rc) { + mutex_unlock(&dbs_data->mutex); + return rc; + } } - /* policy latency is in nS. Convert it to uS first */ - latency = policy->cpuinfo.transition_latency / 1000; - if (latency == 0) - latency = 1; - /* * conservative does not implement micro like ondemand * governor, thus we are bound to jiffes/HZ */ if (dbs_data->governor == GOV_CONSERVATIVE) { - struct cs_ops *ops = dbs_data->gov_ops; + cs_dbs_info->down_skip = 0; + cs_dbs_info->enable = 1; + cs_dbs_info->requested_freq = policy->cur; - cpufreq_register_notifier(ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); + if (!policy->governor->initialized) { + cpufreq_register_notifier(cs_ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); - dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * - jiffies_to_usecs(10); + dbs_data->min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); + } } else { - struct od_ops *ops = dbs_data->gov_ops; + od_dbs_info->rate_mult = 1; + od_dbs_info->sample_type = OD_NORMAL_SAMPLE; + od_ops->powersave_bias_init_cpu(cpu); - od_tuners->io_is_busy = ops->io_busy(); + if (!policy->governor->initialized) + od_tuners->io_is_busy = od_ops->io_busy(); } + if (policy->governor->initialized) + goto unlock; + + /* policy latency is in nS. Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + /* Bring kernel and HW constraints together */ dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, MIN_LATENCY_MULTIPLIER * latency); *sampling_rate = max(dbs_data->min_sampling_rate, latency * LATENCY_MULTIPLIER); - -second_time: - if (dbs_data->governor == GOV_CONSERVATIVE) { - cs_dbs_info->down_skip = 0; - cs_dbs_info->enable = 1; - cs_dbs_info->requested_freq = policy->cur; - } else { - struct od_ops *ops = dbs_data->gov_ops; - od_dbs_info->rate_mult = 1; - od_dbs_info->sample_type = OD_NORMAL_SAMPLE; - ops->powersave_bias_init_cpu(cpu); - } +unlock: mutex_unlock(&dbs_data->mutex); - mutex_init(&cpu_cdbs->timer_mutex); - dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate); + /* Initiate timer time stamp */ + cpu_cdbs->time_stamp = ktime_get(); + + for_each_cpu(j, policy->cpus) + dbs_timer_init(dbs_data, j, *sampling_rate); break; case CPUFREQ_GOV_STOP: if (dbs_data->governor == GOV_CONSERVATIVE) cs_dbs_info->enable = 0; - dbs_timer_exit(cpu_cdbs); + for_each_cpu(j, policy->cpus) + dbs_timer_exit(dbs_data, j); mutex_lock(&dbs_data->mutex); mutex_destroy(&cpu_cdbs->timer_mutex); - dbs_data->enable--; - if (!dbs_data->enable) { - struct cs_ops *ops = dbs_data->gov_ops; + if (policy->governor->initialized == 1) { sysfs_remove_group(cpufreq_global_kobject, dbs_data->attr_group); if (dbs_data->governor == GOV_CONSERVATIVE) - cpufreq_unregister_notifier(ops->notifier_block, + cpufreq_unregister_notifier(cs_ops->notifier_block, CPUFREQ_TRANSITION_NOTIFIER); } mutex_unlock(&dbs_data->mutex); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index f6616540c53d..d2ac91150600 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -82,6 +82,7 @@ struct cpu_dbs_common_info { * the governor or limits. */ struct mutex timer_mutex; + ktime_t time_stamp; }; struct od_cpu_dbs_info_s { @@ -108,7 +109,7 @@ struct od_dbs_tuners { unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; - unsigned int down_differential; + unsigned int adj_up_threshold; unsigned int powersave_bias; unsigned int io_is_busy; }; @@ -129,7 +130,6 @@ struct dbs_data { #define GOV_CONSERVATIVE 1 int governor; unsigned int min_sampling_rate; - unsigned int enable; /* number of CPUs using this policy */ struct attribute_group *attr_group; void *tuners; @@ -171,6 +171,8 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) u64 get_cpu_idle_time(unsigned int cpu, u64 *wall); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); +bool need_load_eval(struct cpu_dbs_common_info *cdbs, + unsigned int sampling_rate); int cpufreq_governor_dbs(struct dbs_data *dbs_data, struct cpufreq_policy *policy, unsigned int event); #endif /* _CPUFREQ_GOVERNER_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 7731f7c7e79a..f3eb26cd848f 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -26,7 +26,7 @@ #include "cpufreq_governor.h" -/* On-demand governor macors */ +/* On-demand governor macros */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_SAMPLING_DOWN_FACTOR (1) @@ -47,7 +47,8 @@ static struct cpufreq_governor cpufreq_gov_ondemand; static struct od_dbs_tuners od_tuners = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, - .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .adj_up_threshold = DEF_FREQUENCY_UP_THRESHOLD - + DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, }; @@ -65,7 +66,7 @@ static void ondemand_powersave_bias_init_cpu(int cpu) * efficient idling at a higher frequency/voltage is. * Pavel Machek says this is not so for various generations of AMD and old * Intel systems. - * Mike Chan (androidlcom) calis this is also not true for ARM. + * Mike Chan (android.com) claims this is also not true for ARM. * Because of this, whitelist specific known (series) of CPUs by default, and * leave all others up to the user. */ @@ -73,7 +74,7 @@ static int should_io_be_busy(void) { #if defined(CONFIG_X86) /* - * For Intel, Core 2 (model 15) andl later have an efficient idle. + * For Intel, Core 2 (model 15) and later have an efficient idle. */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6 && @@ -158,8 +159,8 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) /* * Every sampling_rate, we check, if current idle time is less than 20% - * (default), then we try to increase frequency Every sampling_rate, we look for - * a the lowest frequency which can sustain the load while keeping idle time + * (default), then we try to increase frequency. Every sampling_rate, we look + * for the lowest frequency which can sustain the load while keeping idle time * over 30%. If such a frequency exist, we try to decrease to this frequency. * * Any frequency increase takes it to the maximum frequency. Frequency reduction @@ -192,11 +193,9 @@ static void od_check_cpu(int cpu, unsigned int load_freq) * support the current CPU usage without triggering the up policy. To be * safe, we focus 10 points under the threshold. */ - if (load_freq < (od_tuners.up_threshold - od_tuners.down_differential) * - policy->cur) { + if (load_freq < od_tuners.adj_up_threshold * policy->cur) { unsigned int freq_next; - freq_next = load_freq / (od_tuners.up_threshold - - od_tuners.down_differential); + freq_next = load_freq / od_tuners.adj_up_threshold; /* No longer fully busy, reset rate_mult */ dbs_info->rate_mult = 1; @@ -218,33 +217,42 @@ static void od_check_cpu(int cpu, unsigned int load_freq) static void od_dbs_timer(struct work_struct *work) { + struct delayed_work *dw = to_delayed_work(work); struct od_cpu_dbs_info_s *dbs_info = container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); - unsigned int cpu = dbs_info->cdbs.cpu; - int delay, sample_type = dbs_info->sample_type; + unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; + struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, + cpu); + int delay, sample_type = core_dbs_info->sample_type; + bool eval_load; - mutex_lock(&dbs_info->cdbs.timer_mutex); + mutex_lock(&core_dbs_info->cdbs.timer_mutex); + eval_load = need_load_eval(&core_dbs_info->cdbs, + od_tuners.sampling_rate); /* Common NORMAL_SAMPLE setup */ - dbs_info->sample_type = OD_NORMAL_SAMPLE; + core_dbs_info->sample_type = OD_NORMAL_SAMPLE; if (sample_type == OD_SUB_SAMPLE) { - delay = dbs_info->freq_lo_jiffies; - __cpufreq_driver_target(dbs_info->cdbs.cur_policy, - dbs_info->freq_lo, CPUFREQ_RELATION_H); + delay = core_dbs_info->freq_lo_jiffies; + if (eval_load) + __cpufreq_driver_target(core_dbs_info->cdbs.cur_policy, + core_dbs_info->freq_lo, + CPUFREQ_RELATION_H); } else { - dbs_check_cpu(&od_dbs_data, cpu); - if (dbs_info->freq_lo) { + if (eval_load) + dbs_check_cpu(&od_dbs_data, cpu); + if (core_dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ - dbs_info->sample_type = OD_SUB_SAMPLE; - delay = dbs_info->freq_hi_jiffies; + core_dbs_info->sample_type = OD_SUB_SAMPLE; + delay = core_dbs_info->freq_hi_jiffies; } else { delay = delay_for_sampling_rate(od_tuners.sampling_rate - * dbs_info->rate_mult); + * core_dbs_info->rate_mult); } } - schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay); - mutex_unlock(&dbs_info->cdbs.timer_mutex); + schedule_delayed_work_on(smp_processor_id(), dw, delay); + mutex_unlock(&core_dbs_info->cdbs.timer_mutex); } /************************** sysfs interface ************************/ @@ -259,7 +267,7 @@ static ssize_t show_sampling_rate_min(struct kobject *kobj, * update_sampling_rate - update sampling rate effective immediately if needed. * @new_rate: new sampling rate * - * If new rate is smaller than the old, simply updaing + * If new rate is smaller than the old, simply updating * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the * original sampling_rate was 1 second and the requested new sampling rate is 10 * ms because the user needs immediate reaction from ondemand governor, but not @@ -287,7 +295,7 @@ static void update_sampling_rate(unsigned int new_rate) cpufreq_cpu_put(policy); continue; } - dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); + dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cpufreq_cpu_put(policy); mutex_lock(&dbs_info->cdbs.timer_mutex); @@ -306,8 +314,7 @@ static void update_sampling_rate(unsigned int new_rate) cancel_delayed_work_sync(&dbs_info->cdbs.work); mutex_lock(&dbs_info->cdbs.timer_mutex); - schedule_delayed_work_on(dbs_info->cdbs.cpu, - &dbs_info->cdbs.work, + schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, usecs_to_jiffies(new_rate)); } @@ -351,6 +358,10 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, input < MIN_FREQUENCY_UP_THRESHOLD) { return -EINVAL; } + /* Calculate the new adj_up_threshold */ + od_tuners.adj_up_threshold += input; + od_tuners.adj_up_threshold -= od_tuners.up_threshold; + od_tuners.up_threshold = input; return count; } @@ -507,7 +518,8 @@ static int __init cpufreq_gov_dbs_init(void) if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ od_tuners.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; - od_tuners.down_differential = MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + od_tuners.adj_up_threshold = MICRO_FREQUENCY_UP_THRESHOLD - + MICRO_FREQUENCY_DOWN_DIFFERENTIAL; /* * In nohz/micro accounting case we set the minimum frequency * not depending on HZ, but fixed (very low). The deferred diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 9d7732b81044..2fd779eb1ed1 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -24,12 +24,6 @@ static spinlock_t cpufreq_stats_lock; -#define CPUFREQ_STATDEVICE_ATTR(_name, _mode, _show) \ -static struct freq_attr _attr_##_name = {\ - .attr = {.name = __stringify(_name), .mode = _mode, }, \ - .show = _show,\ -}; - struct cpufreq_stats { unsigned int cpu; unsigned int total_trans; @@ -136,17 +130,17 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) return PAGE_SIZE; return len; } -CPUFREQ_STATDEVICE_ATTR(trans_table, 0444, show_trans_table); +cpufreq_freq_attr_ro(trans_table); #endif -CPUFREQ_STATDEVICE_ATTR(total_trans, 0444, show_total_trans); -CPUFREQ_STATDEVICE_ATTR(time_in_state, 0444, show_time_in_state); +cpufreq_freq_attr_ro(total_trans); +cpufreq_freq_attr_ro(time_in_state); static struct attribute *default_attrs[] = { - &_attr_total_trans.attr, - &_attr_time_in_state.attr, + &total_trans.attr, + &time_in_state.attr, #ifdef CONFIG_CPU_FREQ_STAT_DETAILS - &_attr_trans_table.attr, + &trans_table.attr, #endif NULL }; @@ -170,11 +164,13 @@ static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq) static void cpufreq_stats_free_table(unsigned int cpu) { struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, cpu); + if (stat) { + pr_debug("%s: Free stat table\n", __func__); kfree(stat->time_in_state); kfree(stat); + per_cpu(cpufreq_stats_table, cpu) = NULL; } - per_cpu(cpufreq_stats_table, cpu) = NULL; } /* must be called early in the CPU removal sequence (before @@ -183,8 +179,14 @@ static void cpufreq_stats_free_table(unsigned int cpu) static void cpufreq_stats_free_sysfs(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - if (policy && policy->cpu == cpu) + + if (!cpufreq_frequency_get_table(cpu)) + return; + + if (policy && !policy_is_shared(policy)) { + pr_debug("%s: Free sysfs stat\n", __func__); sysfs_remove_group(&policy->kobj, &stats_attr_group); + } if (policy) cpufreq_cpu_put(policy); } @@ -262,6 +264,19 @@ error_get_fail: return ret; } +static void cpufreq_stats_update_policy_cpu(struct cpufreq_policy *policy) +{ + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, + policy->last_cpu); + + pr_debug("Updating stats_table for new_cpu %u from last_cpu %u\n", + policy->cpu, policy->last_cpu); + per_cpu(cpufreq_stats_table, policy->cpu) = per_cpu(cpufreq_stats_table, + policy->last_cpu); + per_cpu(cpufreq_stats_table, policy->last_cpu) = NULL; + stat->cpu = policy->cpu; +} + static int cpufreq_stat_notifier_policy(struct notifier_block *nb, unsigned long val, void *data) { @@ -269,6 +284,12 @@ static int cpufreq_stat_notifier_policy(struct notifier_block *nb, struct cpufreq_policy *policy = data; struct cpufreq_frequency_table *table; unsigned int cpu = policy->cpu; + + if (val == CPUFREQ_UPDATE_POLICY_CPU) { + cpufreq_stats_update_policy_cpu(policy); + return 0; + } + if (val != CPUFREQ_NOTIFY) return 0; table = cpufreq_frequency_get_table(cpu); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index c8c3d293cc57..bbeb9c0720a6 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -118,8 +118,6 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, switch (event) { case CPUFREQ_GOV_START: - if (!cpu_online(cpu)) - return -EINVAL; BUG_ON(!policy->cur); mutex_lock(&userspace_mutex); diff --git a/drivers/cpufreq/db8500-cpufreq.c b/drivers/cpufreq/db8500-cpufreq.c index 4f154bc0ebe4..79a84860ea56 100644 --- a/drivers/cpufreq/db8500-cpufreq.c +++ b/drivers/cpufreq/db8500-cpufreq.c @@ -128,9 +128,7 @@ static int __cpuinit db8500_cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = 20 * 1000; /* in ns */ /* policy sharing between dual CPUs */ - cpumask_copy(policy->cpus, cpu_present_mask); - - policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; + cpumask_setall(policy->cpus); return 0; } diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c index 7012ea8bf1e7..81eb84a24fa7 100644 --- a/drivers/cpufreq/exynos-cpufreq.c +++ b/drivers/cpufreq/exynos-cpufreq.c @@ -227,19 +227,7 @@ static int exynos_cpufreq_cpu_init(struct cpufreq_policy *policy) /* set the transition latency value */ policy->cpuinfo.transition_latency = 100000; - /* - * EXYNOS4 multi-core processors has 2 cores - * that the frequency cannot be set independently. - * Each cpu is bound to the same speed. - * So the affected cpu is all of the cpus. - */ - if (num_online_cpus() == 1) { - cpumask_copy(policy->related_cpus, cpu_possible_mask); - cpumask_copy(policy->cpus, cpu_online_mask); - } else { - policy->shared_type = CPUFREQ_SHARED_TYPE_ANY; - cpumask_setall(policy->cpus); - } + cpumask_setall(policy->cpus); return cpufreq_frequency_table_cpuinfo(policy, exynos_info->freq_table); } diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 49cda256efb2..d7a79662e24c 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -63,9 +63,6 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, pr_debug("request for verification of policy (%u - %u kHz) for cpu %u\n", policy->min, policy->max, policy->cpu); - if (!cpu_online(policy->cpu)) - return -EINVAL; - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, policy->cpuinfo.max_freq); @@ -121,9 +118,6 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, break; } - if (!cpu_online(policy->cpu)) - return -EINVAL; - for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { unsigned int freq = table[i].frequency; if (freq == CPUFREQ_ENTRY_INVALID) @@ -227,6 +221,15 @@ void cpufreq_frequency_table_put_attr(unsigned int cpu) } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr); +void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy) +{ + pr_debug("Updating show_table for new_cpu %u from last_cpu %u\n", + policy->cpu, policy->last_cpu); + per_cpu(cpufreq_show_table, policy->cpu) = per_cpu(cpufreq_show_table, + policy->last_cpu); + per_cpu(cpufreq_show_table, policy->last_cpu) = NULL; +} + struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) { return per_cpu(cpufreq_show_table, cpu); diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c new file mode 100644 index 000000000000..66e3a71b81a3 --- /dev/null +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2012 Calxeda, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This driver provides the clk notifier callbacks that are used when + * the cpufreq-cpu0 driver changes to frequency to alert the highbank + * EnergyCore Management Engine (ECME) about the need to change + * voltage. The ECME interfaces with the actual voltage regulators. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#define HB_CPUFREQ_CHANGE_NOTE 0x80000001 +#define HB_CPUFREQ_IPC_LEN 7 +#define HB_CPUFREQ_VOLT_RETRIES 15 + +static int hb_voltage_change(unsigned int freq) +{ + int i; + u32 msg[HB_CPUFREQ_IPC_LEN]; + + msg[0] = HB_CPUFREQ_CHANGE_NOTE; + msg[1] = freq / 1000000; + for (i = 2; i < HB_CPUFREQ_IPC_LEN; i++) + msg[i] = 0; + + return pl320_ipc_transmit(msg); +} + +static int hb_cpufreq_clk_notify(struct notifier_block *nb, + unsigned long action, void *hclk) +{ + struct clk_notifier_data *clk_data = hclk; + int i = 0; + + if (action == PRE_RATE_CHANGE) { + if (clk_data->new_rate > clk_data->old_rate) + while (hb_voltage_change(clk_data->new_rate)) + if (i++ > HB_CPUFREQ_VOLT_RETRIES) + return NOTIFY_BAD; + } else if (action == POST_RATE_CHANGE) { + if (clk_data->new_rate < clk_data->old_rate) + while (hb_voltage_change(clk_data->new_rate)) + if (i++ > HB_CPUFREQ_VOLT_RETRIES) + return NOTIFY_BAD; + } + + return NOTIFY_DONE; +} + +static struct notifier_block hb_cpufreq_clk_nb = { + .notifier_call = hb_cpufreq_clk_notify, +}; + +static int hb_cpufreq_driver_init(void) +{ + struct platform_device_info devinfo = { .name = "cpufreq-cpu0", }; + struct device *cpu_dev; + struct clk *cpu_clk; + struct device_node *np; + int ret; + + if (!of_machine_is_compatible("calxeda,highbank")) + return -ENODEV; + + for_each_child_of_node(of_find_node_by_path("/cpus"), np) + if (of_get_property(np, "operating-points", NULL)) + break; + + if (!np) { + pr_err("failed to find highbank cpufreq node\n"); + return -ENOENT; + } + + cpu_dev = get_cpu_device(0); + if (!cpu_dev) { + pr_err("failed to get highbank cpufreq device\n"); + ret = -ENODEV; + goto out_put_node; + } + + cpu_dev->of_node = np; + + cpu_clk = clk_get(cpu_dev, NULL); + if (IS_ERR(cpu_clk)) { + ret = PTR_ERR(cpu_clk); + pr_err("failed to get cpu0 clock: %d\n", ret); + goto out_put_node; + } + + ret = clk_notifier_register(cpu_clk, &hb_cpufreq_clk_nb); + if (ret) { + pr_err("failed to register clk notifier: %d\n", ret); + goto out_put_node; + } + + /* Instantiate cpufreq-cpu0 */ + platform_device_register_full(&devinfo); + +out_put_node: + of_node_put(np); + return ret; +} +module_init(hb_cpufreq_driver_init); + +MODULE_AUTHOR("Mark Langsdorf "); +MODULE_DESCRIPTION("Calxeda Highbank cpufreq driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c new file mode 100644 index 000000000000..d6b6ef350cb6 --- /dev/null +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -0,0 +1,336 @@ +/* + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PU_SOC_VOLTAGE_NORMAL 1250000 +#define PU_SOC_VOLTAGE_HIGH 1275000 +#define FREQ_1P2_GHZ 1200000000 + +static struct regulator *arm_reg; +static struct regulator *pu_reg; +static struct regulator *soc_reg; + +static struct clk *arm_clk; +static struct clk *pll1_sys_clk; +static struct clk *pll1_sw_clk; +static struct clk *step_clk; +static struct clk *pll2_pfd2_396m_clk; + +static struct device *cpu_dev; +static struct cpufreq_frequency_table *freq_table; +static unsigned int transition_latency; + +static int imx6q_verify_speed(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, freq_table); +} + +static unsigned int imx6q_get_speed(unsigned int cpu) +{ + return clk_get_rate(arm_clk) / 1000; +} + +static int imx6q_set_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + struct cpufreq_freqs freqs; + struct opp *opp; + unsigned long freq_hz, volt, volt_old; + unsigned int index, cpu; + int ret; + + ret = cpufreq_frequency_table_target(policy, freq_table, target_freq, + relation, &index); + if (ret) { + dev_err(cpu_dev, "failed to match target frequency %d: %d\n", + target_freq, ret); + return ret; + } + + freqs.new = freq_table[index].frequency; + freq_hz = freqs.new * 1000; + freqs.old = clk_get_rate(arm_clk) / 1000; + + if (freqs.old == freqs.new) + return 0; + + for_each_online_cpu(cpu) { + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + rcu_read_lock(); + opp = opp_find_freq_ceil(cpu_dev, &freq_hz); + if (IS_ERR(opp)) { + rcu_read_unlock(); + dev_err(cpu_dev, "failed to find OPP for %ld\n", freq_hz); + return PTR_ERR(opp); + } + + volt = opp_get_voltage(opp); + rcu_read_unlock(); + volt_old = regulator_get_voltage(arm_reg); + + dev_dbg(cpu_dev, "%u MHz, %ld mV --> %u MHz, %ld mV\n", + freqs.old / 1000, volt_old / 1000, + freqs.new / 1000, volt / 1000); + + /* scaling up? scale voltage before frequency */ + if (freqs.new > freqs.old) { + ret = regulator_set_voltage_tol(arm_reg, volt, 0); + if (ret) { + dev_err(cpu_dev, + "failed to scale vddarm up: %d\n", ret); + return ret; + } + + /* + * Need to increase vddpu and vddsoc for safety + * if we are about to run at 1.2 GHz. + */ + if (freqs.new == FREQ_1P2_GHZ / 1000) { + regulator_set_voltage_tol(pu_reg, + PU_SOC_VOLTAGE_HIGH, 0); + regulator_set_voltage_tol(soc_reg, + PU_SOC_VOLTAGE_HIGH, 0); + } + } + + /* + * The setpoints are selected per PLL/PDF frequencies, so we need to + * reprogram PLL for frequency scaling. The procedure of reprogramming + * PLL1 is as below. + * + * - Enable pll2_pfd2_396m_clk and reparent pll1_sw_clk to it + * - Reprogram pll1_sys_clk and reparent pll1_sw_clk back to it + * - Disable pll2_pfd2_396m_clk + */ + clk_prepare_enable(pll2_pfd2_396m_clk); + clk_set_parent(step_clk, pll2_pfd2_396m_clk); + clk_set_parent(pll1_sw_clk, step_clk); + if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) { + clk_set_rate(pll1_sys_clk, freqs.new * 1000); + /* + * If we are leaving 396 MHz set-point, we need to enable + * pll1_sys_clk and disable pll2_pfd2_396m_clk to keep + * their use count correct. + */ + if (freqs.old * 1000 <= clk_get_rate(pll2_pfd2_396m_clk)) { + clk_prepare_enable(pll1_sys_clk); + clk_disable_unprepare(pll2_pfd2_396m_clk); + } + clk_set_parent(pll1_sw_clk, pll1_sys_clk); + clk_disable_unprepare(pll2_pfd2_396m_clk); + } else { + /* + * Disable pll1_sys_clk if pll2_pfd2_396m_clk is sufficient + * to provide the frequency. + */ + clk_disable_unprepare(pll1_sys_clk); + } + + /* Ensure the arm clock divider is what we expect */ + ret = clk_set_rate(arm_clk, freqs.new * 1000); + if (ret) { + dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); + regulator_set_voltage_tol(arm_reg, volt_old, 0); + return ret; + } + + /* scaling down? scale voltage after frequency */ + if (freqs.new < freqs.old) { + ret = regulator_set_voltage_tol(arm_reg, volt, 0); + if (ret) + dev_warn(cpu_dev, + "failed to scale vddarm down: %d\n", ret); + + if (freqs.old == FREQ_1P2_GHZ / 1000) { + regulator_set_voltage_tol(pu_reg, + PU_SOC_VOLTAGE_NORMAL, 0); + regulator_set_voltage_tol(soc_reg, + PU_SOC_VOLTAGE_NORMAL, 0); + } + } + + for_each_online_cpu(cpu) { + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + return 0; +} + +static int imx6q_cpufreq_init(struct cpufreq_policy *policy) +{ + int ret; + + ret = cpufreq_frequency_table_cpuinfo(policy, freq_table); + if (ret) { + dev_err(cpu_dev, "invalid frequency table: %d\n", ret); + return ret; + } + + policy->cpuinfo.transition_latency = transition_latency; + policy->cur = clk_get_rate(arm_clk) / 1000; + cpumask_setall(policy->cpus); + cpufreq_frequency_table_get_attr(freq_table, policy->cpu); + + return 0; +} + +static int imx6q_cpufreq_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr *imx6q_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver imx6q_cpufreq_driver = { + .verify = imx6q_verify_speed, + .target = imx6q_set_target, + .get = imx6q_get_speed, + .init = imx6q_cpufreq_init, + .exit = imx6q_cpufreq_exit, + .name = "imx6q-cpufreq", + .attr = imx6q_cpufreq_attr, +}; + +static int imx6q_cpufreq_probe(struct platform_device *pdev) +{ + struct device_node *np; + struct opp *opp; + unsigned long min_volt, max_volt; + int num, ret; + + cpu_dev = &pdev->dev; + + np = of_find_node_by_path("/cpus/cpu@0"); + if (!np) { + dev_err(cpu_dev, "failed to find cpu0 node\n"); + return -ENOENT; + } + + cpu_dev->of_node = np; + + arm_clk = devm_clk_get(cpu_dev, "arm"); + pll1_sys_clk = devm_clk_get(cpu_dev, "pll1_sys"); + pll1_sw_clk = devm_clk_get(cpu_dev, "pll1_sw"); + step_clk = devm_clk_get(cpu_dev, "step"); + pll2_pfd2_396m_clk = devm_clk_get(cpu_dev, "pll2_pfd2_396m"); + if (IS_ERR(arm_clk) || IS_ERR(pll1_sys_clk) || IS_ERR(pll1_sw_clk) || + IS_ERR(step_clk) || IS_ERR(pll2_pfd2_396m_clk)) { + dev_err(cpu_dev, "failed to get clocks\n"); + ret = -ENOENT; + goto put_node; + } + + arm_reg = devm_regulator_get(cpu_dev, "arm"); + pu_reg = devm_regulator_get(cpu_dev, "pu"); + soc_reg = devm_regulator_get(cpu_dev, "soc"); + if (!arm_reg || !pu_reg || !soc_reg) { + dev_err(cpu_dev, "failed to get regulators\n"); + ret = -ENOENT; + goto put_node; + } + + /* We expect an OPP table supplied by platform */ + num = opp_get_opp_count(cpu_dev); + if (num < 0) { + ret = num; + dev_err(cpu_dev, "no OPP table is found: %d\n", ret); + goto put_node; + } + + ret = opp_init_cpufreq_table(cpu_dev, &freq_table); + if (ret) { + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); + goto put_node; + } + + if (of_property_read_u32(np, "clock-latency", &transition_latency)) + transition_latency = CPUFREQ_ETERNAL; + + /* + * OPP is maintained in order of increasing frequency, and + * freq_table initialised from OPP is therefore sorted in the + * same order. + */ + rcu_read_lock(); + opp = opp_find_freq_exact(cpu_dev, + freq_table[0].frequency * 1000, true); + min_volt = opp_get_voltage(opp); + opp = opp_find_freq_exact(cpu_dev, + freq_table[--num].frequency * 1000, true); + max_volt = opp_get_voltage(opp); + rcu_read_unlock(); + ret = regulator_set_voltage_time(arm_reg, min_volt, max_volt); + if (ret > 0) + transition_latency += ret * 1000; + + /* Count vddpu and vddsoc latency in for 1.2 GHz support */ + if (freq_table[num].frequency == FREQ_1P2_GHZ / 1000) { + ret = regulator_set_voltage_time(pu_reg, PU_SOC_VOLTAGE_NORMAL, + PU_SOC_VOLTAGE_HIGH); + if (ret > 0) + transition_latency += ret * 1000; + ret = regulator_set_voltage_time(soc_reg, PU_SOC_VOLTAGE_NORMAL, + PU_SOC_VOLTAGE_HIGH); + if (ret > 0) + transition_latency += ret * 1000; + } + + ret = cpufreq_register_driver(&imx6q_cpufreq_driver); + if (ret) { + dev_err(cpu_dev, "failed register driver: %d\n", ret); + goto free_freq_table; + } + + of_node_put(np); + return 0; + +free_freq_table: + opp_free_cpufreq_table(cpu_dev, &freq_table); +put_node: + of_node_put(np); + return ret; +} + +static int imx6q_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&imx6q_cpufreq_driver); + opp_free_cpufreq_table(cpu_dev, &freq_table); + + return 0; +} + +static struct platform_driver imx6q_cpufreq_platdrv = { + .driver = { + .name = "imx6q-cpufreq", + .owner = THIS_MODULE, + }, + .probe = imx6q_cpufreq_probe, + .remove = imx6q_cpufreq_remove, +}; +module_platform_driver(imx6q_cpufreq_platdrv); + +MODULE_AUTHOR("Shawn Guo "); +MODULE_DESCRIPTION("Freescale i.MX6Q cpufreq driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c new file mode 100644 index 000000000000..e87996355da0 --- /dev/null +++ b/drivers/cpufreq/intel_pstate.c @@ -0,0 +1,807 @@ +/* + * cpufreq_snb.c: Native P state management for Intel processors + * + * (C) Copyright 2012 Intel Corporation + * Author: Dirk Brandewie + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define SAMPLE_COUNT 3 + +#define FRAC_BITS 8 +#define int_tofp(X) ((int64_t)(X) << FRAC_BITS) +#define fp_toint(X) ((X) >> FRAC_BITS) + +static inline int32_t mul_fp(int32_t x, int32_t y) +{ + return ((int64_t)x * (int64_t)y) >> FRAC_BITS; +} + +static inline int32_t div_fp(int32_t x, int32_t y) +{ + return div_s64((int64_t)x << FRAC_BITS, (int64_t)y); +} + +struct sample { + ktime_t start_time; + ktime_t end_time; + int core_pct_busy; + int pstate_pct_busy; + u64 duration_us; + u64 idletime_us; + u64 aperf; + u64 mperf; + int freq; +}; + +struct pstate_data { + int current_pstate; + int min_pstate; + int max_pstate; + int turbo_pstate; +}; + +struct _pid { + int setpoint; + int32_t integral; + int32_t p_gain; + int32_t i_gain; + int32_t d_gain; + int deadband; + int last_err; +}; + +struct cpudata { + int cpu; + + char name[64]; + + struct timer_list timer; + + struct pstate_adjust_policy *pstate_policy; + struct pstate_data pstate; + struct _pid pid; + struct _pid idle_pid; + + int min_pstate_count; + int idle_mode; + + ktime_t prev_sample; + u64 prev_idle_time_us; + u64 prev_aperf; + u64 prev_mperf; + int sample_ptr; + struct sample samples[SAMPLE_COUNT]; +}; + +static struct cpudata **all_cpu_data; +struct pstate_adjust_policy { + int sample_rate_ms; + int deadband; + int setpoint; + int p_gain_pct; + int d_gain_pct; + int i_gain_pct; +}; + +static struct pstate_adjust_policy default_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 109, + .p_gain_pct = 17, + .d_gain_pct = 0, + .i_gain_pct = 4, +}; + +struct perf_limits { + int no_turbo; + int max_perf_pct; + int min_perf_pct; + int32_t max_perf; + int32_t min_perf; +}; + +static struct perf_limits limits = { + .no_turbo = 0, + .max_perf_pct = 100, + .max_perf = int_tofp(1), + .min_perf_pct = 0, + .min_perf = 0, +}; + +static inline void pid_reset(struct _pid *pid, int setpoint, int busy, + int deadband, int integral) { + pid->setpoint = setpoint; + pid->deadband = deadband; + pid->integral = int_tofp(integral); + pid->last_err = setpoint - busy; +} + +static inline void pid_p_gain_set(struct _pid *pid, int percent) +{ + pid->p_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static inline void pid_i_gain_set(struct _pid *pid, int percent) +{ + pid->i_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static inline void pid_d_gain_set(struct _pid *pid, int percent) +{ + + pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static signed int pid_calc(struct _pid *pid, int busy) +{ + signed int err, result; + int32_t pterm, dterm, fp_error; + int32_t integral_limit; + + err = pid->setpoint - busy; + fp_error = int_tofp(err); + + if (abs(err) <= pid->deadband) + return 0; + + pterm = mul_fp(pid->p_gain, fp_error); + + pid->integral += fp_error; + + /* limit the integral term */ + integral_limit = int_tofp(30); + if (pid->integral > integral_limit) + pid->integral = integral_limit; + if (pid->integral < -integral_limit) + pid->integral = -integral_limit; + + dterm = mul_fp(pid->d_gain, (err - pid->last_err)); + pid->last_err = err; + + result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; + + return (signed int)fp_toint(result); +} + +static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) +{ + pid_p_gain_set(&cpu->pid, cpu->pstate_policy->p_gain_pct); + pid_d_gain_set(&cpu->pid, cpu->pstate_policy->d_gain_pct); + pid_i_gain_set(&cpu->pid, cpu->pstate_policy->i_gain_pct); + + pid_reset(&cpu->pid, + cpu->pstate_policy->setpoint, + 100, + cpu->pstate_policy->deadband, + 0); +} + +static inline void intel_pstate_idle_pid_reset(struct cpudata *cpu) +{ + pid_p_gain_set(&cpu->idle_pid, cpu->pstate_policy->p_gain_pct); + pid_d_gain_set(&cpu->idle_pid, cpu->pstate_policy->d_gain_pct); + pid_i_gain_set(&cpu->idle_pid, cpu->pstate_policy->i_gain_pct); + + pid_reset(&cpu->idle_pid, + 75, + 50, + cpu->pstate_policy->deadband, + 0); +} + +static inline void intel_pstate_reset_all_pid(void) +{ + unsigned int cpu; + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) + intel_pstate_busy_pid_reset(all_cpu_data[cpu]); + } +} + +/************************** debugfs begin ************************/ +static int pid_param_set(void *data, u64 val) +{ + *(u32 *)data = val; + intel_pstate_reset_all_pid(); + return 0; +} +static int pid_param_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, + pid_param_set, "%llu\n"); + +struct pid_param { + char *name; + void *value; +}; + +static struct pid_param pid_files[] = { + {"sample_rate_ms", &default_policy.sample_rate_ms}, + {"d_gain_pct", &default_policy.d_gain_pct}, + {"i_gain_pct", &default_policy.i_gain_pct}, + {"deadband", &default_policy.deadband}, + {"setpoint", &default_policy.setpoint}, + {"p_gain_pct", &default_policy.p_gain_pct}, + {NULL, NULL} +}; + +static struct dentry *debugfs_parent; +static void intel_pstate_debug_expose_params(void) +{ + int i = 0; + + debugfs_parent = debugfs_create_dir("pstate_snb", NULL); + if (IS_ERR_OR_NULL(debugfs_parent)) + return; + while (pid_files[i].name) { + debugfs_create_file(pid_files[i].name, 0660, + debugfs_parent, pid_files[i].value, + &fops_pid_param); + i++; + } +} + +/************************** debugfs end ************************/ + +/************************** sysfs begin ************************/ +#define show_one(file_name, object) \ + static ssize_t show_##file_name \ + (struct kobject *kobj, struct attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%u\n", limits.object); \ + } + +static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + limits.no_turbo = clamp_t(int, input, 0 , 1); + + return count; +} + +static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + limits.max_perf_pct = clamp_t(int, input, 0 , 100); + limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + return count; +} + +static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + limits.min_perf_pct = clamp_t(int, input, 0 , 100); + limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); + + return count; +} + +show_one(no_turbo, no_turbo); +show_one(max_perf_pct, max_perf_pct); +show_one(min_perf_pct, min_perf_pct); + +define_one_global_rw(no_turbo); +define_one_global_rw(max_perf_pct); +define_one_global_rw(min_perf_pct); + +static struct attribute *intel_pstate_attributes[] = { + &no_turbo.attr, + &max_perf_pct.attr, + &min_perf_pct.attr, + NULL +}; + +static struct attribute_group intel_pstate_attr_group = { + .attrs = intel_pstate_attributes, +}; +static struct kobject *intel_pstate_kobject; + +static void intel_pstate_sysfs_expose_params(void) +{ + int rc; + + intel_pstate_kobject = kobject_create_and_add("intel_pstate", + &cpu_subsys.dev_root->kobj); + BUG_ON(!intel_pstate_kobject); + rc = sysfs_create_group(intel_pstate_kobject, + &intel_pstate_attr_group); + BUG_ON(rc); +} + +/************************** sysfs end ************************/ + +static int intel_pstate_min_pstate(void) +{ + u64 value; + rdmsrl(0xCE, value); + return (value >> 40) & 0xFF; +} + +static int intel_pstate_max_pstate(void) +{ + u64 value; + rdmsrl(0xCE, value); + return (value >> 8) & 0xFF; +} + +static int intel_pstate_turbo_pstate(void) +{ + u64 value; + int nont, ret; + rdmsrl(0x1AD, value); + nont = intel_pstate_max_pstate(); + ret = ((value) & 255); + if (ret <= nont) + ret = nont; + return ret; +} + +static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) +{ + int max_perf = cpu->pstate.turbo_pstate; + int min_perf; + if (limits.no_turbo) + max_perf = cpu->pstate.max_pstate; + + max_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); + *max = clamp_t(int, max_perf, + cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); + + min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf)); + *min = clamp_t(int, min_perf, + cpu->pstate.min_pstate, max_perf); +} + +static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) +{ + int max_perf, min_perf; + + intel_pstate_get_min_max(cpu, &min_perf, &max_perf); + + pstate = clamp_t(int, pstate, min_perf, max_perf); + + if (pstate == cpu->pstate.current_pstate) + return; + +#ifndef MODULE + trace_cpu_frequency(pstate * 100000, cpu->cpu); +#endif + cpu->pstate.current_pstate = pstate; + wrmsrl(MSR_IA32_PERF_CTL, pstate << 8); + +} + +static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps) +{ + int target; + target = cpu->pstate.current_pstate + steps; + + intel_pstate_set_pstate(cpu, target); +} + +static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps) +{ + int target; + target = cpu->pstate.current_pstate - steps; + intel_pstate_set_pstate(cpu, target); +} + +static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) +{ + sprintf(cpu->name, "Intel 2nd generation core"); + + cpu->pstate.min_pstate = intel_pstate_min_pstate(); + cpu->pstate.max_pstate = intel_pstate_max_pstate(); + cpu->pstate.turbo_pstate = intel_pstate_turbo_pstate(); + + /* + * goto max pstate so we don't slow up boot if we are built-in if we are + * a module we will take care of it during normal operation + */ + intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); +} + +static inline void intel_pstate_calc_busy(struct cpudata *cpu, + struct sample *sample) +{ + u64 core_pct; + sample->pstate_pct_busy = 100 - div64_u64( + sample->idletime_us * 100, + sample->duration_us); + core_pct = div64_u64(sample->aperf * 100, sample->mperf); + sample->freq = cpu->pstate.turbo_pstate * core_pct * 1000; + + sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct), + 100); +} + +static inline void intel_pstate_sample(struct cpudata *cpu) +{ + ktime_t now; + u64 idle_time_us; + u64 aperf, mperf; + + now = ktime_get(); + idle_time_us = get_cpu_idle_time_us(cpu->cpu, NULL); + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + /* for the first sample, don't actually record a sample, just + * set the baseline */ + if (cpu->prev_idle_time_us > 0) { + cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; + cpu->samples[cpu->sample_ptr].start_time = cpu->prev_sample; + cpu->samples[cpu->sample_ptr].end_time = now; + cpu->samples[cpu->sample_ptr].duration_us = + ktime_us_delta(now, cpu->prev_sample); + cpu->samples[cpu->sample_ptr].idletime_us = + idle_time_us - cpu->prev_idle_time_us; + + cpu->samples[cpu->sample_ptr].aperf = aperf; + cpu->samples[cpu->sample_ptr].mperf = mperf; + cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; + cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; + + intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); + } + + cpu->prev_sample = now; + cpu->prev_idle_time_us = idle_time_us; + cpu->prev_aperf = aperf; + cpu->prev_mperf = mperf; +} + +static inline void intel_pstate_set_sample_time(struct cpudata *cpu) +{ + int sample_time, delay; + + sample_time = cpu->pstate_policy->sample_rate_ms; + delay = msecs_to_jiffies(sample_time); + delay -= jiffies % delay; + mod_timer_pinned(&cpu->timer, jiffies + delay); +} + +static inline void intel_pstate_idle_mode(struct cpudata *cpu) +{ + cpu->idle_mode = 1; +} + +static inline void intel_pstate_normal_mode(struct cpudata *cpu) +{ + cpu->idle_mode = 0; +} + +static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu) +{ + int32_t busy_scaled; + int32_t core_busy, turbo_pstate, current_pstate; + + core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy); + turbo_pstate = int_tofp(cpu->pstate.turbo_pstate); + current_pstate = int_tofp(cpu->pstate.current_pstate); + busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate)); + + return fp_toint(busy_scaled); +} + +static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) +{ + int busy_scaled; + struct _pid *pid; + signed int ctl = 0; + int steps; + + pid = &cpu->pid; + busy_scaled = intel_pstate_get_scaled_busy(cpu); + + ctl = pid_calc(pid, busy_scaled); + + steps = abs(ctl); + if (ctl < 0) + intel_pstate_pstate_increase(cpu, steps); + else + intel_pstate_pstate_decrease(cpu, steps); +} + +static inline void intel_pstate_adjust_idle_pstate(struct cpudata *cpu) +{ + int busy_scaled; + struct _pid *pid; + int ctl = 0; + int steps; + + pid = &cpu->idle_pid; + + busy_scaled = intel_pstate_get_scaled_busy(cpu); + + ctl = pid_calc(pid, 100 - busy_scaled); + + steps = abs(ctl); + if (ctl < 0) + intel_pstate_pstate_decrease(cpu, steps); + else + intel_pstate_pstate_increase(cpu, steps); + + if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) + intel_pstate_normal_mode(cpu); +} + +static void intel_pstate_timer_func(unsigned long __data) +{ + struct cpudata *cpu = (struct cpudata *) __data; + + intel_pstate_sample(cpu); + + if (!cpu->idle_mode) + intel_pstate_adjust_busy_pstate(cpu); + else + intel_pstate_adjust_idle_pstate(cpu); + +#if defined(XPERF_FIX) + if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) { + cpu->min_pstate_count++; + if (!(cpu->min_pstate_count % 5)) { + intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); + intel_pstate_idle_mode(cpu); + } + } else + cpu->min_pstate_count = 0; +#endif + intel_pstate_set_sample_time(cpu); +} + +#define ICPU(model, policy) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&policy } + +static const struct x86_cpu_id intel_pstate_cpu_ids[] = { + ICPU(0x2a, default_policy), + ICPU(0x2d, default_policy), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); + +static int intel_pstate_init_cpu(unsigned int cpunum) +{ + + const struct x86_cpu_id *id; + struct cpudata *cpu; + + id = x86_match_cpu(intel_pstate_cpu_ids); + if (!id) + return -ENODEV; + + all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL); + if (!all_cpu_data[cpunum]) + return -ENOMEM; + + cpu = all_cpu_data[cpunum]; + + intel_pstate_get_cpu_pstates(cpu); + + cpu->cpu = cpunum; + cpu->pstate_policy = + (struct pstate_adjust_policy *)id->driver_data; + init_timer_deferrable(&cpu->timer); + cpu->timer.function = intel_pstate_timer_func; + cpu->timer.data = + (unsigned long)cpu; + cpu->timer.expires = jiffies + HZ/100; + intel_pstate_busy_pid_reset(cpu); + intel_pstate_idle_pid_reset(cpu); + intel_pstate_sample(cpu); + intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); + + add_timer_on(&cpu->timer, cpunum); + + pr_info("Intel pstate controlling: cpu %d\n", cpunum); + + return 0; +} + +static unsigned int intel_pstate_get(unsigned int cpu_num) +{ + struct sample *sample; + struct cpudata *cpu; + + cpu = all_cpu_data[cpu_num]; + if (!cpu) + return 0; + sample = &cpu->samples[cpu->sample_ptr]; + return sample->freq; +} + +static int intel_pstate_set_policy(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + int min, max; + + cpu = all_cpu_data[policy->cpu]; + + intel_pstate_get_min_max(cpu, &min, &max); + + limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; + limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); + limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); + + limits.max_perf_pct = policy->max * 100 / policy->cpuinfo.max_freq; + limits.max_perf_pct = clamp_t(int, limits.max_perf_pct, 0 , 100); + limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + limits.min_perf_pct = 100; + limits.min_perf = int_tofp(1); + limits.max_perf_pct = 100; + limits.max_perf = int_tofp(1); + limits.no_turbo = 0; + } + + return 0; +} + +static int intel_pstate_verify_policy(struct cpufreq_policy *policy) +{ + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && + (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) + return -EINVAL; + + return 0; +} + +static int __cpuinit intel_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + int cpu = policy->cpu; + + del_timer(&all_cpu_data[cpu]->timer); + kfree(all_cpu_data[cpu]); + all_cpu_data[cpu] = NULL; + return 0; +} + +static int __cpuinit intel_pstate_cpu_init(struct cpufreq_policy *policy) +{ + int rc, min_pstate, max_pstate; + struct cpudata *cpu; + + rc = intel_pstate_init_cpu(policy->cpu); + if (rc) + return rc; + + cpu = all_cpu_data[policy->cpu]; + + if (!limits.no_turbo && + limits.min_perf_pct == 100 && limits.max_perf_pct == 100) + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + else + policy->policy = CPUFREQ_POLICY_POWERSAVE; + + intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate); + policy->min = min_pstate * 100000; + policy->max = max_pstate * 100000; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000; + policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + cpumask_set_cpu(policy->cpu, policy->cpus); + + return 0; +} + +static struct cpufreq_driver intel_pstate_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = intel_pstate_verify_policy, + .setpolicy = intel_pstate_set_policy, + .get = intel_pstate_get, + .init = intel_pstate_cpu_init, + .exit = intel_pstate_cpu_exit, + .name = "intel_pstate", + .owner = THIS_MODULE, +}; + +static void intel_pstate_exit(void) +{ + int cpu; + + sysfs_remove_group(intel_pstate_kobject, + &intel_pstate_attr_group); + debugfs_remove_recursive(debugfs_parent); + + cpufreq_unregister_driver(&intel_pstate_driver); + + if (!all_cpu_data) + return; + + get_online_cpus(); + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) { + del_timer_sync(&all_cpu_data[cpu]->timer); + kfree(all_cpu_data[cpu]); + } + } + + put_online_cpus(); + vfree(all_cpu_data); +} +module_exit(intel_pstate_exit); + +static int __init intel_pstate_init(void) +{ + int rc = 0; + const struct x86_cpu_id *id; + + id = x86_match_cpu(intel_pstate_cpu_ids); + if (!id) + return -ENODEV; + + pr_info("Intel P-state driver initializing.\n"); + + all_cpu_data = vmalloc(sizeof(void *) * num_possible_cpus()); + if (!all_cpu_data) + return -ENOMEM; + memset(all_cpu_data, 0, sizeof(void *) * num_possible_cpus()); + + rc = cpufreq_register_driver(&intel_pstate_driver); + if (rc) + goto out; + + intel_pstate_debug_expose_params(); + intel_pstate_sysfs_expose_params(); + return rc; +out: + intel_pstate_exit(); + return -ENODEV; +} +device_initcall(intel_pstate_init); + +MODULE_AUTHOR("Dirk Brandewie "); +MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c new file mode 100644 index 000000000000..0e83e3c24f5b --- /dev/null +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -0,0 +1,259 @@ +/* + * kirkwood_freq.c: cpufreq driver for the Marvell kirkwood + * + * Copyright (C) 2013 Andrew Lunn + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CPU_SW_INT_BLK BIT(28) + +static struct priv +{ + struct clk *cpu_clk; + struct clk *ddr_clk; + struct clk *powersave_clk; + struct device *dev; + void __iomem *base; +} priv; + +#define STATE_CPU_FREQ 0x01 +#define STATE_DDR_FREQ 0x02 + +/* + * Kirkwood can swap the clock to the CPU between two clocks: + * + * - cpu clk + * - ddr clk + * + * The frequencies are set at runtime before registering this * + * table. + */ +static struct cpufreq_frequency_table kirkwood_freq_table[] = { + {STATE_CPU_FREQ, 0}, /* CPU uses cpuclk */ + {STATE_DDR_FREQ, 0}, /* CPU uses ddrclk */ + {0, CPUFREQ_TABLE_END}, +}; + +static unsigned int kirkwood_cpufreq_get_cpu_frequency(unsigned int cpu) +{ + if (__clk_is_enabled(priv.powersave_clk)) + return kirkwood_freq_table[1].frequency; + return kirkwood_freq_table[0].frequency; +} + +static void kirkwood_cpufreq_set_cpu_state(unsigned int index) +{ + struct cpufreq_freqs freqs; + unsigned int state = kirkwood_freq_table[index].index; + unsigned long reg; + + freqs.old = kirkwood_cpufreq_get_cpu_frequency(0); + freqs.new = kirkwood_freq_table[index].frequency; + freqs.cpu = 0; /* Kirkwood is UP */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dev_dbg(priv.dev, "Attempting to set frequency to %i KHz\n", + kirkwood_freq_table[index].frequency); + dev_dbg(priv.dev, "old frequency was %i KHz\n", + kirkwood_cpufreq_get_cpu_frequency(0)); + + if (freqs.old != freqs.new) { + local_irq_disable(); + + /* Disable interrupts to the CPU */ + reg = readl_relaxed(priv.base); + reg |= CPU_SW_INT_BLK; + writel_relaxed(reg, priv.base); + + switch (state) { + case STATE_CPU_FREQ: + clk_disable(priv.powersave_clk); + break; + case STATE_DDR_FREQ: + clk_enable(priv.powersave_clk); + break; + } + + /* Wait-for-Interrupt, while the hardware changes frequency */ + cpu_do_idle(); + + /* Enable interrupts to the CPU */ + reg = readl_relaxed(priv.base); + reg &= ~CPU_SW_INT_BLK; + writel_relaxed(reg, priv.base); + + local_irq_enable(); + } + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + +static int kirkwood_cpufreq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, kirkwood_freq_table); +} + +static int kirkwood_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int index = 0; + + if (cpufreq_frequency_table_target(policy, kirkwood_freq_table, + target_freq, relation, &index)) + return -EINVAL; + + kirkwood_cpufreq_set_cpu_state(index); + + return 0; +} + +/* Module init and exit code */ +static int kirkwood_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + int result; + + /* cpuinfo and default policy values */ + policy->cpuinfo.transition_latency = 5000; /* 5uS */ + policy->cur = kirkwood_cpufreq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, kirkwood_freq_table); + if (result) + return result; + + cpufreq_frequency_table_get_attr(kirkwood_freq_table, policy->cpu); + + return 0; +} + +static int kirkwood_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr *kirkwood_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver kirkwood_cpufreq_driver = { + .get = kirkwood_cpufreq_get_cpu_frequency, + .verify = kirkwood_cpufreq_verify, + .target = kirkwood_cpufreq_target, + .init = kirkwood_cpufreq_cpu_init, + .exit = kirkwood_cpufreq_cpu_exit, + .name = "kirkwood-cpufreq", + .owner = THIS_MODULE, + .attr = kirkwood_cpufreq_attr, +}; + +static int kirkwood_cpufreq_probe(struct platform_device *pdev) +{ + struct device_node *np; + struct resource *res; + int err; + + priv.dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Cannot get memory resource\n"); + return -ENODEV; + } + priv.base = devm_request_and_ioremap(&pdev->dev, res); + if (!priv.base) { + dev_err(&pdev->dev, "Cannot ioremap\n"); + return -EADDRNOTAVAIL; + } + + np = of_find_node_by_path("/cpus/cpu@0"); + if (!np) + return -ENODEV; + + priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); + if (IS_ERR(priv.cpu_clk)) { + dev_err(priv.dev, "Unable to get cpuclk"); + return PTR_ERR(priv.cpu_clk); + } + + clk_prepare_enable(priv.cpu_clk); + kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000; + + priv.ddr_clk = of_clk_get_by_name(np, "ddrclk"); + if (IS_ERR(priv.ddr_clk)) { + dev_err(priv.dev, "Unable to get ddrclk"); + err = PTR_ERR(priv.ddr_clk); + goto out_cpu; + } + + clk_prepare_enable(priv.ddr_clk); + kirkwood_freq_table[1].frequency = clk_get_rate(priv.ddr_clk) / 1000; + + priv.powersave_clk = of_clk_get_by_name(np, "powersave"); + if (IS_ERR(priv.powersave_clk)) { + dev_err(priv.dev, "Unable to get powersave"); + err = PTR_ERR(priv.powersave_clk); + goto out_ddr; + } + clk_prepare(priv.powersave_clk); + + of_node_put(np); + np = NULL; + + err = cpufreq_register_driver(&kirkwood_cpufreq_driver); + if (!err) + return 0; + + dev_err(priv.dev, "Failed to register cpufreq driver"); + + clk_disable_unprepare(priv.powersave_clk); +out_ddr: + clk_disable_unprepare(priv.ddr_clk); +out_cpu: + clk_disable_unprepare(priv.cpu_clk); + of_node_put(np); + + return err; +} + +static int kirkwood_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&kirkwood_cpufreq_driver); + + clk_disable_unprepare(priv.powersave_clk); + clk_disable_unprepare(priv.ddr_clk); + clk_disable_unprepare(priv.cpu_clk); + + return 0; +} + +static struct platform_driver kirkwood_cpufreq_platform_driver = { + .probe = kirkwood_cpufreq_probe, + .remove = kirkwood_cpufreq_remove, + .driver = { + .name = "kirkwood-cpufreq", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(kirkwood_cpufreq_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Andrew Lunn cpus, cpu_online_mask); + cpumask_setall(policy->cpus); cpufreq_frequency_table_get_attr(maple_cpu_freqs, policy->cpu); return cpufreq_frequency_table_cpuinfo(policy, diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 97102b05843f..9128c07bafba 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -214,10 +214,8 @@ static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy) * interface to handle this scenario. Additional is_smp() check * is to keep SMP_ON_UP build working. */ - if (is_smp()) { - policy->shared_type = CPUFREQ_SHARED_TYPE_ANY; + if (is_smp()) cpumask_setall(policy->cpus); - } /* FIXME: what's the actual transition time? */ policy->cpuinfo.transition_latency = 300 * 1000; diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 056faf6af1a9..d13a13678b5f 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1249,39 +1249,59 @@ static struct cpufreq_driver cpufreq_amd64_driver = { .attr = powernow_k8_attr, }; +static void __request_acpi_cpufreq(void) +{ + const char *cur_drv, *drv = "acpi-cpufreq"; + + cur_drv = cpufreq_get_current_driver(); + if (!cur_drv) + goto request; + + if (strncmp(cur_drv, drv, min_t(size_t, strlen(cur_drv), strlen(drv)))) + pr_warn(PFX "WTF driver: %s\n", cur_drv); + + return; + + request: + pr_warn(PFX "This CPU is not supported anymore, using acpi-cpufreq instead.\n"); + request_module(drv); +} + /* driver entry point for init */ static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; - int rv; + int ret; if (static_cpu_has(X86_FEATURE_HW_PSTATE)) { - pr_warn(PFX "this CPU is not supported anymore, using acpi-cpufreq instead.\n"); - request_module("acpi-cpufreq"); + __request_acpi_cpufreq(); return -ENODEV; } if (!x86_match_cpu(powernow_k8_ids)) return -ENODEV; + get_online_cpus(); for_each_online_cpu(i) { - int rc; - smp_call_function_single(i, check_supported_cpu, &rc, 1); - if (rc == 0) + smp_call_function_single(i, check_supported_cpu, &ret, 1); + if (!ret) supported_cpus++; } - if (supported_cpus != num_online_cpus()) + if (supported_cpus != num_online_cpus()) { + put_online_cpus(); return -ENODEV; + } + put_online_cpus(); - rv = cpufreq_register_driver(&cpufreq_amd64_driver); + ret = cpufreq_register_driver(&cpufreq_amd64_driver); + if (ret) + return ret; - if (!rv) - pr_info(PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", - num_online_nodes(), boot_cpu_data.x86_model_id, - supported_cpus); + pr_info(PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", + num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); - return rv; + return ret; } /* driver entry point for term */ diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 4575cfe41755..7e4d77327957 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -30,7 +30,7 @@ static struct { u32 cnt; } spear_cpufreq; -int spear_cpufreq_verify(struct cpufreq_policy *policy) +static int spear_cpufreq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, spear_cpufreq.freq_tbl); } @@ -157,7 +157,9 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy, freqs.new = newfreq / 1000; freqs.new /= mult; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + for_each_cpu(freqs.cpu, policy->cpus) + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); if (mult == 2) ret = spear1340_set_cpu_rate(srcclk, newfreq); @@ -170,7 +172,8 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy, freqs.new = clk_get_rate(spear_cpufreq.clk) / 1000; } - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + for_each_cpu(freqs.cpu, policy->cpus) + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); return ret; } @@ -188,8 +191,7 @@ static int spear_cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = spear_cpufreq.transition_latency; policy->cur = spear_cpufreq_get(0); - cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); - cpumask_copy(policy->related_cpus, policy->cpus); + cpumask_setall(policy->cpus); return 0; } diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig new file mode 100644 index 000000000000..9545c9f03809 --- /dev/null +++ b/drivers/mailbox/Kconfig @@ -0,0 +1,19 @@ +menuconfig MAILBOX + bool "Mailbox Hardware Support" + help + Mailbox is a framework to control hardware communication between + on-chip processors through queued messages and interrupt driven + signals. Say Y if your platform supports hardware mailboxes. + +if MAILBOX +config PL320_MBOX + bool "ARM PL320 Mailbox" + depends on ARM_AMBA + help + An implementation of the ARM PL320 Interprocessor Communication + Mailbox (IPCM), tailored for the Calxeda Highbank. It is used to + send short messages between Highbank's A9 cores and the EnergyCore + Management Engine, primarily for cpufreq. Say Y here if you want + to use the PL320 IPCM support. + +endif diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile new file mode 100644 index 000000000000..543ad6a79505 --- /dev/null +++ b/drivers/mailbox/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_PL320_MBOX) += pl320-ipc.o diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c new file mode 100644 index 000000000000..c45b3aedafba --- /dev/null +++ b/drivers/mailbox/pl320-ipc.c @@ -0,0 +1,199 @@ +/* + * Copyright 2012 Calxeda, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define IPCMxSOURCE(m) ((m) * 0x40) +#define IPCMxDSET(m) (((m) * 0x40) + 0x004) +#define IPCMxDCLEAR(m) (((m) * 0x40) + 0x008) +#define IPCMxDSTATUS(m) (((m) * 0x40) + 0x00C) +#define IPCMxMODE(m) (((m) * 0x40) + 0x010) +#define IPCMxMSET(m) (((m) * 0x40) + 0x014) +#define IPCMxMCLEAR(m) (((m) * 0x40) + 0x018) +#define IPCMxMSTATUS(m) (((m) * 0x40) + 0x01C) +#define IPCMxSEND(m) (((m) * 0x40) + 0x020) +#define IPCMxDR(m, dr) (((m) * 0x40) + ((dr) * 4) + 0x024) + +#define IPCMMIS(irq) (((irq) * 8) + 0x800) +#define IPCMRIS(irq) (((irq) * 8) + 0x804) + +#define MBOX_MASK(n) (1 << (n)) +#define IPC_TX_MBOX 1 +#define IPC_RX_MBOX 2 + +#define CHAN_MASK(n) (1 << (n)) +#define A9_SOURCE 1 +#define M3_SOURCE 0 + +static void __iomem *ipc_base; +static int ipc_irq; +static DEFINE_MUTEX(ipc_m1_lock); +static DECLARE_COMPLETION(ipc_completion); +static ATOMIC_NOTIFIER_HEAD(ipc_notifier); + +static inline void set_destination(int source, int mbox) +{ + __raw_writel(CHAN_MASK(source), ipc_base + IPCMxDSET(mbox)); + __raw_writel(CHAN_MASK(source), ipc_base + IPCMxMSET(mbox)); +} + +static inline void clear_destination(int source, int mbox) +{ + __raw_writel(CHAN_MASK(source), ipc_base + IPCMxDCLEAR(mbox)); + __raw_writel(CHAN_MASK(source), ipc_base + IPCMxMCLEAR(mbox)); +} + +static void __ipc_send(int mbox, u32 *data) +{ + int i; + for (i = 0; i < 7; i++) + __raw_writel(data[i], ipc_base + IPCMxDR(mbox, i)); + __raw_writel(0x1, ipc_base + IPCMxSEND(mbox)); +} + +static u32 __ipc_rcv(int mbox, u32 *data) +{ + int i; + for (i = 0; i < 7; i++) + data[i] = __raw_readl(ipc_base + IPCMxDR(mbox, i)); + return data[1]; +} + +/* blocking implmentation from the A9 side, not usuable in interrupts! */ +int pl320_ipc_transmit(u32 *data) +{ + int ret; + + mutex_lock(&ipc_m1_lock); + + init_completion(&ipc_completion); + __ipc_send(IPC_TX_MBOX, data); + ret = wait_for_completion_timeout(&ipc_completion, + msecs_to_jiffies(1000)); + if (ret == 0) { + ret = -ETIMEDOUT; + goto out; + } + + ret = __ipc_rcv(IPC_TX_MBOX, data); +out: + mutex_unlock(&ipc_m1_lock); + return ret; +} +EXPORT_SYMBOL_GPL(pl320_ipc_transmit); + +static irqreturn_t ipc_handler(int irq, void *dev) +{ + u32 irq_stat; + u32 data[7]; + + irq_stat = __raw_readl(ipc_base + IPCMMIS(1)); + if (irq_stat & MBOX_MASK(IPC_TX_MBOX)) { + __raw_writel(0, ipc_base + IPCMxSEND(IPC_TX_MBOX)); + complete(&ipc_completion); + } + if (irq_stat & MBOX_MASK(IPC_RX_MBOX)) { + __ipc_rcv(IPC_RX_MBOX, data); + atomic_notifier_call_chain(&ipc_notifier, data[0], data + 1); + __raw_writel(2, ipc_base + IPCMxSEND(IPC_RX_MBOX)); + } + + return IRQ_HANDLED; +} + +int pl320_ipc_register_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&ipc_notifier, nb); +} +EXPORT_SYMBOL_GPL(pl320_ipc_register_notifier); + +int pl320_ipc_unregister_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&ipc_notifier, nb); +} +EXPORT_SYMBOL_GPL(pl320_ipc_unregister_notifier); + +static int __init pl320_probe(struct amba_device *adev, + const struct amba_id *id) +{ + int ret; + + ipc_base = ioremap(adev->res.start, resource_size(&adev->res)); + if (ipc_base == NULL) + return -ENOMEM; + + __raw_writel(0, ipc_base + IPCMxSEND(IPC_TX_MBOX)); + + ipc_irq = adev->irq[0]; + ret = request_irq(ipc_irq, ipc_handler, 0, dev_name(&adev->dev), NULL); + if (ret < 0) + goto err; + + /* Init slow mailbox */ + __raw_writel(CHAN_MASK(A9_SOURCE), + ipc_base + IPCMxSOURCE(IPC_TX_MBOX)); + __raw_writel(CHAN_MASK(M3_SOURCE), + ipc_base + IPCMxDSET(IPC_TX_MBOX)); + __raw_writel(CHAN_MASK(M3_SOURCE) | CHAN_MASK(A9_SOURCE), + ipc_base + IPCMxMSET(IPC_TX_MBOX)); + + /* Init receive mailbox */ + __raw_writel(CHAN_MASK(M3_SOURCE), + ipc_base + IPCMxSOURCE(IPC_RX_MBOX)); + __raw_writel(CHAN_MASK(A9_SOURCE), + ipc_base + IPCMxDSET(IPC_RX_MBOX)); + __raw_writel(CHAN_MASK(M3_SOURCE) | CHAN_MASK(A9_SOURCE), + ipc_base + IPCMxMSET(IPC_RX_MBOX)); + + return 0; +err: + iounmap(ipc_base); + return ret; +} + +static struct amba_id pl320_ids[] = { + { + .id = 0x00041320, + .mask = 0x000fffff, + }, + { 0, 0 }, +}; + +static struct amba_driver pl320_driver = { + .drv = { + .name = "pl320", + }, + .id_table = pl320_ids, + .probe = pl320_probe, +}; + +static int __init ipc_init(void) +{ + return amba_driver_register(&pl320_driver); +} +module_init(ipc_init); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a55b88eaf96a..a22944ca0526 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -89,11 +89,15 @@ struct cpufreq_real_policy { }; struct cpufreq_policy { - cpumask_var_t cpus; /* CPUs requiring sw coordination */ - cpumask_var_t related_cpus; /* CPUs with any coordination */ - unsigned int shared_type; /* ANY or ALL affected CPUs + /* CPUs sharing clock, require sw coordination */ + cpumask_var_t cpus; /* Online CPUs only */ + cpumask_var_t related_cpus; /* Online + Offline CPUs */ + + unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ - unsigned int cpu; /* cpu nr of registered CPU */ + unsigned int cpu; /* cpu nr of CPU managing this policy */ + unsigned int last_cpu; /* cpu nr of previous CPU that managed + * this policy */ struct cpufreq_cpuinfo cpuinfo;/* see above */ unsigned int min; /* in kHz */ @@ -112,16 +116,23 @@ struct cpufreq_policy { struct completion kobj_unregister; }; -#define CPUFREQ_ADJUST (0) -#define CPUFREQ_INCOMPATIBLE (1) -#define CPUFREQ_NOTIFY (2) -#define CPUFREQ_START (3) +#define CPUFREQ_ADJUST (0) +#define CPUFREQ_INCOMPATIBLE (1) +#define CPUFREQ_NOTIFY (2) +#define CPUFREQ_START (3) +#define CPUFREQ_UPDATE_POLICY_CPU (4) +/* Only for ACPI */ #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ #define CPUFREQ_SHARED_TYPE_ALL (2) /* All dependent CPUs should set freq */ #define CPUFREQ_SHARED_TYPE_ANY (3) /* Freq can be set from any dependent CPU*/ +static inline bool policy_is_shared(struct cpufreq_policy *policy) +{ + return cpumask_weight(policy->cpus) > 1; +} + /******************** cpufreq transition notifiers *******************/ #define CPUFREQ_PRECHANGE (0) @@ -173,6 +184,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; + int initialized; int (*governor) (struct cpufreq_policy *policy, unsigned int event); ssize_t (*show_setspeed) (struct cpufreq_policy *policy, @@ -308,6 +320,9 @@ __ATTR(_name, 0444, show_##_name, NULL) static struct global_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) +struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); +void cpufreq_cpu_put(struct cpufreq_policy *data); +const char *cpufreq_get_current_driver(void); /********************************************************************* * CPUFREQ 2.6. INTERFACE * @@ -397,14 +412,13 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, /* the following 3 funtions are for cpufreq core use only */ struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu); -struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); -void cpufreq_cpu_put(struct cpufreq_policy *data); /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu); +void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy); void cpufreq_frequency_table_put_attr(unsigned int cpu); #endif /* _LINUX_CPUFREQ_H */ diff --git a/include/linux/mailbox.h b/include/linux/mailbox.h new file mode 100644 index 000000000000..5161f63ec1c8 --- /dev/null +++ b/include/linux/mailbox.h @@ -0,0 +1,17 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +int pl320_ipc_transmit(u32 *data); +int pl320_ipc_register_notifier(struct notifier_block *nb); +int pl320_ipc_unregister_notifier(struct notifier_block *nb);