From 5f98ced1c95e7706af6895f7b7b0d2216f075d59 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 9 Mar 2017 16:30:38 +0100
Subject: [PATCH 01/56] cpufreq: intel_pstate: Drop redundant wrapper function

intel_pstate_hwp_set_policy() is a wrapper around
intel_pstate_hwp_set(), but the only value it adds is to check
hwp_active before calling the latter and one of its two callers
has already checked hwp_active before that happens, so in that
code path the additional check is redundant and using the wrapper
is rather pointless.

For this reason, drop intel_pstate_hwp_set_policy() and make its
callers invoke intel_pstate_hwp_set() directly (after checking
hwp_active).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/intel_pstate.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3d37219a0dd7..162657228c15 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -930,14 +930,6 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 	}
 }
 
-static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
-{
-	if (hwp_active)
-		intel_pstate_hwp_set(policy);
-
-	return 0;
-}
-
 static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
@@ -952,20 +944,17 @@ static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
 
 static int intel_pstate_resume(struct cpufreq_policy *policy)
 {
-	int ret;
-
 	if (!hwp_active)
 		return 0;
 
 	mutex_lock(&intel_pstate_limits_lock);
 
 	all_cpu_data[policy->cpu]->epp_policy = 0;
-
-	ret = intel_pstate_hwp_set_policy(policy);
+	intel_pstate_hwp_set(policy);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
-	return ret;
+	return 0;
 }
 
 static void intel_pstate_update_policies(void)
@@ -2169,7 +2158,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
 	intel_pstate_set_update_util_hook(policy->cpu);
 
-	intel_pstate_hwp_set_policy(policy);
+	if (hwp_active)
+		intel_pstate_hwp_set(policy);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 

From 7db36b1c3ca0904c1673f2ccab4099b25637e49f Mon Sep 17 00:00:00 2001
From: David Wu <david.wu@rock-chips.com>
Date: Thu, 23 Feb 2017 20:33:11 +0800
Subject: [PATCH 02/56] PM / AVS: rockchip-io: add io selectors and supplies
 for rk3328

This adds the necessary data for handling io voltage domains on the rk3328.
As an interesting tidbit, the rk3328 only contains one iodomain area in the
regular General Register Files (GRF).

Signed-off-by: David Wu <david.wu@rock-chips.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../bindings/power/rockchip-io-domain.txt     |  1 +
 drivers/power/avs/rockchip-io-domain.c        | 41 +++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
index d23dc002a87e..d3a5a93a65cd 100644
--- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
+++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
@@ -33,6 +33,7 @@ Required properties:
 - compatible: should be one of:
   - "rockchip,rk3188-io-voltage-domain" for rk3188
   - "rockchip,rk3288-io-voltage-domain" for rk3288
+  - "rockchip,rk3328-io-voltage-domain" for rk3328
   - "rockchip,rk3368-io-voltage-domain" for rk3368
   - "rockchip,rk3368-pmu-io-voltage-domain" for rk3368 pmu-domains
   - "rockchip,rk3399-io-voltage-domain" for rk3399
diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c
index 56bce1908be2..85812521b6ba 100644
--- a/drivers/power/avs/rockchip-io-domain.c
+++ b/drivers/power/avs/rockchip-io-domain.c
@@ -43,6 +43,10 @@
 #define RK3288_SOC_CON2_FLASH0		BIT(7)
 #define RK3288_SOC_FLASH_SUPPLY_NUM	2
 
+#define RK3328_SOC_CON4			0x410
+#define RK3328_SOC_CON4_VCCIO2		BIT(7)
+#define RK3328_SOC_VCCIO2_SUPPLY_NUM	1
+
 #define RK3368_SOC_CON15		0x43c
 #define RK3368_SOC_CON15_FLASH0		BIT(14)
 #define RK3368_SOC_FLASH_SUPPLY_NUM	2
@@ -166,6 +170,25 @@ static void rk3288_iodomain_init(struct rockchip_iodomain *iod)
 		dev_warn(iod->dev, "couldn't update flash0 ctrl\n");
 }
 
+static void rk3328_iodomain_init(struct rockchip_iodomain *iod)
+{
+	int ret;
+	u32 val;
+
+	/* if no vccio2 supply we should leave things alone */
+	if (!iod->supplies[RK3328_SOC_VCCIO2_SUPPLY_NUM].reg)
+		return;
+
+	/*
+	 * set vccio2 iodomain to also use this framework
+	 * instead of a special gpio.
+	 */
+	val = RK3328_SOC_CON4_VCCIO2 | (RK3328_SOC_CON4_VCCIO2 << 16);
+	ret = regmap_write(iod->grf, RK3328_SOC_CON4, val);
+	if (ret < 0)
+		dev_warn(iod->dev, "couldn't update vccio2 vsel ctrl\n");
+}
+
 static void rk3368_iodomain_init(struct rockchip_iodomain *iod)
 {
 	int ret;
@@ -247,6 +270,20 @@ static const struct rockchip_iodomain_soc_data soc_data_rk3288 = {
 	.init = rk3288_iodomain_init,
 };
 
+static const struct rockchip_iodomain_soc_data soc_data_rk3328 = {
+	.grf_offset = 0x410,
+	.supply_names = {
+		"vccio1",
+		"vccio2",
+		"vccio3",
+		"vccio4",
+		"vccio5",
+		"vccio6",
+		"pmuio",
+	},
+	.init = rk3328_iodomain_init,
+};
+
 static const struct rockchip_iodomain_soc_data soc_data_rk3368 = {
 	.grf_offset = 0x900,
 	.supply_names = {
@@ -311,6 +348,10 @@ static const struct of_device_id rockchip_iodomain_match[] = {
 		.compatible = "rockchip,rk3288-io-voltage-domain",
 		.data = (void *)&soc_data_rk3288
 	},
+	{
+		.compatible = "rockchip,rk3328-io-voltage-domain",
+		.data = (void *)&soc_data_rk3328
+	},
 	{
 		.compatible = "rockchip,rk3368-io-voltage-domain",
 		.data = (void *)&soc_data_rk3368

From 08a74cbb1ba337fca6add5674506440c044b2c03 Mon Sep 17 00:00:00 2001
From: Daniel Kurtz <djkurtz@chromium.org>
Date: Thu, 2 Mar 2017 19:08:58 +0800
Subject: [PATCH 03/56] cpufreq: mt8173: Mark mt8173_cpufreq_driver_init as
 __init

This function is only called once at boot by device_initcall(), so mark
it as __init.

Signed-off-by: Daniel Kurtz <djkurtz@chromium.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/mt8173-cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index ab25b1235a5e..72bc1192bd30 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -573,7 +573,7 @@ static struct platform_driver mt8173_cpufreq_platdrv = {
 	.probe		= mt8173_cpufreq_probe,
 };
 
-static int mt8173_cpufreq_driver_init(void)
+static int __init mt8173_cpufreq_driver_init(void)
 {
 	struct platform_device *pdev;
 	int err;

From cf9a2438257da2cbc55f82085b2e0add7583cf79 Mon Sep 17 00:00:00 2001
From: Daniel Kurtz <djkurtz@chromium.org>
Date: Thu, 2 Mar 2017 19:03:45 +0800
Subject: [PATCH 04/56] cpufreq: mediatek: Add support for MT8176 and MT817x

The Mediatek MT8173 is just one of several SOCs from the same MT817x
family, including the 6-core (4-little/2-big) MT8176.

The mt8173-cpufreq driver supports all of these SOCs; however,
machines using them may use a different machine compatible string.

Since this driver checks explicitly for the machine compatible
string, add support for the whole family.

Signed-off-by: Daniel Kurtz <djkurtz@chromium.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/mt8173-cpufreq.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index 72bc1192bd30..fd1886faf33a 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -573,14 +573,33 @@ static struct platform_driver mt8173_cpufreq_platdrv = {
 	.probe		= mt8173_cpufreq_probe,
 };
 
+/* List of machines supported by this driver */
+static const struct of_device_id mt8173_cpufreq_machines[] __initconst = {
+	{ .compatible = "mediatek,mt817x", },
+	{ .compatible = "mediatek,mt8173", },
+	{ .compatible = "mediatek,mt8176", },
+
+	{ }
+};
+
 static int __init mt8173_cpufreq_driver_init(void)
 {
+	struct device_node *np;
+	const struct of_device_id *match;
 	struct platform_device *pdev;
 	int err;
 
-	if (!of_machine_is_compatible("mediatek,mt8173"))
+	np = of_find_node_by_path("/");
+	if (!np)
 		return -ENODEV;
 
+	match = of_match_node(mt8173_cpufreq_machines, np);
+	of_node_put(np);
+	if (!match) {
+		pr_warn("Machine is not compatible with mt8173-cpufreq\n");
+		return -ENODEV;
+	}
+
 	err = platform_driver_register(&mt8173_cpufreq_platdrv);
 	if (err)
 		return err;

From b51d3388e2350138f6acc5cacda009a67f6f6352 Mon Sep 17 00:00:00 2001
From: YuanTian Tang <andy.tang@nxp.com>
Date: Fri, 10 Mar 2017 09:28:43 +0800
Subject: [PATCH 05/56] cpufreq: qoriq: enhance bus frequency calculation

On some platforms, the device-type property may be missing from the soc
node in the dts, so the bus frequency cannot be obtained correctly.

This patch enhances the bus-frequency calculation. When the device-type
property is missing from the dts, the bus frequency is obtained by
looking up the platform clock in the clock table and reading its
frequency.

Signed-off-by: Tang Yuantian <andy.tang@nxp.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/qoriq-cpufreq.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index bfec1bcd3835..e2ea433a5f9c 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -52,17 +52,27 @@ static u32 get_bus_freq(void)
 {
 	struct device_node *soc;
 	u32 sysfreq;
+	struct clk *pltclk;
+	int ret;
 
+	/* get platform freq by searching bus-frequency property */
 	soc = of_find_node_by_type(NULL, "soc");
-	if (!soc)
-		return 0;
+	if (soc) {
+		ret = of_property_read_u32(soc, "bus-frequency", &sysfreq);
+		of_node_put(soc);
+		if (!ret)
+			return sysfreq;
+	}
 
-	if (of_property_read_u32(soc, "bus-frequency", &sysfreq))
-		sysfreq = 0;
+	/* get platform freq by its clock name */
+	pltclk = clk_get(NULL, "cg-pll0-div1");
+	if (IS_ERR(pltclk)) {
+		pr_err("%s: can't get bus frequency %ld\n",
+		       __func__, PTR_ERR(pltclk));
+		return PTR_ERR(pltclk);
+	}
 
-	of_node_put(soc);
-
-	return sysfreq;
+	return clk_get_rate(pltclk);
 }
 
 static struct clk *cpu_to_clk(int cpu)

From 6f19363503541ee6020d35b468a998c213bace36 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 12 Mar 2017 14:16:47 +0100
Subject: [PATCH 06/56] MAINTAINERS: Add file patterns for cpufreq device tree
 bindings

Submitters of device tree binding documentation may forget to CC
the subsystem maintainer if this is missing.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c776906f67a9..fabed4c65542 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3450,6 +3450,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
 T:	git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
 B:	https://bugzilla.kernel.org
 F:	Documentation/cpu-freq/
+F:	Documentation/devicetree/bindings/cpufreq/
 F:	drivers/cpufreq/
 F:	include/linux/cpufreq.h
 F:	tools/testing/selftests/cpufreq/

From 994a8f2514e91c16616c4a1b53e9eb2b24de97b7 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 21 Feb 2017 10:15:18 +0530
Subject: [PATCH 07/56] cpufreq: schedutil: Redefine the rate_limit_us tunable

The rate_limit_us tunable is intended to reduce the possible overhead
from running the schedutil governor.  However, that overhead can be
divided into two separate parts: the governor computations and the
invocation of the scaling driver to set the CPU frequency.  The latter
is where the real overhead comes from.  The former is much less
expensive in terms of execution time and running it every time the
governor callback is invoked by the scheduler, after rate_limit_us
interval has passed since the last frequency update, would not be a
problem.

For this reason, redefine the rate_limit_us tunable so that it means the
minimum time that has to pass between two consecutive invocations of the
scaling driver by the schedutil governor (to set the CPU frequency).

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index cd7cd489f739..78468aa051ab 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -93,14 +93,13 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 {
 	struct cpufreq_policy *policy = sg_policy->policy;
 
-	sg_policy->last_freq_update_time = time;
-
 	if (policy->fast_switch_enabled) {
 		if (sg_policy->next_freq == next_freq) {
 			trace_cpu_frequency(policy->cur, smp_processor_id());
 			return;
 		}
 		sg_policy->next_freq = next_freq;
+		sg_policy->last_freq_update_time = time;
 		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
 		if (next_freq == CPUFREQ_ENTRY_INVALID)
 			return;
@@ -109,6 +108,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 		trace_cpu_frequency(next_freq, smp_processor_id());
 	} else if (sg_policy->next_freq != next_freq) {
 		sg_policy->next_freq = next_freq;
+		sg_policy->last_freq_update_time = time;
 		sg_policy->work_in_progress = true;
 		irq_work_queue(&sg_policy->irq_work);
 	}

From cba1dfb57b94c234728b689d9b00d4267fa1a879 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 9 Mar 2017 09:34:54 +0530
Subject: [PATCH 08/56] cpufreq: schedutil: Refactor sugov_next_freq_shared()

The loop in sugov_next_freq_shared() contains an if block to skip the
loop iteration for the current CPU. This turns out to be an unnecessary
conditional in the scheduler's hot-path for every CPU in the policy.

It would be better to drop the conditional and make the loop treat all
the CPUs in the same way. That would eliminate the need of calling
sugov_iowait_boost() at the top of the routine.

To keep the code optimized to return early if the current CPU has RT/DL
flags set, move the flags check to sugov_update_shared() instead in
order to avoid the function call entirely.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 78468aa051ab..f5ffe241812e 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -217,30 +217,19 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
-					   unsigned long util, unsigned long max,
-					   unsigned int flags)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
-	unsigned int max_f = policy->cpuinfo.max_freq;
 	u64 last_freq_update_time = sg_policy->last_freq_update_time;
+	unsigned long util = 0, max = 1;
 	unsigned int j;
 
-	if (flags & SCHED_CPUFREQ_RT_DL)
-		return max_f;
-
-	sugov_iowait_boost(sg_cpu, &util, &max);
-
 	for_each_cpu(j, policy->cpus) {
-		struct sugov_cpu *j_sg_cpu;
+		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
 		unsigned long j_util, j_max;
 		s64 delta_ns;
 
-		if (j == smp_processor_id())
-			continue;
-
-		j_sg_cpu = &per_cpu(sugov_cpu, j);
 		/*
 		 * If the CPU utilization was last updated before the previous
 		 * frequency update and the time elapsed between the last update
@@ -254,7 +243,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
 			continue;
 		}
 		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
-			return max_f;
+			return policy->cpuinfo.max_freq;
 
 		j_util = j_sg_cpu->util;
 		j_max = j_sg_cpu->max;
@@ -289,7 +278,11 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 	sg_cpu->last_update = time;
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
+		if (flags & SCHED_CPUFREQ_RT_DL)
+			next_f = sg_policy->policy->cpuinfo.max_freq;
+		else
+			next_f = sugov_next_freq_shared(sg_cpu);
+
 		sugov_update_commit(sg_policy, time, next_f);
 	}
 

From 19678ffb9fd6f216f530714e62fb469a961874db Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 14 Mar 2017 10:48:31 +0530
Subject: [PATCH 09/56] cpufreq: dbx500: Manage cooling device from cpufreq
 driver

The best place to register the CPU cooling device is from the cpufreq
driver as we would know if all the resources are already available or
not. That's what is done for the cpufreq-dt.c driver as well.

The cpu-cooling driver for the dbx500 platform was just (un)registering
with the thermal framework, and that can be handled easily by the cpufreq
driver as well, and in the proper sequence.

Get rid of the cooling driver and its users and manage everything
from the cpufreq driver instead.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm/boot/dts/ste-dbx5x0.dtsi        |   5 --
 drivers/cpufreq/dbx500-cpufreq.c         |  20 +++++
 drivers/thermal/Kconfig                  |  12 ---
 drivers/thermal/Makefile                 |   1 -
 drivers/thermal/db8500_cpufreq_cooling.c | 105 -----------------------
 5 files changed, 20 insertions(+), 123 deletions(-)
 delete mode 100644 drivers/thermal/db8500_cpufreq_cooling.c

diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi
index 82d8c4771293..9eea52013914 100644
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -1170,11 +1170,6 @@ external-bus@50000000 {
 			status = "disabled";
 		};
 
-		cpufreq-cooling {
-			compatible = "stericsson,db8500-cpufreq-cooling";
-			status = "disabled";
-		};
-
 		mcde@a0350000 {
 			compatible = "stericsson,mcde";
 			reg = <0xa0350000 0x1000>, /* MCDE */
diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c
index 5c3ec1dd4921..3575b82210ba 100644
--- a/drivers/cpufreq/dbx500-cpufreq.c
+++ b/drivers/cpufreq/dbx500-cpufreq.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/cpufreq.h>
+#include <linux/cpu_cooling.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
@@ -18,6 +19,7 @@
 
 static struct cpufreq_frequency_table *freq_table;
 static struct clk *armss_clk;
+static struct thermal_cooling_device *cdev;
 
 static int dbx500_cpufreq_target(struct cpufreq_policy *policy,
 				unsigned int index)
@@ -32,6 +34,22 @@ static int dbx500_cpufreq_init(struct cpufreq_policy *policy)
 	return cpufreq_generic_init(policy, freq_table, 20 * 1000);
 }
 
+static int dbx500_cpufreq_exit(struct cpufreq_policy *policy)
+{
+	if (!IS_ERR(cdev))
+		cpufreq_cooling_unregister(cdev);
+	return 0;
+}
+
+static void dbx500_cpufreq_ready(struct cpufreq_policy *policy)
+{
+	cdev = cpufreq_cooling_register(policy->cpus);
+	if (IS_ERR(cdev))
+		pr_err("Failed to register cooling device %ld\n", PTR_ERR(cdev));
+	else
+		pr_info("Cooling device registered: %s\n", cdev->type);
+}
+
 static struct cpufreq_driver dbx500_cpufreq_driver = {
 	.flags  = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS |
 			CPUFREQ_NEED_INITIAL_FREQ_CHECK,
@@ -39,6 +57,8 @@ static struct cpufreq_driver dbx500_cpufreq_driver = {
 	.target_index = dbx500_cpufreq_target,
 	.get    = cpufreq_generic_get,
 	.init   = dbx500_cpufreq_init,
+	.exit  = dbx500_cpufreq_exit,
+	.ready  = dbx500_cpufreq_ready,
 	.name   = "DBX500",
 	.attr   = cpufreq_generic_attr,
 };
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 776b34396144..0a16cf4bed39 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -291,18 +291,6 @@ config ARMADA_THERMAL
 	  Enable this option if you want to have support for thermal management
 	  controller present in Armada 370 and Armada XP SoC.
 
-config DB8500_CPUFREQ_COOLING
-	tristate "DB8500 cpufreq cooling"
-	depends on ARCH_U8500 || COMPILE_TEST
-	depends on HAS_IOMEM
-	depends on CPU_THERMAL
-	default y
-	help
-	  Adds DB8500 cpufreq cooling devices, and these cooling devices can be
-	  bound to thermal zone trip points. When a trip point reached, the
-	  bound cpufreq cooling device turns active to set CPU frequency low to
-	  cool down the CPU.
-
 config INTEL_POWERCLAMP
 	tristate "Intel PowerClamp idle injection driver"
 	depends on THERMAL
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 7adae2029355..c2372f10dae5 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_TANGO_THERMAL)	+= tango_thermal.o
 obj-$(CONFIG_IMX_THERMAL)	+= imx_thermal.o
 obj-$(CONFIG_MAX77620_THERMAL)	+= max77620_thermal.o
 obj-$(CONFIG_QORIQ_THERMAL)	+= qoriq_thermal.o
-obj-$(CONFIG_DB8500_CPUFREQ_COOLING)	+= db8500_cpufreq_cooling.o
 obj-$(CONFIG_INTEL_POWERCLAMP)	+= intel_powerclamp.o
 obj-$(CONFIG_X86_PKG_TEMP_THERMAL)	+= x86_pkg_temp_thermal.o
 obj-$(CONFIG_INTEL_SOC_DTS_IOSF_CORE)	+= intel_soc_dts_iosf.o
diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c
deleted file mode 100644
index e58bd0b658b5..000000000000
--- a/drivers/thermal/db8500_cpufreq_cooling.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * db8500_cpufreq_cooling.c - DB8500 cpufreq works as cooling device.
- *
- * Copyright (C) 2012 ST-Ericsson
- * Copyright (C) 2012 Linaro Ltd.
- *
- * Author: Hongbo Zhang <hongbo.zhang@linaro.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/cpu_cooling.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-static int db8500_cpufreq_cooling_probe(struct platform_device *pdev)
-{
-	struct thermal_cooling_device *cdev;
-
-	cdev = cpufreq_cooling_register(cpu_present_mask);
-	if (IS_ERR(cdev)) {
-		int ret = PTR_ERR(cdev);
-
-		if (ret != -EPROBE_DEFER)
-			dev_err(&pdev->dev,
-				"Failed to register cooling device %d\n",
-				ret);
-				
-		return ret;
-	}
-
-	platform_set_drvdata(pdev, cdev);
-
-	dev_info(&pdev->dev, "Cooling device registered: %s\n",	cdev->type);
-
-	return 0;
-}
-
-static int db8500_cpufreq_cooling_remove(struct platform_device *pdev)
-{
-	struct thermal_cooling_device *cdev = platform_get_drvdata(pdev);
-
-	cpufreq_cooling_unregister(cdev);
-
-	return 0;
-}
-
-static int db8500_cpufreq_cooling_suspend(struct platform_device *pdev,
-		pm_message_t state)
-{
-	return -ENOSYS;
-}
-
-static int db8500_cpufreq_cooling_resume(struct platform_device *pdev)
-{
-	return -ENOSYS;
-}
-
-#ifdef CONFIG_OF
-static const struct of_device_id db8500_cpufreq_cooling_match[] = {
-	{ .compatible = "stericsson,db8500-cpufreq-cooling" },
-	{},
-};
-MODULE_DEVICE_TABLE(of, db8500_cpufreq_cooling_match);
-#endif
-
-static struct platform_driver db8500_cpufreq_cooling_driver = {
-	.driver = {
-		.name = "db8500-cpufreq-cooling",
-		.of_match_table = of_match_ptr(db8500_cpufreq_cooling_match),
-	},
-	.probe = db8500_cpufreq_cooling_probe,
-	.suspend = db8500_cpufreq_cooling_suspend,
-	.resume = db8500_cpufreq_cooling_resume,
-	.remove = db8500_cpufreq_cooling_remove,
-};
-
-static int __init db8500_cpufreq_cooling_init(void)
-{
-	return platform_driver_register(&db8500_cpufreq_cooling_driver);
-}
-
-static void __exit db8500_cpufreq_cooling_exit(void)
-{
-	platform_driver_unregister(&db8500_cpufreq_cooling_driver);
-}
-
-/* Should be later than db8500_cpufreq_register */
-late_initcall(db8500_cpufreq_cooling_init);
-module_exit(db8500_cpufreq_cooling_exit);
-
-MODULE_AUTHOR("Hongbo Zhang <hongbo.zhang@stericsson.com>");
-MODULE_DESCRIPTION("DB8500 cpufreq cooling driver");
-MODULE_LICENSE("GPL");

From b7eaf1aab9f8bd2e49fceed77ebc66c1b5800718 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 22 Mar 2017 00:08:50 +0100
Subject: [PATCH 10/56] cpufreq: schedutil: Avoid reducing frequency of busy
 CPUs prematurely

The way the schedutil governor uses the PELT metric causes it to
underestimate the CPU utilization in some cases.

That can be easily demonstrated by running kernel compilation on
a Sandy Bridge Intel processor, running turbostat in parallel with
it and looking at the values written to the MSR_IA32_PERF_CTL
register.  Namely, the expected result would be that when all CPUs
were 100% busy, all of them would be requested to run in the maximum
P-state, but observation shows that this clearly isn't the case.
The CPUs run in the maximum P-state for a while and then are
requested to run slower and go back to the maximum P-state after
a while again.  That causes the actual frequency of the processor to
visibly oscillate below the sustainable maximum in a jittery fashion
which clearly is not desirable.

That has been attributed to CPU utilization metric updates on task
migration that cause the total utilization value for the CPU to be
reduced by the utilization of the migrated task.  If that happens,
the schedutil governor may see a CPU utilization reduction and will
attempt to reduce the CPU frequency accordingly right away.  That
may be premature, though, for example if the system is generally
busy and there are other runnable tasks waiting to be run on that
CPU already.

This is unlikely to be an issue on systems where cpufreq policies are
shared between multiple CPUs, because in those cases the policy
utilization is computed as the maximum of the CPU utilization values
over the whole policy and if that turns out to be low, reducing the
frequency for the policy most likely is a good idea anyway.  On
systems with one CPU per policy, however, it may affect performance
adversely and even lead to increased energy consumption in some cases.

On those systems it may be addressed by taking another utilization
metric into consideration, like whether or not the CPU whose
frequency is about to be reduced has been idle recently, because if
that's not the case, the CPU is likely to be busy in the near future
and its frequency should not be reduced.

To that end, use the counter of idle calls in the timekeeping code.
Namely, make the schedutil governor look at that counter for the
current CPU every time before its frequency is about to be reduced.
If the counter has not changed since the previous iteration of the
governor computations for that CPU, the CPU has been busy for all
that time and its frequency should not be decreased, so if the new
frequency would be lower than the one set previously, the governor
will skip the frequency update.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Joel Fernandes <joelaf@google.com>
---
 include/linux/tick.h             |  1 +
 kernel/sched/cpufreq_schedutil.c | 27 +++++++++++++++++++++++++++
 kernel/time/tick-sched.c         | 12 ++++++++++++
 3 files changed, 40 insertions(+)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index a04fea19676f..fe01e68bf520 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -117,6 +117,7 @@ extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
+extern unsigned long tick_nohz_get_idle_calls(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 #else /* !CONFIG_NO_HZ_COMMON */
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index f5ffe241812e..c1ffb5dc8af6 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -61,6 +61,11 @@ struct sugov_cpu {
 	unsigned long util;
 	unsigned long max;
 	unsigned int flags;
+
+	/* The field below is for single-CPU policies only. */
+#ifdef CONFIG_NO_HZ_COMMON
+	unsigned long saved_idle_calls;
+#endif
 };
 
 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -192,6 +197,19 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 	sg_cpu->iowait_boost >>= 1;
 }
 
+#ifdef CONFIG_NO_HZ_COMMON
+static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
+{
+	unsigned long idle_calls = tick_nohz_get_idle_calls();
+	bool ret = idle_calls == sg_cpu->saved_idle_calls;
+
+	sg_cpu->saved_idle_calls = idle_calls;
+	return ret;
+}
+#else
+static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
+#endif /* CONFIG_NO_HZ_COMMON */
+
 static void sugov_update_single(struct update_util_data *hook, u64 time,
 				unsigned int flags)
 {
@@ -200,6 +218,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned long util, max;
 	unsigned int next_f;
+	bool busy;
 
 	sugov_set_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
@@ -207,12 +226,20 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	if (!sugov_should_update_freq(sg_policy, time))
 		return;
 
+	busy = sugov_cpu_is_busy(sg_cpu);
+
 	if (flags & SCHED_CPUFREQ_RT_DL) {
 		next_f = policy->cpuinfo.max_freq;
 	} else {
 		sugov_get_util(&util, &max);
 		sugov_iowait_boost(sg_cpu, &util, &max);
 		next_f = get_next_freq(sg_policy, util, max);
+		/*
+		 * Do not reduce the frequency if the CPU has not been idle
+		 * recently, as the reduction is likely to be premature then.
+		 */
+		if (busy && next_f < sg_policy->next_freq)
+			next_f = sg_policy->next_freq;
 	}
 	sugov_update_commit(sg_policy, time, next_f);
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7fe53be86077..64c97fc130c4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -993,6 +993,18 @@ ktime_t tick_nohz_get_sleep_length(void)
 	return ts->sleep_length;
 }
 
+/**
+ * tick_nohz_get_idle_calls - return the current idle calls counter value
+ *
+ * Called from the schedutil frequency scaling governor in scheduler context.
+ */
+unsigned long tick_nohz_get_idle_calls(void)
+{
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	return ts->idle_calls;
+}
+
 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 {
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

From 38d4ea229d25d30be6bf41bcd6cd663a587866ca Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 22 Mar 2017 18:32:47 +0100
Subject: [PATCH 11/56] cpufreq: schedutil: Trace frequency only if it has
 changed

sugov_update_commit() calls trace_cpu_frequency() to record the
current CPU frequency if it has not changed in the fast switch case
to prevent utilities from getting confused (they may report that the
CPU is idle if the frequency has not been recorded for too long, for
example).

However, that may cause the tracepoint to be triggered quite often
for no real reason (if the frequency doesn't change, we will not
modify the last update time stamp and governor computations may
run again shortly when that happens), so don't do that (arguably, it
is done to work around a utilities bug anyway).

That allows code duplication in sugov_update_commit() to be reduced
somewhat too.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 kernel/sched/cpufreq_schedutil.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index c1ffb5dc8af6..1054f868d95c 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -98,22 +98,20 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 {
 	struct cpufreq_policy *policy = sg_policy->policy;
 
+	if (sg_policy->next_freq == next_freq)
+		return;
+
+	sg_policy->next_freq = next_freq;
+	sg_policy->last_freq_update_time = time;
+
 	if (policy->fast_switch_enabled) {
-		if (sg_policy->next_freq == next_freq) {
-			trace_cpu_frequency(policy->cur, smp_processor_id());
-			return;
-		}
-		sg_policy->next_freq = next_freq;
-		sg_policy->last_freq_update_time = time;
 		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
 		if (next_freq == CPUFREQ_ENTRY_INVALID)
 			return;
 
 		policy->cur = next_freq;
 		trace_cpu_frequency(next_freq, smp_processor_id());
-	} else if (sg_policy->next_freq != next_freq) {
-		sg_policy->next_freq = next_freq;
-		sg_policy->last_freq_update_time = time;
+	} else {
 		sg_policy->work_in_progress = true;
 		irq_work_queue(&sg_policy->irq_work);
 	}

From eb5139d1a2272487b223c4879ecd2a1b48c2250e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 22 Mar 2017 23:52:18 +0100
Subject: [PATCH 12/56] cpufreq: intel_pstate: Support HWP processors in all
 operation modes

Currently, some processors supporting HWP are only supported by
intel_pstate if HWP is actually going to be used and not supported
otherwise which is confusing.

Specifically, they are not supported if "intel_pstate=no_hwp" is
passed to the kernel in the command line or if the driver is started
in the passive mode ("intel_pstate=passive").

There is no real reason for that, because everything about those
processors is known anyway and the driver can work with them in all
modes, so make that happen, but use the load-based P-state selection
algorithm for the active mode "powersave" policy with them.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 37 +++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 815f5577b32a..87725e2ac3ac 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2627,29 +2627,34 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = {
 
 static int __init intel_pstate_init(void)
 {
-	const struct x86_cpu_id *id;
-	struct cpu_defaults *cpu_def;
-	int rc = 0;
+	int rc;
 
 	if (no_load)
 		return -ENODEV;
 
-	if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
+	if (x86_match_cpu(hwp_support_ids)) {
 		copy_cpu_funcs(&core_params.funcs);
-		hwp_active++;
-		intel_pstate.attr = hwp_cpufreq_attrs;
-		goto hwp_cpu_matched;
+		if (no_hwp) {
+			pstate_funcs.get_target_pstate = get_target_pstate_use_cpu_load;
+		} else {
+			hwp_active++;
+			intel_pstate.attr = hwp_cpufreq_attrs;
+			goto hwp_cpu_matched;
+		}
+	} else {
+		const struct x86_cpu_id *id;
+		struct cpu_defaults *cpu_def;
+
+		id = x86_match_cpu(intel_pstate_cpu_ids);
+		if (!id)
+			return -ENODEV;
+
+		cpu_def = (struct cpu_defaults *)id->driver_data;
+
+		copy_pid_params(&cpu_def->pid_policy);
+		copy_cpu_funcs(&cpu_def->funcs);
 	}
 
-	id = x86_match_cpu(intel_pstate_cpu_ids);
-	if (!id)
-		return -ENODEV;
-
-	cpu_def = (struct cpu_defaults *)id->driver_data;
-
-	copy_pid_params(&cpu_def->pid_policy);
-	copy_cpu_funcs(&cpu_def->funcs);
-
 	if (intel_pstate_msrs_not_valid())
 		return -ENODEV;
 

From 553953453b4b64fbccba31691257d006cee36613 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 22 Mar 2017 23:53:54 +0100
Subject: [PATCH 13/56] cpufreq: intel_pstate: Use load-based P-state selection
 more widely

Extend the set of systems for which intel_pstate will use the
"powersave" P-state selection algorithm based on CPU load in the
active mode by systems with ACPI preferred profile set to "tablet",
"appliance PC", "desktop", or "workstation" (ie. everything with a
specified preferred profile that is not a "server").

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 87725e2ac3ac..2ef02fd568a6 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2469,9 +2469,15 @@ static void __init copy_pid_params(struct pstate_adjust_policy *policy)
 #ifdef CONFIG_ACPI
 static void intel_pstate_use_acpi_profile(void)
 {
-	if (acpi_gbl_FADT.preferred_profile == PM_MOBILE)
+	switch (acpi_gbl_FADT.preferred_profile) {
+	case PM_MOBILE:
+	case PM_TABLET:
+	case PM_APPLIANCE_PC:
+	case PM_DESKTOP:
+	case PM_WORKSTATION:
 		pstate_funcs.get_target_pstate =
 				get_target_pstate_use_cpu_load;
+	}
 }
 #else
 static void intel_pstate_use_acpi_profile(void)

From c5a2ee7dde893e0a06044e75c16711f08d5c011d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 22 Mar 2017 23:58:57 +0100
Subject: [PATCH 14/56] cpufreq: intel_pstate: Active mode P-state limits
 rework

The coordination of P-state limits used by intel_pstate in the active
mode (ie. by default) is problematic, because it synchronizes all of
the limits (ie. the global ones and the per-policy ones) so as to use
one common pair of P-state limits (min and max) across all CPUs in
the system.  The drawbacks of that are as follows:

 - If P-states are coordinated in hardware, it is not necessary
   to coordinate them in software on top of that, so in that case
   all of the above activity is in vain.

 - If P-states are not coordinated in hardware, then the processor
   is actually capable of setting different P-states for different
   CPUs and coordinating them at the software level simply doesn't
   allow that capability to be utilized.

 - The coordination works in such a way that setting a per-policy
   limit (eg. scaling_max_freq) for one CPU causes the common
   effective limit to change (and it will affect all of the other
   CPUs too), but subsequent reads from the corresponding sysfs
   attributes for the other CPUs will return stale values (which
   is confusing).

 - Reads from the global P-state limit attributes, min_perf_pct and
   max_perf_pct, return the effective common values and not the last
   values set through these attributes.  However, the last values
   set through these attributes become hard limits that cannot be
   exceeded by writes to scaling_min_freq and scaling_max_freq,
   respectively, and they are not exposed, so essentially users
   have to remember what they are.

All of that is painful enough to warrant a change of the management
of P-state limits in the active mode.

To that end, redesign the active mode P-state limits management in
intel_pstate in accordance with the following rules:

 (1) All CPUs are affected by the global limits (that is, none of
     them can be requested to run faster than the global max and
     none of them can be requested to run slower than the global
     min).

 (2) Each individual CPU is affected by its own per-policy limits
     (that is, it cannot be requested to run faster than its own
     per-policy max and it cannot be requested to run slower than
     its own per-policy min).

 (3) The global and per-policy limits can be set independently.

Also, the global maximum and minimum P-state limits will always be
expressed as percentages of the maximum supported turbo P-state.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 185 +++++++++++++++------------------
 1 file changed, 85 insertions(+), 100 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2ef02fd568a6..c0afa78624a1 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -187,44 +187,35 @@ struct _pid {
 
 /**
  * struct perf_limits - Store user and policy limits
- * @no_turbo:		User requested turbo state from intel_pstate sysfs
- * @turbo_disabled:	Platform turbo status either from msr
- *			MSR_IA32_MISC_ENABLE or when maximum available pstate
- *			matches the maximum turbo pstate
- * @max_perf_pct:	Effective maximum performance limit in percentage, this
- *			is minimum of either limits enforced by cpufreq policy
- *			or limits from user set limits via intel_pstate sysfs
- * @min_perf_pct:	Effective minimum performance limit in percentage, this
- *			is maximum of either limits enforced by cpufreq policy
- *			or limits from user set limits via intel_pstate sysfs
  * @max_perf:		This is a scaled value between 0 to 255 for max_perf_pct
  *			This value is used to limit max pstate
  * @min_perf:		This is a scaled value between 0 to 255 for min_perf_pct
  *			This value is used to limit min pstate
- * @max_policy_pct:	The maximum performance in percentage enforced by
- *			cpufreq setpolicy interface
- * @max_sysfs_pct:	The maximum performance in percentage enforced by
- *			intel pstate sysfs interface, unused when per cpu
- *			controls are enforced
- * @min_policy_pct:	The minimum performance in percentage enforced by
- *			cpufreq setpolicy interface
- * @min_sysfs_pct:	The minimum performance in percentage enforced by
- *			intel pstate sysfs interface, unused when per cpu
- *			controls are enforced
  *
- * Storage for user and policy defined limits.
+ * Storage for policy defined limits.
  */
 struct perf_limits {
-	int no_turbo;
-	int turbo_disabled;
-	int max_perf_pct;
-	int min_perf_pct;
 	int32_t max_perf;
 	int32_t min_perf;
-	int max_policy_pct;
-	int max_sysfs_pct;
-	int min_policy_pct;
-	int min_sysfs_pct;
+};
+
+/**
+ * struct global_params - Global parameters, mostly tunable via sysfs.
+ * @no_turbo:		Whether or not to use turbo P-states.
+ * @turbo_disabled:	Whether or not turbo P-states are available at all,
+ *			based on the MSR_IA32_MISC_ENABLE value and whether or
+ *			not the maximum reported turbo P-state is different from
+ *			the maximum reported non-turbo one.
+ * @min_perf_pct:	Minimum capacity limit in percent of the maximum turbo
+ *			P-state capacity.
+ * @max_perf_pct:	Maximum capacity limit in percent of the maximum turbo
+ *			P-state capacity.
+ */
+struct global_params {
+	bool no_turbo;
+	bool turbo_disabled;
+	int max_perf_pct;
+	int min_perf_pct;
 };
 
 /**
@@ -245,9 +236,7 @@ struct perf_limits {
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *			current sample
  * @sample:		Storage for storing last Sample data
- * @perf_limits:	Pointer to perf_limit unique to this CPU
- *			Not all field in the structure are applicable
- *			when per cpu controls are enforced
+ * @perf_limits:	Capacity limits unique to this CPU
  * @acpi_perf_data:	Stores ACPI perf information read from _PSS
  * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
  * @epp_powersave:	Last saved HWP energy performance preference
@@ -279,7 +268,7 @@ struct cpudata {
 	u64	prev_tsc;
 	u64	prev_cummulative_iowait;
 	struct sample sample;
-	struct perf_limits *perf_limits;
+	struct perf_limits perf_limits;
 #ifdef CONFIG_ACPI
 	struct acpi_processor_performance acpi_perf_data;
 	bool valid_pss_table;
@@ -364,16 +353,7 @@ static bool driver_registered __read_mostly;
 static bool acpi_ppc;
 #endif
 
-static struct perf_limits global;
-
-static void intel_pstate_init_limits(struct perf_limits *limits)
-{
-	memset(limits, 0, sizeof(*limits));
-	limits->max_perf_pct = 100;
-	limits->max_perf = int_ext_tofp(1);
-	limits->max_policy_pct = 100;
-	limits->max_sysfs_pct = 100;
-}
+static struct global_params global;
 
 static DEFINE_MUTEX(intel_pstate_driver_lock);
 static DEFINE_MUTEX(intel_pstate_limits_lock);
@@ -621,6 +601,14 @@ static inline void update_turbo_state(void)
 		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
 
+static int min_perf_pct_min(void)
+{
+	struct cpudata *cpu = all_cpu_data[0];
+
+	return DIV_ROUND_UP(cpu->pstate.min_pstate * 100,
+			    cpu->pstate.turbo_pstate);
+}
+
 static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
 {
 	u64 epb;
@@ -841,16 +829,13 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 {
 	int min, hw_min, max, hw_max, cpu;
-	struct perf_limits *perf_limits = &global;
 	u64 value, cap;
 
 	for_each_cpu(cpu, policy->cpus) {
 		struct cpudata *cpu_data = all_cpu_data[cpu];
+		struct perf_limits *perf_limits = &cpu_data->perf_limits;
 		s16 epp;
 
-		if (per_cpu_limits)
-			perf_limits = all_cpu_data[cpu]->perf_limits;
-
 		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
 		hw_min = HWP_LOWEST_PERF(cap);
 		if (global.no_turbo)
@@ -1163,6 +1148,15 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
 	global.no_turbo = clamp_t(int, input, 0, 1);
 
+	if (global.no_turbo) {
+		struct cpudata *cpu = all_cpu_data[0];
+		int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
+
+		/* Squash the global minimum into the permitted range. */
+		if (global.min_perf_pct > pct)
+			global.min_perf_pct = pct;
+	}
+
 	mutex_unlock(&intel_pstate_limits_lock);
 
 	intel_pstate_update_policies();
@@ -1191,11 +1185,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_limits_lock);
 
-	global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
-	global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
-	global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
-	global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
-	global.max_perf = percent_ext_fp(global.max_perf_pct);
+	global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
@@ -1225,11 +1215,8 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_limits_lock);
 
-	global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
-	global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
-	global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
-	global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
-	global.min_perf = percent_ext_fp(global.min_perf_pct);
+	global.min_perf_pct = clamp_t(int, input,
+				      min_perf_pct_min(), global.max_perf_pct);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
@@ -1650,14 +1637,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	int max_perf = cpu->pstate.turbo_pstate;
 	int max_perf_adj;
 	int min_perf;
-	struct perf_limits *perf_limits = &global;
+	struct perf_limits *perf_limits = &cpu->perf_limits;
 
 	if (global.no_turbo || global.turbo_disabled)
 		max_perf = cpu->pstate.max_pstate;
 
-	if (per_cpu_limits)
-		perf_limits = cpu->perf_limits;
-
 	/*
 	 * performance can be limited by user through sysfs, by cpufreq
 	 * policy, or by cpu specific default values determined through
@@ -1968,18 +1952,11 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 	cpu = all_cpu_data[cpunum];
 
 	if (!cpu) {
-		unsigned int size = sizeof(struct cpudata);
-
-		if (per_cpu_limits)
-			size += sizeof(struct perf_limits);
-
-		cpu = kzalloc(size, GFP_KERNEL);
+		cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
 		if (!cpu)
 			return -ENOMEM;
 
 		all_cpu_data[cpunum] = cpu;
-		if (per_cpu_limits)
-			cpu->perf_limits = (struct perf_limits *)(cpu + 1);
 
 		cpu->epp_default = -EINVAL;
 		cpu->epp_powersave = -EINVAL;
@@ -2045,8 +2022,9 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 }
 
 static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
-					    struct perf_limits *limits)
+					    struct cpudata *cpu)
 {
+	struct perf_limits *limits = &cpu->perf_limits;
 	int32_t max_policy_perf, min_policy_perf;
 
 	max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
@@ -2061,29 +2039,45 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 	}
 
 	/* Normalize user input to [min_perf, max_perf] */
-	limits->min_perf = max(min_policy_perf,
-			       percent_ext_fp(limits->min_sysfs_pct));
-	limits->min_perf = min(limits->min_perf, max_policy_perf);
-	limits->max_perf = min(max_policy_perf,
-			       percent_ext_fp(limits->max_sysfs_pct));
-	limits->max_perf = max(min_policy_perf, limits->max_perf);
+	if (per_cpu_limits) {
+		limits->min_perf = min_policy_perf;
+		limits->max_perf = max_policy_perf;
+	} else {
+		int32_t global_min, global_max;
 
-	/* Make sure min_perf <= max_perf */
-	limits->min_perf = min(limits->min_perf, limits->max_perf);
+		/* Global limits are in percent of the maximum turbo P-state. */
+		global_max = percent_ext_fp(global.max_perf_pct);
+		global_min = percent_ext_fp(global.min_perf_pct);
+		if (policy->cpuinfo.max_freq != cpu->pstate.turbo_freq) {
+			int32_t turbo_factor;
+
+			turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate,
+						  cpu->pstate.max_pstate);
+			global_min = mul_ext_fp(global_min, turbo_factor);
+			global_max = mul_ext_fp(global_max, turbo_factor);
+		}
+		global_min = clamp_t(int32_t, global_min, 0, global_max);
+
+		limits->min_perf = max(min_policy_perf, global_min);
+		limits->min_perf = min(limits->min_perf, max_policy_perf);
+		limits->max_perf = min(max_policy_perf, global_max);
+		limits->max_perf = max(min_policy_perf, limits->max_perf);
+
+		/* Make sure min_perf <= max_perf */
+		limits->min_perf = min(limits->min_perf, limits->max_perf);
+	}
 
 	limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
 	limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
-	limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
-	limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
 
 	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
-		 limits->max_perf_pct, limits->min_perf_pct);
+		 fp_ext_toint(limits->max_perf * 100),
+		 fp_ext_toint(limits->min_perf * 100));
 }
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu;
-	struct perf_limits *perf_limits = &global;
 
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
@@ -2101,12 +2095,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		policy->max = policy->cpuinfo.max_freq;
 	}
 
-	if (per_cpu_limits)
-		perf_limits = cpu->perf_limits;
-
 	mutex_lock(&intel_pstate_limits_lock);
 
-	intel_pstate_update_perf_limits(policy, perf_limits);
+	intel_pstate_update_perf_limits(policy, cpu);
 
 	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		/*
@@ -2142,17 +2133,6 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
 		return -EINVAL;
 
-	/* When per-CPU limits are used, sysfs limits are not used */
-	if (!per_cpu_limits) {
-		unsigned int max_freq, min_freq;
-
-		max_freq = policy->cpuinfo.max_freq *
-					global.max_sysfs_pct / 100;
-		min_freq = policy->cpuinfo.max_freq *
-					global.min_sysfs_pct / 100;
-		cpufreq_verify_within_limits(policy, min_freq, max_freq);
-	}
-
 	return 0;
 }
 
@@ -2192,8 +2172,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	cpu = all_cpu_data[policy->cpu];
 
-	if (per_cpu_limits)
-		intel_pstate_init_limits(cpu->perf_limits);
+	cpu->perf_limits.max_perf = int_ext_tofp(1);
+	cpu->perf_limits.min_perf = 0;
 
 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
@@ -2252,6 +2232,8 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 
 	cpufreq_verify_within_cpu_limits(policy);
 
+	intel_pstate_update_perf_limits(policy, cpu);
+
 	return 0;
 }
 
@@ -2354,7 +2336,8 @@ static int intel_pstate_register_driver(void)
 {
 	int ret;
 
-	intel_pstate_init_limits(&global);
+	memset(&global, 0, sizeof(global));
+	global.max_perf_pct = 100;
 
 	ret = cpufreq_register_driver(intel_pstate_driver);
 	if (ret) {
@@ -2362,6 +2345,8 @@ static int intel_pstate_register_driver(void)
 		return ret;
 	}
 
+	global.min_perf_pct = min_perf_pct_min();
+
 	mutex_lock(&intel_pstate_limits_lock);
 	driver_registered = true;
 	mutex_unlock(&intel_pstate_limits_lock);

From 80b120ca1a75c2df093d15936ab0591d90c99de9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 23 Mar 2017 00:00:47 +0100
Subject: [PATCH 15/56] cpufreq: intel_pstate: Avoid transient updates of
 cpuinfo.max_freq

Both intel_pstate_verify_policy() and intel_cpufreq_verify_policy()
set policy->cpuinfo.max_freq depending on the turbo status, but the
updates made by them are discarded by the core, because the policy
object passed to them by the core is temporary and cpuinfo.max_freq
from that object is not copied to the final policy object in
cpufreq_set_policy().

However, cpufreq_set_policy() passes the temporary policy object
to the ->setpolicy callback of the driver, so intel_pstate_set_policy()
actually sees the policy->cpuinfo.max_freq value updated by
intel_pstate_verify_policy() and not the final one.  It also
updates policy->max sometimes which basically has no effect after
it returns, because the core discards that update.

To avoid confusion, eliminate policy->cpuinfo.max_freq updates from
intel_pstate_verify_policy() and intel_cpufreq_verify_policy()
entirely and check the maximum frequency explicitly in
intel_pstate_update_perf_limits() instead of relying on the
transiently updated policy->cpuinfo.max_freq value.

Moreover, move the policy->max adjustment carried out in
intel_pstate_set_policy() to a separate function and call that
function from the ->verify driver callbacks to ensure that it will
actually be effective.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 47 ++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c0afa78624a1..60544c210d75 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2021,19 +2021,25 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 	synchronize_sched();
 }
 
+static int intel_pstate_get_max_freq(struct cpudata *cpu)
+{
+	return global.turbo_disabled || global.no_turbo ?
+			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+}
+
 static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 					    struct cpudata *cpu)
 {
 	struct perf_limits *limits = &cpu->perf_limits;
+	int max_freq = intel_pstate_get_max_freq(cpu);
 	int32_t max_policy_perf, min_policy_perf;
 
-	max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
+	max_policy_perf = div_ext_fp(policy->max, max_freq);
 	max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
 	if (policy->max == policy->min) {
 		min_policy_perf = max_policy_perf;
 	} else {
-		min_policy_perf = div_ext_fp(policy->min,
-					     policy->cpuinfo.max_freq);
+		min_policy_perf = div_ext_fp(policy->min, max_freq);
 		min_policy_perf = clamp_t(int32_t, min_policy_perf,
 					  0, max_policy_perf);
 	}
@@ -2048,7 +2054,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 		/* Global limits are in percent of the maximum turbo P-state. */
 		global_max = percent_ext_fp(global.max_perf_pct);
 		global_min = percent_ext_fp(global.min_perf_pct);
-		if (policy->cpuinfo.max_freq != cpu->pstate.turbo_freq) {
+		if (max_freq != cpu->pstate.turbo_freq) {
 			int32_t turbo_factor;
 
 			turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate,
@@ -2088,13 +2094,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	cpu = all_cpu_data[policy->cpu];
 	cpu->policy = policy->policy;
 
-	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
-	    policy->max < policy->cpuinfo.max_freq &&
-	    policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
-		pr_debug("policy->max > max non turbo frequency\n");
-		policy->max = policy->cpuinfo.max_freq;
-	}
-
 	mutex_lock(&intel_pstate_limits_lock);
 
 	intel_pstate_update_perf_limits(policy, cpu);
@@ -2118,21 +2117,31 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	return 0;
 }
 
+static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy,
+					 struct cpudata *cpu)
+{
+	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
+	    policy->max < policy->cpuinfo.max_freq &&
+	    policy->max > cpu->pstate.max_freq) {
+		pr_debug("policy->max > max non turbo frequency\n");
+		policy->max = policy->cpuinfo.max_freq;
+	}
+}
+
 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 
 	update_turbo_state();
-	policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
-					cpu->pstate.max_freq :
-					cpu->pstate.turbo_freq;
-
-	cpufreq_verify_within_cpu_limits(policy);
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+				     intel_pstate_get_max_freq(cpu));
 
 	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
 	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
 		return -EINVAL;
 
+	intel_pstate_adjust_policy_max(policy, cpu);
+
 	return 0;
 }
 
@@ -2227,10 +2236,10 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 
 	update_turbo_state();
-	policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
-			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+				     intel_pstate_get_max_freq(cpu));
 
-	cpufreq_verify_within_cpu_limits(policy);
+	intel_pstate_adjust_policy_max(policy, cpu);
 
 	intel_pstate_update_perf_limits(policy, cpu);
 

From e14cf8857ebd7486a4e30fa7dad06ba187e6cb04 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:03:20 +0200
Subject: [PATCH 16/56] cpufreq: intel_pstate: Eliminate struct perf_limits

After recent changes the purpose of struct perf_limits is not
particularly clear any more and the code may be made somewhat
easier to follow by eliminating it, so go for that.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 59 +++++++++++++---------------------
 1 file changed, 23 insertions(+), 36 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 60544c210d75..a7ed42d6f366 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -185,20 +185,6 @@ struct _pid {
 	int32_t last_err;
 };
 
-/**
- * struct perf_limits - Store user and policy limits
- * @max_perf:		This is a scaled value between 0 to 255 for max_perf_pct
- *			This value is used to limit max pstate
- * @min_perf:		This is a scaled value between 0 to 255 for min_perf_pct
- *			This value is used to limit min pstate
- *
- * Storage for policy defined limits.
- */
-struct perf_limits {
-	int32_t max_perf;
-	int32_t min_perf;
-};
-
 /**
  * struct global_params - Global parameters, mostly tunable via sysfs.
  * @no_turbo:		Whether or not to use turbo P-states.
@@ -236,7 +222,10 @@ struct global_params {
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *			current sample
  * @sample:		Storage for storing last Sample data
- * @perf_limits:	Capacity limits unique to this CPU
+ * @min_perf:		Minimum capacity limit as a fraction of the maximum
+ *			turbo P-state capacity.
+ * @max_perf:		Maximum capacity limit as a fraction of the maximum
+ *			turbo P-state capacity.
  * @acpi_perf_data:	Stores ACPI perf information read from _PSS
  * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
  * @epp_powersave:	Last saved HWP energy performance preference
@@ -268,7 +257,8 @@ struct cpudata {
 	u64	prev_tsc;
 	u64	prev_cummulative_iowait;
 	struct sample sample;
-	struct perf_limits perf_limits;
+	int32_t	min_perf;
+	int32_t	max_perf;
 #ifdef CONFIG_ACPI
 	struct acpi_processor_performance acpi_perf_data;
 	bool valid_pss_table;
@@ -833,7 +823,6 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 
 	for_each_cpu(cpu, policy->cpus) {
 		struct cpudata *cpu_data = all_cpu_data[cpu];
-		struct perf_limits *perf_limits = &cpu_data->perf_limits;
 		s16 epp;
 
 		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
@@ -843,11 +832,11 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 		else
 			hw_max = HWP_HIGHEST_PERF(cap);
 
-		max = fp_ext_toint(hw_max * perf_limits->max_perf);
+		max = fp_ext_toint(hw_max * cpu_data->max_perf);
 		if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
 			min = max;
 		else
-			min = fp_ext_toint(hw_max * perf_limits->min_perf);
+			min = fp_ext_toint(hw_max * cpu_data->min_perf);
 
 		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 
@@ -1637,7 +1626,6 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	int max_perf = cpu->pstate.turbo_pstate;
 	int max_perf_adj;
 	int min_perf;
-	struct perf_limits *perf_limits = &cpu->perf_limits;
 
 	if (global.no_turbo || global.turbo_disabled)
 		max_perf = cpu->pstate.max_pstate;
@@ -1647,11 +1635,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	 * policy, or by cpu specific default values determined through
 	 * experimentation.
 	 */
-	max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf);
+	max_perf_adj = fp_ext_toint(max_perf * cpu->max_perf);
 	*max = clamp_t(int, max_perf_adj,
 			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
 
-	min_perf = fp_ext_toint(max_perf * perf_limits->min_perf);
+	min_perf = fp_ext_toint(max_perf * cpu->min_perf);
 	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
 
@@ -2030,7 +2018,6 @@ static int intel_pstate_get_max_freq(struct cpudata *cpu)
 static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 					    struct cpudata *cpu)
 {
-	struct perf_limits *limits = &cpu->perf_limits;
 	int max_freq = intel_pstate_get_max_freq(cpu);
 	int32_t max_policy_perf, min_policy_perf;
 
@@ -2046,8 +2033,8 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 
 	/* Normalize user input to [min_perf, max_perf] */
 	if (per_cpu_limits) {
-		limits->min_perf = min_policy_perf;
-		limits->max_perf = max_policy_perf;
+		cpu->min_perf = min_policy_perf;
+		cpu->max_perf = max_policy_perf;
 	} else {
 		int32_t global_min, global_max;
 
@@ -2064,21 +2051,21 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 		}
 		global_min = clamp_t(int32_t, global_min, 0, global_max);
 
-		limits->min_perf = max(min_policy_perf, global_min);
-		limits->min_perf = min(limits->min_perf, max_policy_perf);
-		limits->max_perf = min(max_policy_perf, global_max);
-		limits->max_perf = max(min_policy_perf, limits->max_perf);
+		cpu->min_perf = max(min_policy_perf, global_min);
+		cpu->min_perf = min(cpu->min_perf, max_policy_perf);
+		cpu->max_perf = min(max_policy_perf, global_max);
+		cpu->max_perf = max(min_policy_perf, cpu->max_perf);
 
 		/* Make sure min_perf <= max_perf */
-		limits->min_perf = min(limits->min_perf, limits->max_perf);
+		cpu->min_perf = min(cpu->min_perf, cpu->max_perf);
 	}
 
-	limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
-	limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
+	cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS);
+	cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS);
 
 	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
-		 fp_ext_toint(limits->max_perf * 100),
-		 fp_ext_toint(limits->min_perf * 100));
+		 fp_ext_toint(cpu->max_perf * 100),
+		 fp_ext_toint(cpu->min_perf * 100));
 }
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
@@ -2181,8 +2168,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	cpu = all_cpu_data[policy->cpu];
 
-	cpu->perf_limits.max_perf = int_ext_tofp(1);
-	cpu->perf_limits.min_perf = 0;
+	cpu->max_perf = int_ext_tofp(1);
+	cpu->min_perf = 0;
 
 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

From 6404367862bb25730e373cb9d443757b76f6abcc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:04:30 +0200
Subject: [PATCH 17/56] cpufreq: intel_pstate: Drop pointless initialization of
 PID parameters

The P-state selection algorithm used by intel_pstate for Atom
processors is not based on the PID controller and the initialization
of PID parameters for those processors is pointless and confusing, so
drop it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 28 ++--------------------------
 1 file changed, 2 insertions(+), 26 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index a7ed42d6f366..efce4e7eeeca 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1540,14 +1540,6 @@ static struct cpu_defaults core_params = {
 };
 
 static const struct cpu_defaults silvermont_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 60,
-		.p_gain_pct = 14,
-		.d_gain_pct = 0,
-		.i_gain_pct = 4,
-	},
 	.funcs = {
 		.get_max = atom_get_max_pstate,
 		.get_max_physical = atom_get_max_pstate,
@@ -1561,14 +1553,6 @@ static const struct cpu_defaults silvermont_params = {
 };
 
 static const struct cpu_defaults airmont_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 60,
-		.p_gain_pct = 14,
-		.d_gain_pct = 0,
-		.i_gain_pct = 4,
-	},
 	.funcs = {
 		.get_max = atom_get_max_pstate,
 		.get_max_physical = atom_get_max_pstate,
@@ -1602,14 +1586,6 @@ static const struct cpu_defaults knl_params = {
 };
 
 static const struct cpu_defaults bxt_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 60,
-		.p_gain_pct = 14,
-		.d_gain_pct = 0,
-		.i_gain_pct = 4,
-	},
 	.funcs = {
 		.get_max = core_get_max_pstate,
 		.get_max_physical = core_get_max_pstate_physical,
@@ -2637,9 +2613,9 @@ static int __init intel_pstate_init(void)
 			return -ENODEV;
 
 		cpu_def = (struct cpu_defaults *)id->driver_data;
-
-		copy_pid_params(&cpu_def->pid_policy);
 		copy_cpu_funcs(&cpu_def->funcs);
+		if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance)
+			copy_pid_params(&cpu_def->pid_policy);
 	}
 
 	if (intel_pstate_msrs_not_valid())

From 5c43905369bb85fd518363e743b68e2407d83f7c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:05:44 +0200
Subject: [PATCH 18/56] cpufreq: intel_pstate: Initialize pid_params statically

Notice that both the existing struct cpu_defaults instances in which
PID parameters are actually initialized use the same values of those
parameters, so it is not really necessary to copy them over to
pid_params dynamically.

Instead, initialize pid_params statically with those values and
drop the unused pid_policy member from struct cpu_defaults along
with copy_pid_params() used for initializing it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 42 ++++++++--------------------------
 1 file changed, 10 insertions(+), 32 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index efce4e7eeeca..01f8f289b882 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -321,19 +321,26 @@ struct pstate_funcs {
 
 /**
  * struct cpu_defaults- Per CPU model default config data
- * @pid_policy:	PID config data
  * @funcs:		Callback function data
  */
 struct cpu_defaults {
-	struct pstate_adjust_policy pid_policy;
 	struct pstate_funcs funcs;
 };
 
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
 
-static struct pstate_adjust_policy pid_params __read_mostly;
 static struct pstate_funcs pstate_funcs __read_mostly;
+static struct pstate_adjust_policy pid_params __read_mostly = {
+	.sample_rate_ms = 10,
+	.sample_rate_ns = 10 * NSEC_PER_MSEC,
+	.deadband = 0,
+	.setpoint = 97,
+	.p_gain_pct = 20,
+	.d_gain_pct = 0,
+	.i_gain_pct = 0,
+};
+
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
 
@@ -1520,14 +1527,6 @@ static int knl_get_turbo_pstate(void)
 }
 
 static struct cpu_defaults core_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 97,
-		.p_gain_pct = 20,
-		.d_gain_pct = 0,
-		.i_gain_pct = 0,
-	},
 	.funcs = {
 		.get_max = core_get_max_pstate,
 		.get_max_physical = core_get_max_pstate_physical,
@@ -1566,14 +1565,6 @@ static const struct cpu_defaults airmont_params = {
 };
 
 static const struct cpu_defaults knl_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 97,
-		.p_gain_pct = 20,
-		.d_gain_pct = 0,
-		.i_gain_pct = 0,
-	},
 	.funcs = {
 		.get_max = core_get_max_pstate,
 		.get_max_physical = core_get_max_pstate_physical,
@@ -2412,17 +2403,6 @@ static int __init intel_pstate_msrs_not_valid(void)
 	return 0;
 }
 
-static void __init copy_pid_params(struct pstate_adjust_policy *policy)
-{
-	pid_params.sample_rate_ms = policy->sample_rate_ms;
-	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
-	pid_params.p_gain_pct = policy->p_gain_pct;
-	pid_params.i_gain_pct = policy->i_gain_pct;
-	pid_params.d_gain_pct = policy->d_gain_pct;
-	pid_params.deadband = policy->deadband;
-	pid_params.setpoint = policy->setpoint;
-}
-
 #ifdef CONFIG_ACPI
 static void intel_pstate_use_acpi_profile(void)
 {
@@ -2614,8 +2594,6 @@ static int __init intel_pstate_init(void)
 
 		cpu_def = (struct cpu_defaults *)id->driver_data;
 		copy_cpu_funcs(&cpu_def->funcs);
-		if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance)
-			copy_pid_params(&cpu_def->pid_policy);
 	}
 
 	if (intel_pstate_msrs_not_valid())

From 4ddd0146c790e647a05ee5c734b82cb40ef26296 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:07:15 +0200
Subject: [PATCH 19/56] cpufreq: intel_pstate: Fold
 intel_pstate_reset_all_pid() into the caller

There is only one caller of intel_pstate_reset_all_pid(), which is
pid_param_set() used in the debugfs interface only, and having that
code split does not make it particularly convenient to follow.

For this reason, move the body of intel_pstate_reset_all_pid() into
its caller and drop that function.

Also change the loop from for_each_online_cpu() (which is obviously
racy with respect to CPU offline/online) to for_each_possible_cpu(),
so that all PID parameters are reset for all CPUs regardless of their
online/offline status (to prevent, for example, a previously offline
CPU from going online with a stale set of PID parameters).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 01f8f289b882..a5af890827eb 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -576,16 +576,6 @@ static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
 	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
 }
 
-static inline void intel_pstate_reset_all_pid(void)
-{
-	unsigned int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (all_cpu_data[cpu])
-			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
-	}
-}
-
 static inline void update_turbo_state(void)
 {
 	u64 misc_en;
@@ -941,9 +931,14 @@ static void intel_pstate_update_policies(void)
 /************************** debugfs begin ************************/
 static int pid_param_set(void *data, u64 val)
 {
+	unsigned int cpu;
+
 	*(u32 *)data = val;
 	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
-	intel_pstate_reset_all_pid();
+	for_each_possible_cpu(cpu)
+		if (all_cpu_data[cpu])
+			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
+
 	return 0;
 }
 

From ff35f02ea1e3ac4e774f2784c1444fba4cf8e16a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:09:18 +0200
Subject: [PATCH 20/56] cpufreq: intel_pstate: Clean up
 intel_pstate_busy_pid_reset()

intel_pstate_busy_pid_reset() is the only caller of pid_reset(),
pid_p_gain_set(), pid_i_gain_set(), and pid_d_gain_set().  Moreover,
it passes constants as two parameters of pid_reset() and all of
the other routines above essentially contain the same code, so
fold all of them into the caller and drop unnecessary computations.

Introduce percent_fp() for converting integer values in percent
to fixed-point fractions and use it in the above code cleanup.

Finally, rename intel_pstate_busy_pid_reset() to
intel_pstate_pid_reset() as it also is used for the
initialization of PID parameters for every CPU and the
meaning of the "busy" part of the name is not particularly
clear.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 46 ++++++++++++----------------------
 1 file changed, 16 insertions(+), 30 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index a5af890827eb..5585a2d101a7 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -74,6 +74,11 @@ static inline int ceiling_fp(int32_t x)
 	return ret;
 }
 
+static inline int32_t percent_fp(int percent)
+{
+	return div_fp(percent, 100);
+}
+
 static inline u64 mul_ext_fp(u64 x, u64 y)
 {
 	return (x * y) >> EXT_FRAC_BITS;
@@ -507,29 +512,6 @@ static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 }
 #endif
 
-static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
-			     int deadband, int integral) {
-	pid->setpoint = int_tofp(setpoint);
-	pid->deadband  = int_tofp(deadband);
-	pid->integral  = int_tofp(integral);
-	pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
-}
-
-static inline void pid_p_gain_set(struct _pid *pid, int percent)
-{
-	pid->p_gain = div_fp(percent, 100);
-}
-
-static inline void pid_i_gain_set(struct _pid *pid, int percent)
-{
-	pid->i_gain = div_fp(percent, 100);
-}
-
-static inline void pid_d_gain_set(struct _pid *pid, int percent)
-{
-	pid->d_gain = div_fp(percent, 100);
-}
-
 static signed int pid_calc(struct _pid *pid, int32_t busy)
 {
 	signed int result;
@@ -567,13 +549,17 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
 	return (signed int)fp_toint(result);
 }
 
-static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
+static inline void intel_pstate_pid_reset(struct cpudata *cpu)
 {
-	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
-	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
-	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);
+	struct _pid *pid = &cpu->pid;
 
-	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
+	pid->p_gain = percent_fp(pid_params.p_gain_pct);
+	pid->d_gain = percent_fp(pid_params.d_gain_pct);
+	pid->i_gain = percent_fp(pid_params.i_gain_pct);
+	pid->setpoint = int_tofp(pid_params.setpoint);
+	pid->last_err  = pid->setpoint - int_tofp(100);
+	pid->deadband  = int_tofp(pid_params.deadband);
+	pid->integral  = 0;
 }
 
 static inline void update_turbo_state(void)
@@ -937,7 +923,7 @@ static int pid_param_set(void *data, u64 val)
 	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
 	for_each_possible_cpu(cpu)
 		if (all_cpu_data[cpu])
-			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
+			intel_pstate_pid_reset(all_cpu_data[cpu]);
 
 	return 0;
 }
@@ -1931,7 +1917,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 
 	intel_pstate_get_cpu_pstates(cpu);
 
-	intel_pstate_busy_pid_reset(cpu);
+	intel_pstate_pid_reset(cpu);
 
 	pr_debug("controlling: cpu %d\n", cpunum);
 

From 7aec5b50e97dd3e2e6ad8a87ccfd62ba8f49a105 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:10:47 +0200
Subject: [PATCH 21/56] cpufreq: intel_pstate: Set HWP sampling interval once

In the HWP enabled case pid_params.sample_rate_ns only needs to be
updated once, because it is global, so do that when setting hwp_active
instead of doing it during the initialization of every CPU.

Moreover, pid_params.sample_rate_ms is never used if HWP is enabled,
so do not update it at all then.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 5585a2d101a7..b631ab02f170 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1911,8 +1911,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
-		pid_params.sample_rate_ms = 50;
-		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
 	}
 
 	intel_pstate_get_cpu_pstates(cpu);
@@ -2563,6 +2561,7 @@ static int __init intel_pstate_init(void)
 		} else {
 			hwp_active++;
 			intel_pstate.attr = hwp_cpufreq_attrs;
+			pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
 			goto hwp_cpu_matched;
 		}
 	} else {

From 694cb173475a048a05daebf27cc8fdb7865c158b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:11:53 +0200
Subject: [PATCH 22/56] cpufreq: intel_pstate: Skip unnecessary PID resets on
 init

PID controller parameters only need to be initialized if the
get_target_pstate_use_performance() P-state selection routine
is going to be used.  It is not necessary to initialize them
otherwise, so don't do that.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b631ab02f170..ee61db93163c 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1911,12 +1911,12 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
+	} else if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance) {
+		intel_pstate_pid_reset(cpu);
 	}
 
 	intel_pstate_get_cpu_pstates(cpu);
 
-	intel_pstate_pid_reset(cpu);
-
 	pr_debug("controlling: cpu %d\n", cpunum);
 
 	return 0;

From ee8df89a68f27a7484f1fc71d3d69149dd4dd267 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:13:00 +0200
Subject: [PATCH 23/56] cpufreq: intel_pstate: Drop driver_registered variable

The driver_registered variable in intel_pstate is used for checking
whether or not the driver has been registered, but intel_pstate_driver
can be used for that too (with the rule that the driver is not
registered as long as it is NULL).

That is a bit more straightforward and the code may be simplified
a bit this way, so modify the driver accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 46 ++++++++++++++--------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index ee61db93163c..73ccddf94cf4 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -349,7 +349,7 @@ static struct pstate_adjust_policy pid_params __read_mostly = {
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
 
-static bool driver_registered __read_mostly;
+static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
 #ifdef CONFIG_ACPI
 static bool acpi_ppc;
@@ -1035,7 +1035,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1060,7 +1060,7 @@ static ssize_t show_num_pstates(struct kobject *kobj,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1080,7 +1080,7 @@ static ssize_t show_no_turbo(struct kobject *kobj,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1108,7 +1108,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1155,7 +1155,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1185,7 +1185,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -2255,7 +2255,7 @@ static struct cpufreq_driver intel_cpufreq = {
 	.name		= "intel_cpufreq",
 };
 
-static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
+static struct cpufreq_driver *default_driver = &intel_pstate;
 
 static void intel_pstate_driver_cleanup(void)
 {
@@ -2272,15 +2272,17 @@ static void intel_pstate_driver_cleanup(void)
 		}
 	}
 	put_online_cpus();
+	intel_pstate_driver = NULL;
 }
 
-static int intel_pstate_register_driver(void)
+static int intel_pstate_register_driver(struct cpufreq_driver *driver)
 {
 	int ret;
 
 	memset(&global, 0, sizeof(global));
 	global.max_perf_pct = 100;
 
+	intel_pstate_driver = driver;
 	ret = cpufreq_register_driver(intel_pstate_driver);
 	if (ret) {
 		intel_pstate_driver_cleanup();
@@ -2289,10 +2291,6 @@ static int intel_pstate_register_driver(void)
 
 	global.min_perf_pct = min_perf_pct_min();
 
-	mutex_lock(&intel_pstate_limits_lock);
-	driver_registered = true;
-	mutex_unlock(&intel_pstate_limits_lock);
-
 	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
 	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
 		intel_pstate_debug_expose_params();
@@ -2309,10 +2307,6 @@ static int intel_pstate_unregister_driver(void)
 	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
 		intel_pstate_debug_hide_params();
 
-	mutex_lock(&intel_pstate_limits_lock);
-	driver_registered = false;
-	mutex_unlock(&intel_pstate_limits_lock);
-
 	cpufreq_unregister_driver(intel_pstate_driver);
 	intel_pstate_driver_cleanup();
 
@@ -2321,7 +2315,7 @@ static int intel_pstate_unregister_driver(void)
 
 static ssize_t intel_pstate_show_status(char *buf)
 {
-	if (!driver_registered)
+	if (!intel_pstate_driver)
 		return sprintf(buf, "off\n");
 
 	return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
@@ -2333,11 +2327,11 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 	int ret;
 
 	if (size == 3 && !strncmp(buf, "off", size))
-		return driver_registered ?
+		return intel_pstate_driver ?
 			intel_pstate_unregister_driver() : -EINVAL;
 
 	if (size == 6 && !strncmp(buf, "active", size)) {
-		if (driver_registered) {
+		if (intel_pstate_driver) {
 			if (intel_pstate_driver == &intel_pstate)
 				return 0;
 
@@ -2346,12 +2340,11 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 				return ret;
 		}
 
-		intel_pstate_driver = &intel_pstate;
-		return intel_pstate_register_driver();
+		return intel_pstate_register_driver(&intel_pstate);
 	}
 
 	if (size == 7 && !strncmp(buf, "passive", size)) {
-		if (driver_registered) {
+		if (intel_pstate_driver) {
 			if (intel_pstate_driver != &intel_pstate)
 				return 0;
 
@@ -2360,8 +2353,7 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 				return ret;
 		}
 
-		intel_pstate_driver = &intel_cpufreq;
-		return intel_pstate_register_driver();
+		return intel_pstate_register_driver(&intel_cpufreq);
 	}
 
 	return -EINVAL;
@@ -2601,7 +2593,7 @@ static int __init intel_pstate_init(void)
 	intel_pstate_sysfs_expose_params();
 
 	mutex_lock(&intel_pstate_driver_lock);
-	rc = intel_pstate_register_driver();
+	rc = intel_pstate_register_driver(default_driver);
 	mutex_unlock(&intel_pstate_driver_lock);
 	if (rc)
 		return rc;
@@ -2622,7 +2614,7 @@ static int __init intel_pstate_setup(char *str)
 		no_load = 1;
 	} else if (!strcmp(str, "passive")) {
 		pr_info("Passive mode enabled\n");
-		intel_pstate_driver = &intel_cpufreq;
+		default_driver = &intel_cpufreq;
 		no_hwp = 1;
 	}
 	if (!strcmp(str, "no_hwp")) {

From 0042b2c0692ade097ef3a6bbffa491da5dc89273 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:14:08 +0200
Subject: [PATCH 24/56] cpufreq: intel_pstate: Modify check in
 intel_pstate_update_status()

One of the checks in intel_pstate_update_status() implicitly relies
on the information that there are only two struct cpufreq_driver
objects available, but it is better to do it directly against the
value it really is about (to make the code easier to follow if
nothing else).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 73ccddf94cf4..185006e9fbdb 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2345,7 +2345,7 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 
 	if (size == 7 && !strncmp(buf, "passive", size)) {
 		if (intel_pstate_driver) {
-			if (intel_pstate_driver != &intel_pstate)
+			if (intel_pstate_driver == &intel_cpufreq)
 				return 0;
 
 			ret = intel_pstate_unregister_driver();

From eabd22c657f1d23c714f536b859a22a0f22ac7f5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:15:37 +0200
Subject: [PATCH 25/56] cpufreq: intel_pstate: Use different utilization update
 callbacks

Notice that some overhead in the utilization update callbacks
registered by intel_pstate in the active mode can be avoided if
those callbacks are tailored to specific configurations of the
driver.  For example, the utilization update callback for the HWP
enabled case only needs to update the average CPU performance
periodically whereas the utilization update callback for the
PID-based algorithm does not need to take IO-wait boosting into
account and so on.

With that in mind, define three utilization update callbacks for
three different use cases: HWP enabled, the CPU load "powersave"
P-state selection algorithm and the PID-based "powersave" P-state
selection algorithm and modify the driver initialization to
choose the callback matching its current configuration.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 79 +++++++++++++++++++++++-----------
 1 file changed, 54 insertions(+), 25 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 185006e9fbdb..ca7bc19bf10b 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -37,6 +37,9 @@
 #include <asm/cpufeature.h>
 #include <asm/intel-family.h>
 
+#define INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)
+#define INTEL_PSTATE_HWP_SAMPLING_INTERVAL	(50 * NSEC_PER_MSEC)
+
 #define INTEL_CPUFREQ_TRANSITION_LATENCY	20000
 
 #ifdef CONFIG_ACPI
@@ -1676,7 +1679,11 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
 	 * that sample.time will always be reset before setting the utilization
 	 * update hook and make the caller skip the sample then.
 	 */
-	return !!cpu->last_sample_time;
+	if (cpu->last_sample_time) {
+		intel_pstate_calc_avg_perf(cpu);
+		return true;
+	}
+	return false;
 }
 
 static inline int32_t get_avg_frequency(struct cpudata *cpu)
@@ -1783,7 +1790,7 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
 }
 
-static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
+static void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 {
 	int from, target_pstate;
 	struct sample *sample;
@@ -1811,36 +1818,56 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 		fp_toint(cpu->iowait_boost * 100));
 }
 
+static void intel_pstate_update_util_hwp(struct update_util_data *data,
+					 u64 time, unsigned int flags)
+{
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+	u64 delta_ns = time - cpu->sample.time;
+
+	if ((s64)delta_ns >= INTEL_PSTATE_HWP_SAMPLING_INTERVAL)
+		intel_pstate_sample(cpu, time);
+}
+
+static void intel_pstate_update_util_pid(struct update_util_data *data,
+					 u64 time, unsigned int flags)
+{
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+	u64 delta_ns = time - cpu->sample.time;
+
+	if ((s64)delta_ns < pid_params.sample_rate_ns)
+		return;
+
+	if (intel_pstate_sample(cpu, time))
+		intel_pstate_adjust_busy_pstate(cpu);
+}
+
 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 				     unsigned int flags)
 {
 	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
 	u64 delta_ns;
 
-	if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) {
-		if (flags & SCHED_CPUFREQ_IOWAIT) {
-			cpu->iowait_boost = int_tofp(1);
-		} else if (cpu->iowait_boost) {
-			/* Clear iowait_boost if the CPU may have been idle. */
-			delta_ns = time - cpu->last_update;
-			if (delta_ns > TICK_NSEC)
-				cpu->iowait_boost = 0;
-		}
-		cpu->last_update = time;
+	if (flags & SCHED_CPUFREQ_IOWAIT) {
+		cpu->iowait_boost = int_tofp(1);
+	} else if (cpu->iowait_boost) {
+		/* Clear iowait_boost if the CPU may have been idle. */
+		delta_ns = time - cpu->last_update;
+		if (delta_ns > TICK_NSEC)
+			cpu->iowait_boost = 0;
 	}
-
+	cpu->last_update = time;
 	delta_ns = time - cpu->sample.time;
-	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
-		bool sample_taken = intel_pstate_sample(cpu, time);
+	if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL)
+		return;
 
-		if (sample_taken) {
-			intel_pstate_calc_avg_perf(cpu);
-			if (!hwp_active)
-				intel_pstate_adjust_busy_pstate(cpu);
-		}
-	}
+	if (intel_pstate_sample(cpu, time))
+		intel_pstate_adjust_busy_pstate(cpu);
 }
 
+/* Utilization update callback to register in the active mode. */
+static void (*update_util_cb)(struct update_util_data *data, u64 time,
+			      unsigned int flags) = intel_pstate_update_util;
+
 #define ICPU(model, policy) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
 			(unsigned long)&policy }
@@ -1938,8 +1965,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
-	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-				     intel_pstate_update_util);
+	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, update_util_cb);
 	cpu->update_util_set = true;
 }
 
@@ -2405,6 +2431,9 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
 	pstate_funcs.get_target_pstate = funcs->get_target_pstate;
 
 	intel_pstate_use_acpi_profile();
+
+	if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance)
+		update_util_cb = intel_pstate_update_util_pid;
 }
 
 #ifdef CONFIG_ACPI
@@ -2549,11 +2578,11 @@ static int __init intel_pstate_init(void)
 	if (x86_match_cpu(hwp_support_ids)) {
 		copy_cpu_funcs(&core_params.funcs);
 		if (no_hwp) {
-			pstate_funcs.get_target_pstate = get_target_pstate_use_cpu_load;
+			update_util_cb = intel_pstate_update_util;
 		} else {
 			hwp_active++;
 			intel_pstate.attr = hwp_cpufreq_attrs;
-			pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
+			update_util_cb = intel_pstate_update_util_hwp;
 			goto hwp_cpu_matched;
 		}
 	} else {

From 67dd9bf4416305811d58280dbe108d78ab573d56 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:17:10 +0200
Subject: [PATCH 26/56] cpufreq: intel_pstate: Add update_util callback to
 pstate_funcs

Avoid using extra function pointers during P-state selection by
dropping the get_target_pstate member from struct pstate_funcs,
adding a new update_util callback to it (to be registered with
the CPU scheduler as the utilization update callback in the active
mode) and reworking the utilization update callback routines to
invoke specific P-state selection functions directly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 81 ++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 38 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index ca7bc19bf10b..68ede1006b07 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -311,7 +311,7 @@ struct pstate_adjust_policy {
  * @get_scaling:	Callback to get frequency scaling factor
  * @get_val:		Callback to convert P state to actual MSR write value
  * @get_vid:		Callback to get VID data for Atom platforms
- * @get_target_pstate:	Callback to a function to calculate next P state to use
+ * @update_util:	Active mode utilization update callback.
  *
  * Core and Atom CPU models have different way to get P State limits. This
  * structure is used to store those callbacks.
@@ -324,7 +324,8 @@ struct pstate_funcs {
 	int (*get_scaling)(void);
 	u64 (*get_val)(struct cpudata*, int pstate);
 	void (*get_vid)(struct cpudata *);
-	int32_t (*get_target_pstate)(struct cpudata *);
+	void (*update_util)(struct update_util_data *data, u64 time,
+			    unsigned int flags);
 };
 
 /**
@@ -335,9 +336,6 @@ struct cpu_defaults {
 	struct pstate_funcs funcs;
 };
 
-static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
-static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
-
 static struct pstate_funcs pstate_funcs __read_mostly;
 static struct pstate_adjust_policy pid_params __read_mostly = {
 	.sample_rate_ms = 10,
@@ -1510,6 +1508,11 @@ static int knl_get_turbo_pstate(void)
 	return ret;
 }
 
+static void intel_pstate_update_util_pid(struct update_util_data *data,
+					 u64 time, unsigned int flags);
+static void intel_pstate_update_util(struct update_util_data *data, u64 time,
+				     unsigned int flags);
+
 static struct cpu_defaults core_params = {
 	.funcs = {
 		.get_max = core_get_max_pstate,
@@ -1518,7 +1521,7 @@ static struct cpu_defaults core_params = {
 		.get_turbo = core_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
 		.get_val = core_get_val,
-		.get_target_pstate = get_target_pstate_use_performance,
+		.update_util = intel_pstate_update_util_pid,
 	},
 };
 
@@ -1531,7 +1534,7 @@ static const struct cpu_defaults silvermont_params = {
 		.get_val = atom_get_val,
 		.get_scaling = silvermont_get_scaling,
 		.get_vid = atom_get_vid,
-		.get_target_pstate = get_target_pstate_use_cpu_load,
+		.update_util = intel_pstate_update_util,
 	},
 };
 
@@ -1544,7 +1547,7 @@ static const struct cpu_defaults airmont_params = {
 		.get_val = atom_get_val,
 		.get_scaling = airmont_get_scaling,
 		.get_vid = atom_get_vid,
-		.get_target_pstate = get_target_pstate_use_cpu_load,
+		.update_util = intel_pstate_update_util,
 	},
 };
 
@@ -1556,7 +1559,7 @@ static const struct cpu_defaults knl_params = {
 		.get_turbo = knl_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
 		.get_val = core_get_val,
-		.get_target_pstate = get_target_pstate_use_performance,
+		.update_util = intel_pstate_update_util_pid,
 	},
 };
 
@@ -1568,7 +1571,7 @@ static const struct cpu_defaults bxt_params = {
 		.get_turbo = core_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
 		.get_val = core_get_val,
-		.get_target_pstate = get_target_pstate_use_cpu_load,
+		.update_util = intel_pstate_update_util,
 	},
 };
 
@@ -1704,6 +1707,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 	int32_t busy_frac, boost;
 	int target, avg_pstate;
 
+	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE)
+		return cpu->pstate.turbo_pstate;
+
 	busy_frac = div_fp(sample->mperf, sample->tsc);
 
 	boost = cpu->iowait_boost;
@@ -1740,6 +1746,9 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
 	u64 duration_ns;
 
+	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE)
+		return cpu->pstate.turbo_pstate;
+
 	/*
 	 * perf_scaled is the ratio of the average P-state during the last
 	 * sampling period to the P-state requested last time (in percent).
@@ -1790,16 +1799,11 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
 }
 
-static void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
+static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate)
 {
-	int from, target_pstate;
+	int from = cpu->pstate.current_pstate;
 	struct sample *sample;
 
-	from = cpu->pstate.current_pstate;
-
-	target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ?
-		cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu);
-
 	update_turbo_state();
 
 	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
@@ -1837,8 +1841,12 @@ static void intel_pstate_update_util_pid(struct update_util_data *data,
 	if ((s64)delta_ns < pid_params.sample_rate_ns)
 		return;
 
-	if (intel_pstate_sample(cpu, time))
-		intel_pstate_adjust_busy_pstate(cpu);
+	if (intel_pstate_sample(cpu, time)) {
+		int target_pstate;
+
+		target_pstate = get_target_pstate_use_performance(cpu);
+		intel_pstate_adjust_pstate(cpu, target_pstate);
+	}
 }
 
 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
@@ -1860,13 +1868,13 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 	if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL)
 		return;
 
-	if (intel_pstate_sample(cpu, time))
-		intel_pstate_adjust_busy_pstate(cpu);
-}
+	if (intel_pstate_sample(cpu, time)) {
+		int target_pstate;
 
-/* Utilization update callback to register in the active mode. */
-static void (*update_util_cb)(struct update_util_data *data, u64 time,
-			      unsigned int flags) = intel_pstate_update_util;
+		target_pstate = get_target_pstate_use_cpu_load(cpu);
+		intel_pstate_adjust_pstate(cpu, target_pstate);
+	}
+}
 
 #define ICPU(model, policy) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
@@ -1938,7 +1946,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
-	} else if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance) {
+	} else if (pstate_funcs.update_util == intel_pstate_update_util_pid) {
 		intel_pstate_pid_reset(cpu);
 	}
 
@@ -1965,7 +1973,8 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
-	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, update_util_cb);
+	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
+				     pstate_funcs.update_util);
 	cpu->update_util_set = true;
 }
 
@@ -2318,7 +2327,7 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
 	global.min_perf_pct = min_perf_pct_min();
 
 	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
-	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+	    pstate_funcs.update_util == intel_pstate_update_util_pid)
 		intel_pstate_debug_expose_params();
 
 	return 0;
@@ -2329,8 +2338,8 @@ static int intel_pstate_unregister_driver(void)
 	if (hwp_active)
 		return -EBUSY;
 
-	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
-	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+	if (intel_pstate_driver == &intel_pstate &&
+	    pstate_funcs.update_util == intel_pstate_update_util_pid)
 		intel_pstate_debug_hide_params();
 
 	cpufreq_unregister_driver(intel_pstate_driver);
@@ -2409,8 +2418,7 @@ static void intel_pstate_use_acpi_profile(void)
 	case PM_APPLIANCE_PC:
 	case PM_DESKTOP:
 	case PM_WORKSTATION:
-		pstate_funcs.get_target_pstate =
-				get_target_pstate_use_cpu_load;
+		pstate_funcs.update_util = intel_pstate_update_util;
 	}
 }
 #else
@@ -2428,12 +2436,9 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
 	pstate_funcs.get_scaling = funcs->get_scaling;
 	pstate_funcs.get_val   = funcs->get_val;
 	pstate_funcs.get_vid   = funcs->get_vid;
-	pstate_funcs.get_target_pstate = funcs->get_target_pstate;
+	pstate_funcs.update_util = funcs->update_util;
 
 	intel_pstate_use_acpi_profile();
-
-	if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance)
-		update_util_cb = intel_pstate_update_util_pid;
 }
 
 #ifdef CONFIG_ACPI
@@ -2578,11 +2583,11 @@ static int __init intel_pstate_init(void)
 	if (x86_match_cpu(hwp_support_ids)) {
 		copy_cpu_funcs(&core_params.funcs);
 		if (no_hwp) {
-			update_util_cb = intel_pstate_update_util;
+			pstate_funcs.update_util = intel_pstate_update_util;
 		} else {
 			hwp_active++;
 			intel_pstate.attr = hwp_cpufreq_attrs;
-			update_util_cb = intel_pstate_update_util_hwp;
+			pstate_funcs.update_util = intel_pstate_update_util_hwp;
 			goto hwp_cpu_matched;
 		}
 	} else {

From de4a76cb585da13f8b0c9f2161ed6fcf5f2379fc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:18:02 +0200
Subject: [PATCH 27/56] cpufreq: intel_pstate: Move cpu_defaults definitions

Move the definitions of the cpu_defaults structures after the
definitions of utilization update callback routines to avoid
extra declarations of the latter.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 129 ++++++++++++++++-----------------
 1 file changed, 62 insertions(+), 67 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 68ede1006b07..59312dc4c401 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1508,73 +1508,6 @@ static int knl_get_turbo_pstate(void)
 	return ret;
 }
 
-static void intel_pstate_update_util_pid(struct update_util_data *data,
-					 u64 time, unsigned int flags);
-static void intel_pstate_update_util(struct update_util_data *data, u64 time,
-				     unsigned int flags);
-
-static struct cpu_defaults core_params = {
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = core_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.update_util = intel_pstate_update_util_pid,
-	},
-};
-
-static const struct cpu_defaults silvermont_params = {
-	.funcs = {
-		.get_max = atom_get_max_pstate,
-		.get_max_physical = atom_get_max_pstate,
-		.get_min = atom_get_min_pstate,
-		.get_turbo = atom_get_turbo_pstate,
-		.get_val = atom_get_val,
-		.get_scaling = silvermont_get_scaling,
-		.get_vid = atom_get_vid,
-		.update_util = intel_pstate_update_util,
-	},
-};
-
-static const struct cpu_defaults airmont_params = {
-	.funcs = {
-		.get_max = atom_get_max_pstate,
-		.get_max_physical = atom_get_max_pstate,
-		.get_min = atom_get_min_pstate,
-		.get_turbo = atom_get_turbo_pstate,
-		.get_val = atom_get_val,
-		.get_scaling = airmont_get_scaling,
-		.get_vid = atom_get_vid,
-		.update_util = intel_pstate_update_util,
-	},
-};
-
-static const struct cpu_defaults knl_params = {
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = knl_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.update_util = intel_pstate_update_util_pid,
-	},
-};
-
-static const struct cpu_defaults bxt_params = {
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = core_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.update_util = intel_pstate_update_util,
-	},
-};
-
 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 {
 	int max_perf = cpu->pstate.turbo_pstate;
@@ -1876,6 +1809,68 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 	}
 }
 
+static struct cpu_defaults core_params = {
+	.funcs = {
+		.get_max = core_get_max_pstate,
+		.get_max_physical = core_get_max_pstate_physical,
+		.get_min = core_get_min_pstate,
+		.get_turbo = core_get_turbo_pstate,
+		.get_scaling = core_get_scaling,
+		.get_val = core_get_val,
+		.update_util = intel_pstate_update_util_pid,
+	},
+};
+
+static const struct cpu_defaults silvermont_params = {
+	.funcs = {
+		.get_max = atom_get_max_pstate,
+		.get_max_physical = atom_get_max_pstate,
+		.get_min = atom_get_min_pstate,
+		.get_turbo = atom_get_turbo_pstate,
+		.get_val = atom_get_val,
+		.get_scaling = silvermont_get_scaling,
+		.get_vid = atom_get_vid,
+		.update_util = intel_pstate_update_util,
+	},
+};
+
+static const struct cpu_defaults airmont_params = {
+	.funcs = {
+		.get_max = atom_get_max_pstate,
+		.get_max_physical = atom_get_max_pstate,
+		.get_min = atom_get_min_pstate,
+		.get_turbo = atom_get_turbo_pstate,
+		.get_val = atom_get_val,
+		.get_scaling = airmont_get_scaling,
+		.get_vid = atom_get_vid,
+		.update_util = intel_pstate_update_util,
+	},
+};
+
+static const struct cpu_defaults knl_params = {
+	.funcs = {
+		.get_max = core_get_max_pstate,
+		.get_max_physical = core_get_max_pstate_physical,
+		.get_min = core_get_min_pstate,
+		.get_turbo = knl_get_turbo_pstate,
+		.get_scaling = core_get_scaling,
+		.get_val = core_get_val,
+		.update_util = intel_pstate_update_util_pid,
+	},
+};
+
+static const struct cpu_defaults bxt_params = {
+	.funcs = {
+		.get_max = core_get_max_pstate,
+		.get_max_physical = core_get_max_pstate_physical,
+		.get_min = core_get_min_pstate,
+		.get_turbo = core_get_turbo_pstate,
+		.get_scaling = core_get_scaling,
+		.get_val = core_get_val,
+		.update_util = intel_pstate_update_util,
+	},
+};
+
 #define ICPU(model, policy) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
 			(unsigned long)&policy }

From 2f49afc2a6f6ec4bacb2e50d5482ecc111b41ab5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:19:03 +0200
Subject: [PATCH 28/56] cpufreq: intel_pstate: Drop struct cpu_defaults

The cpu_defaults structure is redundant, because it only contains
one member of type struct pstate_funcs which can be used directly
instead of struct cpu_defaults.

For this reason, drop struct cpu_defaults, use struct pstate_funcs
directly instead of it where applicable and rename all of the
variables of that type accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 154 ++++++++++++++-------------------
 1 file changed, 67 insertions(+), 87 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 59312dc4c401..f8496faa1085 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -328,14 +328,6 @@ struct pstate_funcs {
 			    unsigned int flags);
 };
 
-/**
- * struct cpu_defaults- Per CPU model default config data
- * @funcs:		Callback function data
- */
-struct cpu_defaults {
-	struct pstate_funcs funcs;
-};
-
 static struct pstate_funcs pstate_funcs __read_mostly;
 static struct pstate_adjust_policy pid_params __read_mostly = {
 	.sample_rate_ms = 10,
@@ -1809,66 +1801,56 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 	}
 }
 
-static struct cpu_defaults core_params = {
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = core_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.update_util = intel_pstate_update_util_pid,
-	},
+static struct pstate_funcs core_funcs = {
+	.get_max = core_get_max_pstate,
+	.get_max_physical = core_get_max_pstate_physical,
+	.get_min = core_get_min_pstate,
+	.get_turbo = core_get_turbo_pstate,
+	.get_scaling = core_get_scaling,
+	.get_val = core_get_val,
+	.update_util = intel_pstate_update_util_pid,
 };
 
-static const struct cpu_defaults silvermont_params = {
-	.funcs = {
-		.get_max = atom_get_max_pstate,
-		.get_max_physical = atom_get_max_pstate,
-		.get_min = atom_get_min_pstate,
-		.get_turbo = atom_get_turbo_pstate,
-		.get_val = atom_get_val,
-		.get_scaling = silvermont_get_scaling,
-		.get_vid = atom_get_vid,
-		.update_util = intel_pstate_update_util,
-	},
+static const struct pstate_funcs silvermont_funcs = {
+	.get_max = atom_get_max_pstate,
+	.get_max_physical = atom_get_max_pstate,
+	.get_min = atom_get_min_pstate,
+	.get_turbo = atom_get_turbo_pstate,
+	.get_val = atom_get_val,
+	.get_scaling = silvermont_get_scaling,
+	.get_vid = atom_get_vid,
+	.update_util = intel_pstate_update_util,
 };
 
-static const struct cpu_defaults airmont_params = {
-	.funcs = {
-		.get_max = atom_get_max_pstate,
-		.get_max_physical = atom_get_max_pstate,
-		.get_min = atom_get_min_pstate,
-		.get_turbo = atom_get_turbo_pstate,
-		.get_val = atom_get_val,
-		.get_scaling = airmont_get_scaling,
-		.get_vid = atom_get_vid,
-		.update_util = intel_pstate_update_util,
-	},
+static const struct pstate_funcs airmont_funcs = {
+	.get_max = atom_get_max_pstate,
+	.get_max_physical = atom_get_max_pstate,
+	.get_min = atom_get_min_pstate,
+	.get_turbo = atom_get_turbo_pstate,
+	.get_val = atom_get_val,
+	.get_scaling = airmont_get_scaling,
+	.get_vid = atom_get_vid,
+	.update_util = intel_pstate_update_util,
 };
 
-static const struct cpu_defaults knl_params = {
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = knl_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.update_util = intel_pstate_update_util_pid,
-	},
+static const struct pstate_funcs knl_funcs = {
+	.get_max = core_get_max_pstate,
+	.get_max_physical = core_get_max_pstate_physical,
+	.get_min = core_get_min_pstate,
+	.get_turbo = knl_get_turbo_pstate,
+	.get_scaling = core_get_scaling,
+	.get_val = core_get_val,
+	.update_util = intel_pstate_update_util_pid,
 };
 
-static const struct cpu_defaults bxt_params = {
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = core_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.update_util = intel_pstate_update_util,
-	},
+static const struct pstate_funcs bxt_funcs = {
+	.get_max = core_get_max_pstate,
+	.get_max_physical = core_get_max_pstate_physical,
+	.get_min = core_get_min_pstate,
+	.get_turbo = core_get_turbo_pstate,
+	.get_scaling = core_get_scaling,
+	.get_val = core_get_val,
+	.update_util = intel_pstate_update_util,
 };
 
 #define ICPU(model, policy) \
@@ -1876,38 +1858,38 @@ static const struct cpu_defaults bxt_params = {
 			(unsigned long)&policy }
 
 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
-	ICPU(INTEL_FAM6_SANDYBRIDGE, 		core_params),
-	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		core_params),
-	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	silvermont_params),
-	ICPU(INTEL_FAM6_IVYBRIDGE,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_CORE,		core_params),
-	ICPU(INTEL_FAM6_BROADWELL_CORE,		core_params),
-	ICPU(INTEL_FAM6_IVYBRIDGE_X,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_X,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_ULT,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_GT3E,		core_params),
-	ICPU(INTEL_FAM6_BROADWELL_GT3E,		core_params),
-	ICPU(INTEL_FAM6_ATOM_AIRMONT,		airmont_params),
-	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		core_params),
-	ICPU(INTEL_FAM6_BROADWELL_X,		core_params),
-	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_params),
-	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_params),
-	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_params),
-	ICPU(INTEL_FAM6_XEON_PHI_KNM,		knl_params),
-	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		bxt_params),
+	ICPU(INTEL_FAM6_SANDYBRIDGE, 		core_funcs),
+	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		core_funcs),
+	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	silvermont_funcs),
+	ICPU(INTEL_FAM6_IVYBRIDGE,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_CORE,		core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_CORE,		core_funcs),
+	ICPU(INTEL_FAM6_IVYBRIDGE_X,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_X,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_ULT,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_GT3E,		core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_GT3E,		core_funcs),
+	ICPU(INTEL_FAM6_ATOM_AIRMONT,		airmont_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_X,		core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_funcs),
+	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_funcs),
+	ICPU(INTEL_FAM6_XEON_PHI_KNM,		knl_funcs),
+	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		bxt_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
 
 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
-	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
-	ICPU(INTEL_FAM6_BROADWELL_X, core_params),
-	ICPU(INTEL_FAM6_SKYLAKE_X, core_params),
+	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_X, core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
 	{}
 };
 
 static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
-	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params),
+	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_funcs),
 	{}
 };
 
@@ -2576,7 +2558,7 @@ static int __init intel_pstate_init(void)
 		return -ENODEV;
 
 	if (x86_match_cpu(hwp_support_ids)) {
-		copy_cpu_funcs(&core_params.funcs);
+		copy_cpu_funcs(&core_funcs);
 		if (no_hwp) {
 			pstate_funcs.update_util = intel_pstate_update_util;
 		} else {
@@ -2587,14 +2569,12 @@ static int __init intel_pstate_init(void)
 		}
 	} else {
 		const struct x86_cpu_id *id;
-		struct cpu_defaults *cpu_def;
 
 		id = x86_match_cpu(intel_pstate_cpu_ids);
 		if (!id)
 			return -ENODEV;
 
-		cpu_def = (struct cpu_defaults *)id->driver_data;
-		copy_cpu_funcs(&cpu_def->funcs);
+		copy_cpu_funcs((struct pstate_funcs *)id->driver_data);
 	}
 
 	if (intel_pstate_msrs_not_valid())

From 8ca6ce37014e5a9b127fc076448eb95e2b366d05 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:20:13 +0200
Subject: [PATCH 29/56] cpufreq: intel_pstate: Introduce pid_in_use()

Add a new function pid_in_use() to return the information on whether
or not the PID-based P-state selection algorithm is in use.

That allows a couple of complicated conditions in the code to be
reduced to simple checks against the new function's return value.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index f8496faa1085..6384557cea69 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1893,6 +1893,8 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
 	{}
 };
 
+static bool pid_in_use(void);
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
@@ -1923,7 +1925,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
-	} else if (pstate_funcs.update_util == intel_pstate_update_util_pid) {
+	} else if (pid_in_use()) {
 		intel_pstate_pid_reset(cpu);
 	}
 
@@ -2269,6 +2271,12 @@ static struct cpufreq_driver intel_cpufreq = {
 
 static struct cpufreq_driver *default_driver = &intel_pstate;
 
+static bool pid_in_use(void)
+{
+	return intel_pstate_driver == &intel_pstate &&
+		pstate_funcs.update_util == intel_pstate_update_util_pid;
+}
+
 static void intel_pstate_driver_cleanup(void)
 {
 	unsigned int cpu;
@@ -2303,8 +2311,7 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
 
 	global.min_perf_pct = min_perf_pct_min();
 
-	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
-	    pstate_funcs.update_util == intel_pstate_update_util_pid)
+	if (pid_in_use())
 		intel_pstate_debug_expose_params();
 
 	return 0;
@@ -2315,8 +2322,7 @@ static int intel_pstate_unregister_driver(void)
 	if (hwp_active)
 		return -EBUSY;
 
-	if (intel_pstate_driver == &intel_pstate &&
-	    pstate_funcs.update_util == intel_pstate_update_util_pid)
+	if (pid_in_use())
 		intel_pstate_debug_hide_params();
 
 	cpufreq_unregister_driver(intel_pstate_driver);

From 2bfc4cbb5fd3848669f1b95fea793f63d8e77fa0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:22:16 +0200
Subject: [PATCH 30/56] cpufreq: intel_pstate: Do not walk policy->cpus

intel_pstate_hwp_set() is the only function walking policy->cpus
in intel_pstate.  The rest of the code simply assumes one CPU per
policy, including the initialization code.

Therefore it doesn't make sense for intel_pstate_hwp_set() to
walk policy->cpus as it is guaranteed to have only one bit set
for policy->cpu.

For this reason, rearrange intel_pstate_hwp_set() to take the CPU
number as the argument and drop the loop over policy->cpus from it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 128 ++++++++++++++++-----------------
 1 file changed, 62 insertions(+), 66 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 6384557cea69..5236701958d0 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -792,84 +792,80 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 	NULL,
 };
 
-static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
+static void intel_pstate_hwp_set(unsigned int cpu)
 {
-	int min, hw_min, max, hw_max, cpu;
+	struct cpudata *cpu_data = all_cpu_data[cpu];
+	int min, hw_min, max, hw_max;
 	u64 value, cap;
+	s16 epp;
 
-	for_each_cpu(cpu, policy->cpus) {
-		struct cpudata *cpu_data = all_cpu_data[cpu];
-		s16 epp;
+	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+	hw_min = HWP_LOWEST_PERF(cap);
+	if (global.no_turbo)
+		hw_max = HWP_GUARANTEED_PERF(cap);
+	else
+		hw_max = HWP_HIGHEST_PERF(cap);
 
-		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
-		hw_min = HWP_LOWEST_PERF(cap);
-		if (global.no_turbo)
-			hw_max = HWP_GUARANTEED_PERF(cap);
-		else
-			hw_max = HWP_HIGHEST_PERF(cap);
+	max = fp_ext_toint(hw_max * cpu_data->max_perf);
+	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
+		min = max;
+	else
+		min = fp_ext_toint(hw_max * cpu_data->min_perf);
 
-		max = fp_ext_toint(hw_max * cpu_data->max_perf);
-		if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
-			min = max;
-		else
-			min = fp_ext_toint(hw_max * cpu_data->min_perf);
+	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 
-		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
+	value &= ~HWP_MIN_PERF(~0L);
+	value |= HWP_MIN_PERF(min);
 
-		value &= ~HWP_MIN_PERF(~0L);
-		value |= HWP_MIN_PERF(min);
+	value &= ~HWP_MAX_PERF(~0L);
+	value |= HWP_MAX_PERF(max);
 
-		value &= ~HWP_MAX_PERF(~0L);
-		value |= HWP_MAX_PERF(max);
+	if (cpu_data->epp_policy == cpu_data->policy)
+		goto skip_epp;
 
-		if (cpu_data->epp_policy == cpu_data->policy)
+	cpu_data->epp_policy = cpu_data->policy;
+
+	if (cpu_data->epp_saved >= 0) {
+		epp = cpu_data->epp_saved;
+		cpu_data->epp_saved = -EINVAL;
+		goto update_epp;
+	}
+
+	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		epp = intel_pstate_get_epp(cpu_data, value);
+		cpu_data->epp_powersave = epp;
+		/* If EPP read was failed, then don't try to write */
+		if (epp < 0)
 			goto skip_epp;
 
-		cpu_data->epp_policy = cpu_data->policy;
+		epp = 0;
+	} else {
+		/* skip setting EPP, when saved value is invalid */
+		if (cpu_data->epp_powersave < 0)
+			goto skip_epp;
 
-		if (cpu_data->epp_saved >= 0) {
-			epp = cpu_data->epp_saved;
-			cpu_data->epp_saved = -EINVAL;
-			goto update_epp;
-		}
+		/*
+		 * No need to restore EPP when it is not zero. This
+		 * means:
+		 *  - Policy is not changed
+		 *  - user has manually changed
+		 *  - Error reading EPB
+		 */
+		epp = intel_pstate_get_epp(cpu_data, value);
+		if (epp)
+			goto skip_epp;
 
-		if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
-			epp = intel_pstate_get_epp(cpu_data, value);
-			cpu_data->epp_powersave = epp;
-			/* If EPP read was failed, then don't try to write */
-			if (epp < 0)
-				goto skip_epp;
-
-
-			epp = 0;
-		} else {
-			/* skip setting EPP, when saved value is invalid */
-			if (cpu_data->epp_powersave < 0)
-				goto skip_epp;
-
-			/*
-			 * No need to restore EPP when it is not zero. This
-			 * means:
-			 *  - Policy is not changed
-			 *  - user has manually changed
-			 *  - Error reading EPB
-			 */
-			epp = intel_pstate_get_epp(cpu_data, value);
-			if (epp)
-				goto skip_epp;
-
-			epp = cpu_data->epp_powersave;
-		}
-update_epp:
-		if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
-			value &= ~GENMASK_ULL(31, 24);
-			value |= (u64)epp << 24;
-		} else {
-			intel_pstate_set_epb(cpu, epp);
-		}
-skip_epp:
-		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
+		epp = cpu_data->epp_powersave;
 	}
+update_epp:
+	if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+		value &= ~GENMASK_ULL(31, 24);
+		value |= (u64)epp << 24;
+	} else {
+		intel_pstate_set_epb(cpu, epp);
+	}
+skip_epp:
+	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
 static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
@@ -892,7 +888,7 @@ static int intel_pstate_resume(struct cpufreq_policy *policy)
 	mutex_lock(&intel_pstate_limits_lock);
 
 	all_cpu_data[policy->cpu]->epp_policy = 0;
-	intel_pstate_hwp_set(policy);
+	intel_pstate_hwp_set(policy->cpu);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
@@ -2057,7 +2053,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	intel_pstate_set_update_util_hook(policy->cpu);
 
 	if (hwp_active)
-		intel_pstate_hwp_set(policy);
+		intel_pstate_hwp_set(policy->cpu);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 

From b02aabe8ab9757a7dd5aa50e201a6d970f7e7a2f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2017 00:24:26 +0200
Subject: [PATCH 31/56] cpufreq: intel_pstate: Eliminate
 intel_pstate_get_min_max()

Some computations in intel_pstate_get_min_max() are not necessary
and one of its two callers doesn't even use the full result.

First off, the fixed-point value of cpu->max_perf represents a
non-negative number between 0 and 1 inclusive and cpu->min_perf
cannot be greater than cpu->max_perf.  It is not necessary to check
those conditions every time the numbers in question are used.

Moreover, since intel_pstate_max_within_limits() only needs the
upper boundary, it doesn't make sense to compute the lower one in
there and returning min and max from intel_pstate_get_min_max()
via pointers doesn't look particularly nice.

For the above reasons, drop intel_pstate_get_min_max(), add a helper
to get the base P-state for min/max computations and carry them out
directly in the previous callers of intel_pstate_get_min_max().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 40 ++++++++++++----------------------
 1 file changed, 14 insertions(+), 26 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 5236701958d0..b62daf5a4ee8 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1496,26 +1496,10 @@ static int knl_get_turbo_pstate(void)
 	return ret;
 }
 
-static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
+static int intel_pstate_get_base_pstate(struct cpudata *cpu)
 {
-	int max_perf = cpu->pstate.turbo_pstate;
-	int max_perf_adj;
-	int min_perf;
-
-	if (global.no_turbo || global.turbo_disabled)
-		max_perf = cpu->pstate.max_pstate;
-
-	/*
-	 * performance can be limited by user through sysfs, by cpufreq
-	 * policy, or by cpu specific default values determined through
-	 * experimentation.
-	 */
-	max_perf_adj = fp_ext_toint(max_perf * cpu->max_perf);
-	*max = clamp_t(int, max_perf_adj,
-			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
-
-	min_perf = fp_ext_toint(max_perf * cpu->min_perf);
-	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+	return global.no_turbo || global.turbo_disabled ?
+			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
 }
 
 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
@@ -1538,11 +1522,13 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu)
 
 static void intel_pstate_max_within_limits(struct cpudata *cpu)
 {
-	int min_pstate, max_pstate;
+	int pstate;
 
 	update_turbo_state();
-	intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate);
-	intel_pstate_set_pstate(cpu, max_pstate);
+	pstate = intel_pstate_get_base_pstate(cpu);
+	pstate = max(cpu->pstate.min_pstate,
+		     fp_ext_toint(pstate * cpu->max_perf));
+	intel_pstate_set_pstate(cpu, pstate);
 }
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
@@ -1704,11 +1690,13 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 
 static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
 {
-	int max_perf, min_perf;
+	int max_pstate = intel_pstate_get_base_pstate(cpu);
+	int min_pstate;
 
-	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
-	pstate = clamp_t(int, pstate, min_perf, max_perf);
-	return pstate;
+	min_pstate = max(cpu->pstate.min_pstate,
+			 fp_ext_toint(max_pstate * cpu->min_perf));
+	max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf));
+	return clamp_t(int, pstate, min_pstate, max_pstate);
 }
 
 static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)

From 8ce95844c85349243520b6943ec1225a047d7d6c Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 17 Mar 2017 11:19:21 +0530
Subject: [PATCH 32/56] PM / Domain: remove conditional from error case

There is no point running the conditional 'if' statement if the genpd
isn't present.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e697dec9d25b..1a0549f1944a 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1700,12 +1700,12 @@ int of_genpd_add_provider_simple(struct device_node *np,
 
 	mutex_lock(&gpd_list_lock);
 
-	if (pm_genpd_present(genpd))
+	if (pm_genpd_present(genpd)) {
 		ret = genpd_add_provider(np, genpd_xlate_simple, genpd);
-
-	if (!ret) {
-		genpd->provider = &np->fwnode;
-		genpd->has_provider = true;
+		if (!ret) {
+			genpd->provider = &np->fwnode;
+			genpd->has_provider = true;
+		}
 	}
 
 	mutex_unlock(&gpd_list_lock);

From 41e2c8e0060db250cf70bc2a41ea6595a90b360c Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 20 Mar 2017 11:19:20 +0100
Subject: [PATCH 33/56] PM / Domains: Clean up code validating genpd's status

There exist several similar validations of the genpd->status, against
GPD_STATE_ACTIVE and GPD_STATE_POWER_OFF. Let's clean up this code by
converting to use a helper macro, genpd_status_on().

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 1a0549f1944a..792fbab3dfc4 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -121,6 +121,7 @@ static const struct genpd_lock_ops genpd_spin_ops = {
 #define genpd_lock_interruptible(p)	p->lock_ops->lock_interruptible(p)
 #define genpd_unlock(p)			p->lock_ops->unlock(p)
 
+#define genpd_status_on(genpd)		(genpd->status == GPD_STATE_ACTIVE)
 #define genpd_is_irq_safe(genpd)	(genpd->flags & GENPD_FLAG_IRQ_SAFE)
 
 static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
@@ -296,8 +297,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
 	 * (1) The domain is already in the "power off" state.
 	 * (2) System suspend is in progress.
 	 */
-	if (genpd->status == GPD_STATE_POWER_OFF
-	    || genpd->prepared_count > 0)
+	if (!genpd_status_on(genpd) || genpd->prepared_count > 0)
 		return 0;
 
 	if (atomic_read(&genpd->sd_count) > 0)
@@ -373,7 +373,7 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth)
 	struct gpd_link *link;
 	int ret = 0;
 
-	if (genpd->status == GPD_STATE_ACTIVE)
+	if (genpd_status_on(genpd))
 		return 0;
 
 	/*
@@ -752,7 +752,7 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 {
 	struct gpd_link *link;
 
-	if (genpd->status == GPD_STATE_POWER_OFF)
+	if (!genpd_status_on(genpd))
 		return;
 
 	if (genpd->suspended_count != genpd->device_count
@@ -793,7 +793,7 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock,
 {
 	struct gpd_link *link;
 
-	if (genpd->status == GPD_STATE_ACTIVE)
+	if (genpd_status_on(genpd))
 		return;
 
 	list_for_each_entry(link, &genpd->slave_links, slave_node) {
@@ -1329,8 +1329,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 	genpd_lock(subdomain);
 	genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING);
 
-	if (genpd->status == GPD_STATE_POWER_OFF
-	    &&  subdomain->status != GPD_STATE_POWER_OFF) {
+	if (!genpd_status_on(genpd) && genpd_status_on(subdomain)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1346,7 +1345,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 	list_add_tail(&link->master_node, &genpd->master_links);
 	link->slave = subdomain;
 	list_add_tail(&link->slave_node, &subdomain->slave_links);
-	if (subdomain->status != GPD_STATE_POWER_OFF)
+	if (genpd_status_on(subdomain))
 		genpd_sd_counter_inc(genpd);
 
  out:
@@ -1406,7 +1405,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 		list_del(&link->master_node);
 		list_del(&link->slave_node);
 		kfree(link);
-		if (subdomain->status != GPD_STATE_POWER_OFF)
+		if (genpd_status_on(subdomain))
 			genpd_sd_counter_dec(genpd);
 
 		ret = 0;
@@ -2221,7 +2220,7 @@ static int pm_genpd_summary_one(struct seq_file *s,
 
 	if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup)))
 		goto exit;
-	if (genpd->status == GPD_STATE_POWER_OFF)
+	if (!genpd_status_on(genpd))
 		snprintf(state, sizeof(state), "%s-%u",
 			 status_lookup[genpd->status], genpd->state_idx);
 	else

From ffaa42e8a40b7f1041e36b022cd28b7c45e2b564 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 20 Mar 2017 11:19:21 +0100
Subject: [PATCH 34/56] PM / Domains: Enable users of genpd to specify always
 on PM domains

The current way to implement an always on PM domain consists of returning
-EBUSY from the ->power_off() callback. This is a bit different compared to
using the always on genpd governor, which prevents the PM domain from being
powered off via runtime suspend, but not via system suspend.

The approach to return -EBUSY from the ->power_off() callback to support
always on PM domains in genpd is suboptimal. That is because it requires
genpd to follow the regular execution path of the power off sequence, which
ends by invoking the ->power_off() callback.

To enable genpd to early abort the power off sequence for always on PM
domains, it needs static information about these configurations. Therefore
let's add a new genpd configuration flag, GENPD_FLAG_ALWAYS_ON.

Users of the new GENPD_FLAG_ALWAYS_ON flag are required by genpd to make
sure the PM domain is powered on before calling pm_genpd_init(). Moreover,
users don't need to implement the ->power_off() callback, as genpd doesn't
ever invoke it.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 14 ++++++++++++--
 include/linux/pm_domain.h   |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 792fbab3dfc4..c71a7ef08b05 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -123,6 +123,7 @@ static const struct genpd_lock_ops genpd_spin_ops = {
 
 #define genpd_status_on(genpd)		(genpd->status == GPD_STATE_ACTIVE)
 #define genpd_is_irq_safe(genpd)	(genpd->flags & GENPD_FLAG_IRQ_SAFE)
+#define genpd_is_always_on(genpd)	(genpd->flags & GENPD_FLAG_ALWAYS_ON)
 
 static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
 		struct generic_pm_domain *genpd)
@@ -300,7 +301,12 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
 	if (!genpd_status_on(genpd) || genpd->prepared_count > 0)
 		return 0;
 
-	if (atomic_read(&genpd->sd_count) > 0)
+	/*
+	 * Abort power off for the PM domain in the following situations:
+	 * (1) The domain is configured as always on.
+	 * (2) When the domain has a subdomain being powered on.
+	 */
+	if (genpd_is_always_on(genpd) || atomic_read(&genpd->sd_count) > 0)
 		return -EBUSY;
 
 	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
@@ -752,7 +758,7 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 {
 	struct gpd_link *link;
 
-	if (!genpd_status_on(genpd))
+	if (!genpd_status_on(genpd) || genpd_is_always_on(genpd))
 		return;
 
 	if (genpd->suspended_count != genpd->device_count
@@ -1491,6 +1497,10 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
 		genpd->dev_ops.start = pm_clk_resume;
 	}
 
+	/* Always-on domains must be powered on at initialization. */
+	if (genpd_is_always_on(genpd) && !genpd_status_on(genpd))
+		return -EINVAL;
+
 	/* Use only one "off" state if there were no states declared */
 	if (genpd->state_count == 0) {
 		ret = genpd_set_default_power_state(genpd);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 5339ed5bd6f9..9b6abe632587 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -20,6 +20,7 @@
 /* Defines used for the flags field in the struct generic_pm_domain */
 #define GENPD_FLAG_PM_CLK	(1U << 0) /* PM domain uses PM clk */
 #define GENPD_FLAG_IRQ_SAFE	(1U << 1) /* PM domain operates in atomic */
+#define GENPD_FLAG_ALWAYS_ON	(1U << 2) /* PM domain is always powered on */
 
 enum gpd_status {
 	GPD_STATE_ACTIVE = 0,	/* PM domain is active */

From 1c14967c6ea0deb3db4a974b1de519f5a5593ef4 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 20 Mar 2017 11:19:22 +0100
Subject: [PATCH 35/56] PM / Domains: Respect errors from genpd's ->power_off()
 callback

The current code in genpd_sync_power_off(), doesn't care about potential
errors being returned from genpd's ->power_off() callback.

Obviously this behaviour could lead to problems, such as incorrectly
setting the genpd's status to GPD_STATE_POWER_OFF, but also to incorrectly
decrease the subdomain count for the masters, which potentially allows them
to be powered off in the next recursive call to genpd_sync_power_off().

Let's fix this behaviour by bailing out when the ->power_off() callback
returns an error code.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index c71a7ef08b05..c0318c130396 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -767,7 +767,8 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 
 	/* Choose the deepest state when suspending */
 	genpd->state_idx = genpd->state_count - 1;
-	_genpd_power_off(genpd, false);
+	if (_genpd_power_off(genpd, false))
+		return;
 
 	genpd->status = GPD_STATE_POWER_OFF;
 

From 075c37d59ecd4a8b7c9cb5570e90d5b538797ad2 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 20 Mar 2017 11:19:23 +0100
Subject: [PATCH 36/56] PM / Domains: Don't warn about IRQ safe device for an
 always on PM domain

When an IRQ safe device is attached to a no sleep domain, genpd prints a
warning once, to indicate that it is a suboptimal configuration from a
power consumption point of view.

However the warning doesn't make sense for an always on domain, since it
anyway remains powered on. Therefore, let's change to not print the warning
for this configuration.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index c0318c130396..06807933a285 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -132,8 +132,12 @@ static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
 
 	ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd);
 
-	/* Warn once if IRQ safe dev in no sleep domain */
-	if (ret)
+	/*
+	 * Warn once if an IRQ safe device is attached to a no sleep domain, as
+	 * to indicate a suboptimal configuration for PM. For an always on
+	 * domain this isn't case, thus don't warn.
+	 */
+	if (ret && !genpd_is_always_on(genpd))
 		dev_warn_once(dev, "PM domain %s will not be powered off\n",
 				genpd->name);
 

From ecad4502d090a8630f50a88cbe072b92f3a3229e Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Wed, 15 Mar 2017 13:45:53 +0530
Subject: [PATCH 37/56] powernv-cpuidle: Validate DT property array size

The various properties associated with powernv idle states such as
names, flags, residency-ns, latencies-ns, psscr, psscr-mask are
exposed in the device-tree as property arrays such that the pointwise
entries in each of these arrays correspond to the properties of the
same idle state.

This patch validates that the lengths of the property arrays are the
same. If there is a mismatch, the patch will ensure that we bail out
and not expose the platform idle states via cpuidle.

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Reviewed-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-powernv.c | 64 +++++++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 3 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 370593006f5f..a06df51a36c0 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -197,11 +197,25 @@ static inline void add_powernv_state(int index, const char *name,
 	stop_psscr_table[index].mask = psscr_mask;
 }
 
+/*
+ * Returns 0 if prop1_len == prop2_len. Else returns -1
+ */
+static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
+					 const char *prop2, int prop2_len)
+{
+	if (prop1_len == prop2_len)
+		return 0;
+
+	pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n",
+		prop1, prop2);
+	return -1;
+}
+
 static int powernv_add_idle_states(void)
 {
 	struct device_node *power_mgt;
 	int nr_idle_states = 1; /* Snooze */
-	int dt_idle_states;
+	int dt_idle_states, count;
 	u32 latency_ns[CPUIDLE_STATE_MAX];
 	u32 residency_ns[CPUIDLE_STATE_MAX];
 	u32 flags[CPUIDLE_STATE_MAX];
@@ -226,6 +240,21 @@ static int powernv_add_idle_states(void)
 		goto out;
 	}
 
+	count = of_property_count_u32_elems(power_mgt,
+					    "ibm,cpu-idle-state-latencies-ns");
+
+	if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
+				   "ibm,cpu-idle-state-latencies-ns",
+				   count) != 0)
+		goto out;
+
+	count = of_property_count_strings(power_mgt,
+					  "ibm,cpu-idle-state-names");
+	if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
+				   "ibm,cpu-idle-state-names",
+				   count) != 0)
+		goto out;
+
 	/*
 	 * Since snooze is used as first idle state, max idle states allowed is
 	 * CPUIDLE_STATE_MAX -1
@@ -260,6 +289,22 @@ static int powernv_add_idle_states(void)
 	has_stop_states = (flags[0] &
 			   (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
 	if (has_stop_states) {
+		count = of_property_count_u64_elems(power_mgt,
+						    "ibm,cpu-idle-state-psscr");
+		if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
+					   dt_idle_states,
+					   "ibm,cpu-idle-state-psscr",
+					   count) != 0)
+			goto out;
+
+		count = of_property_count_u64_elems(power_mgt,
+						    "ibm,cpu-idle-state-psscr-mask");
+		if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
+					   dt_idle_states,
+					   "ibm,cpu-idle-state-psscr-mask",
+					   count) != 0)
+			goto out;
+
 		if (of_property_read_u64_array(power_mgt,
 		    "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
 			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
@@ -274,8 +319,21 @@ static int powernv_add_idle_states(void)
 		}
 	}
 
-	rc = of_property_read_u32_array(power_mgt,
-		"ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
+	count = of_property_count_u32_elems(power_mgt,
+					    "ibm,cpu-idle-state-residency-ns");
+
+	if (count < 0) {
+		rc = count;
+	} else if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
+					  dt_idle_states,
+					  "ibm,cpu-idle-state-residency-ns",
+					  count) != 0) {
+		goto out;
+	} else {
+		rc = of_property_read_u32_array(power_mgt,
+						"ibm,cpu-idle-state-residency-ns",
+						residency_ns, dt_idle_states);
+	}
 
 	for (i = 0; i < dt_idle_states; i++) {
 		unsigned int exit_latency, target_residency;

From 630e57573efa20b586c808400005d0ebfb93fc6a Mon Sep 17 00:00:00 2001
From: "Box, David E" <david.e.box@intel.com>
Date: Wed, 29 Mar 2017 09:45:57 -0700
Subject: [PATCH 38/56] cpufreq: intel_pstate: Add support for Gemini Lake

Use same parameters as INTEL_FAM6_ATOM_GOLDMONT to enable
Gemini Lake.

Signed-off-by: Box, David E <david.e.box@intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b62daf5a4ee8..c31b72b16c2b 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1861,6 +1861,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_funcs),
 	ICPU(INTEL_FAM6_XEON_PHI_KNM,		knl_funcs),
 	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		bxt_funcs),
+	ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE,       bxt_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

From b539cc82d493d100606213df459c86e94f342996 Mon Sep 17 00:00:00 2001
From: Lina Iyer <lina.iyer@linaro.org>
Date: Fri, 3 Mar 2017 12:41:27 -0800
Subject: [PATCH 39/56] PM / Domains: Ignore domain-idle-states that are not
 compatible

domain-idle-states property may have phandles to idle state bindings
that may not be compatible with idle state definition defined in [1].
Such phandles would just be ignored and not throw an error when read by
the domain core.

Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../devicetree/bindings/power/power_domain.txt   |  4 +++-
 drivers/base/power/domain.c                      | 16 +++++++++-------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 723e1ad937da..940707d095cc 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -31,7 +31,9 @@ Optional properties:
 
 - domain-idle-states : A phandle of an idle-state that shall be soaked into a
                 generic domain power state. The idle state definitions are
-                compatible with domain-idle-state specified in [1].
+                compatible with domain-idle-state specified in [1]. phandles
+                that are not compatible with domain-idle-state will be
+                ignored.
   The domain-idle-state property reflects the idle state of this PM domain and
   not the idle states of the devices or sub-domains in the PM domain. Devices
   and sub-domains have their own idle-states independent of the parent
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 06807933a285..ad196427b4f2 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2093,11 +2093,6 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state,
 	int err;
 	u32 residency;
 	u32 entry_latency, exit_latency;
-	const struct of_device_id *match_id;
-
-	match_id = of_match_node(idle_state_match, state_node);
-	if (!match_id)
-		return -EINVAL;
 
 	err = of_property_read_u32(state_node, "entry-latency-us",
 						&entry_latency);
@@ -2146,6 +2141,7 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 	int err, ret;
 	int count;
 	struct of_phandle_iterator it;
+	const struct of_device_id *match_id;
 
 	count = of_count_phandle_with_args(dn, "domain-idle-states", NULL);
 	if (count <= 0)
@@ -2158,6 +2154,9 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 	/* Loop over the phandles until all the requested entry is found */
 	of_for_each_phandle(&it, err, dn, "domain-idle-states", NULL, 0) {
 		np = it.node;
+		match_id = of_match_node(idle_state_match, np);
+		if (!match_id)
+			continue;
 		ret = genpd_parse_state(&st[i++], np);
 		if (ret) {
 			pr_err
@@ -2169,8 +2168,11 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 		}
 	}
 
-	*n = count;
-	*states = st;
+	*n = i;
+	if (!i)
+		kfree(st);
+	else
+		*states = st;
 
 	return 0;
 }

From 3ea6b7001ef5da9f9816ee3c4fe731f4fe08b865 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 6 Apr 2017 13:19:35 +0900
Subject: [PATCH 40/56] PM / devfreq: Move struct devfreq_governor to devfreq
 directory

This patch moves the struct devfreq_governor from header file
to the devfreq directory because this structure is private data
and it has to be accessed only by the devfreq core.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/governor.h | 29 +++++++++++++++++++++++++++++
 include/linux/devfreq.h    | 30 +-----------------------------
 2 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index 71576b8bdfef..a4f2fa1091e4 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -25,6 +25,35 @@
 #define DEVFREQ_GOV_SUSPEND			0x4
 #define DEVFREQ_GOV_RESUME			0x5
 
+/**
+ * struct devfreq_governor - Devfreq policy governor
+ * @node:		list node - contains registered devfreq governors
+ * @name:		Governor's name
+ * @immutable:		Immutable flag for governor. If the value is 1,
+ *			this govenror is never changeable to other governor.
+ * @get_target_freq:	Returns desired operating frequency for the device.
+ *			Basically, get_target_freq will run
+ *			devfreq_dev_profile.get_dev_status() to get the
+ *			status of the device (load = busy_time / total_time).
+ *			If no_central_polling is set, this callback is called
+ *			only with update_devfreq() notified by OPP.
+ * @event_handler:      Callback for devfreq core framework to notify events
+ *                      to governors. Events include per device governor
+ *                      init and exit, opp changes out of devfreq, suspend
+ *                      and resume of per device devfreq during device idle.
+ *
+ * Note that the callbacks are called with devfreq->lock locked by devfreq.
+ */
+struct devfreq_governor {
+	struct list_head node;
+
+	const char name[DEVFREQ_NAME_LEN];
+	const unsigned int immutable;
+	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+	int (*event_handler)(struct devfreq *devfreq,
+				unsigned int event, void *data);
+};
+
 /* Caution: devfreq->lock must be locked before calling update_devfreq */
 extern int update_devfreq(struct devfreq *devfreq);
 
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index e0acb0e5243b..6c220e4ebb6b 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -27,6 +27,7 @@
 #define DEVFREQ_POSTCHANGE		(1)
 
 struct devfreq;
+struct devfreq_governor;
 
 /**
  * struct devfreq_dev_status - Data given from devfreq user device to
@@ -100,35 +101,6 @@ struct devfreq_dev_profile {
 	unsigned int max_state;
 };
 
-/**
- * struct devfreq_governor - Devfreq policy governor
- * @node:		list node - contains registered devfreq governors
- * @name:		Governor's name
- * @immutable:		Immutable flag for governor. If the value is 1,
- *			this govenror is never changeable to other governor.
- * @get_target_freq:	Returns desired operating frequency for the device.
- *			Basically, get_target_freq will run
- *			devfreq_dev_profile.get_dev_status() to get the
- *			status of the device (load = busy_time / total_time).
- *			If no_central_polling is set, this callback is called
- *			only with update_devfreq() notified by OPP.
- * @event_handler:      Callback for devfreq core framework to notify events
- *                      to governors. Events include per device governor
- *                      init and exit, opp changes out of devfreq, suspend
- *                      and resume of per device devfreq during device idle.
- *
- * Note that the callbacks are called with devfreq->lock locked by devfreq.
- */
-struct devfreq_governor {
-	struct list_head node;
-
-	const char name[DEVFREQ_NAME_LEN];
-	const unsigned int immutable;
-	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
-	int (*event_handler)(struct devfreq *devfreq,
-				unsigned int event, void *data);
-};
-
 /**
  * struct devfreq - Device devfreq structure
  * @node:	list node - contains the devices with devfreq that have been

From 39b64aa1c007b98727db9f501266454fa403166c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 30 Mar 2017 23:36:41 +0200
Subject: [PATCH 41/56] cpufreq: schedutil: Reduce frequencies slower

The schedutil governor reduces frequencies too fast in some
situations, which causes undesirable performance drops to
appear.

To address that issue, make schedutil reduce the frequency slower by
setting it to the average of the value chosen during the previous
iteration of governor computations and the new one coming from its
frequency selection formula.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=194963
Reported-by: John <john.ettedgui@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 kernel/sched/cpufreq_schedutil.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 848cb47094cd..b1fedf9932d6 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -101,6 +101,9 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 	if (sg_policy->next_freq == next_freq)
 		return;
 
+	if (sg_policy->next_freq > next_freq)
+		next_freq = (sg_policy->next_freq + next_freq) >> 1;
+
 	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 

From 1b72e7fd304639f1cd49d1e11955c4974936d88c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 11 Apr 2017 00:20:41 +0200
Subject: [PATCH 42/56] cpufreq: schedutil: Use policy-dependent transition
 delays

Make the schedutil governor take the initial (default) value of the
rate_limit_us sysfs attribute from the (new) transition_delay_us
policy parameter (to be set by the scaling driver).

That will allow scaling drivers to make schedutil use smaller default
values of rate_limit_us and reduce the default average time interval
between consecutive frequency changes.

Make intel_pstate set transition_delay_us to 500.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/intel_pstate.c   |  2 ++
 include/linux/cpufreq.h          |  7 +++++++
 kernel/sched/cpufreq_schedutil.c | 15 ++++++++++-----
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c31b72b16c2b..b7de5bd76a31 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -41,6 +41,7 @@
 #define INTEL_PSTATE_HWP_SAMPLING_INTERVAL	(50 * NSEC_PER_MSEC)
 
 #define INTEL_CPUFREQ_TRANSITION_LATENCY	20000
+#define INTEL_CPUFREQ_TRANSITION_DELAY		500
 
 #ifdef CONFIG_ACPI
 #include <acpi/processor.h>
@@ -2237,6 +2238,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		return ret;
 
 	policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
+	policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
 	/* This reflects the intel_pstate_get_cpu_pstates() setting. */
 	policy->cur = policy->cpuinfo.min_freq;
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 87165f06a307..a5ce0bbeadb5 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -120,6 +120,13 @@ struct cpufreq_policy {
 	bool			fast_switch_possible;
 	bool			fast_switch_enabled;
 
+	/*
+	 * Preferred average time interval between consecutive invocations of
+	 * the driver to set the frequency for this policy.  To be set by the
+	 * scaling driver (0, which is the default, means no preference).
+	 */
+	unsigned int		transition_delay_us;
+
 	 /* Cached frequency lookup from cpufreq_driver_resolve_freq. */
 	unsigned int cached_target_freq;
 	int cached_resolved_idx;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index b1fedf9932d6..76877a62b5fa 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -494,7 +494,6 @@ static int sugov_init(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy;
 	struct sugov_tunables *tunables;
-	unsigned int lat;
 	int ret = 0;
 
 	/* State should be equivalent to EXIT */
@@ -533,10 +532,16 @@ static int sugov_init(struct cpufreq_policy *policy)
 		goto stop_kthread;
 	}
 
-	tunables->rate_limit_us = LATENCY_MULTIPLIER;
-	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
-	if (lat)
-		tunables->rate_limit_us *= lat;
+	if (policy->transition_delay_us) {
+		tunables->rate_limit_us = policy->transition_delay_us;
+	} else {
+		unsigned int lat;
+
+		tunables->rate_limit_us = LATENCY_MULTIPLIER;
+		lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+		if (lat)
+			tunables->rate_limit_us *= lat;
+	}
 
 	policy->governor_data = sg_policy;
 	sg_policy->tunables = tunables;

From 010a522cf2d9a01612b8a95abd787e70ae123ea6 Mon Sep 17 00:00:00 2001
From: Doug Smythies <doug.smythies@gmail.com>
Date: Mon, 17 Apr 2017 17:12:13 -0700
Subject: [PATCH 43/56] tools/power/x86/intel_pstate_tracer: Adjust directory
 ownership

The intel_pstate_tracer.py script only needs to be run as root
when it is also used to actually acquire the trace data that
it will post process. Otherwise it is generally preferable
that it be run as a regular user.
If it is run the first time as root, the results directory will be
incorrect for any subsequent run as a regular user. For any run
as root the specific testname subdirectory will not allow any
subsequent file saves by a regular user. Typically, and for example,
the regular user might be attempting to save a .csv file converted to
a spreadsheet with added calculations or graphs.

Set the directories and files owner and groups IDs to be the regular
user, if required.

Signed-off-by: Doug Smythies <dsmythies@telus.net>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../intel_pstate_tracer/intel_pstate_tracer.py  | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index fd706ac0f347..0b24dd9d01ff 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -353,6 +353,14 @@ def split_csv():
                 os.system('grep -m 1 common_cpu cpu.csv > cpu{:0>3}.csv'.format(index))
                 os.system('grep CPU_{:0>3} cpu.csv >> cpu{:0>3}.csv'.format(index, index))
 
+def fix_ownership(path):
+    """Give path to SUDO_UID:SUDO_GID; no-op unless sudo exported both."""
+
+    uid = os.environ.get('SUDO_UID')
+    gid = os.environ.get('SUDO_GID')
+    if uid is not None and gid is not None:
+        os.chown(path, int(uid), int(gid))
+
 def cleanup_data_files():
     """ clean up existing data files """
 
@@ -518,12 +526,16 @@ else:
 
 if not os.path.exists('results'):
     os.mkdir('results')
+    # The regular user needs to own the directory, not root.
+    fix_ownership('results')
 
 os.chdir('results')
 if os.path.exists(testname):
     print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
     sys.exit()
 os.mkdir(testname)
+# The regular user needs to own the directory, not root.
+fix_ownership(testname)
 os.chdir(testname)
 
 # Temporary (or perhaps not)
@@ -566,4 +578,9 @@ plot_scaled_cpu()
 plot_boost_cpu()
 plot_ghz_cpu()
 
+# It is preferable, but not necessary, that the regular user owns the files, not root.
+for root, dirs, files in os.walk('.'):
+    for f in files:
+        fix_ownership(os.path.join(root, f))
+
 os.chdir('../../')

From 02018b3929a23acdd452b986e7c8aeca4529d492 Mon Sep 17 00:00:00 2001
From: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Date: Wed, 19 Apr 2017 13:20:54 +0200
Subject: [PATCH 44/56] cpuidle: cpuidle-cps: remove unused variable

'core' in cps_cpuidle_init has never been used and is unnecessary, so
remove the dead code.

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-cps.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-cps.c b/drivers/cpuidle/cpuidle-cps.c
index 926ba9871c62..12b9145913de 100644
--- a/drivers/cpuidle/cpuidle-cps.c
+++ b/drivers/cpuidle/cpuidle-cps.c
@@ -118,7 +118,7 @@ static void __init cps_cpuidle_unregister(void)
 
 static int __init cps_cpuidle_init(void)
 {
-	int err, cpu, core, i;
+	int err, cpu, i;
 	struct cpuidle_device *device;
 
 	/* Detect supported states */
@@ -160,7 +160,6 @@ static int __init cps_cpuidle_init(void)
 	}
 
 	for_each_possible_cpu(cpu) {
-		core = cpu_data[cpu].core;
 		device = &per_cpu(cpuidle_dev, cpu);
 		device->cpu = cpu;
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED

From 79b578111febef642143669254b243ffbcf64ea9 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 4 Apr 2017 07:54:12 +1000
Subject: [PATCH 45/56] cpuidle: powernv: Don't bounce between low and very low
 thread priority

The core of snooze_loop() continually bounces between low and very
low thread priority. Changing thread priorities is an expensive
operation that can negatively impact other threads on a core.

All CPUs that can run PowerNV support very low priority, so we can
avoid the change completely.

Signed-off-by: Anton Blanchard <anton@samba.org>
Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-powernv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index a06df51a36c0..0ddf1a5bb0a9 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -57,7 +57,6 @@ static int snooze_loop(struct cpuidle_device *dev,
 	snooze_exit_time = get_tb() + snooze_timeout;
 	ppc64_runlatch_off();
 	while (!need_resched()) {
-		HMT_low();
 		HMT_very_low();
 		if (snooze_timeout_en && get_tb() > snooze_exit_time)
 			break;

From 26eb48a9faf241abd60aa546e6beb896011667c1 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 4 Apr 2017 07:54:13 +1000
Subject: [PATCH 46/56] cpuidle: powernv: Don't continually set thread priority
 in snooze_loop()

The powerpc64 kernel exception handlers have preserved thread priorities
for a long time now, so there is no need to continually set it.

Just set it once on entry and once on exit.

Signed-off-by: Anton Blanchard <anton@samba.org>
Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-powernv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 0ddf1a5bb0a9..f8901671fff4 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -56,8 +56,8 @@ static int snooze_loop(struct cpuidle_device *dev,
 
 	snooze_exit_time = get_tb() + snooze_timeout;
 	ppc64_runlatch_off();
+	HMT_very_low();
 	while (!need_resched()) {
-		HMT_very_low();
 		if (snooze_timeout_en && get_tb() > snooze_exit_time)
 			break;
 	}

From 0baa91cb73e296242edad89cfe3f60c59ab8a95a Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 4 Apr 2017 07:54:14 +1000
Subject: [PATCH 47/56] cpuidle: powernv: Avoid a branch in the core
 snooze_loop() loop

When in the snooze_loop() we want to take up the least amount of
resources. On my version of gcc (6.3), we end up with an extra
branch because it predicts snooze_timeout_en to be false, whereas it
is almost always true.

Use likely() to avoid the branch and be a little nicer to the
other non idle threads on the core.

Signed-off-by: Anton Blanchard <anton@samba.org>
Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-powernv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index f8901671fff4..5bb4bb303fba 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -58,7 +58,7 @@ static int snooze_loop(struct cpuidle_device *dev,
 	ppc64_runlatch_off();
 	HMT_very_low();
 	while (!need_resched()) {
-		if (snooze_timeout_en && get_tb() > snooze_exit_time)
+		if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time)
 			break;
 	}
 

From 54cad2fce71f3ed2995bfc6d17d4ea5c898f20b1 Mon Sep 17 00:00:00 2001
From: Irina Tirdea <irina.tirdea@nxp.com>
Date: Tue, 4 Apr 2017 20:04:11 +0300
Subject: [PATCH 48/56] cpufreq: imx6q: Fix handling EPROBE_DEFER from
 regulator

If there are any errors in getting the cpu0 regulators, the driver returns
-ENOENT. In case the regulators are not yet available, the devm_regulator_get
calls will return -EPROBE_DEFER, so that the driver can be probed later.
If we return -ENOENT, the driver will fail its initialization and will
not try to probe again (when the regulators become available).

Return the actual error received from regulator_get in probe. Print a
differentiated message in case we need to probe the device later and
in case we actually failed. Also add a message to inform when the
driver has been successfully registered.

Signed-off-by: Irina Tirdea <irina.tirdea@nxp.com>
Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/imx6q-cpufreq.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 7719b02e04f5..be90ee3810bf 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -222,6 +222,13 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	arm_reg = regulator_get(cpu_dev, "arm");
 	pu_reg = regulator_get_optional(cpu_dev, "pu");
 	soc_reg = regulator_get(cpu_dev, "soc");
+	if (PTR_ERR(arm_reg) == -EPROBE_DEFER ||
+			PTR_ERR(soc_reg) == -EPROBE_DEFER ||
+			PTR_ERR(pu_reg) == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		dev_dbg(cpu_dev, "regulators not ready, defer\n");
+		goto put_reg;
+	}
 	if (IS_ERR(arm_reg) || IS_ERR(soc_reg)) {
 		dev_err(cpu_dev, "failed to get regulators\n");
 		ret = -ENOENT;

From 5aa1599ff039a68a5c43e9aa74973f40b1065746 Mon Sep 17 00:00:00 2001
From: Leonard Crestez <leonard.crestez@nxp.com>
Date: Tue, 4 Apr 2017 20:04:12 +0300
Subject: [PATCH 49/56] cpufreq: imx6q: Set max suspend_freq to avoid changes
 during suspend

If the cpufreq driver tries to modify voltage/freq during suspend/resume
it might need to control an external PMIC via I2C or SPI but those
devices might be already suspended. This issue is likely to happen
whenever the LDOs have their vin-supply set.

To avoid this scenario we just increase cpufreq to the maximum before
suspend.

Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/imx6q-cpufreq.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index be90ee3810bf..786122e0455e 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -161,8 +161,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
 
 static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
 {
+	int ret;
+
 	policy->clk = arm_clk;
-	return cpufreq_generic_init(policy, freq_table, transition_latency);
+	ret = cpufreq_generic_init(policy, freq_table, transition_latency);
+	policy->suspend_freq = policy->max;
+
+	return ret;
 }
 
 static struct cpufreq_driver imx6q_cpufreq_driver = {
@@ -173,6 +178,7 @@ static struct cpufreq_driver imx6q_cpufreq_driver = {
 	.init = imx6q_cpufreq_init,
 	.name = "imx6q-cpufreq",
 	.attr = cpufreq_generic_attr,
+	.suspend = cpufreq_generic_suspend,
 };
 
 static int imx6q_cpufreq_probe(struct platform_device *pdev)

From eafca851639038a7863910e7fac869f5c8bdfb9d Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Sun, 9 Apr 2017 09:33:52 +0200
Subject: [PATCH 50/56] cpufreq: imx6q: Fix error handling code

According to the previous error handling code, it is likely that
'goto out_free_opp' is expected here in order to avoid a memory leak in
error handling path.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/imx6q-cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 786122e0455e..9c13f097fd8c 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -268,7 +268,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
-		goto put_reg;
+		goto out_free_opp;
 	}
 
 	/* Make imx6_soc_volt array's size same as arm opp number */

From 939dc6f51e90c95a7d88034da48b747f01873bce Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 11 Apr 2017 11:09:15 +0300
Subject: [PATCH 51/56] cpufreq: Add Tegra186 cpufreq driver

Add a new cpufreq driver for Tegra186 (and likely later).
The CPUs are organized into two clusters, Denver and A57,
with two and four cores respectively. CPU frequency can be
adjusted by writing the desired rate divisor and a voltage
hint to a special per-core register.

The frequency of each core can be set individually; however,
this is just a hint as all CPUs in a cluster will run at
the maximum rate of non-idle CPUs in the cluster.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/Kconfig.arm        |   6 +
 drivers/cpufreq/Makefile           |   1 +
 drivers/cpufreq/tegra186-cpufreq.c | 275 +++++++++++++++++++++++++++++
 3 files changed, 282 insertions(+)
 create mode 100644 drivers/cpufreq/tegra186-cpufreq.c

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 74fa5c5904d3..74ed7e9a7f27 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -247,6 +247,12 @@ config ARM_TEGRA124_CPUFREQ
 	help
 	  This adds the CPUFreq driver support for Tegra124 SOCs.
 
+config ARM_TEGRA186_CPUFREQ
+	tristate "Tegra186 CPUFreq support"
+	depends on ARCH_TEGRA && TEGRA_BPMP
+	help
+	  This adds the CPUFreq driver support for Tegra186 SOCs.
+
 config ARM_TI_CPUFREQ
 	bool "Texas Instruments CPUFreq support"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9f5a8045f36d..b7e78f063c4f 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_ARM_SPEAR_CPUFREQ)		+= spear-cpufreq.o
 obj-$(CONFIG_ARM_STI_CPUFREQ)		+= sti-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
+obj-$(CONFIG_ARM_TEGRA186_CPUFREQ)	+= tegra186-cpufreq.o
 obj-$(CONFIG_ARM_TI_CPUFREQ)		+= ti-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c
new file mode 100644
index 000000000000..fe7875311d62
--- /dev/null
+++ b/drivers/cpufreq/tegra186-cpufreq.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+
+#define EDVD_CORE_VOLT_FREQ(core)		(0x20 + (core) * 0x4)
+#define EDVD_CORE_VOLT_FREQ_F_SHIFT		0
+#define EDVD_CORE_VOLT_FREQ_V_SHIFT		16
+
+struct tegra186_cpufreq_cluster_info {
+	unsigned long offset;
+	int cpus[4];
+	unsigned int bpmp_cluster_id;
+};
+
+#define NO_CPU -1
+static const struct tegra186_cpufreq_cluster_info tegra186_clusters[] = {
+	/* Denver cluster */
+	{
+		.offset = SZ_64K * 7,
+		.cpus = { 1, 2, NO_CPU, NO_CPU },
+		.bpmp_cluster_id = 0,
+	},
+	/* A57 cluster */
+	{
+		.offset = SZ_64K * 6,
+		.cpus = { 0, 3, 4, 5 },
+		.bpmp_cluster_id = 1,
+	},
+};
+
+struct tegra186_cpufreq_cluster {
+	const struct tegra186_cpufreq_cluster_info *info;
+	struct cpufreq_frequency_table *table;
+};
+
+struct tegra186_cpufreq_data {
+	void __iomem *regs;
+
+	size_t num_clusters;
+	struct tegra186_cpufreq_cluster *clusters;
+};
+
+static int tegra186_cpufreq_init(struct cpufreq_policy *policy)
+{
+	struct tegra186_cpufreq_data *data = cpufreq_get_driver_data();
+	unsigned int i;
+
+	for (i = 0; i < data->num_clusters; i++) {
+		struct tegra186_cpufreq_cluster *cluster = &data->clusters[i];
+		const struct tegra186_cpufreq_cluster_info *info =
+			cluster->info;
+		unsigned int core;
+
+		for (core = 0; core < ARRAY_SIZE(info->cpus); core++) {
+			if (info->cpus[core] == policy->cpu)
+				break;
+		}
+		if (core == ARRAY_SIZE(info->cpus))
+			continue;
+
+		/* Each CPU is in exactly one cluster, so stop at the match. */
+		policy->cpuinfo.transition_latency = 300 * 1000;
+		policy->driver_data =
+			data->regs + info->offset + EDVD_CORE_VOLT_FREQ(core);
+		return cpufreq_table_validate_and_show(policy, cluster->table);
+	}
+
+	return -ENODEV;	/* policy->cpu is not in any known cluster */
+}
+
+static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy,
+				       unsigned int index)
+{
+	struct cpufreq_frequency_table *tbl = policy->freq_table + index;
+	void __iomem *edvd_reg = policy->driver_data;
+	u32 edvd_val = tbl->driver_data;
+
+	writel(edvd_val, edvd_reg);
+
+	return 0;
+}
+
+static struct cpufreq_driver tegra186_cpufreq_driver = {
+	.name = "tegra186",
+	.flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
+	.verify = cpufreq_generic_frequency_table_verify,
+	.target_index = tegra186_cpufreq_set_target,
+	.init = tegra186_cpufreq_init,
+	.attr = cpufreq_generic_attr,
+};
+
+static struct cpufreq_frequency_table *init_vhint_table(
+	struct platform_device *pdev, struct tegra_bpmp *bpmp,
+	unsigned int cluster_id)
+{
+	struct cpufreq_frequency_table *table;
+	struct mrq_cpu_vhint_request req;
+	struct tegra_bpmp_message msg;
+	struct cpu_vhint_data *data;
+	int err, i, j, num_rates = 0;
+	dma_addr_t phys;
+	void *virt;
+
+	virt = dma_alloc_coherent(bpmp->dev, sizeof(*data), &phys,
+				  GFP_KERNEL | GFP_DMA32);
+	if (!virt)
+		return ERR_PTR(-ENOMEM);
+
+	data = (struct cpu_vhint_data *)virt;
+
+	memset(&req, 0, sizeof(req));
+	req.addr = phys;
+	req.cluster_id = cluster_id;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.mrq = MRQ_CPU_VHINT;
+	msg.tx.data = &req;
+	msg.tx.size = sizeof(req);
+
+	err = tegra_bpmp_transfer(bpmp, &msg);
+	if (err) {
+		table = ERR_PTR(err);
+		goto free;
+	}
+
+	for (i = data->vfloor; i <= data->vceil; i++) {
+		u16 ndiv = data->ndiv[i];
+
+		if (ndiv < data->ndiv_min || ndiv > data->ndiv_max)
+			continue;
+
+		/* Only store lowest voltage index for each rate */
+		if (i > 0 && ndiv == data->ndiv[i - 1])
+			continue;
+
+		num_rates++;
+	}
+
+	table = devm_kcalloc(&pdev->dev, num_rates + 1, sizeof(*table),
+			     GFP_KERNEL);
+	if (!table) {
+		table = ERR_PTR(-ENOMEM);
+		goto free;
+	}
+
+	for (i = data->vfloor, j = 0; i <= data->vceil; i++) {
+		struct cpufreq_frequency_table *point;
+		u16 ndiv = data->ndiv[i];
+		u32 edvd_val = 0;
+
+		if (ndiv < data->ndiv_min || ndiv > data->ndiv_max)
+			continue;
+
+		/* Only store lowest voltage index for each rate */
+		if (i > 0 && ndiv == data->ndiv[i - 1])
+			continue;
+
+		edvd_val |= i << EDVD_CORE_VOLT_FREQ_V_SHIFT;
+		edvd_val |= ndiv << EDVD_CORE_VOLT_FREQ_F_SHIFT;
+
+		point = &table[j++];
+		point->driver_data = edvd_val;
+		point->frequency = data->ref_clk_hz * ndiv / data->pdiv /
+			data->mdiv / 1000;
+	}
+
+	table[j].frequency = CPUFREQ_TABLE_END;
+
+free:
+	dma_free_coherent(bpmp->dev, sizeof(*data), virt, phys);
+
+	return table;
+}
+
+static int tegra186_cpufreq_probe(struct platform_device *pdev)
+{
+	struct tegra186_cpufreq_data *data;
+	struct tegra_bpmp *bpmp;
+	struct resource *res;
+	unsigned int i;
+	int err;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->clusters = devm_kcalloc(&pdev->dev, ARRAY_SIZE(tegra186_clusters),
+				      sizeof(*data->clusters), GFP_KERNEL);
+	if (!data->clusters)
+		return -ENOMEM;
+
+	data->num_clusters = ARRAY_SIZE(tegra186_clusters);
+
+	/* BPMP is only needed while the frequency tables are queried. */
+	bpmp = tegra_bpmp_get(&pdev->dev);
+	if (IS_ERR(bpmp))
+		return PTR_ERR(bpmp);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(data->regs)) {
+		err = PTR_ERR(data->regs);
+		goto put_bpmp;
+	}
+
+	/* Build one frequency table per cluster from the BPMP vhint data. */
+	for (i = 0; i < data->num_clusters; i++) {
+		struct tegra186_cpufreq_cluster *cluster = &data->clusters[i];
+
+		cluster->info = &tegra186_clusters[i];
+		cluster->table = init_vhint_table(
+			pdev, bpmp, cluster->info->bpmp_cluster_id);
+		if (IS_ERR(cluster->table)) {
+			err = PTR_ERR(cluster->table);
+			goto put_bpmp;
+		}
+	}
+
+	tegra_bpmp_put(bpmp);
+
+	/* Read back in ->init() through cpufreq_get_driver_data(). */
+	tegra186_cpufreq_driver.driver_data = data;
+
+	return cpufreq_register_driver(&tegra186_cpufreq_driver);
+
+put_bpmp:
+	tegra_bpmp_put(bpmp);
+
+	return err;
+}
+
+static int tegra186_cpufreq_remove(struct platform_device *pdev)
+{
+	cpufreq_unregister_driver(&tegra186_cpufreq_driver);
+
+	return 0;
+}
+
+static const struct of_device_id tegra186_cpufreq_of_match[] = {
+	{ .compatible = "nvidia,tegra186-ccplex-cluster", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tegra186_cpufreq_of_match);
+
+static struct platform_driver tegra186_cpufreq_platform_driver = {
+	.driver = {
+		.name = "tegra186-cpufreq",
+		.of_match_table = tegra186_cpufreq_of_match,
+	},
+	.probe = tegra186_cpufreq_probe,
+	.remove = tegra186_cpufreq_remove,
+};
+module_platform_driver(tegra186_cpufreq_platform_driver);
+
+MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra186 cpufreq driver");
+MODULE_LICENSE("GPL v2");

From bc167c7de8886f08b3d8266b176eefaa9f22cd80 Mon Sep 17 00:00:00 2001
From: Todd E Brandt <todd.e.brandt@linux.intel.com>
Date: Fri, 7 Apr 2017 11:05:35 -0700
Subject: [PATCH 52/56] tools: power: pm-graph: AnalyzeSuspend v4.6

Moved from scripts into tools, and updated from 4.5 to 4.6
- Changed the tool title to SleepGraph
- Reformatted the code so analyze_suspend can be used as a library
- Reorganized all html/js/css handling code to be used by other tools
- upgraded the -summary feature to work faster with better readability

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../power/pm-graph}/analyze_suspend.py        | 916 ++++++++++--------
 1 file changed, 495 insertions(+), 421 deletions(-)
 rename {scripts => tools/power/pm-graph}/analyze_suspend.py (91%)

diff --git a/scripts/analyze_suspend.py b/tools/power/pm-graph/analyze_suspend.py
similarity index 91%
rename from scripts/analyze_suspend.py
rename to tools/power/pm-graph/analyze_suspend.py
index 20cdb2bc1dae..a9206e67fc1f 100755
--- a/scripts/analyze_suspend.py
+++ b/tools/power/pm-graph/analyze_suspend.py
@@ -12,10 +12,6 @@
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 # more details.
 #
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
 # Authors:
 #	 Todd Brandt <todd.e.brandt@linux.intel.com>
 #
@@ -23,7 +19,7 @@
 #	 Home Page
 #	   https://01.org/suspendresume
 #	 Source repo
-#	   https://github.com/01org/suspendresume
+#	   https://github.com/01org/pm-graph
 #
 # Description:
 #	 This tool is designed to assist kernel and OS developers in optimizing
@@ -71,14 +67,16 @@ from subprocess import call, Popen, PIPE
 #	 A global, single-instance container used to
 #	 store system values and test parameters
 class SystemValues:
+	title = 'SleepGraph'
+	version = '4.6'
 	ansi = False
-	version = '4.5'
 	verbose = False
 	addlogs = False
 	mindevlen = 0.0
 	mincglen = 0.0
 	cgphase = ''
 	cgtest = -1
+	max_graph_depth = 0
 	callloopmaxgap = 0.0001
 	callloopmaxlen = 0.005
 	srgap = 0
@@ -106,8 +104,8 @@ class SystemValues:
 	ftracefile = ''
 	htmlfile = ''
 	embedded = False
-	rtcwake = False
-	rtcwaketime = 10
+	rtcwake = True
+	rtcwaketime = 15
 	rtcpath = ''
 	devicefilter = []
 	stamp = 0
@@ -235,6 +233,12 @@ class SystemValues:
 			self.rtcpath = rtc
 		if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()):
 			self.ansi = True
+	def rootUser(self, fatal=False):
+		if os.geteuid() == 0:	# effective uid; $USER can be stale, unset or spoofed
+			return True
+		if fatal:
+			doError('This command must be run as root')
+		return False
 	def setPrecision(self, num):
 		if num < 0 or num > 6:
 			return
@@ -564,7 +568,7 @@ class SystemValues:
 		self.fsetVal('global', 'trace_clock')
 		# set trace buffer to a huge value
 		self.fsetVal('nop', 'current_tracer')
-		self.fsetVal('100000', 'buffer_size_kb')
+		self.fsetVal('131073', 'buffer_size_kb')
 		# go no further if this is just a status check
 		if testing:
 			return
@@ -583,7 +587,7 @@ class SystemValues:
 			self.fsetVal('nofuncgraph-overhead', 'trace_options')
 			self.fsetVal('context-info', 'trace_options')
 			self.fsetVal('graph-time', 'trace_options')
-			self.fsetVal('0', 'max_graph_depth')
+			self.fsetVal('%d' % self.max_graph_depth, 'max_graph_depth')
 			cf = ['dpm_run_callback']
 			if(self.usetraceeventsonly):
 				cf += ['dpm_prepare', 'dpm_complete']
@@ -639,6 +643,12 @@ class SystemValues:
 		return '\x1B[%d;40m%s\x1B[m' % (color, str)
 
 sysvals = SystemValues()
+suspendmodename = {
+	'freeze': 'Freeze (S0)',
+	'standby': 'Standby (S1)',
+	'mem': 'Suspend (S3)',
+	'disk': 'Hibernate (S4)'
+}
 
 # Class: DevProps
 # Description:
@@ -1013,6 +1023,8 @@ class Data:
 		tmp = dict()
 		for devname in list:
 			dev = list[devname]
+			if dev['length'] == 0:
+				continue
 			tmp[dev['start']] = devname
 		for t in sorted(tmp):
 			slist.append(tmp[t])
@@ -1477,12 +1489,14 @@ class FTraceLine:
 #	 Each instance is tied to a single device in a single phase, and is
 #	 comprised of an ordered list of FTraceLine objects
 class FTraceCallGraph:
+	id = ''
 	start = -1.0
 	end = -1.0
 	list = []
 	invalid = False
 	depth = 0
 	pid = 0
+	name = ''
 	def __init__(self, pid):
 		self.start = -1.0
 		self.end = -1.0
@@ -1631,9 +1645,17 @@ class FTraceCallGraph:
 				return True
 		return False
 	def postProcess(self, debug=False):
+		if len(self.list) > 0:
+			self.name = self.list[0].name
 		stack = dict()
 		cnt = 0
+		last = 0
 		for l in self.list:
+			# ftrace bug: reported duration is not reliable
+			# check each leaf and clip it at max possible length
+			if(last and last.freturn and last.fcall):
+				if last.length > l.time - last.time:
+					last.length = l.time - last.time
 			if(l.fcall and not l.freturn):
 				stack[l.depth] = l
 				cnt += 1
@@ -1643,11 +1665,12 @@ class FTraceCallGraph:
 						print 'Post Process Error: Depth missing'
 						l.debugPrint()
 					return False
-				# transfer total time from return line to call line
-				stack[l.depth].length = l.length
+				# calculate call length from call/return lines
+				stack[l.depth].length = l.time - stack[l.depth].time
 				stack.pop(l.depth)
 				l.length = 0
 				cnt -= 1
+			last = l
 		if(cnt == 0):
 			# trace caught the whole call tree
 			return True
@@ -1664,8 +1687,8 @@ class FTraceCallGraph:
 			'dpm_prepare': 'suspend_prepare',
 			'dpm_complete': 'resume_complete'
 		}
-		if(self.list[0].name in borderphase):
-			p = borderphase[self.list[0].name]
+		if(self.name in borderphase):
+			p = borderphase[self.name]
 			list = data.dmesg[p]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1690,7 +1713,7 @@ class FTraceCallGraph:
 				break
 		return found
 	def newActionFromFunction(self, data):
-		name = self.list[0].name
+		name = self.name
 		if name in ['dpm_run_callback', 'dpm_prepare', 'dpm_complete']:
 			return
 		fs = self.start
@@ -1710,7 +1733,7 @@ class FTraceCallGraph:
 			phase, myname = out
 			data.dmesg[phase]['list'][myname]['ftrace'] = self
 	def debugPrint(self):
-		print('[%f - %f] %s (%d)') % (self.start, self.end, self.list[0].name, self.pid)
+		print('[%f - %f] %s (%d)') % (self.start, self.end, self.name, self.pid)
 		for l in self.list:
 			if(l.freturn and l.fcall):
 				print('%f (%02d): %s(); (%.3f us)' % (l.time, \
@@ -1738,7 +1761,7 @@ class DevItem:
 #	 A container for a device timeline which calculates
 #	 all the html properties to display it correctly
 class Timeline:
-	html = {}
+	html = ''
 	height = 0	# total timeline height
 	scaleH = 20	# timescale (top) row height
 	rowH = 30	# device row height
@@ -1746,14 +1769,28 @@ class Timeline:
 	rows = 0	# total timeline rows
 	rowlines = dict()
 	rowheight = dict()
+	html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;"><div class="tback" style="height:{3}px"></div>\n'
+	html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
+	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background:{4}">{5}</div>\n'
+	html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n'
 	def __init__(self, rowheight, scaleheight):
 		self.rowH = rowheight
 		self.scaleH = scaleheight
-		self.html = {
-			'header': '',
-			'timeline': '',
-			'legend': '',
-		}
+		self.html = ''
+	def createHeader(self, sv, suppress=''):
+		if(not sv.stamp['time']):
+			return
+		self.html += '<div class="version"><a href="https://01.org/suspendresume">%s v%s</a></div>' \
+			% (sv.title, sv.version)
+		if sv.logmsg and 'log' not in suppress:
+			self.html += '<button id="showtest" class="logbtn">log</button>'
+		if sv.addlogs and 'dmesg' not in suppress:
+			self.html += '<button id="showdmesg" class="logbtn">dmesg</button>'
+		if sv.addlogs and sv.ftracefile and 'ftrace' not in suppress:
+			self.html += '<button id="showftrace" class="logbtn">ftrace</button>'
+		headline_stamp = '<div class="stamp">{0} {1} {2} {3}</div>\n'
+		self.html += headline_stamp.format(sv.stamp['host'], sv.stamp['kernel'],
+			sv.stamp['mode'], sv.stamp['time'])
 	# Function: getDeviceRows
 	# Description:
 	#    determine how may rows the device funcs will take
@@ -1880,10 +1917,8 @@ class Timeline:
 				break
 			top += self.rowheight[test][phase][i]
 		return top
-	# Function: calcTotalRows
-	# Description:
-	#	 Calculate the heights and offsets for the header and rows
 	def calcTotalRows(self):
+		# Calculate the heights and offsets for the header and rows
 		maxrows = 0
 		standardphases = []
 		for t in self.rowlines:
@@ -1901,6 +1936,20 @@ class Timeline:
 		for t, p in standardphases:
 			for i in sorted(self.rowheight[t][p]):
 				self.rowheight[t][p][i] = self.bodyH/len(self.rowlines[t][p])
+	def createZoomBox(self, mode='command', testcount=1):
+		# Create bounding box, add buttons
+		html_zoombox = '<center><button id="zoomin">ZOOM IN +</button><button id="zoomout">ZOOM OUT -</button><button id="zoomdef">ZOOM 1:1</button></center>\n'
+		html_timeline = '<div id="dmesgzoombox" class="zoombox">\n<div id="{0}" class="timeline" style="height:{1}px">\n'
+		html_devlist1 = '<button id="devlist1" class="devlist" style="float:left;">Device Detail{0}</button>'
+		html_devlist2 = '<button id="devlist2" class="devlist" style="float:right;">Device Detail2</button>\n'
+		if mode != 'command':
+			if testcount > 1:
+				self.html += html_devlist2
+				self.html += html_devlist1.format('1')
+			else:
+				self.html += html_devlist1.format('')
+		self.html += html_zoombox
+		self.html += html_timeline.format('dmesg', self.height)
 	# Function: createTimeScale
 	# Description:
 	#	 Create the timescale for a timeline block
@@ -1913,7 +1962,7 @@ class Timeline:
 	#	 The html code needed to display the time scale
 	def createTimeScale(self, m0, mMax, tTotal, mode):
 		timescale = '<div class="t" style="right:{0}%">{1}</div>\n'
-		rline = '<div class="t" style="left:0;border-left:1px solid black;border-right:0;">Resume</div>\n'
+		rline = '<div class="t" style="left:0;border-left:1px solid black;border-right:0;">{0}</div>\n'
 		output = '<div class="timescale">\n'
 		# set scale for timeline
 		mTotal = mMax - m0
@@ -1926,21 +1975,20 @@ class Timeline:
 		divEdge = (mTotal - tS*(divTotal-1))*100/mTotal
 		for i in range(divTotal):
 			htmlline = ''
-			if(mode == 'resume'):
+			if(mode == 'suspend'):
+				pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal) - divEdge)
+				val = '%0.fms' % (float(i-divTotal+1)*tS*1000)
+				if(i == divTotal - 1):
+					val = mode
+				htmlline = timescale.format(pos, val)
+			else:
 				pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal))
 				val = '%0.fms' % (float(i)*tS*1000)
 				htmlline = timescale.format(pos, val)
 				if(i == 0):
-					htmlline = rline
-			else:
-				pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal) - divEdge)
-				val = '%0.fms' % (float(i-divTotal+1)*tS*1000)
-				if(i == divTotal - 1):
-					val = 'Suspend'
-				htmlline = timescale.format(pos, val)
+					htmlline = rline.format(mode)
 			output += htmlline
-		output += '</div>\n'
-		return output
+		self.html += output+'</div>\n'
 
 # Class: TestProps
 # Description:
@@ -2009,7 +2057,7 @@ class ProcessMonitor:
 				val['kern'] = kern
 			if ujiff > 0 or kjiff > 0:
 				running[pid] = ujiff + kjiff
-		result = process.wait()
+		process.wait()
 		out = ''
 		for pid in running:
 			jiffies = running[pid]
@@ -2071,26 +2119,6 @@ def parseStamp(line, data):
 	if not sysvals.stamp:
 		sysvals.stamp = data.stamp
 
-# Function: diffStamp
-# Description:
-#	compare the host, kernel, and mode fields in 3 stamps
-# Arguments:
-#	 stamp1: string array with mode, kernel, and host
-#	 stamp2: string array with mode, kernel, and host
-# Return:
-#	True if stamps differ, False if they're the same
-def diffStamp(stamp1, stamp2):
-	if 'host' in stamp1 and 'host' in stamp2:
-		if stamp1['host'] != stamp2['host']:
-			return True
-	if 'kernel' in stamp1 and 'kernel' in stamp2:
-		if stamp1['kernel'] != stamp2['kernel']:
-			return True
-	if 'mode' in stamp1 and 'mode' in stamp2:
-		if stamp1['mode'] != stamp2['mode']:
-			return True
-	return False
-
 # Function: doesTraceLogHaveTraceEvents
 # Description:
 #	 Quickly determine if the ftrace log has some or all of the trace events
@@ -2722,7 +2750,7 @@ def parseTraceLog():
 			# create blocks for orphan cg data
 			for sortkey in sorted(sortlist):
 				cg = sortlist[sortkey]
-				name = cg.list[0].name
+				name = cg.name
 				if sysvals.isCallgraphFunc(name):
 					vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name))
 					cg.newActionFromFunction(data)
@@ -3100,149 +3128,154 @@ def parseKernelLog(data):
 	data.fixupInitcallsThatDidntReturn()
 	return True
 
+def callgraphHTML(sv, hf, num, cg, title, color, devid):
+	html_func_top = '<article id="{0}" class="atop" style="background:{1}">\n<input type="checkbox" class="pf" id="f{2}" checked/><label for="f{2}">{3} {4}</label>\n'
+	html_func_start = '<article>\n<input type="checkbox" class="pf" id="f{0}" checked/><label for="f{0}">{1} {2}</label>\n'
+	html_func_end = '</article>\n'
+	html_func_leaf = '<article>{0} {1}</article>\n'
+	# Write callgraph cg to hf as nested <article> tags; num numbers the checkboxes and the next unused value is returned
+	cgid = devid	# article id: the device id, with the callgraph's own id appended when it has one
+	if cg.id:
+		cgid += cg.id
+	cglen = (cg.end - cg.start) * 1000
+	if cglen < sv.mincglen:
+		return num	# shorter than sv.mincglen: nothing is written and num is unchanged
+
+	fmt = '<r>(%.3f ms @ '+sv.timeformat+' to '+sv.timeformat+')</r>'
+	flen = fmt % (cglen, cg.start, cg.end)
+	hf.write(html_func_top.format(cgid, color, num, title, flen))
+	num += 1
+	for line in cg.list:
+		if(line.length < 0.000000001):
+			flen = ''	# no timing text for (near) zero-length entries
+		else:
+			fmt = '<n>(%.3f ms @ '+sv.timeformat+')</n>'
+			flen = fmt % (line.length*1000, line.time)
+		if(line.freturn and line.fcall):
+			hf.write(html_func_leaf.format(line.name, flen))	# call and return on one line: leaf entry
+		elif(line.freturn):
+			hf.write(html_func_end)	# return only: close the innermost open <article>
+		else:
+			hf.write(html_func_start.format(num, line.name, flen))	# call only: open a nested <article>
+			num += 1
+	hf.write(html_func_end)	# closes the top-level <article> opened by html_func_top
+	return num
+
+def addCallgraphs(sv, hf, data):
+	# Write the "callgraphs" <section> to hf: the ftrace data of every device in
+	# data converted to html (only the sv.cgphase phase when that is set)
+	hf.write('<section id="callgraphs" class="callgraph">\n')
+	num = 0
+	for p in data.phases:
+		if sv.cgphase and p != sv.cgphase:
+			continue
+		devlist = data.dmesg[p]['list']
+		for devname in data.sortedDevices(p):
+			dev = devlist[devname]
+			color = 'white'	# default; replaced by the phase color, then by the device's own color
+			if 'color' in data.dmesg[p]:
+				color = data.dmesg[p]['color']
+			if 'color' in dev:
+				color = dev['color']
+			name = devname
+			if(devname in sv.devprops):
+				name = sv.devprops[devname].altName(devname)
+			if sv.suspendmode in suspendmodename:
+				name += ' '+p
+			if('ftrace' in dev):
+				cg = dev['ftrace']
+				num = callgraphHTML(sv, hf, num, cg, name, color, dev['id'])
+			if('ftraces' in dev):
+				for cg in dev['ftraces']:
+					num = callgraphHTML(sv, hf, num, cg,
+						name+' &rarr; '+cg.name, color, dev['id'])
+
+	hf.write('\n\n    </section>\n')
+
 # Function: createHTMLSummarySimple
 # Description:
 #	 Create summary html file for a series of tests
 # Arguments:
 #	 testruns: array of Data objects from parseTraceLog
-def createHTMLSummarySimple(testruns, htmlfile):
-	# print out the basic summary of all the tests
-	hf = open(htmlfile, 'w')
-
+def createHTMLSummarySimple(testruns, htmlfile, folder):
 	# write the html header first (html head, css code, up to body start)
 	html = '<!DOCTYPE html>\n<html>\n<head>\n\
 	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
-	<title>AnalyzeSuspend Summary</title>\n\
+	<title>SleepGraph Summary</title>\n\
 	<style type=\'text/css\'>\n\
-		body {overflow-y: scroll;}\n\
-		.stamp {width: 100%;text-align:center;background-color:#495E09;line-height:30px;color:white;font: 25px Arial;}\n\
+		.stamp {width: 100%;text-align:center;background:#888;line-height:30px;color:white;font: 25px Arial;}\n\
 		table {width:100%;border-collapse: collapse;}\n\
-		.summary {font: 22px Arial;border:1px solid;}\n\
-		th {border: 1px solid black;background-color:#A7C942;color:white;}\n\
-		td {text-align: center;}\n\
-		tr.alt td {background-color:#EAF2D3;}\n\
-		tr.avg td {background-color:#BDE34C;}\n\
-		a:link {color: #90B521;}\n\
-		a:visited {color: #495E09;}\n\
-		a:hover {color: #B1DF28;}\n\
-		a:active {color: #FFFFFF;}\n\
+		.summary {border:1px solid;}\n\
+		th {border: 1px solid black;background:#222;color:white;}\n\
+		td {font: 16px "Times New Roman";text-align: center;}\n\
+		tr.alt td {background:#ddd;}\n\
+		tr.avg td {background:#aaa;}\n\
 	</style>\n</head>\n<body>\n'
 
 	# group test header
-	count = len(testruns)
-	headline_stamp = '<div class="stamp">{0} {1} {2} {3} ({4} tests)</div>\n'
-	html += headline_stamp.format(sysvals.stamp['host'],
-		sysvals.stamp['kernel'], sysvals.stamp['mode'],
-		sysvals.stamp['time'], count)
-
-	# check to see if all the tests have the same value
-	stampcolumns = False
-	for data in testruns:
-		if diffStamp(sysvals.stamp, data.stamp):
-			stampcolumns = True
-			break
-
+	html += '<div class="stamp">%s (%d tests)</div>\n' % (folder, len(testruns))
 	th = '\t<th>{0}</th>\n'
 	td = '\t<td>{0}</td>\n'
-	tdlink = '\t<td><a href="{0}">Click Here</a></td>\n'
+	tdlink = '\t<td><a href="{0}">html</a></td>\n'
 
 	# table header
-	html += '<table class="summary">\n<tr>\n'
-	html += th.format("Test #")
-	if stampcolumns:
-		html += th.format("Hostname")
-		html += th.format("Kernel Version")
-		html += th.format("Suspend Mode")
-	html += th.format("Test Time")
-	html += th.format("Suspend Time")
-	html += th.format("Resume Time")
-	html += th.format("Detail")
-	html += '</tr>\n'
+	html += '<table class="summary">\n<tr>\n' + th.format('#') +\
+		th.format('Mode') + th.format('Host') + th.format('Kernel') +\
+		th.format('Test Time') + th.format('Suspend') + th.format('Resume') +\
+		th.format('Detail') + '</tr>\n'
 
 	# test data, 1 row per test
-	sTimeAvg = 0.0
-	rTimeAvg = 0.0
-	num = 1
-	for data in testruns:
-		# data.end is the end of post_resume
-		resumeEnd = data.dmesg['resume_complete']['end']
+	avg = '<tr class="avg"><td></td><td></td><td></td><td></td>'+\
+		'<td>Average of {0} {1} tests</td><td>{2}</td><td>{3}</td><td></td></tr>\n'
+	sTimeAvg = rTimeAvg = 0.0
+	mode = None	# sentinel that no mode string equals, so the first row always opens a group
+	num = 0
+	for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'])):
+		if mode != data['mode']:
+			# mode changed: close the previous group (if it has rows) with its average line
+			if(num > 1):
+				sTimeAvg /= (num - 1)
+				rTimeAvg /= (num - 1)
+				html += avg.format('%d' % (num - 1), mode,
+					'%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg)
+			sTimeAvg = rTimeAvg = 0.0
+			mode = data['mode']
+			num = 1
+		# alternate row color
 		if num % 2 == 1:
 			html += '<tr class="alt">\n'
 		else:
 			html += '<tr>\n'
-
-		# test num
-		html += td.format("test %d" % num)
+		html += td.format("%d" % num)
 		num += 1
-		if stampcolumns:
-			# host name
+		# basic info (each row is a dict; a missing key is shown as "unknown")
+		for item in ['mode', 'host', 'kernel', 'time']:
 			val = "unknown"
-			if('host' in data.stamp):
-				val = data.stamp['host']
+			if(item in data):
+				val = data[item]
 			html += td.format(val)
-			# host kernel
-			val = "unknown"
-			if('kernel' in data.stamp):
-				val = data.stamp['kernel']
-			html += td.format(val)
-			# suspend mode
-			val = "unknown"
-			if('mode' in data.stamp):
-				val = data.stamp['mode']
-			html += td.format(val)
-		# test time
-		val = "unknown"
-		if('time' in data.stamp):
-			val = data.stamp['time']
-		html += td.format(val)
 		# suspend time
-		sTime = (data.tSuspended - data.start)*1000
+		sTime = float(data['suspend'])
 		sTimeAvg += sTime
-		html += td.format("%3.3f ms" % sTime)
+		html += td.format('%.3f ms' % sTime)
 		# resume time
-		rTime = (resumeEnd - data.tResumed)*1000
+		rTime = float(data['resume'])
 		rTimeAvg += rTime
-		html += td.format("%3.3f ms" % rTime)
+		html += td.format('%.3f ms' % rTime)
 		# link to the output html
-		html += tdlink.format(data.outfile)
-
-		html += '</tr>\n'
-
-	# last line: test average
-	if(count > 0):
-		sTimeAvg /= count
-		rTimeAvg /= count
-	html += '<tr class="avg">\n'
-	html += td.format('Average') 	# name
-	if stampcolumns:
-		html += td.format('')			# host
-		html += td.format('')			# kernel
-		html += td.format('')			# mode
-	html += td.format('')			# time
-	html += td.format("%3.3f ms" % sTimeAvg)	# suspend time
-	html += td.format("%3.3f ms" % rTimeAvg)	# resume time
-	html += td.format('')			# output link
-	html += '</tr>\n'
+		html += tdlink.format(data['url']) + '</tr>\n'
+	# average line for the last group (skipped when no row was written, so num - 1 is never 0)
+	if(num > 1):
+		sTimeAvg /= (num - 1)
+		rTimeAvg /= (num - 1)
+		html += avg.format('%d' % (num - 1), mode,
+			'%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg)
 
 	# flush the data to file
-	hf.write(html+'</table>\n')
-	hf.write('</body>\n</html>\n')
+	hf = open(htmlfile, 'w')
+	hf.write(html+'</table>\n</body>\n</html>\n')
 	hf.close()
 
-def htmlTitle():
-	modename = {
-		'freeze': 'Freeze (S0)',
-		'standby': 'Standby (S1)',
-		'mem': 'Suspend (S3)',
-		'disk': 'Hibernate (S4)'
-	}
-	kernel = sysvals.stamp['kernel']
-	host = sysvals.hostname[0].upper()+sysvals.hostname[1:]
-	mode = sysvals.suspendmode
-	if sysvals.suspendmode in modename:
-		mode = modename[sysvals.suspendmode]
-	return host+' '+mode+' '+kernel
-
 def ordinal(value):
 	suffix = 'th'
 	if value < 10 or value > 19:
@@ -3272,24 +3305,11 @@ def createHTML(testruns):
 			kerror = True
 		data.normalizeTime(testruns[-1].tSuspended)
 
-	x2changes = ['', 'absolute']
-	if len(testruns) > 1:
-		x2changes = ['1', 'relative']
 	# html function templates
-	headline_version = '<div class="version"><a href="https://01.org/suspendresume">AnalyzeSuspend v%s</a></div>' % sysvals.version
-	headline_stamp = '<div class="stamp">{0} {1} {2} {3}</div>\n'
-	html_devlist1 = '<button id="devlist1" class="devlist" style="float:left;">Device Detail%s</button>' % x2changes[0]
-	html_zoombox = '<center><button id="zoomin">ZOOM IN +</button><button id="zoomout">ZOOM OUT -</button><button id="zoomdef">ZOOM 1:1</button></center>\n'
-	html_devlist2 = '<button id="devlist2" class="devlist" style="float:right;">Device Detail2</button>\n'
-	html_timeline = '<div id="dmesgzoombox" class="zoombox">\n<div id="{0}" class="timeline" style="height:{1}px">\n'
-	html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;"><div class="tback" style="height:{3}px"></div>\n'
-	html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
 	html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">ERROR&rarr;</div>\n'
 	html_traceevent = '<div title="{0}" class="traceevent{6}" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;{7}">{5}</div>\n'
 	html_cpuexec = '<div class="jiffie" style="left:{0}%;top:{1}px;height:{2}px;width:{3}%;background:{4};"></div>\n'
-	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background-color:{4}">{5}</div>\n'
-	html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n'
-	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background-color:{1}">&nbsp;{2}</div>\n'
+	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}">&nbsp;{2}</div>\n'
 	html_timetotal = '<table class="time1">\n<tr>'\
 		'<td class="green" title="{3}">{2} Suspend Time: <b>{0} ms</b></td>'\
 		'<td class="yellow" title="{4}">{2} Resume Time: <b>{1} ms</b></td>'\
@@ -3311,20 +3331,18 @@ def createHTML(testruns):
 		'</tr>\n</table>\n'
 
 	# html format variables
-	hoverZ = 'z-index:8;'
-	if sysvals.usedevsrc:
-		hoverZ = ''
 	scaleH = 20
-	scaleTH = 20
 	if kerror:
 		scaleH = 40
-		scaleTH = 60
 
 	# device timeline
 	vprint('Creating Device Timeline...')
 
 	devtl = Timeline(30, scaleH)
 
+	# write the test title and general info header
+	devtl.createHeader(sysvals)
+
 	# Generate the header for this timeline
 	for data in testruns:
 		tTotal = data.end - data.start
@@ -3346,7 +3364,7 @@ def createHTML(testruns):
 			if(len(testruns) > 1):
 				testdesc = ordinal(data.testnumber+1)+' '+testdesc
 			thtml = html_timetotal3.format(run_time, testdesc)
-			devtl.html['header'] += thtml
+			devtl.html += thtml
 		elif data.fwValid:
 			suspend_time = '%.0f'%(sktime + (data.fwSuspend/1000000.0))
 			resume_time = '%.0f'%(rktime + (data.fwResume/1000000.0))
@@ -3363,10 +3381,10 @@ def createHTML(testruns):
 			else:
 				thtml = html_timetotal2.format(suspend_time, low_time, \
 					resume_time, testdesc1, stitle, rtitle)
-			devtl.html['header'] += thtml
+			devtl.html += thtml
 			sftime = '%.3f'%(data.fwSuspend / 1000000.0)
 			rftime = '%.3f'%(data.fwResume / 1000000.0)
-			devtl.html['header'] += html_timegroups.format('%.3f'%sktime, \
+			devtl.html += html_timegroups.format('%.3f'%sktime, \
 				sftime, rftime, '%.3f'%rktime, testdesc2, sysvals.suspendmode)
 		else:
 			suspend_time = '%.3f' % sktime
@@ -3382,7 +3400,7 @@ def createHTML(testruns):
 			else:
 				thtml = html_timetotal2.format(suspend_time, low_time, \
 					resume_time, testdesc, stitle, rtitle)
-			devtl.html['header'] += thtml
+			devtl.html += thtml
 
 	# time scale for potentially multiple datasets
 	t0 = testruns[0].start
@@ -3429,15 +3447,8 @@ def createHTML(testruns):
 			devtl.getPhaseRows(threadlist, devtl.rows)
 	devtl.calcTotalRows()
 
-	# create bounding box, add buttons
-	if sysvals.suspendmode != 'command':
-		devtl.html['timeline'] += html_devlist1
-		if len(testruns) > 1:
-			devtl.html['timeline'] += html_devlist2
-	devtl.html['timeline'] += html_zoombox
-	devtl.html['timeline'] += html_timeline.format('dmesg', devtl.height)
-
 	# draw the full timeline
+	devtl.createZoomBox(sysvals.suspendmode, len(testruns))
 	phases = {'suspend':[],'resume':[]}
 	for phase in data.dmesg:
 		if 'resume' in phase:
@@ -3452,37 +3463,36 @@ def createHTML(testruns):
 			# draw suspend and resume blocks separately
 			bname = '%s%d' % (dir[0], data.testnumber)
 			if dir == 'suspend':
-				m0 = testruns[data.testnumber].start
-				mMax = testruns[data.testnumber].tSuspended
-				mTotal = mMax - m0
+				m0 = data.start
+				mMax = data.tSuspended
 				left = '%f' % (((m0-t0)*100.0)/tTotal)
 			else:
-				m0 = testruns[data.testnumber].tSuspended
-				mMax = testruns[data.testnumber].end
+				m0 = data.tSuspended
+				mMax = data.end
 				# in an x2 run, remove any gap between blocks
 				if len(testruns) > 1 and data.testnumber == 0:
 					mMax = testruns[1].start
-				mTotal = mMax - m0
 				left = '%f' % ((((m0-t0)*100.0)+sysvals.srgap/2)/tTotal)
+			mTotal = mMax - m0
 			# if a timeline block is 0 length, skip altogether
 			if mTotal == 0:
 				continue
 			width = '%f' % (((mTotal*100.0)-sysvals.srgap/2)/tTotal)
-			devtl.html['timeline'] += html_tblock.format(bname, left, width, devtl.scaleH)
+			devtl.html += devtl.html_tblock.format(bname, left, width, devtl.scaleH)
 			for b in sorted(phases[dir]):
 				# draw the phase color background
 				phase = data.dmesg[b]
 				length = phase['end']-phase['start']
 				left = '%f' % (((phase['start']-m0)*100.0)/mTotal)
 				width = '%f' % ((length*100.0)/mTotal)
-				devtl.html['timeline'] += html_phase.format(left, width, \
+				devtl.html += devtl.html_phase.format(left, width, \
 					'%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \
 					data.dmesg[b]['color'], '')
 			for e in data.errorinfo[dir]:
 				# draw red lines for any kernel errors found
 				t, err = e
 				right = '%f' % (((mMax-t)*100.0)/mTotal)
-				devtl.html['timeline'] += html_error.format(right, err)
+				devtl.html += html_error.format(right, err)
 			for b in sorted(phases[dir]):
 				# draw the devices for this phase
 				phaselist = data.dmesg[b]['list']
@@ -3496,7 +3506,7 @@ def createHTML(testruns):
 					if 'htmlclass' in dev:
 						xtraclass = dev['htmlclass']
 					if 'color' in dev:
-						xtrastyle = 'background-color:%s;' % dev['color']
+						xtrastyle = 'background:%s;' % dev['color']
 					if(d in sysvals.devprops):
 						name = sysvals.devprops[d].altName(d)
 						xtraclass = sysvals.devprops[d].xtraClass()
@@ -3521,7 +3531,7 @@ def createHTML(testruns):
 							title += 'post_resume_process'
 					else:
 						title += b
-					devtl.html['timeline'] += html_device.format(dev['id'], \
+					devtl.html += devtl.html_device.format(dev['id'], \
 						title, left, top, '%.3f'%rowheight, width, \
 						d+drv, xtraclass, xtrastyle)
 					if('cpuexec' in dev):
@@ -3535,7 +3545,7 @@ def createHTML(testruns):
 							left = '%f' % (((start-m0)*100)/mTotal)
 							width = '%f' % ((end-start)*100/mTotal)
 							color = 'rgba(255, 0, 0, %f)' % j
-							devtl.html['timeline'] += \
+							devtl.html += \
 								html_cpuexec.format(left, top, height, width, color)
 					if('src' not in dev):
 						continue
@@ -3548,20 +3558,20 @@ def createHTML(testruns):
 						xtrastyle = ''
 						if e.color:
 							xtrastyle = 'background:%s;' % e.color
-						devtl.html['timeline'] += \
+						devtl.html += \
 							html_traceevent.format(e.title(), \
 								left, top, height, width, e.text(), '', xtrastyle)
 			# draw the time scale, try to make the number of labels readable
-			devtl.html['timeline'] += devtl.createTimeScale(m0, mMax, tTotal, dir)
-			devtl.html['timeline'] += '</div>\n'
+			devtl.createTimeScale(m0, mMax, tTotal, dir)
+			devtl.html += '</div>\n'
 
 	# timeline is finished
-	devtl.html['timeline'] += '</div>\n</div>\n'
+	devtl.html += '</div>\n</div>\n'
 
 	# draw a legend which describes the phases by color
 	if sysvals.suspendmode != 'command':
 		data = testruns[-1]
-		devtl.html['legend'] = '<div class="legend">\n'
+		devtl.html += '<div class="legend">\n'
 		pdelta = 100.0/len(data.phases)
 		pmargin = pdelta / 4.0
 		for phase in data.phases:
@@ -3571,127 +3581,41 @@ def createHTML(testruns):
 				id += tmp[1][0]
 			order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
 			name = string.replace(phase, '_', ' &nbsp;')
-			devtl.html['legend'] += html_legend.format(order, \
+			devtl.html += html_legend.format(order, \
 				data.dmesg[phase]['color'], name, id)
-		devtl.html['legend'] += '</div>\n'
+		devtl.html += '</div>\n'
 
 	hf = open(sysvals.htmlfile, 'w')
 
-	if not sysvals.cgexp:
-		cgchk = 'checked'
-		cgnchk = 'not(:checked)'
-	else:
-		cgchk = 'not(:checked)'
-		cgnchk = 'checked'
-
-	# write the html header first (html head, css code, up to body start)
-	html_header = '<!DOCTYPE html>\n<html>\n<head>\n\
-	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
-	<title>'+htmlTitle()+'</title>\n\
-	<style type=\'text/css\'>\n\
-		body {overflow-y:scroll;}\n\
-		.stamp {width:100%;text-align:center;background-color:gray;line-height:30px;color:white;font:25px Arial;}\n\
-		.callgraph {margin-top:30px;box-shadow:5px 5px 20px black;}\n\
-		.callgraph article * {padding-left:28px;}\n\
-		h1 {color:black;font:bold 30px Times;}\n\
-		t0 {color:black;font:bold 30px Times;}\n\
-		t1 {color:black;font:30px Times;}\n\
-		t2 {color:black;font:25px Times;}\n\
-		t3 {color:black;font:20px Times;white-space:nowrap;}\n\
-		t4 {color:black;font:bold 30px Times;line-height:60px;white-space:nowrap;}\n\
-		cS {font:bold 13px Times;}\n\
-		table {width:100%;}\n\
-		.gray {background-color:rgba(80,80,80,0.1);}\n\
-		.green {background-color:rgba(204,255,204,0.4);}\n\
-		.purple {background-color:rgba(128,0,128,0.2);}\n\
-		.yellow {background-color:rgba(255,255,204,0.4);}\n\
-		.time1 {font:22px Arial;border:1px solid;}\n\
-		.time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
-		td {text-align:center;}\n\
-		r {color:#500000;font:15px Tahoma;}\n\
-		n {color:#505050;font:15px Tahoma;}\n\
-		.tdhl {color:red;}\n\
-		.hide {display:none;}\n\
-		.pf {display:none;}\n\
-		.pf:'+cgchk+' + label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/><rect x="8" y="4" width="2" height="10" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
-		.pf:'+cgnchk+' ~ label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
-		.pf:'+cgchk+' ~ *:not(:nth-child(2)) {display:none;}\n\
-		.zoombox {position:relative;width:100%;overflow-x:scroll;-webkit-user-select:none;-moz-user-select:none;user-select:none;}\n\
-		.timeline {position:relative;font-size:14px;cursor:pointer;width:100%; overflow:hidden;background:linear-gradient(#cccccc, white);}\n\
-		.thread {position:absolute;height:0%;overflow:hidden;z-index:7;line-height:30px;font-size:14px;border:1px solid;text-align:center;white-space:nowrap;}\n\
-		.thread.ps {border-radius:3px;background:linear-gradient(to top, #ccc, #eee);}\n\
-		.thread:hover {background-color:white;border:1px solid red;'+hoverZ+'}\n\
-		.thread.sec,.thread.sec:hover {background-color:black;border:0;color:white;line-height:15px;font-size:10px;}\n\
-		.hover {background-color:white;border:1px solid red;'+hoverZ+'}\n\
-		.hover.sync {background-color:white;}\n\
-		.hover.bg,.hover.kth,.hover.sync,.hover.ps {background-color:white;}\n\
-		.jiffie {position:absolute;pointer-events: none;z-index:8;}\n\
-		.traceevent {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\
-		.traceevent:hover {color:white;font-weight:bold;border:1px solid white;}\n\
-		.phase {position:absolute;overflow:hidden;border:0px;text-align:center;}\n\
-		.phaselet {position:absolute;overflow:hidden;border:0px;text-align:center;height:100px;font-size:24px;}\n\
-		.t {position:absolute;line-height:'+('%d'%scaleTH)+'px;pointer-events:none;top:0;height:100%;border-right:1px solid black;z-index:6;}\n\
-		.err {position:absolute;top:0%;height:100%;border-right:3px solid red;color:red;font:bold 14px Times;line-height:18px;}\n\
-		.legend {position:relative; width:100%; height:40px; text-align:center;margin-bottom:20px}\n\
-		.legend .square {position:absolute;cursor:pointer;top:10px; width:0px;height:20px;border:1px solid;padding-left:20px;}\n\
-		button {height:40px;width:200px;margin-bottom:20px;margin-top:20px;font-size:24px;}\n\
-		.logbtn {position:relative;float:right;height:25px;width:50px;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\
-		.devlist {position:'+x2changes[1]+';width:190px;}\n\
-		a:link {color:white;text-decoration:none;}\n\
-		a:visited {color:white;}\n\
-		a:hover {color:white;}\n\
-		a:active {color:white;}\n\
-		.version {position:relative;float:left;color:white;font-size:10px;line-height:30px;margin-left:10px;}\n\
-		#devicedetail {height:100px;box-shadow:5px 5px 20px black;}\n\
-		.tblock {position:absolute;height:100%;background-color:#ddd;}\n\
-		.tback {position:absolute;width:100%;background:linear-gradient(#ccc, #ddd);}\n\
-		.bg {z-index:1;}\n\
-	</style>\n</head>\n<body>\n'
-
 	# no header or css if its embedded
 	if(sysvals.embedded):
 		hf.write('pass True tSus %.3f tRes %.3f tLow %.3f fwvalid %s tSus %.3f tRes %.3f\n' %
 			(data.tSuspended-data.start, data.end-data.tSuspended, data.tLow, data.fwValid, \
 				data.fwSuspend/1000000, data.fwResume/1000000))
 	else:
-		hf.write(html_header)
-
-	# write the test title and general info header
-	if(sysvals.stamp['time'] != ""):
-		hf.write(headline_version)
-		if sysvals.logmsg:
-			hf.write('<button id="showtest" class="logbtn">log</button>')
-		if sysvals.addlogs and sysvals.dmesgfile:
-			hf.write('<button id="showdmesg" class="logbtn">dmesg</button>')
-		if sysvals.addlogs and sysvals.ftracefile:
-			hf.write('<button id="showftrace" class="logbtn">ftrace</button>')
-		hf.write(headline_stamp.format(sysvals.stamp['host'],
-			sysvals.stamp['kernel'], sysvals.stamp['mode'], \
-				sysvals.stamp['time']))
+		addCSS(hf, sysvals, len(testruns), kerror)
 
 	# write the device timeline
-	hf.write(devtl.html['header'])
-	hf.write(devtl.html['timeline'])
-	hf.write(devtl.html['legend'])
+	hf.write(devtl.html)
 	hf.write('<div id="devicedetailtitle"></div>\n')
 	hf.write('<div id="devicedetail" style="display:none;">\n')
 	# draw the colored boxes for the device detail section
 	for data in testruns:
 		hf.write('<div id="devicedetail%d">\n' % data.testnumber)
 		pscolor = 'linear-gradient(to top left, #ccc, #eee)'
-		hf.write(html_phaselet.format('pre_suspend_process', \
+		hf.write(devtl.html_phaselet.format('pre_suspend_process', \
 			'0', '0', pscolor))
 		for b in data.phases:
 			phase = data.dmesg[b]
 			length = phase['end']-phase['start']
 			left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal)
 			width = '%.3f' % ((length*100.0)/tTotal)
-			hf.write(html_phaselet.format(b, left, width, \
+			hf.write(devtl.html_phaselet.format(b, left, width, \
 				data.dmesg[b]['color']))
-		hf.write(html_phaselet.format('post_resume_process', \
+		hf.write(devtl.html_phaselet.format('post_resume_process', \
 			'0', '0', pscolor))
 		if sysvals.suspendmode == 'command':
-			hf.write(html_phaselet.format('cmdexec', '0', '0', pscolor))
+			hf.write(devtl.html_phaselet.format('cmdexec', '0', '0', pscolor))
 		hf.write('</div>\n')
 	hf.write('</div>\n')
 
@@ -3701,52 +3625,7 @@ def createHTML(testruns):
 	else:
 		data = testruns[-1]
 	if(sysvals.usecallgraph and not sysvals.embedded):
-		hf.write('<section id="callgraphs" class="callgraph">\n')
-		# write out the ftrace data converted to html
-		html_func_top = '<article id="{0}" class="atop" style="background-color:{1}">\n<input type="checkbox" class="pf" id="f{2}" checked/><label for="f{2}">{3} {4}</label>\n'
-		html_func_start = '<article>\n<input type="checkbox" class="pf" id="f{0}" checked/><label for="f{0}">{1} {2}</label>\n'
-		html_func_end = '</article>\n'
-		html_func_leaf = '<article>{0} {1}</article>\n'
-		num = 0
-		for p in data.phases:
-			if sysvals.cgphase and p != sysvals.cgphase:
-				continue
-			list = data.dmesg[p]['list']
-			for devname in data.sortedDevices(p):
-				if('ftrace' not in list[devname]):
-					continue
-				devid = list[devname]['id']
-				cg = list[devname]['ftrace']
-				clen = (cg.end - cg.start) * 1000
-				if clen < sysvals.mincglen:
-					continue
-				fmt = '<r>(%.3f ms @ '+sysvals.timeformat+' to '+sysvals.timeformat+')</r>'
-				flen = fmt % (clen, cg.start, cg.end)
-				name = devname
-				if(devname in sysvals.devprops):
-					name = sysvals.devprops[devname].altName(devname)
-				if sysvals.suspendmode == 'command':
-					ftitle = name
-				else:
-					ftitle = name+' '+p
-				hf.write(html_func_top.format(devid, data.dmesg[p]['color'], \
-					num, ftitle, flen))
-				num += 1
-				for line in cg.list:
-					if(line.length < 0.000000001):
-						flen = ''
-					else:
-						fmt = '<n>(%.3f ms @ '+sysvals.timeformat+')</n>'
-						flen = fmt % (line.length*1000, line.time)
-					if(line.freturn and line.fcall):
-						hf.write(html_func_leaf.format(line.name, flen))
-					elif(line.freturn):
-						hf.write(html_func_end)
-					else:
-						hf.write(html_func_start.format(num, line.name, flen))
-						num += 1
-				hf.write(html_func_end)
-		hf.write('\n\n    </section>\n')
+		addCallgraphs(sysvals, hf, data)
 
 	# add the test log as a hidden div
 	if sysvals.logmsg:
@@ -3788,6 +3667,100 @@ def createHTML(testruns):
 	hf.close()
 	return True
 
+def addCSS(hf, sv, testcount=1, kerror=False, extra=''):
+	kernel = sv.stamp['kernel']
+	host = sv.hostname[0].upper()+sv.hostname[1:]	# host name with its first character upper-cased
+	mode = sv.suspendmode
+	if sv.suspendmode in suspendmodename:
+		mode = suspendmodename[sv.suspendmode]
+	title = host+' '+mode+' '+kernel	# used as the html <title> below
+
+	# cgchk is the checkbox state whose .pf rule below sets display:none on its later siblings; sv.cgexp swaps the two states
+	cgchk = 'checked'
+	cgnchk = 'not(:checked)'
+	if sv.cgexp:
+		cgchk = 'not(:checked)'
+		cgnchk = 'checked'
+
+	hoverZ = 'z-index:8;'	# appended to the .thread:hover and .hover rules; empty when sv.usedevsrc is set
+	if sv.usedevsrc:
+		hoverZ = ''
+
+	devlistpos = 'absolute'	# position of the .devlist rule; 'relative' when testcount > 1
+	if testcount > 1:
+		devlistpos = 'relative'
+
+	scaleTH = 20	# line-height (px) of the .t rule; 60 when kerror is set
+	if kerror:
+		scaleTH = 60
+	# build the html header (html head with title and css, up to body start) and write it to hf;
+	# extra is inserted verbatim as the last text inside the <style> element
+	html_header = '<!DOCTYPE html>\n<html>\n<head>\n\
+	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
+	<title>'+title+'</title>\n\
+	<style type=\'text/css\'>\n\
+		body {overflow-y:scroll;}\n\
+		.stamp {width:100%;text-align:center;background:gray;line-height:30px;color:white;font:25px Arial;}\n\
+		.callgraph {margin-top:30px;box-shadow:5px 5px 20px black;}\n\
+		.callgraph article * {padding-left:28px;}\n\
+		h1 {color:black;font:bold 30px Times;}\n\
+		t0 {color:black;font:bold 30px Times;}\n\
+		t1 {color:black;font:30px Times;}\n\
+		t2 {color:black;font:25px Times;}\n\
+		t3 {color:black;font:20px Times;white-space:nowrap;}\n\
+		t4 {color:black;font:bold 30px Times;line-height:60px;white-space:nowrap;}\n\
+		cS {font:bold 13px Times;}\n\
+		table {width:100%;}\n\
+		.gray {background:rgba(80,80,80,0.1);}\n\
+		.green {background:rgba(204,255,204,0.4);}\n\
+		.purple {background:rgba(128,0,128,0.2);}\n\
+		.yellow {background:rgba(255,255,204,0.4);}\n\
+		.blue {background:rgba(169,208,245,0.4);}\n\
+		.time1 {font:22px Arial;border:1px solid;}\n\
+		.time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
+		td {text-align:center;}\n\
+		r {color:#500000;font:15px Tahoma;}\n\
+		n {color:#505050;font:15px Tahoma;}\n\
+		.tdhl {color:red;}\n\
+		.hide {display:none;}\n\
+		.pf {display:none;}\n\
+		.pf:'+cgchk+' + label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/><rect x="8" y="4" width="2" height="10" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
+		.pf:'+cgnchk+' ~ label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
+		.pf:'+cgchk+' ~ *:not(:nth-child(2)) {display:none;}\n\
+		.zoombox {position:relative;width:100%;overflow-x:scroll;-webkit-user-select:none;-moz-user-select:none;user-select:none;}\n\
+		.timeline {position:relative;font-size:14px;cursor:pointer;width:100%; overflow:hidden;background:linear-gradient(#cccccc, white);}\n\
+		.thread {position:absolute;height:0%;overflow:hidden;z-index:7;line-height:30px;font-size:14px;border:1px solid;text-align:center;white-space:nowrap;}\n\
+		.thread.ps {border-radius:3px;background:linear-gradient(to top, #ccc, #eee);}\n\
+		.thread:hover {background:white;border:1px solid red;'+hoverZ+'}\n\
+		.thread.sec,.thread.sec:hover {background:black;border:0;color:white;line-height:15px;font-size:10px;}\n\
+		.hover {background:white;border:1px solid red;'+hoverZ+'}\n\
+		.hover.sync {background:white;}\n\
+		.hover.bg,.hover.kth,.hover.sync,.hover.ps {background:white;}\n\
+		.jiffie {position:absolute;pointer-events: none;z-index:8;}\n\
+		.traceevent {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\
+		.traceevent:hover {color:white;font-weight:bold;border:1px solid white;}\n\
+		.phase {position:absolute;overflow:hidden;border:0px;text-align:center;}\n\
+		.phaselet {float:left;overflow:hidden;border:0px;text-align:center;min-height:100px;font-size:24px;}\n\
+		.t {position:absolute;line-height:'+('%d'%scaleTH)+'px;pointer-events:none;top:0;height:100%;border-right:1px solid black;z-index:6;}\n\
+		.err {position:absolute;top:0%;height:100%;border-right:3px solid red;color:red;font:bold 14px Times;line-height:18px;}\n\
+		.legend {position:relative; width:100%; height:40px; text-align:center;margin-bottom:20px}\n\
+		.legend .square {position:absolute;cursor:pointer;top:10px; width:0px;height:20px;border:1px solid;padding-left:20px;}\n\
+		button {height:40px;width:200px;margin-bottom:20px;margin-top:20px;font-size:24px;}\n\
+		.logbtn {position:relative;float:right;height:25px;width:50px;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\
+		.devlist {position:'+devlistpos+';width:190px;}\n\
+		a:link {color:white;text-decoration:none;}\n\
+		a:visited {color:white;}\n\
+		a:hover {color:white;}\n\
+		a:active {color:white;}\n\
+		.version {position:relative;float:left;color:white;font-size:10px;line-height:30px;margin-left:10px;}\n\
+		#devicedetail {min-height:100px;box-shadow:5px 5px 20px black;}\n\
+		.tblock {position:absolute;height:100%;background:#ddd;}\n\
+		.tback {position:absolute;width:100%;background:linear-gradient(#ccc, #ddd);}\n\
+		.bg {z-index:1;}\n\
+'+extra+'\
+	</style>\n</head>\n<body>\n'
+	hf.write(html_header)
+
 # Function: addScriptCode
 # Description:
 #	 Adds the javascript code to the output html
@@ -3809,7 +3782,7 @@ def addScriptCode(hf, testruns):
 	'	var resolution = -1;\n'\
 	'	var dragval = [0, 0];\n'\
 	'	function redrawTimescale(t0, tMax, tS) {\n'\
-	'		var rline = \'<div class="t" style="left:0;border-left:1px solid black;border-right:0;"><cS>&larr;R</cS></div>\';\n'\
+	'		var rline = \'<div class="t" style="left:0;border-left:1px solid black;border-right:0;">\';\n'\
 	'		var tTotal = tMax - t0;\n'\
 	'		var list = document.getElementsByClassName("tblock");\n'\
 	'		for (var i = 0; i < list.length; i++) {\n'\
@@ -3824,19 +3797,23 @@ def addScriptCode(hf, testruns):
 	'			var pos = 0.0, val = 0.0;\n'\
 	'			for (var j = 0; j < divTotal; j++) {\n'\
 	'				var htmlline = "";\n'\
-	'				if(list[i].id[5] == "r") {\n'\
-	'					pos = 100 - (((j)*tS*100)/mTotal);\n'\
-	'					val = (j)*tS;\n'\
-	'					htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
-	'					if(j == 0)\n'\
-	'						htmlline = rline;\n'\
-	'				} else {\n'\
+	'				var mode = list[i].id[5];\n'\
+	'				if(mode == "s") {\n'\
 	'					pos = 100 - (((j)*tS*100)/mTotal) - divEdge;\n'\
 	'					val = (j-divTotal+1)*tS;\n'\
 	'					if(j == divTotal - 1)\n'\
 	'						htmlline = \'<div class="t" style="right:\'+pos+\'%"><cS>S&rarr;</cS></div>\';\n'\
 	'					else\n'\
 	'						htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
+	'				} else {\n'\
+	'					pos = 100 - (((j)*tS*100)/mTotal);\n'\
+	'					val = (j)*tS;\n'\
+	'					htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
+	'					if(j == 0)\n'\
+	'						if(mode == "r")\n'\
+	'							htmlline = rline+"<cS>&larr;R</cS></div>";\n'\
+	'						else\n'\
+	'							htmlline = rline+"<cS>0ms</div>";\n'\
 	'				}\n'\
 	'				html += htmlline;\n'\
 	'			}\n'\
@@ -4002,12 +3979,80 @@ def addScriptCode(hf, testruns):
 	'				}\n'\
 	'			}\n'\
 	'		}\n'\
+	'		if(typeof devstats !== \'undefined\')\n'\
+	'			callDetail(this.id, this.title);\n'\
 	'		var cglist = document.getElementById("callgraphs");\n'\
 	'		if(!cglist) return;\n'\
 	'		var cg = cglist.getElementsByClassName("atop");\n'\
 	'		if(cg.length < 10) return;\n'\
 	'		for (var i = 0; i < cg.length; i++) {\n'\
-	'			if(idlist.indexOf(cg[i].id) >= 0) {\n'\
+	'			cgid = cg[i].id.split("x")[0]\n'\
+	'			if(idlist.indexOf(cgid) >= 0) {\n'\
+	'				cg[i].style.display = "block";\n'\
+	'			} else {\n'\
+	'				cg[i].style.display = "none";\n'\
+	'			}\n'\
+	'		}\n'\
+	'	}\n'\
+	'	function callDetail(devid, devtitle) {\n'\
+	'		if(!(devid in devstats) || devstats[devid].length < 1)\n'\
+	'			return;\n'\
+	'		var list = devstats[devid];\n'\
+	'		var tmp = devtitle.split(" ");\n'\
+	'		var name = tmp[0], phase = tmp[tmp.length-1];\n'\
+	'		var dd = document.getElementById(phase);\n'\
+	'		var total = parseFloat(tmp[1].slice(1));\n'\
+	'		var mlist = [];\n'\
+	'		var maxlen = 0;\n'\
+	'		var info = []\n'\
+	'		for(var i in list) {\n'\
+	'			if(list[i][0] == "@") {\n'\
+	'				info = list[i].split("|");\n'\
+	'				continue;\n'\
+	'			}\n'\
+	'			var tmp = list[i].split("|");\n'\
+	'			var t = parseFloat(tmp[0]), f = tmp[1], c = parseInt(tmp[2]);\n'\
+	'			var p = (t*100.0/total).toFixed(2);\n'\
+	'			mlist[mlist.length] = [f, c, t.toFixed(2), p+"%"];\n'\
+	'			if(f.length > maxlen)\n'\
+	'				maxlen = f.length;\n'\
+	'		}\n'\
+	'		var pad = 5;\n'\
+	'		if(mlist.length == 0) pad = 30;\n'\
+	'		var html = \'<div style="padding-top:\'+pad+\'px"><t3> <b>\'+name+\':</b>\';\n'\
+	'		if(info.length > 2)\n'\
+	'			html += " start=<b>"+info[1]+"</b>, end=<b>"+info[2]+"</b>";\n'\
+	'		if(info.length > 3)\n'\
+	'			html += ", length<i>(w/o overhead)</i>=<b>"+info[3]+" ms</b>";\n'\
+	'		if(info.length > 4)\n'\
+	'			html += ", return=<b>"+info[4]+"</b>";\n'\
+	'		html += "</t3></div>";\n'\
+	'		if(mlist.length > 0) {\n'\
+	'			html += \'<table class=fstat style="padding-top:\'+(maxlen*5)+\'px;"><tr><th>Function</th>\';\n'\
+	'			for(var i in mlist)\n'\
+	'				html += "<td class=vt>"+mlist[i][0]+"</td>";\n'\
+	'			html += "</tr><tr><th>Calls</th>";\n'\
+	'			for(var i in mlist)\n'\
+	'				html += "<td>"+mlist[i][1]+"</td>";\n'\
+	'			html += "</tr><tr><th>Time(ms)</th>";\n'\
+	'			for(var i in mlist)\n'\
+	'				html += "<td>"+mlist[i][2]+"</td>";\n'\
+	'			html += "</tr><tr><th>Percent</th>";\n'\
+	'			for(var i in mlist)\n'\
+	'				html += "<td>"+mlist[i][3]+"</td>";\n'\
+	'			html += "</tr></table>";\n'\
+	'		}\n'\
+	'		dd.innerHTML = html;\n'\
+	'		var height = (maxlen*5)+100;\n'\
+	'		dd.style.height = height+"px";\n'\
+	'		document.getElementById("devicedetail").style.height = height+"px";\n'\
+	'	}\n'\
+	'	function callSelect() {\n'\
+	'		var cglist = document.getElementById("callgraphs");\n'\
+	'		if(!cglist) return;\n'\
+	'		var cg = cglist.getElementsByClassName("atop");\n'\
+	'		for (var i = 0; i < cg.length; i++) {\n'\
+	'			if(this.id == cg[i].id) {\n'\
 	'				cg[i].style.display = "block";\n'\
 	'			} else {\n'\
 	'				cg[i].style.display = "none";\n'\
@@ -4093,6 +4138,9 @@ def addScriptCode(hf, testruns):
 	'			dev[i].onmouseover = deviceHover;\n'\
 	'			dev[i].onmouseout = deviceUnhover;\n'\
 	'		}\n'\
+	'		var dev = dmesg.getElementsByClassName("srccall");\n'\
+	'		for (var i = 0; i < dev.length; i++)\n'\
+	'			dev[i].onclick = callSelect;\n'\
 	'		zoomTimeline();\n'\
 	'	});\n'\
 	'</script>\n'
@@ -4675,7 +4723,7 @@ def rootCheck(fatal):
 	if(os.access(sysvals.powerfile, os.W_OK)):
 		return True
 	if fatal:
-		doError('This command must be run as root')
+		doError('This command requires sysfs mount and root access')
 	return False
 
 # Function: getArgInt
@@ -4767,51 +4815,62 @@ def runTest(subdir, testpath=''):
 		cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1'
 		call(cmd.format(os.environ['SUDO_USER'], sysvals.testdir), shell=True)
 
+# Return the value following the first strs marker in html ('' if not found)
+def find_in_html(html, strs, div=False):
+	i, l = -1, 0
+	for s in strs:
+		i, l = html.find(s), len(s)
+		if i >= 0:
+			break
+	if i < 0:
+		return ''
+	if div:
+		n = html.find('</div>', i+l)
+		return html[i+l:n] if n >= 0 else ''
+	m = re.search(r'[-+]?\d*\.\d+|\d+', html[i+l:i+l+50])
+	return m.group() if m else ''
+
 # Function: runSummary
 # Description:
 #	 create a summary of tests in a sub-directory
-def runSummary(subdir, output):
-	# get a list of ftrace output files
-	files = []
+def runSummary(subdir, local=True):
+	inpath = os.path.abspath(subdir)
+	outpath = inpath
+	if local:
+		outpath = os.path.abspath('.')
+	print('Generating a summary of folder "%s"' % inpath)
+	testruns = []
 	for dirname, dirnames, filenames in os.walk(subdir):
 		for filename in filenames:
-			if(re.match('.*_ftrace.txt', filename)):
-				files.append("%s/%s" % (dirname, filename))
-
-	# process the files in order and get an array of data objects
-	testruns = []
-	for file in sorted(files):
-		if output:
-			print("Test found in %s" % os.path.dirname(file))
-		sysvals.ftracefile = file
-		sysvals.dmesgfile = file.replace('_ftrace.txt', '_dmesg.txt')
-		doesTraceLogHaveTraceEvents()
-		sysvals.usecallgraph = False
-		if not sysvals.usetraceeventsonly:
-			if(not os.path.exists(sysvals.dmesgfile)):
-				print("Skipping %s: not a valid test input" % file)
+			if(not re.match('.*.html', filename)):
 				continue
-			else:
-				if output:
-					f = os.path.basename(sysvals.ftracefile)
-					d = os.path.basename(sysvals.dmesgfile)
-					print("\tInput files: %s and %s" % (f, d))
-				testdata = loadKernelLog()
-				data = testdata[0]
-				parseKernelLog(data)
-				testdata = [data]
-				appendIncompleteTraceLog(testdata)
-		else:
-			if output:
-				print("\tInput file: %s" % os.path.basename(sysvals.ftracefile))
-			testdata = parseTraceLog()
-			data = testdata[0]
-		data.normalizeTime(data.tSuspended)
-		link = file.replace(subdir+'/', '').replace('_ftrace.txt', '.html')
-		data.outfile = link
-		testruns.append(data)
-
-	createHTMLSummarySimple(testruns, subdir+'/summary.html')
+			file = os.path.join(dirname, filename)
+			html = open(file, 'r').read(10000)
+			suspend = find_in_html(html,
+				['Kernel Suspend: ', 'Kernel Suspend Time: '])
+			resume = find_in_html(html,
+				['Kernel Resume: ', 'Kernel Resume Time: '])
+			line = find_in_html(html, ['<div class="stamp">'], True)
+			stmp = line.split()
+			if not suspend or not resume or len(stmp) < 4:
+				continue
+			data = {
+				'host': stmp[0],
+				'kernel': stmp[1],
+				'mode': stmp[2],
+				'time': string.join(stmp[3:], ' '),
+				'suspend': suspend,
+				'resume': resume,
+				'url': os.path.relpath(file, outpath),
+			}
+			if len(stmp) == 7:
+				data['kernel'] = 'unknown'
+				data['mode'] = stmp[1]
+				data['time'] = string.join(stmp[2:], ' ')
+			testruns.append(data)
+	outfile = os.path.join(outpath, 'summary.html')
+	print('Summary file: %s' % outfile)
+	createHTMLSummarySimple(testruns, outfile, inpath)
 
 # Function: checkArgBool
 # Description:
@@ -4869,9 +4928,14 @@ def configFromFile(file):
 				sysvals.predelay = getArgInt('-predelay', value, 0, 60000, False)
 			elif(opt.lower() == 'postdelay'):
 				sysvals.postdelay = getArgInt('-postdelay', value, 0, 60000, False)
+			elif(opt.lower() == 'maxdepth'):
+				sysvals.max_graph_depth = getArgInt('-maxdepth', value, 0, 1000, False)
 			elif(opt.lower() == 'rtcwake'):
-				sysvals.rtcwake = True
-				sysvals.rtcwaketime = getArgInt('-rtcwake', value, 0, 3600, False)
+				if value.lower() == 'off':
+					sysvals.rtcwake = False
+				else:
+					sysvals.rtcwake = True
+					sysvals.rtcwaketime = getArgInt('-rtcwake', value, 0, 3600, False)
 			elif(opt.lower() == 'timeprec'):
 				sysvals.setPrecision(getArgInt('-timeprec', value, 0, 6, False))
 			elif(opt.lower() == 'mindev'):
@@ -4969,8 +5033,8 @@ def printHelp():
 	modes = getModes()
 
 	print('')
-	print('AnalyzeSuspend v%s' % sysvals.version)
-	print('Usage: sudo analyze_suspend.py <options>')
+	print('%s v%s' % (sysvals.title, sysvals.version))
+	print('Usage: sudo sleepgraph <options> <commands>')
 	print('')
 	print('Description:')
 	print('  This tool is designed to assist kernel and OS developers in optimizing')
@@ -4981,22 +5045,22 @@ def printHelp():
 	print('  a detailed view of which devices/subsystems are taking the most')
 	print('  time in suspend/resume.')
 	print('')
+	print('  If no specific command is given, the default behavior is to initiate')
+	print('  a suspend/resume and capture the dmesg/ftrace output as an html timeline.')
+	print('')
 	print('  Generates output files in subdirectory: suspend-mmddyy-HHMMSS')
 	print('   HTML output:                    <hostname>_<mode>.html')
 	print('   raw dmesg output:               <hostname>_<mode>_dmesg.txt')
 	print('   raw ftrace output:              <hostname>_<mode>_ftrace.txt')
 	print('')
 	print('Options:')
-	print('  [general]')
 	print('   -h           Print this help text')
 	print('   -v           Print the current tool version')
 	print('   -config fn   Pull arguments and config options from file fn')
 	print('   -verbose     Print extra information during execution and analysis')
-	print('   -status      Test to see if the system is enabled to run this tool')
-	print('   -modes       List available suspend modes')
 	print('   -m mode      Mode to initiate for suspend %s (default: %s)') % (modes, sysvals.suspendmode)
 	print('   -o subdir    Override the output subdirectory')
-	print('   -rtcwake t   Use rtcwake to autoresume after <t> seconds (default: disabled)')
+	print('   -rtcwake t   Wakeup t seconds after suspend, set t to "off" to disable (default: 15)')
 	print('   -addlogs     Add the dmesg and ftrace logs to the html output')
 	print('   -srgap       Add a visible gap in the timeline between sus/res (default: disabled)')
 	print('  [advanced]')
@@ -5012,23 +5076,25 @@ def printHelp():
 	print('                be created in a new subdirectory with a summary page.')
 	print('  [debug]')
 	print('   -f           Use ftrace to create device callgraphs (default: disabled)')
+	print('   -maxdepth N  limit the callgraph data to N call levels (default: 0=all)')
 	print('   -expandcg    pre-expand the callgraph data in the html output (default: disabled)')
-	print('   -flist       Print the list of functions currently being captured in ftrace')
-	print('   -flistall    Print all functions capable of being captured in ftrace')
 	print('   -fadd file   Add functions to be graphed in the timeline from a list in a text file')
 	print('   -filter "d1,d2,..." Filter out all but this comma-delimited list of device names')
 	print('   -mincg  ms   Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
 	print('   -cgphase P   Only show callgraph data for phase P (e.g. suspend_late)')
 	print('   -cgtest N    Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)')
 	print('   -timeprec N  Number of significant digits in timestamps (0:S, [3:ms], 6:us)')
-	print('  [utilities]')
+	print('  [commands]')
+	print('   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)')
+	print('   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)')
+	print('   -summary directory  Create a summary of all test in this dir')
+	print('   -modes       List available suspend modes')
+	print('   -status      Test to see if the system is enabled to run this tool')
 	print('   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table')
 	print('   -usbtopo     Print out the current USB topology with power info')
 	print('   -usbauto     Enable autosuspend for all connected USB devices')
-	print('  [re-analyze data from previous runs]')
-	print('   -ftrace ftracefile  Create HTML output using ftrace input')
-	print('   -dmesg dmesgfile    Create HTML output using dmesg (not needed for kernel >= 3.15)')
-	print('   -summary directory  Create a summary of all test in this dir')
+	print('   -flist       Print the list of functions currently being captured in ftrace')
+	print('   -flistall    Print all functions capable of being captured in ftrace')
 	print('')
 	return True
 
@@ -5076,9 +5142,18 @@ if __name__ == '__main__':
 			sysvals.useprocmon = True
 		elif(arg == '-dev'):
 			sysvals.usedevsrc = True
+		elif(arg == '-maxdepth'):
+			sysvals.max_graph_depth = getArgInt('-maxdepth', args, 0, 1000)
 		elif(arg == '-rtcwake'):
-			sysvals.rtcwake = True
-			sysvals.rtcwaketime = getArgInt('-rtcwake', args, 0, 3600)
+			try:
+				val = args.next()
+			except:
+				doError('No rtcwake time supplied', True)
+			if val.lower() == 'off':
+				sysvals.rtcwake = False
+			else:
+				sysvals.rtcwake = True
+				sysvals.rtcwaketime = getArgInt('-rtcwake', val, 0, 3600, False)
 		elif(arg == '-timeprec'):
 			sysvals.setPrecision(getArgInt('-timeprec', args, 0, 6))
 		elif(arg == '-mindev'):
@@ -5201,7 +5276,6 @@ if __name__ == '__main__':
 		elif(cmd == 'usbauto'):
 			setUSBDevicesAuto()
 		elif(cmd == 'summary'):
-			print("Generating a summary of folder \"%s\"" % cmdarg)
 			runSummary(cmdarg, True)
 		sys.exit()
 

From c4980cee82efb4fef8afac3675cb25fba3baca34 Mon Sep 17 00:00:00 2001
From: Todd E Brandt <todd.e.brandt@linux.intel.com>
Date: Fri, 7 Apr 2017 11:05:36 -0700
Subject: [PATCH 53/56] tools: power: pm-graph: AnalyzeBoot v2.0

First release into the kernel tools source
- pulls in analyze_suspend.py as a library, same html formatting
- supplants scripts/bootgraph.pl, outputs HTML instead of SVG
- enables automatic reboot and collection for easy timeline capture
- enables ftrace callgraph collection from early boot

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/pm-graph/analyze_boot.py | 824 +++++++++++++++++++++++++++
 1 file changed, 824 insertions(+)
 create mode 100755 tools/power/pm-graph/analyze_boot.py

diff --git a/tools/power/pm-graph/analyze_boot.py b/tools/power/pm-graph/analyze_boot.py
new file mode 100755
index 000000000000..3e1dcbbf1adc
--- /dev/null
+++ b/tools/power/pm-graph/analyze_boot.py
@@ -0,0 +1,824 @@
+#!/usr/bin/python
+#
+# Tool for analyzing boot timing
+# Copyright (c) 2013, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# Authors:
+#	 Todd Brandt <todd.e.brandt@linux.intel.com>
+#
+# Description:
+#	 This tool is designed to assist kernel and OS developers in optimizing
+#	 their linux stack's boot time. It creates an html representation of
+#	 the kernel boot timeline up to the start of the init process.
+#
+
+# ----------------- LIBRARIES --------------------
+
+import sys
+import time
+import os
+import string
+import re
+import platform
+import shutil
+from datetime import datetime, timedelta
+from subprocess import call, Popen, PIPE
+import analyze_suspend as aslib
+
+# ----------------- CLASSES --------------------
+
+# Class: SystemValues
+# Description:
+#	 A global, single-instance container used to
+#	 store system values and test parameters
+class SystemValues(aslib.SystemValues):
+	title = 'BootGraph'
+	version = 2.0
+	hostname = 'localhost'  # replaced by platform.node() in __init__
+	testtime = ''  # set to the current time stamp in __init__
+	kernel = ''  # third word of /proc/version, or 'unknown' if it is missing
+	dmesgfile = ''
+	ftracefile = ''
+	htmlfile = 'bootgraph.html'
+	outfile = ''
+	phoronix = False  # True when LOG_FILE and TEST_RESULTS_IDENTIFIER are set
+	addlogs = False
+	useftrace = False  # when True, kernelParams() adds the ftrace options
+	usedevsrc = True
+	suspendmode = 'boot'
+	max_graph_depth = 2  # value passed as ftrace_graph_max_depth
+	graph_filter = 'do_one_initcall'  # value passed as ftrace_graph_filter
+	reboot = False
+	manual = False
+	iscronjob = False
+	timeformat = '%.6f'
+	def __init__(self):
+		if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ):
+			self.phoronix = True
+			self.addlogs = True
+			self.outfile = os.environ['LOG_FILE']  # html and outfile share one path
+			self.htmlfile = os.environ['LOG_FILE']
+		self.hostname = platform.node()
+		self.testtime = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
+		if os.path.exists('/proc/version'):
+			fp = open('/proc/version', 'r')
+			val = fp.read().strip()
+			fp.close()
+			self.kernel = self.kernelVersion(val)
+		else:
+			self.kernel = 'unknown'
+	def kernelVersion(self, msg):
+		return msg.split()[2]  # third whitespace-separated word of msg
+	def kernelParams(self):
+		cmdline = 'initcall_debug log_buf_len=32M'  # always part of the result
+		if self.useftrace:
+			cmdline += ' trace_buf_size=128M trace_clock=global '\
+			'trace_options=nooverwrite,funcgraph-abstime,funcgraph-cpu,'\
+			'funcgraph-duration,funcgraph-proc,funcgraph-tail,'\
+			'nofuncgraph-overhead,context-info,graph-time '\
+			'ftrace=function_graph '\
+			'ftrace_graph_max_depth=%d '\
+			'ftrace_graph_filter=%s' % \
+				(self.max_graph_depth, self.graph_filter)
+		return cmdline
+	def setGraphFilter(self, val):
+		fp = open(self.tpath+'available_filter_functions')  # tpath is not set in this class
+		master = fp.read().split('\n')
+		fp.close()
+		for i in val.split(','):  # val is a comma-separated list of function names
+			func = i.strip()
+			if func not in master:
+				doError('function "%s" not available for ftrace' % func)
+		self.graph_filter = val
+	def cronjobCmdString(self):
+		cmdline = '%s -cronjob' % os.path.abspath(sys.argv[0])
+		args = iter(sys.argv[1:])
+		for arg in args:
+			if arg in ['-h', '-v', '-cronjob', '-reboot']:  # dropped from the cron command
+				continue
+			elif arg in ['-o', '-dmesg', '-ftrace', '-filter']:  # dropped along with their value
+				args.next()
+				continue
+			cmdline += ' '+arg
+		if self.graph_filter != 'do_one_initcall':  # re-add -filter only when not the default
+			cmdline += ' -filter "%s"' % self.graph_filter
+		cmdline += ' -o "%s"' % os.path.abspath(self.htmlfile)  # -o is always re-added as an absolute path
+		return cmdline
+	def manualRebootRequired(self):
+		cmdline = self.kernelParams()
+		print 'To generate a new timeline manually, follow these steps:\n'
+		print '1. Add the CMDLINE string to your kernel command line.'
+		print '2. Reboot the system.'
+		print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n'
+		print 'CMDLINE="%s"' % cmdline
+		sys.exit()  # the instructions are the whole job, so exit here
+
+sysvals = SystemValues()  # module-level instance used by the functions below
+
+# Class: Data
+# Description:
+#	 The primary container for test data.
+class Data(aslib.Data):
+	dmesg = {}  # root data structure
+	start = 0.0 # test start
+	end = 0.0   # test end
+	dmesgtext = []   # dmesg text file in memory
+	testnumber = 0
+	idstr = ''  # prefix of the html device ids, set to 'a' in __init__
+	html_device_id = 0  # counter newAction uses to number the device ids
+	valid = False  # set by loadKernelLog once an initcall return line is parsed
+	initstart = 0.0  # timestamp of the last dmesg line loadKernelLog accepted
+	boottime = ''  # boot time stamp derived from the system clock message
+	phases = ['boot']
+	do_one_initcall = False  # True once a do_one_initcall callgraph matched a device
+	def __init__(self, num):
+		self.testnumber = num
+		self.idstr = 'a'
+		self.dmesgtext = []  # per-instance list replaces the class-level one
+		self.dmesg = {
+			'boot': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0, 'color': '#dddddd'}
+		}
+	def deviceTopology(self):
+		return ''  # always an empty string in this class
+	def newAction(self, phase, name, start, end, ret, ulen):
+		# new device callback for a specific phase
+		self.html_device_id += 1
+		devid = '%s%d' % (self.idstr, self.html_device_id)
+		list = self.dmesg[phase]['list']
+		length = -1.0  # stays -1.0 unless both timestamps are non-negative
+		if(start >= 0 and end >= 0):
+			length = end - start
+		i = 2
+		origname = name
+		while(name in list):  # make a repeated name unique: name[2], name[3], ...
+			name = '%s[%d]' % (origname, i)
+			i += 1
+		list[name] = {'name': name, 'start': start, 'end': end,
+			'pid': 0, 'length': length, 'row': 0, 'id': devid,
+			'ret': ret, 'ulen': ulen }
+		return name  # the possibly renamed key the entry was stored under
+	def deviceMatch(self, cg):
+		if cg.end - cg.start == 0:  # zero-length callgraphs count as matched but are not stored
+			return True
+		list = self.dmesg['boot']['list']
+		for devname in list:
+			dev = list[devname]
+			if cg.name == 'do_one_initcall':
+				if(cg.start <= dev['start'] and cg.end >= dev['end'] and dev['length'] > 0):  # cg encloses the device
+					dev['ftrace'] = cg
+					self.do_one_initcall = True
+					return True
+			else:
+				if(cg.start > dev['start'] and cg.end < dev['end']):  # cg lies strictly inside the device
+					if 'ftraces' not in dev:
+						dev['ftraces'] = []
+					dev['ftraces'].append(cg)
+					return True
+		return False  # no device in the boot list matched this callgraph
+
+# ----------------- FUNCTIONS --------------------
+
+# Function: loadKernelLog
+# Description:
+#	 Load a raw kernel log from dmesg
+def loadKernelLog():
+	data = Data(0)
+	data.dmesg['boot']['start'] = data.start = ktime = 0.0
+	sysvals.stamp = {
+		'time': datetime.now().strftime('%B %d %Y, %I:%M:%S %p'),
+		'host': sysvals.hostname,
+		'mode': 'boot', 'kernel': ''}
+
+	devtemp = dict()  # initcall name -> timestamp of its "calling" line
+	if(sysvals.dmesgfile):
+		lf = open(sysvals.dmesgfile, 'r')
+	else:
+		lf = Popen('dmesg', stdout=PIPE).stdout  # no file given: read the dmesg command output
+	for line in lf:
+		line = line.replace('\r\n', '')
+		idx = line.find('[')
+		if idx > 1:  # drop any text ahead of the first '['
+			line = line[idx:]
+		m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+		if(not m):
+			continue
+		ktime = float(m.group('ktime'))
+		if(ktime > 120):  # stop at the first line stamped later than 120
+			break
+		msg = m.group('msg')
+		data.end = data.initstart = ktime  # both track the latest accepted line
+		data.dmesgtext.append(line)
+		if(ktime == 0.0 and re.match('^Linux version .*', msg)):
+			if(not sysvals.stamp['kernel']):  # only the first version line is used
+				sysvals.stamp['kernel'] = sysvals.kernelVersion(msg)
+			continue
+		m = re.match('.* setting system clock to (?P<t>.*) UTC.*', msg)
+		if(m):
+			bt = datetime.strptime(m.group('t'), '%Y-%m-%d %H:%M:%S')
+			bt = bt - timedelta(seconds=int(ktime))  # subtract the log timestamp from the clock time
+			data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S')
+			sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p')
+			continue
+		m = re.match('^calling *(?P<f>.*)\+.*', msg)
+		if(m):
+			devtemp[m.group('f')] = ktime  # remember when this initcall was called
+			continue
+		m = re.match('^initcall *(?P<f>.*)\+.* returned (?P<r>.*) after (?P<t>.*) usecs', msg)
+		if(m):
+			data.valid = True
+			f, r, t = m.group('f', 'r', 't')
+			if(f in devtemp):  # returns with no recorded "calling" line are ignored
+				data.newAction('boot', f, devtemp[f], ktime, int(r), int(t))
+				data.end = ktime
+				del devtemp[f]
+			continue
+		if(re.match('^Freeing unused kernel memory.*', msg)):  # parsing stops at this message
+			break
+
+	data.dmesg['boot']['end'] = data.end
+	lf.close()
+	return data
+
+# Function: loadTraceLog
+# Description:
+#	 Check if trace is available and copy to a temp file
+def loadTraceLog(data):
+	# load the data to a temp file if none given
+	if not sysvals.ftracefile:
+		lib = aslib.sysvals
+		aslib.rootCheck(True)
+		if not lib.verifyFtrace():
+			doError('ftrace not available')
+		if lib.fgetVal('current_tracer').strip() != 'function_graph':
+			doError('ftrace not configured for a boot callgraph')
+		sysvals.ftracefile = '/tmp/boot_ftrace.%s.txt' % os.getpid()  # temp file named after this pid
+		call('cat '+lib.tpath+'trace > '+sysvals.ftracefile, shell=True)  # copy the trace out of lib.tpath
+	if not sysvals.ftracefile:
+		doError('No trace data available')
+
+	# parse the trace log
+	ftemp = dict()  # (proc, pid) -> list of callgraphs, the last one still being filled
+	tp = aslib.TestProps()
+	tp.setTracerType('function_graph')
+	tf = open(sysvals.ftracefile, 'r')
+	for line in tf:
+		if line[0] == '#':  # skip lines starting with '#'
+			continue
+		m = re.match(tp.ftrace_line_fmt, line.strip())
+		if(not m):
+			continue
+		m_time, m_proc, m_pid, m_msg, m_dur = \
+			m.group('time', 'proc', 'pid', 'msg', 'dur')
+		if float(m_time) > data.end:  # stop once the trace passes data.end
+			break
+		if(m_time and m_pid and m_msg):
+			t = aslib.FTraceLine(m_time, m_msg, m_dur)
+			pid = int(m_pid)
+		else:
+			continue
+		if t.fevent or t.fkprobe:  # lines flagged fevent or fkprobe are not added to callgraphs
+			continue
+		key = (m_proc, pid)
+		if(key not in ftemp):
+			ftemp[key] = []
+			ftemp[key].append(aslib.FTraceCallGraph(pid))
+		cg = ftemp[key][-1]
+		if(cg.addLine(t)):  # NOTE(review): a true result presumably means cg is complete - confirm in aslib
+			ftemp[key].append(aslib.FTraceCallGraph(pid))
+	tf.close()
+
+	# add the callgraph data to the device hierarchy
+	for key in ftemp:
+		proc, pid = key
+		for cg in ftemp[key]:
+			if len(cg.list) < 1 or cg.invalid:  # skip empty or invalid callgraphs
+				continue
+			if(not cg.postProcess()):
+				print('Sanity check failed for %s-%d' % (proc, pid))
+				continue
+			# match cg data to devices
+			if not data.deviceMatch(cg):
+				print ' BAD: %s %s-%d [%f - %f]' % (cg.name, proc, pid, cg.start, cg.end)
+
+# Function: colorForName
+# Description:
+#	 Generate a repeatable color from a list for a given name
+def colorForName(name):
+	# the (css class, hex color) pairs a name can map to
+	palette = (
+		('c1', '#ec9999'),
+		('c2', '#ffc1a6'),
+		('c3', '#fff0a6'),
+		('c4', '#adf199'),
+		('c5', '#9fadea'),
+		('c6', '#a699c1'),
+		('c7', '#ad99b4'),
+		('c8', '#eaffea'),
+		('c9', '#dcecfb'),
+		('c10', '#ffffea')
+	)
+	# summing the character codes makes the pick repeatable for a name
+	checksum = 0
+	for ch in name:
+		checksum += ord(ch)
+	slot = checksum % len(palette)
+	return palette[slot]
+
+def cgOverview(cg, minlen):
+	# summarize the depth-1 calls of cg: returns (calls >= minlen, per-name stats)
+	toplevel = [ln for ln in cg.list if ln.fcall and ln.depth == 1]
+	overview = {}
+	for call in toplevel:
+		# each entry is [total length * 1000, number of calls]
+		entry = overview.setdefault(call.name, [0, 0.0])
+		entry[0] += (call.length * 1000.0)
+		entry[1] += 1
+	# calls at least minlen long, in their original order
+	longest = [call for call in toplevel if call.length >= minlen]
+	return (longest, overview)
+
+# Function: createBootGraph
+# Description:
+#	 Create the output html file from the resident test data
+# Arguments:
+#	 data: Data object from loadKernelLog; embedded: skip the css/js if True
+# Output:
+#	 True if the html file was created, false if it failed
+def createBootGraph(data, embedded):
+	# html function templates
+	html_srccall = '<div id={6} title="{5}" class="srccall" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;">{0}</div>\n'
+	html_timetotal = '<table class="time1">\n<tr>'\
+		'<td class="blue">Time from Kernel Boot to start of User Mode: <b>{0} ms</b></td>'\
+		'</tr>\n</table>\n'
+
+	# device timeline
+	devtl = aslib.Timeline(100, 20)
+
+	# write the test title and general info header
+	devtl.createHeader(sysvals, 'noftrace')
+
+	# Generate the header for this timeline
+	t0 = data.start
+	tMax = data.end
+	tTotal = tMax - t0
+	if(tTotal == 0):
+		print('ERROR: No timeline data')
+		return False
+	boot_time = '%.0f'%(tTotal*1000)
+	devtl.html += html_timetotal.format(boot_time)
+
+	# determine the maximum number of rows we need to draw
+	phase = 'boot'
+	list = data.dmesg[phase]['list']
+	devlist = []
+	for devname in list:
+		d = aslib.DevItem(0, phase, list[devname])
+		devlist.append(d)
+	devtl.getPhaseRows(devlist)
+	devtl.calcTotalRows()
+
+	# draw the timeline background
+	devtl.createZoomBox()
+	boot = data.dmesg[phase]
+	length = boot['end']-boot['start']
+	left = '%.3f' % (((boot['start']-t0)*100.0)/tTotal)
+	width = '%.3f' % ((length*100.0)/tTotal)
+	devtl.html += devtl.html_tblock.format(phase, left, width, devtl.scaleH)
+	devtl.html += devtl.html_phase.format('0', '100', \
+		'%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \
+		'white', '')
+
+	# draw the device timeline
+	num = 0
+	devstats = dict()  # device id -> {'info': summary string, 'fstat': call stats}
+	for devname in sorted(list):
+		cls, color = colorForName(devname)
+		dev = list[devname]
+		info = '@|%.3f|%.3f|%.3f|%d' % (dev['start']*1000.0, dev['end']*1000.0,
+			dev['ulen']/1000.0, dev['ret'])
+		devstats[dev['id']] = {'info':info}
+		dev['color'] = color
+		height = devtl.phaseRowHeight(0, phase, dev['row'])
+		top = '%.6f' % ((dev['row']*height) + devtl.scaleH)
+		left = '%.6f' % (((dev['start']-t0)*100)/tTotal)
+		width = '%.6f' % (((dev['end']-dev['start'])*100)/tTotal)
+		length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000)
+		devtl.html += devtl.html_device.format(dev['id'],
+			devname+length+'kernel_mode', left, top, '%.3f'%height,
+			width, devname, ' '+cls, '')
+		rowtop = devtl.phaseRowTop(0, phase, dev['row'])
+		height = '%.6f' % (devtl.rowH / 2)
+		top = '%.6f' % (rowtop + devtl.scaleH + (devtl.rowH / 2))
+		if data.do_one_initcall:  # draw the calls inside each device's do_one_initcall callgraph
+			if('ftrace' not in dev):
+				continue
+			cg = dev['ftrace']
+			large, stats = cgOverview(cg, 0.001)
+			devstats[dev['id']]['fstat'] = stats
+			for l in large:
+				left = '%f' % (((l.time-t0)*100)/tTotal)
+				width = '%f' % (l.length*100/tTotal)
+				title = '%s (%0.3fms)' % (l.name, l.length * 1000.0)
+				devtl.html += html_srccall.format(l.name, left,
+					top, height, width, title, 'x%d'%num)
+				num += 1
+			continue
+		if('ftraces' not in dev):
+			continue
+		for cg in dev['ftraces']:  # otherwise draw each callgraph stored for the device
+			left = '%f' % (((cg.start-t0)*100)/tTotal)
+			width = '%f' % ((cg.end-cg.start)*100/tTotal)
+			cglen = (cg.end - cg.start) * 1000.0
+			title = '%s (%0.3fms)' % (cg.name, cglen)
+			cg.id = 'x%d' % num
+			devtl.html += html_srccall.format(cg.name, left,
+				top, height, width, title, dev['id']+cg.id)
+			num += 1
+
+	# draw the time scale, try to make the number of labels readable
+	devtl.createTimeScale(t0, tMax, tTotal, phase)
+	devtl.html += '</div>\n'
+
+	# timeline is finished
+	devtl.html += '</div>\n</div>\n'
+
+	if(sysvals.outfile == sysvals.htmlfile):  # same path for both: append instead of truncating
+		hf = open(sysvals.htmlfile, 'a')
+	else:
+		hf = open(sysvals.htmlfile, 'w')
+
+	# add the css if this is not an embedded run
+	extra = '\
+		.c1 {background:rgba(209,0,0,0.4);}\n\
+		.c2 {background:rgba(255,102,34,0.4);}\n\
+		.c3 {background:rgba(255,218,33,0.4);}\n\
+		.c4 {background:rgba(51,221,0,0.4);}\n\
+		.c5 {background:rgba(17,51,204,0.4);}\n\
+		.c6 {background:rgba(34,0,102,0.4);}\n\
+		.c7 {background:rgba(51,0,68,0.4);}\n\
+		.c8 {background:rgba(204,255,204,0.4);}\n\
+		.c9 {background:rgba(169,208,245,0.4);}\n\
+		.c10 {background:rgba(255,255,204,0.4);}\n\
+		.vt {transform:rotate(-60deg);transform-origin:0 0;}\n\
+		table.fstat {table-layout:fixed;padding:150px 15px 0 0;font-size:10px;column-width:30px;}\n\
+		.fstat th {width:55px;}\n\
+		.fstat td {text-align:left;width:35px;}\n\
+		.srccall {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\
+		.srccall:hover {color:white;font-weight:bold;border:1px solid white;}\n'
+	if(not embedded):
+		aslib.addCSS(hf, sysvals, 1, False, extra)
+
+	# write the device timeline
+	hf.write(devtl.html)
+
+	# add boot specific html
+	statinfo = 'var devstats = {\n'
+	for n in sorted(devstats):
+		statinfo += '\t"%s": [\n\t\t"%s",\n' % (n, devstats[n]['info'])
+		if 'fstat' in devstats[n]:
+			funcs = devstats[n]['fstat']
+			for f in sorted(funcs, key=funcs.get, reverse=True):
+				if funcs[f][0] < 0.01 and len(funcs) > 10:  # long lists: drop the tail below 0.01
+					break
+				statinfo += '\t\t"%f|%s|%d",\n' % (funcs[f][0], f, funcs[f][1])
+		statinfo += '\t],\n'
+	statinfo += '};\n'
+	html = \
+		'<div id="devicedetailtitle"></div>\n'\
+		'<div id="devicedetail" style="display:none;">\n'\
+		'<div id="devicedetail0">\n'\
+		'<div id="kernel_mode" class="phaselet" style="left:0%;width:100%;background:#DDDDDD"></div>\n'\
+		'</div>\n</div>\n'\
+		'<script type="text/javascript">\n'+statinfo+\
+		'</script>\n'
+	hf.write(html)
+
+	# add the callgraph html
+	if(sysvals.usecallgraph):
+		aslib.addCallgraphs(sysvals, hf, data)
+
+	# add the dmesg log as a hidden div
+	if sysvals.addlogs:
+		hf.write('<div id="dmesglog" style="display:none;">\n')
+		for line in data.dmesgtext:
+			line = line.replace('<', '&lt;').replace('>', '&gt;')  # complete entities, with the ';'
+			hf.write(line)
+		hf.write('</div>\n')
+
+	if(not embedded):
+		# write the footer and close
+		aslib.addScriptCode(hf, [data])
+		hf.write('</body>\n</html>\n')
+	else:
+		# embedded out will be loaded in a page, skip the js
+		hf.write('<div id=bounds style=display:none>%f,%f</div>' % \
+			(data.start*1000, data.initstart*1000))
+	hf.close()
+	return True
+
+# Function: updateCron
+# Description:
+#    (restore=False) Back up root's crontab and install one that re-runs the tool at reboot
+#    (restore=True) Move the backed-up root crontab back into place, if a backup exists
+def updateCron(restore=False):
+	if not restore:
+		sysvals.rootUser(True)
+	crondir = '/var/spool/cron/crontabs/'
+	cronfile = crondir+'root'
+	backfile = crondir+'root-analyze_boot-backup'
+	if not os.path.exists(crondir):
+		doError('%s not found' % crondir)
+	out = Popen(['which', 'crontab'], stdout=PIPE).stdout.read()
+	if not out:
+		doError('crontab not found')
+	# on restore: move the backup cron back into place (nothing to do without a backup)
+	if restore:
+		if os.path.exists(backfile):
+			shutil.move(backfile, cronfile)
+		return
+	# backup current cron (or create an empty backup) and install new one with reboot
+	if os.path.exists(cronfile):
+		shutil.move(cronfile, backfile)
+	else:
+		fp = open(backfile, 'w')
+		fp.close()
+	res = -1  # stays non-zero unless the crontab command returns success
+	try:
+		fp = open(backfile, 'r')
+		op = open(cronfile, 'w')
+		for line in fp:
+			if '@reboot' not in line:  # every existing @reboot entry is left out of the new file
+				op.write(line)
+				continue
+		fp.close()
+		op.write('@reboot python %s\n' % sysvals.cronjobCmdString())
+		op.close()
+		res = call('crontab %s' % cronfile, shell=True)
+	except Exception, e:
+		print 'Exception: %s' % str(e)
+		shutil.move(backfile, cronfile)  # put the backed-up crontab back
+		res = -1
+	if res != 0:
+		doError('crontab failed')
+
+# Function: updateGrub
+# Description:
+#	 update grub.cfg for all kernels with our parameters (restore=True: only re-run update-grub)
+def updateGrub(restore=False):
+	# call update-grub on restore
+	if restore:
+		try:
+			call(['update-grub'], stderr=PIPE, stdout=PIPE,
+				env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'})
+		except Exception, e:
+			print 'Exception: %s\n' % str(e)
+		return
+	# verify we can do this
+	sysvals.rootUser(True)
+	grubfile = '/etc/default/grub'
+	if not os.path.exists(grubfile):
+		print 'ERROR: Unable to set the kernel parameters via grub.\n'
+		sysvals.manualRebootRequired()
+	out = Popen(['which', 'update-grub'], stdout=PIPE).stdout.read()
+	if not out:
+		print 'ERROR: Unable to set the kernel parameters via grub.\n'
+		sysvals.manualRebootRequired()
+
+	# extract the option and create a grub config without it
+	tgtopt = 'GRUB_CMDLINE_LINUX_DEFAULT'
+	cmdline = ''
+	tempfile = '/etc/default/grub.analyze_boot'
+	shutil.move(grubfile, tempfile)
+	res = -1  # stays non-zero unless update-grub returns success
+	try:
+		fp = open(tempfile, 'r')
+		op = open(grubfile, 'w')
+		cont = False  # True while inside a backslash-continued target option
+		for line in fp:
+			line = line.strip()
+			if len(line) == 0 or line[0] == '#':
+				continue
+			opt = line.split('=')[0].strip()
+			if opt == tgtopt:
+				cmdline = line.split('=', 1)[1].strip('\\')
+				if line[-1] == '\\':
+					cont = True
+			elif cont:
+				cmdline += line.strip('\\')
+				if line[-1] != '\\':
+					cont = False
+			else:
+				op.write('%s\n' % line)
+		fp.close()
+		# if the target option value is in quotes, strip them (the value may be empty or absent)
+		sp = '"'
+		val = cmdline.strip()
+		if val and (val[0] == '\'' or val[0] == '"'):
+			sp = val[0]
+			val = val.strip(sp)
+		cmdline = val
+		# append our cmd line options
+		if len(cmdline) > 0:
+			cmdline += ' '
+		cmdline += sysvals.kernelParams()
+		# write out the updated target option
+		op.write('\n%s=%s%s%s\n' % (tgtopt, sp, cmdline, sp))
+		op.close()
+		res = call('update-grub')
+		os.remove(grubfile)
+	except Exception, e:
+		print 'Exception: %s' % str(e)
+		res = -1
+	# cleanup: move the original grub defaults file back into place
+	shutil.move(tempfile, grubfile)
+	if res != 0:
+		doError('update-grub failed')
+
+# Function: doError
+# Description:
+#	 generic error function for catastrophic failures; exits with status 1
+# Arguments:
+#	 msg: the error message to print
+#	 help: True if printHelp should be called before the message, False otherwise
+def doError(msg, help=False):
+	if help:
+		printHelp()
+	print 'ERROR: %s\n' % msg
+	sys.exit(1)
+
+# Function: printHelp
+# Description:
+#	 print the tool title/version, usage, options and commands to stdout; always returns True
+def printHelp():
+	print('')
+	print('%s v%.1f' % (sysvals.title, sysvals.version))
+	print('Usage: bootgraph <options> <command>')
+	print('')
+	print('Description:')
+	print('  This tool reads in a dmesg log of linux kernel boot and')
+	print('  creates an html representation of the boot timeline up to')
+	print('  the start of the init process.')
+	print('')
+	print('  If no specific command is given the tool reads the current dmesg')
+	print('  and/or ftrace log and outputs bootgraph.html')
+	print('')
+	print('Options:')
+	print('  -h            Print this help text')
+	print('  -v            Print the current tool version')
+	print('  -addlogs      Add the dmesg log to the html output')
+	print('  -o file       Html timeline name (default: bootgraph.html)')
+	print(' [advanced]')
+	print('  -f            Use ftrace to add function detail (default: disabled)')
+	print('  -callgraph    Add callgraph detail, can be very large (default: disabled)')
+	print('  -maxdepth N   limit the callgraph data to N call levels (default: 2)')
+	print('  -mincg ms     Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
+	print('  -timeprec N   Number of significant digits in timestamps (0:S, 3:ms, [6:us])')
+	print('  -expandcg     pre-expand the callgraph data in the html output (default: disabled)')
+	print('  -filter list  Limit ftrace to comma-delimited list of functions (default: do_one_initcall)')
+	print(' [commands]')
+	print('  -reboot       Reboot the machine automatically and generate a new timeline')
+	print('  -manual       Show the requirements to generate a new timeline manually')
+	print('  -dmesg file   Load a stored dmesg file (used with -ftrace)')
+	print('  -ftrace file  Load a stored ftrace file (used with -dmesg)')
+	print('  -flistall     Print all functions capable of being captured in ftrace')
+	print('')
+	return True
+
+# ----------------- MAIN --------------------
+# exec start (skipped if script is loaded as library)
+if __name__ == '__main__':
+	# loop through the command line arguments; options taking a value pull it from the same iterator
+	cmd = ''
+	simplecmds = ['-updategrub', '-flistall']  # commands that are run on their own, then the tool exits
+	args = iter(sys.argv[1:])
+	for arg in args:
+		if(arg == '-h'):
+			printHelp()
+			sys.exit()
+		elif(arg == '-v'):
+			print("Version %.1f" % sysvals.version)
+			sys.exit()
+		elif(arg in simplecmds):
+			cmd = arg[1:]  # drop the leading '-'
+		elif(arg == '-f'):
+			sysvals.useftrace = True
+		elif(arg == '-callgraph'):
+			sysvals.useftrace = True
+			sysvals.usecallgraph = True
+		elif(arg == '-mincg'):
+			sysvals.mincglen = aslib.getArgFloat('-mincg', args, 0.0, 10000.0)
+		elif(arg == '-timeprec'):
+			sysvals.setPrecision(aslib.getArgInt('-timeprec', args, 0, 6))
+		elif(arg == '-maxdepth'):
+			sysvals.max_graph_depth = aslib.getArgInt('-maxdepth', args, 0, 1000)
+		elif(arg == '-filter'):
+			try:
+				val = args.next()
+			except:
+				doError('No filter functions supplied', True)
+			aslib.rootCheck(True)
+			sysvals.setGraphFilter(val)
+		elif(arg == '-ftrace'):
+			try:
+				val = args.next()
+			except:
+				doError('No ftrace file supplied', True)
+			if(os.path.exists(val) == False):
+				doError('%s does not exist' % val)
+			sysvals.ftracefile = val
+		elif(arg == '-addlogs'):
+			sysvals.addlogs = True
+		elif(arg == '-expandcg'):
+			sysvals.cgexp = True
+		elif(arg == '-dmesg'):
+			try:
+				val = args.next()
+			except:
+				doError('No dmesg file supplied', True)
+			if(os.path.exists(val) == False):
+				doError('%s does not exist' % val)
+			if(sysvals.htmlfile == val or sysvals.outfile == val):
+				doError('Output filename collision')
+			sysvals.dmesgfile = val
+		elif(arg == '-o'):
+			try:
+				val = args.next()
+			except:
+				doError('No HTML filename supplied', True)
+			if(sysvals.dmesgfile == val or sysvals.ftracefile == val):
+				doError('Output filename collision')
+			sysvals.htmlfile = val
+		elif(arg == '-reboot'):
+			if sysvals.iscronjob:
+				doError('-reboot and -cronjob are incompatible')
+			sysvals.reboot = True
+		elif(arg == '-manual'):
+			sysvals.reboot = True
+			sysvals.manual = True
+		# remaining options are only for cron job use
+		elif(arg == '-cronjob'):
+			sysvals.iscronjob = True
+			if sysvals.reboot:
+				doError('-reboot and -cronjob are incompatible')
+		else:
+			doError('Invalid argument: '+arg, True)
+
+	if cmd != '':  # a simple command was given: run it and exit
+		if cmd == 'updategrub':
+			updateGrub()
+		elif cmd == 'flistall':
+			sysvals.getFtraceFilterFunctions(False)
+		sys.exit()
+
+	# update grub, setup a cronjob, and reboot
+	if sysvals.reboot:
+		if not sysvals.manual:
+			updateGrub()
+			updateCron()
+			call('reboot')
+		else:
+			sysvals.manualRebootRequired()
+		sys.exit()
+
+	# disable the cronjob: restore the crontab and re-run update-grub
+	if sysvals.iscronjob:
+		updateCron(True)
+		updateGrub(True)
+
+	data = loadKernelLog()
+	if sysvals.useftrace:
+		loadTraceLog(data)
+		if sysvals.iscronjob:
+			try:
+				sysvals.fsetVal('0', 'tracing_on')
+			except:
+				pass  # best effort: a failure here is deliberately ignored
+
+	if(sysvals.outfile and sysvals.phoronix):  # write a one-line result summary to the outfile
+		fp = open(sysvals.outfile, 'w')
+		fp.write('pass %s initstart %.3f end %.3f boot %s\n' %
+			(data.valid, data.initstart*1000, data.end*1000, data.boottime))
+		fp.close()
+	if(not data.valid):
+		if sysvals.dmesgfile:
+			doError('No initcall data found in %s' % sysvals.dmesgfile)
+		else:
+			doError('No initcall data found, is initcall_debug enabled?')
+
+	print('          Host: %s' % sysvals.hostname)
+	print('     Test time: %s' % sysvals.testtime)
+	print('     Boot time: %s' % data.boottime)
+	print('Kernel Version: %s' % sysvals.kernel)
+	print('  Kernel start: %.3f' % (data.start * 1000))
+	print('    init start: %.3f' % (data.initstart * 1000))
+
+	createBootGraph(data, sysvals.phoronix)

From 22440373e11ad3fba0b33a9bded1531469a72551 Mon Sep 17 00:00:00 2001
From: Todd E Brandt <todd.e.brandt@linux.intel.com>
Date: Fri, 7 Apr 2017 11:05:37 -0700
Subject: [PATCH 54/56] tools: power: pm-graph: Package makefile and man pages

BootGraph and SleepGraph man pages
- includes full descriptions of tool arguments and commands
- includes examples of common use cases

Makefile
- no build required, used only for install
- installs man pages and tools as libraries with links
- includes an uninstall

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/pm-graph/Makefile     |  28 ++++
 tools/power/pm-graph/bootgraph.8  | 132 ++++++++++++++++
 tools/power/pm-graph/sleepgraph.8 | 243 ++++++++++++++++++++++++++++++
 3 files changed, 403 insertions(+)
 create mode 100644 tools/power/pm-graph/Makefile
 create mode 100644 tools/power/pm-graph/bootgraph.8
 create mode 100644 tools/power/pm-graph/sleepgraph.8

diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
new file mode 100644
index 000000000000..4d0ccc89e6c6
--- /dev/null
+++ b/tools/power/pm-graph/Makefile
@@ -0,0 +1,28 @@
+PREFIX		?= /usr
+DESTDIR		?=
+# No build step: the tools are python scripts, these targets only install/uninstall them.
+all:
+	@echo "Nothing to build"
+
+install :
+	install -d  $(DESTDIR)$(PREFIX)/lib/pm-graph
+	install analyze_suspend.py $(DESTDIR)$(PREFIX)/lib/pm-graph
+	install analyze_boot.py $(DESTDIR)$(PREFIX)/lib/pm-graph
+# link targets are relative so they never point into a DESTDIR staging tree
+	install -d  $(DESTDIR)$(PREFIX)/bin
+	ln -sf ../lib/pm-graph/analyze_boot.py $(DESTDIR)$(PREFIX)/bin/bootgraph
+	ln -sf ../lib/pm-graph/analyze_suspend.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
+	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
+	install -m 644 bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
+	install -m 644 sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
+
+uninstall :
+	rm $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8
+	rm $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8
+
+	rm $(DESTDIR)$(PREFIX)/bin/bootgraph
+	rm $(DESTDIR)$(PREFIX)/bin/sleepgraph
+
+	rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_boot.py
+	rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_suspend.py
+	rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph
diff --git a/tools/power/pm-graph/bootgraph.8 b/tools/power/pm-graph/bootgraph.8
new file mode 100644
index 000000000000..55272a67b0e7
--- /dev/null
+++ b/tools/power/pm-graph/bootgraph.8
@@ -0,0 +1,132 @@
+.TH BOOTGRAPH 8
+.SH NAME
+bootgraph \- Kernel boot timing analysis
+.SH SYNOPSIS
+.ft B
+.B bootgraph
+.RB [ OPTIONS ]
+.RB [ COMMAND ]
+.SH DESCRIPTION
+\fBbootgraph \fP reads the dmesg log from kernel boot and
+creates an html representation of the initcall timeline up to the start
+of the init process.
+.PP
+If no specific command is given, the tool reads the current dmesg log and
+outputs bootgraph.html.
+.PP
+The tool can also augment the timeline with ftrace data on custom target
+functions as well as full trace callgraphs.
+.SH OPTIONS
+.TP
+\fB-h\fR
+Print this help text
+.TP
+\fB-v\fR
+Print the current tool version
+.TP
+\fB-addlogs\fR
+Add the dmesg log to the html output. It will be viewable by
+clicking a button in the timeline.
+.TP
+\fB-o \fIfile\fR
+Override the HTML output filename (default: bootgraph.html)
+.SS "Ftrace Debug"
+.TP
+\fB-f\fR
+Use ftrace to add function detail (default: disabled)
+.TP
+\fB-callgraph\fR
+Use ftrace to create initcall callgraphs (default: disabled). If -filter
+is not used there will be one callgraph per initcall. This can produce
+very large outputs, i.e. 10MB - 100MB.
+.TP
+\fB-maxdepth \fIlevel\fR
+limit the callgraph trace depth to \fIlevel\fR (default: 2). This is
+the best way to limit the output size when using -callgraph.
+.TP
+\fB-mincg \fIt\fR
+Discard all callgraphs shorter than \fIt\fR milliseconds (default: 0=all).
+This reduces the html file size as there can be many tiny callgraphs
+which are barely visible in the timeline.
+The value is a float: e.g. 0.001 represents 1 us.
+.TP
+\fB-timeprec \fIn\fR
+Number of significant digits in timestamps (0:S, 3:ms, [6:us])
+.TP
+\fB-expandcg\fR
+pre-expand the callgraph data in the html output (default: disabled)
+.TP
+\fB-filter \fI"func1,func2,..."\fR
+Instead of tracing each initcall, trace a custom list of functions (default: do_one_initcall)
+
+.SH COMMANDS
+.TP
+\fB-reboot\fR
+Reboot the machine and generate a new timeline automatically. Works in 4 steps.
+  1. updates grub with the required kernel parameters
+  2. installs a cron job which re-runs the tool after reboot
+  3. reboots the system
+  4. after startup, extracts the data and generates the timeline
+.TP
+\fB-manual\fR
+Show the requirements to generate a new timeline manually. Requires 3 steps.
+  1. append the string to the kernel command line via your native boot manager.
+  2. reboot the system
+  3. after startup, re-run the tool with the same arguments and no command
+.TP
+\fB-dmesg \fIfile\fR
+Create HTML output from an existing dmesg file.
+.TP
+\fB-ftrace \fIfile\fR
+Create HTML output from an existing ftrace file (used with -dmesg).
+.TP
+\fB-flistall\fR
+Print all ftrace functions capable of being captured. These are all the
+possible values you can add to trace via the -filter argument.
+
+.SH EXAMPLES
+Create a timeline using the current dmesg log.
+.IP
+\f(CW$ bootgraph\fR
+.PP
+Create a timeline using the current dmesg and ftrace log.
+.IP
+\f(CW$ bootgraph -callgraph\fR
+.PP
+Create a timeline using the current dmesg, add the log to the html and change the name.
+.IP
+\f(CW$ bootgraph -addlogs -o myboot.html\fR
+.PP
+Capture a new boot timeline by automatically rebooting the machine.
+.IP
+\f(CW$ sudo bootgraph -reboot -addlogs -o latestboot.html\fR
+.PP
+Capture a new boot timeline with function trace data.
+.IP
+\f(CW$ sudo bootgraph -reboot -f\fR
+.PP
+Capture a new boot timeline with trace & callgraph data. Skip callgraphs smaller than 5ms.
+.IP
+\f(CW$ sudo bootgraph -reboot -callgraph -mincg 5\fR
+.PP
+Capture a new boot timeline with callgraph data over custom functions.
+.IP
+\f(CW$ sudo bootgraph -reboot -callgraph -filter "acpi_ps_parse_aml,msleep"\fR
+.PP
+Capture a brand new boot timeline with manual reboot.
+.IP
+\f(CW$ sudo bootgraph -callgraph -manual\fR
+.IP
+\f(CW$ vi /etc/default/grub      # add the CMDLINE string to your kernel params\fR
+.IP
+\f(CW$ sudo reboot               # reboot the machine\fR
+.IP
+\f(CW$ sudo bootgraph -callgraph # re-run the tool after restart\fR
+.PP
+
+.SH "SEE ALSO"
+dmesg(1), update-grub(8), crontab(1), reboot(8)
+.PP
+.SH AUTHOR
+.nf
+Written by Todd Brandt <todd.e.brandt@linux.intel.com>
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
new file mode 100644
index 000000000000..610e72ebbc06
--- /dev/null
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -0,0 +1,243 @@
+.TH SLEEPGRAPH 8
+.SH NAME
+sleepgraph \- Suspend/Resume timing analysis
+.SH SYNOPSIS
+.ft B
+.B sleepgraph
+.RB [ OPTIONS ]
+.RB [ COMMAND ]
+.SH DESCRIPTION
+\fBsleepgraph \fP is designed to assist kernel and OS developers
+in optimizing their linux stack's suspend/resume time. Using a kernel
+image built with a few extra options enabled, the tool will execute a
+suspend and capture dmesg and ftrace data until resume is complete.
+This data is transformed into a device timeline and an optional
+callgraph to give a detailed view of which devices/subsystems are
+taking the most time in suspend/resume.
+.PP
+If no specific command is given, the default behavior is to initiate
+a suspend/resume.
+.PP
+Generates output files in subdirectory: suspend-yymmdd-HHMMSS
+   html timeline   :     <hostname>_<mode>.html
+   raw dmesg file  :     <hostname>_<mode>_dmesg.txt
+   raw ftrace file :     <hostname>_<mode>_ftrace.txt
+.SH OPTIONS
+.TP
+\fB-h\fR
+Print the help text.
+.TP
+\fB-v\fR
+Print the current tool version.
+.TP
+\fB-verbose\fR
+Print extra information during execution and analysis.
+.TP
+\fB-config \fIfile\fR
+Pull arguments and config options from a file.
+.TP
+\fB-m \fImode\fR
+Mode to initiate for suspend e.g. standby, freeze, mem (default: mem).
+.TP
+\fB-o \fIsubdir\fR
+Override the output subdirectory. Use {date}, {time}, {hostname} for current values.
+.sp
+e.g. suspend-{hostname}-{date}-{time}
+.TP
+\fB-rtcwake \fIt\fR | off
+Use rtcwake to autoresume after \fIt\fR seconds (default: 15). Set t to "off" to
+disable rtcwake and require a user keypress to resume.
+.TP
+\fB-addlogs\fR
+Add the dmesg and ftrace logs to the html output. They will be viewable by
+clicking buttons in the timeline.
+
+.SS "Advanced"
+.TP
+\fB-cmd \fIstr\fR
+Run the timeline over a custom suspend command, e.g. pm-suspend. By default
+the tool forces suspend via /sys/power/state so this allows testing over
+an OS's official suspend method. The output file will change to
+hostname_command.html and will autodetect which suspend mode was triggered.
+.TP
+\fB-filter \fI"d1,d2,..."\fR
+Filter out all but these device callbacks. These strings can be device names
+or module names. e.g. 0000:00:02.0, ata5, i915, usb, etc.
+.TP
+\fB-mindev \fIt\fR
+Discard all device callbacks shorter than \fIt\fR milliseconds (default: 0.0).
+This reduces the html file size as there can be many tiny callbacks which are barely
+visible. The value is a float: e.g. 0.001 represents 1 us.
+.TP
+\fB-proc\fR
+Add usermode process info into the timeline (default: disabled).
+.TP
+\fB-dev\fR
+Add kernel source calls and threads to the timeline (default: disabled).
+.TP
+\fB-x2\fR
+Run two suspend/resumes back to back (default: disabled).
+.TP
+\fB-x2delay \fIt\fR
+Include \fIt\fR ms delay between multiple test runs (default: 0 ms).
+.TP
+\fB-predelay \fIt\fR
+Include \fIt\fR ms delay before 1st suspend (default: 0 ms).
+.TP
+\fB-postdelay \fIt\fR
+Include \fIt\fR ms delay after last resume (default: 0 ms).
+.TP
+\fB-multi \fIn d\fR
+Execute \fIn\fR consecutive tests at \fId\fR second intervals. The outputs will
+be created in a new subdirectory with a summary page: suspend-xN-{date}-{time}.
+
+.SS "Ftrace Debug"
+.TP
+\fB-f\fR
+Use ftrace to create device callgraphs (default: disabled). This can produce
+very large outputs, i.e. 10MB - 100MB.
+.TP
+\fB-maxdepth \fIlevel\fR
+limit the callgraph trace depth to \fIlevel\fR (default: 0=all). This is
+the best way to limit the output size when using callgraphs via -f.
+.TP
+\fB-expandcg\fR
+pre-expand the callgraph data in the html output (default: disabled)
+.TP
+\fB-fadd \fIfile\fR
+Add functions to be graphed in the timeline from a list in a text file
+.TP
+\fB-mincg \fIt\fR
+Discard all callgraphs shorter than \fIt\fR milliseconds (default: 0.0).
+This reduces the html file size as there can be many tiny callgraphs
+which are barely visible in the timeline.
+The value is a float: e.g. 0.001 represents 1 us.
+.TP
+\fB-cgphase \fIp\fR
+Only show callgraph data for phase \fIp\fR (e.g. suspend_late).
+.TP
+\fB-cgtest \fIn\fR
+In an x2 run, only show callgraph data for test \fIn\fR (e.g. 0 or 1).
+.TP
+\fB-timeprec \fIn\fR
+Number of significant digits in timestamps (0:S, [3:ms], 6:us).
+
+.SH COMMANDS
+.TP
+\fB-ftrace \fIfile\fR
+Create HTML output from an existing ftrace file.
+.TP
+\fB-dmesg \fIfile\fR
+Create HTML output from an existing dmesg file.
+.TP
+\fB-summary \fIindir\fR
+Create a summary page of all tests in \fIindir\fR. Creates summary.html
+in the current folder. The output page is a table of tests with
+suspend and resume values sorted by suspend mode, host, and kernel.
+Includes test averages by mode and links to the test html files.
+.TP
+\fB-modes\fR
+List available suspend modes.
+.TP
+\fB-status\fR
+Test to see if the system is able to run this tool. Use this along
+with any options you intend to use to see if they will work.
+.TP
+\fB-fpdt\fR
+Print out the contents of the ACPI Firmware Performance Data Table.
+.TP
+\fB-usbtopo\fR
+Print out the current USB topology with power info.
+.TP
+\fB-usbauto\fR
+Enable autosuspend for all connected USB devices.
+.TP
+\fB-flist\fR
+Print the list of ftrace functions currently being captured. Functions
+that are not available as symbols in the current kernel are shown in red.
+By default, the tool traces a list of important suspend/resume functions
+in order to better fill out the timeline. If the user has added their own
+with -fadd they will also be checked.
+.TP
+\fB-flistall\fR
+Print all ftrace functions capable of being captured. These are all the
+possible values you can add to trace via the -fadd argument.
+
+.SH EXAMPLES
+.SS "Simple Commands"
+Check which suspend modes are currently supported.
+.IP
+\f(CW$ sleepgraph -modes\fR
+.PP
+Read the Firmware Performance Data Table (FPDT)
+.IP
+\f(CW$ sudo sleepgraph -fpdt\fR
+.PP
+Print out the current USB power topology
+.IP
+\f(CW$ sleepgraph -usbtopo\fR
+.PP
+Verify that you can run a command with a set of arguments
+.IP
+\f(CW$ sudo sleepgraph -f -rtcwake 30 -status\fR
+.PP
+Generate a summary of all timelines in a particular folder.
+.IP
+\f(CW$ sleepgraph -summary ~/workspace/myresults/\fR
+.PP
+Re-generate the html output from a previous run's dmesg and ftrace log.
+.IP
+\f(CW$ sleepgraph -dmesg myhost_mem_dmesg.txt -ftrace myhost_mem_ftrace.txt\fR
+.PP
+
+.SS "Capturing Simple Timelines"
+Execute a mem suspend with a 15 second wakeup. Include the logs in the html.
+.IP
+\f(CW$ sudo sleepgraph -rtcwake 15 -addlogs\fR
+.PP
+Execute a standby with a 15 second wakeup. Change the output folder name.
+.IP
+\f(CW$ sudo sleepgraph -m standby -rtcwake 15 -o "standby-{hostname}-{date}-{time}"\fR
+.PP
+Execute a freeze with no wakeup (require keypress). Change output folder name.
+.IP
+\f(CW$ sudo sleepgraph -m freeze -rtcwake off -o "freeze-{hostname}-{date}-{time}"\fR
+.PP
+
+.SS "Capturing Advanced Timelines"
+Execute a suspend & include dev mode source calls, limit callbacks to 5ms or larger.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -dev -mindev 5\fR
+.PP
+Run two suspends back to back, include a 500ms delay before, after, and in between runs.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -x2 -predelay 500 -x2delay 500 -postdelay 500\fR
+.PP
+Do a batch run of 10 freezes with 30 seconds delay between runs.
+.IP
+\f(CW$ sudo sleepgraph -m freeze -rtcwake 15 -multi 10 30\fR
+.PP
+Execute a suspend using a custom command.
+.IP
+\f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR
+.PP
+
+
+.SS "Capturing Timelines with Callgraph Data"
+Add device callgraphs. Limit the trace depth and only show callgraphs 10ms or larger.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f -maxdepth 5 -mincg 10\fR
+.PP
+Capture a full callgraph across all suspend, then filter the html by a single phase.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f\fR
+.IP
+\f(CW$ sleepgraph -dmesg host_mem_dmesg.txt -ftrace host_mem_ftrace.txt -f -cgphase resume\fR
+.PP
+
+.SH "SEE ALSO"
+dmesg(1)
+.PP
+.SH AUTHOR
+.nf
+Written by Todd Brandt <todd.e.brandt@linux.intel.com>

From 72ec2e17f9f7f7f6082ec57183080726eb3523e3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 10 Apr 2017 13:38:24 +0200
Subject: [PATCH 55/56] PM / runtime: Fix autosuspend documentation

Update the autosuspend documentation which claimed that the autosuspend
delay is not taken into account when using the non-autosuspend helper
functions, something which is no longer true since commit d66e6db28df3
("PM / Runtime: Respect autosuspend when idle triggers suspend").

This specifically means that drivers must now disable autosuspend before
disabling runtime pm in probe error paths and remove callbacks if
pm_runtime_put_sync was being used to suspend the device before
returning. (If an idle callback can prevent suspend,
pm_runtime_put_sync_suspend must be used instead of pm_runtime_put_sync
as before.)

Also remove the claim that the autosuspend helpers behave "just like
the non-autosuspend counterparts", something which has never really
been true as some of the latter use idle notifications.

Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/runtime_pm.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 64546eb9a16a..56a13b865927 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -836,9 +836,8 @@ of the non-autosuspend counterparts:
 	Instead of: pm_runtime_put_sync   use: pm_runtime_put_sync_autosuspend.
 
 Drivers may also continue to use the non-autosuspend helper functions; they
-will behave normally, not taking the autosuspend delay into account.
-Similarly, if the power.use_autosuspend field isn't set then the autosuspend
-helper functions will behave just like the non-autosuspend counterparts.
+will behave normally, which means sometimes taking the autosuspend delay into
+account (see pm_runtime_idle).
 
 Under some circumstances a driver or subsystem may want to prevent a device
 from autosuspending immediately, even though the usage counter is zero and the

From bafdcde73be7d462129cecfc339d3a6abcba91d2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 10 Apr 2017 13:38:25 +0200
Subject: [PATCH 56/56] PM / runtime: Document autosuspend-helper side effects

Document the fact that the autosuspend delay and enable helpers may
change the power.usage_count and resume or suspend a device depending on
the values of power.autosuspend_delay and power.use_autosuspend.

Note that this means that a driver must disable autosuspend before
disabling runtime pm on probe errors and on driver unbind if the device
is to be suspended upon return (as a negative delay may otherwise keep
the device resumed).

Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/runtime_pm.txt | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 56a13b865927..ee69d7532172 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -478,15 +478,23 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
     - set the power.last_busy field to the current time
 
   void pm_runtime_use_autosuspend(struct device *dev);
-    - set the power.use_autosuspend flag, enabling autosuspend delays
+    - set the power.use_autosuspend flag, enabling autosuspend delays; call
+      pm_runtime_get_sync if the flag was previously cleared and
+      power.autosuspend_delay is negative
 
   void pm_runtime_dont_use_autosuspend(struct device *dev);
-    - clear the power.use_autosuspend flag, disabling autosuspend delays
+    - clear the power.use_autosuspend flag, disabling autosuspend delays;
+      decrement the device's usage counter if the flag was previously set and
+      power.autosuspend_delay is negative; call pm_runtime_idle
 
   void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
     - set the power.autosuspend_delay value to 'delay' (expressed in
       milliseconds); if 'delay' is negative then runtime suspends are
-      prevented
+      prevented; if power.use_autosuspend is set, pm_runtime_get_sync may be
+      called or the device's usage counter may be decremented and
+      pm_runtime_idle called depending on if power.autosuspend_delay is
+      changed to or from a negative value; if power.use_autosuspend is clear,
+      pm_runtime_idle is called
 
   unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
     - calculate the time when the current autosuspend delay period will expire,