diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 723e1ad937da..940707d095cc 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -31,7 +31,9 @@ Optional properties:
 
 - domain-idle-states : A phandle of an idle-state that shall be soaked into a
                 generic domain power state. The idle state definitions are
-                compatible with domain-idle-state specified in [1].
+                compatible with domain-idle-state specified in [1]. phandles
+                that are not compatible with domain-idle-state will be
+                ignored.
   The domain-idle-state property reflects the idle state of this PM domain and
   not the idle states of the devices or sub-domains in the PM domain. Devices
   and sub-domains have their own idle-states independent of the parent
diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
index d23dc002a87e..d3a5a93a65cd 100644
--- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
+++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
@@ -33,6 +33,7 @@ Required properties:
 - compatible: should be one of:
   - "rockchip,rk3188-io-voltage-domain" for rk3188
   - "rockchip,rk3288-io-voltage-domain" for rk3288
+  - "rockchip,rk3328-io-voltage-domain" for rk3328
   - "rockchip,rk3368-io-voltage-domain" for rk3368
   - "rockchip,rk3368-pmu-io-voltage-domain" for rk3368 pmu-domains
   - "rockchip,rk3399-io-voltage-domain" for rk3399
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 64546eb9a16a..ee69d7532172 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -478,15 +478,23 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
     - set the power.last_busy field to the current time
 
   void pm_runtime_use_autosuspend(struct device *dev);
-    - set the power.use_autosuspend flag, enabling autosuspend delays
+    - set the power.use_autosuspend flag, enabling autosuspend delays; call
+      pm_runtime_get_sync if the flag was previously cleared and
+      power.autosuspend_delay is negative
 
   void pm_runtime_dont_use_autosuspend(struct device *dev);
-    - clear the power.use_autosuspend flag, disabling autosuspend delays
+    - clear the power.use_autosuspend flag, disabling autosuspend delays;
+      decrement the device's usage counter if the flag was previously set and
+      power.autosuspend_delay is negative; call pm_runtime_idle
 
   void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
     - set the power.autosuspend_delay value to 'delay' (expressed in
       milliseconds); if 'delay' is negative then runtime suspends are
-      prevented
+      prevented; if power.use_autosuspend is set, pm_runtime_get_sync may be
+      called or the device's usage counter may be decremented and
+      pm_runtime_idle called depending on if power.autosuspend_delay is
+      changed to or from a negative value; if power.use_autosuspend is clear,
+      pm_runtime_idle is called
 
   unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
     - calculate the time when the current autosuspend delay period will expire,
@@ -836,9 +844,8 @@ of the non-autosuspend counterparts:
 	Instead of: pm_runtime_put_sync   use: pm_runtime_put_sync_autosuspend.
 
 Drivers may also continue to use the non-autosuspend helper functions; they
-will behave normally, not taking the autosuspend delay into account.
-Similarly, if the power.use_autosuspend field isn't set then the autosuspend
-helper functions will behave just like the non-autosuspend counterparts.
+will behave normally, which means sometimes taking the autosuspend delay into
+account (see pm_runtime_idle).
 
 Under some circumstances a driver or subsystem may want to prevent a device
 from autosuspending immediately, even though the usage counter is zero and the
diff --git a/MAINTAINERS b/MAINTAINERS
index 6e8897594291..0f6df84fec54 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3471,6 +3471,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
 T:	git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
 B:	https://bugzilla.kernel.org
 F:	Documentation/cpu-freq/
+F:	Documentation/devicetree/bindings/cpufreq/
 F:	drivers/cpufreq/
 F:	include/linux/cpufreq.h
 F:	tools/testing/selftests/cpufreq/
diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi
index 162e1eb5373d..6c5affe2d0f5 100644
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -1189,11 +1189,6 @@
 			status = "disabled";
 		};
 
-		cpufreq-cooling {
-			compatible = "stericsson,db8500-cpufreq-cooling";
-			status = "disabled";
-		};
-
 		mcde@a0350000 {
 			compatible = "stericsson,mcde";
 			reg = <0xa0350000 0x1000>, /* MCDE */
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e697dec9d25b..ad196427b4f2 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -121,7 +121,9 @@ static const struct genpd_lock_ops genpd_spin_ops = {
 #define genpd_lock_interruptible(p)	p->lock_ops->lock_interruptible(p)
 #define genpd_unlock(p)			p->lock_ops->unlock(p)
 
+#define genpd_status_on(genpd)		(genpd->status == GPD_STATE_ACTIVE)
 #define genpd_is_irq_safe(genpd)	(genpd->flags & GENPD_FLAG_IRQ_SAFE)
+#define genpd_is_always_on(genpd)	(genpd->flags & GENPD_FLAG_ALWAYS_ON)
 
 static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
 		struct generic_pm_domain *genpd)
@@ -130,8 +132,12 @@ static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
 
 	ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd);
 
-	/* Warn once if IRQ safe dev in no sleep domain */
-	if (ret)
+	/*
+	 * Warn once if an IRQ safe device is attached to a no sleep domain, as
+	 * to indicate a suboptimal configuration for PM. For an always on
+	 * domain this isn't case, thus don't warn.
+	 */
+	if (ret && !genpd_is_always_on(genpd))
 		dev_warn_once(dev, "PM domain %s will not be powered off\n",
 				genpd->name);
 
@@ -296,11 +302,15 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
 	 * (1) The domain is already in the "power off" state.
 	 * (2) System suspend is in progress.
 	 */
-	if (genpd->status == GPD_STATE_POWER_OFF
-	    || genpd->prepared_count > 0)
+	if (!genpd_status_on(genpd) || genpd->prepared_count > 0)
 		return 0;
 
-	if (atomic_read(&genpd->sd_count) > 0)
+	/*
+	 * Abort power off for the PM domain in the following situations:
+	 * (1) The domain is configured as always on.
+	 * (2) When the domain has a subdomain being powered on.
+	 */
+	if (genpd_is_always_on(genpd) || atomic_read(&genpd->sd_count) > 0)
 		return -EBUSY;
 
 	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
@@ -373,7 +383,7 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth)
 	struct gpd_link *link;
 	int ret = 0;
 
-	if (genpd->status == GPD_STATE_ACTIVE)
+	if (genpd_status_on(genpd))
 		return 0;
 
 	/*
@@ -752,7 +762,7 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 {
 	struct gpd_link *link;
 
-	if (genpd->status == GPD_STATE_POWER_OFF)
+	if (!genpd_status_on(genpd) || genpd_is_always_on(genpd))
 		return;
 
 	if (genpd->suspended_count != genpd->device_count
@@ -761,7 +771,8 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 
 	/* Choose the deepest state when suspending */
 	genpd->state_idx = genpd->state_count - 1;
-	_genpd_power_off(genpd, false);
+	if (_genpd_power_off(genpd, false))
+		return;
 
 	genpd->status = GPD_STATE_POWER_OFF;
 
@@ -793,7 +804,7 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock,
 {
 	struct gpd_link *link;
 
-	if (genpd->status == GPD_STATE_ACTIVE)
+	if (genpd_status_on(genpd))
 		return;
 
 	list_for_each_entry(link, &genpd->slave_links, slave_node) {
@@ -1329,8 +1340,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 	genpd_lock(subdomain);
 	genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING);
 
-	if (genpd->status == GPD_STATE_POWER_OFF
-	    &&  subdomain->status != GPD_STATE_POWER_OFF) {
+	if (!genpd_status_on(genpd) && genpd_status_on(subdomain)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1346,7 +1356,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 	list_add_tail(&link->master_node, &genpd->master_links);
 	link->slave = subdomain;
 	list_add_tail(&link->slave_node, &subdomain->slave_links);
-	if (subdomain->status != GPD_STATE_POWER_OFF)
+	if (genpd_status_on(subdomain))
 		genpd_sd_counter_inc(genpd);
 
  out:
@@ -1406,7 +1416,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 		list_del(&link->master_node);
 		list_del(&link->slave_node);
 		kfree(link);
-		if (subdomain->status != GPD_STATE_POWER_OFF)
+		if (genpd_status_on(subdomain))
 			genpd_sd_counter_dec(genpd);
 
 		ret = 0;
@@ -1492,6 +1502,10 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
 		genpd->dev_ops.start = pm_clk_resume;
 	}
 
+	/* Always-on domains must be powered on at initialization. */
+	if (genpd_is_always_on(genpd) && !genpd_status_on(genpd))
+		return -EINVAL;
+
 	/* Use only one "off" state if there were no states declared */
 	if (genpd->state_count == 0) {
 		ret = genpd_set_default_power_state(genpd);
@@ -1700,12 +1714,12 @@ int of_genpd_add_provider_simple(struct device_node *np,
 
 	mutex_lock(&gpd_list_lock);
 
-	if (pm_genpd_present(genpd))
+	if (pm_genpd_present(genpd)) {
 		ret = genpd_add_provider(np, genpd_xlate_simple, genpd);
-
-	if (!ret) {
-		genpd->provider = &np->fwnode;
-		genpd->has_provider = true;
+		if (!ret) {
+			genpd->provider = &np->fwnode;
+			genpd->has_provider = true;
+		}
 	}
 
 	mutex_unlock(&gpd_list_lock);
@@ -2079,11 +2093,6 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state,
 	int err;
 	u32 residency;
 	u32 entry_latency, exit_latency;
-	const struct of_device_id *match_id;
-
-	match_id = of_match_node(idle_state_match, state_node);
-	if (!match_id)
-		return -EINVAL;
 
 	err = of_property_read_u32(state_node, "entry-latency-us",
 						&entry_latency);
@@ -2132,6 +2141,7 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 	int err, ret;
 	int count;
 	struct of_phandle_iterator it;
+	const struct of_device_id *match_id;
 
 	count = of_count_phandle_with_args(dn, "domain-idle-states", NULL);
 	if (count <= 0)
@@ -2144,6 +2154,9 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 	/* Loop over the phandles until all the requested entry is found */
 	of_for_each_phandle(&it, err, dn, "domain-idle-states", NULL, 0) {
 		np = it.node;
+		match_id = of_match_node(idle_state_match, np);
+		if (!match_id)
+			continue;
 		ret = genpd_parse_state(&st[i++], np);
 		if (ret) {
 			pr_err
@@ -2155,8 +2168,11 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 		}
 	}
 
-	*n = count;
-	*states = st;
+	*n = i;
+	if (!i)
+		kfree(st);
+	else
+		*states = st;
 
 	return 0;
 }
@@ -2221,7 +2237,7 @@ static int pm_genpd_summary_one(struct seq_file *s,
 
 	if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup)))
 		goto exit;
-	if (genpd->status == GPD_STATE_POWER_OFF)
+	if (!genpd_status_on(genpd))
 		snprintf(state, sizeof(state), "%s-%u",
 			 status_lookup[genpd->status], genpd->state_idx);
 	else
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 74fa5c5904d3..74ed7e9a7f27 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -247,6 +247,12 @@ config ARM_TEGRA124_CPUFREQ
 	help
 	  This adds the CPUFreq driver support for Tegra124 SOCs.
 
+config ARM_TEGRA186_CPUFREQ
+	tristate "Tegra186 CPUFreq support"
+	depends on ARCH_TEGRA && TEGRA_BPMP
+	help
+	  This adds the CPUFreq driver support for Tegra186 SOCs.
+
 config ARM_TI_CPUFREQ
 	bool "Texas Instruments CPUFreq support"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9f5a8045f36d..b7e78f063c4f 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_ARM_SPEAR_CPUFREQ)		+= spear-cpufreq.o
 obj-$(CONFIG_ARM_STI_CPUFREQ)		+= sti-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
+obj-$(CONFIG_ARM_TEGRA186_CPUFREQ)	+= tegra186-cpufreq.o
 obj-$(CONFIG_ARM_TI_CPUFREQ)		+= ti-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c
index 5c3ec1dd4921..3575b82210ba 100644
--- a/drivers/cpufreq/dbx500-cpufreq.c
+++ b/drivers/cpufreq/dbx500-cpufreq.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/cpufreq.h>
+#include <linux/cpu_cooling.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
@@ -18,6 +19,7 @@
 
 static struct cpufreq_frequency_table *freq_table;
 static struct clk *armss_clk;
+static struct thermal_cooling_device *cdev;
 
 static int dbx500_cpufreq_target(struct cpufreq_policy *policy,
 				unsigned int index)
@@ -32,6 +34,22 @@ static int dbx500_cpufreq_init(struct cpufreq_policy *policy)
 	return cpufreq_generic_init(policy, freq_table, 20 * 1000);
 }
 
+static int dbx500_cpufreq_exit(struct cpufreq_policy *policy)
+{
+	if (!IS_ERR(cdev))
+		cpufreq_cooling_unregister(cdev);
+	return 0;
+}
+
+static void dbx500_cpufreq_ready(struct cpufreq_policy *policy)
+{
+	cdev = cpufreq_cooling_register(policy->cpus);
+	if (IS_ERR(cdev))
+		pr_err("Failed to register cooling device %ld\n", PTR_ERR(cdev));
+	else
+		pr_info("Cooling device registered: %s\n", cdev->type);
+}
+
 static struct cpufreq_driver dbx500_cpufreq_driver = {
 	.flags  = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS |
 			CPUFREQ_NEED_INITIAL_FREQ_CHECK,
@@ -39,6 +57,8 @@ static struct cpufreq_driver dbx500_cpufreq_driver = {
 	.target_index = dbx500_cpufreq_target,
 	.get    = cpufreq_generic_get,
 	.init   = dbx500_cpufreq_init,
+	.exit  = dbx500_cpufreq_exit,
+	.ready  = dbx500_cpufreq_ready,
 	.name   = "DBX500",
 	.attr   = cpufreq_generic_attr,
 };
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 7719b02e04f5..9c13f097fd8c 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -161,8 +161,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
 
 static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
 {
+	int ret;
+
 	policy->clk = arm_clk;
-	return cpufreq_generic_init(policy, freq_table, transition_latency);
+	ret = cpufreq_generic_init(policy, freq_table, transition_latency);
+	policy->suspend_freq = policy->max;
+
+	return ret;
 }
 
 static struct cpufreq_driver imx6q_cpufreq_driver = {
@@ -173,6 +178,7 @@ static struct cpufreq_driver imx6q_cpufreq_driver = {
 	.init = imx6q_cpufreq_init,
 	.name = "imx6q-cpufreq",
 	.attr = cpufreq_generic_attr,
+	.suspend = cpufreq_generic_suspend,
 };
 
 static int imx6q_cpufreq_probe(struct platform_device *pdev)
@@ -222,6 +228,13 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	arm_reg = regulator_get(cpu_dev, "arm");
 	pu_reg = regulator_get_optional(cpu_dev, "pu");
 	soc_reg = regulator_get(cpu_dev, "soc");
+	if (PTR_ERR(arm_reg) == -EPROBE_DEFER ||
+			PTR_ERR(soc_reg) == -EPROBE_DEFER ||
+			PTR_ERR(pu_reg) == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		dev_dbg(cpu_dev, "regulators not ready, defer\n");
+		goto put_reg;
+	}
 	if (IS_ERR(arm_reg) || IS_ERR(soc_reg)) {
 		dev_err(cpu_dev, "failed to get regulators\n");
 		ret = -ENOENT;
@@ -255,7 +268,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
-		goto put_reg;
+		goto out_free_opp;
 	}
 
 	/* Make imx6_soc_volt array's size same as arm opp number */
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 283491f742d3..b7de5bd76a31 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -37,7 +37,11 @@
 #include <asm/cpufeature.h>
 #include <asm/intel-family.h>
 
+#define INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)
+#define INTEL_PSTATE_HWP_SAMPLING_INTERVAL	(50 * NSEC_PER_MSEC)
+
 #define INTEL_CPUFREQ_TRANSITION_LATENCY	20000
+#define INTEL_CPUFREQ_TRANSITION_DELAY		500
 
 #ifdef CONFIG_ACPI
 #include <acpi/processor.h>
@@ -74,6 +78,11 @@ static inline int ceiling_fp(int32_t x)
 	return ret;
 }
 
+static inline int32_t percent_fp(int percent)
+{
+	return div_fp(percent, 100);
+}
+
 static inline u64 mul_ext_fp(u64 x, u64 y)
 {
 	return (x * y) >> EXT_FRAC_BITS;
@@ -186,45 +195,22 @@ struct _pid {
 };
 
 /**
- * struct perf_limits - Store user and policy limits
- * @no_turbo:		User requested turbo state from intel_pstate sysfs
- * @turbo_disabled:	Platform turbo status either from msr
- *			MSR_IA32_MISC_ENABLE or when maximum available pstate
- *			matches the maximum turbo pstate
- * @max_perf_pct:	Effective maximum performance limit in percentage, this
- *			is minimum of either limits enforced by cpufreq policy
- *			or limits from user set limits via intel_pstate sysfs
- * @min_perf_pct:	Effective minimum performance limit in percentage, this
- *			is maximum of either limits enforced by cpufreq policy
- *			or limits from user set limits via intel_pstate sysfs
- * @max_perf:		This is a scaled value between 0 to 255 for max_perf_pct
- *			This value is used to limit max pstate
- * @min_perf:		This is a scaled value between 0 to 255 for min_perf_pct
- *			This value is used to limit min pstate
- * @max_policy_pct:	The maximum performance in percentage enforced by
- *			cpufreq setpolicy interface
- * @max_sysfs_pct:	The maximum performance in percentage enforced by
- *			intel pstate sysfs interface, unused when per cpu
- *			controls are enforced
- * @min_policy_pct:	The minimum performance in percentage enforced by
- *			cpufreq setpolicy interface
- * @min_sysfs_pct:	The minimum performance in percentage enforced by
- *			intel pstate sysfs interface, unused when per cpu
- *			controls are enforced
- *
- * Storage for user and policy defined limits.
+ * struct global_params - Global parameters, mostly tunable via sysfs.
+ * @no_turbo:		Whether or not to use turbo P-states.
+ * @turbo_disabled:	Whethet or not turbo P-states are available at all,
+ *			based on the MSR_IA32_MISC_ENABLE value and whether or
+ *			not the maximum reported turbo P-state is different from
+ *			the maximum reported non-turbo one.
+ * @min_perf_pct:	Minimum capacity limit in percent of the maximum turbo
+ *			P-state capacity.
+ * @max_perf_pct:	Maximum capacity limit in percent of the maximum turbo
+ *			P-state capacity.
  */
-struct perf_limits {
-	int no_turbo;
-	int turbo_disabled;
+struct global_params {
+	bool no_turbo;
+	bool turbo_disabled;
 	int max_perf_pct;
 	int min_perf_pct;
-	int32_t max_perf;
-	int32_t min_perf;
-	int max_policy_pct;
-	int max_sysfs_pct;
-	int min_policy_pct;
-	int min_sysfs_pct;
 };
 
 /**
@@ -245,9 +231,10 @@ struct perf_limits {
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *			current sample
  * @sample:		Storage for storing last Sample data
- * @perf_limits:	Pointer to perf_limit unique to this CPU
- *			Not all field in the structure are applicable
- *			when per cpu controls are enforced
+ * @min_perf:		Minimum capacity limit as a fraction of the maximum
+ *			turbo P-state capacity.
+ * @max_perf:		Maximum capacity limit as a fraction of the maximum
+ *			turbo P-state capacity.
  * @acpi_perf_data:	Stores ACPI perf information read from _PSS
  * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
  * @epp_powersave:	Last saved HWP energy performance preference
@@ -279,7 +266,8 @@ struct cpudata {
 	u64	prev_tsc;
 	u64	prev_cummulative_iowait;
 	struct sample sample;
-	struct perf_limits *perf_limits;
+	int32_t	min_perf;
+	int32_t	max_perf;
 #ifdef CONFIG_ACPI
 	struct acpi_processor_performance acpi_perf_data;
 	bool valid_pss_table;
@@ -324,7 +312,7 @@ struct pstate_adjust_policy {
  * @get_scaling:	Callback to get frequency scaling factor
  * @get_val:		Callback to convert P state to actual MSR write value
  * @get_vid:		Callback to get VID data for Atom platforms
- * @get_target_pstate:	Callback to a function to calculate next P state to use
+ * @update_util:	Active mode utilization update callback.
  *
  * Core and Atom CPU models have different way to get P State limits. This
  * structure is used to store those callbacks.
@@ -337,43 +325,31 @@ struct pstate_funcs {
 	int (*get_scaling)(void);
 	u64 (*get_val)(struct cpudata*, int pstate);
 	void (*get_vid)(struct cpudata *);
-	int32_t (*get_target_pstate)(struct cpudata *);
+	void (*update_util)(struct update_util_data *data, u64 time,
+			    unsigned int flags);
 };
 
-/**
- * struct cpu_defaults- Per CPU model default config data
- * @pid_policy:	PID config data
- * @funcs:		Callback function data
- */
-struct cpu_defaults {
-	struct pstate_adjust_policy pid_policy;
-	struct pstate_funcs funcs;
-};
-
-static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
-static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
-
-static struct pstate_adjust_policy pid_params __read_mostly;
 static struct pstate_funcs pstate_funcs __read_mostly;
+static struct pstate_adjust_policy pid_params __read_mostly = {
+	.sample_rate_ms = 10,
+	.sample_rate_ns = 10 * NSEC_PER_MSEC,
+	.deadband = 0,
+	.setpoint = 97,
+	.p_gain_pct = 20,
+	.d_gain_pct = 0,
+	.i_gain_pct = 0,
+};
+
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
 
-static bool driver_registered __read_mostly;
+static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
 #ifdef CONFIG_ACPI
 static bool acpi_ppc;
 #endif
 
-static struct perf_limits global;
-
-static void intel_pstate_init_limits(struct perf_limits *limits)
-{
-	memset(limits, 0, sizeof(*limits));
-	limits->max_perf_pct = 100;
-	limits->max_perf = int_ext_tofp(1);
-	limits->max_policy_pct = 100;
-	limits->max_sysfs_pct = 100;
-}
+static struct global_params global;
 
 static DEFINE_MUTEX(intel_pstate_driver_lock);
 static DEFINE_MUTEX(intel_pstate_limits_lock);
@@ -530,29 +506,6 @@ static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 }
 #endif
 
-static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
-			     int deadband, int integral) {
-	pid->setpoint = int_tofp(setpoint);
-	pid->deadband  = int_tofp(deadband);
-	pid->integral  = int_tofp(integral);
-	pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
-}
-
-static inline void pid_p_gain_set(struct _pid *pid, int percent)
-{
-	pid->p_gain = div_fp(percent, 100);
-}
-
-static inline void pid_i_gain_set(struct _pid *pid, int percent)
-{
-	pid->i_gain = div_fp(percent, 100);
-}
-
-static inline void pid_d_gain_set(struct _pid *pid, int percent)
-{
-	pid->d_gain = div_fp(percent, 100);
-}
-
 static signed int pid_calc(struct _pid *pid, int32_t busy)
 {
 	signed int result;
@@ -590,23 +543,17 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
 	return (signed int)fp_toint(result);
 }
 
-static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
+static inline void intel_pstate_pid_reset(struct cpudata *cpu)
 {
-	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
-	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
-	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);
+	struct _pid *pid = &cpu->pid;
 
-	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
-}
-
-static inline void intel_pstate_reset_all_pid(void)
-{
-	unsigned int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (all_cpu_data[cpu])
-			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
-	}
+	pid->p_gain = percent_fp(pid_params.p_gain_pct);
+	pid->d_gain = percent_fp(pid_params.d_gain_pct);
+	pid->i_gain = percent_fp(pid_params.i_gain_pct);
+	pid->setpoint = int_tofp(pid_params.setpoint);
+	pid->last_err  = pid->setpoint - int_tofp(100);
+	pid->deadband  = int_tofp(pid_params.deadband);
+	pid->integral  = 0;
 }
 
 static inline void update_turbo_state(void)
@@ -621,6 +568,14 @@ static inline void update_turbo_state(void)
 		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
 
+static int min_perf_pct_min(void)
+{
+	struct cpudata *cpu = all_cpu_data[0];
+
+	return DIV_ROUND_UP(cpu->pstate.min_pstate * 100,
+			    cpu->pstate.turbo_pstate);
+}
+
 static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
 {
 	u64 epb;
@@ -838,96 +793,80 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 	NULL,
 };
 
-static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
+static void intel_pstate_hwp_set(unsigned int cpu)
 {
-	int min, hw_min, max, hw_max, cpu;
-	struct perf_limits *perf_limits = &global;
+	struct cpudata *cpu_data = all_cpu_data[cpu];
+	int min, hw_min, max, hw_max;
 	u64 value, cap;
+	s16 epp;
 
-	for_each_cpu(cpu, policy->cpus) {
-		struct cpudata *cpu_data = all_cpu_data[cpu];
-		s16 epp;
+	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+	hw_min = HWP_LOWEST_PERF(cap);
+	if (global.no_turbo)
+		hw_max = HWP_GUARANTEED_PERF(cap);
+	else
+		hw_max = HWP_HIGHEST_PERF(cap);
 
-		if (per_cpu_limits)
-			perf_limits = all_cpu_data[cpu]->perf_limits;
+	max = fp_ext_toint(hw_max * cpu_data->max_perf);
+	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
+		min = max;
+	else
+		min = fp_ext_toint(hw_max * cpu_data->min_perf);
 
-		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
-		hw_min = HWP_LOWEST_PERF(cap);
-		if (global.no_turbo)
-			hw_max = HWP_GUARANTEED_PERF(cap);
-		else
-			hw_max = HWP_HIGHEST_PERF(cap);
+	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 
-		max = fp_ext_toint(hw_max * perf_limits->max_perf);
-		if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
-			min = max;
-		else
-			min = fp_ext_toint(hw_max * perf_limits->min_perf);
+	value &= ~HWP_MIN_PERF(~0L);
+	value |= HWP_MIN_PERF(min);
 
-		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
+	value &= ~HWP_MAX_PERF(~0L);
+	value |= HWP_MAX_PERF(max);
 
-		value &= ~HWP_MIN_PERF(~0L);
-		value |= HWP_MIN_PERF(min);
+	if (cpu_data->epp_policy == cpu_data->policy)
+		goto skip_epp;
 
-		value &= ~HWP_MAX_PERF(~0L);
-		value |= HWP_MAX_PERF(max);
+	cpu_data->epp_policy = cpu_data->policy;
 
-		if (cpu_data->epp_policy == cpu_data->policy)
+	if (cpu_data->epp_saved >= 0) {
+		epp = cpu_data->epp_saved;
+		cpu_data->epp_saved = -EINVAL;
+		goto update_epp;
+	}
+
+	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		epp = intel_pstate_get_epp(cpu_data, value);
+		cpu_data->epp_powersave = epp;
+		/* If EPP read was failed, then don't try to write */
+		if (epp < 0)
 			goto skip_epp;
 
-		cpu_data->epp_policy = cpu_data->policy;
+		epp = 0;
+	} else {
+		/* skip setting EPP, when saved value is invalid */
+		if (cpu_data->epp_powersave < 0)
+			goto skip_epp;
 
-		if (cpu_data->epp_saved >= 0) {
-			epp = cpu_data->epp_saved;
-			cpu_data->epp_saved = -EINVAL;
-			goto update_epp;
-		}
+		/*
+		 * No need to restore EPP when it is not zero. This
+		 * means:
+		 *  - Policy is not changed
+		 *  - user has manually changed
+		 *  - Error reading EPB
+		 */
+		epp = intel_pstate_get_epp(cpu_data, value);
+		if (epp)
+			goto skip_epp;
 
-		if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
-			epp = intel_pstate_get_epp(cpu_data, value);
-			cpu_data->epp_powersave = epp;
-			/* If EPP read was failed, then don't try to write */
-			if (epp < 0)
-				goto skip_epp;
-
-
-			epp = 0;
-		} else {
-			/* skip setting EPP, when saved value is invalid */
-			if (cpu_data->epp_powersave < 0)
-				goto skip_epp;
-
-			/*
-			 * No need to restore EPP when it is not zero. This
-			 * means:
-			 *  - Policy is not changed
-			 *  - user has manually changed
-			 *  - Error reading EPB
-			 */
-			epp = intel_pstate_get_epp(cpu_data, value);
-			if (epp)
-				goto skip_epp;
-
-			epp = cpu_data->epp_powersave;
-		}
-update_epp:
-		if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
-			value &= ~GENMASK_ULL(31, 24);
-			value |= (u64)epp << 24;
-		} else {
-			intel_pstate_set_epb(cpu, epp);
-		}
-skip_epp:
-		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
+		epp = cpu_data->epp_powersave;
 	}
-}
-
-static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
-{
-	if (hwp_active)
-		intel_pstate_hwp_set(policy);
-
-	return 0;
+update_epp:
+	if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+		value &= ~GENMASK_ULL(31, 24);
+		value |= (u64)epp << 24;
+	} else {
+		intel_pstate_set_epb(cpu, epp);
+	}
+skip_epp:
+	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
 static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
@@ -944,20 +883,17 @@ static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
 
 static int intel_pstate_resume(struct cpufreq_policy *policy)
 {
-	int ret;
-
 	if (!hwp_active)
 		return 0;
 
 	mutex_lock(&intel_pstate_limits_lock);
 
 	all_cpu_data[policy->cpu]->epp_policy = 0;
-
-	ret = intel_pstate_hwp_set_policy(policy);
+	intel_pstate_hwp_set(policy->cpu);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
-	return ret;
+	return 0;
 }
 
 static void intel_pstate_update_policies(void)
@@ -971,9 +907,14 @@ static void intel_pstate_update_policies(void)
 /************************** debugfs begin ************************/
 static int pid_param_set(void *data, u64 val)
 {
+	unsigned int cpu;
+
 	*(u32 *)data = val;
 	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
-	intel_pstate_reset_all_pid();
+	for_each_possible_cpu(cpu)
+		if (all_cpu_data[cpu])
+			intel_pstate_pid_reset(all_cpu_data[cpu]);
+
 	return 0;
 }
 
@@ -1084,7 +1025,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1109,7 +1050,7 @@ static ssize_t show_num_pstates(struct kobject *kobj,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1129,7 +1070,7 @@ static ssize_t show_no_turbo(struct kobject *kobj,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1157,7 +1098,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1174,6 +1115,15 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
 	global.no_turbo = clamp_t(int, input, 0, 1);
 
+	if (global.no_turbo) {
+		struct cpudata *cpu = all_cpu_data[0];
+		int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
+
+		/* Squash the global minimum into the permitted range. */
+		if (global.min_perf_pct > pct)
+			global.min_perf_pct = pct;
+	}
+
 	mutex_unlock(&intel_pstate_limits_lock);
 
 	intel_pstate_update_policies();
@@ -1195,18 +1145,14 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
 
 	mutex_lock(&intel_pstate_limits_lock);
 
-	global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
-	global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
-	global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
-	global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
-	global.max_perf = percent_ext_fp(global.max_perf_pct);
+	global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
@@ -1229,18 +1175,15 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
 
 	mutex_lock(&intel_pstate_limits_lock);
 
-	global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
-	global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
-	global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
-	global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
-	global.min_perf = percent_ext_fp(global.min_perf_pct);
+	global.min_perf_pct = clamp_t(int, input,
+				      min_perf_pct_min(), global.max_perf_pct);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
@@ -1554,132 +1497,10 @@ static int knl_get_turbo_pstate(void)
 	return ret;
 }
 
-static struct cpu_defaults core_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 97,
-		.p_gain_pct = 20,
-		.d_gain_pct = 0,
-		.i_gain_pct = 0,
-	},
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = core_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.get_target_pstate = get_target_pstate_use_performance,
-	},
-};
-
-static const struct cpu_defaults silvermont_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 60,
-		.p_gain_pct = 14,
-		.d_gain_pct = 0,
-		.i_gain_pct = 4,
-	},
-	.funcs = {
-		.get_max = atom_get_max_pstate,
-		.get_max_physical = atom_get_max_pstate,
-		.get_min = atom_get_min_pstate,
-		.get_turbo = atom_get_turbo_pstate,
-		.get_val = atom_get_val,
-		.get_scaling = silvermont_get_scaling,
-		.get_vid = atom_get_vid,
-		.get_target_pstate = get_target_pstate_use_cpu_load,
-	},
-};
-
-static const struct cpu_defaults airmont_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 60,
-		.p_gain_pct = 14,
-		.d_gain_pct = 0,
-		.i_gain_pct = 4,
-	},
-	.funcs = {
-		.get_max = atom_get_max_pstate,
-		.get_max_physical = atom_get_max_pstate,
-		.get_min = atom_get_min_pstate,
-		.get_turbo = atom_get_turbo_pstate,
-		.get_val = atom_get_val,
-		.get_scaling = airmont_get_scaling,
-		.get_vid = atom_get_vid,
-		.get_target_pstate = get_target_pstate_use_cpu_load,
-	},
-};
-
-static const struct cpu_defaults knl_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 97,
-		.p_gain_pct = 20,
-		.d_gain_pct = 0,
-		.i_gain_pct = 0,
-	},
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = knl_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.get_target_pstate = get_target_pstate_use_performance,
-	},
-};
-
-static const struct cpu_defaults bxt_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 60,
-		.p_gain_pct = 14,
-		.d_gain_pct = 0,
-		.i_gain_pct = 4,
-	},
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = core_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.get_target_pstate = get_target_pstate_use_cpu_load,
-	},
-};
-
-static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
+static int intel_pstate_get_base_pstate(struct cpudata *cpu)
 {
-	int max_perf = cpu->pstate.turbo_pstate;
-	int max_perf_adj;
-	int min_perf;
-	struct perf_limits *perf_limits = &global;
-
-	if (global.no_turbo || global.turbo_disabled)
-		max_perf = cpu->pstate.max_pstate;
-
-	if (per_cpu_limits)
-		perf_limits = cpu->perf_limits;
-
-	/*
-	 * performance can be limited by user through sysfs, by cpufreq
-	 * policy, or by cpu specific default values determined through
-	 * experimentation.
-	 */
-	max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf);
-	*max = clamp_t(int, max_perf_adj,
-			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
-
-	min_perf = fp_ext_toint(max_perf * perf_limits->min_perf);
-	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+	return global.no_turbo || global.turbo_disabled ?
+			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
 }
 
 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
@@ -1702,11 +1523,13 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu)
 
 static void intel_pstate_max_within_limits(struct cpudata *cpu)
 {
-	int min_pstate, max_pstate;
+	int pstate;
 
 	update_turbo_state();
-	intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate);
-	intel_pstate_set_pstate(cpu, max_pstate);
+	pstate = intel_pstate_get_base_pstate(cpu);
+	pstate = max(cpu->pstate.min_pstate,
+		     fp_ext_toint(pstate * cpu->max_perf));
+	intel_pstate_set_pstate(cpu, pstate);
 }
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
@@ -1767,7 +1590,11 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
 	 * that sample.time will always be reset before setting the utilization
 	 * update hook and make the caller skip the sample then.
 	 */
-	return !!cpu->last_sample_time;
+	if (cpu->last_sample_time) {
+		intel_pstate_calc_avg_perf(cpu);
+		return true;
+	}
+	return false;
 }
 
 static inline int32_t get_avg_frequency(struct cpudata *cpu)
@@ -1788,6 +1615,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 	int32_t busy_frac, boost;
 	int target, avg_pstate;
 
+	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE)
+		return cpu->pstate.turbo_pstate;
+
 	busy_frac = div_fp(sample->mperf, sample->tsc);
 
 	boost = cpu->iowait_boost;
@@ -1824,6 +1654,9 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
 	u64 duration_ns;
 
+	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE)
+		return cpu->pstate.turbo_pstate;
+
 	/*
 	 * perf_scaled is the ratio of the average P-state during the last
 	 * sampling period to the P-state requested last time (in percent).
@@ -1858,11 +1691,13 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 
 static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
 {
-	int max_perf, min_perf;
+	int max_pstate = intel_pstate_get_base_pstate(cpu);
+	int min_pstate;
 
-	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
-	pstate = clamp_t(int, pstate, min_perf, max_perf);
-	return pstate;
+	min_pstate = max(cpu->pstate.min_pstate,
+			 fp_ext_toint(max_pstate * cpu->min_perf));
+	max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf));
+	return clamp_t(int, pstate, min_pstate, max_pstate);
 }
 
 static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
@@ -1874,16 +1709,11 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
 }
 
-static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
+static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate)
 {
-	int from, target_pstate;
+	int from = cpu->pstate.current_pstate;
 	struct sample *sample;
 
-	from = cpu->pstate.current_pstate;
-
-	target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ?
-		cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu);
-
 	update_turbo_state();
 
 	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
@@ -1902,76 +1732,155 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 		fp_toint(cpu->iowait_boost * 100));
 }
 
+static void intel_pstate_update_util_hwp(struct update_util_data *data,
+					 u64 time, unsigned int flags)
+{
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+	u64 delta_ns = time - cpu->sample.time;
+
+	if ((s64)delta_ns >= INTEL_PSTATE_HWP_SAMPLING_INTERVAL)
+		intel_pstate_sample(cpu, time);
+}
+
+static void intel_pstate_update_util_pid(struct update_util_data *data,
+					 u64 time, unsigned int flags)
+{
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+	u64 delta_ns = time - cpu->sample.time;
+
+	if ((s64)delta_ns < pid_params.sample_rate_ns)
+		return;
+
+	if (intel_pstate_sample(cpu, time)) {
+		int target_pstate;
+
+		target_pstate = get_target_pstate_use_performance(cpu);
+		intel_pstate_adjust_pstate(cpu, target_pstate);
+	}
+}
+
 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 				     unsigned int flags)
 {
 	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
 	u64 delta_ns;
 
-	if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) {
-		if (flags & SCHED_CPUFREQ_IOWAIT) {
-			cpu->iowait_boost = int_tofp(1);
-		} else if (cpu->iowait_boost) {
-			/* Clear iowait_boost if the CPU may have been idle. */
-			delta_ns = time - cpu->last_update;
-			if (delta_ns > TICK_NSEC)
-				cpu->iowait_boost = 0;
-		}
-		cpu->last_update = time;
+	if (flags & SCHED_CPUFREQ_IOWAIT) {
+		cpu->iowait_boost = int_tofp(1);
+	} else if (cpu->iowait_boost) {
+		/* Clear iowait_boost if the CPU may have been idle. */
+		delta_ns = time - cpu->last_update;
+		if (delta_ns > TICK_NSEC)
+			cpu->iowait_boost = 0;
 	}
-
+	cpu->last_update = time;
 	delta_ns = time - cpu->sample.time;
-	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
-		bool sample_taken = intel_pstate_sample(cpu, time);
+	if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL)
+		return;
 
-		if (sample_taken) {
-			intel_pstate_calc_avg_perf(cpu);
-			if (!hwp_active)
-				intel_pstate_adjust_busy_pstate(cpu);
-		}
+	if (intel_pstate_sample(cpu, time)) {
+		int target_pstate;
+
+		target_pstate = get_target_pstate_use_cpu_load(cpu);
+		intel_pstate_adjust_pstate(cpu, target_pstate);
 	}
 }
 
+static struct pstate_funcs core_funcs = {
+	.get_max = core_get_max_pstate,
+	.get_max_physical = core_get_max_pstate_physical,
+	.get_min = core_get_min_pstate,
+	.get_turbo = core_get_turbo_pstate,
+	.get_scaling = core_get_scaling,
+	.get_val = core_get_val,
+	.update_util = intel_pstate_update_util_pid,
+};
+
+static const struct pstate_funcs silvermont_funcs = {
+	.get_max = atom_get_max_pstate,
+	.get_max_physical = atom_get_max_pstate,
+	.get_min = atom_get_min_pstate,
+	.get_turbo = atom_get_turbo_pstate,
+	.get_val = atom_get_val,
+	.get_scaling = silvermont_get_scaling,
+	.get_vid = atom_get_vid,
+	.update_util = intel_pstate_update_util,
+};
+
+static const struct pstate_funcs airmont_funcs = {
+	.get_max = atom_get_max_pstate,
+	.get_max_physical = atom_get_max_pstate,
+	.get_min = atom_get_min_pstate,
+	.get_turbo = atom_get_turbo_pstate,
+	.get_val = atom_get_val,
+	.get_scaling = airmont_get_scaling,
+	.get_vid = atom_get_vid,
+	.update_util = intel_pstate_update_util,
+};
+
+static const struct pstate_funcs knl_funcs = {
+	.get_max = core_get_max_pstate,
+	.get_max_physical = core_get_max_pstate_physical,
+	.get_min = core_get_min_pstate,
+	.get_turbo = knl_get_turbo_pstate,
+	.get_scaling = core_get_scaling,
+	.get_val = core_get_val,
+	.update_util = intel_pstate_update_util_pid,
+};
+
+static const struct pstate_funcs bxt_funcs = {
+	.get_max = core_get_max_pstate,
+	.get_max_physical = core_get_max_pstate_physical,
+	.get_min = core_get_min_pstate,
+	.get_turbo = core_get_turbo_pstate,
+	.get_scaling = core_get_scaling,
+	.get_val = core_get_val,
+	.update_util = intel_pstate_update_util,
+};
+
 #define ICPU(model, policy) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
 			(unsigned long)&policy }
 
 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
-	ICPU(INTEL_FAM6_SANDYBRIDGE, 		core_params),
-	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		core_params),
-	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	silvermont_params),
-	ICPU(INTEL_FAM6_IVYBRIDGE,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_CORE,		core_params),
-	ICPU(INTEL_FAM6_BROADWELL_CORE,		core_params),
-	ICPU(INTEL_FAM6_IVYBRIDGE_X,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_X,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_ULT,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_GT3E,		core_params),
-	ICPU(INTEL_FAM6_BROADWELL_GT3E,		core_params),
-	ICPU(INTEL_FAM6_ATOM_AIRMONT,		airmont_params),
-	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		core_params),
-	ICPU(INTEL_FAM6_BROADWELL_X,		core_params),
-	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_params),
-	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_params),
-	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_params),
-	ICPU(INTEL_FAM6_XEON_PHI_KNM,		knl_params),
-	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		bxt_params),
+	ICPU(INTEL_FAM6_SANDYBRIDGE, 		core_funcs),
+	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		core_funcs),
+	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	silvermont_funcs),
+	ICPU(INTEL_FAM6_IVYBRIDGE,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_CORE,		core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_CORE,		core_funcs),
+	ICPU(INTEL_FAM6_IVYBRIDGE_X,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_X,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_ULT,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_GT3E,		core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_GT3E,		core_funcs),
+	ICPU(INTEL_FAM6_ATOM_AIRMONT,		airmont_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_X,		core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_funcs),
+	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_funcs),
+	ICPU(INTEL_FAM6_XEON_PHI_KNM,		knl_funcs),
+	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		bxt_funcs),
+	ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE,       bxt_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
 
 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
-	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
-	ICPU(INTEL_FAM6_BROADWELL_X, core_params),
-	ICPU(INTEL_FAM6_SKYLAKE_X, core_params),
+	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_X, core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
 	{}
 };
 
 static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
-	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params),
+	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_funcs),
 	{}
 };
 
+static bool pid_in_use(void);
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
@@ -1979,18 +1888,11 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 	cpu = all_cpu_data[cpunum];
 
 	if (!cpu) {
-		unsigned int size = sizeof(struct cpudata);
-
-		if (per_cpu_limits)
-			size += sizeof(struct perf_limits);
-
-		cpu = kzalloc(size, GFP_KERNEL);
+		cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
 		if (!cpu)
 			return -ENOMEM;
 
 		all_cpu_data[cpunum] = cpu;
-		if (per_cpu_limits)
-			cpu->perf_limits = (struct perf_limits *)(cpu + 1);
 
 		cpu->epp_default = -EINVAL;
 		cpu->epp_powersave = -EINVAL;
@@ -2009,14 +1911,12 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
-		pid_params.sample_rate_ms = 50;
-		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
+	} else if (pid_in_use()) {
+		intel_pstate_pid_reset(cpu);
 	}
 
 	intel_pstate_get_cpu_pstates(cpu);
 
-	intel_pstate_busy_pid_reset(cpu);
-
 	pr_debug("controlling: cpu %d\n", cpunum);
 
 	return 0;
@@ -2039,7 +1939,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-				     intel_pstate_update_util);
+				     pstate_funcs.update_util);
 	cpu->update_util_set = true;
 }
 
@@ -2055,46 +1955,68 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 	synchronize_sched();
 }
 
-static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
-					    struct perf_limits *limits)
+static int intel_pstate_get_max_freq(struct cpudata *cpu)
 {
+	return global.turbo_disabled || global.no_turbo ?
+			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+}
+
+static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
+					    struct cpudata *cpu)
+{
+	int max_freq = intel_pstate_get_max_freq(cpu);
 	int32_t max_policy_perf, min_policy_perf;
 
-	max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
+	max_policy_perf = div_ext_fp(policy->max, max_freq);
 	max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
 	if (policy->max == policy->min) {
 		min_policy_perf = max_policy_perf;
 	} else {
-		min_policy_perf = div_ext_fp(policy->min,
-					     policy->cpuinfo.max_freq);
+		min_policy_perf = div_ext_fp(policy->min, max_freq);
 		min_policy_perf = clamp_t(int32_t, min_policy_perf,
 					  0, max_policy_perf);
 	}
 
 	/* Normalize user input to [min_perf, max_perf] */
-	limits->min_perf = max(min_policy_perf,
-			       percent_ext_fp(limits->min_sysfs_pct));
-	limits->min_perf = min(limits->min_perf, max_policy_perf);
-	limits->max_perf = min(max_policy_perf,
-			       percent_ext_fp(limits->max_sysfs_pct));
-	limits->max_perf = max(min_policy_perf, limits->max_perf);
+	if (per_cpu_limits) {
+		cpu->min_perf = min_policy_perf;
+		cpu->max_perf = max_policy_perf;
+	} else {
+		int32_t global_min, global_max;
 
-	/* Make sure min_perf <= max_perf */
-	limits->min_perf = min(limits->min_perf, limits->max_perf);
+		/* Global limits are in percent of the maximum turbo P-state. */
+		global_max = percent_ext_fp(global.max_perf_pct);
+		global_min = percent_ext_fp(global.min_perf_pct);
+		if (max_freq != cpu->pstate.turbo_freq) {
+			int32_t turbo_factor;
 
-	limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
-	limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
-	limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
-	limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
+			turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate,
+						  cpu->pstate.max_pstate);
+			global_min = mul_ext_fp(global_min, turbo_factor);
+			global_max = mul_ext_fp(global_max, turbo_factor);
+		}
+		global_min = clamp_t(int32_t, global_min, 0, global_max);
+
+		cpu->min_perf = max(min_policy_perf, global_min);
+		cpu->min_perf = min(cpu->min_perf, max_policy_perf);
+		cpu->max_perf = min(max_policy_perf, global_max);
+		cpu->max_perf = max(min_policy_perf, cpu->max_perf);
+
+		/* Make sure min_perf <= max_perf */
+		cpu->min_perf = min(cpu->min_perf, cpu->max_perf);
+	}
+
+	cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS);
+	cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS);
 
 	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
-		 limits->max_perf_pct, limits->min_perf_pct);
+		 fp_ext_toint(cpu->max_perf * 100),
+		 fp_ext_toint(cpu->min_perf * 100));
 }
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu;
-	struct perf_limits *perf_limits = &global;
 
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
@@ -2105,19 +2027,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	cpu = all_cpu_data[policy->cpu];
 	cpu->policy = policy->policy;
 
-	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
-	    policy->max < policy->cpuinfo.max_freq &&
-	    policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
-		pr_debug("policy->max > max non turbo frequency\n");
-		policy->max = policy->cpuinfo.max_freq;
-	}
-
-	if (per_cpu_limits)
-		perf_limits = cpu->perf_limits;
-
 	mutex_lock(&intel_pstate_limits_lock);
 
-	intel_pstate_update_perf_limits(policy, perf_limits);
+	intel_pstate_update_perf_limits(policy, cpu);
 
 	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		/*
@@ -2130,38 +2042,38 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
 	intel_pstate_set_update_util_hook(policy->cpu);
 
-	intel_pstate_hwp_set_policy(policy);
+	if (hwp_active)
+		intel_pstate_hwp_set(policy->cpu);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
 	return 0;
 }
 
+static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy,
+					 struct cpudata *cpu)
+{
+	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
+	    policy->max < policy->cpuinfo.max_freq &&
+	    policy->max > cpu->pstate.max_freq) {
+		pr_debug("policy->max > max non turbo frequency\n");
+		policy->max = policy->cpuinfo.max_freq;
+	}
+}
+
 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 
 	update_turbo_state();
-	policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
-					cpu->pstate.max_freq :
-					cpu->pstate.turbo_freq;
-
-	cpufreq_verify_within_cpu_limits(policy);
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+				     intel_pstate_get_max_freq(cpu));
 
 	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
 	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
 		return -EINVAL;
 
-	/* When per-CPU limits are used, sysfs limits are not used */
-	if (!per_cpu_limits) {
-		unsigned int max_freq, min_freq;
-
-		max_freq = policy->cpuinfo.max_freq *
-					global.max_sysfs_pct / 100;
-		min_freq = policy->cpuinfo.max_freq *
-					global.min_sysfs_pct / 100;
-		cpufreq_verify_within_limits(policy, min_freq, max_freq);
-	}
+	intel_pstate_adjust_policy_max(policy, cpu);
 
 	return 0;
 }
@@ -2202,8 +2114,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	cpu = all_cpu_data[policy->cpu];
 
-	if (per_cpu_limits)
-		intel_pstate_init_limits(cpu->perf_limits);
+	cpu->max_perf = int_ext_tofp(1);
+	cpu->min_perf = 0;
 
 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
@@ -2257,10 +2169,12 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 
 	update_turbo_state();
-	policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
-			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+				     intel_pstate_get_max_freq(cpu));
 
-	cpufreq_verify_within_cpu_limits(policy);
+	intel_pstate_adjust_policy_max(policy, cpu);
+
+	intel_pstate_update_perf_limits(policy, cpu);
 
 	return 0;
 }
@@ -2324,6 +2238,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		return ret;
 
 	policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
+	policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
 	/* This reflects the intel_pstate_get_cpu_pstates() setting. */
 	policy->cur = policy->cpuinfo.min_freq;
 
@@ -2341,7 +2256,13 @@ static struct cpufreq_driver intel_cpufreq = {
 	.name		= "intel_cpufreq",
 };
 
-static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
+static struct cpufreq_driver *default_driver = &intel_pstate;
+
+static bool pid_in_use(void)
+{
+	return intel_pstate_driver == &intel_pstate &&
+		pstate_funcs.update_util == intel_pstate_update_util_pid;
+}
 
 static void intel_pstate_driver_cleanup(void)
 {
@@ -2358,26 +2279,26 @@ static void intel_pstate_driver_cleanup(void)
 		}
 	}
 	put_online_cpus();
+	intel_pstate_driver = NULL;
 }
 
-static int intel_pstate_register_driver(void)
+static int intel_pstate_register_driver(struct cpufreq_driver *driver)
 {
 	int ret;
 
-	intel_pstate_init_limits(&global);
+	memset(&global, 0, sizeof(global));
+	global.max_perf_pct = 100;
 
+	intel_pstate_driver = driver;
 	ret = cpufreq_register_driver(intel_pstate_driver);
 	if (ret) {
 		intel_pstate_driver_cleanup();
 		return ret;
 	}
 
-	mutex_lock(&intel_pstate_limits_lock);
-	driver_registered = true;
-	mutex_unlock(&intel_pstate_limits_lock);
+	global.min_perf_pct = min_perf_pct_min();
 
-	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
-	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+	if (pid_in_use())
 		intel_pstate_debug_expose_params();
 
 	return 0;
@@ -2388,14 +2309,9 @@ static int intel_pstate_unregister_driver(void)
 	if (hwp_active)
 		return -EBUSY;
 
-	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
-	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+	if (pid_in_use())
 		intel_pstate_debug_hide_params();
 
-	mutex_lock(&intel_pstate_limits_lock);
-	driver_registered = false;
-	mutex_unlock(&intel_pstate_limits_lock);
-
 	cpufreq_unregister_driver(intel_pstate_driver);
 	intel_pstate_driver_cleanup();
 
@@ -2404,7 +2320,7 @@ static int intel_pstate_unregister_driver(void)
 
 static ssize_t intel_pstate_show_status(char *buf)
 {
-	if (!driver_registered)
+	if (!intel_pstate_driver)
 		return sprintf(buf, "off\n");
 
 	return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
@@ -2416,11 +2332,11 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 	int ret;
 
 	if (size == 3 && !strncmp(buf, "off", size))
-		return driver_registered ?
+		return intel_pstate_driver ?
 			intel_pstate_unregister_driver() : -EINVAL;
 
 	if (size == 6 && !strncmp(buf, "active", size)) {
-		if (driver_registered) {
+		if (intel_pstate_driver) {
 			if (intel_pstate_driver == &intel_pstate)
 				return 0;
 
@@ -2429,13 +2345,12 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 				return ret;
 		}
 
-		intel_pstate_driver = &intel_pstate;
-		return intel_pstate_register_driver();
+		return intel_pstate_register_driver(&intel_pstate);
 	}
 
 	if (size == 7 && !strncmp(buf, "passive", size)) {
-		if (driver_registered) {
-			if (intel_pstate_driver != &intel_pstate)
+		if (intel_pstate_driver) {
+			if (intel_pstate_driver == &intel_cpufreq)
 				return 0;
 
 			ret = intel_pstate_unregister_driver();
@@ -2443,8 +2358,7 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 				return ret;
 		}
 
-		intel_pstate_driver = &intel_cpufreq;
-		return intel_pstate_register_driver();
+		return intel_pstate_register_driver(&intel_cpufreq);
 	}
 
 	return -EINVAL;
@@ -2465,23 +2379,17 @@ static int __init intel_pstate_msrs_not_valid(void)
 	return 0;
 }
 
-static void __init copy_pid_params(struct pstate_adjust_policy *policy)
-{
-	pid_params.sample_rate_ms = policy->sample_rate_ms;
-	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
-	pid_params.p_gain_pct = policy->p_gain_pct;
-	pid_params.i_gain_pct = policy->i_gain_pct;
-	pid_params.d_gain_pct = policy->d_gain_pct;
-	pid_params.deadband = policy->deadband;
-	pid_params.setpoint = policy->setpoint;
-}
-
 #ifdef CONFIG_ACPI
 static void intel_pstate_use_acpi_profile(void)
 {
-	if (acpi_gbl_FADT.preferred_profile == PM_MOBILE)
-		pstate_funcs.get_target_pstate =
-				get_target_pstate_use_cpu_load;
+	switch (acpi_gbl_FADT.preferred_profile) {
+	case PM_MOBILE:
+	case PM_TABLET:
+	case PM_APPLIANCE_PC:
+	case PM_DESKTOP:
+	case PM_WORKSTATION:
+		pstate_funcs.update_util = intel_pstate_update_util;
+	}
 }
 #else
 static void intel_pstate_use_acpi_profile(void)
@@ -2498,7 +2406,7 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
 	pstate_funcs.get_scaling = funcs->get_scaling;
 	pstate_funcs.get_val   = funcs->get_val;
 	pstate_funcs.get_vid   = funcs->get_vid;
-	pstate_funcs.get_target_pstate = funcs->get_target_pstate;
+	pstate_funcs.update_util = funcs->update_util;
 
 	intel_pstate_use_acpi_profile();
 }
@@ -2637,29 +2545,31 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = {
 
 static int __init intel_pstate_init(void)
 {
-	const struct x86_cpu_id *id;
-	struct cpu_defaults *cpu_def;
-	int rc = 0;
+	int rc;
 
 	if (no_load)
 		return -ENODEV;
 
-	if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
-		copy_cpu_funcs(&core_params.funcs);
-		hwp_active++;
-		intel_pstate.attr = hwp_cpufreq_attrs;
-		goto hwp_cpu_matched;
+	if (x86_match_cpu(hwp_support_ids)) {
+		copy_cpu_funcs(&core_funcs);
+		if (no_hwp) {
+			pstate_funcs.update_util = intel_pstate_update_util;
+		} else {
+			hwp_active++;
+			intel_pstate.attr = hwp_cpufreq_attrs;
+			pstate_funcs.update_util = intel_pstate_update_util_hwp;
+			goto hwp_cpu_matched;
+		}
+	} else {
+		const struct x86_cpu_id *id;
+
+		id = x86_match_cpu(intel_pstate_cpu_ids);
+		if (!id)
+			return -ENODEV;
+
+		copy_cpu_funcs((struct pstate_funcs *)id->driver_data);
 	}
 
-	id = x86_match_cpu(intel_pstate_cpu_ids);
-	if (!id)
-		return -ENODEV;
-
-	cpu_def = (struct cpu_defaults *)id->driver_data;
-
-	copy_pid_params(&cpu_def->pid_policy);
-	copy_cpu_funcs(&cpu_def->funcs);
-
 	if (intel_pstate_msrs_not_valid())
 		return -ENODEV;
 
@@ -2685,7 +2595,7 @@ hwp_cpu_matched:
 	intel_pstate_sysfs_expose_params();
 
 	mutex_lock(&intel_pstate_driver_lock);
-	rc = intel_pstate_register_driver();
+	rc = intel_pstate_register_driver(default_driver);
 	mutex_unlock(&intel_pstate_driver_lock);
 	if (rc)
 		return rc;
@@ -2706,7 +2616,7 @@ static int __init intel_pstate_setup(char *str)
 		no_load = 1;
 	} else if (!strcmp(str, "passive")) {
 		pr_info("Passive mode enabled\n");
-		intel_pstate_driver = &intel_cpufreq;
+		default_driver = &intel_cpufreq;
 		no_hwp = 1;
 	}
 	if (!strcmp(str, "no_hwp")) {
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index ab25b1235a5e..fd1886faf33a 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -573,14 +573,33 @@ static struct platform_driver mt8173_cpufreq_platdrv = {
 	.probe		= mt8173_cpufreq_probe,
 };
 
-static int mt8173_cpufreq_driver_init(void)
+/* List of machines supported by this driver */
+static const struct of_device_id mt8173_cpufreq_machines[] __initconst = {
+	{ .compatible = "mediatek,mt817x", },
+	{ .compatible = "mediatek,mt8173", },
+	{ .compatible = "mediatek,mt8176", },
+
+	{ }
+};
+
+static int __init mt8173_cpufreq_driver_init(void)
 {
+	struct device_node *np;
+	const struct of_device_id *match;
 	struct platform_device *pdev;
 	int err;
 
-	if (!of_machine_is_compatible("mediatek,mt8173"))
+	np = of_find_node_by_path("/");
+	if (!np)
 		return -ENODEV;
 
+	match = of_match_node(mt8173_cpufreq_machines, np);
+	of_node_put(np);
+	if (!match) {
+		pr_warn("Machine is not compatible with mt8173-cpufreq\n");
+		return -ENODEV;
+	}
+
 	err = platform_driver_register(&mt8173_cpufreq_platdrv);
 	if (err)
 		return err;
diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index bfec1bcd3835..e2ea433a5f9c 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -52,17 +52,27 @@ static u32 get_bus_freq(void)
 {
 	struct device_node *soc;
 	u32 sysfreq;
+	struct clk *pltclk;
+	int ret;
 
+	/* get platform freq by searching bus-frequency property */
 	soc = of_find_node_by_type(NULL, "soc");
-	if (!soc)
-		return 0;
+	if (soc) {
+		ret = of_property_read_u32(soc, "bus-frequency", &sysfreq);
+		of_node_put(soc);
+		if (!ret)
+			return sysfreq;
+	}
 
-	if (of_property_read_u32(soc, "bus-frequency", &sysfreq))
-		sysfreq = 0;
+	/* get platform freq by its clock name */
+	pltclk = clk_get(NULL, "cg-pll0-div1");
+	if (IS_ERR(pltclk)) {
+		pr_err("%s: can't get bus frequency %ld\n",
+		       __func__, PTR_ERR(pltclk));
+		return PTR_ERR(pltclk);
+	}
 
-	of_node_put(soc);
-
-	return sysfreq;
+	return clk_get_rate(pltclk);
 }
 
 static struct clk *cpu_to_clk(int cpu)
diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c
new file mode 100644
index 000000000000..fe7875311d62
--- /dev/null
+++ b/drivers/cpufreq/tegra186-cpufreq.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+
+#define EDVD_CORE_VOLT_FREQ(core)		(0x20 + (core) * 0x4)
+#define EDVD_CORE_VOLT_FREQ_F_SHIFT		0
+#define EDVD_CORE_VOLT_FREQ_V_SHIFT		16
+
+struct tegra186_cpufreq_cluster_info {
+	unsigned long offset;
+	int cpus[4];
+	unsigned int bpmp_cluster_id;
+};
+
+#define NO_CPU -1
+static const struct tegra186_cpufreq_cluster_info tegra186_clusters[] = {
+	/* Denver cluster */
+	{
+		.offset = SZ_64K * 7,
+		.cpus = { 1, 2, NO_CPU, NO_CPU },
+		.bpmp_cluster_id = 0,
+	},
+	/* A57 cluster */
+	{
+		.offset = SZ_64K * 6,
+		.cpus = { 0, 3, 4, 5 },
+		.bpmp_cluster_id = 1,
+	},
+};
+
+struct tegra186_cpufreq_cluster {
+	const struct tegra186_cpufreq_cluster_info *info;
+	struct cpufreq_frequency_table *table;
+};
+
+struct tegra186_cpufreq_data {
+	void __iomem *regs;
+
+	size_t num_clusters;
+	struct tegra186_cpufreq_cluster *clusters;
+};
+
+static int tegra186_cpufreq_init(struct cpufreq_policy *policy)
+{
+	struct tegra186_cpufreq_data *data = cpufreq_get_driver_data();
+	unsigned int i;
+
+	for (i = 0; i < data->num_clusters; i++) {
+		struct tegra186_cpufreq_cluster *cluster = &data->clusters[i];
+		const struct tegra186_cpufreq_cluster_info *info =
+			cluster->info;
+		int core;
+
+		for (core = 0; core < ARRAY_SIZE(info->cpus); core++) {
+			if (info->cpus[core] == policy->cpu)
+				break;
+		}
+		if (core == ARRAY_SIZE(info->cpus))
+			continue;
+
+		policy->driver_data =
+			data->regs + info->offset + EDVD_CORE_VOLT_FREQ(core);
+		cpufreq_table_validate_and_show(policy, cluster->table);
+	}
+
+	policy->cpuinfo.transition_latency = 300 * 1000;
+
+	return 0;
+}
+
+static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy,
+				       unsigned int index)
+{
+	struct cpufreq_frequency_table *tbl = policy->freq_table + index;
+	void __iomem *edvd_reg = policy->driver_data;
+	u32 edvd_val = tbl->driver_data;
+
+	writel(edvd_val, edvd_reg);
+
+	return 0;
+}
+
+static struct cpufreq_driver tegra186_cpufreq_driver = {
+	.name = "tegra186",
+	.flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
+	.verify = cpufreq_generic_frequency_table_verify,
+	.target_index = tegra186_cpufreq_set_target,
+	.init = tegra186_cpufreq_init,
+	.attr = cpufreq_generic_attr,
+};
+
+static struct cpufreq_frequency_table *init_vhint_table(
+	struct platform_device *pdev, struct tegra_bpmp *bpmp,
+	unsigned int cluster_id)
+{
+	struct cpufreq_frequency_table *table;
+	struct mrq_cpu_vhint_request req;
+	struct tegra_bpmp_message msg;
+	struct cpu_vhint_data *data;
+	int err, i, j, num_rates = 0;
+	dma_addr_t phys;
+	void *virt;
+
+	virt = dma_alloc_coherent(bpmp->dev, sizeof(*data), &phys,
+				  GFP_KERNEL | GFP_DMA32);
+	if (!virt)
+		return ERR_PTR(-ENOMEM);
+
+	data = (struct cpu_vhint_data *)virt;
+
+	memset(&req, 0, sizeof(req));
+	req.addr = phys;
+	req.cluster_id = cluster_id;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.mrq = MRQ_CPU_VHINT;
+	msg.tx.data = &req;
+	msg.tx.size = sizeof(req);
+
+	err = tegra_bpmp_transfer(bpmp, &msg);
+	if (err) {
+		table = ERR_PTR(err);
+		goto free;
+	}
+
+	for (i = data->vfloor; i <= data->vceil; i++) {
+		u16 ndiv = data->ndiv[i];
+
+		if (ndiv < data->ndiv_min || ndiv > data->ndiv_max)
+			continue;
+
+		/* Only store lowest voltage index for each rate */
+		if (i > 0 && ndiv == data->ndiv[i - 1])
+			continue;
+
+		num_rates++;
+	}
+
+	table = devm_kcalloc(&pdev->dev, num_rates + 1, sizeof(*table),
+			     GFP_KERNEL);
+	if (!table) {
+		table = ERR_PTR(-ENOMEM);
+		goto free;
+	}
+
+	for (i = data->vfloor, j = 0; i <= data->vceil; i++) {
+		struct cpufreq_frequency_table *point;
+		u16 ndiv = data->ndiv[i];
+		u32 edvd_val = 0;
+
+		if (ndiv < data->ndiv_min || ndiv > data->ndiv_max)
+			continue;
+
+		/* Only store lowest voltage index for each rate */
+		if (i > 0 && ndiv == data->ndiv[i - 1])
+			continue;
+
+		edvd_val |= i << EDVD_CORE_VOLT_FREQ_V_SHIFT;
+		edvd_val |= ndiv << EDVD_CORE_VOLT_FREQ_F_SHIFT;
+
+		point = &table[j++];
+		point->driver_data = edvd_val;
+		point->frequency = data->ref_clk_hz * ndiv / data->pdiv /
+			data->mdiv / 1000;
+	}
+
+	table[j].frequency = CPUFREQ_TABLE_END;
+
+free:
+	dma_free_coherent(bpmp->dev, sizeof(*data), virt, phys);
+
+	return table;
+}
+
+static int tegra186_cpufreq_probe(struct platform_device *pdev)
+{
+	struct tegra186_cpufreq_data *data;
+	struct tegra_bpmp *bpmp;
+	struct resource *res;
+	unsigned int i = 0, err;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->clusters = devm_kcalloc(&pdev->dev, ARRAY_SIZE(tegra186_clusters),
+				      sizeof(*data->clusters), GFP_KERNEL);
+	if (!data->clusters)
+		return -ENOMEM;
+
+	data->num_clusters = ARRAY_SIZE(tegra186_clusters);
+
+	bpmp = tegra_bpmp_get(&pdev->dev);
+	if (IS_ERR(bpmp))
+		return PTR_ERR(bpmp);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(data->regs)) {
+		err = PTR_ERR(data->regs);
+		goto put_bpmp;
+	}
+
+	for (i = 0; i < data->num_clusters; i++) {
+		struct tegra186_cpufreq_cluster *cluster = &data->clusters[i];
+
+		cluster->info = &tegra186_clusters[i];
+		cluster->table = init_vhint_table(
+			pdev, bpmp, cluster->info->bpmp_cluster_id);
+		if (IS_ERR(cluster->table)) {
+			err = PTR_ERR(cluster->table);
+			goto put_bpmp;
+		}
+	}
+
+	tegra_bpmp_put(bpmp);
+
+	tegra186_cpufreq_driver.driver_data = data;
+
+	err = cpufreq_register_driver(&tegra186_cpufreq_driver);
+	if (err)
+		return err;
+
+	return 0;
+
+put_bpmp:
+	tegra_bpmp_put(bpmp);
+
+	return err;
+}
+
+static int tegra186_cpufreq_remove(struct platform_device *pdev)
+{
+	cpufreq_unregister_driver(&tegra186_cpufreq_driver);
+
+	return 0;
+}
+
+static const struct of_device_id tegra186_cpufreq_of_match[] = {
+	{ .compatible = "nvidia,tegra186-ccplex-cluster", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tegra186_cpufreq_of_match);
+
+static struct platform_driver tegra186_cpufreq_platform_driver = {
+	.driver = {
+		.name = "tegra186-cpufreq",
+		.of_match_table = tegra186_cpufreq_of_match,
+	},
+	.probe = tegra186_cpufreq_probe,
+	.remove = tegra186_cpufreq_remove,
+};
+module_platform_driver(tegra186_cpufreq_platform_driver);
+
+MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra186 cpufreq driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpuidle/cpuidle-cps.c b/drivers/cpuidle/cpuidle-cps.c
index 926ba9871c62..12b9145913de 100644
--- a/drivers/cpuidle/cpuidle-cps.c
+++ b/drivers/cpuidle/cpuidle-cps.c
@@ -118,7 +118,7 @@ static void __init cps_cpuidle_unregister(void)
 
 static int __init cps_cpuidle_init(void)
 {
-	int err, cpu, core, i;
+	int err, cpu, i;
 	struct cpuidle_device *device;
 
 	/* Detect supported states */
@@ -160,7 +160,6 @@ static int __init cps_cpuidle_init(void)
 	}
 
 	for_each_possible_cpu(cpu) {
-		core = cpu_data[cpu].core;
 		device = &per_cpu(cpuidle_dev, cpu);
 		device->cpu = cpu;
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index cda8f62d555b..12409a519cc5 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -56,10 +56,9 @@ static int snooze_loop(struct cpuidle_device *dev,
 
 	snooze_exit_time = get_tb() + snooze_timeout;
 	ppc64_runlatch_off();
+	HMT_very_low();
 	while (!need_resched()) {
-		HMT_low();
-		HMT_very_low();
-		if (snooze_timeout_en && get_tb() > snooze_exit_time)
+		if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time)
 			break;
 	}
 
@@ -215,11 +214,25 @@ static inline void add_powernv_state(int index, const char *name,
 	stop_psscr_table[index].mask = psscr_mask;
 }
 
+/*
+ * Returns 0 if prop1_len == prop2_len. Else returns -1
+ */
+static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
+					 const char *prop2, int prop2_len)
+{
+	if (prop1_len == prop2_len)
+		return 0;
+
+	pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n",
+		prop1, prop2);
+	return -1;
+}
+
 static int powernv_add_idle_states(void)
 {
 	struct device_node *power_mgt;
 	int nr_idle_states = 1; /* Snooze */
-	int dt_idle_states;
+	int dt_idle_states, count;
 	u32 latency_ns[CPUIDLE_STATE_MAX];
 	u32 residency_ns[CPUIDLE_STATE_MAX];
 	u32 flags[CPUIDLE_STATE_MAX];
@@ -244,6 +257,21 @@ static int powernv_add_idle_states(void)
 		goto out;
 	}
 
+	count = of_property_count_u32_elems(power_mgt,
+					    "ibm,cpu-idle-state-latencies-ns");
+
+	if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
+				   "ibm,cpu-idle-state-latencies-ns",
+				   count) != 0)
+		goto out;
+
+	count = of_property_count_strings(power_mgt,
+					  "ibm,cpu-idle-state-names");
+	if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
+				   "ibm,cpu-idle-state-names",
+				   count) != 0)
+		goto out;
+
 	/*
 	 * Since snooze is used as first idle state, max idle states allowed is
 	 * CPUIDLE_STATE_MAX -1
@@ -278,6 +306,22 @@ static int powernv_add_idle_states(void)
 	has_stop_states = (flags[0] &
 			   (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
 	if (has_stop_states) {
+		count = of_property_count_u64_elems(power_mgt,
+						    "ibm,cpu-idle-state-psscr");
+		if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
+					   dt_idle_states,
+					   "ibm,cpu-idle-state-psscr",
+					   count) != 0)
+			goto out;
+
+		count = of_property_count_u64_elems(power_mgt,
+						    "ibm,cpu-idle-state-psscr-mask");
+		if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
+					   dt_idle_states,
+					   "ibm,cpu-idle-state-psscr-mask",
+					   count) != 0)
+			goto out;
+
 		if (of_property_read_u64_array(power_mgt,
 		    "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
 			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
@@ -292,8 +336,21 @@ static int powernv_add_idle_states(void)
 		}
 	}
 
-	rc = of_property_read_u32_array(power_mgt,
-		"ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
+	count = of_property_count_u32_elems(power_mgt,
+					    "ibm,cpu-idle-state-residency-ns");
+
+	if (count < 0) {
+		rc = count;
+	} else if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
+					  dt_idle_states,
+					  "ibm,cpu-idle-state-residency-ns",
+					  count) != 0) {
+		goto out;
+	} else {
+		rc = of_property_read_u32_array(power_mgt,
+						"ibm,cpu-idle-state-residency-ns",
+						residency_ns, dt_idle_states);
+	}
 
 	for (i = 0; i < dt_idle_states; i++) {
 		unsigned int exit_latency, target_residency;
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index 71576b8bdfef..a4f2fa1091e4 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -25,6 +25,35 @@
 #define DEVFREQ_GOV_SUSPEND			0x4
 #define DEVFREQ_GOV_RESUME			0x5
 
+/**
+ * struct devfreq_governor - Devfreq policy governor
+ * @node:		list node - contains registered devfreq governors
+ * @name:		Governor's name
+ * @immutable:		Immutable flag for governor. If the value is 1,
+ *			this govenror is never changeable to other governor.
+ * @get_target_freq:	Returns desired operating frequency for the device.
+ *			Basically, get_target_freq will run
+ *			devfreq_dev_profile.get_dev_status() to get the
+ *			status of the device (load = busy_time / total_time).
+ *			If no_central_polling is set, this callback is called
+ *			only with update_devfreq() notified by OPP.
+ * @event_handler:      Callback for devfreq core framework to notify events
+ *                      to governors. Events include per device governor
+ *                      init and exit, opp changes out of devfreq, suspend
+ *                      and resume of per device devfreq during device idle.
+ *
+ * Note that the callbacks are called with devfreq->lock locked by devfreq.
+ */
+struct devfreq_governor {
+	struct list_head node;
+
+	const char name[DEVFREQ_NAME_LEN];
+	const unsigned int immutable;
+	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+	int (*event_handler)(struct devfreq *devfreq,
+				unsigned int event, void *data);
+};
+
 /* Caution: devfreq->lock must be locked before calling update_devfreq */
 extern int update_devfreq(struct devfreq *devfreq);
 
diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c
index 56bce1908be2..85812521b6ba 100644
--- a/drivers/power/avs/rockchip-io-domain.c
+++ b/drivers/power/avs/rockchip-io-domain.c
@@ -43,6 +43,10 @@
 #define RK3288_SOC_CON2_FLASH0		BIT(7)
 #define RK3288_SOC_FLASH_SUPPLY_NUM	2
 
+#define RK3328_SOC_CON4			0x410
+#define RK3328_SOC_CON4_VCCIO2		BIT(7)
+#define RK3328_SOC_VCCIO2_SUPPLY_NUM	1
+
 #define RK3368_SOC_CON15		0x43c
 #define RK3368_SOC_CON15_FLASH0		BIT(14)
 #define RK3368_SOC_FLASH_SUPPLY_NUM	2
@@ -166,6 +170,25 @@ static void rk3288_iodomain_init(struct rockchip_iodomain *iod)
 		dev_warn(iod->dev, "couldn't update flash0 ctrl\n");
 }
 
+static void rk3328_iodomain_init(struct rockchip_iodomain *iod)
+{
+	int ret;
+	u32 val;
+
+	/* if no vccio2 supply we should leave things alone */
+	if (!iod->supplies[RK3328_SOC_VCCIO2_SUPPLY_NUM].reg)
+		return;
+
+	/*
+	 * set vccio2 iodomain to also use this framework
+	 * instead of a special gpio.
+	 */
+	val = RK3328_SOC_CON4_VCCIO2 | (RK3328_SOC_CON4_VCCIO2 << 16);
+	ret = regmap_write(iod->grf, RK3328_SOC_CON4, val);
+	if (ret < 0)
+		dev_warn(iod->dev, "couldn't update vccio2 vsel ctrl\n");
+}
+
 static void rk3368_iodomain_init(struct rockchip_iodomain *iod)
 {
 	int ret;
@@ -247,6 +270,20 @@ static const struct rockchip_iodomain_soc_data soc_data_rk3288 = {
 	.init = rk3288_iodomain_init,
 };
 
+static const struct rockchip_iodomain_soc_data soc_data_rk3328 = {
+	.grf_offset = 0x410,
+	.supply_names = {
+		"vccio1",
+		"vccio2",
+		"vccio3",
+		"vccio4",
+		"vccio5",
+		"vccio6",
+		"pmuio",
+	},
+	.init = rk3328_iodomain_init,
+};
+
 static const struct rockchip_iodomain_soc_data soc_data_rk3368 = {
 	.grf_offset = 0x900,
 	.supply_names = {
@@ -311,6 +348,10 @@ static const struct of_device_id rockchip_iodomain_match[] = {
 		.compatible = "rockchip,rk3288-io-voltage-domain",
 		.data = (void *)&soc_data_rk3288
 	},
+	{
+		.compatible = "rockchip,rk3328-io-voltage-domain",
+		.data = (void *)&soc_data_rk3328
+	},
 	{
 		.compatible = "rockchip,rk3368-io-voltage-domain",
 		.data = (void *)&soc_data_rk3368
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 776b34396144..0a16cf4bed39 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -291,18 +291,6 @@ config ARMADA_THERMAL
 	  Enable this option if you want to have support for thermal management
 	  controller present in Armada 370 and Armada XP SoC.
 
-config DB8500_CPUFREQ_COOLING
-	tristate "DB8500 cpufreq cooling"
-	depends on ARCH_U8500 || COMPILE_TEST
-	depends on HAS_IOMEM
-	depends on CPU_THERMAL
-	default y
-	help
-	  Adds DB8500 cpufreq cooling devices, and these cooling devices can be
-	  bound to thermal zone trip points. When a trip point reached, the
-	  bound cpufreq cooling device turns active to set CPU frequency low to
-	  cool down the CPU.
-
 config INTEL_POWERCLAMP
 	tristate "Intel PowerClamp idle injection driver"
 	depends on THERMAL
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 7adae2029355..c2372f10dae5 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_TANGO_THERMAL)	+= tango_thermal.o
 obj-$(CONFIG_IMX_THERMAL)	+= imx_thermal.o
 obj-$(CONFIG_MAX77620_THERMAL)	+= max77620_thermal.o
 obj-$(CONFIG_QORIQ_THERMAL)	+= qoriq_thermal.o
-obj-$(CONFIG_DB8500_CPUFREQ_COOLING)	+= db8500_cpufreq_cooling.o
 obj-$(CONFIG_INTEL_POWERCLAMP)	+= intel_powerclamp.o
 obj-$(CONFIG_X86_PKG_TEMP_THERMAL)	+= x86_pkg_temp_thermal.o
 obj-$(CONFIG_INTEL_SOC_DTS_IOSF_CORE)	+= intel_soc_dts_iosf.o
diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c
deleted file mode 100644
index e58bd0b658b5..000000000000
--- a/drivers/thermal/db8500_cpufreq_cooling.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * db8500_cpufreq_cooling.c - DB8500 cpufreq works as cooling device.
- *
- * Copyright (C) 2012 ST-Ericsson
- * Copyright (C) 2012 Linaro Ltd.
- *
- * Author: Hongbo Zhang <hongbo.zhang@linaro.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/cpu_cooling.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-static int db8500_cpufreq_cooling_probe(struct platform_device *pdev)
-{
-	struct thermal_cooling_device *cdev;
-
-	cdev = cpufreq_cooling_register(cpu_present_mask);
-	if (IS_ERR(cdev)) {
-		int ret = PTR_ERR(cdev);
-
-		if (ret != -EPROBE_DEFER)
-			dev_err(&pdev->dev,
-				"Failed to register cooling device %d\n",
-				ret);
-				
-		return ret;
-	}
-
-	platform_set_drvdata(pdev, cdev);
-
-	dev_info(&pdev->dev, "Cooling device registered: %s\n",	cdev->type);
-
-	return 0;
-}
-
-static int db8500_cpufreq_cooling_remove(struct platform_device *pdev)
-{
-	struct thermal_cooling_device *cdev = platform_get_drvdata(pdev);
-
-	cpufreq_cooling_unregister(cdev);
-
-	return 0;
-}
-
-static int db8500_cpufreq_cooling_suspend(struct platform_device *pdev,
-		pm_message_t state)
-{
-	return -ENOSYS;
-}
-
-static int db8500_cpufreq_cooling_resume(struct platform_device *pdev)
-{
-	return -ENOSYS;
-}
-
-#ifdef CONFIG_OF
-static const struct of_device_id db8500_cpufreq_cooling_match[] = {
-	{ .compatible = "stericsson,db8500-cpufreq-cooling" },
-	{},
-};
-MODULE_DEVICE_TABLE(of, db8500_cpufreq_cooling_match);
-#endif
-
-static struct platform_driver db8500_cpufreq_cooling_driver = {
-	.driver = {
-		.name = "db8500-cpufreq-cooling",
-		.of_match_table = of_match_ptr(db8500_cpufreq_cooling_match),
-	},
-	.probe = db8500_cpufreq_cooling_probe,
-	.suspend = db8500_cpufreq_cooling_suspend,
-	.resume = db8500_cpufreq_cooling_resume,
-	.remove = db8500_cpufreq_cooling_remove,
-};
-
-static int __init db8500_cpufreq_cooling_init(void)
-{
-	return platform_driver_register(&db8500_cpufreq_cooling_driver);
-}
-
-static void __exit db8500_cpufreq_cooling_exit(void)
-{
-	platform_driver_unregister(&db8500_cpufreq_cooling_driver);
-}
-
-/* Should be later than db8500_cpufreq_register */
-late_initcall(db8500_cpufreq_cooling_init);
-module_exit(db8500_cpufreq_cooling_exit);
-
-MODULE_AUTHOR("Hongbo Zhang <hongbo.zhang@stericsson.com>");
-MODULE_DESCRIPTION("DB8500 cpufreq cooling driver");
-MODULE_LICENSE("GPL");
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 87165f06a307..a5ce0bbeadb5 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -120,6 +120,13 @@ struct cpufreq_policy {
 	bool			fast_switch_possible;
 	bool			fast_switch_enabled;
 
+	/*
+	 * Preferred average time interval between consecutive invocations of
+	 * the driver to set the frequency for this policy.  To be set by the
+	 * scaling driver (0, which is the default, means no preference).
+	 */
+	unsigned int		transition_delay_us;
+
 	 /* Cached frequency lookup from cpufreq_driver_resolve_freq. */
 	unsigned int cached_target_freq;
 	int cached_resolved_idx;
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index e0acb0e5243b..6c220e4ebb6b 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -27,6 +27,7 @@
 #define DEVFREQ_POSTCHANGE		(1)
 
 struct devfreq;
+struct devfreq_governor;
 
 /**
  * struct devfreq_dev_status - Data given from devfreq user device to
@@ -100,35 +101,6 @@ struct devfreq_dev_profile {
 	unsigned int max_state;
 };
 
-/**
- * struct devfreq_governor - Devfreq policy governor
- * @node:		list node - contains registered devfreq governors
- * @name:		Governor's name
- * @immutable:		Immutable flag for governor. If the value is 1,
- *			this govenror is never changeable to other governor.
- * @get_target_freq:	Returns desired operating frequency for the device.
- *			Basically, get_target_freq will run
- *			devfreq_dev_profile.get_dev_status() to get the
- *			status of the device (load = busy_time / total_time).
- *			If no_central_polling is set, this callback is called
- *			only with update_devfreq() notified by OPP.
- * @event_handler:      Callback for devfreq core framework to notify events
- *                      to governors. Events include per device governor
- *                      init and exit, opp changes out of devfreq, suspend
- *                      and resume of per device devfreq during device idle.
- *
- * Note that the callbacks are called with devfreq->lock locked by devfreq.
- */
-struct devfreq_governor {
-	struct list_head node;
-
-	const char name[DEVFREQ_NAME_LEN];
-	const unsigned int immutable;
-	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
-	int (*event_handler)(struct devfreq *devfreq,
-				unsigned int event, void *data);
-};
-
 /**
  * struct devfreq - Device devfreq structure
  * @node:	list node - contains the devices with devfreq that have been
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 5339ed5bd6f9..9b6abe632587 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -20,6 +20,7 @@
 /* Defines used for the flags field in the struct generic_pm_domain */
 #define GENPD_FLAG_PM_CLK	(1U << 0) /* PM domain uses PM clk */
 #define GENPD_FLAG_IRQ_SAFE	(1U << 1) /* PM domain operates in atomic */
+#define GENPD_FLAG_ALWAYS_ON	(1U << 2) /* PM domain is always powered on */
 
 enum gpd_status {
 	GPD_STATE_ACTIVE = 0,	/* PM domain is active */
diff --git a/include/linux/tick.h b/include/linux/tick.h
index a04fea19676f..fe01e68bf520 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -117,6 +117,7 @@ extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
+extern unsigned long tick_nohz_get_idle_calls(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 #else /* !CONFIG_NO_HZ_COMMON */
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 54c577578da6..76877a62b5fa 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -61,6 +61,11 @@ struct sugov_cpu {
 	unsigned long util;
 	unsigned long max;
 	unsigned int flags;
+
+	/* The field below is for single-CPU policies only. */
+#ifdef CONFIG_NO_HZ_COMMON
+	unsigned long saved_idle_calls;
+#endif
 };
 
 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -93,22 +98,23 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 {
 	struct cpufreq_policy *policy = sg_policy->policy;
 
+	if (sg_policy->next_freq == next_freq)
+		return;
+
+	if (sg_policy->next_freq > next_freq)
+		next_freq = (sg_policy->next_freq + next_freq) >> 1;
+
+	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 
 	if (policy->fast_switch_enabled) {
-		if (sg_policy->next_freq == next_freq) {
-			trace_cpu_frequency(policy->cur, smp_processor_id());
-			return;
-		}
-		sg_policy->next_freq = next_freq;
 		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
 		if (next_freq == CPUFREQ_ENTRY_INVALID)
 			return;
 
 		policy->cur = next_freq;
 		trace_cpu_frequency(next_freq, smp_processor_id());
-	} else if (sg_policy->next_freq != next_freq) {
-		sg_policy->next_freq = next_freq;
+	} else {
 		sg_policy->work_in_progress = true;
 		irq_work_queue(&sg_policy->irq_work);
 	}
@@ -192,6 +198,19 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 	sg_cpu->iowait_boost >>= 1;
 }
 
+#ifdef CONFIG_NO_HZ_COMMON
+static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
+{
+	unsigned long idle_calls = tick_nohz_get_idle_calls();
+	bool ret = idle_calls == sg_cpu->saved_idle_calls;
+
+	sg_cpu->saved_idle_calls = idle_calls;
+	return ret;
+}
+#else
+static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
+#endif /* CONFIG_NO_HZ_COMMON */
+
 static void sugov_update_single(struct update_util_data *hook, u64 time,
 				unsigned int flags)
 {
@@ -200,6 +219,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned long util, max;
 	unsigned int next_f;
+	bool busy;
 
 	sugov_set_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
@@ -207,40 +227,37 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	if (!sugov_should_update_freq(sg_policy, time))
 		return;
 
+	busy = sugov_cpu_is_busy(sg_cpu);
+
 	if (flags & SCHED_CPUFREQ_RT_DL) {
 		next_f = policy->cpuinfo.max_freq;
 	} else {
 		sugov_get_util(&util, &max);
 		sugov_iowait_boost(sg_cpu, &util, &max);
 		next_f = get_next_freq(sg_policy, util, max);
+		/*
+		 * Do not reduce the frequency if the CPU has not been idle
+		 * recently, as the reduction is likely to be premature then.
+		 */
+		if (busy && next_f < sg_policy->next_freq)
+			next_f = sg_policy->next_freq;
 	}
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
-					   unsigned long util, unsigned long max,
-					   unsigned int flags)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
-	unsigned int max_f = policy->cpuinfo.max_freq;
 	u64 last_freq_update_time = sg_policy->last_freq_update_time;
+	unsigned long util = 0, max = 1;
 	unsigned int j;
 
-	if (flags & SCHED_CPUFREQ_RT_DL)
-		return max_f;
-
-	sugov_iowait_boost(sg_cpu, &util, &max);
-
 	for_each_cpu(j, policy->cpus) {
-		struct sugov_cpu *j_sg_cpu;
+		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
 		unsigned long j_util, j_max;
 		s64 delta_ns;
 
-		if (j == smp_processor_id())
-			continue;
-
-		j_sg_cpu = &per_cpu(sugov_cpu, j);
 		/*
 		 * If the CPU utilization was last updated before the previous
 		 * frequency update and the time elapsed between the last update
@@ -254,7 +271,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
 			continue;
 		}
 		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
-			return max_f;
+			return policy->cpuinfo.max_freq;
 
 		j_util = j_sg_cpu->util;
 		j_max = j_sg_cpu->max;
@@ -289,7 +306,11 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 	sg_cpu->last_update = time;
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
+		if (flags & SCHED_CPUFREQ_RT_DL)
+			next_f = sg_policy->policy->cpuinfo.max_freq;
+		else
+			next_f = sugov_next_freq_shared(sg_cpu);
+
 		sugov_update_commit(sg_policy, time, next_f);
 	}
 
@@ -473,7 +494,6 @@ static int sugov_init(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy;
 	struct sugov_tunables *tunables;
-	unsigned int lat;
 	int ret = 0;
 
 	/* State should be equivalent to EXIT */
@@ -512,10 +532,16 @@ static int sugov_init(struct cpufreq_policy *policy)
 		goto stop_kthread;
 	}
 
-	tunables->rate_limit_us = LATENCY_MULTIPLIER;
-	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
-	if (lat)
-		tunables->rate_limit_us *= lat;
+	if (policy->transition_delay_us) {
+		tunables->rate_limit_us = policy->transition_delay_us;
+	} else {
+		unsigned int lat;
+
+		tunables->rate_limit_us = LATENCY_MULTIPLIER;
+		lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+		if (lat)
+			tunables->rate_limit_us *= lat;
+	}
 
 	policy->governor_data = sg_policy;
 	sg_policy->tunables = tunables;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7fe53be86077..64c97fc130c4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -993,6 +993,18 @@ ktime_t tick_nohz_get_sleep_length(void)
 	return ts->sleep_length;
 }
 
+/**
+ * tick_nohz_get_idle_calls - return the current idle calls counter value
+ *
+ * Called from the schedutil frequency scaling governor in scheduler context.
+ */
+unsigned long tick_nohz_get_idle_calls(void)
+{
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	return ts->idle_calls;
+}
+
 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 {
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
new file mode 100644
index 000000000000..4d0ccc89e6c6
--- /dev/null
+++ b/tools/power/pm-graph/Makefile
@@ -0,0 +1,28 @@
+PREFIX		?= /usr
+DESTDIR		?=
+
+all:
+	@echo "Nothing to build"
+
+install :
+	install -d  $(DESTDIR)$(PREFIX)/lib/pm-graph
+	install analyze_suspend.py $(DESTDIR)$(PREFIX)/lib/pm-graph
+	install analyze_boot.py $(DESTDIR)$(PREFIX)/lib/pm-graph
+
+	ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_boot.py $(DESTDIR)$(PREFIX)/bin/bootgraph
+	ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_suspend.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
+
+	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
+	install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
+	install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
+
+uninstall :
+	rm $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8
+	rm $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8
+
+	rm $(DESTDIR)$(PREFIX)/bin/bootgraph
+	rm $(DESTDIR)$(PREFIX)/bin/sleepgraph
+
+	rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_boot.py
+	rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_suspend.py
+	rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph
diff --git a/tools/power/pm-graph/analyze_boot.py b/tools/power/pm-graph/analyze_boot.py
new file mode 100755
index 000000000000..3e1dcbbf1adc
--- /dev/null
+++ b/tools/power/pm-graph/analyze_boot.py
@@ -0,0 +1,824 @@
+#!/usr/bin/python
+#
+# Tool for analyzing boot timing
+# Copyright (c) 2013, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# Authors:
+#	 Todd Brandt <todd.e.brandt@linux.intel.com>
+#
+# Description:
+#	 This tool is designed to assist kernel and OS developers in optimizing
+#	 their linux stack's boot time. It creates an html representation of
+#	 the kernel boot timeline up to the start of the init process.
+#
+
+# ----------------- LIBRARIES --------------------
+
+import sys
+import time
+import os
+import string
+import re
+import platform
+import shutil
+from datetime import datetime, timedelta
+from subprocess import call, Popen, PIPE
+import analyze_suspend as aslib
+
+# ----------------- CLASSES --------------------
+
+# Class: SystemValues
+# Description:
+#	 A global, single-instance container used to
+#	 store system values and test parameters
+class SystemValues(aslib.SystemValues):
+	title = 'BootGraph'
+	version = 2.0
+	hostname = 'localhost'
+	testtime = ''
+	kernel = ''
+	dmesgfile = ''
+	ftracefile = ''
+	htmlfile = 'bootgraph.html'
+	outfile = ''
+	phoronix = False
+	addlogs = False
+	useftrace = False
+	usedevsrc = True
+	suspendmode = 'boot'
+	max_graph_depth = 2
+	graph_filter = 'do_one_initcall'
+	reboot = False
+	manual = False
+	iscronjob = False
+	timeformat = '%.6f'
+	def __init__(self):
+		if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ):
+			self.phoronix = True
+			self.addlogs = True
+			self.outfile = os.environ['LOG_FILE']
+			self.htmlfile = os.environ['LOG_FILE']
+		self.hostname = platform.node()
+		self.testtime = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
+		if os.path.exists('/proc/version'):
+			fp = open('/proc/version', 'r')
+			val = fp.read().strip()
+			fp.close()
+			self.kernel = self.kernelVersion(val)
+		else:
+			self.kernel = 'unknown'
+	def kernelVersion(self, msg):
+		return msg.split()[2]
+	def kernelParams(self):
+		cmdline = 'initcall_debug log_buf_len=32M'
+		if self.useftrace:
+			cmdline += ' trace_buf_size=128M trace_clock=global '\
+			'trace_options=nooverwrite,funcgraph-abstime,funcgraph-cpu,'\
+			'funcgraph-duration,funcgraph-proc,funcgraph-tail,'\
+			'nofuncgraph-overhead,context-info,graph-time '\
+			'ftrace=function_graph '\
+			'ftrace_graph_max_depth=%d '\
+			'ftrace_graph_filter=%s' % \
+				(self.max_graph_depth, self.graph_filter)
+		return cmdline
+	def setGraphFilter(self, val):
+		fp = open(self.tpath+'available_filter_functions')
+		master = fp.read().split('\n')
+		fp.close()
+		for i in val.split(','):
+			func = i.strip()
+			if func not in master:
+				doError('function "%s" not available for ftrace' % func)
+		self.graph_filter = val
+	def cronjobCmdString(self):
+		cmdline = '%s -cronjob' % os.path.abspath(sys.argv[0])
+		args = iter(sys.argv[1:])
+		for arg in args:
+			if arg in ['-h', '-v', '-cronjob', '-reboot']:
+				continue
+			elif arg in ['-o', '-dmesg', '-ftrace', '-filter']:
+				args.next()
+				continue
+			cmdline += ' '+arg
+		if self.graph_filter != 'do_one_initcall':
+			cmdline += ' -filter "%s"' % self.graph_filter
+		cmdline += ' -o "%s"' % os.path.abspath(self.htmlfile)
+		return cmdline
+	def manualRebootRequired(self):
+		cmdline = self.kernelParams()
+		print 'To generate a new timeline manually, follow these steps:\n'
+		print '1. Add the CMDLINE string to your kernel command line.'
+		print '2. Reboot the system.'
+		print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n'
+		print 'CMDLINE="%s"' % cmdline
+		sys.exit()
+
+sysvals = SystemValues()
+
+# Class: Data
+# Description:
+#	 The primary container for test data.
+class Data(aslib.Data):
+	dmesg = {}  # root data structure
+	start = 0.0 # test start
+	end = 0.0   # test end
+	dmesgtext = []   # dmesg text file in memory
+	testnumber = 0
+	idstr = ''
+	html_device_id = 0
+	valid = False
+	initstart = 0.0
+	boottime = ''
+	phases = ['boot']
+	do_one_initcall = False
+	def __init__(self, num):
+		self.testnumber = num
+		self.idstr = 'a'
+		self.dmesgtext = []
+		self.dmesg = {
+			'boot': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0, 'color': '#dddddd'}
+		}
+	def deviceTopology(self):
+		return ''
+	def newAction(self, phase, name, start, end, ret, ulen):
+		# new device callback for a specific phase
+		self.html_device_id += 1
+		devid = '%s%d' % (self.idstr, self.html_device_id)
+		list = self.dmesg[phase]['list']
+		length = -1.0
+		if(start >= 0 and end >= 0):
+			length = end - start
+		i = 2
+		origname = name
+		while(name in list):
+			name = '%s[%d]' % (origname, i)
+			i += 1
+		list[name] = {'name': name, 'start': start, 'end': end,
+			'pid': 0, 'length': length, 'row': 0, 'id': devid,
+			'ret': ret, 'ulen': ulen }
+		return name
+	def deviceMatch(self, cg):
+		if cg.end - cg.start == 0:
+			return True
+		list = self.dmesg['boot']['list']
+		for devname in list:
+			dev = list[devname]
+			if cg.name == 'do_one_initcall':
+				if(cg.start <= dev['start'] and cg.end >= dev['end'] and dev['length'] > 0):
+					dev['ftrace'] = cg
+					self.do_one_initcall = True
+					return True
+			else:
+				if(cg.start > dev['start'] and cg.end < dev['end']):
+					if 'ftraces' not in dev:
+						dev['ftraces'] = []
+					dev['ftraces'].append(cg)
+					return True
+		return False
+
+# ----------------- FUNCTIONS --------------------
+
+# Function: loadKernelLog
+# Description:
+#	 Load a raw kernel log from dmesg
+def loadKernelLog():
+	data = Data(0)
+	data.dmesg['boot']['start'] = data.start = ktime = 0.0
+	sysvals.stamp = {
+		'time': datetime.now().strftime('%B %d %Y, %I:%M:%S %p'),
+		'host': sysvals.hostname,
+		'mode': 'boot', 'kernel': ''}
+
+	devtemp = dict()
+	if(sysvals.dmesgfile):
+		lf = open(sysvals.dmesgfile, 'r')
+	else:
+		lf = Popen('dmesg', stdout=PIPE).stdout
+	for line in lf:
+		line = line.replace('\r\n', '')
+		idx = line.find('[')
+		if idx > 1:
+			line = line[idx:]
+		m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+		if(not m):
+			continue
+		ktime = float(m.group('ktime'))
+		if(ktime > 120):
+			break
+		msg = m.group('msg')
+		data.end = data.initstart = ktime
+		data.dmesgtext.append(line)
+		if(ktime == 0.0 and re.match('^Linux version .*', msg)):
+			if(not sysvals.stamp['kernel']):
+				sysvals.stamp['kernel'] = sysvals.kernelVersion(msg)
+			continue
+		m = re.match('.* setting system clock to (?P<t>.*) UTC.*', msg)
+		if(m):
+			bt = datetime.strptime(m.group('t'), '%Y-%m-%d %H:%M:%S')
+			bt = bt - timedelta(seconds=int(ktime))
+			data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S')
+			sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p')
+			continue
+		m = re.match('^calling *(?P<f>.*)\+.*', msg)
+		if(m):
+			devtemp[m.group('f')] = ktime
+			continue
+		m = re.match('^initcall *(?P<f>.*)\+.* returned (?P<r>.*) after (?P<t>.*) usecs', msg)
+		if(m):
+			data.valid = True
+			f, r, t = m.group('f', 'r', 't')
+			if(f in devtemp):
+				data.newAction('boot', f, devtemp[f], ktime, int(r), int(t))
+				data.end = ktime
+				del devtemp[f]
+			continue
+		if(re.match('^Freeing unused kernel memory.*', msg)):
+			break
+
+	data.dmesg['boot']['end'] = data.end
+	lf.close()
+	return data
+
+# Function: loadTraceLog
+# Description:
+#	 Check if trace is available and copy to a temp file
+def loadTraceLog(data):
+	# load the data to a temp file if none given
+	if not sysvals.ftracefile:
+		lib = aslib.sysvals
+		aslib.rootCheck(True)
+		if not lib.verifyFtrace():
+			doError('ftrace not available')
+		if lib.fgetVal('current_tracer').strip() != 'function_graph':
+			doError('ftrace not configured for a boot callgraph')
+		sysvals.ftracefile = '/tmp/boot_ftrace.%s.txt' % os.getpid()
+		call('cat '+lib.tpath+'trace > '+sysvals.ftracefile, shell=True)
+	if not sysvals.ftracefile:
+		doError('No trace data available')
+
+	# parse the trace log
+	ftemp = dict()
+	tp = aslib.TestProps()
+	tp.setTracerType('function_graph')
+	tf = open(sysvals.ftracefile, 'r')
+	for line in tf:
+		if line[0] == '#':
+			continue
+		m = re.match(tp.ftrace_line_fmt, line.strip())
+		if(not m):
+			continue
+		m_time, m_proc, m_pid, m_msg, m_dur = \
+			m.group('time', 'proc', 'pid', 'msg', 'dur')
+		if float(m_time) > data.end:
+			break
+		if(m_time and m_pid and m_msg):
+			t = aslib.FTraceLine(m_time, m_msg, m_dur)
+			pid = int(m_pid)
+		else:
+			continue
+		if t.fevent or t.fkprobe:
+			continue
+		key = (m_proc, pid)
+		if(key not in ftemp):
+			ftemp[key] = []
+			ftemp[key].append(aslib.FTraceCallGraph(pid))
+		cg = ftemp[key][-1]
+		if(cg.addLine(t)):
+			ftemp[key].append(aslib.FTraceCallGraph(pid))
+	tf.close()
+
+	# add the callgraph data to the device hierarchy
+	for key in ftemp:
+		proc, pid = key
+		for cg in ftemp[key]:
+			if len(cg.list) < 1 or cg.invalid:
+				continue
+			if(not cg.postProcess()):
+				print('Sanity check failed for %s-%d' % (proc, pid))
+				continue
+			# match cg data to devices
+			if not data.deviceMatch(cg):
+				print ' BAD: %s %s-%d [%f - %f]' % (cg.name, proc, pid, cg.start, cg.end)
+
+# Function: colorForName
+# Description:
+#	 Generate a repeatable color from a list for a given name
+def colorForName(name):
+	list = [
+		('c1', '#ec9999'),
+		('c2', '#ffc1a6'),
+		('c3', '#fff0a6'),
+		('c4', '#adf199'),
+		('c5', '#9fadea'),
+		('c6', '#a699c1'),
+		('c7', '#ad99b4'),
+		('c8', '#eaffea'),
+		('c9', '#dcecfb'),
+		('c10', '#ffffea')
+	]
+	i = 0
+	total = 0
+	count = len(list)
+	while i < len(name):
+		total += ord(name[i])
+		i += 1
+	return list[total % count]
+
+def cgOverview(cg, minlen):
+	stats = dict()
+	large = []
+	for l in cg.list:
+		if l.fcall and l.depth == 1:
+			if l.length >= minlen:
+				large.append(l)
+			if l.name not in stats:
+				stats[l.name] = [0, 0.0]
+			stats[l.name][0] += (l.length * 1000.0)
+			stats[l.name][1] += 1
+	return (large, stats)
+
+# Function: createBootGraph
+# Description:
+#	 Create the output html file from the resident test data
+# Arguments:
+#	 testruns: array of Data objects from parseKernelLog or parseTraceLog
+# Output:
+#	 True if the html file was created, false if it failed
+def createBootGraph(data, embedded):
+	# html function templates
+	html_srccall = '<div id={6} title="{5}" class="srccall" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;">{0}</div>\n'
+	html_timetotal = '<table class="time1">\n<tr>'\
+		'<td class="blue">Time from Kernel Boot to start of User Mode: <b>{0} ms</b></td>'\
+		'</tr>\n</table>\n'
+
+	# device timeline
+	devtl = aslib.Timeline(100, 20)
+
+	# write the test title and general info header
+	devtl.createHeader(sysvals, 'noftrace')
+
+	# Generate the header for this timeline
+	t0 = data.start
+	tMax = data.end
+	tTotal = tMax - t0
+	if(tTotal == 0):
+		print('ERROR: No timeline data')
+		return False
+	boot_time = '%.0f'%(tTotal*1000)
+	devtl.html += html_timetotal.format(boot_time)
+
+	# determine the maximum number of rows we need to draw
+	phase = 'boot'
+	list = data.dmesg[phase]['list']
+	devlist = []
+	for devname in list:
+		d = aslib.DevItem(0, phase, list[devname])
+		devlist.append(d)
+	devtl.getPhaseRows(devlist)
+	devtl.calcTotalRows()
+
+	# draw the timeline background
+	devtl.createZoomBox()
+	boot = data.dmesg[phase]
+	length = boot['end']-boot['start']
+	left = '%.3f' % (((boot['start']-t0)*100.0)/tTotal)
+	width = '%.3f' % ((length*100.0)/tTotal)
+	devtl.html += devtl.html_tblock.format(phase, left, width, devtl.scaleH)
+	devtl.html += devtl.html_phase.format('0', '100', \
+		'%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \
+		'white', '')
+
+	# draw the device timeline
+	num = 0
+	devstats = dict()
+	for devname in sorted(list):
+		cls, color = colorForName(devname)
+		dev = list[devname]
+		info = '@|%.3f|%.3f|%.3f|%d' % (dev['start']*1000.0, dev['end']*1000.0,
+			dev['ulen']/1000.0, dev['ret'])
+		devstats[dev['id']] = {'info':info}
+		dev['color'] = color
+		height = devtl.phaseRowHeight(0, phase, dev['row'])
+		top = '%.6f' % ((dev['row']*height) + devtl.scaleH)
+		left = '%.6f' % (((dev['start']-t0)*100)/tTotal)
+		width = '%.6f' % (((dev['end']-dev['start'])*100)/tTotal)
+		length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000)
+		devtl.html += devtl.html_device.format(dev['id'],
+			devname+length+'kernel_mode', left, top, '%.3f'%height,
+			width, devname, ' '+cls, '')
+		rowtop = devtl.phaseRowTop(0, phase, dev['row'])
+		height = '%.6f' % (devtl.rowH / 2)
+		top = '%.6f' % (rowtop + devtl.scaleH + (devtl.rowH / 2))
+		if data.do_one_initcall:
+			if('ftrace' not in dev):
+				continue
+			cg = dev['ftrace']
+			large, stats = cgOverview(cg, 0.001)
+			devstats[dev['id']]['fstat'] = stats
+			for l in large:
+				left = '%f' % (((l.time-t0)*100)/tTotal)
+				width = '%f' % (l.length*100/tTotal)
+				title = '%s (%0.3fms)' % (l.name, l.length * 1000.0)
+				devtl.html += html_srccall.format(l.name, left,
+					top, height, width, title, 'x%d'%num)
+				num += 1
+			continue
+		if('ftraces' not in dev):
+			continue
+		for cg in dev['ftraces']:
+			left = '%f' % (((cg.start-t0)*100)/tTotal)
+			width = '%f' % ((cg.end-cg.start)*100/tTotal)
+			cglen = (cg.end - cg.start) * 1000.0
+			title = '%s (%0.3fms)' % (cg.name, cglen)
+			cg.id = 'x%d' % num
+			devtl.html += html_srccall.format(cg.name, left,
+				top, height, width, title, dev['id']+cg.id)
+			num += 1
+
+	# draw the time scale, try to make the number of labels readable
+	devtl.createTimeScale(t0, tMax, tTotal, phase)
+	devtl.html += '</div>\n'
+
+	# timeline is finished
+	devtl.html += '</div>\n</div>\n'
+
+	if(sysvals.outfile == sysvals.htmlfile):
+		hf = open(sysvals.htmlfile, 'a')
+	else:
+		hf = open(sysvals.htmlfile, 'w')
+
+	# add the css if this is not an embedded run
+	extra = '\
+		.c1 {background:rgba(209,0,0,0.4);}\n\
+		.c2 {background:rgba(255,102,34,0.4);}\n\
+		.c3 {background:rgba(255,218,33,0.4);}\n\
+		.c4 {background:rgba(51,221,0,0.4);}\n\
+		.c5 {background:rgba(17,51,204,0.4);}\n\
+		.c6 {background:rgba(34,0,102,0.4);}\n\
+		.c7 {background:rgba(51,0,68,0.4);}\n\
+		.c8 {background:rgba(204,255,204,0.4);}\n\
+		.c9 {background:rgba(169,208,245,0.4);}\n\
+		.c10 {background:rgba(255,255,204,0.4);}\n\
+		.vt {transform:rotate(-60deg);transform-origin:0 0;}\n\
+		table.fstat {table-layout:fixed;padding:150px 15px 0 0;font-size:10px;column-width:30px;}\n\
+		.fstat th {width:55px;}\n\
+		.fstat td {text-align:left;width:35px;}\n\
+		.srccall {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\
+		.srccall:hover {color:white;font-weight:bold;border:1px solid white;}\n'
+	if(not embedded):
+		aslib.addCSS(hf, sysvals, 1, False, extra)
+
+	# write the device timeline
+	hf.write(devtl.html)
+
+	# add boot specific html
+	statinfo = 'var devstats = {\n'
+	for n in sorted(devstats):
+		statinfo += '\t"%s": [\n\t\t"%s",\n' % (n, devstats[n]['info'])
+		if 'fstat' in devstats[n]:
+			funcs = devstats[n]['fstat']
+			for f in sorted(funcs, key=funcs.get, reverse=True):
+				if funcs[f][0] < 0.01 and len(funcs) > 10:
+					break
+				statinfo += '\t\t"%f|%s|%d",\n' % (funcs[f][0], f, funcs[f][1])
+		statinfo += '\t],\n'
+	statinfo += '};\n'
+	html = \
+		'<div id="devicedetailtitle"></div>\n'\
+		'<div id="devicedetail" style="display:none;">\n'\
+		'<div id="devicedetail0">\n'\
+		'<div id="kernel_mode" class="phaselet" style="left:0%;width:100%;background:#DDDDDD"></div>\n'\
+		'</div>\n</div>\n'\
+		'<script type="text/javascript">\n'+statinfo+\
+		'</script>\n'
+	hf.write(html)
+
+	# add the callgraph html
+	if(sysvals.usecallgraph):
+		aslib.addCallgraphs(sysvals, hf, data)
+
+	# add the dmesg log as a hidden div
+	if sysvals.addlogs:
+		hf.write('<div id="dmesglog" style="display:none;">\n')
+		for line in data.dmesgtext:
+			line = line.replace('<', '&lt').replace('>', '&gt')
+			hf.write(line)
+		hf.write('</div>\n')
+
+	if(not embedded):
+		# write the footer and close
+		aslib.addScriptCode(hf, [data])
+		hf.write('</body>\n</html>\n')
+	else:
+		# embedded out will be loaded in a page, skip the js
+		hf.write('<div id=bounds style=display:none>%f,%f</div>' % \
+			(data.start*1000, data.initstart*1000))
+	hf.close()
+	return True
+
+# Function: updateCron
+# Description:
+#    (restore=False) Set the tool to run automatically on reboot
+#    (restore=True) Restore the original crontab
+def updateCron(restore=False):
+	if not restore:
+		sysvals.rootUser(True)
+	crondir = '/var/spool/cron/crontabs/'
+	cronfile = crondir+'root'
+	backfile = crondir+'root-analyze_boot-backup'
+	if not os.path.exists(crondir):
+		doError('%s not found' % crondir)
+	out = Popen(['which', 'crontab'], stdout=PIPE).stdout.read()
+	if not out:
+		doError('crontab not found')
+	# on restore: move the backup cron back into place
+	if restore:
+		if os.path.exists(backfile):
+			shutil.move(backfile, cronfile)
+		return
+	# backup current cron and install new one with reboot
+	if os.path.exists(cronfile):
+		shutil.move(cronfile, backfile)
+	else:
+		fp = open(backfile, 'w')
+		fp.close()
+	res = -1
+	try:
+		fp = open(backfile, 'r')
+		op = open(cronfile, 'w')
+		for line in fp:
+			if '@reboot' not in line:
+				op.write(line)
+				continue
+		fp.close()
+		op.write('@reboot python %s\n' % sysvals.cronjobCmdString())
+		op.close()
+		res = call('crontab %s' % cronfile, shell=True)
+	except Exception, e:
+		print 'Exception: %s' % str(e)
+		shutil.move(backfile, cronfile)
+		res = -1
+	if res != 0:
+		doError('crontab failed')
+
+# Function: updateGrub
+# Description:
+#	 update grub.cfg for all kernels with our parameters
+def updateGrub(restore=False):
+	# call update-grub on restore
+	if restore:
+		try:
+			call(['update-grub'], stderr=PIPE, stdout=PIPE,
+				env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'})
+		except Exception, e:
+			print 'Exception: %s\n' % str(e)
+		return
+	# verify we can do this
+	sysvals.rootUser(True)
+	grubfile = '/etc/default/grub'
+	if not os.path.exists(grubfile):
+		print 'ERROR: Unable to set the kernel parameters via grub.\n'
+		sysvals.manualRebootRequired()
+	out = Popen(['which', 'update-grub'], stdout=PIPE).stdout.read()
+	if not out:
+		print 'ERROR: Unable to set the kernel parameters via grub.\n'
+		sysvals.manualRebootRequired()
+
+	# extract the option and create a grub config without it
+	tgtopt = 'GRUB_CMDLINE_LINUX_DEFAULT'
+	cmdline = ''
+	tempfile = '/etc/default/grub.analyze_boot'
+	shutil.move(grubfile, tempfile)
+	res = -1
+	try:
+		fp = open(tempfile, 'r')
+		op = open(grubfile, 'w')
+		cont = False
+		for line in fp:
+			line = line.strip()
+			if len(line) == 0 or line[0] == '#':
+				continue
+			opt = line.split('=')[0].strip()
+			if opt == tgtopt:
+				cmdline = line.split('=', 1)[1].strip('\\')
+				if line[-1] == '\\':
+					cont = True
+			elif cont:
+				cmdline += line.strip('\\')
+				if line[-1] != '\\':
+					cont = False
+			else:
+				op.write('%s\n' % line)
+		fp.close()
+		# if the target option value is in quotes, strip them
+		sp = '"'
+		val = cmdline.strip()
+		if val[0] == '\'' or val[0] == '"':
+			sp = val[0]
+			val = val.strip(sp)
+		cmdline = val
+		# append our cmd line options
+		if len(cmdline) > 0:
+			cmdline += ' '
+		cmdline += sysvals.kernelParams()
+		# write out the updated target option
+		op.write('\n%s=%s%s%s\n' % (tgtopt, sp, cmdline, sp))
+		op.close()
+		res = call('update-grub')
+		os.remove(grubfile)
+	except Exception, e:
+		print 'Exception: %s' % str(e)
+		res = -1
+	# cleanup
+	shutil.move(tempfile, grubfile)
+	if res != 0:
+		doError('update-grub failed')
+
+# Function: doError
+# Description:
+#	 generic error function for catastrphic failures
+# Arguments:
+#	 msg: the error message to print
+#	 help: True if printHelp should be called after, False otherwise
+def doError(msg, help=False):
+	if help == True:
+		printHelp()
+	print 'ERROR: %s\n' % msg
+	sys.exit()
+
+# Function: printHelp
+# Description:
+#	 print out the help text
+def printHelp():
+	print('')
+	print('%s v%.1f' % (sysvals.title, sysvals.version))
+	print('Usage: bootgraph <options> <command>')
+	print('')
+	print('Description:')
+	print('  This tool reads in a dmesg log of linux kernel boot and')
+	print('  creates an html representation of the boot timeline up to')
+	print('  the start of the init process.')
+	print('')
+	print('  If no specific command is given the tool reads the current dmesg')
+	print('  and/or ftrace log and outputs bootgraph.html')
+	print('')
+	print('Options:')
+	print('  -h            Print this help text')
+	print('  -v            Print the current tool version')
+	print('  -addlogs      Add the dmesg log to the html output')
+	print('  -o file       Html timeline name (default: bootgraph.html)')
+	print(' [advanced]')
+	print('  -f            Use ftrace to add function detail (default: disabled)')
+	print('  -callgraph    Add callgraph detail, can be very large (default: disabled)')
+	print('  -maxdepth N   limit the callgraph data to N call levels (default: 2)')
+	print('  -mincg ms     Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
+	print('  -timeprec N   Number of significant digits in timestamps (0:S, 3:ms, [6:us])')
+	print('  -expandcg     pre-expand the callgraph data in the html output (default: disabled)')
+	print('  -filter list  Limit ftrace to comma-delimited list of functions (default: do_one_initcall)')
+	print(' [commands]')
+	print('  -reboot       Reboot the machine automatically and generate a new timeline')
+	print('  -manual       Show the requirements to generate a new timeline manually')
+	print('  -dmesg file   Load a stored dmesg file (used with -ftrace)')
+	print('  -ftrace file  Load a stored ftrace file (used with -dmesg)')
+	print('  -flistall     Print all functions capable of being captured in ftrace')
+	print('')
+	return True
+
+# ----------------- MAIN --------------------
+# exec start (skipped if script is loaded as library)
+if __name__ == '__main__':
+	# loop through the command line arguments
+	cmd = ''
+	simplecmds = ['-updategrub', '-flistall']
+	args = iter(sys.argv[1:])
+	for arg in args:
+		if(arg == '-h'):
+			printHelp()
+			sys.exit()
+		elif(arg == '-v'):
+			print("Version %.1f" % sysvals.version)
+			sys.exit()
+		elif(arg in simplecmds):
+			cmd = arg[1:]
+		elif(arg == '-f'):
+			sysvals.useftrace = True
+		elif(arg == '-callgraph'):
+			sysvals.useftrace = True
+			sysvals.usecallgraph = True
+		elif(arg == '-mincg'):
+			sysvals.mincglen = aslib.getArgFloat('-mincg', args, 0.0, 10000.0)
+		elif(arg == '-timeprec'):
+			sysvals.setPrecision(aslib.getArgInt('-timeprec', args, 0, 6))
+		elif(arg == '-maxdepth'):
+			sysvals.max_graph_depth = aslib.getArgInt('-maxdepth', args, 0, 1000)
+		elif(arg == '-filter'):
+			try:
+				val = args.next()
+			except:
+				doError('No filter functions supplied', True)
+			aslib.rootCheck(True)
+			sysvals.setGraphFilter(val)
+		elif(arg == '-ftrace'):
+			try:
+				val = args.next()
+			except:
+				doError('No ftrace file supplied', True)
+			if(os.path.exists(val) == False):
+				doError('%s does not exist' % val)
+			sysvals.ftracefile = val
+		elif(arg == '-addlogs'):
+			sysvals.addlogs = True
+		elif(arg == '-expandcg'):
+			sysvals.cgexp = True
+		elif(arg == '-dmesg'):
+			try:
+				val = args.next()
+			except:
+				doError('No dmesg file supplied', True)
+			if(os.path.exists(val) == False):
+				doError('%s does not exist' % val)
+			if(sysvals.htmlfile == val or sysvals.outfile == val):
+				doError('Output filename collision')
+			sysvals.dmesgfile = val
+		elif(arg == '-o'):
+			try:
+				val = args.next()
+			except:
+				doError('No HTML filename supplied', True)
+			if(sysvals.dmesgfile == val or sysvals.ftracefile == val):
+				doError('Output filename collision')
+			sysvals.htmlfile = val
+		elif(arg == '-reboot'):
+			if sysvals.iscronjob:
+				doError('-reboot and -cronjob are incompatible')
+			sysvals.reboot = True
+		elif(arg == '-manual'):
+			sysvals.reboot = True
+			sysvals.manual = True
+		# remaining options are only for cron job use
+		elif(arg == '-cronjob'):
+			sysvals.iscronjob = True
+			if sysvals.reboot:
+				doError('-reboot and -cronjob are incompatible')
+		else:
+			doError('Invalid argument: '+arg, True)
+
+	if cmd != '':
+		if cmd == 'updategrub':
+			updateGrub()
+		elif cmd == 'flistall':
+			sysvals.getFtraceFilterFunctions(False)
+		sys.exit()
+
+	# update grub, setup a cronjob, and reboot
+	if sysvals.reboot:
+		if not sysvals.manual:
+			updateGrub()
+			updateCron()
+			call('reboot')
+		else:
+			sysvals.manualRebootRequired()
+		sys.exit()
+
+	# disable the cronjob
+	if sysvals.iscronjob:
+		updateCron(True)
+		updateGrub(True)
+
+	data = loadKernelLog()
+	if sysvals.useftrace:
+		loadTraceLog(data)
+		if sysvals.iscronjob:
+			try:
+				sysvals.fsetVal('0', 'tracing_on')
+			except:
+				pass
+
+	if(sysvals.outfile and sysvals.phoronix):
+		fp = open(sysvals.outfile, 'w')
+		fp.write('pass %s initstart %.3f end %.3f boot %s\n' %
+			(data.valid, data.initstart*1000, data.end*1000, data.boottime))
+		fp.close()
+	if(not data.valid):
+		if sysvals.dmesgfile:
+			doError('No initcall data found in %s' % sysvals.dmesgfile)
+		else:
+			doError('No initcall data found, is initcall_debug enabled?')
+
+	print('          Host: %s' % sysvals.hostname)
+	print('     Test time: %s' % sysvals.testtime)
+	print('     Boot time: %s' % data.boottime)
+	print('Kernel Version: %s' % sysvals.kernel)
+	print('  Kernel start: %.3f' % (data.start * 1000))
+	print('    init start: %.3f' % (data.initstart * 1000))
+
+	createBootGraph(data, sysvals.phoronix)
diff --git a/scripts/analyze_suspend.py b/tools/power/pm-graph/analyze_suspend.py
similarity index 91%
rename from scripts/analyze_suspend.py
rename to tools/power/pm-graph/analyze_suspend.py
index 20cdb2bc1dae..a9206e67fc1f 100755
--- a/scripts/analyze_suspend.py
+++ b/tools/power/pm-graph/analyze_suspend.py
@@ -12,10 +12,6 @@
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 # more details.
 #
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
 # Authors:
 #	 Todd Brandt <todd.e.brandt@linux.intel.com>
 #
@@ -23,7 +19,7 @@
 #	 Home Page
 #	   https://01.org/suspendresume
 #	 Source repo
-#	   https://github.com/01org/suspendresume
+#	   https://github.com/01org/pm-graph
 #
 # Description:
 #	 This tool is designed to assist kernel and OS developers in optimizing
@@ -71,14 +67,16 @@ from subprocess import call, Popen, PIPE
 #	 A global, single-instance container used to
 #	 store system values and test parameters
 class SystemValues:
+	title = 'SleepGraph'
+	version = '4.6'
 	ansi = False
-	version = '4.5'
 	verbose = False
 	addlogs = False
 	mindevlen = 0.0
 	mincglen = 0.0
 	cgphase = ''
 	cgtest = -1
+	max_graph_depth = 0
 	callloopmaxgap = 0.0001
 	callloopmaxlen = 0.005
 	srgap = 0
@@ -106,8 +104,8 @@ class SystemValues:
 	ftracefile = ''
 	htmlfile = ''
 	embedded = False
-	rtcwake = False
-	rtcwaketime = 10
+	rtcwake = True
+	rtcwaketime = 15
 	rtcpath = ''
 	devicefilter = []
 	stamp = 0
@@ -235,6 +233,12 @@ class SystemValues:
 			self.rtcpath = rtc
 		if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()):
 			self.ansi = True
+	def rootUser(self, fatal=False):
+		if 'USER' in os.environ and os.environ['USER'] == 'root':
+			return True
+		if fatal:
+			doError('This command must be run as root')
+		return False
 	def setPrecision(self, num):
 		if num < 0 or num > 6:
 			return
@@ -564,7 +568,7 @@ class SystemValues:
 		self.fsetVal('global', 'trace_clock')
 		# set trace buffer to a huge value
 		self.fsetVal('nop', 'current_tracer')
-		self.fsetVal('100000', 'buffer_size_kb')
+		self.fsetVal('131073', 'buffer_size_kb')
 		# go no further if this is just a status check
 		if testing:
 			return
@@ -583,7 +587,7 @@ class SystemValues:
 			self.fsetVal('nofuncgraph-overhead', 'trace_options')
 			self.fsetVal('context-info', 'trace_options')
 			self.fsetVal('graph-time', 'trace_options')
-			self.fsetVal('0', 'max_graph_depth')
+			self.fsetVal('%d' % self.max_graph_depth, 'max_graph_depth')
 			cf = ['dpm_run_callback']
 			if(self.usetraceeventsonly):
 				cf += ['dpm_prepare', 'dpm_complete']
@@ -639,6 +643,12 @@ class SystemValues:
 		return '\x1B[%d;40m%s\x1B[m' % (color, str)
 
 sysvals = SystemValues()
+suspendmodename = {
+	'freeze': 'Freeze (S0)',
+	'standby': 'Standby (S1)',
+	'mem': 'Suspend (S3)',
+	'disk': 'Hibernate (S4)'
+}
 
 # Class: DevProps
 # Description:
@@ -1013,6 +1023,8 @@ class Data:
 		tmp = dict()
 		for devname in list:
 			dev = list[devname]
+			if dev['length'] == 0:
+				continue
 			tmp[dev['start']] = devname
 		for t in sorted(tmp):
 			slist.append(tmp[t])
@@ -1477,12 +1489,14 @@ class FTraceLine:
 #	 Each instance is tied to a single device in a single phase, and is
 #	 comprised of an ordered list of FTraceLine objects
 class FTraceCallGraph:
+	id = ''
 	start = -1.0
 	end = -1.0
 	list = []
 	invalid = False
 	depth = 0
 	pid = 0
+	name = ''
 	def __init__(self, pid):
 		self.start = -1.0
 		self.end = -1.0
@@ -1631,9 +1645,17 @@ class FTraceCallGraph:
 				return True
 		return False
 	def postProcess(self, debug=False):
+		if len(self.list) > 0:
+			self.name = self.list[0].name
 		stack = dict()
 		cnt = 0
+		last = 0
 		for l in self.list:
+			# ftrace bug: reported duration is not reliable
+			# check each leaf and clip it at max possible length
+			if(last and last.freturn and last.fcall):
+				if last.length > l.time - last.time:
+					last.length = l.time - last.time
 			if(l.fcall and not l.freturn):
 				stack[l.depth] = l
 				cnt += 1
@@ -1643,11 +1665,12 @@ class FTraceCallGraph:
 						print 'Post Process Error: Depth missing'
 						l.debugPrint()
 					return False
-				# transfer total time from return line to call line
-				stack[l.depth].length = l.length
+				# calculate call length from call/return lines
+				stack[l.depth].length = l.time - stack[l.depth].time
 				stack.pop(l.depth)
 				l.length = 0
 				cnt -= 1
+			last = l
 		if(cnt == 0):
 			# trace caught the whole call tree
 			return True
@@ -1664,8 +1687,8 @@ class FTraceCallGraph:
 			'dpm_prepare': 'suspend_prepare',
 			'dpm_complete': 'resume_complete'
 		}
-		if(self.list[0].name in borderphase):
-			p = borderphase[self.list[0].name]
+		if(self.name in borderphase):
+			p = borderphase[self.name]
 			list = data.dmesg[p]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1690,7 +1713,7 @@ class FTraceCallGraph:
 				break
 		return found
 	def newActionFromFunction(self, data):
-		name = self.list[0].name
+		name = self.name
 		if name in ['dpm_run_callback', 'dpm_prepare', 'dpm_complete']:
 			return
 		fs = self.start
@@ -1710,7 +1733,7 @@ class FTraceCallGraph:
 			phase, myname = out
 			data.dmesg[phase]['list'][myname]['ftrace'] = self
 	def debugPrint(self):
-		print('[%f - %f] %s (%d)') % (self.start, self.end, self.list[0].name, self.pid)
+		print('[%f - %f] %s (%d)') % (self.start, self.end, self.name, self.pid)
 		for l in self.list:
 			if(l.freturn and l.fcall):
 				print('%f (%02d): %s(); (%.3f us)' % (l.time, \
@@ -1738,7 +1761,7 @@ class DevItem:
 #	 A container for a device timeline which calculates
 #	 all the html properties to display it correctly
 class Timeline:
-	html = {}
+	html = ''
 	height = 0	# total timeline height
 	scaleH = 20	# timescale (top) row height
 	rowH = 30	# device row height
@@ -1746,14 +1769,28 @@ class Timeline:
 	rows = 0	# total timeline rows
 	rowlines = dict()
 	rowheight = dict()
+	html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;"><div class="tback" style="height:{3}px"></div>\n'
+	html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
+	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background:{4}">{5}</div>\n'
+	html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n'
 	def __init__(self, rowheight, scaleheight):
 		self.rowH = rowheight
 		self.scaleH = scaleheight
-		self.html = {
-			'header': '',
-			'timeline': '',
-			'legend': '',
-		}
+		self.html = ''
+	def createHeader(self, sv, suppress=''):
+		if(not sv.stamp['time']):
+			return
+		self.html += '<div class="version"><a href="https://01.org/suspendresume">%s v%s</a></div>' \
+			% (sv.title, sv.version)
+		if sv.logmsg and 'log' not in suppress:
+			self.html += '<button id="showtest" class="logbtn">log</button>'
+		if sv.addlogs and 'dmesg' not in suppress:
+			self.html += '<button id="showdmesg" class="logbtn">dmesg</button>'
+		if sv.addlogs and sv.ftracefile and 'ftrace' not in suppress:
+			self.html += '<button id="showftrace" class="logbtn">ftrace</button>'
+		headline_stamp = '<div class="stamp">{0} {1} {2} {3}</div>\n'
+		self.html += headline_stamp.format(sv.stamp['host'], sv.stamp['kernel'],
+			sv.stamp['mode'], sv.stamp['time'])
 	# Function: getDeviceRows
 	# Description:
 	#    determine how may rows the device funcs will take
@@ -1880,10 +1917,8 @@ class Timeline:
 				break
 			top += self.rowheight[test][phase][i]
 		return top
-	# Function: calcTotalRows
-	# Description:
-	#	 Calculate the heights and offsets for the header and rows
 	def calcTotalRows(self):
+		# Calculate the heights and offsets for the header and rows
 		maxrows = 0
 		standardphases = []
 		for t in self.rowlines:
@@ -1901,6 +1936,20 @@ class Timeline:
 		for t, p in standardphases:
 			for i in sorted(self.rowheight[t][p]):
 				self.rowheight[t][p][i] = self.bodyH/len(self.rowlines[t][p])
+	def createZoomBox(self, mode='command', testcount=1):
+		# Create bounding box, add buttons
+		html_zoombox = '<center><button id="zoomin">ZOOM IN +</button><button id="zoomout">ZOOM OUT -</button><button id="zoomdef">ZOOM 1:1</button></center>\n'
+		html_timeline = '<div id="dmesgzoombox" class="zoombox">\n<div id="{0}" class="timeline" style="height:{1}px">\n'
+		html_devlist1 = '<button id="devlist1" class="devlist" style="float:left;">Device Detail{0}</button>'
+		html_devlist2 = '<button id="devlist2" class="devlist" style="float:right;">Device Detail2</button>\n'
+		if mode != 'command':
+			if testcount > 1:
+				self.html += html_devlist2
+				self.html += html_devlist1.format('1')
+			else:
+				self.html += html_devlist1.format('')
+		self.html += html_zoombox
+		self.html += html_timeline.format('dmesg', self.height)
 	# Function: createTimeScale
 	# Description:
 	#	 Create the timescale for a timeline block
@@ -1913,7 +1962,7 @@ class Timeline:
 	#	 The html code needed to display the time scale
 	def createTimeScale(self, m0, mMax, tTotal, mode):
 		timescale = '<div class="t" style="right:{0}%">{1}</div>\n'
-		rline = '<div class="t" style="left:0;border-left:1px solid black;border-right:0;">Resume</div>\n'
+		rline = '<div class="t" style="left:0;border-left:1px solid black;border-right:0;">{0}</div>\n'
 		output = '<div class="timescale">\n'
 		# set scale for timeline
 		mTotal = mMax - m0
@@ -1926,21 +1975,20 @@ class Timeline:
 		divEdge = (mTotal - tS*(divTotal-1))*100/mTotal
 		for i in range(divTotal):
 			htmlline = ''
-			if(mode == 'resume'):
+			if(mode == 'suspend'):
+				pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal) - divEdge)
+				val = '%0.fms' % (float(i-divTotal+1)*tS*1000)
+				if(i == divTotal - 1):
+					val = mode
+				htmlline = timescale.format(pos, val)
+			else:
 				pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal))
 				val = '%0.fms' % (float(i)*tS*1000)
 				htmlline = timescale.format(pos, val)
 				if(i == 0):
-					htmlline = rline
-			else:
-				pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal) - divEdge)
-				val = '%0.fms' % (float(i-divTotal+1)*tS*1000)
-				if(i == divTotal - 1):
-					val = 'Suspend'
-				htmlline = timescale.format(pos, val)
+					htmlline = rline.format(mode)
 			output += htmlline
-		output += '</div>\n'
-		return output
+		self.html += output+'</div>\n'
 
 # Class: TestProps
 # Description:
@@ -2009,7 +2057,7 @@ class ProcessMonitor:
 				val['kern'] = kern
 			if ujiff > 0 or kjiff > 0:
 				running[pid] = ujiff + kjiff
-		result = process.wait()
+		process.wait()
 		out = ''
 		for pid in running:
 			jiffies = running[pid]
@@ -2071,26 +2119,6 @@ def parseStamp(line, data):
 	if not sysvals.stamp:
 		sysvals.stamp = data.stamp
 
-# Function: diffStamp
-# Description:
-#	compare the host, kernel, and mode fields in 3 stamps
-# Arguments:
-#	 stamp1: string array with mode, kernel, and host
-#	 stamp2: string array with mode, kernel, and host
-# Return:
-#	True if stamps differ, False if they're the same
-def diffStamp(stamp1, stamp2):
-	if 'host' in stamp1 and 'host' in stamp2:
-		if stamp1['host'] != stamp2['host']:
-			return True
-	if 'kernel' in stamp1 and 'kernel' in stamp2:
-		if stamp1['kernel'] != stamp2['kernel']:
-			return True
-	if 'mode' in stamp1 and 'mode' in stamp2:
-		if stamp1['mode'] != stamp2['mode']:
-			return True
-	return False
-
 # Function: doesTraceLogHaveTraceEvents
 # Description:
 #	 Quickly determine if the ftrace log has some or all of the trace events
@@ -2722,7 +2750,7 @@ def parseTraceLog():
 			# create blocks for orphan cg data
 			for sortkey in sorted(sortlist):
 				cg = sortlist[sortkey]
-				name = cg.list[0].name
+				name = cg.name
 				if sysvals.isCallgraphFunc(name):
 					vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name))
 					cg.newActionFromFunction(data)
@@ -3100,149 +3128,154 @@ def parseKernelLog(data):
 	data.fixupInitcallsThatDidntReturn()
 	return True
 
+def callgraphHTML(sv, hf, num, cg, title, color, devid):
+	html_func_top = '<article id="{0}" class="atop" style="background:{1}">\n<input type="checkbox" class="pf" id="f{2}" checked/><label for="f{2}">{3} {4}</label>\n'
+	html_func_start = '<article>\n<input type="checkbox" class="pf" id="f{0}" checked/><label for="f{0}">{1} {2}</label>\n'
+	html_func_end = '</article>\n'
+	html_func_leaf = '<article>{0} {1}</article>\n'
+
+	cgid = devid
+	if cg.id:
+		cgid += cg.id
+	cglen = (cg.end - cg.start) * 1000
+	if cglen < sv.mincglen:
+		return num
+
+	fmt = '<r>(%.3f ms @ '+sv.timeformat+' to '+sv.timeformat+')</r>'
+	flen = fmt % (cglen, cg.start, cg.end)
+	hf.write(html_func_top.format(cgid, color, num, title, flen))
+	num += 1
+	for line in cg.list:
+		if(line.length < 0.000000001):
+			flen = ''
+		else:
+			fmt = '<n>(%.3f ms @ '+sv.timeformat+')</n>'
+			flen = fmt % (line.length*1000, line.time)
+		if(line.freturn and line.fcall):
+			hf.write(html_func_leaf.format(line.name, flen))
+		elif(line.freturn):
+			hf.write(html_func_end)
+		else:
+			hf.write(html_func_start.format(num, line.name, flen))
+			num += 1
+	hf.write(html_func_end)
+	return num
+
+def addCallgraphs(sv, hf, data):
+	hf.write('<section id="callgraphs" class="callgraph">\n')
+	# write out the ftrace data converted to html
+	num = 0
+	for p in data.phases:
+		if sv.cgphase and p != sv.cgphase:
+			continue
+		list = data.dmesg[p]['list']
+		for devname in data.sortedDevices(p):
+			dev = list[devname]
+			color = 'white'
+			if 'color' in data.dmesg[p]:
+				color = data.dmesg[p]['color']
+			if 'color' in dev:
+				color = dev['color']
+			name = devname
+			if(devname in sv.devprops):
+				name = sv.devprops[devname].altName(devname)
+			if sv.suspendmode in suspendmodename:
+				name += ' '+p
+			if('ftrace' in dev):
+				cg = dev['ftrace']
+				num = callgraphHTML(sv, hf, num, cg,
+					name, color, dev['id'])
+			if('ftraces' in dev):
+				for cg in dev['ftraces']:
+					num = callgraphHTML(sv, hf, num, cg,
+						name+' &rarr; '+cg.name, color, dev['id'])
+
+	hf.write('\n\n    </section>\n')
+
 # Function: createHTMLSummarySimple
 # Description:
 #	 Create summary html file for a series of tests
 # Arguments:
 #	 testruns: array of Data objects from parseTraceLog
-def createHTMLSummarySimple(testruns, htmlfile):
-	# print out the basic summary of all the tests
-	hf = open(htmlfile, 'w')
-
+def createHTMLSummarySimple(testruns, htmlfile, folder):
 	# write the html header first (html head, css code, up to body start)
 	html = '<!DOCTYPE html>\n<html>\n<head>\n\
 	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
-	<title>AnalyzeSuspend Summary</title>\n\
+	<title>SleepGraph Summary</title>\n\
 	<style type=\'text/css\'>\n\
-		body {overflow-y: scroll;}\n\
-		.stamp {width: 100%;text-align:center;background-color:#495E09;line-height:30px;color:white;font: 25px Arial;}\n\
+		.stamp {width: 100%;text-align:center;background:#888;line-height:30px;color:white;font: 25px Arial;}\n\
 		table {width:100%;border-collapse: collapse;}\n\
-		.summary {font: 22px Arial;border:1px solid;}\n\
-		th {border: 1px solid black;background-color:#A7C942;color:white;}\n\
-		td {text-align: center;}\n\
-		tr.alt td {background-color:#EAF2D3;}\n\
-		tr.avg td {background-color:#BDE34C;}\n\
-		a:link {color: #90B521;}\n\
-		a:visited {color: #495E09;}\n\
-		a:hover {color: #B1DF28;}\n\
-		a:active {color: #FFFFFF;}\n\
+		.summary {border:1px solid;}\n\
+		th {border: 1px solid black;background:#222;color:white;}\n\
+		td {font: 16px "Times New Roman";text-align: center;}\n\
+		tr.alt td {background:#ddd;}\n\
+		tr.avg td {background:#aaa;}\n\
 	</style>\n</head>\n<body>\n'
 
 	# group test header
-	count = len(testruns)
-	headline_stamp = '<div class="stamp">{0} {1} {2} {3} ({4} tests)</div>\n'
-	html += headline_stamp.format(sysvals.stamp['host'],
-		sysvals.stamp['kernel'], sysvals.stamp['mode'],
-		sysvals.stamp['time'], count)
-
-	# check to see if all the tests have the same value
-	stampcolumns = False
-	for data in testruns:
-		if diffStamp(sysvals.stamp, data.stamp):
-			stampcolumns = True
-			break
-
+	html += '<div class="stamp">%s (%d tests)</div>\n' % (folder, len(testruns))
 	th = '\t<th>{0}</th>\n'
 	td = '\t<td>{0}</td>\n'
-	tdlink = '\t<td><a href="{0}">Click Here</a></td>\n'
+	tdlink = '\t<td><a href="{0}">html</a></td>\n'
 
 	# table header
-	html += '<table class="summary">\n<tr>\n'
-	html += th.format("Test #")
-	if stampcolumns:
-		html += th.format("Hostname")
-		html += th.format("Kernel Version")
-		html += th.format("Suspend Mode")
-	html += th.format("Test Time")
-	html += th.format("Suspend Time")
-	html += th.format("Resume Time")
-	html += th.format("Detail")
-	html += '</tr>\n'
+	html += '<table class="summary">\n<tr>\n' + th.format('#') +\
+		th.format('Mode') + th.format('Host') + th.format('Kernel') +\
+		th.format('Test Time') + th.format('Suspend') + th.format('Resume') +\
+		th.format('Detail') + '</tr>\n'
 
 	# test data, 1 row per test
-	sTimeAvg = 0.0
-	rTimeAvg = 0.0
-	num = 1
-	for data in testruns:
-		# data.end is the end of post_resume
-		resumeEnd = data.dmesg['resume_complete']['end']
+	avg = '<tr class="avg"><td></td><td></td><td></td><td></td>'+\
+		'<td>Average of {0} {1} tests</td><td>{2}</td><td>{3}</td><td></td></tr>\n'
+	sTimeAvg = rTimeAvg = 0.0
+	mode = ''
+	num = 0
+	for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'])):
+		if mode != data['mode']:
+			# test average line
+			if(num > 0):
+				sTimeAvg /= (num - 1)
+				rTimeAvg /= (num - 1)
+				html += avg.format('%d' % (num - 1), mode,
+					'%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg)
+			sTimeAvg = rTimeAvg = 0.0
+			mode = data['mode']
+			num = 1
+		# alternate row color
 		if num % 2 == 1:
 			html += '<tr class="alt">\n'
 		else:
 			html += '<tr>\n'
-
-		# test num
-		html += td.format("test %d" % num)
+		html += td.format("%d" % num)
 		num += 1
-		if stampcolumns:
-			# host name
+		# basic info
+		for item in ['mode', 'host', 'kernel', 'time']:
 			val = "unknown"
-			if('host' in data.stamp):
-				val = data.stamp['host']
+			if(item in data):
+				val = data[item]
 			html += td.format(val)
-			# host kernel
-			val = "unknown"
-			if('kernel' in data.stamp):
-				val = data.stamp['kernel']
-			html += td.format(val)
-			# suspend mode
-			val = "unknown"
-			if('mode' in data.stamp):
-				val = data.stamp['mode']
-			html += td.format(val)
-		# test time
-		val = "unknown"
-		if('time' in data.stamp):
-			val = data.stamp['time']
-		html += td.format(val)
 		# suspend time
-		sTime = (data.tSuspended - data.start)*1000
+		sTime = float(data['suspend'])
 		sTimeAvg += sTime
-		html += td.format("%3.3f ms" % sTime)
+		html += td.format('%.3f ms' % sTime)
 		# resume time
-		rTime = (resumeEnd - data.tResumed)*1000
+		rTime = float(data['resume'])
 		rTimeAvg += rTime
-		html += td.format("%3.3f ms" % rTime)
+		html += td.format('%.3f ms' % rTime)
 		# link to the output html
-		html += tdlink.format(data.outfile)
-
-		html += '</tr>\n'
-
-	# last line: test average
-	if(count > 0):
-		sTimeAvg /= count
-		rTimeAvg /= count
-	html += '<tr class="avg">\n'
-	html += td.format('Average') 	# name
-	if stampcolumns:
-		html += td.format('')			# host
-		html += td.format('')			# kernel
-		html += td.format('')			# mode
-	html += td.format('')			# time
-	html += td.format("%3.3f ms" % sTimeAvg)	# suspend time
-	html += td.format("%3.3f ms" % rTimeAvg)	# resume time
-	html += td.format('')			# output link
-	html += '</tr>\n'
+		html += tdlink.format(data['url']) + '</tr>\n'
+	# last test average line
+	if(num > 0):
+		sTimeAvg /= (num - 1)
+		rTimeAvg /= (num - 1)
+		html += avg.format('%d' % (num - 1), mode,
+			'%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg)
 
 	# flush the data to file
-	hf.write(html+'</table>\n')
-	hf.write('</body>\n</html>\n')
+	hf = open(htmlfile, 'w')
+	hf.write(html+'</table>\n</body>\n</html>\n')
 	hf.close()
 
-def htmlTitle():
-	modename = {
-		'freeze': 'Freeze (S0)',
-		'standby': 'Standby (S1)',
-		'mem': 'Suspend (S3)',
-		'disk': 'Hibernate (S4)'
-	}
-	kernel = sysvals.stamp['kernel']
-	host = sysvals.hostname[0].upper()+sysvals.hostname[1:]
-	mode = sysvals.suspendmode
-	if sysvals.suspendmode in modename:
-		mode = modename[sysvals.suspendmode]
-	return host+' '+mode+' '+kernel
-
 def ordinal(value):
 	suffix = 'th'
 	if value < 10 or value > 19:
@@ -3272,24 +3305,11 @@ def createHTML(testruns):
 			kerror = True
 		data.normalizeTime(testruns[-1].tSuspended)
 
-	x2changes = ['', 'absolute']
-	if len(testruns) > 1:
-		x2changes = ['1', 'relative']
 	# html function templates
-	headline_version = '<div class="version"><a href="https://01.org/suspendresume">AnalyzeSuspend v%s</a></div>' % sysvals.version
-	headline_stamp = '<div class="stamp">{0} {1} {2} {3}</div>\n'
-	html_devlist1 = '<button id="devlist1" class="devlist" style="float:left;">Device Detail%s</button>' % x2changes[0]
-	html_zoombox = '<center><button id="zoomin">ZOOM IN +</button><button id="zoomout">ZOOM OUT -</button><button id="zoomdef">ZOOM 1:1</button></center>\n'
-	html_devlist2 = '<button id="devlist2" class="devlist" style="float:right;">Device Detail2</button>\n'
-	html_timeline = '<div id="dmesgzoombox" class="zoombox">\n<div id="{0}" class="timeline" style="height:{1}px">\n'
-	html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;"><div class="tback" style="height:{3}px"></div>\n'
-	html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
 	html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">ERROR&rarr;</div>\n'
 	html_traceevent = '<div title="{0}" class="traceevent{6}" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;{7}">{5}</div>\n'
 	html_cpuexec = '<div class="jiffie" style="left:{0}%;top:{1}px;height:{2}px;width:{3}%;background:{4};"></div>\n'
-	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background-color:{4}">{5}</div>\n'
-	html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n'
-	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background-color:{1}">&nbsp;{2}</div>\n'
+	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}">&nbsp;{2}</div>\n'
 	html_timetotal = '<table class="time1">\n<tr>'\
 		'<td class="green" title="{3}">{2} Suspend Time: <b>{0} ms</b></td>'\
 		'<td class="yellow" title="{4}">{2} Resume Time: <b>{1} ms</b></td>'\
@@ -3311,20 +3331,18 @@ def createHTML(testruns):
 		'</tr>\n</table>\n'
 
 	# html format variables
-	hoverZ = 'z-index:8;'
-	if sysvals.usedevsrc:
-		hoverZ = ''
 	scaleH = 20
-	scaleTH = 20
 	if kerror:
 		scaleH = 40
-		scaleTH = 60
 
 	# device timeline
 	vprint('Creating Device Timeline...')
 
 	devtl = Timeline(30, scaleH)
 
+	# write the test title and general info header
+	devtl.createHeader(sysvals)
+
 	# Generate the header for this timeline
 	for data in testruns:
 		tTotal = data.end - data.start
@@ -3346,7 +3364,7 @@ def createHTML(testruns):
 			if(len(testruns) > 1):
 				testdesc = ordinal(data.testnumber+1)+' '+testdesc
 			thtml = html_timetotal3.format(run_time, testdesc)
-			devtl.html['header'] += thtml
+			devtl.html += thtml
 		elif data.fwValid:
 			suspend_time = '%.0f'%(sktime + (data.fwSuspend/1000000.0))
 			resume_time = '%.0f'%(rktime + (data.fwResume/1000000.0))
@@ -3363,10 +3381,10 @@ def createHTML(testruns):
 			else:
 				thtml = html_timetotal2.format(suspend_time, low_time, \
 					resume_time, testdesc1, stitle, rtitle)
-			devtl.html['header'] += thtml
+			devtl.html += thtml
 			sftime = '%.3f'%(data.fwSuspend / 1000000.0)
 			rftime = '%.3f'%(data.fwResume / 1000000.0)
-			devtl.html['header'] += html_timegroups.format('%.3f'%sktime, \
+			devtl.html += html_timegroups.format('%.3f'%sktime, \
 				sftime, rftime, '%.3f'%rktime, testdesc2, sysvals.suspendmode)
 		else:
 			suspend_time = '%.3f' % sktime
@@ -3382,7 +3400,7 @@ def createHTML(testruns):
 			else:
 				thtml = html_timetotal2.format(suspend_time, low_time, \
 					resume_time, testdesc, stitle, rtitle)
-			devtl.html['header'] += thtml
+			devtl.html += thtml
 
 	# time scale for potentially multiple datasets
 	t0 = testruns[0].start
@@ -3429,15 +3447,8 @@ def createHTML(testruns):
 			devtl.getPhaseRows(threadlist, devtl.rows)
 	devtl.calcTotalRows()
 
-	# create bounding box, add buttons
-	if sysvals.suspendmode != 'command':
-		devtl.html['timeline'] += html_devlist1
-		if len(testruns) > 1:
-			devtl.html['timeline'] += html_devlist2
-	devtl.html['timeline'] += html_zoombox
-	devtl.html['timeline'] += html_timeline.format('dmesg', devtl.height)
-
 	# draw the full timeline
+	devtl.createZoomBox(sysvals.suspendmode, len(testruns))
 	phases = {'suspend':[],'resume':[]}
 	for phase in data.dmesg:
 		if 'resume' in phase:
@@ -3452,37 +3463,36 @@ def createHTML(testruns):
 			# draw suspend and resume blocks separately
 			bname = '%s%d' % (dir[0], data.testnumber)
 			if dir == 'suspend':
-				m0 = testruns[data.testnumber].start
-				mMax = testruns[data.testnumber].tSuspended
-				mTotal = mMax - m0
+				m0 = data.start
+				mMax = data.tSuspended
 				left = '%f' % (((m0-t0)*100.0)/tTotal)
 			else:
-				m0 = testruns[data.testnumber].tSuspended
-				mMax = testruns[data.testnumber].end
+				m0 = data.tSuspended
+				mMax = data.end
 				# in an x2 run, remove any gap between blocks
 				if len(testruns) > 1 and data.testnumber == 0:
 					mMax = testruns[1].start
-				mTotal = mMax - m0
 				left = '%f' % ((((m0-t0)*100.0)+sysvals.srgap/2)/tTotal)
+			mTotal = mMax - m0
 			# if a timeline block is 0 length, skip altogether
 			if mTotal == 0:
 				continue
 			width = '%f' % (((mTotal*100.0)-sysvals.srgap/2)/tTotal)
-			devtl.html['timeline'] += html_tblock.format(bname, left, width, devtl.scaleH)
+			devtl.html += devtl.html_tblock.format(bname, left, width, devtl.scaleH)
 			for b in sorted(phases[dir]):
 				# draw the phase color background
 				phase = data.dmesg[b]
 				length = phase['end']-phase['start']
 				left = '%f' % (((phase['start']-m0)*100.0)/mTotal)
 				width = '%f' % ((length*100.0)/mTotal)
-				devtl.html['timeline'] += html_phase.format(left, width, \
+				devtl.html += devtl.html_phase.format(left, width, \
 					'%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \
 					data.dmesg[b]['color'], '')
 			for e in data.errorinfo[dir]:
 				# draw red lines for any kernel errors found
 				t, err = e
 				right = '%f' % (((mMax-t)*100.0)/mTotal)
-				devtl.html['timeline'] += html_error.format(right, err)
+				devtl.html += html_error.format(right, err)
 			for b in sorted(phases[dir]):
 				# draw the devices for this phase
 				phaselist = data.dmesg[b]['list']
@@ -3496,7 +3506,7 @@ def createHTML(testruns):
 					if 'htmlclass' in dev:
 						xtraclass = dev['htmlclass']
 					if 'color' in dev:
-						xtrastyle = 'background-color:%s;' % dev['color']
+						xtrastyle = 'background:%s;' % dev['color']
 					if(d in sysvals.devprops):
 						name = sysvals.devprops[d].altName(d)
 						xtraclass = sysvals.devprops[d].xtraClass()
@@ -3521,7 +3531,7 @@ def createHTML(testruns):
 							title += 'post_resume_process'
 					else:
 						title += b
-					devtl.html['timeline'] += html_device.format(dev['id'], \
+					devtl.html += devtl.html_device.format(dev['id'], \
 						title, left, top, '%.3f'%rowheight, width, \
 						d+drv, xtraclass, xtrastyle)
 					if('cpuexec' in dev):
@@ -3535,7 +3545,7 @@ def createHTML(testruns):
 							left = '%f' % (((start-m0)*100)/mTotal)
 							width = '%f' % ((end-start)*100/mTotal)
 							color = 'rgba(255, 0, 0, %f)' % j
-							devtl.html['timeline'] += \
+							devtl.html += \
 								html_cpuexec.format(left, top, height, width, color)
 					if('src' not in dev):
 						continue
@@ -3548,20 +3558,20 @@ def createHTML(testruns):
 						xtrastyle = ''
 						if e.color:
 							xtrastyle = 'background:%s;' % e.color
-						devtl.html['timeline'] += \
+						devtl.html += \
 							html_traceevent.format(e.title(), \
 								left, top, height, width, e.text(), '', xtrastyle)
 			# draw the time scale, try to make the number of labels readable
-			devtl.html['timeline'] += devtl.createTimeScale(m0, mMax, tTotal, dir)
-			devtl.html['timeline'] += '</div>\n'
+			devtl.createTimeScale(m0, mMax, tTotal, dir)
+			devtl.html += '</div>\n'
 
 	# timeline is finished
-	devtl.html['timeline'] += '</div>\n</div>\n'
+	devtl.html += '</div>\n</div>\n'
 
 	# draw a legend which describes the phases by color
 	if sysvals.suspendmode != 'command':
 		data = testruns[-1]
-		devtl.html['legend'] = '<div class="legend">\n'
+		devtl.html += '<div class="legend">\n'
 		pdelta = 100.0/len(data.phases)
 		pmargin = pdelta / 4.0
 		for phase in data.phases:
@@ -3571,127 +3581,41 @@ def createHTML(testruns):
 				id += tmp[1][0]
 			order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
 			name = string.replace(phase, '_', ' &nbsp;')
-			devtl.html['legend'] += html_legend.format(order, \
+			devtl.html += html_legend.format(order, \
 				data.dmesg[phase]['color'], name, id)
-		devtl.html['legend'] += '</div>\n'
+		devtl.html += '</div>\n'
 
 	hf = open(sysvals.htmlfile, 'w')
 
-	if not sysvals.cgexp:
-		cgchk = 'checked'
-		cgnchk = 'not(:checked)'
-	else:
-		cgchk = 'not(:checked)'
-		cgnchk = 'checked'
-
-	# write the html header first (html head, css code, up to body start)
-	html_header = '<!DOCTYPE html>\n<html>\n<head>\n\
-	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
-	<title>'+htmlTitle()+'</title>\n\
-	<style type=\'text/css\'>\n\
-		body {overflow-y:scroll;}\n\
-		.stamp {width:100%;text-align:center;background-color:gray;line-height:30px;color:white;font:25px Arial;}\n\
-		.callgraph {margin-top:30px;box-shadow:5px 5px 20px black;}\n\
-		.callgraph article * {padding-left:28px;}\n\
-		h1 {color:black;font:bold 30px Times;}\n\
-		t0 {color:black;font:bold 30px Times;}\n\
-		t1 {color:black;font:30px Times;}\n\
-		t2 {color:black;font:25px Times;}\n\
-		t3 {color:black;font:20px Times;white-space:nowrap;}\n\
-		t4 {color:black;font:bold 30px Times;line-height:60px;white-space:nowrap;}\n\
-		cS {font:bold 13px Times;}\n\
-		table {width:100%;}\n\
-		.gray {background-color:rgba(80,80,80,0.1);}\n\
-		.green {background-color:rgba(204,255,204,0.4);}\n\
-		.purple {background-color:rgba(128,0,128,0.2);}\n\
-		.yellow {background-color:rgba(255,255,204,0.4);}\n\
-		.time1 {font:22px Arial;border:1px solid;}\n\
-		.time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
-		td {text-align:center;}\n\
-		r {color:#500000;font:15px Tahoma;}\n\
-		n {color:#505050;font:15px Tahoma;}\n\
-		.tdhl {color:red;}\n\
-		.hide {display:none;}\n\
-		.pf {display:none;}\n\
-		.pf:'+cgchk+' + label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/><rect x="8" y="4" width="2" height="10" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
-		.pf:'+cgnchk+' ~ label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
-		.pf:'+cgchk+' ~ *:not(:nth-child(2)) {display:none;}\n\
-		.zoombox {position:relative;width:100%;overflow-x:scroll;-webkit-user-select:none;-moz-user-select:none;user-select:none;}\n\
-		.timeline {position:relative;font-size:14px;cursor:pointer;width:100%; overflow:hidden;background:linear-gradient(#cccccc, white);}\n\
-		.thread {position:absolute;height:0%;overflow:hidden;z-index:7;line-height:30px;font-size:14px;border:1px solid;text-align:center;white-space:nowrap;}\n\
-		.thread.ps {border-radius:3px;background:linear-gradient(to top, #ccc, #eee);}\n\
-		.thread:hover {background-color:white;border:1px solid red;'+hoverZ+'}\n\
-		.thread.sec,.thread.sec:hover {background-color:black;border:0;color:white;line-height:15px;font-size:10px;}\n\
-		.hover {background-color:white;border:1px solid red;'+hoverZ+'}\n\
-		.hover.sync {background-color:white;}\n\
-		.hover.bg,.hover.kth,.hover.sync,.hover.ps {background-color:white;}\n\
-		.jiffie {position:absolute;pointer-events: none;z-index:8;}\n\
-		.traceevent {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\
-		.traceevent:hover {color:white;font-weight:bold;border:1px solid white;}\n\
-		.phase {position:absolute;overflow:hidden;border:0px;text-align:center;}\n\
-		.phaselet {position:absolute;overflow:hidden;border:0px;text-align:center;height:100px;font-size:24px;}\n\
-		.t {position:absolute;line-height:'+('%d'%scaleTH)+'px;pointer-events:none;top:0;height:100%;border-right:1px solid black;z-index:6;}\n\
-		.err {position:absolute;top:0%;height:100%;border-right:3px solid red;color:red;font:bold 14px Times;line-height:18px;}\n\
-		.legend {position:relative; width:100%; height:40px; text-align:center;margin-bottom:20px}\n\
-		.legend .square {position:absolute;cursor:pointer;top:10px; width:0px;height:20px;border:1px solid;padding-left:20px;}\n\
-		button {height:40px;width:200px;margin-bottom:20px;margin-top:20px;font-size:24px;}\n\
-		.logbtn {position:relative;float:right;height:25px;width:50px;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\
-		.devlist {position:'+x2changes[1]+';width:190px;}\n\
-		a:link {color:white;text-decoration:none;}\n\
-		a:visited {color:white;}\n\
-		a:hover {color:white;}\n\
-		a:active {color:white;}\n\
-		.version {position:relative;float:left;color:white;font-size:10px;line-height:30px;margin-left:10px;}\n\
-		#devicedetail {height:100px;box-shadow:5px 5px 20px black;}\n\
-		.tblock {position:absolute;height:100%;background-color:#ddd;}\n\
-		.tback {position:absolute;width:100%;background:linear-gradient(#ccc, #ddd);}\n\
-		.bg {z-index:1;}\n\
-	</style>\n</head>\n<body>\n'
-
 	# no header or css if its embedded
 	if(sysvals.embedded):
 		hf.write('pass True tSus %.3f tRes %.3f tLow %.3f fwvalid %s tSus %.3f tRes %.3f\n' %
 			(data.tSuspended-data.start, data.end-data.tSuspended, data.tLow, data.fwValid, \
 				data.fwSuspend/1000000, data.fwResume/1000000))
 	else:
-		hf.write(html_header)
-
-	# write the test title and general info header
-	if(sysvals.stamp['time'] != ""):
-		hf.write(headline_version)
-		if sysvals.logmsg:
-			hf.write('<button id="showtest" class="logbtn">log</button>')
-		if sysvals.addlogs and sysvals.dmesgfile:
-			hf.write('<button id="showdmesg" class="logbtn">dmesg</button>')
-		if sysvals.addlogs and sysvals.ftracefile:
-			hf.write('<button id="showftrace" class="logbtn">ftrace</button>')
-		hf.write(headline_stamp.format(sysvals.stamp['host'],
-			sysvals.stamp['kernel'], sysvals.stamp['mode'], \
-				sysvals.stamp['time']))
+		addCSS(hf, sysvals, len(testruns), kerror)
 
 	# write the device timeline
-	hf.write(devtl.html['header'])
-	hf.write(devtl.html['timeline'])
-	hf.write(devtl.html['legend'])
+	hf.write(devtl.html)
 	hf.write('<div id="devicedetailtitle"></div>\n')
 	hf.write('<div id="devicedetail" style="display:none;">\n')
 	# draw the colored boxes for the device detail section
 	for data in testruns:
 		hf.write('<div id="devicedetail%d">\n' % data.testnumber)
 		pscolor = 'linear-gradient(to top left, #ccc, #eee)'
-		hf.write(html_phaselet.format('pre_suspend_process', \
+		hf.write(devtl.html_phaselet.format('pre_suspend_process', \
 			'0', '0', pscolor))
 		for b in data.phases:
 			phase = data.dmesg[b]
 			length = phase['end']-phase['start']
 			left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal)
 			width = '%.3f' % ((length*100.0)/tTotal)
-			hf.write(html_phaselet.format(b, left, width, \
+			hf.write(devtl.html_phaselet.format(b, left, width, \
 				data.dmesg[b]['color']))
-		hf.write(html_phaselet.format('post_resume_process', \
+		hf.write(devtl.html_phaselet.format('post_resume_process', \
 			'0', '0', pscolor))
 		if sysvals.suspendmode == 'command':
-			hf.write(html_phaselet.format('cmdexec', '0', '0', pscolor))
+			hf.write(devtl.html_phaselet.format('cmdexec', '0', '0', pscolor))
 		hf.write('</div>\n')
 	hf.write('</div>\n')
 
@@ -3701,52 +3625,7 @@ def createHTML(testruns):
 	else:
 		data = testruns[-1]
 	if(sysvals.usecallgraph and not sysvals.embedded):
-		hf.write('<section id="callgraphs" class="callgraph">\n')
-		# write out the ftrace data converted to html
-		html_func_top = '<article id="{0}" class="atop" style="background-color:{1}">\n<input type="checkbox" class="pf" id="f{2}" checked/><label for="f{2}">{3} {4}</label>\n'
-		html_func_start = '<article>\n<input type="checkbox" class="pf" id="f{0}" checked/><label for="f{0}">{1} {2}</label>\n'
-		html_func_end = '</article>\n'
-		html_func_leaf = '<article>{0} {1}</article>\n'
-		num = 0
-		for p in data.phases:
-			if sysvals.cgphase and p != sysvals.cgphase:
-				continue
-			list = data.dmesg[p]['list']
-			for devname in data.sortedDevices(p):
-				if('ftrace' not in list[devname]):
-					continue
-				devid = list[devname]['id']
-				cg = list[devname]['ftrace']
-				clen = (cg.end - cg.start) * 1000
-				if clen < sysvals.mincglen:
-					continue
-				fmt = '<r>(%.3f ms @ '+sysvals.timeformat+' to '+sysvals.timeformat+')</r>'
-				flen = fmt % (clen, cg.start, cg.end)
-				name = devname
-				if(devname in sysvals.devprops):
-					name = sysvals.devprops[devname].altName(devname)
-				if sysvals.suspendmode == 'command':
-					ftitle = name
-				else:
-					ftitle = name+' '+p
-				hf.write(html_func_top.format(devid, data.dmesg[p]['color'], \
-					num, ftitle, flen))
-				num += 1
-				for line in cg.list:
-					if(line.length < 0.000000001):
-						flen = ''
-					else:
-						fmt = '<n>(%.3f ms @ '+sysvals.timeformat+')</n>'
-						flen = fmt % (line.length*1000, line.time)
-					if(line.freturn and line.fcall):
-						hf.write(html_func_leaf.format(line.name, flen))
-					elif(line.freturn):
-						hf.write(html_func_end)
-					else:
-						hf.write(html_func_start.format(num, line.name, flen))
-						num += 1
-				hf.write(html_func_end)
-		hf.write('\n\n    </section>\n')
+		addCallgraphs(sysvals, hf, data)
 
 	# add the test log as a hidden div
 	if sysvals.logmsg:
@@ -3788,6 +3667,100 @@ def createHTML(testruns):
 	hf.close()
 	return True
 
+def addCSS(hf, sv, testcount=1, kerror=False, extra=''):
+	kernel = sv.stamp['kernel']
+	host = sv.hostname[0].upper()+sv.hostname[1:]
+	mode = sv.suspendmode
+	if sv.suspendmode in suspendmodename:
+		mode = suspendmodename[sv.suspendmode]
+	title = host+' '+mode+' '+kernel
+
+	# various format changes by flags
+	cgchk = 'checked'
+	cgnchk = 'not(:checked)'
+	if sv.cgexp:
+		cgchk = 'not(:checked)'
+		cgnchk = 'checked'
+
+	hoverZ = 'z-index:8;'
+	if sv.usedevsrc:
+		hoverZ = ''
+
+	devlistpos = 'absolute'
+	if testcount > 1:
+		devlistpos = 'relative'
+
+	scaleTH = 20
+	if kerror:
+		scaleTH = 60
+
+	# write the html header first (html head, css code, up to body start)
+	html_header = '<!DOCTYPE html>\n<html>\n<head>\n\
+	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
+	<title>'+title+'</title>\n\
+	<style type=\'text/css\'>\n\
+		body {overflow-y:scroll;}\n\
+		.stamp {width:100%;text-align:center;background:gray;line-height:30px;color:white;font:25px Arial;}\n\
+		.callgraph {margin-top:30px;box-shadow:5px 5px 20px black;}\n\
+		.callgraph article * {padding-left:28px;}\n\
+		h1 {color:black;font:bold 30px Times;}\n\
+		t0 {color:black;font:bold 30px Times;}\n\
+		t1 {color:black;font:30px Times;}\n\
+		t2 {color:black;font:25px Times;}\n\
+		t3 {color:black;font:20px Times;white-space:nowrap;}\n\
+		t4 {color:black;font:bold 30px Times;line-height:60px;white-space:nowrap;}\n\
+		cS {font:bold 13px Times;}\n\
+		table {width:100%;}\n\
+		.gray {background:rgba(80,80,80,0.1);}\n\
+		.green {background:rgba(204,255,204,0.4);}\n\
+		.purple {background:rgba(128,0,128,0.2);}\n\
+		.yellow {background:rgba(255,255,204,0.4);}\n\
+		.blue {background:rgba(169,208,245,0.4);}\n\
+		.time1 {font:22px Arial;border:1px solid;}\n\
+		.time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
+		td {text-align:center;}\n\
+		r {color:#500000;font:15px Tahoma;}\n\
+		n {color:#505050;font:15px Tahoma;}\n\
+		.tdhl {color:red;}\n\
+		.hide {display:none;}\n\
+		.pf {display:none;}\n\
+		.pf:'+cgchk+' + label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/><rect x="8" y="4" width="2" height="10" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
+		.pf:'+cgnchk+' ~ label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
+		.pf:'+cgchk+' ~ *:not(:nth-child(2)) {display:none;}\n\
+		.zoombox {position:relative;width:100%;overflow-x:scroll;-webkit-user-select:none;-moz-user-select:none;user-select:none;}\n\
+		.timeline {position:relative;font-size:14px;cursor:pointer;width:100%; overflow:hidden;background:linear-gradient(#cccccc, white);}\n\
+		.thread {position:absolute;height:0%;overflow:hidden;z-index:7;line-height:30px;font-size:14px;border:1px solid;text-align:center;white-space:nowrap;}\n\
+		.thread.ps {border-radius:3px;background:linear-gradient(to top, #ccc, #eee);}\n\
+		.thread:hover {background:white;border:1px solid red;'+hoverZ+'}\n\
+		.thread.sec,.thread.sec:hover {background:black;border:0;color:white;line-height:15px;font-size:10px;}\n\
+		.hover {background:white;border:1px solid red;'+hoverZ+'}\n\
+		.hover.sync {background:white;}\n\
+		.hover.bg,.hover.kth,.hover.sync,.hover.ps {background:white;}\n\
+		.jiffie {position:absolute;pointer-events: none;z-index:8;}\n\
+		.traceevent {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\
+		.traceevent:hover {color:white;font-weight:bold;border:1px solid white;}\n\
+		.phase {position:absolute;overflow:hidden;border:0px;text-align:center;}\n\
+		.phaselet {float:left;overflow:hidden;border:0px;text-align:center;min-height:100px;font-size:24px;}\n\
+		.t {position:absolute;line-height:'+('%d'%scaleTH)+'px;pointer-events:none;top:0;height:100%;border-right:1px solid black;z-index:6;}\n\
+		.err {position:absolute;top:0%;height:100%;border-right:3px solid red;color:red;font:bold 14px Times;line-height:18px;}\n\
+		.legend {position:relative; width:100%; height:40px; text-align:center;margin-bottom:20px}\n\
+		.legend .square {position:absolute;cursor:pointer;top:10px; width:0px;height:20px;border:1px solid;padding-left:20px;}\n\
+		button {height:40px;width:200px;margin-bottom:20px;margin-top:20px;font-size:24px;}\n\
+		.logbtn {position:relative;float:right;height:25px;width:50px;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\
+		.devlist {position:'+devlistpos+';width:190px;}\n\
+		a:link {color:white;text-decoration:none;}\n\
+		a:visited {color:white;}\n\
+		a:hover {color:white;}\n\
+		a:active {color:white;}\n\
+		.version {position:relative;float:left;color:white;font-size:10px;line-height:30px;margin-left:10px;}\n\
+		#devicedetail {min-height:100px;box-shadow:5px 5px 20px black;}\n\
+		.tblock {position:absolute;height:100%;background:#ddd;}\n\
+		.tback {position:absolute;width:100%;background:linear-gradient(#ccc, #ddd);}\n\
+		.bg {z-index:1;}\n\
+'+extra+'\
+	</style>\n</head>\n<body>\n'
+	hf.write(html_header)
+
 # Function: addScriptCode
 # Description:
 #	 Adds the javascript code to the output html
@@ -3809,7 +3782,7 @@ def addScriptCode(hf, testruns):
 	'	var resolution = -1;\n'\
 	'	var dragval = [0, 0];\n'\
 	'	function redrawTimescale(t0, tMax, tS) {\n'\
-	'		var rline = \'<div class="t" style="left:0;border-left:1px solid black;border-right:0;"><cS>&larr;R</cS></div>\';\n'\
+	'		var rline = \'<div class="t" style="left:0;border-left:1px solid black;border-right:0;">\';\n'\
 	'		var tTotal = tMax - t0;\n'\
 	'		var list = document.getElementsByClassName("tblock");\n'\
 	'		for (var i = 0; i < list.length; i++) {\n'\
@@ -3824,19 +3797,23 @@ def addScriptCode(hf, testruns):
 	'			var pos = 0.0, val = 0.0;\n'\
 	'			for (var j = 0; j < divTotal; j++) {\n'\
 	'				var htmlline = "";\n'\
-	'				if(list[i].id[5] == "r") {\n'\
-	'					pos = 100 - (((j)*tS*100)/mTotal);\n'\
-	'					val = (j)*tS;\n'\
-	'					htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
-	'					if(j == 0)\n'\
-	'						htmlline = rline;\n'\
-	'				} else {\n'\
+	'				var mode = list[i].id[5];\n'\
+	'				if(mode == "s") {\n'\
 	'					pos = 100 - (((j)*tS*100)/mTotal) - divEdge;\n'\
 	'					val = (j-divTotal+1)*tS;\n'\
 	'					if(j == divTotal - 1)\n'\
 	'						htmlline = \'<div class="t" style="right:\'+pos+\'%"><cS>S&rarr;</cS></div>\';\n'\
 	'					else\n'\
 	'						htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
+	'				} else {\n'\
+	'					pos = 100 - (((j)*tS*100)/mTotal);\n'\
+	'					val = (j)*tS;\n'\
+	'					htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
+	'					if(j == 0)\n'\
+	'						if(mode == "r")\n'\
+	'							htmlline = rline+"<cS>&larr;R</cS></div>";\n'\
+	'						else\n'\
+	'							htmlline = rline+"<cS>0ms</div>";\n'\
 	'				}\n'\
 	'				html += htmlline;\n'\
 	'			}\n'\
@@ -4002,12 +3979,80 @@ def addScriptCode(hf, testruns):
 	'				}\n'\
 	'			}\n'\
 	'		}\n'\
+	'		if(typeof devstats !== \'undefined\')\n'\
+	'			callDetail(this.id, this.title);\n'\
 	'		var cglist = document.getElementById("callgraphs");\n'\
 	'		if(!cglist) return;\n'\
 	'		var cg = cglist.getElementsByClassName("atop");\n'\
 	'		if(cg.length < 10) return;\n'\
 	'		for (var i = 0; i < cg.length; i++) {\n'\
-	'			if(idlist.indexOf(cg[i].id) >= 0) {\n'\
+	'			cgid = cg[i].id.split("x")[0]\n'\
+	'			if(idlist.indexOf(cgid) >= 0) {\n'\
+	'				cg[i].style.display = "block";\n'\
+	'			} else {\n'\
+	'				cg[i].style.display = "none";\n'\
+	'			}\n'\
+	'		}\n'\
+	'	}\n'\
+	'	function callDetail(devid, devtitle) {\n'\
+	'		if(!(devid in devstats) || devstats[devid].length < 1)\n'\
+	'			return;\n'\
+	'		var list = devstats[devid];\n'\
+	'		var tmp = devtitle.split(" ");\n'\
+	'		var name = tmp[0], phase = tmp[tmp.length-1];\n'\
+	'		var dd = document.getElementById(phase);\n'\
+	'		var total = parseFloat(tmp[1].slice(1));\n'\
+	'		var mlist = [];\n'\
+	'		var maxlen = 0;\n'\
+	'		var info = []\n'\
+	'		for(var i in list) {\n'\
+	'			if(list[i][0] == "@") {\n'\
+	'				info = list[i].split("|");\n'\
+	'				continue;\n'\
+	'			}\n'\
+	'			var tmp = list[i].split("|");\n'\
+	'			var t = parseFloat(tmp[0]), f = tmp[1], c = parseInt(tmp[2]);\n'\
+	'			var p = (t*100.0/total).toFixed(2);\n'\
+	'			mlist[mlist.length] = [f, c, t.toFixed(2), p+"%"];\n'\
+	'			if(f.length > maxlen)\n'\
+	'				maxlen = f.length;\n'\
+	'		}\n'\
+	'		var pad = 5;\n'\
+	'		if(mlist.length == 0) pad = 30;\n'\
+	'		var html = \'<div style="padding-top:\'+pad+\'px"><t3> <b>\'+name+\':</b>\';\n'\
+	'		if(info.length > 2)\n'\
+	'			html += " start=<b>"+info[1]+"</b>, end=<b>"+info[2]+"</b>";\n'\
+	'		if(info.length > 3)\n'\
+	'			html += ", length<i>(w/o overhead)</i>=<b>"+info[3]+" ms</b>";\n'\
+	'		if(info.length > 4)\n'\
+	'			html += ", return=<b>"+info[4]+"</b>";\n'\
+	'		html += "</t3></div>";\n'\
+	'		if(mlist.length > 0) {\n'\
+	'			html += \'<table class=fstat style="padding-top:\'+(maxlen*5)+\'px;"><tr><th>Function</th>\';\n'\
+	'			for(var i in mlist)\n'\
+	'				html += "<td class=vt>"+mlist[i][0]+"</td>";\n'\
+	'			html += "</tr><tr><th>Calls</th>";\n'\
+	'			for(var i in mlist)\n'\
+	'				html += "<td>"+mlist[i][1]+"</td>";\n'\
+	'			html += "</tr><tr><th>Time(ms)</th>";\n'\
+	'			for(var i in mlist)\n'\
+	'				html += "<td>"+mlist[i][2]+"</td>";\n'\
+	'			html += "</tr><tr><th>Percent</th>";\n'\
+	'			for(var i in mlist)\n'\
+	'				html += "<td>"+mlist[i][3]+"</td>";\n'\
+	'			html += "</tr></table>";\n'\
+	'		}\n'\
+	'		dd.innerHTML = html;\n'\
+	'		var height = (maxlen*5)+100;\n'\
+	'		dd.style.height = height+"px";\n'\
+	'		document.getElementById("devicedetail").style.height = height+"px";\n'\
+	'	}\n'\
+	'	function callSelect() {\n'\
+	'		var cglist = document.getElementById("callgraphs");\n'\
+	'		if(!cglist) return;\n'\
+	'		var cg = cglist.getElementsByClassName("atop");\n'\
+	'		for (var i = 0; i < cg.length; i++) {\n'\
+	'			if(this.id == cg[i].id) {\n'\
 	'				cg[i].style.display = "block";\n'\
 	'			} else {\n'\
 	'				cg[i].style.display = "none";\n'\
@@ -4093,6 +4138,9 @@ def addScriptCode(hf, testruns):
 	'			dev[i].onmouseover = deviceHover;\n'\
 	'			dev[i].onmouseout = deviceUnhover;\n'\
 	'		}\n'\
+	'		var dev = dmesg.getElementsByClassName("srccall");\n'\
+	'		for (var i = 0; i < dev.length; i++)\n'\
+	'			dev[i].onclick = callSelect;\n'\
 	'		zoomTimeline();\n'\
 	'	});\n'\
 	'</script>\n'
@@ -4675,7 +4723,7 @@ def rootCheck(fatal):
 	if(os.access(sysvals.powerfile, os.W_OK)):
 		return True
 	if fatal:
-		doError('This command must be run as root')
+		doError('This command requires sysfs mount and root access')
 	return False
 
 # Function: getArgInt
@@ -4767,51 +4815,62 @@ def runTest(subdir, testpath=''):
 		cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1'
 		call(cmd.format(os.environ['SUDO_USER'], sysvals.testdir), shell=True)
 
+def find_in_html(html, strs, div=False):
+	for str in strs:
+		l = len(str)
+		i = html.find(str)
+		if i >= 0:
+			break
+	if i < 0:
+		return ''
+	if not div:
+		return re.search(r'[-+]?\d*\.\d+|\d+', html[i+l:i+l+50]).group()
+	n = html[i+l:].find('</div>')
+	if n < 0:
+		return ''
+	return html[i+l:i+l+n]
+
 # Function: runSummary
 # Description:
 #	 create a summary of tests in a sub-directory
-def runSummary(subdir, output):
-	# get a list of ftrace output files
-	files = []
+def runSummary(subdir, local=True):
+	inpath = os.path.abspath(subdir)
+	outpath = inpath
+	if local:
+		outpath = os.path.abspath('.')
+	print('Generating a summary of folder "%s"' % inpath)
+	testruns = []
 	for dirname, dirnames, filenames in os.walk(subdir):
 		for filename in filenames:
-			if(re.match('.*_ftrace.txt', filename)):
-				files.append("%s/%s" % (dirname, filename))
-
-	# process the files in order and get an array of data objects
-	testruns = []
-	for file in sorted(files):
-		if output:
-			print("Test found in %s" % os.path.dirname(file))
-		sysvals.ftracefile = file
-		sysvals.dmesgfile = file.replace('_ftrace.txt', '_dmesg.txt')
-		doesTraceLogHaveTraceEvents()
-		sysvals.usecallgraph = False
-		if not sysvals.usetraceeventsonly:
-			if(not os.path.exists(sysvals.dmesgfile)):
-				print("Skipping %s: not a valid test input" % file)
+			if(not re.match('.*.html', filename)):
 				continue
-			else:
-				if output:
-					f = os.path.basename(sysvals.ftracefile)
-					d = os.path.basename(sysvals.dmesgfile)
-					print("\tInput files: %s and %s" % (f, d))
-				testdata = loadKernelLog()
-				data = testdata[0]
-				parseKernelLog(data)
-				testdata = [data]
-				appendIncompleteTraceLog(testdata)
-		else:
-			if output:
-				print("\tInput file: %s" % os.path.basename(sysvals.ftracefile))
-			testdata = parseTraceLog()
-			data = testdata[0]
-		data.normalizeTime(data.tSuspended)
-		link = file.replace(subdir+'/', '').replace('_ftrace.txt', '.html')
-		data.outfile = link
-		testruns.append(data)
-
-	createHTMLSummarySimple(testruns, subdir+'/summary.html')
+			file = os.path.join(dirname, filename)
+			html = open(file, 'r').read(10000)
+			suspend = find_in_html(html,
+				['Kernel Suspend: ', 'Kernel Suspend Time: '])
+			resume = find_in_html(html,
+				['Kernel Resume: ', 'Kernel Resume Time: '])
+			line = find_in_html(html, ['<div class="stamp">'], True)
+			stmp = line.split()
+			if not suspend or not resume or len(stmp) < 4:
+				continue
+			data = {
+				'host': stmp[0],
+				'kernel': stmp[1],
+				'mode': stmp[2],
+				'time': string.join(stmp[3:], ' '),
+				'suspend': suspend,
+				'resume': resume,
+				'url': os.path.relpath(file, outpath),
+			}
+			if len(stmp) == 7:
+				data['kernel'] = 'unknown'
+				data['mode'] = stmp[1]
+				data['time'] = string.join(stmp[2:], ' ')
+			testruns.append(data)
+	outfile = os.path.join(outpath, 'summary.html')
+	print('Summary file: %s' % outfile)
+	createHTMLSummarySimple(testruns, outfile, inpath)
 
 # Function: checkArgBool
 # Description:
@@ -4869,9 +4928,14 @@ def configFromFile(file):
 				sysvals.predelay = getArgInt('-predelay', value, 0, 60000, False)
 			elif(opt.lower() == 'postdelay'):
 				sysvals.postdelay = getArgInt('-postdelay', value, 0, 60000, False)
+			elif(opt.lower() == 'maxdepth'):
+				sysvals.max_graph_depth = getArgInt('-maxdepth', value, 0, 1000, False)
 			elif(opt.lower() == 'rtcwake'):
-				sysvals.rtcwake = True
-				sysvals.rtcwaketime = getArgInt('-rtcwake', value, 0, 3600, False)
+				if value.lower() == 'off':
+					sysvals.rtcwake = False
+				else:
+					sysvals.rtcwake = True
+					sysvals.rtcwaketime = getArgInt('-rtcwake', value, 0, 3600, False)
 			elif(opt.lower() == 'timeprec'):
 				sysvals.setPrecision(getArgInt('-timeprec', value, 0, 6, False))
 			elif(opt.lower() == 'mindev'):
@@ -4969,8 +5033,8 @@ def printHelp():
 	modes = getModes()
 
 	print('')
-	print('AnalyzeSuspend v%s' % sysvals.version)
-	print('Usage: sudo analyze_suspend.py <options>')
+	print('%s v%s' % (sysvals.title, sysvals.version))
+	print('Usage: sudo sleepgraph <options> <commands>')
 	print('')
 	print('Description:')
 	print('  This tool is designed to assist kernel and OS developers in optimizing')
@@ -4981,22 +5045,22 @@ def printHelp():
 	print('  a detailed view of which devices/subsystems are taking the most')
 	print('  time in suspend/resume.')
 	print('')
+	print('  If no specific command is given, the default behavior is to initiate')
+	print('  a suspend/resume and capture the dmesg/ftrace output as an html timeline.')
+	print('')
 	print('  Generates output files in subdirectory: suspend-mmddyy-HHMMSS')
 	print('   HTML output:                    <hostname>_<mode>.html')
 	print('   raw dmesg output:               <hostname>_<mode>_dmesg.txt')
 	print('   raw ftrace output:              <hostname>_<mode>_ftrace.txt')
 	print('')
 	print('Options:')
-	print('  [general]')
 	print('   -h           Print this help text')
 	print('   -v           Print the current tool version')
 	print('   -config fn   Pull arguments and config options from file fn')
 	print('   -verbose     Print extra information during execution and analysis')
-	print('   -status      Test to see if the system is enabled to run this tool')
-	print('   -modes       List available suspend modes')
 	print('   -m mode      Mode to initiate for suspend %s (default: %s)') % (modes, sysvals.suspendmode)
 	print('   -o subdir    Override the output subdirectory')
-	print('   -rtcwake t   Use rtcwake to autoresume after <t> seconds (default: disabled)')
+	print('   -rtcwake t   Wakeup t seconds after suspend, set t to "off" to disable (default: 15)')
 	print('   -addlogs     Add the dmesg and ftrace logs to the html output')
 	print('   -srgap       Add a visible gap in the timeline between sus/res (default: disabled)')
 	print('  [advanced]')
@@ -5012,23 +5076,25 @@ def printHelp():
 	print('                be created in a new subdirectory with a summary page.')
 	print('  [debug]')
 	print('   -f           Use ftrace to create device callgraphs (default: disabled)')
+	print('   -maxdepth N  limit the callgraph data to N call levels (default: 0=all)')
 	print('   -expandcg    pre-expand the callgraph data in the html output (default: disabled)')
-	print('   -flist       Print the list of functions currently being captured in ftrace')
-	print('   -flistall    Print all functions capable of being captured in ftrace')
 	print('   -fadd file   Add functions to be graphed in the timeline from a list in a text file')
 	print('   -filter "d1,d2,..." Filter out all but this comma-delimited list of device names')
 	print('   -mincg  ms   Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
 	print('   -cgphase P   Only show callgraph data for phase P (e.g. suspend_late)')
 	print('   -cgtest N    Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)')
 	print('   -timeprec N  Number of significant digits in timestamps (0:S, [3:ms], 6:us)')
-	print('  [utilities]')
+	print('  [commands]')
+	print('   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)')
+	print('   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)')
+	print('   -summary directory  Create a summary of all test in this dir')
+	print('   -modes       List available suspend modes')
+	print('   -status      Test to see if the system is enabled to run this tool')
 	print('   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table')
 	print('   -usbtopo     Print out the current USB topology with power info')
 	print('   -usbauto     Enable autosuspend for all connected USB devices')
-	print('  [re-analyze data from previous runs]')
-	print('   -ftrace ftracefile  Create HTML output using ftrace input')
-	print('   -dmesg dmesgfile    Create HTML output using dmesg (not needed for kernel >= 3.15)')
-	print('   -summary directory  Create a summary of all test in this dir')
+	print('   -flist       Print the list of functions currently being captured in ftrace')
+	print('   -flistall    Print all functions capable of being captured in ftrace')
 	print('')
 	return True
 
@@ -5076,9 +5142,18 @@ if __name__ == '__main__':
 			sysvals.useprocmon = True
 		elif(arg == '-dev'):
 			sysvals.usedevsrc = True
+		elif(arg == '-maxdepth'):
+			sysvals.max_graph_depth = getArgInt('-maxdepth', args, 0, 1000)
 		elif(arg == '-rtcwake'):
-			sysvals.rtcwake = True
-			sysvals.rtcwaketime = getArgInt('-rtcwake', args, 0, 3600)
+			try:
+				val = args.next()
+			except:
+				doError('No rtcwake time supplied', True)
+			if val.lower() == 'off':
+				sysvals.rtcwake = False
+			else:
+				sysvals.rtcwake = True
+				sysvals.rtcwaketime = getArgInt('-rtcwake', val, 0, 3600, False)
 		elif(arg == '-timeprec'):
 			sysvals.setPrecision(getArgInt('-timeprec', args, 0, 6))
 		elif(arg == '-mindev'):
@@ -5201,7 +5276,6 @@ if __name__ == '__main__':
 		elif(cmd == 'usbauto'):
 			setUSBDevicesAuto()
 		elif(cmd == 'summary'):
-			print("Generating a summary of folder \"%s\"" % cmdarg)
 			runSummary(cmdarg, True)
 		sys.exit()
 
diff --git a/tools/power/pm-graph/bootgraph.8 b/tools/power/pm-graph/bootgraph.8
new file mode 100644
index 000000000000..55272a67b0e7
--- /dev/null
+++ b/tools/power/pm-graph/bootgraph.8
@@ -0,0 +1,132 @@
+.TH BOOTGRAPH 8
+.SH NAME
+bootgraph \- Kernel boot timing analysis
+.SH SYNOPSIS
+.ft B
+.B bootgraph
+.RB [ OPTIONS ]
+.RB [ COMMAND ]
+.SH DESCRIPTION
+\fBbootgraph \fP reads the dmesg log from kernel boot and
+creates an html representation of the initcall timeline up to the start
+of the init process.
+.PP
+If no specific command is given, the tool reads the current dmesg log and
+outputs bootgraph.html.
+.PP
+The tool can also augment the timeline with ftrace data on custom target
+functions as well as full trace callgraphs.
+.SH OPTIONS
+.TP
+\fB-h\fR
+Print this help text
+.TP
+\fB-v\fR
+Print the current tool version
+.TP
+\fB-addlogs\fR
+Add the dmesg log to the html output. It will be viewable by
+clicking a button in the timeline.
+.TP
+\fB-o \fIfile\fR
+Override the HTML output filename (default: bootgraph.html)
+.SS "Ftrace Debug"
+.TP
+\fB-f\fR
+Use ftrace to add function detail (default: disabled)
+.TP
+\fB-callgraph\fR
+Use ftrace to create initcall callgraphs (default: disabled). If -filter
+is not used there will be one callgraph per initcall. This can produce
+very large outputs, i.e. 10MB - 100MB.
+.TP
+\fB-maxdepth \fIlevel\fR
+limit the callgraph trace depth to \fIlevel\fR (default: 2). This is
+the best way to limit the output size when using -callgraph.
+.TP
+\fB-mincg \fIt\fR
+Discard all callgraphs shorter than \fIt\fR milliseconds (default: 0=all).
+This reduces the html file size as there can be many tiny callgraphs
+which are barely visible in the timeline.
+The value is a float: e.g. 0.001 represents 1 us.
+.TP
+\fB-timeprec \fIn\fR
+Number of significant digits in timestamps (0:S, 3:ms, [6:us])
+.TP
+\fB-expandcg\fR
+pre-expand the callgraph data in the html output (default: disabled)
+.TP
+\fB-filter \fI"func1,func2,..."\fR
+Instead of tracing each initcall, trace a custom list of functions (default: do_one_initcall)
+
+.SH COMMANDS
+.TP
+\fB-reboot\fR
+Reboot the machine and generate a new timeline automatically. Works in 4 steps.
+  1. updates grub with the required kernel parameters
+  2. installs a cron job which re-runs the tool after reboot
+  3. reboots the system
+  4. after startup, extracts the data and generates the timeline
+.TP
+\fB-manual\fR
+Show the requirements to generate a new timeline manually. Requires 3 steps.
+  1. append the string to the kernel command line via your native boot manager.
+  2. reboot the system
+  3. after startup, re-run the tool with the same arguments and no command
+.TP
+\fB-dmesg \fIfile\fR
+Create HTML output from an existing dmesg file.
+.TP
+\fB-ftrace \fIfile\fR
+Create HTML output from an existing ftrace file (used with -dmesg).
+.TP
+\fB-flistall\fR
+Print all ftrace functions capable of being captured. These are all the
+possible values you can add to trace via the -filter argument.
+
+.SH EXAMPLES
+Create a timeline using the current dmesg log.
+.IP
+\f(CW$ bootgraph\fR
+.PP
+Create a timeline using the current dmesg and ftrace log.
+.IP
+\f(CW$ bootgraph -callgraph\fR
+.PP
+Create a timeline using the current dmesg, add the log to the html and change the name.
+.IP
+\f(CW$ bootgraph -addlogs -o myboot.html\fR
+.PP
+Capture a new boot timeline by automatically rebooting the machine.
+.IP
+\f(CW$ sudo bootgraph -reboot -addlogs -o latestboot.html\fR
+.PP
+Capture a new boot timeline with function trace data.
+.IP
+\f(CW$ sudo bootgraph -reboot -f\fR
+.PP
+Capture a new boot timeline with trace & callgraph data. Skip callgraphs smaller than 5ms.
+.IP
+\f(CW$ sudo bootgraph -reboot -callgraph -mincg 5\fR
+.PP
+Capture a new boot timeline with callgraph data over custom functions.
+.IP
+\f(CW$ sudo bootgraph -reboot -callgraph -filter "acpi_ps_parse_aml,msleep"\fR
+.PP
+Capture a brand new boot timeline with manual reboot.
+.IP
+\f(CW$ sudo bootgraph -callgraph -manual\fR
+.IP
+\f(CW$ vi /etc/default/grub      # add the CMDLINE string to your kernel params\fR
+.IP
+\f(CW$ sudo reboot               # reboot the machine\fR
+.IP
+\f(CW$ sudo bootgraph -callgraph # re-run the tool after restart\fR
+.PP
+
+.SH "SEE ALSO"
+dmesg(1), update-grub(8), crontab(1), reboot(8)
+.PP
+.SH AUTHOR
+.nf
+Written by Todd Brandt <todd.e.brandt@linux.intel.com>
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
new file mode 100644
index 000000000000..610e72ebbc06
--- /dev/null
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -0,0 +1,243 @@
+.TH SLEEPGRAPH 8
+.SH NAME
+sleepgraph \- Suspend/Resume timing analysis
+.SH SYNOPSIS
+.ft B
+.B sleepgraph
+.RB [ OPTIONS ]
+.RB [ COMMAND ]
+.SH DESCRIPTION
+\fBsleepgraph \fP is designed to assist kernel and OS developers
+in optimizing their linux stack's suspend/resume time. Using a kernel
+image built with a few extra options enabled, the tool will execute a
+suspend and capture dmesg and ftrace data until resume is complete.
+This data is transformed into a device timeline and an optional
+callgraph to give a detailed view of which devices/subsystems are
+taking the most time in suspend/resume.
+.PP
+If no specific command is given, the default behavior is to initiate
+a suspend/resume.
+.PP
+Generates output files in subdirectory: suspend-yymmdd-HHMMSS
+   html timeline   :     <hostname>_<mode>.html
+   raw dmesg file  :     <hostname>_<mode>_dmesg.txt
+   raw ftrace file :     <hostname>_<mode>_ftrace.txt
+.SH OPTIONS
+.TP
+\fB-h\fR
+Print the help text.
+.TP
+\fB-v\fR
+Print the current tool version.
+.TP
+\fB-verbose\fR
+Print extra information during execution and analysis.
+.TP
+\fB-config \fIfile\fR
+Pull arguments and config options from a file.
+.TP
+\fB-m \fImode\fR
+Mode to initiate for suspend e.g. standby, freeze, mem (default: mem).
+.TP
+\fB-o \fIsubdir\fR
+Override the output subdirectory. Use {date}, {time}, {hostname} for current values.
+.sp
+e.g. suspend-{hostname}-{date}-{time}
+.TP
+\fB-rtcwake \fIt\fR | off
+Use rtcwake to autoresume after \fIt\fR seconds (default: 15). Set t to "off" to
+disable rtcwake and require a user keypress to resume.
+.TP
+\fB-addlogs\fR
+Add the dmesg and ftrace logs to the html output. They will be viewable by
+clicking buttons in the timeline.
+
+.SS "Advanced"
+.TP
+\fB-cmd \fIstr\fR
+Run the timeline over a custom suspend command, e.g. pm-suspend. By default
+the tool forces suspend via /sys/power/state so this allows testing over
+an OS's official suspend method. The output file will change to
+hostname_command.html and will autodetect which suspend mode was triggered.
+.TP
+\fB-filter \fI"d1,d2,..."\fR
+Filter out all but these device callbacks. These strings can be device names
+or module names. e.g. 0000:00:02.0, ata5, i915, usb, etc.
+.TP
+\fB-mindev \fIt\fR
+Discard all device callbacks shorter than \fIt\fR milliseconds (default: 0.0).
+This reduces the html file size as there can be many tiny callbacks which are barely
+visible. The value is a float: e.g. 0.001 represents 1 us.
+.TP
+\fB-proc\fR
+Add usermode process info into the timeline (default: disabled).
+.TP
+\fB-dev\fR
+Add kernel source calls and threads to the timeline (default: disabled).
+.TP
+\fB-x2\fR
+Run two suspend/resumes back to back (default: disabled).
+.TP
+\fB-x2delay \fIt\fR
+Include \fIt\fR ms delay between multiple test runs (default: 0 ms).
+.TP
+\fB-predelay \fIt\fR
+Include \fIt\fR ms delay before 1st suspend (default: 0 ms).
+.TP
+\fB-postdelay \fIt\fR
+Include \fIt\fR ms delay after last resume (default: 0 ms).
+.TP
+\fB-multi \fIn d\fR
+Execute \fIn\fR consecutive tests at \fId\fR seconds intervals. The outputs will
+be created in a new subdirectory with a summary page: suspend-xN-{date}-{time}.
+
+.SS "Ftrace Debug"
+.TP
+\fB-f\fR
+Use ftrace to create device callgraphs (default: disabled). This can produce
+very large outputs, i.e. 10MB - 100MB.
+.TP
+\fB-maxdepth \fIlevel\fR
+limit the callgraph trace depth to \fIlevel\fR (default: 0=all). This is
+the best way to limit the output size when using callgraphs via -f.
+.TP
+\fB-expandcg\fR
+pre-expand the callgraph data in the html output (default: disabled)
+.TP
+\fB-fadd \fIfile\fR
+Add functions to be graphed in the timeline from a list in a text file
+.TP
+\fB-mincg \fIt\fR
+Discard all callgraphs shorter than \fIt\fR milliseconds (default: 0.0).
+This reduces the html file size as there can be many tiny callgraphs
+which are barely visible in the timeline.
+The value is a float: e.g. 0.001 represents 1 us.
+.TP
+\fB-cgphase \fIp\fR
+Only show callgraph data for phase \fIp\fR (e.g. suspend_late).
+.TP
+\fB-cgtest \fIn\fR
+In an x2 run, only show callgraph data for test \fIn\fR (e.g. 0 or 1).
+.TP
+\fB-timeprec \fIn\fR
+Number of significant digits in timestamps (0:S, [3:ms], 6:us).
+
+.SH COMMANDS
+.TP
+\fB-ftrace \fIfile\fR
+Create HTML output from an existing ftrace file.
+.TP
+\fB-dmesg \fIfile\fR
+Create HTML output from an existing dmesg file.
+.TP
+\fB-summary \fIindir\fR
+Create a summary page of all tests in \fIindir\fR. Creates summary.html
+in the current folder. The output page is a table of tests with
+suspend and resume values sorted by suspend mode, host, and kernel.
+Includes test averages by mode and links to the test html files.
+.TP
+\fB-modes\fR
+List available suspend modes.
+.TP
+\fB-status\fR
+Test to see if the system is able to run this tool. Use this along
+with any options you intend to use to see if they will work.
+.TP
+\fB-fpdt\fR
+Print out the contents of the ACPI Firmware Performance Data Table.
+.TP
+\fB-usbtopo\fR
+Print out the current USB topology with power info.
+.TP
+\fB-usbauto\fR
+Enable autosuspend for all connected USB devices.
+.TP
+\fB-flist\fR
+Print the list of ftrace functions currently being captured. Functions
+that are not available as symbols in the current kernel are shown in red.
+By default, the tool traces a list of important suspend/resume functions
+in order to better fill out the timeline. If the user has added their own
+with -fadd they will also be checked.
+.TP
+\fB-flistall\fR
+Print all ftrace functions capable of being captured. These are all the
+possible values you can add to trace via the -fadd argument.
+
+.SH EXAMPLES
+.SS "Simple Commands"
+Check which suspend modes are currently supported.
+.IP
+\f(CW$ sleepgraph -modes\fR
+.PP
+Read the Firmware Performance Data Table (FPDT)
+.IP
+\f(CW$ sudo sleepgraph -fpdt\fR
+.PP
+Print out the current USB power topology
+.IP
+\f(CW$ sleepgraph -usbtopo
+.PP
+Verify that you can run a command with a set of arguments
+.IP
+\f(CW$ sudo sleepgraph -f -rtcwake 30 -status
+.PP
+Generate a summary of all timelines in a particular folder.
+.IP
+\f(CW$ sleepgraph -summary ~/workspace/myresults/\fR
+.PP
+Re-generate the html output from a previous run's dmesg and ftrace log.
+.IP
+\f(CW$ sleepgraph -dmesg myhost_mem_dmesg.txt -ftrace myhost_mem_ftrace.txt\fR
+.PP
+
+.SS "Capturing Simple Timelines"
+Execute a mem suspend with a 15 second wakeup. Include the logs in the html.
+.IP
+\f(CW$ sudo sleepgraph -rtcwake 15 -addlogs\fR
+.PP
+Execute a standby with a 15 second wakeup. Change the output folder name.
+.IP
+\f(CW$ sudo sleepgraph -m standby -rtcwake 15 -o "standby-{hostname}-{date}-{time}"\fR
+.PP
+Execute a freeze with no wakeup (require keypress). Change output folder name.
+.IP
+\f(CW$ sudo sleepgraph -m freeze -rtcwake off -o "freeze-{hostname}-{date}-{time}"\fR
+.PP
+
+.SS "Capturing Advanced Timelines"
+Execute a suspend & include dev mode source calls, limit callbacks to 5ms or larger.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -dev -mindev 5\fR
+.PP
+Run two suspends back to back, include a 500ms delay before, after, and in between runs.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -x2 -predelay 500 -x2delay 500 -postdelay 500\fR
+.PP
+Do a batch run of 10 freezes with 30 seconds delay between runs.
+.IP
+\f(CW$ sudo sleepgraph -m freeze -rtcwake 15 -multi 10 30\fR
+.PP
+Execute a suspend using a custom command.
+.IP
+\f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR
+.PP
+
+
+.SS "Capturing Timelines with Callgraph Data"
+Add device callgraphs. Limit the trace depth and only show callgraphs 10ms or larger.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f -maxdepth 5 -mincg 10\fR
+.PP
+Capture a full callgraph across all suspend, then filter the html by a single phase.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f\fR
+.IP
+\f(CW$ sleepgraph -dmesg host_mem_dmesg.txt -ftrace host_mem_ftrace.txt -f -cgphase resume
+.PP
+
+.SH "SEE ALSO"
+dmesg(1)
+.PP
+.SH AUTHOR
+.nf
+Written by Todd Brandt <todd.e.brandt@linux.intel.com>
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index fd706ac0f347..0b24dd9d01ff 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -353,6 +353,14 @@ def split_csv():
                 os.system('grep -m 1 common_cpu cpu.csv > cpu{:0>3}.csv'.format(index))
                 os.system('grep CPU_{:0>3} cpu.csv >> cpu{:0>3}.csv'.format(index, index))
 
+def fix_ownership(path):
+    """Change the owner of the file to SUDO_UID, if required"""
+
+    uid = os.environ.get('SUDO_UID')
+    gid = os.environ.get('SUDO_GID')
+    if uid is not None:
+        os.chown(path, int(uid), int(gid))
+
 def cleanup_data_files():
     """ clean up existing data files """
 
@@ -518,12 +526,16 @@ else:
 
 if not os.path.exists('results'):
     os.mkdir('results')
+    # The regular user needs to own the directory, not root.
+    fix_ownership('results')
 
 os.chdir('results')
 if os.path.exists(testname):
     print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
     sys.exit()
 os.mkdir(testname)
+# The regular user needs to own the directory, not root.
+fix_ownership(testname)
 os.chdir(testname)
 
 # Temporary (or perhaps not)
@@ -566,4 +578,9 @@ plot_scaled_cpu()
 plot_boost_cpu()
 plot_ghz_cpu()
 
+# It is preferrable, but not necessary, that the regular user owns the files, not root.
+for root, dirs, files in os.walk('.'):
+    for f in files:
+        fix_ownership(f)
+
 os.chdir('../../')