cpufreq: tegra194: use refclk delta based loop instead of udelay

Use reference clock count based loop instead of "udelay()" for
sampling of counters to improve the accuracy of re-generated CPU
frequency. "udelay()" internally calls "WFE" which stops the
counters and results in bigger delta between the last set freq
and the re-generated value from counters. The counter sampling
window used in loop is the minimum number of reference clock
cycles which is known to give a stable value of CPU frequency.
The change also helps to reduce the sampling window from "500us"
to "<50us".

Suggested-by: Antti Miettinen <amiettinen@nvidia.com>
Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
This commit is contained in:
Sumit Gupta 2023-10-04 19:35:37 +05:30 committed by Viresh Kumar
parent 6b121b4cf7
commit a60a556788

View file

@ -5,7 +5,6 @@
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/of.h>
@ -21,10 +20,11 @@
#define KHZ 1000
#define REF_CLK_MHZ 408 /* 408 MHz */
#define US_DELAY 500
#define CPUFREQ_TBL_STEP_HZ (50 * KHZ * KHZ)
#define MAX_CNT ~0U
#define MAX_DELTA_KHZ 115200
#define NDIV_MASK 0x1FF
#define CORE_OFFSET(cpu) (cpu * 8)
@ -68,6 +68,7 @@ struct tegra_cpufreq_soc {
int maxcpus_per_cluster;
unsigned int num_clusters;
phys_addr_t actmon_cntr_base;
u32 refclk_delta_min;
};
struct tegra194_cpufreq_data {
@ -149,6 +150,8 @@ static void tegra234_read_counters(struct tegra_cpu_ctr *c)
{
struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
void __iomem *actmon_reg;
u32 delta_refcnt;
int cnt = 0;
u64 val;
actmon_reg = CORE_ACTMON_CNTR_REG(data, data->cpu_data[c->cpu].clusterid,
@ -157,10 +160,25 @@ static void tegra234_read_counters(struct tegra_cpu_ctr *c)
val = readq(actmon_reg);
c->last_refclk_cnt = upper_32_bits(val);
c->last_coreclk_cnt = lower_32_bits(val);
udelay(US_DELAY);
val = readq(actmon_reg);
c->refclk_cnt = upper_32_bits(val);
c->coreclk_cnt = lower_32_bits(val);
/*
* The sampling window is based on the minimum number of reference
* clock cycles which is known to give a stable value of CPU frequency.
*/
do {
val = readq(actmon_reg);
c->refclk_cnt = upper_32_bits(val);
c->coreclk_cnt = lower_32_bits(val);
if (c->refclk_cnt < c->last_refclk_cnt)
delta_refcnt = c->refclk_cnt + (MAX_CNT - c->last_refclk_cnt);
else
delta_refcnt = c->refclk_cnt - c->last_refclk_cnt;
if (++cnt >= 0xFFFF) {
pr_warn("cpufreq: problem with refclk on cpu:%d, delta_refcnt:%u, cnt:%d\n",
c->cpu, delta_refcnt, cnt);
break;
}
} while (delta_refcnt < data->soc->refclk_delta_min);
}
static struct tegra_cpufreq_ops tegra234_cpufreq_ops = {
@ -175,6 +193,7 @@ static const struct tegra_cpufreq_soc tegra234_cpufreq_soc = {
.actmon_cntr_base = 0x9000,
.maxcpus_per_cluster = 4,
.num_clusters = 3,
.refclk_delta_min = 16000,
};
static const struct tegra_cpufreq_soc tegra239_cpufreq_soc = {
@ -182,6 +201,7 @@ static const struct tegra_cpufreq_soc tegra239_cpufreq_soc = {
.actmon_cntr_base = 0x4000,
.maxcpus_per_cluster = 8,
.num_clusters = 1,
.refclk_delta_min = 16000,
};
static void tegra194_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid)
@ -222,15 +242,33 @@ static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response
static void tegra194_read_counters(struct tegra_cpu_ctr *c)
{
struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
u32 delta_refcnt;
int cnt = 0;
u64 val;
val = read_freq_feedback();
c->last_refclk_cnt = lower_32_bits(val);
c->last_coreclk_cnt = upper_32_bits(val);
udelay(US_DELAY);
val = read_freq_feedback();
c->refclk_cnt = lower_32_bits(val);
c->coreclk_cnt = upper_32_bits(val);
/*
* The sampling window is based on the minimum number of reference
* clock cycles which is known to give a stable value of CPU frequency.
*/
do {
val = read_freq_feedback();
c->refclk_cnt = lower_32_bits(val);
c->coreclk_cnt = upper_32_bits(val);
if (c->refclk_cnt < c->last_refclk_cnt)
delta_refcnt = c->refclk_cnt + (MAX_CNT - c->last_refclk_cnt);
else
delta_refcnt = c->refclk_cnt - c->last_refclk_cnt;
if (++cnt >= 0xFFFF) {
pr_warn("cpufreq: problem with refclk on cpu:%d, delta_refcnt:%u, cnt:%d\n",
c->cpu, delta_refcnt, cnt);
break;
}
} while (delta_refcnt < data->soc->refclk_delta_min);
}
static void tegra_read_counters(struct work_struct *work)
@ -288,9 +326,8 @@ static unsigned int tegra194_calculate_speed(u32 cpu)
u32 rate_mhz;
/*
* udelay() is required to reconstruct cpu frequency over an
* observation window. Using workqueue to call udelay() with
* interrupts enabled.
* Reconstruct cpu frequency over an observation/sampling window.
* Using workqueue to keep interrupts enabled during the interval.
*/
read_counters_work.c.cpu = cpu;
INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
@ -372,9 +409,9 @@ static unsigned int tegra194_get_speed(u32 cpu)
if (pos->driver_data != ndiv)
continue;
if (abs(pos->frequency - rate) > 115200) {
pr_warn("cpufreq: cpu%d,cur:%u,set:%u,set ndiv:%llu\n",
cpu, rate, pos->frequency, ndiv);
if (abs(pos->frequency - rate) > MAX_DELTA_KHZ) {
pr_warn("cpufreq: cpu%d,cur:%u,set:%u,delta:%d,set ndiv:%llu\n",
cpu, rate, pos->frequency, abs(rate - pos->frequency), ndiv);
} else {
rate = pos->frequency;
}
@ -568,6 +605,7 @@ static const struct tegra_cpufreq_soc tegra194_cpufreq_soc = {
.ops = &tegra194_cpufreq_ops,
.maxcpus_per_cluster = 2,
.num_clusters = 4,
.refclk_delta_min = 16000,
};
static void tegra194_cpufreq_free_resources(void)
@ -684,7 +722,7 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev)
soc = of_device_get_match_data(&pdev->dev);
if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters) {
if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters && soc->refclk_delta_min) {
data->soc = soc;
} else {
dev_err(&pdev->dev, "soc data missing\n");