 - Add a tracepoint to read out LLC occupancy of resource monitor IDs with the
   goal of freeing them sooner rather than later
 
 - Other code improvements and cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmZCW64ACgkQEsHwGGHe
 VUq7Dw//ZM+4OX3l0P6NTv4WJ9UDn3IltRm+D61J6hYw19iETlGGAel5T6DI1LPT
 GYAoOazd9ouNjwU0YhOn6Se3SVWKxLLOGH+/RIJtqwiCwTy2nGfSPHw3pnTxwtK4
 pRttm6fPQWIUuQyDrzmbJGP+va4YDtVtDyBkxNlk8pQTvF7X0QCcu6GjNW9r6+Md
 92J2AwzeoDAeIc16vKHru4S3wBCqdP7xZ9GqBb8wrNxBy8taSN4wE9cuwDjev5Yw
 ANGeREv3odWvYQ7p0fQVY2j25ddjGNE4qEEJ1iAIJDh9bIHURAF3s1aSPqcMyHyF
 eB8NNf7ZjQhycmBX9ci6CHYOKc3i25nWiMoaC1iWZKQEviTt3OCEeKr20mjAfKOz
 wlUs55iGrHkbS10kB91Z6lOMDNiIu+x4kuiF5y1W73SDfkY+pYv8zLQL9rhNpYnd
 BEcOF+YaJuhi4Y7GUDb0fWdIUZcfGItSJyNbR8jaznJKcP2pjznSUKqM/AphZyuU
 bVsVsYkYQiE2vl4xYdmyHnxsfnpuMTVNuPpIonyp1mIa77iDVeiwYabkau+pz8L9
 Rv1jhUmYVfawxKiRc6tOQAsxOtAiqrm2GBpZlisw8KtfzZaPC9h7U7bXC4up1TtH
 nZVt+qV/8M9nc3Trocb+d8djbrv+Uqh4EHPTBbFEfW6qsMFsXhk=
 =8EKr
 -----END PGP SIGNATURE-----

Merge tag 'x86_cache_for_v6.10_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 resource control updates from Borislav Petkov:

 - Add a tracepoint to read out LLC occupancy of resource monitor IDs
   with the goal of freeing them sooner rather than later

 - Other code improvements and cleanups

* tag 'x86_cache_for_v6.10_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Add tracepoint for llc_occupancy tracking
  x86/resctrl: Rename pseudo_lock_event.h to trace.h
  x86/resctrl: Simplify call convention for MSR update functions
  x86/resctrl: Pass domain to target CPU
This commit is contained in:
Linus Torvalds 2024-05-14 09:04:37 -07:00
commit 5186ba3323
8 changed files with 72 additions and 83 deletions

View File

@ -446,6 +446,12 @@ during mkdir.
max_threshold_occupancy is a user configurable value to determine the
occupancy at which an RMID can be freed.
The mon_llc_occupancy_limbo tracepoint gives the precise occupancy in bytes
for a subset of RMIDs that are not immediately available for allocation.
This can't be relied on to produce output every second; it may be necessary
to attempt to create an empty monitor group to force an update. Output may
only be produced if creation of a control or monitor group fails.
Schemata files - general concepts
---------------------------------
Each line in the file describes one resource. The line starts with

View File

@ -56,14 +56,9 @@ int max_name_width, max_data_width;
*/
bool rdt_alloc_capable;
static void
mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);
#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)
@ -309,12 +304,11 @@ static void rdt_get_cdp_l2_config(void)
rdt_get_cdp_config(RDT_RESOURCE_L2);
}
static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
static void mba_wrmsr_amd(struct msr_param *m)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
for (i = m->low; i < m->high; i++)
wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
@ -334,25 +328,22 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
return r->default_ctrl;
}
static void
mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r)
static void mba_wrmsr_intel(struct msr_param *m)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
/* Write the delay values for mba. */
for (i = m->low; i < m->high; i++)
wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r));
wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
}
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
static void cat_wrmsr(struct msr_param *m)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
for (i = m->low; i < m->high; i++)
wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
@ -362,6 +353,8 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
{
struct rdt_domain *d;
lockdep_assert_cpus_held();
list_for_each_entry(d, &r->domains, list) {
/* Find the domain that contains this CPU */
if (cpumask_test_cpu(cpu, &d->cpu_mask))
@ -378,19 +371,11 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
void rdt_ctrl_update(void *arg)
{
struct rdt_hw_resource *hw_res;
struct msr_param *m = arg;
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
struct rdt_resource *r = m->res;
int cpu = smp_processor_id();
struct rdt_domain *d;
d = get_domain_from_cpu(cpu, r);
if (d) {
hw_res->msr_update(d, m, r);
return;
}
pr_warn_once("cpu %d not found in any domain for resource %s\n",
cpu, r->name);
hw_res = resctrl_to_arch_res(m->res);
hw_res->msr_update(m);
}
/*
@ -463,9 +448,11 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
hw_dom->ctrl_val = dc;
setup_default_ctrlval(r, dc);
m.res = r;
m.dom = d;
m.low = 0;
m.high = hw_res->num_closid;
hw_res->msr_update(d, &m, r);
hw_res->msr_update(&m);
return 0;
}

View File

@ -272,22 +272,6 @@ static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
}
}
/*
 * apply_config() - apply a staged control value to a domain's cached state.
 * @hw_dom:   Hardware domain whose ctrl_val[] array is updated.
 * @cfg:      Staged configuration carrying the requested new_ctrl value.
 * @idx:      Index into hw_dom->ctrl_val[] to compare and update.
 * @cpu_mask: Mask that receives one CPU of the domain when a change is made.
 *
 * Return: true if ctrl_val[idx] differed from cfg->new_ctrl and was updated,
 * false if the value was already current (nothing is modified in that case).
 */
static bool apply_config(struct rdt_hw_domain *hw_dom,
struct resctrl_staged_config *cfg, u32 idx,
cpumask_var_t cpu_mask)
{
struct rdt_domain *dom = &hw_dom->d_resctrl;
if (cfg->new_ctrl != hw_dom->ctrl_val[idx]) {
/* Record a single CPU from this domain's cpu_mask in the caller's mask. */
cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask);
hw_dom->ctrl_val[idx] = cfg->new_ctrl;
return true;
}
return false;
}
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
@ -302,9 +286,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
hw_dom->ctrl_val[idx] = cfg_val;
msr_param.res = r;
msr_param.dom = d;
msr_param.low = idx;
msr_param.high = idx + 1;
hw_res->msr_update(d, &msr_param, r);
hw_res->msr_update(&msr_param);
return 0;
}
@ -315,48 +300,39 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
struct rdt_hw_domain *hw_dom;
struct msr_param msr_param;
enum resctrl_conf_type t;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
u32 idx;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
msr_param.res = NULL;
list_for_each_entry(d, &r->domains, list) {
hw_dom = resctrl_to_arch_dom(d);
msr_param.res = NULL;
for (t = 0; t < CDP_NUM_TYPES; t++) {
cfg = &hw_dom->d_resctrl.staged_config[t];
if (!cfg->have_new_ctrl)
continue;
idx = get_config_index(closid, t);
if (!apply_config(hw_dom, cfg, idx, cpu_mask))
if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
continue;
hw_dom->ctrl_val[idx] = cfg->new_ctrl;
if (!msr_param.res) {
msr_param.low = idx;
msr_param.high = msr_param.low + 1;
msr_param.res = r;
msr_param.dom = d;
} else {
msr_param.low = min(msr_param.low, idx);
msr_param.high = max(msr_param.high, idx + 1);
}
}
if (msr_param.res)
smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1);
}
if (cpumask_empty(cpu_mask))
goto done;
/* Update resource control msr on all the CPUs. */
on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1);
done:
free_cpumask_var(cpu_mask);
return 0;
}

View File

@ -379,11 +379,13 @@ static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
/**
* struct msr_param - set a range of MSRs from a domain
* @res: The resource to use
* @dom: The domain to update
* @low: Beginning index from base MSR (inclusive)
* @high: End index from base MSR (exclusive); the msr_update callbacks
*        write indices low <= i < high
*/
struct msr_param {
struct rdt_resource *res;
struct rdt_domain *dom;
u32 low;
u32 high;
};
@ -443,8 +445,7 @@ struct rdt_hw_resource {
struct rdt_resource r_resctrl;
u32 num_closid;
unsigned int msr_base;
void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
void (*msr_update)(struct msr_param *m);
unsigned int mon_scale;
unsigned int mbm_width;
unsigned int mbm_cfg_mask;

View File

@ -24,6 +24,7 @@
#include <asm/resctrl.h>
#include "internal.h"
#include "trace.h"
/**
* struct rmid_entry - dirty tracking for all RMID.
@ -354,6 +355,16 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
rmid_dirty = true;
} else {
rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
/*
* x86's CLOSID and RMID are independent numbers, so the entry's
* CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
* RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
* used to select the configuration. It is thus necessary to track both
* CLOSID and RMID because there may be dependencies between them
* on some architectures.
*/
trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->id, val);
}
if (force_free || !rmid_dirty) {

View File

@ -31,7 +31,7 @@
#include "internal.h"
#define CREATE_TRACE_POINTS
#include "pseudo_lock_event.h"
#include "trace.h"
/*
* The bits needed to disable hardware prefetching varies based on the

View File

@ -2813,16 +2813,12 @@ static int reset_all_ctrls(struct rdt_resource *r)
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
struct rdt_hw_domain *hw_dom;
struct msr_param msr_param;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
int i;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
msr_param.res = r;
msr_param.low = 0;
msr_param.high = hw_res->num_closid;
@ -2834,17 +2830,13 @@ static int reset_all_ctrls(struct rdt_resource *r)
*/
list_for_each_entry(d, &r->domains, list) {
hw_dom = resctrl_to_arch_dom(d);
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
for (i = 0; i < hw_res->num_closid; i++)
hw_dom->ctrl_val[i] = r->default_ctrl;
msr_param.dom = d;
smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1);
}
/* Update CBM on all the CPUs in cpu_mask */
on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1);
free_cpumask_var(cpu_mask);
return 0;
}

View File

@ -2,8 +2,8 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM resctrl
#if !defined(_TRACE_PSEUDO_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PSEUDO_LOCK_H
#if !defined(_TRACE_RESCTRL_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_RESCTRL_H
#include <linux/tracepoint.h>
@ -35,9 +35,25 @@ TRACE_EVENT(pseudo_lock_l3,
TP_printk("hits=%llu miss=%llu",
__entry->l3_hits, __entry->l3_miss));
#endif /* _TRACE_PSEUDO_LOCK_H */
/*
 * mon_llc_occupancy_limbo - trace event carrying an LLC occupancy reading.
 *
 * Records four fields: the control hardware ID, the monitor hardware ID,
 * the domain ID and the occupancy value in bytes. The IDs are printed as
 * unsigned values, the domain ID as a signed int and the occupancy as an
 * unsigned 64-bit value.
 */
TRACE_EVENT(mon_llc_occupancy_limbo,
TP_PROTO(u32 ctrl_hw_id, u32 mon_hw_id, int domain_id, u64 llc_occupancy_bytes),
TP_ARGS(ctrl_hw_id, mon_hw_id, domain_id, llc_occupancy_bytes),
TP_STRUCT__entry(__field(u32, ctrl_hw_id)
__field(u32, mon_hw_id)
__field(int, domain_id)
__field(u64, llc_occupancy_bytes)),
TP_fast_assign(__entry->ctrl_hw_id = ctrl_hw_id;
__entry->mon_hw_id = mon_hw_id;
__entry->domain_id = domain_id;
__entry->llc_occupancy_bytes = llc_occupancy_bytes;),
TP_printk("ctrl_hw_id=%u mon_hw_id=%u domain_id=%d llc_occupancy_bytes=%llu",
__entry->ctrl_hw_id, __entry->mon_hw_id, __entry->domain_id,
__entry->llc_occupancy_bytes)
);
#endif /* _TRACE_RESCTRL_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE pseudo_lock_event
#define TRACE_INCLUDE_FILE trace
#include <trace/define_trace.h>