- Add logic to correct MBM total and local values fixing errata SKX99 and BDF102

(Fenghua Yu)
 
 - Cleanups.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAl/XhTUACgkQEsHwGGHe
 VUpTkA/9FNOaJohBa9XO2bv3RjsqE4/2PqZS434HJL6yrbbRDFyscSp2sNCDlfh/
 6zj9R72HpRf6xLW8CTeszrfUQ0z3CFnfwz8EAolhv5DOiJnM3wSS5inmLhtyTMgw
 mJ8qVXXELdAFm1R8rAQLVmA3FE9aV6u19POstKeXMUykCyaxKDhNLJgn3CiXpegU
 AvG3QWmrR/1x4DjQguMNwXMQiYVDkRdnfJa5SOzCTwyUj0D0an+kPmSHEMvhaOL6
 tUYfjLkZYdB5THG8eUM833EJmgRe7X1VtTQIydcVyt/6tbL50FmVYUpWk+SXuSnJ
 /uBdzx0IXmfvYKgGSFw+FGOyGH8u02nR4621rECNLNf1h8YknzpN7Ri1hB83GjQe
 PMiW/gCwxM6gfRsBpJ17xnAbmR5Az2dOz71uj5k13L1GNL8VZD2DZz//a4TDarzE
 4R8i3PQ/oUMgmL+ARpVWwycc/dhB8o+1glmAjOwWHO/kM1k/hx9Ou5bbLlhffbL5
 zkk6ORrNfKzAMvE1flkjim5XgNHY8n/L4CwBKXJFQ3IYbXBGg0CKxBHWvTpIz1KO
 O4h52OnYUpM15sY22NSvpdRiHcsgEqNOB4lb6K2dby9iE5b5RHpNgfMNO6+eOtag
 dgCWopamLK2+MFeNe48+1v/3bDveskhvJon91GUZYmfkFzhZTN8=
 =9Lsd
 -----END PGP SIGNATURE-----

Merge tag 'x86_cache_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cache resource control updates from Borislav Petkov:

 - add logic to correct MBM total and local values fixing errata SKX99
   and BDF102 (Fenghua Yu)

 - cleanups

* tag 'x86_cache_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Clean up unused function parameter in rmdir path
  x86/resctrl: Constify kernfs_ops
  x86/resctrl: Correct MBM total and local values
  Documentation/x86: Rename resctrl_ui.rst and add two errata to the file
This commit is contained in:
Linus Torvalds 2020-12-14 13:53:17 -08:00
commit 8ba27ae36b
6 changed files with 189 additions and 16 deletions

View file

@ -27,7 +27,7 @@ x86-specific Documentation
pti
mds
microcode
resctrl_ui
resctrl
tsx_async_abort
usb-legacy-support
i386/index

View file

@ -1209,3 +1209,96 @@ View the llc occupancy snapshot::
# cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy
11234000
Intel RDT Errata
================
Intel MBM Counters May Report System Memory Bandwidth Incorrectly
-----------------------------------------------------------------
Errata SKX99 for Skylake server and BDF102 for Broadwell server.
Problem: Intel Memory Bandwidth Monitoring (MBM) counters track metrics
according to the assigned Resource Monitor ID (RMID) for that logical
core. The IA32_QM_CTR register (MSR 0xC8E), used to report these
metrics, may report incorrect system bandwidth for certain RMID values.
Implication: Due to the errata, system memory bandwidth may not match
what is reported.
Workaround: MBM total and local readings are corrected according to the
following correction factor table:
+---------------+---------------+---------------+-----------------+
|core count |rmid count |rmid threshold |correction factor|
+---------------+---------------+---------------+-----------------+
|1 |8 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|2 |16 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|3 |24 |15 |0.969650 |
+---------------+---------------+---------------+-----------------+
|4 |32 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|6 |48 |31 |0.969650 |
+---------------+---------------+---------------+-----------------+
|7 |56 |47 |1.142857 |
+---------------+---------------+---------------+-----------------+
|8 |64 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|9 |72 |63 |1.185115 |
+---------------+---------------+---------------+-----------------+
|10 |80 |63 |1.066553 |
+---------------+---------------+---------------+-----------------+
|11 |88 |79 |1.454545 |
+---------------+---------------+---------------+-----------------+
|12 |96 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|13 |104 |95 |1.230769 |
+---------------+---------------+---------------+-----------------+
|14 |112 |95 |1.142857 |
+---------------+---------------+---------------+-----------------+
|15 |120 |95 |1.066667 |
+---------------+---------------+---------------+-----------------+
|16 |128 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|17 |136 |127 |1.254863 |
+---------------+---------------+---------------+-----------------+
|18 |144 |127 |1.185255 |
+---------------+---------------+---------------+-----------------+
|19 |152 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|20 |160 |127 |1.066667 |
+---------------+---------------+---------------+-----------------+
|21 |168 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|22 |176 |159 |1.454334 |
+---------------+---------------+---------------+-----------------+
|23 |184 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|24 |192 |127 |0.969744 |
+---------------+---------------+---------------+-----------------+
|25 |200 |191 |1.280246 |
+---------------+---------------+---------------+-----------------+
|26 |208 |191 |1.230921 |
+---------------+---------------+---------------+-----------------+
|27 |216 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|28 |224 |191 |1.143118 |
+---------------+---------------+---------------+-----------------+
If rmid > rmid threshold, MBM total and local values should be multiplied
by the correction factor.
See:
1. Erratum SKX99 in Intel Xeon Processor Scalable Family Specification Update:
http://web.archive.org/web/20200716124958/https://www.intel.com/content/www/us/en/processors/xeon/scalable/xeon-scalable-spec-update.html
2. Erratum BDF102 in Intel Xeon E5-2600 v4 Processor Product Family Specification Update:
http://web.archive.org/web/20191125200531/https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-v4-spec-update.pdf
3. The errata in Intel Resource Director Technology (Intel RDT) on 2nd Generation Intel Xeon Scalable Processors Reference Manual:
https://software.intel.com/content/www/us/en/develop/articles/intel-resource-director-technology-rdt-reference-manual.html
for further information.

View file

@ -895,6 +895,10 @@ static __init void __check_quirks_intel(void)
set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
else
set_rdt_options("!l3cat");
fallthrough;
case INTEL_FAM6_BROADWELL_X:
intel_rdt_mbm_apply_quirk();
break;
}
}

View file

@ -264,7 +264,7 @@ void __exit rdtgroup_exit(void);
struct rftype {
char *name;
umode_t mode;
struct kernfs_ops *kf_ops;
const struct kernfs_ops *kf_ops;
unsigned long flags;
unsigned long fflags;
@ -619,6 +619,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
void __init intel_rdt_mbm_apply_quirk(void);
bool is_mba_sc(struct rdt_resource *r);
void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);

View file

@ -64,6 +64,69 @@ unsigned int rdt_mon_features;
*/
unsigned int resctrl_cqm_threshold;
#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
/*
* The correction factor table is documented in Documentation/x86/resctrl.rst.
* If rmid > rmid threshold, MBM total and local values should be multiplied
* by the correction factor.
*
* The original table is modified for better code:
*
* 1. The threshold 0 is changed to rmid count - 1 so don't do correction
* for the case.
* 2. MBM total and local correction table indexed by core counter which is
* equal to (x86_cache_max_rmid + 1) / 8 - 1 and is from 0 up to 27.
* 3. The correction factor is normalized to 2^20 (1048576) so it's faster
* to calculate corrected value by shifting:
* corrected_value = (original_value * correction_factor) >> 20
*/
static const struct mbm_correction_factor_table {
u32 rmidthreshold;
u64 cf;
} mbm_cf_table[] __initdata = {
{7, CF(1.000000)},
{15, CF(1.000000)},
{15, CF(0.969650)},
{31, CF(1.000000)},
{31, CF(1.066667)},
{31, CF(0.969650)},
{47, CF(1.142857)},
{63, CF(1.000000)},
{63, CF(1.185115)},
{63, CF(1.066553)},
{79, CF(1.454545)},
{95, CF(1.000000)},
{95, CF(1.230769)},
{95, CF(1.142857)},
{95, CF(1.066667)},
{127, CF(1.000000)},
{127, CF(1.254863)},
{127, CF(1.185255)},
{151, CF(1.000000)},
{127, CF(1.066667)},
{167, CF(1.000000)},
{159, CF(1.454334)},
{183, CF(1.000000)},
{127, CF(0.969744)},
{191, CF(1.280246)},
{191, CF(1.230921)},
{215, CF(1.000000)},
{191, CF(1.143118)},
};
static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX;
static u64 mbm_cf __read_mostly;
static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
{
/* Correct MBM value. */
if (rmid > mbm_cf_rmidthreshold)
val = (val * mbm_cf) >> 20;
return val;
}
static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
struct rmid_entry *entry;
@ -260,7 +323,8 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
m->chunks += chunks;
m->prev_msr = tval;
rr->val += m->chunks;
rr->val += get_corrected_mbm_count(rmid, m->chunks);
return 0;
}
@ -279,7 +343,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
return;
chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
cur_bw = (chunks * r->mon_scale) >> 20;
cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20;
if (m->delta_comp)
m->delta_bw = abs(cur_bw - m->prev_bw);
@ -642,3 +706,17 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)
return 0;
}
void __init intel_rdt_mbm_apply_quirk(void)
{
int cf_index;
cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1;
if (cf_index >= ARRAY_SIZE(mbm_cf_table)) {
pr_info("No MBM correction factor available\n");
return;
}
mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
mbm_cf = mbm_cf_table[cf_index].cf;
}

View file

@ -240,13 +240,13 @@ static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
return -EINVAL;
}
static struct kernfs_ops rdtgroup_kf_single_ops = {
static const struct kernfs_ops rdtgroup_kf_single_ops = {
.atomic_write_len = PAGE_SIZE,
.write = rdtgroup_file_write,
.seq_show = rdtgroup_seqfile_show,
};
static struct kernfs_ops kf_mondata_ops = {
static const struct kernfs_ops kf_mondata_ops = {
.atomic_write_len = PAGE_SIZE,
.seq_show = rdtgroup_mondata_show,
};
@ -3023,8 +3023,7 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
return -EPERM;
}
static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
cpumask_var_t tmpmask)
static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
{
struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
int cpu;
@ -3056,8 +3055,7 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
return 0;
}
static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
struct rdtgroup *rdtgrp)
static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
{
rdtgrp->flags = RDT_DELETED;
list_del(&rdtgrp->rdtgroup_list);
@ -3066,8 +3064,7 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
return 0;
}
static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
cpumask_var_t tmpmask)
static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
{
int cpu;
@ -3094,7 +3091,7 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
closid_free(rdtgrp->closid);
free_rmid(rdtgrp->mon.rmid);
rdtgroup_ctrl_remove(kn, rdtgrp);
rdtgroup_ctrl_remove(rdtgrp);
/*
* Free all the child monitor group rmids.
@ -3131,13 +3128,13 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
rdtgrp != &rdtgroup_default) {
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
ret = rdtgroup_ctrl_remove(kn, rdtgrp);
ret = rdtgroup_ctrl_remove(rdtgrp);
} else {
ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
}
} else if (rdtgrp->type == RDTMON_GROUP &&
is_mon_groups(parent_kn, kn->name)) {
ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
} else {
ret = -EPERM;
}