Merge tag 'edac_updates_for_v5.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:

 - Add support for newer AMD family 0x19, models 0x10-... CPUs to
   amd64_edac

 - The usual amount of improvements and fixes

* tag 'edac_updates_for_v5.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/altera: Add SDRAM ECC check for U-Boot
  EDAC/amd64: Add new register offset support and related changes
  EDAC/amd64: Set memory type per DIMM
  EDAC/mc: Remove unnecessary cast to char * in edac_align_ptr()
  EDAC: Use default_groups in kobj_type
  EDAC: Use proper list of struct attribute for attributes
commit 5e8919170a
Linus Torvalds, 2022-03-21 11:05:04 -07:00
6 changed files with 183 additions and 51 deletions

drivers/edac/altera_edac.c

@@ -1083,8 +1083,46 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
 #ifdef CONFIG_EDAC_ALTERA_SDRAM
 
+/*
+ * A legacy U-Boot bug only enabled memory mapped access to the ECC Enable
+ * register if ECC is enabled. Linux checks the ECC Enable register to
+ * determine ECC status.
+ * Use an SMC call (which always works) to determine ECC enablement.
+ */
+static int altr_s10_sdram_check_ecc_deps(struct altr_edac_device_dev *device)
+{
+	const struct edac_device_prv_data *prv = device->data;
+	unsigned long sdram_ecc_addr;
+	struct arm_smccc_res result;
+	struct device_node *np;
+	phys_addr_t sdram_addr;
+	u32 read_reg;
+	int ret;
+
+	np = of_find_compatible_node(NULL, NULL, "altr,sdr-ctl");
+	if (!np)
+		goto sdram_err;
+
+	sdram_addr = of_translate_address(np, of_get_address(np, 0, NULL, NULL));
+	of_node_put(np);
+	sdram_ecc_addr = (unsigned long)sdram_addr + prv->ecc_en_ofst;
+
+	arm_smccc_smc(INTEL_SIP_SMC_REG_READ, sdram_ecc_addr,
+		      0, 0, 0, 0, 0, 0, &result);
+	read_reg = (unsigned int)result.a1;
+	ret = (int)result.a0;
+	if (!ret && (read_reg & prv->ecc_enable_mask))
+		return 0;
+
+sdram_err:
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "%s: No ECC present or ECC disabled.\n",
+		    device->edac_dev_name);
+	return -ENODEV;
+}
+
 static const struct edac_device_prv_data s10_sdramecc_data = {
-	.setup = altr_check_ecc_deps,
+	.setup = altr_s10_sdram_check_ecc_deps,
 	.ce_clear_mask = ALTR_S10_ECC_SERRPENA,
 	.ue_clear_mask = ALTR_S10_ECC_DERRPENA,
 	.ecc_enable_mask = ALTR_S10_ECC_EN,
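
The SMC interface used above returns a status code in a0 and the read value in a1. As a minimal sketch of that calling convention (the helper name is hypothetical; the convention is taken from the hunk above):

	static int sip_smc_reg_read(unsigned long addr, u32 *val)
	{
		struct arm_smccc_res res;

		/* Secure monitor call: status lands in a0, data in a1. */
		arm_smccc_smc(INTEL_SIP_SMC_REG_READ, addr,
			      0, 0, 0, 0, 0, 0, &res);
		if ((int)res.a0)
			return -EIO;

		*val = (u32)res.a1;
		return 0;
	}

This is why the driver can trust the result even when the memory-mapped ECC Enable register is inaccessible: the secure firmware performs the read on its behalf.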

drivers/edac/amd64_edac.c

@@ -15,6 +15,21 @@ static struct msr __percpu *msrs;
 
 static struct amd64_family_type *fam_type;
 
+static inline u32 get_umc_reg(u32 reg)
+{
+	if (!fam_type->flags.zn_regs_v2)
+		return reg;
+
+	switch (reg) {
+	case UMCCH_ADDR_CFG:		return UMCCH_ADDR_CFG_DDR5;
+	case UMCCH_ADDR_MASK_SEC:	return UMCCH_ADDR_MASK_SEC_DDR5;
+	case UMCCH_DIMM_CFG:		return UMCCH_DIMM_CFG_DDR5;
+	}
+
+	WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg);
+	return 0;
+}
+
 /* Per-node stuff */
 static struct ecc_settings **ecc_stngs;
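
In effect, the rest of the driver keeps using the DDR4-era symbolic offsets and get_umc_reg() transparently remaps them on zn_regs_v2 systems. A sketch of the intended call pattern (offsets per the amd64_edac.h hunk below):

	/* Pre-zn_regs_v2: returns UMCCH_ADDR_CFG (0x30) unchanged.
	 * zn_regs_v2:     returns UMCCH_ADDR_CFG_DDR5 (0x40).
	 */
	amd_smn_read(nid, get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_CFG), &tmp);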
@@ -1429,8 +1444,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
 		edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
 			 i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
 
-		if (pvt->dram_type == MEM_LRDDR4) {
-			amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
+		if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) {
+			amd_smn_read(pvt->mc_node_id,
+				     umc_base + get_umc_reg(UMCCH_ADDR_CFG),
+				     &tmp);
 
 			edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
 				 i, 1 << ((tmp >> 4) & 0x3));
 		}
@@ -1505,7 +1522,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
 		for_each_umc(umc) {
 			pvt->csels[umc].b_cnt = 4;
-			pvt->csels[umc].m_cnt = 2;
+			pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2;
 		}
 
 	} else {
@@ -1545,7 +1562,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
 		}
 
 		umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
-		umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
+		umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC);
 
 		for_each_chip_select_mask(cs, umc, pvt) {
 			mask = &pvt->csels[umc].csmasks[cs];
@@ -1616,19 +1633,49 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
 	}
 }
 
+static void determine_memory_type_df(struct amd64_pvt *pvt)
+{
+	struct amd64_umc *umc;
+	u32 i;
+
+	for_each_umc(i) {
+		umc = &pvt->umc[i];
+
+		if (!(umc->sdp_ctrl & UMC_SDP_INIT)) {
+			umc->dram_type = MEM_EMPTY;
+			continue;
+		}
+
+		/*
+		 * Check if the system supports the "DDR Type" field in UMC Config
+		 * and has DDR5 DIMMs in use.
+		 */
+		if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) {
+			if (umc->dimm_cfg & BIT(5))
+				umc->dram_type = MEM_LRDDR5;
+			else if (umc->dimm_cfg & BIT(4))
+				umc->dram_type = MEM_RDDR5;
+			else
+				umc->dram_type = MEM_DDR5;
+		} else {
+			if (umc->dimm_cfg & BIT(5))
+				umc->dram_type = MEM_LRDDR4;
+			else if (umc->dimm_cfg & BIT(4))
+				umc->dram_type = MEM_RDDR4;
+			else
+				umc->dram_type = MEM_DDR4;
+		}
+
+		edac_dbg(1, "  UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]);
+	}
+}
+
 static void determine_memory_type(struct amd64_pvt *pvt)
 {
 	u32 dram_ctrl, dcsm;
 
-	if (pvt->umc) {
-		if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
-			pvt->dram_type = MEM_LRDDR4;
-		else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
-			pvt->dram_type = MEM_RDDR4;
-		else
-			pvt->dram_type = MEM_DDR4;
-		return;
-	}
+	if (pvt->umc)
+		return determine_memory_type_df(pvt);
 
 	switch (pvt->fam) {
 	case 0xf:
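
Put differently, each populated UMC now gets its own type: dimm_cfg BIT(5)/BIT(4) select load-reduced/registered (neither set means unbuffered), and on zn_regs_v2 parts a "DDR Type" value of 0x1 in umc_cfg[2:0] switches the decode to the DDR5 variants. For example, a zn_regs_v2 UMC with umc_cfg[2:0] == 0x1 and dimm_cfg BIT(4) set yields MEM_RDDR5, while the same dimm_cfg on an older part yields MEM_RDDR4.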
@@ -2149,6 +2196,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
 {
 	u32 addr_mask_orig, addr_mask_deinterleaved;
 	u32 msb, weight, num_zero_bits;
+	int cs_mask_nr = csrow_nr;
 	int dimm, size = 0;
 
 	/* No Chip Selects are enabled. */
@@ -2164,17 +2212,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
 		return size;
 
 	/*
-	 * There is one mask per DIMM, and two Chip Selects per DIMM.
-	 *	CS0 and CS1 -> DIMM0
-	 *	CS2 and CS3 -> DIMM1
+	 * Family 17h introduced systems with one mask per DIMM,
+	 * and two Chip Selects per DIMM.
+	 *
+	 *	CS0 and CS1 -> MASK0 / DIMM0
+	 *	CS2 and CS3 -> MASK1 / DIMM1
+	 *
+	 * Family 19h Model 10h introduced systems with one mask per Chip Select,
+	 * and two Chip Selects per DIMM.
+	 *
+	 *	CS0 -> MASK0 -> DIMM0
+	 *	CS1 -> MASK1 -> DIMM0
+	 *	CS2 -> MASK2 -> DIMM1
+	 *	CS3 -> MASK3 -> DIMM1
+	 *
+	 * Keep the mask number equal to the Chip Select number for newer systems,
+	 * and shift the mask number for older systems.
 	 */
 	dimm = csrow_nr >> 1;
 
+	if (!fam_type->flags.zn_regs_v2)
+		cs_mask_nr >>= 1;
+
 	/* Asymmetric dual-rank DIMM support. */
 	if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
-		addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
+		addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
 	else
-		addr_mask_orig = pvt->csels[umc].csmasks[dimm];
+		addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
 
 	/*
 	 * The number of zero bits in the mask is equal to the number of bits
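
A quick worked example of the mapping above: for csrow_nr == 3 (CS3), an older system shifts to cs_mask_nr = 3 >> 1 = 1 and reads MASK1, which CS2 and CS3 share, whereas a zn_regs_v2 system keeps cs_mask_nr == 3 and reads the dedicated MASK3.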
@@ -2930,6 +2994,7 @@ static struct amd64_family_type family_types[] = {
 		.f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6,
 		.max_mcs = 12,
+		.flags.zn_regs_v2 = 1,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
 			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -3368,7 +3433,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
 		umc_base = get_umc_base(i);
 		umc = &pvt->umc[i];
 
-		amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
+		amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg);
 		amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
 		amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
 		amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
@@ -3452,7 +3517,9 @@ static void read_mc_regs(struct amd64_pvt *pvt)
 	read_dct_base_mask(pvt);
 
 	determine_memory_type(pvt);
-	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
+
+	if (!pvt->umc)
+		edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
 
 	determine_ecc_sym_sz(pvt);
 }
@@ -3548,7 +3615,7 @@ static int init_csrows_df(struct mem_ctl_info *mci)
 			 pvt->mc_node_id, cs);
 
 		dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
-		dimm->mtype = pvt->dram_type;
+		dimm->mtype = pvt->umc[umc].dram_type;
 		dimm->edac_mode = edac_mode;
 		dimm->dtype = dev_type;
 		dimm->grain = 64;

drivers/edac/amd64_edac.h

@@ -273,8 +273,11 @@
 #define UMCCH_BASE_ADDR_SEC		0x10
 #define UMCCH_ADDR_MASK			0x20
 #define UMCCH_ADDR_MASK_SEC		0x28
+#define UMCCH_ADDR_MASK_SEC_DDR5	0x30
 #define UMCCH_ADDR_CFG			0x30
+#define UMCCH_ADDR_CFG_DDR5		0x40
 #define UMCCH_DIMM_CFG			0x80
+#define UMCCH_DIMM_CFG_DDR5		0x90
 #define UMCCH_UMC_CFG			0x100
 #define UMCCH_SDP_CTRL			0x104
 #define UMCCH_ECC_CTRL			0x14C
@@ -344,6 +347,9 @@ struct amd64_umc {
 	u32 sdp_ctrl;		/* SDP Control reg */
 	u32 ecc_ctrl;		/* DRAM ECC Control reg */
 	u32 umc_cap_hi;		/* Capabilities High reg */
+
+	/* cache the dram_type */
+	enum mem_type dram_type;
 };
 
 struct amd64_pvt {
@@ -391,7 +397,12 @@ struct amd64_pvt {
 	/* place to store error injection parameters prior to issue */
 	struct error_injection injection;
 
-	/* cache the dram_type */
+	/*
+	 * cache the dram_type
+	 *
+	 * NOTE: Don't use this for Family 17h and later.
+	 *       Use dram_type in struct amd64_umc instead.
+	 */
 	enum mem_type dram_type;
 
 	struct amd64_umc *umc;	/* UMC registers */
@@ -480,11 +491,22 @@ struct low_ops {
 			     unsigned cs_mode, int cs_mask_nr);
 };
 
+struct amd64_family_flags {
+	/*
+	 * Indicates that the system supports the new register offsets, etc.
+	 * first introduced with Family 19h Model 10h.
+	 */
+	__u64 zn_regs_v2	: 1,
+	      __reserved	: 63;
+};
+
 struct amd64_family_type {
 	const char *ctl_name;
 	u16 f0_id, f1_id, f2_id, f6_id;
 	/* Maximum number of memory controllers per die/node. */
 	u8 max_mcs;
 
+	struct amd64_family_flags flags;
+
 	struct low_ops ops;
 };
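
The bitfield leaves 63 reserved bits for future family quirks while keeping the common case a single bit test. A minimal sketch of how a family entry opts in and how callers check it (values illustrative, mirroring the family_types[] hunk above):

	static const struct amd64_family_type f19h_m10h = {
		.ctl_name = "F19h_M10h",	/* illustrative name */
		.max_mcs  = 12,
		.flags.zn_regs_v2 = 1,		/* DDR5-era register layout */
	};

	/* ... */
	if (fam_type->flags.zn_regs_v2)
		reg = UMCCH_DIMM_CFG_DDR5;	/* take the new-offset path */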

drivers/edac/edac_device_sysfs.c

@@ -163,13 +163,14 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR,
 	edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store);
 
 /* Base Attributes of the EDAC_DEVICE ECC object */
-static struct ctl_info_attribute *device_ctrl_attr[] = {
-	&attr_ctl_info_panic_on_ue,
-	&attr_ctl_info_log_ue,
-	&attr_ctl_info_log_ce,
-	&attr_ctl_info_poll_msec,
+static struct attribute *device_ctrl_attrs[] = {
+	&attr_ctl_info_panic_on_ue.attr,
+	&attr_ctl_info_log_ue.attr,
+	&attr_ctl_info_log_ce.attr,
+	&attr_ctl_info_poll_msec.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(device_ctrl);
 
 /*
  * edac_device_ctrl_master_release
@@ -217,7 +218,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj)
 static struct kobj_type ktype_device_ctrl = {
 	.release = edac_device_ctrl_master_release,
 	.sysfs_ops = &device_ctl_info_ops,
-	.default_attrs = (struct attribute **)device_ctrl_attr,
+	.default_groups = device_ctrl_groups,
 };
 
 /*
@@ -389,17 +390,18 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL);
 INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL);
 
 /* list of edac_dev 'instance' attributes */
-static struct instance_attribute *device_instance_attr[] = {
-	&attr_instance_ce_count,
-	&attr_instance_ue_count,
+static struct attribute *device_instance_attrs[] = {
+	&attr_instance_ce_count.attr,
+	&attr_instance_ue_count.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(device_instance);
 
 /* The 'ktype' for each edac_dev 'instance' */
 static struct kobj_type ktype_instance_ctrl = {
 	.release = edac_device_ctrl_instance_release,
 	.sysfs_ops = &device_instance_ops,
-	.default_attrs = (struct attribute **)device_instance_attr,
+	.default_groups = device_instance_groups,
 };
 
 /* edac_dev -> instance -> block information */
@@ -487,17 +489,18 @@ BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL);
 BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL);
 
 /* list of edac_dev 'block' attributes */
-static struct edac_dev_sysfs_block_attribute *device_block_attr[] = {
-	&attr_block_ce_count,
-	&attr_block_ue_count,
+static struct attribute *device_block_attrs[] = {
+	&attr_block_ce_count.attr,
+	&attr_block_ue_count.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(device_block);
 
 /* The 'ktype' for each edac_dev 'block' */
 static struct kobj_type ktype_block_ctrl = {
 	.release = edac_device_ctrl_block_release,
 	.sysfs_ops = &device_block_ops,
-	.default_attrs = (struct attribute **)device_block_attr,
+	.default_groups = device_block_groups,
 };
 
 /* block ctor/dtor code */
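
All three conversions in this file follow the same standard kobject pattern: wrap the plain struct attribute array with ATTRIBUTE_GROUPS(), which generates the NULL-terminated <name>_groups array, and point kobj_type::default_groups at it in place of the removed default_attrs. A minimal sketch with a hypothetical "foo" object:

	static struct attribute *foo_attrs[] = {
		&foo_attr_bar.attr,
		NULL,
	};
	ATTRIBUTE_GROUPS(foo);			/* defines foo_group and foo_groups */

	static struct kobj_type foo_ktype = {
		.sysfs_ops	= &foo_sysfs_ops,
		.default_groups	= foo_groups,	/* replaces .default_attrs */
	};

The same transformation is applied to the EDAC PCI sysfs objects below.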

drivers/edac/edac_mc.c

@@ -213,12 +213,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems)
 	else if (size > sizeof(char))
 		align = sizeof(short);
 	else
-		return (char *)ptr;
+		return ptr;
 
 	r = (unsigned long)ptr % align;
 
 	if (r == 0)
-		return (char *)ptr;
+		return ptr;
 
 	*p += align - r;

drivers/edac/edac_pci_sysfs.c

@@ -135,17 +135,18 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL);
 INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL);
 
 /* pci instance attributes */
-static struct instance_attribute *pci_instance_attr[] = {
-	&attr_instance_pe_count,
-	&attr_instance_npe_count,
+static struct attribute *pci_instance_attrs[] = {
+	&attr_instance_pe_count.attr,
+	&attr_instance_npe_count.attr,
 	NULL
 };
+ATTRIBUTE_GROUPS(pci_instance);
 
 /* the ktype for a pci instance */
 static struct kobj_type ktype_pci_instance = {
 	.release = edac_pci_instance_release,
 	.sysfs_ops = &pci_instance_ops,
-	.default_attrs = (struct attribute **)pci_instance_attr,
+	.default_groups = pci_instance_groups,
 };
 
 /*
@@ -292,15 +293,16 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL);
 EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL);
 
 /* Base Attributes of the memory ECC object */
-static struct edac_pci_dev_attribute *edac_pci_attr[] = {
-	&edac_pci_attr_check_pci_errors,
-	&edac_pci_attr_edac_pci_log_pe,
-	&edac_pci_attr_edac_pci_log_npe,
-	&edac_pci_attr_edac_pci_panic_on_pe,
-	&edac_pci_attr_pci_parity_count,
-	&edac_pci_attr_pci_nonparity_count,
+static struct attribute *edac_pci_attrs[] = {
+	&edac_pci_attr_check_pci_errors.attr,
+	&edac_pci_attr_edac_pci_log_pe.attr,
+	&edac_pci_attr_edac_pci_log_npe.attr,
+	&edac_pci_attr_edac_pci_panic_on_pe.attr,
+	&edac_pci_attr_pci_parity_count.attr,
+	&edac_pci_attr_pci_nonparity_count.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(edac_pci);
 
 /*
  * edac_pci_release_main_kobj
@@ -327,7 +329,7 @@ static void edac_pci_release_main_kobj(struct kobject *kobj)
 static struct kobj_type ktype_edac_pci_main_kobj = {
 	.release = edac_pci_release_main_kobj,
 	.sysfs_ops = &edac_pci_sysfs_ops,
-	.default_attrs = (struct attribute **)edac_pci_attr,
+	.default_groups = edac_pci_groups,
 };
 
 /**