mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-29 13:53:33 +00:00
EDAC/skx_common: Enable EDAC support for the "near" memory
The current {skx,i10nm}_edac drivers lack EDAC support for decoding errors from the 1st level memory (the fast "near" memory used as a cache) of a 2-level memory system. Introduce a helper function, skx_error_in_mem(), and call it at the beginning of skx_mce_check_error() to check whether an error comes from memory. As long as the error comes from memory (in either a 1-level or a 2-level memory system), decode it.
Reported-and-tested-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/all/20230113032802.41752-1-qiuxu.zhuo@intel.com
This commit is contained in:
parent
8d8fcc391f
commit
6e8746cb73
2 changed files with 36 additions and 6 deletions
|
@ -632,12 +632,18 @@ static bool skx_error_in_1st_level_mem(const struct mce *m)
|
|||
if (!skx_mem_cfg_2lm)
|
||||
return false;
|
||||
|
||||
errcode = GET_BITFIELD(m->status, 0, 15);
|
||||
errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK;
|
||||
|
||||
if ((errcode & 0xef80) != 0x280)
|
||||
return false;
|
||||
return errcode == MCACOD_EXT_MEM_ERR;
|
||||
}
|
||||
|
||||
return true;
|
||||
static bool skx_error_in_mem(const struct mce *m)
|
||||
{
|
||||
u32 errcode;
|
||||
|
||||
errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK;
|
||||
|
||||
return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR);
|
||||
}
|
||||
|
||||
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
|
@ -651,8 +657,8 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
|
|||
if (mce->kflags & MCE_HANDLED_CEC)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
/* ignore unless this is memory related with an address */
|
||||
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
|
||||
/* Ignore unless this is memory related with an address */
|
||||
if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
memset(&res, 0, sizeof(res));
|
||||
|
|
|
@ -56,6 +56,30 @@
|
|||
#define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15)
|
||||
#define MCI_MISC_ECC_DDRT 8 /* read from DDRT */
|
||||
|
||||
/*
|
||||
* According to Intel Architecture spec vol 3B,
|
||||
* Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
|
||||
* memory errors should fit one of these masks:
|
||||
* 000f 0000 1mmm cccc (binary)
|
||||
* 000f 0010 1mmm cccc (binary) [RAM used as cache]
|
||||
* where:
|
||||
* f = Correction Report Filtering Bit. If 1, subsequent errors
|
||||
* won't be shown
|
||||
* mmm = error type
|
||||
* cccc = channel
|
||||
*/
|
||||
#define MCACOD_MEM_ERR_MASK 0xef80
|
||||
/*
|
||||
* Errors from either the memory of the 1-level memory system or the
|
||||
* 2nd level memory (the slow "far" memory) of the 2-level memory system.
|
||||
*/
|
||||
#define MCACOD_MEM_CTL_ERR 0x80
|
||||
/*
|
||||
* Errors from the 1st level memory (the fast "near" memory as cache)
|
||||
* of the 2-level memory system.
|
||||
*/
|
||||
#define MCACOD_EXT_MEM_ERR 0x280
|
||||
|
||||
/*
|
||||
* Each cpu socket contains some pci devices that provide global
|
||||
* information, and also some that are local to each of the two
|
||||
|
|
Loading…
Reference in a new issue