From 853d9b18f1e861d37e9b271742329f8c1176eabe Mon Sep 17 00:00:00 2001
From: Levente Kurusa <levex@linux.com>
Date: Fri, 29 Nov 2013 21:28:48 +0100
Subject: [PATCH 01/10] x86, mce: Call put_device on device_register failure

This patch adds a call to put_device() when the device_register() call
has failed. This is required so that the last reference to the device is
given up.

Signed-off-by: Levente Kurusa <levex@linux.com>
Link: http://lkml.kernel.org/r/5298F900.9000208@linux.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b3218cdee95f..a389c1d859ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu)
 	dev->release = &mce_device_release;
 
 	err = device_register(dev);
-	if (err)
+	if (err) {
+		put_device(dev);
 		return err;
+	}
 
 	for (i = 0; mce_device_attrs[i]; i++) {
 		err = device_create_file(dev, mce_device_attrs[i]);

From 545104dd2ba0feb62544284ee1d6c98fe6ef8245 Mon Sep 17 00:00:00 2001
From: Rui Wang <ruiv.wang@gmail.com>
Date: Sun, 8 Dec 2013 12:17:53 +0800
Subject: [PATCH 02/10] PCI, AER: Fix severity usage in aer trace event

There's inconsistency between dmesg and the trace event output.
When dmesg says "severity=Corrected", the trace event says
"severity=Fatal". What happens is that HW_EVENT_ERR_CORRECTED is
defined in edac.h:

enum hw_event_mc_err_type {
        HW_EVENT_ERR_CORRECTED,
        HW_EVENT_ERR_UNCORRECTED,
        HW_EVENT_ERR_FATAL,
        HW_EVENT_ERR_INFO,
};

while aer_print_error() uses aer_error_severity_string[] defined as:

static const char *aer_error_severity_string[] = {
        "Uncorrected (Non-Fatal)",
        "Uncorrected (Fatal)",
        "Corrected"
};

In this case dmesg is correct because info->severity is assigned in
aer_isr_one_error() using the definitions in include/linux/ras.h:

Signed-off-by: Rui Wang <rui.y.wang@intel.com>
Acked-by: Ethan Zhao <ethan.kernel@gmail.com>
Link: http://lkml.kernel.org/r/CANVTcTaP18CiGOSEcX5Ch_wPw9mEhkgokfp+d+ZOMFD+Ce4juA@mail.gmail.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 include/trace/events/ras.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
index 88b878383797..1c875ad1ee5f 100644
--- a/include/trace/events/ras.h
+++ b/include/trace/events/ras.h
@@ -5,7 +5,7 @@
 #define _TRACE_AER_H
 
 #include <linux/tracepoint.h>
-#include <linux/edac.h>
+#include <linux/aer.h>
 
 
 /*
@@ -63,10 +63,10 @@ TRACE_EVENT(aer_event,
 
 	TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
 		__get_str(dev_name),
-		__entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" :
-			__entry->severity == HW_EVENT_ERR_FATAL ?
-			"Fatal" : "Uncorrected",
-		__entry->severity == HW_EVENT_ERR_CORRECTED ?
+		__entry->severity == AER_CORRECTABLE ? "Corrected" :
+			__entry->severity == AER_FATAL ?
+			"Fatal" : "Uncorrected, non-fatal",
+		__entry->severity == AER_CORRECTABLE ?
 		__print_flags(__entry->status, "|", aer_correctable_errors) :
 		__print_flags(__entry->status, "|", aer_uncorrectable_errors))
 );

From c700f013adb0ec57518a7fe0163e3117659ce249 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 6 Dec 2013 01:17:08 -0500
Subject: [PATCH 03/10] EDAC: Add an edac_report parameter to EDAC

This new parameter is used to control how to report HW error reporting,
especially for newer Intel platform, like Ivybridge-EX, which contains
an enhanced error decoding functionality in the firmware, i.e. eMCA.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1386310630-12529-2-git-send-email-gong.chen@linux.intel.com
[ Boris: massage commit message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 Documentation/kernel-parameters.txt |  8 ++++++++
 drivers/edac/edac_stub.c            | 19 +++++++++++++++++++
 include/linux/edac.h                | 28 ++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 50680a59a2ff..453092c822f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -881,6 +881,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 			The xen output can only be used by Xen PV guests.
 
+	edac_report=	[HW,EDAC] Control how to report EDAC event
+			Format: {"on" | "off" | "force"}
+			on: enable EDAC to report H/W event. May be overridden
+			by other higher priority error reporting module.
+			off: disable H/W event reporting through EDAC.
+			force: enforce the use of EDAC to report H/W event.
+			default: on.
+
 	ekgdboc=	[X86,KGDB] Allow early kernel console debugging
 			ekgdboc=kbd
 
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 351945fa2ecd..9d9e18aefaaa 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert);
 
 static atomic_t edac_subsys_valid = ATOMIC_INIT(0);
 
+int edac_report_status = EDAC_REPORTING_ENABLED;
+EXPORT_SYMBOL_GPL(edac_report_status);
+
+static int __init edac_report_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (!strncmp(str, "on", 2))
+		set_edac_report_status(EDAC_REPORTING_ENABLED);
+	else if (!strncmp(str, "off", 3))
+		set_edac_report_status(EDAC_REPORTING_DISABLED);
+	else if (!strncmp(str, "force", 5))
+		set_edac_report_status(EDAC_REPORTING_FORCE);
+
+	return 0;
+}
+__setup("edac_report=", edac_report_setup);
+
 /*
  * called to determine if there is an EDAC driver interested in
  * knowing an event (such as NMI) occurred
diff --git a/include/linux/edac.h b/include/linux/edac.h
index dbdffe8d4469..8e6c20af11a2 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -35,6 +35,34 @@ extern void edac_atomic_assert_error(void);
 extern struct bus_type *edac_get_sysfs_subsys(void);
 extern void edac_put_sysfs_subsys(void);
 
+enum {
+	EDAC_REPORTING_ENABLED,
+	EDAC_REPORTING_DISABLED,
+	EDAC_REPORTING_FORCE
+};
+
+extern int edac_report_status;
+#ifdef CONFIG_EDAC
+static inline int get_edac_report_status(void)
+{
+	return edac_report_status;
+}
+
+static inline void set_edac_report_status(int new)
+{
+	edac_report_status = new;
+}
+#else
+static inline int get_edac_report_status(void)
+{
+	return EDAC_REPORTING_DISABLED;
+}
+
+static inline void set_edac_report_status(int new)
+{
+}
+#endif
+
 static inline void opstate_init(void)
 {
 	switch (edac_op_state) {

From fd521039666529a4674b9822f1cc873672f57ee6 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 6 Dec 2013 01:17:09 -0500
Subject: [PATCH 04/10] EDAC, sb_edac: Modify H/W event reporting policy

Newer Intel platforms support more than one method to report H/W event.
On this kind of platform, H/W event report can adopt new method and
traditional EDAC method should be disabled. Moreover, if EDAC event
report method is set to *force*, it means event must be reported via
EDAC interface. IOW, it overrides the default event report policy.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1386310630-12529-3-git-send-email-gong.chen@linux.intel.com
[ Boris: massage commit and error messages ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/edac/sb_edac.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 8472405c5586..e2e2cb365764 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1829,6 +1829,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 	struct mem_ctl_info *mci;
 	struct sbridge_pvt *pvt;
 
+	if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+		return NOTIFY_DONE;
+
 	mci = get_mci_for_node_id(mce->socketid);
 	if (!mci)
 		return NOTIFY_BAD;
@@ -2142,9 +2145,10 @@ static int __init sbridge_init(void)
 	opstate_init();
 
 	pci_rc = pci_register_driver(&sbridge_driver);
-
 	if (pci_rc >= 0) {
 		mce_register_decode_chain(&sbridge_mce_dec);
+		if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+			sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
 		return 0;
 	}
 

From 42139eb356e3384759ca143ae04d82376346eb4c Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 6 Dec 2013 01:17:10 -0500
Subject: [PATCH 05/10] ACPI, eMCA: Combine eMCA/EDAC event reporting priority

eMCA has higher H/W event reporting priority. Once it is
loaded, EDAC event reporting should be disabled, unless EDAC
overrides eMCA priority via command line parameter "edac_report=force".

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1386310630-12529-4-git-send-email-gong.chen@linux.intel.com
[ Boris: massage printk message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/acpi/acpi_extlog.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index a6869e110ce5..5d33c5415405 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,6 +12,7 @@
 #include <acpi/acpi_bus.h>
 #include <linux/cper.h>
 #include <linux/ratelimit.h>
+#include <linux/edac.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
 
@@ -43,6 +44,8 @@ struct extlog_l1_head {
 	u8  rev1[12];
 };
 
+static int old_edac_report_status;
+
 static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295";
 
 /* L1 table related physical address */
@@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 
 	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
 
-	return NOTIFY_DONE;
+	return NOTIFY_STOP;
 }
 
 static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret)
@@ -231,8 +234,12 @@ static int __init extlog_init(void)
 	u64 cap;
 	int rc;
 
-	rc = -ENODEV;
+	if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
+		pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
+		return -EPERM;
+	}
 
+	rc = -ENODEV;
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	if (!(cap & MCG_ELOG_P))
 		return rc;
@@ -287,6 +294,12 @@ static int __init extlog_init(void)
 	if (elog_buf == NULL)
 		goto err_release_elog;
 
+	/*
+	 * eMCA event report method has higher priority than EDAC method,
+	 * unless EDAC event report method is mandatory.
+	 */
+	old_edac_report_status = get_edac_report_status();
+	set_edac_report_status(EDAC_REPORTING_DISABLED);
 	mce_register_decode_chain(&extlog_mce_dec);
 	/* enable OS to be involved to take over management from BIOS */
 	((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,6 +321,7 @@ err:
 
 static void __exit extlog_exit(void)
 {
+	set_edac_report_status(old_edac_report_status);
 	mce_unregister_decode_chain(&extlog_mce_dec);
 	((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
 	if (extlog_l1_addr)

From 3482fb5e0c1c20ce0dbcfc5ca3b6558a8c455b10 Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Wed, 6 Nov 2013 13:30:36 -0800
Subject: [PATCH 06/10] ACPI, APEI, EINJ: Changes to the ACPI/APEI/EINJ debugfs
 interface

When I added support for ACPI5 I made the assumption that
injected processor errors would just need to know the APICID,
memory errors just the address and mask, and PCIe errors just the
segment/bus/device/function. So I had the code check the type of injection
and multiplex the "param1" value appropriately.

This was not a good assumption :-(

There are injection scenarios where we need to specify more than one of
these items. E.g. injecting a cache error we need to specify an APICID
of the cpu that owns the cache, and also an address (so that we can trip
the error by accessing the address).

Add a "flags" file to give the user direct access to specify which items
are valid in the ACPI SET_ERROR_TYPE_WITH_ADDRESS structure. Also add
new files param3 and param4 to hold all these values.

For backwards compatability with old injection scripts we maintain the
old behaviour if flags remains set at zero (or is reset to 0).

Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 Documentation/acpi/apei/einj.txt | 19 +++++++++++++++-
 drivers/acpi/apei/einj.c         | 39 ++++++++++++++++++++++++++++----
 2 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
index a58b63da1a36..f51861bcb07b 100644
--- a/Documentation/acpi/apei/einj.txt
+++ b/Documentation/acpi/apei/einj.txt
@@ -45,11 +45,22 @@ directory apei/einj. The following files are provided.
   injection. Before this, please specify all necessary error
   parameters.
 
+- flags
+  Present for kernel version 3.13 and above. Used to specify which
+  of param{1..4} are valid and should be used by BIOS during injection.
+  Value is a bitmask as specified in ACPI5.0 spec for the
+  SET_ERROR_TYPE_WITH_ADDRESS data structure:
+	Bit 0 - Processor APIC field valid (see param3 below)
+	Bit 1 - Memory address and mask valid (param1 and param2)
+	Bit 2 - PCIe (seg,bus,dev,fn) valid (param4 below)
+  If set to zero, legacy behaviour is used where the type of injection
+  specifies just one bit set, and param1 is multiplexed.
+
 - param1
   This file is used to set the first error parameter value. Effect of
   parameter depends on error_type specified. For example, if error
   type is memory related type, the param1 should be a valid physical
-  memory address.
+  memory address. [Unless "flag" is set - see above]
 
 - param2
   This file is used to set the second error parameter value. Effect of
@@ -58,6 +69,12 @@ directory apei/einj. The following files are provided.
   address mask. Linux requires page or narrower granularity, say,
   0xfffffffffffff000.
 
+- param3
+  Used when the 0x1 bit is set in "flag" to specify the APIC id
+
+- param4
+  Used when the 0x4 bit is set in "flag" to specify target PCIe device
+
 - notrigger
   The EINJ mechanism is a two step process. First inject the error, then
   perform some actions to trigger it. Setting "notrigger" to 1 skips the
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index fb57d03e698b..c76674e2a01f 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -416,7 +416,8 @@ out:
 	return rc;
 }
 
-static int __einj_error_inject(u32 type, u64 param1, u64 param2)
+static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+			       u64 param3, u64 param4)
 {
 	struct apei_exec_context ctx;
 	u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
@@ -446,6 +447,12 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
 				break;
 			}
 			v5param->flags = vendor_flags;
+		} else if (flags) {
+				v5param->flags = flags;
+				v5param->memory_address = param1;
+				v5param->memory_address_range = param2;
+				v5param->apicid = param3;
+				v5param->pcie_sbdf = param4;
 		} else {
 			switch (type) {
 			case ACPI_EINJ_PROCESSOR_CORRECTABLE:
@@ -514,11 +521,17 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
 }
 
 /* Inject the specified hardware error */
-static int einj_error_inject(u32 type, u64 param1, u64 param2)
+static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+			     u64 param3, u64 param4)
 {
 	int rc;
 	unsigned long pfn;
 
+	/* If user manually set "flags", make sure it is legal */
+	if (flags && (flags &
+		~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+		return -EINVAL;
+
 	/*
 	 * We need extra sanity checks for memory errors.
 	 * Other types leap directly to injection.
@@ -532,7 +545,7 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2)
 	if (type & ACPI5_VENDOR_BIT) {
 		if (vendor_flags != SETWA_FLAGS_MEM)
 			goto inject;
-	} else if (!(type & MEM_ERROR_MASK))
+	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
 		goto inject;
 
 	/*
@@ -546,15 +559,18 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2)
 
 inject:
 	mutex_lock(&einj_mutex);
-	rc = __einj_error_inject(type, param1, param2);
+	rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
 	mutex_unlock(&einj_mutex);
 
 	return rc;
 }
 
 static u32 error_type;
+static u32 error_flags;
 static u64 error_param1;
 static u64 error_param2;
+static u64 error_param3;
+static u64 error_param4;
 static struct dentry *einj_debug_dir;
 
 static int available_error_type_show(struct seq_file *m, void *v)
@@ -648,7 +664,8 @@ static int error_inject_set(void *data, u64 val)
 	if (!error_type)
 		return -EINVAL;
 
-	return einj_error_inject(error_type, error_param1, error_param2);
+	return einj_error_inject(error_type, error_flags, error_param1, error_param2,
+		error_param3, error_param4);
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL,
@@ -729,6 +746,10 @@ static int __init einj_init(void)
 	rc = -ENOMEM;
 	einj_param = einj_get_parameter_address();
 	if ((param_extension || acpi5) && einj_param) {
+		fentry = debugfs_create_x32("flags", S_IRUSR | S_IWUSR,
+					    einj_debug_dir, &error_flags);
+		if (!fentry)
+			goto err_unmap;
 		fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
 					    einj_debug_dir, &error_param1);
 		if (!fentry)
@@ -737,6 +758,14 @@ static int __init einj_init(void)
 					    einj_debug_dir, &error_param2);
 		if (!fentry)
 			goto err_unmap;
+		fentry = debugfs_create_x64("param3", S_IRUSR | S_IWUSR,
+					    einj_debug_dir, &error_param3);
+		if (!fentry)
+			goto err_unmap;
+		fentry = debugfs_create_x64("param4", S_IRUSR | S_IWUSR,
+					    einj_debug_dir, &error_param4);
+		if (!fentry)
+			goto err_unmap;
 
 		fentry = debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
 					    einj_debug_dir, &notrigger);

From addccbb264e5e0e5762f4893f6df24afad327c8c Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Mon, 25 Nov 2013 02:15:00 -0500
Subject: [PATCH 07/10] ACPI, APEI, GHES: Do not report only correctable errors
 with SCI

Currently SCI is employed to handle corrected errors - memory corrected
errors, more specifically but in fact SCI still can be used to handle
any errors, e.g. uncorrected or even fatal ones if enabled by the BIOS.
Enable logging for those kinds of errors too.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1385363701-12387-1-git-send-email-gong.chen@linux.intel.com
[ Boris: massage commit message, rename function arg. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce-apei.c | 14 ++++++++++----
 drivers/acpi/apei/ghes.c              |  3 +--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index de8b60a53f69..a1aef9533154 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -33,22 +33,28 @@
 #include <linux/acpi.h>
 #include <linux/cper.h>
 #include <acpi/apei.h>
+#include <acpi/ghes.h>
 #include <asm/mce.h>
 
 #include "mce-internal.h"
 
-void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
+void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 {
 	struct mce m;
 
-	/* Only corrected MC is reported */
-	if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
+	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
 		return;
 
 	mce_setup(&m);
 	m.bank = 1;
-	/* Fake a memory read corrected error with unknown channel */
+	/* Fake a memory read error with unknown channel */
 	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
+
+	if (severity >= GHES_SEV_RECOVERABLE)
+		m.status |= MCI_STATUS_UC;
+	if (severity >= GHES_SEV_PANIC)
+		m.status |= MCI_STATUS_PCC;
+
 	m.addr = mem_err->physical_addr;
 	mce_log(&m);
 	mce_notify_irq();
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a30bc313787b..ce3683d93a13 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -453,8 +453,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			ghes_edac_report_mem_error(ghes, sev, mem_err);
 
 #ifdef CONFIG_X86_MCE
-			apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
-						  mem_err);
+			apei_mce_report_mem_error(sev, mem_err);
 #endif
 			ghes_handle_memory_failure(gdata, sev);
 		}

From d3ab3edc029bf79b09f91d6a22881c24ecaeb000 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Wed, 18 Dec 2013 01:30:49 -0500
Subject: [PATCH 08/10] ACPI, APEI: Cleanup alignment-aware accesses

We do use memcpy to avoid access alignment issues between firmware and
OS. Now we can use a better and standard way to avoid this issue. While
at it, simplify some variable names to avoid the 80 cols limit and
use structure assignment instead of unnecessary memcpy. No functional
changes.

Because ERST record id cache is implemented in memory to increase the
access speed via caching ERST content we can refrain from using memcpy
there too and use regular assignment instead.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Cc: Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1387348249-20014-1-git-send-email-gong.chen@linux.intel.com
[ Boris: massage commit message a bit. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/acpi/apei/apei-base.c |  4 ++--
 drivers/acpi/apei/einj.c      | 19 +++++++++----------
 drivers/acpi/apei/erst.c      |  2 +-
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 6d2c49b86b7f..e55584a072c6 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -41,6 +41,7 @@
 #include <linux/rculist.h>
 #include <linux/interrupt.h>
 #include <linux/debugfs.h>
+#include <asm/unaligned.h>
 
 #include "apei-internal.h"
 
@@ -567,8 +568,7 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
 	bit_offset = reg->bit_offset;
 	access_size_code = reg->access_width;
 	space_id = reg->space_id;
-	/* Handle possible alignment issues */
-	memcpy(paddr, &reg->address, sizeof(*paddr));
+	*paddr = get_unaligned(&reg->address);
 	if (!*paddr) {
 		pr_warning(FW_BUG APEI_PFX
 			   "Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index fb57d03e698b..361177a9df3a 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -34,6 +34,7 @@
 #include <linux/delay.h>
 #include <linux/mm.h>
 #include <acpi/acpi.h>
+#include <asm/unaligned.h>
 
 #include "apei-internal.h"
 
@@ -216,7 +217,7 @@ static void check_vendor_extension(u64 paddr,
 static void *einj_get_parameter_address(void)
 {
 	int i;
-	u64 paddrv4 = 0, paddrv5 = 0;
+	u64 pa_v4 = 0, pa_v5 = 0;
 	struct acpi_whea_header *entry;
 
 	entry = EINJ_TAB_ENTRY(einj_tab);
@@ -225,30 +226,28 @@ static void *einj_get_parameter_address(void)
 		    entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
 		    entry->register_region.space_id ==
 		    ACPI_ADR_SPACE_SYSTEM_MEMORY)
-			memcpy(&paddrv4, &entry->register_region.address,
-			       sizeof(paddrv4));
+			pa_v4 = get_unaligned(&entry->register_region.address);
 		if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
 		    entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
 		    entry->register_region.space_id ==
 		    ACPI_ADR_SPACE_SYSTEM_MEMORY)
-			memcpy(&paddrv5, &entry->register_region.address,
-			       sizeof(paddrv5));
+			pa_v5 = get_unaligned(&entry->register_region.address);
 		entry++;
 	}
-	if (paddrv5) {
+	if (pa_v5) {
 		struct set_error_type_with_address *v5param;
 
-		v5param = acpi_os_map_memory(paddrv5, sizeof(*v5param));
+		v5param = acpi_os_map_memory(pa_v5, sizeof(*v5param));
 		if (v5param) {
 			acpi5 = 1;
-			check_vendor_extension(paddrv5, v5param);
+			check_vendor_extension(pa_v5, v5param);
 			return v5param;
 		}
 	}
-	if (param_extension && paddrv4) {
+	if (param_extension && pa_v4) {
 		struct einj_parameter *v4param;
 
-		v4param = acpi_os_map_memory(paddrv4, sizeof(*v4param));
+		v4param = acpi_os_map_memory(pa_v4, sizeof(*v4param));
 		if (!v4param)
 			return NULL;
 		if (v4param->reserved1 || v4param->reserved2) {
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 26311f23c824..bf30a12f1988 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -611,7 +611,7 @@ static void __erst_record_id_cache_compact(void)
 		if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
 			continue;
 		if (wpos != i)
-			memcpy(&entries[wpos], &entries[i], sizeof(entries[i]));
+			entries[wpos] = entries[i];
 		wpos++;
 	}
 	erst_record_id_cache.len = wpos;

From ca104edc17841da87850b20ab77e57fe0a99ead6 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Mon, 25 Nov 2013 02:15:01 -0500
Subject: [PATCH 09/10] ACPI, APEI, GHES: Cleanup ghes memory error handling

Cleanup the logic in ghes_handle_memory_failure(). While at it, add
proper PFN validity check for UC error and cleanup the code logic to
make it simpler and cleaner.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1385363701-12387-2-git-send-email-gong.chen@linux.intel.com
[ Boris: massage commit message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/acpi/apei/ghes.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ce3683d93a13..46766ef7ef5d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -413,27 +413,31 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
 {
 #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
 	unsigned long pfn;
+	int flags = -1;
 	int sec_sev = ghes_severity(gdata->error_severity);
 	struct cper_sec_mem_err *mem_err;
 	mem_err = (struct cper_sec_mem_err *)(gdata + 1);
 
+	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+		return;
+
+	pfn = mem_err->physical_addr >> PAGE_SHIFT;
+	if (!pfn_valid(pfn)) {
+		pr_warn_ratelimited(FW_WARN GHES_PFX
+		"Invalid address in generic error data: %#llx\n",
+		mem_err->physical_addr);
+		return;
+	}
+
+	/* iff following two events can be handled properly by now */
 	if (sec_sev == GHES_SEV_CORRECTED &&
-	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) &&
-	    (mem_err->validation_bits & CPER_MEM_VALID_PA)) {
-		pfn = mem_err->physical_addr >> PAGE_SHIFT;
-		if (pfn_valid(pfn))
-			memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
-		else if (printk_ratelimit())
-			pr_warn(FW_WARN GHES_PFX
-			"Invalid address in generic error data: %#llx\n",
-			mem_err->physical_addr);
-	}
-	if (sev == GHES_SEV_RECOVERABLE &&
-	    sec_sev == GHES_SEV_RECOVERABLE &&
-	    mem_err->validation_bits & CPER_MEM_VALID_PA) {
-		pfn = mem_err->physical_addr >> PAGE_SHIFT;
-		memory_failure_queue(pfn, 0, 0);
-	}
+	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
+		flags = MF_SOFT_OFFLINE;
+	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
+		flags = 0;
+
+	if (flags != -1)
+		memory_failure_queue(pfn, 0, flags);
 #endif
 }
 

From 4f75d8412792777a314ac5c1393a9ed43d695fd1 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 23 Dec 2013 18:05:02 +0100
Subject: [PATCH 10/10] x86, mce: Fix mce_start_timer semantics

So mce_start_timer() has a 'cpu' argument which is supposed to mean to
start a timer on that cpu. However, the code currently starts a timer on
the *current* cpu the function runs on and causes the sanity-check in
mce_timer_fn to fire:

	WARNING: CPU: 0 PID: 0 at arch/x86/kernel/cpu/mcheck/mce.c:1286 mce_timer_fn

because it is running on the wrong cpu.

This was triggered by Prarit Bhargava <prarit@redhat.com> by offlining
all the cpus in succession.

Then, we were fiddling with the CMCI storm settings when starting the
timer whereas there's no need for that - if there's storm happening
on this newly restarted cpu, we're going to be in normal CMCI mode
initially and then when the CMCI interrupt starts firing, we're going to
go to the polling mode with the timer real soon.

Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Prarit Bhargava <prarit@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Reviewed-by: Chen, Gong <gong.chen@linux.intel.com>
Link: http://lkml.kernel.org/r/1387722156-5511-1-git-send-email-prarit@redhat.com
---
 arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a389c1d859ec..4d5419b249da 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1638,15 +1638,15 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-	unsigned long iv = mce_adjust_timer(check_interval * HZ);
-
-	__this_cpu_write(mce_next_interval, iv);
+	unsigned long iv = check_interval * HZ;
 
 	if (mca_cfg.ignore_ce || !iv)
 		return;
 
+	per_cpu(mce_next_interval, cpu) = iv;
+
 	t->expires = round_jiffies(jiffies + iv);
-	add_timer_on(t, smp_processor_id());
+	add_timer_on(t, cpu);
 }
 
 static void __mcheck_cpu_init_timer(void)