From 0114a8e87772c4172bf7fefa4c7a9a6bbc52d2ab Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:44 +0800
Subject: [PATCH 01/60] x86/apic: Construct a selector for the interrupt
 delivery mode

There are quite a few switches which are used to determine the final
interrupt delivery mode, as shown below:

1) Kconfig: CONFIG_X86_64; CONFIG_X86_LOCAL_APIC; CONFIG_X86_IO_APIC
2) Command line options: disable_apic; skip_ioapic_setup
3) CPU capability: boot_cpu_has(X86_FEATURE_APIC)
4) MP table: smp_found_config
5) ACPI: acpi_lapic; acpi_ioapic; nr_ioapic

These switches are scattered around and have dependencies between them,
which makes the code difficult to read and maintain.

Construct a selector which unifies them in a single function. Use this
selector to determine the interrupt delivery mode directly.

Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-2-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/kernel/apic/apic.c | 52 +++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d705c769f77d..39cb8c1ad98e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1218,6 +1218,58 @@ void __init sync_Arb_IDs(void)
 			APIC_INT_LEVELTRIG | APIC_DM_INIT);
 }
 
+enum apic_intr_mode {
+	APIC_PIC,
+	APIC_VIRTUAL_WIRE,
+	APIC_SYMMETRIC_IO,
+};
+
+static int __init apic_intr_mode_select(void)
+{
+	/* Check kernel option */
+	if (disable_apic) {
+		pr_info("APIC disabled via kernel command line\n");
+		return APIC_PIC;
+	}
+
+	/* Check BIOS */
+#ifdef CONFIG_X86_64
+	/* On 64-bit, the APIC must be integrated. Check the local APIC only */
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
+		disable_apic = 1;
+		pr_info("APIC disabled by BIOS\n");
+		return APIC_PIC;
+	}
+#else
+	/* On 32-bit, the APIC may be an integrated APIC or an 82489DX */
+
+	/* Neither 82489DX nor integrated APIC ? */
+	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
+		disable_apic = 1;
+		return APIC_PIC;
+	}
+
+	/* Does the BIOS pretend there is an integrated APIC ? */
+	if (!boot_cpu_has(X86_FEATURE_APIC) &&
+	    APIC_INTEGRATED(boot_cpu_apic_version)) {
+		disable_apic = 1;
+		pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
+		       boot_cpu_physical_apicid);
+		return APIC_PIC;
+	}
+#endif
+
+	/* Check MP table or ACPI MADT configuration */
+	if (!smp_found_config) {
+		disable_ioapic_support();
+		if (!acpi_lapic)
+			pr_info("APIC: ACPI MADT or MP tables are not detected\n");
+		return APIC_VIRTUAL_WIRE;
+	}
+
+	return APIC_SYMMETRIC_IO;
+}
+
 /*
  * An initial setup of the virtual wire mode.
  */

From 4b1669e8d1e4e6cb65b3b114fced6ca9bc39ddea Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:45 +0800
Subject: [PATCH 02/60] x86/apic: Prepare for unifying the interrupt delivery
 modes setup

There are three places which initialize the interrupt delivery modes:

1) init_bsp_APIC(), which is called early, might set up the
   through-local-APIC virtual wire mode on non-SMP systems.

2) In an SMP-capable system, native_smp_prepare_cpus() tries to switch to
   the symmetric I/O model.

3) In a UP system with UP_LATE_INIT=y, the local APIC and the I/O APIC are
   set up in smp_init().
There is no technical reason to do these initializations in random places
and to run the kernel with a potentially wrong mode through the early boot
stage, but it has a problematic side effect: the late switch to symmetric
I/O mode causes a dump-capture kernel to hang when the kernel command line
option 'notsc' is active.

Provide a new function which unifies those three places. This is a
preparatory patch for initializing the interrupt mode directly.

Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-3-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/include/asm/apic.h |  2 ++
 arch/x86/kernel/apic/apic.c | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 5f01671c68f2..1a970f5a6e75 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -128,6 +128,7 @@ extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
+extern void apic_intr_mode_init(void);
 extern void setup_local_APIC(void);
 extern void init_apic_mappings(void);
 void register_lapic_address(unsigned long address);
@@ -170,6 +171,7 @@ static inline void disable_local_APIC(void) { }
 # define setup_boot_APIC_clock x86_init_noop
 # define setup_secondary_APIC_clock x86_init_noop
 static inline void lapic_update_tsc_freq(void) { }
+static inline void apic_intr_mode_init(void) { }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 39cb8c1ad98e..08585bcbb38f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1319,6 +1319,22 @@ void __init init_bsp_APIC(void)
 	apic_write(APIC_LVT1, value);
 }
 
+/* Init the interrupt delivery mode for the BSP */
+void __init apic_intr_mode_init(void)
+{
+	switch (apic_intr_mode_select()) {
+	case APIC_PIC:
+		pr_info("APIC: Keep in PIC mode (8259)\n");
+		return;
+	case APIC_VIRTUAL_WIRE:
+		pr_info("APIC: Switch to virtual wire mode setup\n");
+		return;
+	case APIC_SYMMETRIC_IO:
+		pr_info("APIC: Switch to symmetric I/O mode setup\n");
+		return;
+	}
+}
+
 static void lapic_setup_esr(void)
 {
 	unsigned int oldvalue, value, maxlvt;

From a2510d156eae9cf85c928d428471e44edd82c5ca Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:46 +0800
Subject: [PATCH 03/60] x86/apic: Split local APIC timer setup from the APIC
 setup

apic_bsp_setup() sets up the local APIC, I/O APIC and APIC timer. The
local APIC and I/O APIC setup belongs to the interrupt delivery mode
setup. Setting up the local APIC timer for the boot CPU is a separate job
and has nothing to do with the interrupt delivery mode setup.

Split the local APIC timer setup from the APIC setup and keep it at the
original place for SMP and UP kernels for now.
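[ Editorial note, not part of the original submission: after this split, the
  callers invoke the timer setup themselves. A minimal sketch of the
  resulting call order, using the names from this series:

	apic_bsp_setup(false);			/* interrupt delivery mode setup only */
	x86_init.timers.setup_percpu_clockev();	/* local APIC timer, now a separate step */

  Keeping the delivery mode setup free of timer concerns is what later
  patches in this series rely on when they move the mode setup to an
  earlier boot stage. ]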
Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-4-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/kernel/apic/apic.c | 4 ++--
 arch/x86/kernel/smpboot.c   | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 08585bcbb38f..ad373243c7b3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2397,8 +2397,6 @@ int __init apic_bsp_setup(bool upmode)
 	end_local_APIC_setup();
 	irq_remap_enable_fault_handling();
 	setup_IO_APIC();
-	/* Setup local timer */
-	x86_init.timers.setup_percpu_clockev();
 	return id;
 }
 
@@ -2438,6 +2436,8 @@ int __init APIC_init_uniprocessor(void)
 
 	default_setup_apic_routing();
 	apic_bsp_setup(true);
+	/* Setup local timer */
+	x86_init.timers.setup_percpu_clockev();
 	return 0;
 }
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ad59edd84de7..dad0a099e433 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1340,6 +1340,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	case SMP_FORCE_UP:
 		disable_smp();
 		apic_bsp_setup(false);
+		/* Setup local timer */
+		x86_init.timers.setup_percpu_clockev();
 		return;
 	case SMP_OK:
 		break;
@@ -1354,6 +1356,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	default_setup_apic_routing();
 	cpu0_logical_apicid = apic_bsp_setup(false);
 
+	/* Setup local timer */
+	x86_init.timers.setup_percpu_clockev();
+
 	pr_info("CPU0: ");
 	print_cpu_info(&cpu_data(0));
 
From 4b1244b45c16cef63fa3282e5bb1cc4fa1aef06a Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:47 +0800
Subject: [PATCH 04/60] x86/apic: Move logical APIC ID away from
 apic_bsp_setup()

apic_bsp_setup() sets and returns the logical APIC ID for initializing
cpu0_logical_apicid in an SMP-capable system. The ID has nothing to do
with the initialization of the local APIC and the I/O APIC, and
apic_bsp_setup() should be called for interrupt mode setup only.

Move the ID setup into a separate helper function for cleanup and mark
apic_bsp_setup() void.
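[ Editorial note: the distinction the new helper encodes is that in x2APIC
  mode the logical ID is the full 32-bit APIC_LDR value, while in xAPIC mode
  it lives in bits 31:24 of the register. A minimal sketch, assuming the
  usual register layout; logical_apicid_sketch() is a hypothetical name used
  only for illustration:

	static u32 logical_apicid_sketch(u32 ldr, bool x2apic)
	{
		/* xAPIC: logical ID in LDR[31:24]; x2APIC: the whole register */
		return x2apic ? ldr : (ldr >> 24) & 0xff;
	}
]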
Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-5-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/include/asm/apic.h |  2 +-
 arch/x86/kernel/apic/apic.c | 10 +---------
 arch/x86/kernel/smpboot.c   | 12 +++++++++++-
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1a970f5a6e75..4e550c742130 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -146,7 +146,7 @@ static inline int apic_force_enable(unsigned long addr)
 extern int apic_force_enable(unsigned long addr);
 #endif
 
-extern int apic_bsp_setup(bool upmode);
+extern void apic_bsp_setup(bool upmode);
 extern void apic_ap_setup(void);
 
 /*
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad373243c7b3..eafed8fbf340 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2379,25 +2379,17 @@ static void __init apic_bsp_up_setup(void)
  * Returns:
  * apic_id of BSP APIC
  */
-int __init apic_bsp_setup(bool upmode)
+void __init apic_bsp_setup(bool upmode)
 {
-	int id;
-
 	connect_bsp_APIC();
 	if (upmode)
 		apic_bsp_up_setup();
 	setup_local_APIC();
 
-	if (x2apic_mode)
-		id = apic_read(APIC_LDR);
-	else
-		id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-
 	enable_IO_APIC();
 	end_local_APIC_setup();
 	irq_remap_enable_fault_handling();
 	setup_IO_APIC();
-	return id;
 }
 
 /*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index dad0a099e433..d367ddbec5d0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1294,6 +1294,14 @@ static void __init smp_cpu_index_default(void)
 	}
 }
 
+static void __init smp_get_logical_apicid(void)
+{
+	if (x2apic_mode)
+		cpu0_logical_apicid = apic_read(APIC_LDR);
+	else
+		cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+}
+
 /*
  * Prepare for SMP bootup. The MP table or ACPI has been read
  * earlier. Just do some sanity checking here and enable APIC mode.
@@ -1354,11 +1362,13 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	}
 
 	default_setup_apic_routing();
-	cpu0_logical_apicid = apic_bsp_setup(false);
+	apic_bsp_setup(false);
 
 	/* Setup local timer */
 	x86_init.timers.setup_percpu_clockev();
 
+	smp_get_logical_apicid();
+
 	pr_info("CPU0: ");
 	print_cpu_info(&cpu_data(0));
 
From 3e730dad3b6da42d21c05007445ca1bfd219d7ce Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:48 +0800
Subject: [PATCH 05/60] x86/apic: Unify interrupt mode setup for SMP-capable
 system

On an SMP-capable system, the kernel enables and sets up the APIC
interrupt delivery mode in native_smp_prepare_cpus(). The decision how to
set up the APIC is intermingled with the decision whether to set up SMP or
not.

Split the initialization of the APIC interrupt mode out, independent of
other decisions, and provide a separate apic_intr_mode_init() function for
it. The invocation time stays the same for now.
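[ Editorial note: the resulting mapping from selected mode to setup action,
  summarized from the hunks below:

	/*
	 * APIC_PIC                     -> keep PIC, no APIC setup
	 * APIC_VIRTUAL_WIRE            -> setup routing, apic_bsp_setup(false)
	 * APIC_VIRTUAL_WIRE_NO_CONFIG  -> setup routing, apic_bsp_setup(true)
	 * APIC_SYMMETRIC_IO            -> setup routing, apic_bsp_setup(false)
	 * APIC_SYMMETRIC_IO_NO_ROUTING -> no routing setup, apic_bsp_setup(false)
	 */
]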
Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-6-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/kernel/apic/apic.c | 38 ++++++++++++++++++++++++++++++++++---
 arch/x86/kernel/smpboot.c   | 14 ++------------
 2 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index eafed8fbf340..7ae97c26d23c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1221,7 +1221,9 @@ void __init sync_Arb_IDs(void)
 enum apic_intr_mode {
 	APIC_PIC,
 	APIC_VIRTUAL_WIRE,
+	APIC_VIRTUAL_WIRE_NO_CONFIG,
 	APIC_SYMMETRIC_IO,
+	APIC_SYMMETRIC_IO_NO_ROUTING,
 };
 
 static int __init apic_intr_mode_select(void)
@@ -1262,11 +1264,27 @@ static int __init apic_intr_mode_select(void)
 	/* Check MP table or ACPI MADT configuration */
 	if (!smp_found_config) {
 		disable_ioapic_support();
-		if (!acpi_lapic)
+		if (!acpi_lapic) {
 			pr_info("APIC: ACPI MADT or MP tables are not detected\n");
+			return APIC_VIRTUAL_WIRE_NO_CONFIG;
+		}
 		return APIC_VIRTUAL_WIRE;
 	}
 
+#ifdef CONFIG_SMP
+	/* If SMP should be disabled, then really disable it! */
+	if (!setup_max_cpus) {
+		pr_info("APIC: SMP mode deactivated\n");
+		return APIC_SYMMETRIC_IO_NO_ROUTING;
+	}
+
+	if (read_apic_id() != boot_cpu_physical_apicid) {
+		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
+		      read_apic_id(), boot_cpu_physical_apicid);
+		/* Or can we switch back to PIC here? */
+	}
+#endif
+
 	return APIC_SYMMETRIC_IO;
 }
 
@@ -1322,17 +1340,31 @@ void __init init_bsp_APIC(void)
 /* Init the interrupt delivery mode for the BSP */
 void __init apic_intr_mode_init(void)
 {
+	bool upmode = false;
+
 	switch (apic_intr_mode_select()) {
 	case APIC_PIC:
 		pr_info("APIC: Keep in PIC mode (8259)\n");
 		return;
 	case APIC_VIRTUAL_WIRE:
 		pr_info("APIC: Switch to virtual wire mode setup\n");
-		return;
+		default_setup_apic_routing();
+		break;
+	case APIC_VIRTUAL_WIRE_NO_CONFIG:
+		pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
+		upmode = true;
+		default_setup_apic_routing();
+		break;
 	case APIC_SYMMETRIC_IO:
 		pr_info("APIC: Switch to symmetric I/O mode setup\n");
-		return;
+		default_setup_apic_routing();
+		break;
+	case APIC_SYMMETRIC_IO_NO_ROUTING:
+		pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
+		break;
 	}
+
+	apic_bsp_setup(upmode);
 }
 
 static void lapic_setup_esr(void)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d367ddbec5d0..d0a1d28c23e8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1336,18 +1336,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 	set_cpu_sibling_map(0);
 
+	apic_intr_mode_init();
+
 	switch (smp_sanity_check(max_cpus)) {
 	case SMP_NO_CONFIG:
 		disable_smp();
-		if (APIC_init_uniprocessor())
-			pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
 		return;
 	case SMP_NO_APIC:
 		disable_smp();
 		return;
 	case SMP_FORCE_UP:
 		disable_smp();
-		apic_bsp_setup(false);
 		/* Setup local timer */
 		x86_init.timers.setup_percpu_clockev();
 		return;
 	case SMP_OK:
 		break;
 	}
 
-	if (read_apic_id() != boot_cpu_physical_apicid) {
-		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
-		      read_apic_id(), boot_cpu_physical_apicid);
-		/* Or can we switch back to PIC here? */
-	}
-
-	default_setup_apic_routing();
-	apic_bsp_setup(false);
-
 	/* Setup local timer */
 	x86_init.timers.setup_percpu_clockev();
 
From 4f45ed9f848f0721967e2f79e5409b6538894a43 Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:49 +0800
Subject: [PATCH 06/60] x86/apic: Mark the apic_intr_mode extern for sanity
 check cleanup

native_smp_prepare_cpus(), which prepares for SMP bootup, does some sanity
checking, enables APIC mode and disables the SMP feature. Now that the
APIC mode setup has been unified in apic_intr_mode_init(), some of the
sanity checks are redundant and need to be cleaned up.

Make apic_intr_mode extern so the switch can be refined, and remove the
redundant sanity checks.

Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-7-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/include/asm/apic.h |  9 ++++++
 arch/x86/kernel/apic/apic.c | 16 ++++-------
 arch/x86/kernel/smpboot.c   | 57 ++++++-------------------------------
 3 files changed, 24 insertions(+), 58 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 4e550c742130..01f3fc8f8691 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -53,6 +53,15 @@ extern int local_apic_timer_c2_ok;
 extern int disable_apic;
 extern unsigned int lapic_timer_frequency;
 
+extern enum apic_intr_mode_id apic_intr_mode;
+enum apic_intr_mode_id {
+	APIC_PIC,
+	APIC_VIRTUAL_WIRE,
+	APIC_VIRTUAL_WIRE_NO_CONFIG,
+	APIC_SYMMETRIC_IO,
+	APIC_SYMMETRIC_IO_NO_ROUTING
+};
+
 #ifdef CONFIG_SMP
 extern void __inquire_remote_apic(int apicid);
 #else /* CONFIG_SMP */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7ae97c26d23c..21d584d82f1f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1218,13 +1218,7 @@ void __init sync_Arb_IDs(void)
 			APIC_INT_LEVELTRIG | APIC_DM_INIT);
 }
 
-enum apic_intr_mode {
-	APIC_PIC,
-	APIC_VIRTUAL_WIRE,
-	APIC_VIRTUAL_WIRE_NO_CONFIG,
-	APIC_SYMMETRIC_IO,
-	APIC_SYMMETRIC_IO_NO_ROUTING,
-};
+enum apic_intr_mode_id apic_intr_mode;
 
 static int __init apic_intr_mode_select(void)
 {
@@ -1342,7 +1336,9 @@ void __init apic_intr_mode_init(void)
 {
 	bool upmode = false;
 
-	switch (apic_intr_mode_select()) {
+	apic_intr_mode = apic_intr_mode_select();
+
+	switch (apic_intr_mode) {
 	case APIC_PIC:
 		pr_info("APIC: Keep in PIC mode (8259)\n");
 		return;
@@ -1974,8 +1970,8 @@ void __init init_apic_mappings(void)
 	 * yeah -- we lie about apic_version
 	 * in case if apic was disabled via boot option
 	 * but it's not a problem for SMP compiled kernel
-	 * since smp_sanity_check is prepared for such a case
-	 * and disable smp mode
+	 * since apic_intr_mode_select is prepared for such
+	 * a case and disable smp mode
 	 */
 	boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d0a1d28c23e8..161935c49166 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1190,17 +1190,10 @@ static __init void disable_smp(void)
 	cpumask_set_cpu(0, topology_core_cpumask(0));
 }
 
-enum {
-	SMP_OK,
-	SMP_NO_CONFIG,
-	SMP_NO_APIC,
-	SMP_FORCE_UP,
-};
-
 /*
  * Various sanity checks.
 */
-static int __init smp_sanity_check(unsigned max_cpus)
+static void __init smp_sanity_check(void)
 {
 	preempt_disable();
 
@@ -1237,16 +1230,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
 	}
 
-	/*
-	 * If we couldn't find an SMP configuration at boot time,
-	 * get out of here now!
-	 */
-	if (!smp_found_config && !acpi_lapic) {
-		preempt_enable();
-		pr_notice("SMP motherboard not detected\n");
-		return SMP_NO_CONFIG;
-	}
-
 	/*
 	 * Should not be necessary because the MP table should list the boot
 	 * CPU too, but we do it for the sake of robustness anyway.
@@ -1257,29 +1240,6 @@
 		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
 	}
 	preempt_enable();
-
-	/*
-	 * If we couldn't find a local APIC, then get out of here now!
-	 */
-	if (APIC_INTEGRATED(boot_cpu_apic_version) &&
-	    !boot_cpu_has(X86_FEATURE_APIC)) {
-		if (!disable_apic) {
-			pr_err("BIOS bug, local APIC #%d not detected!...\n",
-			       boot_cpu_physical_apicid);
-			pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
-		}
-		return SMP_NO_APIC;
-	}
-
-	/*
-	 * If SMP should be disabled, then really disable it!
-	 */
-	if (!max_cpus) {
-		pr_info("SMP mode deactivated\n");
-		return SMP_FORCE_UP;
-	}
-
-	return SMP_OK;
 }
 
 static void __init smp_cpu_index_default(void)
@@ -1338,19 +1298,20 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 	apic_intr_mode_init();
 
-	switch (smp_sanity_check(max_cpus)) {
-	case SMP_NO_CONFIG:
+	smp_sanity_check();
+
+	switch (apic_intr_mode) {
+	case APIC_PIC:
+	case APIC_VIRTUAL_WIRE_NO_CONFIG:
 		disable_smp();
 		return;
-	case SMP_NO_APIC:
-		disable_smp();
-		return;
-	case SMP_FORCE_UP:
+	case APIC_SYMMETRIC_IO_NO_ROUTING:
 		disable_smp();
 		/* Setup local timer */
 		x86_init.timers.setup_percpu_clockev();
 		return;
-	case SMP_OK:
+	case APIC_VIRTUAL_WIRE:
+	case APIC_SYMMETRIC_IO:
 		break;
 	}
 
From 0c759131ae568f2e620485662104ab8c1e770c81 Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:50 +0800
Subject: [PATCH 07/60] x86/apic: Unify interrupt mode setup for UP system

In a uniprocessor kernel with UP_LATE_INIT=y, the interrupt delivery mode
is initialized in up_late_init().

Use the new unified apic_intr_mode_init() function and remove
APIC_init_uniprocessor().
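[ Editorial note: assembled from the hunks below for readability, the UP
  late init path then reads:

	void __init up_late_init(void)
	{
		apic_intr_mode_init();
		if (apic_intr_mode == APIC_PIC)
			return;
		/* Setup local timer */
		x86_init.timers.setup_percpu_clockev();
	}
]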
Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-8-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/include/asm/apic.h |  1 -
 arch/x86/kernel/apic/apic.c | 51 ++++++------------------------------
 2 files changed, 8 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 01f3fc8f8691..983a0dc564b3 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -144,7 +144,6 @@ void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern void lapic_update_tsc_freq(void);
-extern int APIC_init_uniprocessor(void);
 
 #ifdef CONFIG_X86_64
 static inline int apic_force_enable(unsigned long addr)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 21d584d82f1f..efc5fbd1c40c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1334,7 +1334,7 @@ void __init init_bsp_APIC(void)
 /* Init the interrupt delivery mode for the BSP */
 void __init apic_intr_mode_init(void)
 {
-	bool upmode = false;
+	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
 
 	apic_intr_mode = apic_intr_mode_select();
 
@@ -2420,51 +2420,16 @@ void __init apic_bsp_setup(bool upmode)
 	setup_IO_APIC();
 }
 
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int __init APIC_init_uniprocessor(void)
-{
-	if (disable_apic) {
-		pr_info("Apic disabled\n");
-		return -1;
-	}
-#ifdef CONFIG_X86_64
-	if (!boot_cpu_has(X86_FEATURE_APIC)) {
-		disable_apic = 1;
-		pr_info("Apic disabled by BIOS\n");
-		return -1;
-	}
-#else
-	if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
-		return -1;
-
-	/*
-	 * Complain if the BIOS pretends there is one.
-	 */
-	if (!boot_cpu_has(X86_FEATURE_APIC) &&
-	    APIC_INTEGRATED(boot_cpu_apic_version)) {
-		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
-			boot_cpu_physical_apicid);
-		return -1;
-	}
-#endif
-
-	if (!smp_found_config)
-		disable_ioapic_support();
-
-	default_setup_apic_routing();
-	apic_bsp_setup(true);
-	/* Setup local timer */
-	x86_init.timers.setup_percpu_clockev();
-	return 0;
-}
-
 #ifdef CONFIG_UP_LATE_INIT
 void __init up_late_init(void)
 {
-	APIC_init_uniprocessor();
+	apic_intr_mode_init();
+
+	if (apic_intr_mode == APIC_PIC)
+		return;
+
+	/* Setup local timer */
+	x86_init.timers.setup_percpu_clockev();
 }
 #endif

From ca7c6076baed396737e31e33b87a637d70e9fc5f Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:51 +0800
Subject: [PATCH 08/60] x86/ioapic: Refactor the delay logic in
 timer_irq_works()

timer_irq_works() is used to detect the timer IRQ. It calls mdelay(10) to
delay ten ticks and checks whether the timer IRQ works or not.

mdelay() depends on loops_per_jiffy, which is set up in calibrate_delay(),
but the delay calibration depends on a working timer interrupt, which
causes a chicken and egg problem.

The correct solution is to set up the interrupt mode and make sure that
the timer interrupt is delivered correctly before invoking
calibrate_delay(). That means that mdelay() cannot be used in
timer_irq_works().

Provide helper functions to make a rough delay estimate which is good
enough to prove that the timer interrupt is working. Either use TSC or a
simple delay loop and assume that 4 GHz is the maximum CPU frequency to
base the delay calculation on.
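[ Editorial note: the bound used below can be checked with simple
  arithmetic. Ten ticks are 10/HZ seconds; at the assumed maximum of 4 GHz
  that corresponds to 4 * 10^9 cycles/s * 10/HZ s = 40000000000/HZ TSC
  cycles, so waiting that many cycles lasts at least ten ticks on any CPU
  clocked at 4 GHz or less (e.g. 40 ticks at 1 GHz). ]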
Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-9-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/kernel/apic/io_apic.c | 45 ++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 70e48aa6af98..f8f248749c56 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1585,6 +1585,43 @@ static int __init notimercheck(char *s)
 }
 __setup("no_timer_check", notimercheck);
 
+static void __init delay_with_tsc(void)
+{
+	unsigned long long start, now;
+	unsigned long end = jiffies + 4;
+
+	start = rdtsc();
+
+	/*
+	 * We don't know the TSC frequency yet, but waiting for
+	 * 40000000000/HZ TSC cycles is safe:
+	 * 4 GHz == 10 jiffies
+	 * 1 GHz == 40 jiffies
+	 */
+	do {
+		rep_nop();
+		now = rdtsc();
+	} while ((now - start) < 40000000000UL / HZ &&
+		time_before_eq(jiffies, end));
+}
+
+static void __init delay_without_tsc(void)
+{
+	unsigned long end = jiffies + 4;
+	int band = 1;
+
+	/*
+	 * We don't know any frequency yet, but waiting for
+	 * 40940000000/HZ cycles is safe:
+	 * 4 GHz == 10 jiffies
+	 * 1 GHz == 40 jiffies
+	 * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
+	 */
+	do {
+		__delay(((1U << band++) * 10000000UL) / HZ);
+	} while (band < 12 && time_before_eq(jiffies, end));
+}
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1603,8 +1640,12 @@ static int __init timer_irq_works(void)
 
 	local_save_flags(flags);
 	local_irq_enable();
-	/* Let ten ticks pass... */
-	mdelay((10 * 1000) / HZ);
+
+	if (boot_cpu_has(X86_FEATURE_TSC))
+		delay_with_tsc();
+	else
+		delay_without_tsc();
+
 	local_irq_restore(flags);
 
 	/*

From 34fba3e6b1e5d42c81fc00ede715e0cdd2ebfada Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:52 +0800
Subject: [PATCH 09/60] x86/init: Add intr_mode_init to x86_init_ops

X86 and XEN initialize the interrupt delivery mode in different ways.

To avoid conditionals, add a new x86_init_ops function which defaults to
the standard function and can be overridden by the early XEN platform
code.

[ tglx: Folded the XEN part which was a separate patch to preserve
  bisectability ]

Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-10-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/include/asm/x86_init.h | 2 ++
 arch/x86/kernel/apic/apic.c     | 2 +-
 arch/x86/kernel/smpboot.c       | 2 +-
 arch/x86/kernel/x86_init.c      | 1 +
 arch/x86/xen/enlighten_pv.c     | 1 +
 5 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 7ba7e90a9ad6..f45acdf45957 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -50,11 +50,13 @@ struct x86_init_resources {
  * are set up.
 * @intr_init:			interrupt init code
 * @trap_init:			platform specific trap setup
+ * @intr_mode_init:		interrupt delivery mode setup
 */
 struct x86_init_irqs {
 	void (*pre_vector_init)(void);
 	void (*intr_init)(void);
 	void (*trap_init)(void);
+	void (*intr_mode_init)(void);
 };
 
 /**
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index efc5fbd1c40c..8dbcff2f96eb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2423,7 +2423,7 @@ void __init apic_bsp_setup(bool upmode)
 #ifdef CONFIG_UP_LATE_INIT
 void __init up_late_init(void)
 {
-	apic_intr_mode_init();
+	x86_init.irqs.intr_mode_init();
 
 	if (apic_intr_mode == APIC_PIC)
 		return;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 161935c49166..3d045e82352d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1296,7 +1296,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 	set_cpu_sibling_map(0);
 
-	apic_intr_mode_init();
+	x86_init.irqs.intr_mode_init();
 
 	smp_sanity_check();
 
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a088b2c47f73..a7889b93e438 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -55,6 +55,7 @@ struct x86_init_ops x86_init __initdata = {
 		.pre_vector_init	= init_ISA_irqs,
 		.intr_init		= native_init_IRQ,
 		.trap_init		= x86_init_noop,
+		.intr_mode_init		= apic_intr_mode_init
 	},
 
 	.oem = {
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 69b9deff7e5c..73f809a6ca87 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1230,6 +1230,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	x86_platform.get_nmi_reason = xen_get_nmi_reason;
 
 	x86_init.resources.memory_setup = xen_memory_setup;
+	x86_init.irqs.intr_mode_init	= x86_init_noop;
 	x86_init.oem.arch_setup = xen_arch_setup;
 	x86_init.oem.banner = xen_banner;
 
From 935356cecda851d94381e1c6fea9dec443f908fe Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:54 +0800
Subject: [PATCH 10/60] x86/apic: Initialize interrupt mode after timer init

A cold or warm boot through BIOS sets the APIC into the default interrupt
delivery mode. A dump-capture kernel does not go through a BIOS reset, so
the interrupt delivery mode is left in the state which was active on the
crashed kernel; the dump kernel startup code, however, assumes the default
delivery mode, which can result in interrupt delivery/handling failures.

To solve this problem, it's required to set up the final interrupt
delivery mode as soon as possible. As the IOAPIC setup needs the timer
initialized for verifying the timer interrupt delivery mode, the earliest
point is right after timer setup in late_time_init(). That results in the
following init order:

1) Set up the legacy timer, if applicable on the platform

2) Set up APIC/IOAPIC, which includes the verification of the legacy
   timer interrupt delivery
3) TSC calibration

4) Local APIC timer setup

Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-12-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/kernel/apic/apic.c | 2 --
 arch/x86/kernel/smpboot.c   | 7 +++----
 arch/x86/kernel/time.c      | 5 +++++
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8dbcff2f96eb..3d08649acec6 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2423,8 +2423,6 @@ void __init apic_bsp_setup(bool upmode)
 #ifdef CONFIG_UP_LATE_INIT
 void __init up_late_init(void)
 {
-	x86_init.irqs.intr_mode_init();
-
 	if (apic_intr_mode == APIC_PIC)
 		return;
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3d045e82352d..81652e3b8c17 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1263,8 +1263,9 @@ static void __init smp_get_logical_apicid(void)
 }
 
 /*
- * Prepare for SMP bootup. The MP table or ACPI has been read
- * earlier. Just do some sanity checking here and enable APIC mode.
+ * Prepare for SMP bootup.
+ * @max_cpus: configured maximum number of CPUs; it is a legacy parameter
+ * for common interface support.
 */
 void __init native_smp_prepare_cpus(unsigned int max_cpus)
 {
@@ -1296,8 +1297,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 	set_cpu_sibling_map(0);
 
-	x86_init.irqs.intr_mode_init();
-
 	smp_sanity_check();
 
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e0754cdbad37..3ceb834233c8 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -84,6 +84,11 @@ void __init hpet_time_init(void)
 static __init void x86_late_time_init(void)
 {
 	x86_init.timers.timer_init();
+	/*
+	 * After the PIT/HPET timers init, select and set up
+	 * the final interrupt mode for delivering IRQs.
+	 */
+	x86_init.irqs.intr_mode_init();
 	tsc_init();
 }
 
From b371ae0d4a194b178817b0edfb6a7395c7aec37a Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Wed, 13 Sep 2017 17:12:55 +0800
Subject: [PATCH 11/60] x86/apic: Remove init_bsp_APIC()

init_bsp_APIC(), which sets up the through-local-APIC virtual wire mode,
is used in the ISA IRQ initialization at boot time.

With the new APIC interrupt delivery mode scheme, which initializes the
APIC before the first interrupt is expected, init_bsp_APIC() is no longer
required and can be removed.
Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: yinghai@kernel.org
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1505293975-26005-13-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/include/asm/apic.h |  1 -
 arch/x86/kernel/apic/apic.c | 49 -------------------------------------
 arch/x86/kernel/irqinit.c   |  3 ---
 3 files changed, 53 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 983a0dc564b3..7d247b2d8c54 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,7 +136,6 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
-extern void init_bsp_APIC(void);
 extern void apic_intr_mode_init(void);
 extern void setup_local_APIC(void);
 extern void init_apic_mappings(void);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3d08649acec6..a4ee36706999 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1282,55 +1282,6 @@ static int __init apic_intr_mode_select(void)
 	return APIC_SYMMETRIC_IO;
 }
 
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
-	unsigned int value;
-
-	/*
-	 * Don't do the setup now if we have a SMP BIOS as the
-	 * through-I/O-APIC virtual wire mode might be active.
-	 */
-	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
-		return;
-
-	/*
-	 * Do not trust the local APIC being empty at bootup.
-	 */
-	clear_local_APIC();
-
-	/*
-	 * Enable APIC.
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-	/* This bit is reserved on P4/Xeon and should be cleared */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-	    (boot_cpu_data.x86 == 15))
-		value &= ~APIC_SPIV_FOCUS_DISABLED;
-	else
-#endif
-		value |= APIC_SPIV_FOCUS_DISABLED;
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
-
-	/*
-	 * Set up the virtual wire mode.
-	 */
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-	value = APIC_DM_NMI;
-	if (!lapic_is_integrated())		/* 82489DX */
-		value |= APIC_LVT_LEVEL_TRIGGER;
-	if (apic_extnmi == APIC_EXTNMI_NONE)
-		value |= APIC_LVT_MASKED;
-	apic_write(APIC_LVT1, value);
-}
-
 /* Init the interrupt delivery mode for the BSP */
 void __init apic_intr_mode_init(void)
 {
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 1add9e08e83e..beafcf584e44 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -60,9 +60,6 @@ void __init init_ISA_irqs(void)
 	struct irq_chip *chip = legacy_pic->chip;
 	int i;
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
-	init_bsp_APIC();
-#endif
 	legacy_pic->init(0);
 
 	for (i = 0; i < nr_legacy_irqs(); i++)
From e3cccbce146fdc61e0f7ffc4cdda2b408b23cf3a Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Thu, 7 Sep 2017 16:49:20 +0800
Subject: [PATCH 12/60] x86/apic: Remove duplicate X86_64 conditional in
 lapic_is_integrated()

The macro APIC_INTEGRATED(x) is already guarded by CONFIG_X86_32, so it
can be invoked unconditionally. Remove the extra "#ifdef CONFIG_X86_64"
conditional.

No functional change.
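[ Editorial note: for reference, the macro in apicdef.h of this era folds
  the 64-bit case to a constant (paraphrased from memory; integrated APICs
  report version 0x1X, the external 82489DX reports 0x0X):

	#ifdef CONFIG_X86_32
	# define APIC_INTEGRATED(x)	((x) & 0xF0u)
	#else
	# define APIC_INTEGRATED(x)	(1)
	#endif

  which is why dropping the extra #ifdef in lapic_is_integrated() changes
  nothing on 64-bit. ]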
Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1504774161-7137-1-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/kernel/apic/apic.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a4ee36706999..6708e25a09f5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -211,11 +211,7 @@ static inline int lapic_get_version(void)
  */
 static inline int lapic_is_integrated(void)
 {
-#ifdef CONFIG_X86_64
-	return 1;
-#else
 	return APIC_INTEGRATED(lapic_get_version());
-#endif
 }
 
 /*
From ae41a2a40ed4253b9e1e111df409bbecab0f9800 Mon Sep 17 00:00:00 2001
From: Dou Liyang
Date: Thu, 7 Sep 2017 16:49:21 +0800
Subject: [PATCH 13/60] x86/apic: Use lapic_is_integrated() consistently

lapic_is_integrated() is a wrapper around APIC_INTEGRATED(), but is not
used consistently. Replace the direct usage of APIC_INTEGRATED() and fix
up a hard-to-read tail comment.

No functional change.

[ tglx: Made it compile and work .... ]

Signed-off-by: Dou Liyang
Signed-off-by: Thomas Gleixner
Cc: bhe@redhat.com
Link: https://lkml.kernel.org/r/1504774161-7137-2-git-send-email-douly.fnst@cn.fujitsu.com
---
 arch/x86/kernel/apic/apic.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6708e25a09f5..ffcd7556795f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -294,14 +294,11 @@ int get_physical_broadcast(void)
  */
 int lapic_get_maxlvt(void)
 {
-	unsigned int v;
-
-	v = apic_read(APIC_LVR);
 	/*
 	 * - we always have APIC integrated on 64bit mode
 	 * - 82489DXs do not report # of LVT entries
 	 */
-	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
+	return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
 }
 
 /*
@@ -1531,7 +1528,9 @@ void setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!lapic_is_integrated())		/* 82489DX */
+
+	/* Is 82489DX ? */
+	if (!lapic_is_integrated())
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
 
From 981c2eac1cb97c7db64acf567950e7e81019dd33 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 13 Sep 2017 23:29:16 +0200
Subject: [PATCH 14/60] x86/apic: Deinline x2apic functions

These inline functions are used in both the cluster and the physical
x2apic code to fill in the function pointers of the apic structure. That
means the code is generated twice for no reason.

Move it to a C file and reuse it.

Signed-off-by: Thomas Gleixner
Tested-by: Juergen Gross
Tested-by: Yu Chen
Acked-by: Juergen Gross
Cc: Boris Ostrovsky
Cc: Tony Luck
Cc: Marc Zyngier
Cc: Alok Kataria
Cc: Joerg Roedel
Cc: "Rafael J. Wysocki"
Cc: Steven Rostedt
Cc: Christoph Hellwig
Cc: Peter Zijlstra
Cc: Borislav Petkov
Cc: Paolo Bonzini
Cc: Rui Zhang
Cc: "K. Y. Srinivasan"
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.358954066@linutronix.de --- arch/x86/include/asm/x2apic.h | 49 --------------------------- arch/x86/kernel/apic/x2apic.h | 9 +++++ arch/x86/kernel/apic/x2apic_cluster.c | 2 +- arch/x86/kernel/apic/x2apic_phys.c | 40 +++++++++++++++++++++- 4 files changed, 49 insertions(+), 51 deletions(-) delete mode 100644 arch/x86/include/asm/x2apic.h create mode 100644 arch/x86/kernel/apic/x2apic.h diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h deleted file mode 100644 index f90f0a587c66..000000000000 --- a/arch/x86/include/asm/x2apic.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Common bits for X2APIC cluster/physical modes. - */ - -#ifndef _ASM_X86_X2APIC_H -#define _ASM_X86_X2APIC_H - -#include -#include -#include - -static int x2apic_apic_id_valid(int apicid) -{ - return 1; -} - -static int x2apic_apic_id_registered(void) -{ - return 1; -} - -static void -__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) -{ - unsigned long cfg = __prepare_ICR(0, vector, dest); - native_x2apic_icr_write(cfg, apicid); -} - -static unsigned int x2apic_get_apic_id(unsigned long id) -{ - return id; -} - -static unsigned long x2apic_set_apic_id(unsigned int id) -{ - return id; -} - -static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) -{ - return initial_apicid >> index_msb; -} - -static void x2apic_send_IPI_self(int vector) -{ - apic_write(APIC_SELF_IPI, vector); -} - -#endif /* _ASM_X86_X2APIC_H */ diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h new file mode 100644 index 000000000000..4c38c2328948 --- /dev/null +++ b/arch/x86/kernel/apic/x2apic.h @@ -0,0 +1,9 @@ +/* Common bits for X2APIC cluster/physical modes. 
+
+int x2apic_apic_id_valid(int apicid);
+int x2apic_apic_id_registered(void);
+void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest);
+unsigned int x2apic_get_apic_id(unsigned long id);
+unsigned long x2apic_set_apic_id(unsigned int id);
+int x2apic_phys_pkg_id(int initial_apicid, int index_msb);
+void x2apic_send_IPI_self(int vector);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 481237cb1544..d7f5132ba5ca 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -8,7 +8,7 @@
 #include
 #include
 
-#include
+#include "x2apic.h"
 
 static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
 static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 3baf0c3dc875..a4f28212dc5d 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -6,7 +6,8 @@
 #include
 #include
 
-#include
+#include
+#include "x2apic.h"
 
 int x2apic_phys;
 
@@ -98,6 +99,43 @@ static int x2apic_phys_probe(void)
 	return apic == &apic_x2apic_phys;
 }
 
+/* Common x2apic functions, also used by x2apic_cluster */
+int x2apic_apic_id_valid(int apicid)
+{
+	return 1;
+}
+
+int x2apic_apic_id_registered(void)
+{
+	return 1;
+}
+
+void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
+{
+	unsigned long cfg = __prepare_ICR(0, vector, dest);
+
+	native_x2apic_icr_write(cfg, apicid);
+}
+
+unsigned int x2apic_get_apic_id(unsigned long id)
+{
+	return id;
+}
+
+unsigned long x2apic_set_apic_id(unsigned int id)
+{
+	return id;
+}
+
+int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
+{
+	return initial_apicid >> index_msb;
+}
+
+void x2apic_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+
 static struct apic apic_x2apic_phys __ro_after_init = {
 
 	.name				= "physical x2apic",
From 727657e6205d201e9acdb5d2c25bc1cd63c0ab16 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 13 Sep 2017 23:29:17 +0200
Subject: [PATCH 15/60] x86/apic: Sanitize return value of apic.set_apic_id()

The set_apic_id() callback returns an unsigned long value which is handed
in to apic_write() as the value argument, which is a u32. Adjust the
callback so it returns u32 right away.

Signed-off-by: Thomas Gleixner
Tested-by: Juergen Gross
Tested-by: Yu Chen
Acked-by: Juergen Gross
Cc: Boris Ostrovsky
Cc: Tony Luck
Cc: Marc Zyngier
Cc: Alok Kataria
Cc: Joerg Roedel
Cc: "Rafael J. Wysocki"
Cc: Steven Rostedt
Cc: Christoph Hellwig
Cc: Peter Zijlstra
Cc: Borislav Petkov
Cc: Paolo Bonzini
Cc: Rui Zhang
Cc: "K. Y. Srinivasan"
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.437208268@linutronix.de --- arch/x86/include/asm/apic.h | 2 +- arch/x86/kernel/apic/apic_flat_64.c | 2 +- arch/x86/kernel/apic/apic_numachip.c | 4 ++-- arch/x86/kernel/apic/x2apic.h | 2 +- arch/x86/kernel/apic/x2apic_phys.c | 2 +- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- arch/x86/xen/apic.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 7d247b2d8c54..86a3a359e603 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -305,7 +305,7 @@ struct apic { unsigned int (*get_apic_id)(unsigned long x); /* Can't be NULL on 64-bit */ - unsigned long (*set_apic_id)(unsigned int id); + u32 (*set_apic_id)(unsigned int id); int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, struct irq_data *irqdata, diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index dedd5a41ba48..6543648f0b81 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -119,7 +119,7 @@ static unsigned int flat_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -static unsigned long set_apic_id(unsigned int id) +static u32 set_apic_id(unsigned int id) { return (id & 0xFF) << 24; } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 2fda912219a6..d77c8cc4afc2 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -38,7 +38,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x) return id; } -static unsigned long numachip1_set_apic_id(unsigned int id) +static u32 numachip1_set_apic_id(unsigned int id) { return (id & 0xff) << 24; } @@ -51,7 +51,7 @@ static unsigned int numachip2_get_apic_id(unsigned long x) return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24); } -static unsigned long numachip2_set_apic_id(unsigned int id) +static u32 numachip2_set_apic_id(unsigned int id) { return id << 24; } diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h index 4c38c2328948..b107de381cb5 100644 --- a/arch/x86/kernel/apic/x2apic.h +++ b/arch/x86/kernel/apic/x2apic.h @@ -4,6 +4,6 @@ int x2apic_apic_id_valid(int apicid); int x2apic_apic_id_registered(void); void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); unsigned int x2apic_get_apic_id(unsigned long id); -unsigned long x2apic_set_apic_id(unsigned int id); +u32 x2apic_set_apic_id(unsigned int id); int x2apic_phys_pkg_id(int initial_apicid, int index_msb); void x2apic_send_IPI_self(int vector); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a4f28212dc5d..315c44eda399 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -121,7 +121,7 @@ unsigned int x2apic_get_apic_id(unsigned long id) return id; } -unsigned long x2apic_set_apic_id(unsigned int id) +u32 x2apic_set_apic_id(unsigned int id) { return id; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0d57bb9079c9..1b23b9e0a8af 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -547,7 +547,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x) return id; } -static unsigned long set_apic_id(unsigned int id) +static u32 set_apic_id(unsigned int id) { /* CHECKME: Do we need to mask out the xapic extra bits? 
 	return id;
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index b5e48da7fbff..652d62458d8d 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -30,7 +30,7 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
 	return 0xfd;
 }
 
-static unsigned long xen_set_apic_id(unsigned int x)
+static u32 xen_set_apic_id(unsigned int x)
 {
 	WARN_ON(1);
 	return x;
From 57e0aa446176493f69a8f8e270e9c4addca80772 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 13 Sep 2017 23:29:18 +0200
Subject: [PATCH 16/60] x86/apic: Sanitize return value of check_apicid_used()

The check is boolean, but the function returns unsigned long for no
reason.

Signed-off-by: Thomas Gleixner
Tested-by: Juergen Gross
Tested-by: Yu Chen
Acked-by: Juergen Gross
Cc: Boris Ostrovsky
Cc: Tony Luck
Cc: Marc Zyngier
Cc: Alok Kataria
Cc: Joerg Roedel
Cc: "Rafael J. Wysocki"
Cc: Steven Rostedt
Cc: Christoph Hellwig
Cc: Peter Zijlstra
Cc: Borislav Petkov
Cc: Paolo Bonzini
Cc: Rui Zhang
Cc: "K. Y. Srinivasan"
Cc: Arjan van de Ven
Cc: Dan Williams
Cc: Len Brown
Link: https://lkml.kernel.org/r/20170913213153.516730518@linutronix.de
---
 arch/x86/include/asm/apic.h      | 4 ++--
 arch/x86/kernel/apic/bigsmp_32.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 86a3a359e603..63f4ad5123cc 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -289,7 +289,7 @@ struct apic {
 	int disable_esr;
 
 	int dest_logical;
-	unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
+	bool (*check_apicid_used)(physid_mask_t *map, int apicid);
 
 	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
 					 const struct cpumask *mask);
@@ -581,7 +581,7 @@ default_vector_allocation_domain(int cpu, struct cpumask *retmask,
 	cpumask_copy(retmask, cpumask_of(cpu));
 }
 
-static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
+static inline bool default_check_apicid_used(physid_mask_t *map, int apicid)
 {
 	return physid_isset(apicid, *map);
 }
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 456e45e8bf84..6eb5f10f1599 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -26,9 +26,9 @@ static int bigsmp_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
+static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
 {
-	return 0;
+	return false;
 }
 
 static int bigsmp_early_logical_apicid(int cpu)
From 0801bbaac00b2c729adb1b1b0e0945ca8bbea088 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 13 Sep 2017 23:29:19 +0200
Subject: [PATCH 17/60] x86/apic: Move probe32 specific APIC functions

The apic functions which are used in probe_32.c are implemented as inlines
or in apic.c. There is no reason to have them in random places.

Move them to the actual usage site and make them static.

Signed-off-by: Thomas Gleixner
Tested-by: Juergen Gross
Tested-by: Yu Chen
Acked-by: Juergen Gross
Cc: Boris Ostrovsky
Cc: Tony Luck
Cc: Marc Zyngier
Cc: Alok Kataria
Cc: Joerg Roedel
Cc: "Rafael J. Wysocki"
Cc: Steven Rostedt
Cc: Christoph Hellwig
Cc: Peter Zijlstra
Cc: Borislav Petkov
Cc: Paolo Bonzini
Cc: Rui Zhang
Cc: "K. Y. Srinivasan"
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.596768194@linutronix.de --- arch/x86/include/asm/apic.h | 21 --------------------- arch/x86/kernel/apic/apic.c | 10 ---------- arch/x86/kernel/apic/probe_32.c | 25 +++++++++++++++++++++++++ 3 files changed, 25 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 63f4ad5123cc..06a023b2dca1 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -524,31 +524,10 @@ extern void default_setup_apic_routing(void); extern struct apic apic_noop; #ifdef CONFIG_X86_32 - static inline int noop_x86_32_early_logical_apicid(int cpu) { return BAD_APICID; } - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -extern void default_init_apic_ldr(void); - -static inline int default_apic_id_registered(void) -{ - return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - #endif extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ffcd7556795f..1b1aeda189d7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2273,16 +2273,6 @@ int hard_smp_processor_id(void) return read_apic_id(); } -void default_init_apic_ldr(void) -{ - unsigned long val; - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write(APIC_LDR, val); -} - int default_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, unsigned int *apicid) diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 63287659adb6..12d171204c8a 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -66,6 +66,31 @@ static void setup_apic_flat_routing(void) #endif } +static int default_apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +/* + * Set up the logical destination ID. Intel recommends to set DFR, LDR and + * TPR before enabling an APIC. See e.g. "AP-388 82489DX User's Manual" + * (Intel document number 292116). + */ +static void default_init_apic_ldr(void) +{ + unsigned long val; + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write(APIC_LDR, val); +} + +static int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + /* should be called last. */ static int probe_default(void) { From 1da91779e1fb79aaed3de118a156b7040f6147c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:20 +0200 Subject: [PATCH 18/60] x86/apic: Move APIC noop specific functions Move more inlines to the place where they belong. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. 
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.677743545@linutronix.de --- arch/x86/include/asm/apic.h | 7 ------- arch/x86/kernel/apic/apic_noop.c | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 06a023b2dca1..cf10be9afde0 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -523,13 +523,6 @@ extern void default_setup_apic_routing(void); extern struct apic apic_noop; -#ifdef CONFIG_X86_32 -static inline int noop_x86_32_early_logical_apicid(int cpu) -{ - return BAD_APICID; -} -#endif - extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, struct irq_data *irqdata, unsigned int *apicid); diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 6599f437b4ab..c2c6bac28e79 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -108,6 +108,13 @@ static void noop_apic_write(u32 reg, u32 v) WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); } +#ifdef CONFIG_X86_32 +static int noop_x86_32_early_logical_apicid(int cpu) +{ + return BAD_APICID; +} +#endif + struct apic apic_noop __ro_after_init = { .name = "noop", .probe = noop_probe, From 64063505835663c67cf18524c46e1eb70d30fb54 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:21 +0200 Subject: [PATCH 19/60] x86/apic: Sanitize 32/64bit APIC callbacks The 32bit and the 64bit implementation of default_cpu_present_to_apicid() and default_check_phys_apicid_present() are exactly the same, but implemented and located differently. Move them to common apic code and get rid of the pointless difference. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. 
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.757329991@linutronix.de --- arch/x86/include/asm/apic.h | 30 ------------------------------ arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kernel/apic/Makefile | 2 +- arch/x86/kernel/apic/apic_common.c | 20 ++++++++++++++++++++ arch/x86/kernel/setup.c | 12 ------------ 5 files changed, 22 insertions(+), 44 deletions(-) create mode 100644 arch/x86/kernel/apic/apic_common.c diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index cf10be9afde0..6561ea088b6a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -472,9 +472,6 @@ static inline unsigned default_get_apic_id(unsigned long x) extern void apic_send_IPI_self(int vector); DECLARE_PER_CPU(int, x2apic_extra_bits); - -extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int phys_apicid); #endif extern void generic_bigsmp_probe(void); @@ -563,35 +560,8 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma *retmap = *phys_map; } -static inline int __default_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline int -__default_check_phys_apicid_present(int phys_apicid) -{ - return physid_isset(phys_apicid, phys_cpu_present_map); -} - -#ifdef CONFIG_X86_32 -static inline int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -static inline int -default_check_phys_apicid_present(int phys_apicid) -{ - return __default_check_phys_apicid_present(phys_apicid); -} -#else extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); -#endif #endif /* CONFIG_X86_LOCAL_APIC */ extern void irq_enter(void); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c73e493adf07..9d7d856b2d89 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1419,7 +1419,7 @@ static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} static inline int kvm_cpu_get_apicid(int mps_cpu) { #ifdef CONFIG_X86_LOCAL_APIC - return __default_cpu_present_to_apicid(mps_cpu); + return default_cpu_present_to_apicid(mps_cpu); #else WARN_ON_ONCE(1); return BAD_APICID; diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 8e63ebdcbd0b..bd65ce2e768e 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -6,7 +6,7 @@ # In particualr, smp_apic_timer_interrupt() is called in random places. 
KCOV_INSTRUMENT := n -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_common.o apic_noop.o ipi.o vector.o obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c new file mode 100644 index 000000000000..2a084d48db37 --- /dev/null +++ b/arch/x86/kernel/apic/apic_common.c @@ -0,0 +1,20 @@ +/* + * Common functions shared between the various APIC flavours + * + * SPDX-License-Identifier: GPL-2.0 + */ +#include +#include + +int default_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +int default_check_phys_apicid_present(int phys_apicid) +{ + return physid_isset(phys_apicid, phys_cpu_present_map); +} diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0957dd73d127..82559867e0a9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -136,18 +136,6 @@ RESERVE_BRK(dmi_alloc, 65536); static __initdata unsigned long _brk_start = (unsigned long)__brk_base; unsigned long _brk_end = (unsigned long)__brk_base; -#ifdef CONFIG_X86_64 -int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -int default_check_phys_apicid_present(int phys_apicid) -{ - return __default_check_phys_apicid_present(phys_apicid); -} -#endif - struct boot_params boot_params; /* From 83a105229c59e433409e4d86e9bb915ca281235c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:22 +0200 Subject: [PATCH 20/60] x86/apic: Move common APIC callbacks Move more apic struct specific functions out of the header and the apic management code into the common source file. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. 
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.834421893@linutronix.de --- arch/x86/include/asm/apic.h | 73 +++++----------------------- arch/x86/kernel/apic/apic.c | 28 ----------- arch/x86/kernel/apic/apic_common.c | 78 ++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 89 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6561ea088b6a..1081cfb4f159 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -476,94 +476,45 @@ DECLARE_PER_CPU(int, x2apic_extra_bits); extern void generic_bigsmp_probe(void); - #ifdef CONFIG_X86_LOCAL_APIC #include #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline const struct cpumask *default_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - -static inline const struct cpumask *online_target_cpus(void) -{ - return cpu_online_mask; -} - DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); +extern struct apic apic_noop; static inline unsigned int read_apic_id(void) { - unsigned int reg; - - reg = apic_read(APIC_ID); + unsigned int reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } -static inline int default_apic_id_valid(int apicid) -{ - return (apicid < 255); -} - +extern const struct cpumask *default_target_cpus(void); +extern const struct cpumask *online_target_cpus(void); +extern int default_apic_id_valid(int apicid); extern int default_acpi_madt_oem_check(char *, char *); - extern void default_setup_apic_routing(void); - -extern struct apic apic_noop; - extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, struct irq_data *irqdata, unsigned int *apicid); extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask, struct irq_data *irqdata, unsigned int *apicid); - -static inline void -flat_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. 
- */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -static inline void -default_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - cpumask_copy(retmask, cpumask_of(cpu)); -} - -static inline bool default_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - *retmap = *phys_map; -} - +extern bool default_check_apicid_used(physid_mask_t *map, int apicid); +extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask); +extern void default_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask); +extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); #endif /* CONFIG_X86_LOCAL_APIC */ + extern void irq_enter(void); extern void irq_exit(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 1b1aeda189d7..ca5ec3fddc49 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2273,34 +2273,6 @@ int hard_smp_processor_id(void) return read_apic_id(); } -int default_cpu_mask_to_apicid(const struct cpumask *mask, - struct irq_data *irqdata, - unsigned int *apicid) -{ - unsigned int cpu = cpumask_first(mask); - - if (cpu >= nr_cpu_ids) - return -EINVAL; - *apicid = per_cpu(x86_cpu_to_apicid, cpu); - irq_data_update_effective_affinity(irqdata, cpumask_of(cpu)); - return 0; -} - -int flat_cpu_mask_to_apicid(const struct cpumask *mask, - struct irq_data *irqdata, - unsigned int *apicid) - -{ - struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); - unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS; - - if (!cpu_mask) - return -EINVAL; - *apicid = (unsigned int)cpu_mask; - cpumask_bits(effmsk)[0] = cpu_mask; - return 0; -} - /* * Override the generic EOI implementation with an optimized version. * Only called during early boot when only one CPU is active and with diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 2a084d48db37..43f9eac53437 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -6,6 +6,64 @@ #include #include +int default_cpu_mask_to_apicid(const struct cpumask *msk, struct irq_data *irqd, + unsigned int *apicid) +{ + unsigned int cpu = cpumask_first(msk); + + if (cpu >= nr_cpu_ids) + return -EINVAL; + *apicid = per_cpu(x86_cpu_to_apicid, cpu); + irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); + return 0; +} + +int flat_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqd, + unsigned int *apicid) + +{ + struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqd); + unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS; + + if (!cpu_mask) + return -EINVAL; + *apicid = (unsigned int)cpu_mask; + cpumask_bits(effmsk)[0] = cpu_mask; + return 0; +} + +bool default_check_apicid_used(physid_mask_t *map, int apicid) +{ + return physid_isset(apicid, *map); +} + +void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) +{ + /* + * Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. 
+ * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt destination. + */ + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; +} + +void default_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) +{ + cpumask_copy(retmask, cpumask_of(cpu)); +} + +void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) +{ + *retmap = *phys_map; +} + int default_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) @@ -13,8 +71,28 @@ int default_cpu_present_to_apicid(int mps_cpu) else return BAD_APICID; } +EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); int default_check_phys_apicid_present(int phys_apicid) { return physid_isset(phys_apicid, phys_cpu_present_map); } + +const struct cpumask *default_target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_mask; +#else + return cpumask_of(0); +#endif +} + +const struct cpumask *online_target_cpus(void) +{ + return cpu_online_mask; +} + +int default_apic_id_valid(int apicid) +{ + return (apicid < 255); +}
From 72f48a38505de105e798d4783942df073aeab7ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:23 +0200 Subject: [PATCH 21/60] x86/apic: Reorganize struct apic struct apic has just grown over time by adding function pointers in random places. Reorganize it so it becomes more cache line friendly. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.913642524@linutronix.de --- arch/x86/include/asm/apic.h | 103 ++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 56 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1081cfb4f159..e3e0883fa96f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -274,73 +274,63 @@ struct irq_data; * James Cleverdon.
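The reorganization follows a simple rule: members touched on every interrupt come first so they share the leading cache lines, while probe/setup members and the name move to the tail. A minimal sketch of that grouping, with illustrative field names rather than the real struct apic layout:

    struct ops_example {
            /* Hot path: touched on every interrupt */
            void (*eoi)(unsigned int reg, unsigned int val);
            void (*write)(unsigned int reg, unsigned int val);
            unsigned int (*read)(unsigned int reg);

            /* Warm path: IPI transmission */
            void (*send_ipi)(int cpu, int vector);

            /* Cold path: probe and setup, run once at boot */
            int (*probe)(void);
            void (*setup)(void);
            const char *name;   /* diagnostic only, deliberately last */
    };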
*/ struct apic { - char *name; + /* Hotpath functions first */ + void (*eoi_write)(u32 reg, u32 v); + void (*native_eoi_write)(u32 reg, u32 v); + void (*write)(u32 reg, u32 v); + u32 (*read)(u32 reg); - int (*probe)(void); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_valid)(int apicid); - int (*apic_id_registered)(void); + /* IPI related functions */ + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); - u32 irq_delivery_mode; - u32 irq_dest_mode; + void (*send_IPI)(int cpu, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + /* dest_logical is used by the IPI functions */ + u32 dest_logical; + u32 disable_esr; + u32 irq_delivery_mode; + u32 irq_dest_mode; + + /* Functions and data related to vector allocation */ const struct cpumask *(*target_cpus)(void); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, + const struct cpumask *mask); + int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); - int disable_esr; + /* ICR related functions */ + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); - int dest_logical; - bool (*check_apicid_used)(physid_mask_t *map, int apicid); + /* Probe, setup and smpboot functions */ + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_valid)(int apicid); + int (*apic_id_registered)(void); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, - const struct cpumask *mask); - void (*init_apic_ldr)(void); + bool (*check_apicid_used)(physid_mask_t *map, int apicid); + void (*init_apic_ldr)(void); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); + void (*setup_apic_routing)(void); + int (*cpu_present_to_apicid)(int mps_cpu); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); + int (*check_phys_apicid_present)(int phys_apicid); + int (*phys_pkg_id)(int cpuid_apic, int index_msb); - void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); - - void (*setup_apic_routing)(void); - int (*cpu_present_to_apicid)(int mps_cpu); - void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); - int (*check_phys_apicid_present)(int phys_apicid); - int (*phys_pkg_id)(int cpuid_apic, int index_msb); - - unsigned int (*get_apic_id)(unsigned long x); - /* Can't be NULL on 64-bit */ - u32 (*set_apic_id)(unsigned int id); - - int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); - - /* ipi */ - void (*send_IPI)(int cpu, int vector); - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); + u32 (*get_apic_id)(unsigned long x); + u32 (*set_apic_id)(unsigned int id); /* wakeup_secondary_cpu */ - int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); - void (*inquire_remote_apic)(int apicid); - - /* apic ops */ - u32 (*read)(u32 reg); - void (*write)(u32 reg, u32 v); - /* - * ->eoi_write() has the same signature as ->write(). 
- * - * Drivers can support both ->eoi_write() and ->write() by passing the same - * callback value. Kernel can override ->eoi_write() and fall back - * on write for EOI. - */ - void (*eoi_write)(u32 reg, u32 v); - void (*native_eoi_write)(u32 reg, u32 v); - u64 (*icr_read)(void); - void (*icr_write)(u32 low, u32 high); - void (*wait_icr_idle)(void); - u32 (*safe_wait_icr_idle)(void); + void (*inquire_remote_apic)(int apicid); #ifdef CONFIG_X86_32 /* @@ -355,6 +345,7 @@ struct apic { */ int (*x86_32_early_logical_apicid)(int cpu); #endif + char *name; }; /*
From 023a611748fd58d46c8aa049cf4f22ebada983f5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:24 +0200 Subject: [PATCH 22/60] x86/apic/x2apic: Simplify cluster management The cluster management code creates a cluster mask per cpu, which requires that all cluster masks be iterated and updated whenever a CPU comes online or goes offline. Other information about the cluster is in different per cpu variables. Create a data structure which holds all information about a cluster and fill it in when the first CPU of a cluster comes online. If another CPU of a cluster comes online it just finds the pointer to the existing cluster structure and reuses it. That simplifies all usage sites and gets rid of quite a few pointless iterations over the online cpus to find the cpus which belong to the cluster. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.992629420@linutronix.de --- arch/x86/kernel/apic/x2apic_cluster.c | 154 +++++++++++++------------- 1 file changed, 76 insertions(+), 78 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index d7f5132ba5ca..729c0a512b72 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -10,20 +10,22 @@ #include #include "x2apic.h" +struct cluster_mask { + unsigned int clusterid; + int node; + struct cpumask mask; +}; + static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); -static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); +static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks); +static struct cluster_mask *cluster_hotplug_mask; static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled(); } -static inline u32 x2apic_cluster(int cpu) -{ - return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; -} - static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); @@ -35,49 +37,34 @@ static void x2apic_send_IPI(int cpu, int vector) static void __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { - struct cpumask *cpus_in_cluster_ptr; - struct cpumask *ipi_mask_ptr; - unsigned int cpu, this_cpu; + unsigned int cpu, clustercpu; + struct cpumask *tmpmsk; unsigned long flags; u32 dest; x2apic_wrmsr_fence(); local_irq_save(flags); - this_cpu = smp_processor_id(); + tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); + cpumask_copy(tmpmsk, mask); + /* If IPI should not be sent to self, clear current CPU */ + if (apic_dest != APIC_DEST_ALLINC) + cpumask_clear_cpu(smp_processor_id(),
tmpmsk); - /* - * We are to modify mask, so we need an own copy - * and be sure it's manipulated with irq off. - */ - ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); + /* Collapse cpus in a cluster so a single IPI per cluster is sent */ + for_each_cpu(cpu, tmpmsk) { + struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); - /* - * The idea is to send one IPI per cluster. - */ - for_each_cpu(cpu, ipi_mask_ptr) { - unsigned long i; - - cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); dest = 0; - - /* Collect cpus in cluster. */ - for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { - if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) - dest |= per_cpu(x86_cpu_to_logical_apicid, i); - } + for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) + dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu); if (!dest) continue; __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); - /* - * Cluster sibling cpus should be discared now so - * we would not send IPI them second time. - */ - cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); + /* Remove cluster CPUs from tmpmask */ + cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); } local_irq_restore(flags); @@ -109,91 +96,100 @@ x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, unsigned int *apicid) { struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); + struct cluster_mask *cmsk; unsigned int cpu; u32 dest = 0; - u16 cluster; cpu = cpumask_first(mask); if (cpu >= nr_cpu_ids) return -EINVAL; - dest = per_cpu(x86_cpu_to_logical_apicid, cpu); - cluster = x2apic_cluster(cpu); - + cmsk = per_cpu(cluster_masks, cpu); cpumask_clear(effmsk); - for_each_cpu(cpu, mask) { - if (cluster != x2apic_cluster(cpu)) - continue; + for_each_cpu_and(cpu, &cmsk->mask, mask) { dest |= per_cpu(x86_cpu_to_logical_apicid, cpu); cpumask_set_cpu(cpu, effmsk); } - *apicid = dest; return 0; } static void init_x2apic_ldr(void) { - unsigned int this_cpu = smp_processor_id(); + struct cluster_mask *cmsk = this_cpu_read(cluster_masks); + u32 cluster, apicid = apic_read(APIC_LDR); unsigned int cpu; - per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); + this_cpu_write(x86_cpu_to_logical_apicid, apicid); - cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); + if (cmsk) + goto update; + + cluster = apicid >> 16; for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); + cmsk = per_cpu(cluster_masks, cpu); + /* Matching cluster found. Link and update it. */ + if (cmsk && cmsk->clusterid == cluster) + goto update; } + cmsk = cluster_hotplug_mask; + cluster_hotplug_mask = NULL; +update: + this_cpu_write(cluster_masks, cmsk); + cpumask_set_cpu(smp_processor_id(), &cmsk->mask); } -/* - * At CPU state changes, update the x2apic cluster sibling info. - */ -static int x2apic_prepare_cpu(unsigned int cpu) +static int alloc_clustermask(unsigned int cpu, int node) { - if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL)) - return -ENOMEM; - - if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) { - free_cpumask_var(per_cpu(cpus_in_cluster, cpu)); - return -ENOMEM; + if (per_cpu(cluster_masks, cpu)) + return 0; + /* + * If a hotplug spare mask exists, check whether it's on the right + * node. If not, free it and allocate a new one. 
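The collapse loop added to __x2apic_send_IPI_mask() above is the core of the change: pick the first remaining CPU, OR together the logical IDs of all requested CPUs in its cluster, send one IPI, and strip that whole cluster from the work mask. A self-contained userspace sketch of the same algorithm, assuming a made-up logical ID layout (cluster << 16 | cpu bit) and 64-bit masks in place of cpumasks:

    #include <stdint.h>
    #include <stdio.h>

    #define NR_CPUS 64

    /* Assumption: logical APIC ID = (cluster << 16) | (1 << (cpu % 16)) */
    static uint32_t logical_apicid(unsigned int cpu)
    {
            return ((uint32_t)(cpu / 16) << 16) | (1u << (cpu % 16));
    }

    static void send_ipi_mask(uint64_t mask, unsigned int vector)
    {
            /* Collapse CPUs so that a single IPI per cluster is sent */
            while (mask) {
                    unsigned int first = (unsigned int)__builtin_ctzll(mask);
                    uint32_t cluster = logical_apicid(first) >> 16;
                    uint32_t dest = 0;

                    for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++) {
                            if ((mask & (1ull << cpu)) &&
                                (logical_apicid(cpu) >> 16) == cluster) {
                                    dest |= logical_apicid(cpu) & 0xffffu;
                                    mask &= ~(1ull << cpu); /* retire CPU */
                            }
                    }
                    printf("vector %#x -> cluster %u, dest bits %#x\n",
                           vector, cluster, dest);
            }
    }

    int main(void)
    {
            send_ipi_mask(0xf000full, 0x20); /* CPUs 0-3 and 16-19: two IPIs */
            return 0;
    }

Each pass through the outer loop retires an entire cluster, so the loop runs once per cluster rather than once per CPU.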
+ */ + if (cluster_hotplug_mask) { + if (cluster_hotplug_mask->node == node) + return 0; + kfree(cluster_hotplug_mask); } + cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), + GFP_KERNEL, node); + if (!cluster_hotplug_mask) + return -ENOMEM; + cluster_hotplug_mask->node = node; return 0; } -static int x2apic_dead_cpu(unsigned int this_cpu) +static int x2apic_prepare_cpu(unsigned int cpu) { - int cpu; + if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) + return -ENOMEM; + if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) + return -ENOMEM; + return 0; +} - for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); - } - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - free_cpumask_var(per_cpu(ipi_mask, this_cpu)); +static int x2apic_dead_cpu(unsigned int dead_cpu) +{ + struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); + + cpumask_clear_cpu(smp_processor_id(), &cmsk->mask); + free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } static int x2apic_cluster_probe(void) { - int cpu = smp_processor_id(); - int ret; - if (!x2apic_mode) return 0; - ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", - x2apic_prepare_cpu, x2apic_dead_cpu); - if (ret < 0) { + if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", + x2apic_prepare_cpu, x2apic_dead_cpu) < 0) { pr_err("Failed to register X2APIC_PREPARE\n"); return 0; } - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); + init_x2apic_ldr(); return 1; } @@ -208,6 +204,8 @@ static const struct cpumask *x2apic_cluster_target_cpus(void) static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, const struct cpumask *mask) { + struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); + /* * To minimize vector pressure, default case of boot, device bringup * etc will use a single cpu for the interrupt destination. @@ -220,7 +218,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, if (mask == x2apic_cluster_target_cpus()) cpumask_copy(retmask, cpumask_of(cpu)); else - cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); + cpumask_and(retmask, mask, &cmsk->mask); } static struct apic apic_x2apic_cluster __ro_after_init = {
From c1d1ee9ac1793d939ba1a1322767cc5f77a5b8fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:25 +0200 Subject: [PATCH 23/60] x86/apic: Get rid of apic->target_cpus The target_cpus() callback of the apic struct is not really useful. Some APICs return cpu_online_mask and others cpu_all_mask. The latter is bogus as it does not take holes in the cpu_possible_mask into account. Replace it with cpu_online_mask which makes the most sense and remove the callback. The usage sites will be removed in a later step anyway, so get rid of it now to have incremental changes. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. 
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.070850916@linutronix.de --- arch/x86/include/asm/apic.h | 3 --- arch/x86/kernel/apic/apic_common.c | 14 -------------- arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 7 ------- arch/x86/kernel/apic/apic_numachip.c | 2 -- arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/io_apic.c | 7 +++---- arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 8 +------- arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 1 - arch/x86/xen/apic.c | 1 - 13 files changed, 5 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e3e0883fa96f..ff0bddabaa04 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -298,7 +298,6 @@ struct apic { u32 irq_dest_mode; /* Functions and data related to vector allocation */ - const struct cpumask *(*target_cpus)(void); void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, const struct cpumask *mask); int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, @@ -484,8 +483,6 @@ static inline unsigned int read_apic_id(void) return apic->get_apic_id(reg); } -extern const struct cpumask *default_target_cpus(void); -extern const struct cpumask *online_target_cpus(void); extern int default_apic_id_valid(int apicid); extern int default_acpi_madt_oem_check(char *, char *); extern void default_setup_apic_routing(void); diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 43f9eac53437..4791654cdeb2 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -78,20 +78,6 @@ int default_check_phys_apicid_present(int phys_apicid) return physid_isset(phys_apicid, phys_cpu_present_map); } -const struct cpumask *default_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - -const struct cpumask *online_target_cpus(void) -{ - return cpu_online_mask; -} - int default_apic_id_valid(int apicid) { return (apicid < 255); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 6543648f0b81..7ca354dee8af 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -154,7 +154,6 @@ static struct apic apic_flat __ro_after_init = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, @@ -249,7 +248,6 @@ static struct apic apic_physflat __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index c2c6bac28e79..a8a7cb1347dc 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -83,12 +83,6 @@ static int noop_apic_id_registered(void) return physid_isset(0, phys_cpu_present_map); } -static const struct cpumask *noop_target_cpus(void) -{ - /* only BSP here */ - return cpumask_of(0); -} - static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, const struct cpumask *mask) { @@ -127,7 +121,6 @@ struct apic apic_noop __ro_after_init = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 
1, - .target_cpus = noop_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index d77c8cc4afc2..cc2f8843391f 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -249,7 +249,6 @@ static const struct apic apic_numachip1 __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, @@ -300,7 +299,6 @@ static const struct apic apic_numachip2 __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 6eb5f10f1599..72a1a0385549 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -154,7 +154,6 @@ static struct apic apic_bigsmp __ro_after_init = { /* phys delivery to target CPU: */ .irq_dest_mode = 0, - .target_cpus = default_target_cpus, .disable_esr = 1, .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 11702d92407d..d8c75d61f766 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2553,9 +2553,8 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) } /* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be apic->target_cpus() + * This function updates target affinity of IOAPIC interrupts to include + * the CPUs which came online during SMP bringup. 
*/ #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) @@ -2588,7 +2587,7 @@ void __init setup_ioapic_dest(void) if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) mask = irq_data_get_affinity_mask(idata); else - mask = apic->target_cpus(); + mask = irq_default_affinity; chip = irq_data_get_irq_chip(idata); /* Might be lapic_chip for irq 0 */ diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 12d171204c8a..95125bfb4e09 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -109,7 +109,6 @@ static struct apic apic_default __ro_after_init = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, - .target_cpus = default_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 88c214e75a6b..b6b963e42028 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -255,7 +255,7 @@ static int assign_irq_vector_policy(int irq, int node, if (node != NUMA_NO_NODE && assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0) return 0; - return assign_irq_vector(irq, data, apic->target_cpus(), irqdata); + return assign_irq_vector(irq, data, cpu_online_mask, irqdata); } static void clear_irq_vector(int irq, struct apic_chip_data *data) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 729c0a512b72..c1684f27226e 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -193,11 +193,6 @@ static int x2apic_cluster_probe(void) return 1; } -static const struct cpumask *x2apic_cluster_target_cpus(void) -{ - return cpu_all_mask; -} - /* * Each x2apic cluster is an allocation domain. */ @@ -215,7 +210,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, * derived from the first cpu in the mask) members specified * in the mask. 
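The comment above describes the allocation-domain policy: the default request covering all online CPUs collapses to the single target CPU, while an explicit affinity request is clipped to the CPUs of that cluster. A hedged sketch of the decision, with 64-bit masks standing in for cpumasks (illustrative only, not the kernel's types):

    #include <stdint.h>

    /*
     * 'requested', 'online' and 'cluster' stand in for the cpumask
     * arguments of cluster_vector_allocation_domain().
     */
    static uint64_t cluster_alloc_domain(unsigned int cpu, uint64_t requested,
                                         uint64_t online, uint64_t cluster)
    {
            if (requested == online)        /* default case: one CPU only */
                    return 1ull << cpu;
            return requested & cluster;     /* explicit affinity: clip */
    }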
*/ - if (mask == x2apic_cluster_target_cpus()) + if (cpumask_equal(mask, cpu_online_mask)) cpumask_copy(retmask, cpumask_of(cpu)); else cpumask_and(retmask, mask, &cmsk->mask); @@ -232,7 +227,6 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = x2apic_cluster_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 315c44eda399..6903e69a7b60 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -147,7 +147,6 @@ static struct apic apic_x2apic_phys __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1b23b9e0a8af..9f6d551deeb4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -584,7 +584,6 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* Physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 652d62458d8d..58776bcf4251 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -160,7 +160,6 @@ static struct apic xen_pv_apic = { /* .irq_delivery_mode - used in native_compose_msi_msg only */ /* .irq_dest_mode - used in native_compose_msi_msg only */ - .target_cpus = default_target_cpus, .disable_esr = 0, /* .dest_logical - default_send_IPI_ use it but we use our own. */ .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */
From 7854f82293e99f6bb3df793a2f579db4670ba71b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:26 +0200 Subject: [PATCH 24/60] x86/vector: Rename used_vectors to system_vectors used_vectors is a misnomer as it only has the system vectors which are excluded from the regular vector allocation marked. It is not, as the name suggests, storage for the actually used vectors. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. 
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.150209009@linutronix.de --- arch/x86/include/asm/desc.h | 2 +- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/idt.c | 12 ++++++------ arch/x86/kernel/irq.c | 4 ++-- arch/x86/kernel/traps.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 9d0e13738ed3..c474bf4971d9 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -392,7 +392,7 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) void update_intr_gate(unsigned int n, const void *addr); void alloc_intr_gate(unsigned int n, const void *addr); -extern unsigned long used_vectors[]; +extern unsigned long system_vectors[]; #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u32, debug_idt_ctr); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index b6b963e42028..67d20ee60e33 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -175,7 +175,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, if (unlikely(current_vector == vector)) goto next_cpu; - if (test_bit(vector, used_vectors)) + if (test_bit(vector, system_vectors)) goto next; for_each_cpu(new_cpu, vector_searchmask) { diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 6107ee1cb8d5..723fa9782186 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -225,7 +225,7 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy idt_init_desc(&desc, t); write_idt_entry(idt, t->vector, &desc); if (sys) - set_bit(t->vector, used_vectors); + set_bit(t->vector, system_vectors); } } @@ -313,14 +313,14 @@ void __init idt_setup_apic_and_irq_gates(void) idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); - for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { + for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); set_intr_gate(i, entry); } - for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { + for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { #ifdef CONFIG_X86_LOCAL_APIC - set_bit(i, used_vectors); + set_bit(i, system_vectors); set_intr_gate(i, spurious_interrupt); #else entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); @@ -358,7 +358,7 @@ void idt_invalidate(void *addr) void __init update_intr_gate(unsigned int n, const void *addr) { - if (WARN_ON_ONCE(!test_bit(n, used_vectors))) + if (WARN_ON_ONCE(!test_bit(n, system_vectors))) return; set_intr_gate(n, addr); } @@ -366,6 +366,6 @@ void __init update_intr_gate(unsigned int n, const void *addr) void alloc_intr_gate(unsigned int n, const void *addr) { BUG_ON(n < FIRST_SYSTEM_VECTOR); - if (!test_and_set_bit(n, used_vectors)) + if (!test_and_set_bit(n, system_vectors)) set_intr_gate(n, addr); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 52089c043160..188990c3a514 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Machine check polls\n"); #endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) - if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) { + if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", @@ -416,7 +416,7 @@ int 
check_irq_vectors_for_cpu_disable(void) */ for (vector = FIRST_EXTERNAL_VECTOR; vector < FIRST_SYSTEM_VECTOR; vector++) { - if (!test_bit(vector, used_vectors) && + if (!test_bit(vector, system_vectors) && IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) { if (++count == this_count) return 0; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 34ea3651362e..321240f712e1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -71,7 +71,7 @@ #include #endif -DECLARE_BITMAP(used_vectors, NR_VECTORS); +DECLARE_BITMAP(system_vectors, NR_VECTORS); static inline void cond_local_irq_enable(struct pt_regs *regs) {
From fdba46ffb4c203b6e6794163493fd310f98bb4be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:27 +0200 Subject: [PATCH 25/60] x86/apic: Get rid of multi CPU affinity Setting the interrupt affinity of a single interrupt to multiple CPUs has a dubious value. 1) This only works on machines where the APIC uses logical destination mode. If the APIC uses physical destination mode then it is already restricted to a single CPU 2) Experiments have shown that the benefit of multi CPU affinity is close to zero and in some tests even worse than setting the affinity to a single CPU. The reason for this is that the delivery targets the APIC with the lowest ID first and only if that APIC is busy (servicing an interrupt, i.e. ISR is not empty) it hands it over to the next APIC. In the conducted tests the vast majority of interrupts end up on the APIC with the lowest ID anyway, so there is no natural spreading of the interrupts possible. Supporting multi CPU affinities adds a lot of complexity to the code, which can turn the allocation search into a worst case of nr_vectors * nr_online_cpus * nr_bits_in_target_mask As a first step disable it by restricting the vector search to a single CPU. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.228824430@linutronix.de --- arch/x86/kernel/apic/vector.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 67d20ee60e33..93edc2236282 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -136,8 +136,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, while (cpu < nr_cpu_ids) { int new_cpu, offset; - /* Get the possible target cpus for @mask/@cpu from the apic */ - apic->vector_allocation_domain(cpu, vector_cpumask, mask); + cpumask_copy(vector_cpumask, cpumask_of(cpu)); /* * Clear the offline cpus from @vector_cpumask for searching @@ -367,17 +366,11 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irq_data->chip = &lapic_controller; irq_data->chip_data = data; irq_data->hwirq = virq + i; + irqd_set_single_target(irq_data); err = assign_irq_vector_policy(virq + i, node, data, info, irq_data); if (err) goto error; - /* - * If the apic destination mode is physical, then the - * effective affinity is restricted to a single target - * CPU. Mark the interrupt accordingly. 
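The worst case quoted in the changelog (nr_vectors * nr_online_cpus * nr_bits_in_target_mask) comes from rerunning the per-vector search for every expansion of the allocation domain. Restricting the domain to cpumask_of(cpu) reduces it to a per-CPU scan. A simplified model of the restricted search, assuming a flat per-CPU occupancy table rather than the kernel's vector_irq bookkeeping:

    #include <stdbool.h>
    #include <stdint.h>

    #define NR_VECTORS      256
    #define FIRST_EXT_VEC   32

    struct cpu_vectors {
            bool used[NR_VECTORS];  /* toy model of one CPU's vector table */
    };

    /* Take the first requested CPU that still has a free vector */
    static int assign_vector(struct cpu_vectors *cpus, uint64_t requested,
                             unsigned int nr_cpus, unsigned int *out_cpu)
    {
            for (unsigned int cpu = 0; cpu < nr_cpus && cpu < 64; cpu++) {
                    if (!(requested & (1ull << cpu)))
                            continue;
                    for (int vec = FIRST_EXT_VEC; vec < NR_VECTORS; vec++) {
                            if (!cpus[cpu].used[vec]) {
                                    cpus[cpu].used[vec] = true;
                                    *out_cpu = cpu;
                                    return vec;     /* single-CPU target */
                            }
                    }
            }
            return -1;      /* every requested CPU is out of vectors */
    }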
- */ - if (!apic->irq_dest_mode) - irqd_set_single_target(irq_data); } return 0; @@ -434,7 +427,7 @@ static void __init init_legacy_irqs(void) BUG_ON(!data); data->cfg.vector = ISA_IRQ_VECTOR(i); - cpumask_setall(data->domain); + cpumask_copy(data->domain, cpumask_of(0)); irq_set_chip_data(i, data); } } From ef9e56d894eab99a33a06b96ba8057afa67d3702 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:28 +0200 Subject: [PATCH 26/60] x86/ioapic: Remove obsolete post hotplug update With single CPU affinities the post SMP boot vector update is pointless as it will just leave the affinities on the same vectors and the same CPUs. Remove it. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.308697243@linutronix.de --- arch/x86/include/asm/io_apic.h | 2 -- arch/x86/kernel/apic/io_apic.c | 42 ---------------------------------- arch/x86/kernel/smpboot.c | 1 - 3 files changed, 45 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 6cbf2cfb3f8a..731c686de37c 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -192,7 +192,6 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); -extern void setup_ioapic_dest(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ @@ -232,7 +231,6 @@ static inline void io_apic_init_mappings(void) { } static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } -static inline void setup_ioapic_dest(void) { } #endif diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d8c75d61f766..81f35ae3f884 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2556,48 +2556,6 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) * This function updates target affinity of IOAPIC interrupts to include * the CPUs which came online during SMP bringup. 
*/ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - const struct cpumask *mask; - struct irq_desc *desc; - struct irq_data *idata; - struct irq_chip *chip; - - if (skip_ioapic_setup == 1) - return; - - for_each_ioapic_pin(ioapic, pin) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - - irq = pin_2_irq(irq_entry, ioapic, pin, 0); - if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq)) - continue; - - desc = irq_to_desc(irq); - raw_spin_lock_irq(&desc->lock); - idata = irq_desc_get_irq_data(desc); - - /* - * Honour affinities which have been set in early boot - */ - if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) - mask = irq_data_get_affinity_mask(idata); - else - mask = irq_default_affinity; - - chip = irq_data_get_irq_chip(idata); - /* Might be lapic_chip for irq 0 */ - if (chip->irq_set_affinity) - chip->irq_set_affinity(idata, mask, false); - raw_spin_unlock_irq(&desc->lock); - } -} -#endif - #define IOAPIC_RESOURCE_NAME_SIZE 11 static struct resource *ioapic_resources; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 81652e3b8c17..d8cef3222887 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1360,7 +1360,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) nmi_selftest(); impress_friends(); - setup_ioapic_dest(); mtrr_aps_init(); }
From f0cc6ccaf7ba42a1247fe5a9244b6009a3beddd5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:29 +0200 Subject: [PATCH 27/60] x86/vector: Simplify the CPU hotplug vector update With single CPU affinities it's no longer required to scan all interrupts for potential destination masks which contain the newly booting CPU. Reduce it to install the active legacy PIC vectors on the newly booting CPU as those cannot be affinity controlled by the kernel and potentially end up at any CPU in the system. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. 
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.388040204@linutronix.de --- arch/x86/kernel/apic/vector.c | 64 +++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 93edc2236282..b45020364cc0 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -459,33 +459,32 @@ int __init arch_early_irq_init(void) return arch_early_ioapic_init(); } -/* Initialize vector_irq on a new cpu */ -static void __setup_vector_irq(int cpu) +/* Temporary hack to keep things working */ +static void vector_update_shutdown_irqs(void) { - struct apic_chip_data *data; struct irq_desc *desc; - int irq, vector; + int irq; - /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { - struct irq_data *idata = irq_desc_get_irq_data(desc); + struct irq_data *irqd = irq_desc_get_irq_data(desc); + struct apic_chip_data *ad = apic_chip_data(irqd); - data = apic_chip_data(idata); - if (!data || !cpumask_test_cpu(cpu, data->domain)) - continue; - vector = data->cfg.vector; - per_cpu(vector_irq, cpu)[vector] = desc; + if (ad && cpumask_test_cpu(cpu, ad->domain) && ad->cfg.vector) + this_cpu_write(vector_irq[ad->cfg.vector], desc); } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - desc = per_cpu(vector_irq, cpu)[vector]; - if (IS_ERR_OR_NULL(desc)) - continue; +} - data = apic_chip_data(irq_desc_get_irq_data(desc)); - if (!cpumask_test_cpu(cpu, data->domain)) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - } +static struct irq_desc *__setup_vector_irq(int vector) +{ + int isairq = vector - ISA_IRQ_VECTOR(0); + + /* Check whether the irq is in the legacy space */ + if (isairq < 0 || isairq >= nr_legacy_irqs()) + return VECTOR_UNUSED; + /* Check whether the irq is handled by the IOAPIC */ + if (test_bit(isairq, &io_apic_irqs)) + return VECTOR_UNUSED; + return irq_to_desc(isairq); } /* @@ -493,20 +492,27 @@ static void __setup_vector_irq(int cpu) */ void setup_vector_irq(int cpu) { - int irq; + unsigned int vector; lockdep_assert_held(&vector_lock); /* - * On most of the platforms, legacy PIC delivers the interrupts on the - * boot cpu. But there are certain platforms where PIC interrupts are - * delivered to multiple cpu's. If the legacy IRQ is handled by the - * legacy PIC, for the new cpu that is coming online, setup the static - * legacy vector to irq mapping: + * The interrupt affinity logic never targets interrupts to offline + * CPUs. The exception are the legacy PIC interrupts. In general + * they are only targeted to CPU0, but depending on the platform + * they can be distributed to any online CPU in hardware. The + * kernel has no influence on that. So all active legacy vectors + * must be installed on all CPUs. All non legacy interrupts can be + * cleared. */ - for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq); + for (vector = 0; vector < NR_VECTORS; vector++) + this_cpu_write(vector_irq[vector], __setup_vector_irq(vector)); - __setup_vector_irq(cpu); + /* + * Until the rewrite of the managed interrupt management is in + * place it's necessary to walk the irq descriptors and check for + * interrupts which are targeted at this CPU. 
+ */ + vector_update_shutdown_irqs(); }
From 86ba65514f8730d58e2c11fb6e25caa537d6bc93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:30 +0200 Subject: [PATCH 28/60] x86/vector: Cleanup variable names The naming convention of variables with the types irq_data and apic_chip_data is inconsistent and confusing. Before reworking the whole vector management make them consistent so irq_data pointers are named 'irqd' and apic_chip_data pointers are named 'apicd' all over the place. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.465731667@linutronix.de --- arch/x86/kernel/apic/vector.c | 228 +++++++++++++++++----------------- 1 file changed, 114 insertions(+), 114 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index b45020364cc0..a7f7c3730a09 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -50,22 +50,22 @@ void unlock_vector_lock(void) raw_spin_unlock(&vector_lock); } -static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data) +static struct apic_chip_data *apic_chip_data(struct irq_data *irqd) { - if (!irq_data) + if (!irqd) return NULL; - while (irq_data->parent_data) - irq_data = irq_data->parent_data; + while (irqd->parent_data) + irqd = irqd->parent_data; - return irq_data->chip_data; + return irqd->chip_data; } -struct irq_cfg *irqd_cfg(struct irq_data *irq_data) +struct irq_cfg *irqd_cfg(struct irq_data *irqd) { - struct apic_chip_data *data = apic_chip_data(irq_data); + struct apic_chip_data *apicd = apic_chip_data(irqd); - return data ? &data->cfg : NULL; + return apicd ? &apicd->cfg : NULL; } EXPORT_SYMBOL_GPL(irqd_cfg); @@ -76,35 +76,35 @@ struct irq_cfg *irq_cfg(unsigned int irq) static struct apic_chip_data *alloc_apic_chip_data(int node) { - struct apic_chip_data *data; + struct apic_chip_data *apicd; - data = kzalloc_node(sizeof(*data), GFP_KERNEL, node); - if (!data) + apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node); + if (!apicd) return NULL; - if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node)) + if (!zalloc_cpumask_var_node(&apicd->domain, GFP_KERNEL, node)) goto out_data; - if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node)) + if (!zalloc_cpumask_var_node(&apicd->old_domain, GFP_KERNEL, node)) goto out_domain; - return data; + return apicd; out_domain: - free_cpumask_var(data->domain); + free_cpumask_var(apicd->domain); out_data: - kfree(data); + kfree(apicd); return NULL; } -static void free_apic_chip_data(struct apic_chip_data *data) +static void free_apic_chip_data(struct apic_chip_data *apicd) { - if (data) { - free_cpumask_var(data->domain); - free_cpumask_var(data->old_domain); - kfree(data); + if (apicd) { + free_cpumask_var(apicd->domain); + free_cpumask_var(apicd->old_domain); + kfree(apicd); } } static int __assign_irq_vector(int irq, struct apic_chip_data *d, const struct cpumask *mask, - struct irq_data *irqdata) + struct irq_data *irqd) { /* * NOTE! 
The local APIC isn't very good at handling @@ -226,62 +226,62 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, * cpus masked out. */ cpumask_and(vector_searchmask, vector_searchmask, mask); - BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata, + BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqd, &d->cfg.dest_apicid)); return 0; } -static int assign_irq_vector(int irq, struct apic_chip_data *data, +static int assign_irq_vector(int irq, struct apic_chip_data *apicd, const struct cpumask *mask, - struct irq_data *irqdata) + struct irq_data *irqd) { int err; unsigned long flags; raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, data, mask, irqdata); + err = __assign_irq_vector(irq, apicd, mask, irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); return err; } static int assign_irq_vector_policy(int irq, int node, - struct apic_chip_data *data, + struct apic_chip_data *apicd, struct irq_alloc_info *info, - struct irq_data *irqdata) + struct irq_data *irqd) { if (info && info->mask) - return assign_irq_vector(irq, data, info->mask, irqdata); + return assign_irq_vector(irq, apicd, info->mask, irqd); if (node != NUMA_NO_NODE && - assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0) + assign_irq_vector(irq, apicd, cpumask_of_node(node), irqd) == 0) return 0; - return assign_irq_vector(irq, data, cpu_online_mask, irqdata); + return assign_irq_vector(irq, apicd, cpu_online_mask, irqd); } -static void clear_irq_vector(int irq, struct apic_chip_data *data) +static void clear_irq_vector(int irq, struct apic_chip_data *apicd) { struct irq_desc *desc; int cpu, vector; - if (!data->cfg.vector) + if (!apicd->cfg.vector) return; - vector = data->cfg.vector; - for_each_cpu_and(cpu, data->domain, cpu_online_mask) + vector = apicd->cfg.vector; + for_each_cpu_and(cpu, apicd->domain, cpu_online_mask) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - data->cfg.vector = 0; - cpumask_clear(data->domain); + apicd->cfg.vector = 0; + cpumask_clear(apicd->domain); /* * If move is in progress or the old_domain mask is not empty, * i.e. the cleanup IPI has not been processed yet, we need to remove * the old references to desc from all cpus vector tables. 
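assign_irq_vector_policy() above encodes a three-step preference: a caller-supplied mask wins, otherwise the device's NUMA node is tried, and all online CPUs are the last resort. A compact sketch of the same control flow, with opaque mask handles instead of cpumasks (names hypothetical):

    /* assign() plays the role of assign_irq_vector() */
    enum { MASK_NONE = -1 };

    typedef int (*assign_fn)(int irq, int mask_handle);

    static int assign_with_policy(int irq, int info_mask, int node_mask,
                                  int online_mask, assign_fn assign)
    {
            if (info_mask != MASK_NONE)             /* explicit mask wins */
                    return assign(irq, info_mask);
            if (node_mask != MASK_NONE && assign(irq, node_mask) == 0)
                    return 0;                       /* prefer device node */
            return assign(irq, online_mask);        /* any online CPU */
    }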
*/ - if (!data->move_in_progress && cpumask_empty(data->old_domain)) + if (!apicd->move_in_progress && cpumask_empty(apicd->old_domain)) return; desc = irq_to_desc(irq); - for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { + for_each_cpu_and(cpu, apicd->old_domain, cpu_online_mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != desc) @@ -290,7 +290,7 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) break; } } - data->move_in_progress = 0; + apicd->move_in_progress = 0; } void init_irq_alloc_info(struct irq_alloc_info *info, @@ -311,20 +311,20 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { - struct apic_chip_data *apic_data; - struct irq_data *irq_data; + struct apic_chip_data *apicd; + struct irq_data *irqd; unsigned long flags; int i; for (i = 0; i < nr_irqs; i++) { - irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); - if (irq_data && irq_data->chip_data) { + irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i); + if (irqd && irqd->chip_data) { raw_spin_lock_irqsave(&vector_lock, flags); - clear_irq_vector(virq + i, irq_data->chip_data); - apic_data = irq_data->chip_data; - irq_domain_reset_irq_data(irq_data); + clear_irq_vector(virq + i, irqd->chip_data); + apicd = irqd->chip_data; + irq_domain_reset_irq_data(irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); - free_apic_chip_data(apic_data); + free_apic_chip_data(apicd); #ifdef CONFIG_X86_IO_APIC if (virq + i < nr_legacy_irqs()) legacy_irq_data[virq + i] = NULL; @@ -337,8 +337,8 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { struct irq_alloc_info *info = arg; - struct apic_chip_data *data; - struct irq_data *irq_data; + struct apic_chip_data *apicd; + struct irq_data *irqd; int i, err, node; if (disable_apic) @@ -349,26 +349,26 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, return -ENOSYS; for (i = 0; i < nr_irqs; i++) { - irq_data = irq_domain_get_irq_data(domain, virq + i); - BUG_ON(!irq_data); - node = irq_data_get_node(irq_data); + irqd = irq_domain_get_irq_data(domain, virq + i); + BUG_ON(!irqd); + node = irq_data_get_node(irqd); #ifdef CONFIG_X86_IO_APIC if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) - data = legacy_irq_data[virq + i]; + apicd = legacy_irq_data[virq + i]; else #endif - data = alloc_apic_chip_data(node); - if (!data) { + apicd = alloc_apic_chip_data(node); + if (!apicd) { err = -ENOMEM; goto error; } - irq_data->chip = &lapic_controller; - irq_data->chip_data = data; - irq_data->hwirq = virq + i; - irqd_set_single_target(irq_data); - err = assign_irq_vector_policy(virq + i, node, data, info, - irq_data); + irqd->chip = &lapic_controller; + irqd->chip_data = apicd; + irqd->hwirq = virq + i; + irqd_set_single_target(irqd); + err = assign_irq_vector_policy(virq + i, node, apicd, info, + irqd); if (err) goto error; } @@ -416,19 +416,19 @@ int __init arch_probe_nr_irqs(void) static void __init init_legacy_irqs(void) { int i, node = cpu_to_node(0); - struct apic_chip_data *data; + struct apic_chip_data *apicd; /* * For legacy IRQ's, start with assigning irq0 to irq15 to * ISA_IRQ_VECTOR(i) for all cpu's. 
*/ for (i = 0; i < nr_legacy_irqs(); i++) { - data = legacy_irq_data[i] = alloc_apic_chip_data(node); - BUG_ON(!data); + apicd = legacy_irq_data[i] = alloc_apic_chip_data(node); + BUG_ON(!apicd); - data->cfg.vector = ISA_IRQ_VECTOR(i); - cpumask_copy(data->domain, cpumask_of(0)); - irq_set_chip_data(i, data); + apicd->cfg.vector = ISA_IRQ_VECTOR(i); + cpumask_copy(apicd->domain, cpumask_of(0)); + irq_set_chip_data(i, apicd); } } #else @@ -515,32 +515,32 @@ void setup_vector_irq(int cpu) vector_update_shutdown_irqs(); } -static int apic_retrigger_irq(struct irq_data *irq_data) +static int apic_retrigger_irq(struct irq_data *irqd) { - struct apic_chip_data *data = apic_chip_data(irq_data); + struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned long flags; int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(data->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector); + cpu = cpumask_first_and(apicd->domain, cpu_online_mask); + apic->send_IPI_mask(cpumask_of(cpu), apicd->cfg.vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; } -void apic_ack_edge(struct irq_data *data) +void apic_ack_edge(struct irq_data *irqd) { - irq_complete_move(irqd_cfg(data)); - irq_move_irq(data); + irq_complete_move(irqd_cfg(irqd)); + irq_move_irq(irqd); ack_APIC_irq(); } -static int apic_set_affinity(struct irq_data *irq_data, +static int apic_set_affinity(struct irq_data *irqd, const struct cpumask *dest, bool force) { - struct apic_chip_data *data = irq_data->chip_data; - int err, irq = irq_data->irq; + struct apic_chip_data *apicd = irqd->chip_data; + int err, irq = irqd->irq; if (!IS_ENABLED(CONFIG_SMP)) return -EPERM; @@ -548,7 +548,7 @@ static int apic_set_affinity(struct irq_data *irq_data, if (!cpumask_intersects(dest, cpu_online_mask)) return -EINVAL; - err = assign_irq_vector(irq, data, dest, irq_data); + err = assign_irq_vector(irq, apicd, dest, irqd); return err ? 
err : IRQ_SET_MASK_OK; } @@ -560,23 +560,23 @@ static struct irq_chip lapic_controller = { }; #ifdef CONFIG_SMP -static void __send_cleanup_vector(struct apic_chip_data *data) +static void __send_cleanup_vector(struct apic_chip_data *apicd) { raw_spin_lock(&vector_lock); - cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); - data->move_in_progress = 0; - if (!cpumask_empty(data->old_domain)) - apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR); + cpumask_and(apicd->old_domain, apicd->old_domain, cpu_online_mask); + apicd->move_in_progress = 0; + if (!cpumask_empty(apicd->old_domain)) + apic->send_IPI_mask(apicd->old_domain, IRQ_MOVE_CLEANUP_VECTOR); raw_spin_unlock(&vector_lock); } void send_cleanup_vector(struct irq_cfg *cfg) { - struct apic_chip_data *data; + struct apic_chip_data *apicd; - data = container_of(cfg, struct apic_chip_data, cfg); - if (data->move_in_progress) - __send_cleanup_vector(data); + apicd = container_of(cfg, struct apic_chip_data, cfg); + if (apicd->move_in_progress) + __send_cleanup_vector(apicd); } asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) @@ -590,7 +590,7 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) me = smp_processor_id(); for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - struct apic_chip_data *data; + struct apic_chip_data *apicd; struct irq_desc *desc; unsigned int irr; @@ -606,16 +606,16 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) goto retry; } - data = apic_chip_data(irq_desc_get_irq_data(desc)); - if (!data) + apicd = apic_chip_data(irq_desc_get_irq_data(desc)); + if (!apicd) goto unlock; /* * Nothing to cleanup if irq migration is in progress * or this cpu is not set in the cleanup mask. */ - if (data->move_in_progress || - !cpumask_test_cpu(me, data->old_domain)) + if (apicd->move_in_progress || + !cpumask_test_cpu(me, apicd->old_domain)) goto unlock; /* @@ -630,8 +630,8 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) * this cpu is part of the target mask. We better leave that * one alone. 
*/ - if (vector == data->cfg.vector && - cpumask_test_cpu(me, data->domain)) + if (vector == apicd->cfg.vector && + cpumask_test_cpu(me, apicd->domain)) goto unlock; irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); @@ -647,7 +647,7 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) goto unlock; } __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); - cpumask_clear_cpu(me, data->old_domain); + cpumask_clear_cpu(me, apicd->old_domain); unlock: raw_spin_unlock(&desc->lock); } @@ -660,15 +660,15 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { unsigned me; - struct apic_chip_data *data; + struct apic_chip_data *apicd; - data = container_of(cfg, struct apic_chip_data, cfg); - if (likely(!data->move_in_progress)) + apicd = container_of(cfg, struct apic_chip_data, cfg); + if (likely(!apicd->move_in_progress)) return; me = smp_processor_id(); - if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) - __send_cleanup_vector(data); + if (vector == apicd->cfg.vector && cpumask_test_cpu(me, apicd->domain)) + __send_cleanup_vector(apicd); } void irq_complete_move(struct irq_cfg *cfg) @@ -681,8 +681,8 @@ void irq_complete_move(struct irq_cfg *cfg) */ void irq_force_complete_move(struct irq_desc *desc) { - struct irq_data *irqdata; - struct apic_chip_data *data; + struct irq_data *irqd; + struct apic_chip_data *apicd; struct irq_cfg *cfg; unsigned int cpu; @@ -695,13 +695,13 @@ void irq_force_complete_move(struct irq_desc *desc) * Check first that the chip_data is what we expect * (apic_chip_data) before touching it any further. */ - irqdata = irq_domain_get_irq_data(x86_vector_domain, + irqd = irq_domain_get_irq_data(x86_vector_domain, irq_desc_get_irq(desc)); - if (!irqdata) + if (!irqd) return; - data = apic_chip_data(irqdata); - cfg = data ? &data->cfg : NULL; + apicd = apic_chip_data(irqd); + cfg = apicd ? &apicd->cfg : NULL; if (!cfg) return; @@ -719,14 +719,14 @@ void irq_force_complete_move(struct irq_desc *desc) * Clean out all offline cpus (including the outgoing one) from the * old_domain mask. */ - cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); + cpumask_and(apicd->old_domain, apicd->old_domain, cpu_online_mask); /* * If move_in_progress is cleared and the old_domain mask is empty, * then there is nothing to cleanup. fixup_irqs() will take care of * the stale vectors on the outgoing cpu. */ - if (!data->move_in_progress && cpumask_empty(data->old_domain)) { + if (!apicd->move_in_progress && cpumask_empty(apicd->old_domain)) { raw_spin_unlock(&vector_lock); return; } @@ -739,7 +739,7 @@ void irq_force_complete_move(struct irq_desc *desc) * 2) The interrupt has fired on the new vector, but the cleanup IPIs * have not been processed yet. */ - if (data->move_in_progress) { + if (apicd->move_in_progress) { /* * In theory there is a race: * @@ -773,18 +773,18 @@ void irq_force_complete_move(struct irq_desc *desc) * area arises. */ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", - irqdata->irq, cfg->old_vector); + irqd->irq, cfg->old_vector); } /* * If old_domain is not empty, then other cpus still have the irq * descriptor set in their vector array. Clean it up. 
*/ - for_each_cpu(cpu, data->old_domain) + for_each_cpu(cpu, apicd->old_domain) per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED; /* Cleanup the left overs of the (half finished) move */ - cpumask_clear(data->old_domain); - data->move_in_progress = 0; + cpumask_clear(apicd->old_domain); + apicd->move_in_progress = 0; raw_spin_unlock(&vector_lock); } #endif From 029c6e1c9df776fe1b2ba756a28fb65e9f9e9f69 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:31 +0200 Subject: [PATCH 29/60] x86/vector: Store the single CPU targets in apic data Now that the interrupt affinities are targeted at single CPUs, storing them in a cpumask is overkill. Store them in a dedicated variable. This does not yet remove the domain cpumasks because the current allocator relies on them. Preparatory change for the allocator rework. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.544867277@linutronix.de --- arch/x86/kernel/apic/vector.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a7f7c3730a09..7a9e0c6dd756 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -23,6 +23,8 @@ struct apic_chip_data { struct irq_cfg cfg; + unsigned int cpu; + unsigned int prev_cpu; cpumask_var_t domain; cpumask_var_t old_domain; u8 move_in_progress : 1; @@ -214,6 +216,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); d->move_in_progress = !cpumask_empty(d->old_domain); d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0; + d->prev_cpu = d->cpu; d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); success: @@ -228,6 +231,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, cpumask_and(vector_searchmask, vector_searchmask, mask); BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqd, &d->cfg.dest_apicid)); + d->cpu = cpumask_first(vector_searchmask); return 0; } @@ -428,6 +432,7 @@ static void __init init_legacy_irqs(void) apicd->cfg.vector = ISA_IRQ_VECTOR(i); cpumask_copy(apicd->domain, cpumask_of(0)); + apicd->cpu = 0; irq_set_chip_data(i, apicd); } } From dccfe3147b42b78458ab8e4440822c805ee76d72 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:32 +0200 Subject: [PATCH 30/60] x86/vector: Simplify vector move cleanup The vector move cleanup needs to walk the vector space and do a lot of sanity checks to find a vector to clean up. With single CPU affinities this can be simplified and made more robust by queueing the vector configuration which needs to be cleaned up in a hlist on the CPU which was the previous target. That removes all the race conditions because the cleanup either finds a valid list entry or not. The latter happens when the interrupt was torn down before the cleanup handler was able to run. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J.
Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.622727892@linutronix.de --- arch/x86/kernel/apic/vector.c | 233 ++++++++++++---------------------- 1 file changed, 83 insertions(+), 150 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 7a9e0c6dd756..68f885913927 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -25,6 +25,7 @@ struct apic_chip_data { struct irq_cfg cfg; unsigned int cpu; unsigned int prev_cpu; + struct hlist_node clist; cpumask_var_t domain; cpumask_var_t old_domain; u8 move_in_progress : 1; @@ -38,6 +39,9 @@ static struct irq_chip lapic_controller; #ifdef CONFIG_X86_IO_APIC static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; #endif +#ifdef CONFIG_SMP +static DEFINE_PER_CPU(struct hlist_head, cleanup_list); +#endif void lock_vector_lock(void) { @@ -87,6 +91,7 @@ static struct apic_chip_data *alloc_apic_chip_data(int node) goto out_data; if (!zalloc_cpumask_var_node(&apicd->old_domain, GFP_KERNEL, node)) goto out_domain; + INIT_HLIST_NODE(&apicd->clist); return apicd; out_domain: free_cpumask_var(apicd->domain); @@ -127,8 +132,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, * If there is still a move in progress or the previous move has not * been cleaned up completely, tell the caller to come back later. */ - if (d->move_in_progress || - cpumask_intersects(d->old_domain, cpu_online_mask)) + if (d->cfg.old_vector) return -EBUSY; /* Only try and allocate irqs on cpus that are present */ @@ -263,38 +267,22 @@ static int assign_irq_vector_policy(int irq, int node, static void clear_irq_vector(int irq, struct apic_chip_data *apicd) { - struct irq_desc *desc; - int cpu, vector; + unsigned int vector = apicd->cfg.vector; - if (!apicd->cfg.vector) + if (!vector) return; - vector = apicd->cfg.vector; - for_each_cpu_and(cpu, apicd->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - + per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; apicd->cfg.vector = 0; - cpumask_clear(apicd->domain); - /* - * If move is in progress or the old_domain mask is not empty, - * i.e. the cleanup IPI has not been processed yet, we need to remove - * the old references to desc from all cpus vector tables. 
- */ - if (!apicd->move_in_progress && cpumask_empty(apicd->old_domain)) + /* Clean up move in progress */ + vector = apicd->cfg.old_vector; + if (!vector) return; - desc = irq_to_desc(irq); - for_each_cpu_and(cpu, apicd->old_domain, cpu_online_mask) { - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; - vector++) { - if (per_cpu(vector_irq, cpu)[vector] != desc) - continue; - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - break; - } - } + per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; apicd->move_in_progress = 0; + hlist_del_init(&apicd->clist); } void init_irq_alloc_info(struct irq_alloc_info *info, @@ -474,7 +462,7 @@ static void vector_update_shutdown_irqs(void) struct irq_data *irqd = irq_desc_get_irq_data(desc); struct apic_chip_data *ad = apic_chip_data(irqd); - if (ad && cpumask_test_cpu(cpu, ad->domain) && ad->cfg.vector) + if (ad && ad->cfg.vector && ad->cpu == smp_processor_id()) this_cpu_write(vector_irq[ad->cfg.vector], desc); } } @@ -524,11 +512,9 @@ static int apic_retrigger_irq(struct irq_data *irqd) { struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned long flags; - int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(apicd->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), apicd->cfg.vector); + apic->send_IPI(apicd->cpu, apicd->cfg.vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -565,13 +551,56 @@ static struct irq_chip lapic_controller = { }; #ifdef CONFIG_SMP + +asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) +{ + struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); + struct apic_chip_data *apicd; + struct hlist_node *tmp; + + entering_ack_irq(); + /* Prevent vectors vanishing under us */ + raw_spin_lock(&vector_lock); + + hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { + unsigned int irr, vector = apicd->cfg.old_vector; + + /* + * Paranoia: Check if the vector that needs to be cleaned + * up is registered at the APICs IRR. If so, then this is + * not the best time to clean it up. Clean it up in the + * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR + * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest + * priority external vector, so on return from this + * interrupt the device interrupt will happen first. 
+ */ + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1U << (vector % 32))) { + apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); + continue; + } + hlist_del_init(&apicd->clist); + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + apicd->cfg.old_vector = 0; + } + + raw_spin_unlock(&vector_lock); + exiting_irq(); +} + static void __send_cleanup_vector(struct apic_chip_data *apicd) { + unsigned int cpu; + raw_spin_lock(&vector_lock); - cpumask_and(apicd->old_domain, apicd->old_domain, cpu_online_mask); apicd->move_in_progress = 0; - if (!cpumask_empty(apicd->old_domain)) - apic->send_IPI_mask(apicd->old_domain, IRQ_MOVE_CLEANUP_VECTOR); + cpu = apicd->prev_cpu; + if (cpu_online(cpu)) { + hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); + apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); + } else { + apicd->cfg.old_vector = 0; + } raw_spin_unlock(&vector_lock); } @@ -584,95 +613,15 @@ void send_cleanup_vector(struct irq_cfg *cfg) __send_cleanup_vector(apicd); } -asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) -{ - unsigned vector, me; - - entering_ack_irq(); - - /* Prevent vectors vanishing under us */ - raw_spin_lock(&vector_lock); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - struct apic_chip_data *apicd; - struct irq_desc *desc; - unsigned int irr; - - retry: - desc = __this_cpu_read(vector_irq[vector]); - if (IS_ERR_OR_NULL(desc)) - continue; - - if (!raw_spin_trylock(&desc->lock)) { - raw_spin_unlock(&vector_lock); - cpu_relax(); - raw_spin_lock(&vector_lock); - goto retry; - } - - apicd = apic_chip_data(irq_desc_get_irq_data(desc)); - if (!apicd) - goto unlock; - - /* - * Nothing to cleanup if irq migration is in progress - * or this cpu is not set in the cleanup mask. - */ - if (apicd->move_in_progress || - !cpumask_test_cpu(me, apicd->old_domain)) - goto unlock; - - /* - * We have two cases to handle here: - * 1) vector is unchanged but the target mask got reduced - * 2) vector and the target mask has changed - * - * #1 is obvious, but in #2 we have two vectors with the same - * irq descriptor: the old and the new vector. So we need to - * make sure that we only cleanup the old vector. The new - * vector has the current @vector number in the config and - * this cpu is part of the target mask. We better leave that - * one alone. - */ - if (vector == apicd->cfg.vector && - cpumask_test_cpu(me, apicd->domain)) - goto unlock; - - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - /* - * Check if the vector that needs to be cleanedup is - * registered at the cpu's IRR. If so, then this is not - * the best time to clean it up. Lets clean it up in the - * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR - * to myself. 
- */ - if (irr & (1 << (vector % 32))) { - apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); - goto unlock; - } - __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); - cpumask_clear_cpu(me, apicd->old_domain); -unlock: - raw_spin_unlock(&desc->lock); - } - - raw_spin_unlock(&vector_lock); - - exiting_irq(); -} - static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { - unsigned me; struct apic_chip_data *apicd; apicd = container_of(cfg, struct apic_chip_data, cfg); if (likely(!apicd->move_in_progress)) return; - me = smp_processor_id(); - if (vector == apicd->cfg.vector && cpumask_test_cpu(me, apicd->domain)) + if (vector == apicd->cfg.vector && apicd->cpu == smp_processor_id()) __send_cleanup_vector(apicd); } @@ -686,10 +635,9 @@ void irq_complete_move(struct irq_cfg *cfg) */ void irq_force_complete_move(struct irq_desc *desc) { - struct irq_data *irqd; struct apic_chip_data *apicd; - struct irq_cfg *cfg; - unsigned int cpu; + struct irq_data *irqd; + unsigned int vector; /* * The function is called for all descriptors regardless of which @@ -701,42 +649,30 @@ void irq_force_complete_move(struct irq_desc *desc) * (apic_chip_data) before touching it any further. */ irqd = irq_domain_get_irq_data(x86_vector_domain, - irq_desc_get_irq(desc)); + irq_desc_get_irq(desc)); if (!irqd) return; + raw_spin_lock(&vector_lock); apicd = apic_chip_data(irqd); - cfg = apicd ? &apicd->cfg : NULL; - - if (!cfg) - return; + if (!apicd) + goto unlock; /* - * This is tricky. If the cleanup of @data->old_domain has not been + * If old_vector is empty, no action required. + */ + vector = apicd->cfg.old_vector; + if (!vector) + goto unlock; + + /* + * This is tricky. If the cleanup of the old vector has not been * done yet, then the following setaffinity call will fail with * -EBUSY. This can leave the interrupt in a stale state. * * All CPUs are stuck in stop machine with interrupts disabled so * calling __irq_complete_move() would be completely pointless. - */ - raw_spin_lock(&vector_lock); - /* - * Clean out all offline cpus (including the outgoing one) from the - * old_domain mask. - */ - cpumask_and(apicd->old_domain, apicd->old_domain, cpu_online_mask); - - /* - * If move_in_progress is cleared and the old_domain mask is empty, - * then there is nothing to cleanup. fixup_irqs() will take care of - * the stale vectors on the outgoing cpu. - */ - if (!apicd->move_in_progress && cpumask_empty(apicd->old_domain)) { - raw_spin_unlock(&vector_lock); - return; - } - - /* + * * 1) The interrupt is in move_in_progress state. That means that we * have not seen an interrupt since the io_apic was reprogrammed to * the new vector. @@ -778,18 +714,15 @@ void irq_force_complete_move(struct irq_desc *desc) * area arises. */ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", - irqd->irq, cfg->old_vector); + irqd->irq, vector); } - /* - * If old_domain is not empty, then other cpus still have the irq - * descriptor set in their vector array. Clean it up. 
- */ - for_each_cpu(cpu, apicd->old_domain) per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED; - + per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; /* Cleanup the left overs of the (half finished) move */ cpumask_clear(apicd->old_domain); + apicd->cfg.old_vector = 0; apicd->move_in_progress = 0; + hlist_del_init(&apicd->clist); +unlock: raw_spin_unlock(&vector_lock); } #endif From 3534be05e4adc303d41fae65901598695adea685 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:33 +0200 Subject: [PATCH 31/60] x86/ioapic: Mark legacy vectors at reallocation time When the legacy PIC vectors are taken over by the IO APIC, the current vector assignment code is tricked into reusing the vector by allocating the apic data in the early boot process. This can be avoided by marking the allocation as a legacy PIC takeover. Preparatory patch for further cleanups. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.700501979@linutronix.de --- arch/x86/include/asm/irqdomain.h | 1 + arch/x86/kernel/apic/io_apic.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 1d9091ffa140..73e9c42ce63b 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -8,6 +8,7 @@ enum { /* Allocate contiguous CPU vectors */ X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, + X86_IRQ_ALLOC_LEGACY = 0x2, }; extern struct irq_domain *x86_vector_domain; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 81f35ae3f884..a4b0c60ab8e1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1013,6 +1013,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, info->ioapic_pin)) return -ENOMEM; } else { + info->flags |= X86_IRQ_ALLOC_LEGACY; irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, NULL); if (irq >= 0) { From 4ef76eb6de734dc03a7f3b8f80884362364e6049 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:34 +0200 Subject: [PATCH 32/60] x86/apic: Get rid of the legacy irq data storage Now that the legacy PIC takeover by the IOAPIC is marked accordingly, the early boot allocation of APIC data is no longer necessary. Use the regular allocation mechanism as it is used by non-legacy interrupts and fill in the known information (vector and affinity) so the allocator reuses the vector. This is important as the timer check might move the timer interrupt 0 back to the PIC in case the delivery through the IOAPIC fails. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y.
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.780521549@linutronix.de --- arch/x86/kernel/apic/vector.c | 52 +++++++++-------------------------- 1 file changed, 13 insertions(+), 39 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 68f885913927..d6feb9ca8f52 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -36,9 +36,6 @@ EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask; static struct irq_chip lapic_controller; -#ifdef CONFIG_X86_IO_APIC -static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; -#endif #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct hlist_head, cleanup_list); #endif @@ -317,10 +314,6 @@ static void x86_vector_free_irqs(struct irq_domain *domain, irq_domain_reset_irq_data(irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); free_apic_chip_data(apicd); -#ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs()) - legacy_irq_data[virq + i] = NULL; -#endif } } } @@ -344,12 +337,8 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irqd = irq_domain_get_irq_data(domain, virq + i); BUG_ON(!irqd); node = irq_data_get_node(irqd); -#ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) - apicd = legacy_irq_data[virq + i]; - else -#endif - apicd = alloc_apic_chip_data(node); + WARN_ON_ONCE(irqd->chip_data); + apicd = alloc_apic_chip_data(node); if (!apicd) { err = -ENOMEM; goto error; @@ -359,6 +348,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irqd->chip_data = apicd; irqd->hwirq = virq + i; irqd_set_single_target(irqd); + /* + * Make sure, that the legacy to IOAPIC transition stays on + * the same vector. This is required for check_timer() to + * work correctly as it might switch back to legacy mode. + */ + if (info->flags & X86_IRQ_ALLOC_LEGACY) { + apicd->cfg.vector = ISA_IRQ_VECTOR(virq + i); + apicd->cpu = 0; + cpumask_copy(apicd->domain, cpumask_of(0)); + } + err = assign_irq_vector_policy(virq + i, node, apicd, info, irqd); if (err) @@ -404,36 +404,10 @@ int __init arch_probe_nr_irqs(void) return legacy_pic->probe(); } -#ifdef CONFIG_X86_IO_APIC -static void __init init_legacy_irqs(void) -{ - int i, node = cpu_to_node(0); - struct apic_chip_data *apicd; - - /* - * For legacy IRQ's, start with assigning irq0 to irq15 to - * ISA_IRQ_VECTOR(i) for all cpu's. - */ - for (i = 0; i < nr_legacy_irqs(); i++) { - apicd = legacy_irq_data[i] = alloc_apic_chip_data(node); - BUG_ON(!apicd); - - apicd->cfg.vector = ISA_IRQ_VECTOR(i); - cpumask_copy(apicd->domain, cpumask_of(0)); - apicd->cpu = 0; - irq_set_chip_data(i, apicd); - } -} -#else -static inline void init_legacy_irqs(void) { } -#endif - int __init arch_early_irq_init(void) { struct fwnode_handle *fn; - init_legacy_irqs(); - fn = irq_domain_alloc_named_fwnode("VECTOR"); BUG_ON(!fn); x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, From 258d86eef94fcaa72e088962259490866ad93489 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:35 +0200 Subject: [PATCH 33/60] x86/vector: Remove pointless pointer checks The info pointer checks in assign_irq_vector_policy() are pointless because the pointer cannot be NULL, otherwise the calling code would already crash. 
Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.859484148@linutronix.de --- arch/x86/kernel/apic/vector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index d6feb9ca8f52..22cae8888e97 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -254,7 +254,7 @@ static int assign_irq_vector_policy(int irq, int node, struct irq_alloc_info *info, struct irq_data *irqd) { - if (info && info->mask) + if (info->mask) return assign_irq_vector(irq, apicd, info->mask, irqd); if (node != NUMA_NO_NODE && assign_irq_vector(irq, apicd, cpumask_of_node(node), irqd) == 0) From 99a1482d8aa105922dc4a3360ab11600f0bc9d80 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:36 +0200 Subject: [PATCH 34/60] x86/vector: Move helper functions around Move the helper functions to a different place as they would end up in the middle of management functions. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213154.949581934@linutronix.de --- arch/x86/kernel/apic/vector.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 22cae8888e97..5d5c2c064a3e 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -53,6 +53,21 @@ void unlock_vector_lock(void) raw_spin_unlock(&vector_lock); } +void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask) +{ + memset(info, 0, sizeof(*info)); + info->mask = mask; +} + +void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) +{ + if (src) + *dst = *src; + else + memset(dst, 0, sizeof(*dst)); +} + static struct apic_chip_data *apic_chip_data(struct irq_data *irqd) { if (!irqd) @@ -282,21 +297,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *apicd) hlist_del_init(&apicd->clist); } -void init_irq_alloc_info(struct irq_alloc_info *info, - const struct cpumask *mask) -{ - memset(info, 0, sizeof(*info)); - info->mask = mask; -} - -void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) -{ - if (src) - *dst = *src; - else - memset(dst, 0, sizeof(*dst)); -} - static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { From 9f9e3bb1cf2ecba7697bfb5e350ad2648e69dbdf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:37 +0200 Subject: [PATCH 35/60] x86/apic: Add replacement for cpu_mask_to_apicid() As preparation for replacing the vector allocator, provide a new function which takes a cpu number instead of a cpu mask to calculate/lookup the resulting APIC destination id. 
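The difference is easiest to see side by side; a condensed sketch based on the hunks below (the example caller at the end is hypothetical and not part of this patch):

/* Old: compute the destination from a cpumask plus irq_data */
int default_cpu_mask_to_apicid(const struct cpumask *msk,
			       struct irq_data *irqd, unsigned int *apicid);

/* New: a direct per-CPU lookup, here for the default (physical) mode */
u32 apic_default_calc_apicid(unsigned int cpu)
{
	return per_cpu(x86_cpu_to_apicid, cpu);
}

/* Hypothetical caller: with single-CPU targets this becomes a one-liner */
static void example_update_dest(struct apic_chip_data *apicd, unsigned int cpu)
{
	apicd->cfg.dest_apicid = apic->calc_dest_apicid(cpu);
}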
Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown --- arch/x86/include/asm/apic.h | 5 +++++ arch/x86/kernel/apic/apic_common.c | 10 ++++++++++ arch/x86/kernel/apic/apic_flat_64.c | 2 ++ arch/x86/kernel/apic/apic_noop.c | 1 + arch/x86/kernel/apic/apic_numachip.c | 2 ++ arch/x86/kernel/apic/bigsmp_32.c | 1 + arch/x86/kernel/apic/probe_32.c | 1 + arch/x86/kernel/apic/x2apic_cluster.c | 6 ++++++ arch/x86/kernel/apic/x2apic_phys.c | 1 + arch/x86/kernel/apic/x2apic_uv_x.c | 6 ++++++ arch/x86/xen/apic.c | 1 + 11 files changed, 36 insertions(+) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index ff0bddabaa04..01bcaa8b62b3 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -303,6 +303,7 @@ struct apic { int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, struct irq_data *irqdata, unsigned int *apicid); + u32 (*calc_dest_apicid)(unsigned int cpu); /* ICR related functions */ u64 (*icr_read)(void); @@ -486,6 +487,10 @@ static inline unsigned int read_apic_id(void) extern int default_apic_id_valid(int apicid); extern int default_acpi_madt_oem_check(char *, char *); extern void default_setup_apic_routing(void); + +extern u32 apic_default_calc_apicid(unsigned int cpu); +extern u32 apic_flat_calc_apicid(unsigned int cpu); + extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, struct irq_data *irqdata, unsigned int *apicid); diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 4791654cdeb2..ddc6a4301588 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -6,6 +6,11 @@ #include #include +u32 apic_default_calc_apicid(unsigned int cpu) +{ + return per_cpu(x86_cpu_to_apicid, cpu); +} + int default_cpu_mask_to_apicid(const struct cpumask *msk, struct irq_data *irqd, unsigned int *apicid) { @@ -18,6 +23,11 @@ int default_cpu_mask_to_apicid(const struct cpumask *msk, struct irq_data *irqd, return 0; } +u32 apic_flat_calc_apicid(unsigned int cpu) +{ + return 1U << cpu; +} + int flat_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqd, unsigned int *apicid) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 7ca354dee8af..697704443fda 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -172,6 +172,7 @@ static struct apic apic_flat __ro_after_init = { .set_apic_id = set_apic_id, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = flat_send_IPI_mask, @@ -267,6 +268,7 @@ static struct apic apic_physflat __ro_after_init = { .set_apic_id = set_apic_id, .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index a8a7cb1347dc..d8c24e6f1a11 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -142,6 +142,7 @@ struct apic apic_noop __ro_after_init = { .set_apic_id = NULL, .cpu_mask_to_apicid = 
flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = noop_send_IPI, .send_IPI_mask = noop_send_IPI_mask, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index cc2f8843391f..4ec293b30eb8 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -267,6 +267,7 @@ static const struct apic apic_numachip1 __refconst = { .set_apic_id = numachip1_set_apic_id, .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, @@ -317,6 +318,7 @@ static const struct apic apic_numachip2 __refconst = { .set_apic_id = numachip2_set_apic_id, .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 72a1a0385549..de2e8597f2df 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -172,6 +172,7 @@ static struct apic apic_bigsmp __ro_after_init = { .set_apic_id = NULL, .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 95125bfb4e09..6a9020a3c243 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -127,6 +127,7 @@ static struct apic apic_default __ro_after_init = { .set_apic_id = NULL, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = default_send_IPI_mask_logical, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index c1684f27226e..17bf63f580d7 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -114,6 +114,11 @@ x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, return 0; } +static u32 x2apic_calc_apicid(unsigned int cpu) +{ + return per_cpu(x86_cpu_to_logical_apicid, cpu); +} + static void init_x2apic_ldr(void) { struct cluster_mask *cmsk = this_cpu_read(cluster_masks); @@ -245,6 +250,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .set_apic_id = x2apic_set_apic_id, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .calc_dest_apicid = x2apic_calc_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6903e69a7b60..ebad7ddbfdfc 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -165,6 +165,7 @@ static struct apic apic_x2apic_phys __ro_after_init = { .set_apic_id = x2apic_set_apic_id, .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 9f6d551deeb4..99c3c039646d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -537,6 +537,11 @@ uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, return ret; } +static u32 apic_uv_calc_apicid(unsigned int cpu) +{ + return 
apic_default_calc_apicid(cpu) | uv_apicid_hibits; +} + static unsigned int x2apic_get_apic_id(unsigned long x) { unsigned int id; @@ -602,6 +607,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .set_apic_id = set_apic_id, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .calc_dest_apicid = apic_uv_calc_apicid, .send_IPI = uv_send_IPI_one, .send_IPI_mask = uv_send_IPI_mask, diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 58776bcf4251..fb8522bed08c 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -178,6 +178,7 @@ static struct apic xen_pv_apic = { .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, #ifdef CONFIG_SMP .send_IPI_mask = xen_send_IPI_mask, From 0fa115da408f645cca419a60a5af8f4426ad4188 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:38 +0200 Subject: [PATCH 36/60] x86/irq/vector: Initialize matrix allocator Initialize the matrix allocator and add the proper accounting points to the code. No functional change, just preparation. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.108410660@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/include/asm/apic.h | 6 ++++ arch/x86/include/asm/hw_irq.h | 3 +- arch/x86/kernel/apic/vector.c | 56 ++++++++++++++++++++++++++++++++--- arch/x86/kernel/i8259.c | 1 + arch/x86/kernel/irqinit.c | 1 + arch/x86/kernel/smpboot.c | 3 +- 7 files changed, 65 insertions(+), 6 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 971feac13506..64e99d3c5169 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -92,6 +92,7 @@ config X86 select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP + select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC select GENERIC_IRQ_MIGRATION if SMP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 01bcaa8b62b3..7a8651921ed5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -169,6 +169,10 @@ static inline int apic_is_clustered_box(void) #endif extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); +extern void lapic_assign_system_vectors(void); +extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_online(void); +extern void lapic_offline(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -179,6 +183,8 @@ static inline void disable_local_APIC(void) { } # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } static inline void apic_intr_mode_init(void) { } +static inline void lapic_assign_system_vectors(void) { } +static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 6dfe366a8804..386368890376 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -15,6 +15,8 @@ #include +#define IRQ_MATRIX_BITS 
NR_VECTORS + #ifndef __ASSEMBLY__ #include @@ -130,7 +132,6 @@ extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); extern void lock_vector_lock(void); extern void unlock_vector_lock(void); -extern void setup_vector_irq(int cpu); #ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); extern void irq_complete_move(struct irq_cfg *cfg); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 5d5c2c064a3e..078fbd08499c 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -36,6 +36,7 @@ EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask; static struct irq_chip lapic_controller; +static struct irq_matrix *vector_matrix; #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct hlist_head, cleanup_list); #endif @@ -404,6 +405,36 @@ int __init arch_probe_nr_irqs(void) return legacy_pic->probe(); } +void lapic_assign_legacy_vector(unsigned int irq, bool replace) +{ + /* + * Use assign system here so it wont get accounted as allocated + * and moveable in the cpu hotplug check and it prevents managed + * irq reservation from touching it. + */ + irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); +} + +void __init lapic_assign_system_vectors(void) +{ + unsigned int i, vector = 0; + + for_each_set_bit_from(vector, system_vectors, NR_VECTORS) + irq_matrix_assign_system(vector_matrix, vector, false); + + if (nr_legacy_irqs() > 1) + lapic_assign_legacy_vector(PIC_CASCADE_IR, false); + + /* System vectors are reserved, online it */ + irq_matrix_online(vector_matrix); + + /* Mark the preallocated legacy interrupts */ + for (i = 0; i < nr_legacy_irqs(); i++) { + if (i != PIC_CASCADE_IR) + irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i)); + } +} + int __init arch_early_irq_init(void) { struct fwnode_handle *fn; @@ -423,6 +454,14 @@ int __init arch_early_irq_init(void) BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); + /* + * Allocate the vector matrix allocator data structure and limit the + * search area. + */ + vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR, + FIRST_SYSTEM_VECTOR); + BUG_ON(!vector_matrix); + return arch_early_ioapic_init(); } @@ -454,14 +493,16 @@ static struct irq_desc *__setup_vector_irq(int vector) return irq_to_desc(isairq); } -/* - * Setup the vector to irq mappings. Must be called with vector_lock held. - */ -void setup_vector_irq(int cpu) +/* Online the local APIC infrastructure and initialize the vectors */ +void lapic_online(void) { unsigned int vector; lockdep_assert_held(&vector_lock); + + /* Online the vector matrix array for this CPU */ + irq_matrix_online(vector_matrix); + /* * The interrupt affinity logic never targets interrupts to offline * CPUs. The exception are the legacy PIC interrupts. 
In general @@ -482,6 +523,13 @@ static struct irq_desc *__setup_vector_irq(int vector) vector_update_shutdown_irqs(); } +void lapic_offline(void) +{ + lock_vector_lock(); + irq_matrix_offline(vector_matrix); + unlock_vector_lock(); +} + static int apic_retrigger_irq(struct irq_data *irqd) { struct apic_chip_data *apicd = apic_chip_data(irqd); diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 4e3b8a587c88..317c5b38a318 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -113,6 +113,7 @@ static void make_8259A_irq(unsigned int irq) io_apic_irqs &= ~(1< Date: Wed, 13 Sep 2017 23:29:39 +0200 Subject: [PATCH 37/60] x86/vector: Add vector domain debugfs support Add the debug callback for the vector domain, which gives detailed information about vector usage when invoked for the domain (using the matrix allocator debug function) and vector/target information when invoked for a particular interrupt. Extra information for the Vector domain:

  Online bitmaps:       32
  Global available:   6352
  Global reserved:       5
  Total allocated:      20
  System: 41: 0-19,32,50,128,238-255
  | CPU | avl | man | act | vectors
      0   183     4    19  33-48,51-53
      1   199     4     1  33
      2   199     4     0

Extra information for interrupts:

  Vector:    42
  Target:     4

This allows a detailed analysis of the vector usage and the association to interrupts and devices. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.188137174@linutronix.de --- arch/x86/kernel/apic/vector.c | 50 +++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 078fbd08499c..acdc74df649d 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -11,6 +11,7 @@ * published by the Free Software Foundation.
*/ #include +#include #include #include #include @@ -373,9 +374,54 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, return err; } +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + unsigned int cpu, vec, prev_cpu, prev_vec; + struct apic_chip_data *apicd; + unsigned long flags; + int irq; + + if (!irqd) { + irq_matrix_debug_show(m, vector_matrix, ind); + return; + } + + irq = irqd->irq; + if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) { + seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq)); + seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, ""); + return; + } + + apicd = irqd->chip_data; + if (!apicd) { + seq_printf(m, "%*sVector: Not assigned\n", ind, ""); + return; + } + + raw_spin_lock_irqsave(&vector_lock, flags); + cpu = apicd->cpu; + vec = apicd->cfg.vector; + prev_cpu = apicd->prev_cpu; + prev_vec = apicd->cfg.old_vector; + raw_spin_unlock_irqrestore(&vector_lock, flags); + seq_printf(m, "%*sVector: %5u\n", ind, "", vec); + seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); + if (prev_vec) { + seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vec); + seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); + } +} +#endif + static const struct irq_domain_ops x86_vector_domain_ops = { - .alloc = x86_vector_alloc_irqs, - .free = x86_vector_free_irqs, + .alloc = x86_vector_alloc_irqs, + .free = x86_vector_free_irqs, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = x86_vector_debug_show, +#endif }; int __init arch_probe_nr_irqs(void) From 8ed4f3e66665cd186bc6b1d35f25a481e35c62ab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:40 +0200 Subject: [PATCH 38/60] x86/smpboot: Set online before setting up vectors There is no reason to set the CPU online after establishing the vectors on the upcoming CPU. The vector space is protected by the vector lock so no changes can happen. Marking the CPU online before setting up the vector space makes tracing work in the early vector management cpu online code. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.264311994@linutronix.de --- arch/x86/kernel/smpboot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 91c0d1cd651e..86739f04701b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -254,14 +254,14 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* - * Lock vector_lock and initialize the vectors on this cpu - * before setting the cpu online. We must set it online with - * vector_lock held to prevent a concurrent setup/teardown - * from seeing a half valid vector space. + * Lock vector_lock, set CPU online and bring the vector + * allocator online. Online must be set with vector_lock held + * to prevent a concurrent irq setup/teardown from seeing a + * half valid vector space. 
*/ lock_vector_lock(); - lapic_online(); set_cpu_online(smp_processor_id(), true); + lapic_online(); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); From 8d1e3dca7de6e8513872799a748a1d47d8dce60d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:41 +0200 Subject: [PATCH 39/60] x86/vector: Add tracepoints for vector management Add tracepoints for analysing the new vector management Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.357986795@linutronix.de --- arch/x86/include/asm/trace/irq_vectors.h | 244 +++++++++++++++++++++++ arch/x86/kernel/apic/vector.c | 2 + 2 files changed, 246 insertions(+) diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 1599d394c8c1..bc09c5cf6390 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -137,6 +137,250 @@ DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); DEFINE_IRQ_VECTOR_EVENT(thermal_apic); #endif +TRACE_EVENT(vector_config, + + TP_PROTO(unsigned int irq, unsigned int vector, + unsigned int cpu, unsigned int apicdest), + + TP_ARGS(irq, vector, cpu, apicdest), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( unsigned int, cpu ) + __field( unsigned int, apicdest ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = vector; + __entry->cpu = cpu; + __entry->apicdest = apicdest; + ), + + TP_printk("irq=%u vector=%u cpu=%u apicdest=0x%08x", + __entry->irq, __entry->vector, __entry->cpu, + __entry->apicdest) +); + +DECLARE_EVENT_CLASS(vector_mod, + + TP_PROTO(unsigned int irq, unsigned int vector, + unsigned int cpu, unsigned int prev_vector, + unsigned int prev_cpu), + + TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( unsigned int, cpu ) + __field( unsigned int, prev_vector ) + __field( unsigned int, prev_cpu ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = vector; + __entry->cpu = cpu; + __entry->prev_vector = prev_vector; + __entry->prev_cpu = prev_cpu; + + ), + + TP_printk("irq=%u vector=%u cpu=%u prev_vector=%u prev_cpu=%u", + __entry->irq, __entry->vector, __entry->cpu, + __entry->prev_vector, __entry->prev_cpu) +); + +#define DEFINE_IRQ_VECTOR_MOD_EVENT(name) \ +DEFINE_EVENT_FN(vector_mod, name, \ + TP_PROTO(unsigned int irq, unsigned int vector, \ + unsigned int cpu, unsigned int prev_vector, \ + unsigned int prev_cpu), \ + TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_MOD_EVENT(vector_update); +DEFINE_IRQ_VECTOR_MOD_EVENT(vector_clear); + +DECLARE_EVENT_CLASS(vector_reserve, + + TP_PROTO(unsigned int irq, int ret), + + TP_ARGS(irq, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%u ret=%d", __entry->irq, __entry->ret) +); + +#define DEFINE_IRQ_VECTOR_RESERVE_EVENT(name) \ +DEFINE_EVENT_FN(vector_reserve, name, \ + TP_PROTO(unsigned int irq, int ret), \ + 
TP_ARGS(irq, ret), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve_managed); +DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve); + +TRACE_EVENT(vector_alloc, + + TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, + int ret), + + TP_ARGS(irq, vector, ret, reserved), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( bool, reserved ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = ret < 0 ? 0 : vector; + __entry->reserved = reserved; + __entry->ret = ret > 0 ? 0 : ret; + ), + + TP_printk("irq=%u vector=%u reserved=%d ret=%d", + __entry->irq, __entry->vector, + __entry->reserved, __entry->ret) +); + +TRACE_EVENT(vector_alloc_managed, + + TP_PROTO(unsigned int irq, unsigned int vector, + int ret), + + TP_ARGS(irq, vector, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = ret < 0 ? 0 : vector; + __entry->ret = ret > 0 ? 0 : ret; + ), + + TP_printk("irq=%u vector=%u ret=%d", + __entry->irq, __entry->vector, __entry->ret) +); + +DECLARE_EVENT_CLASS(vector_activate, + + TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, + bool early), + + TP_ARGS(irq, is_managed, can_reserve, early), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_managed ) + __field( bool, can_reserve ) + __field( bool, early ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_managed = is_managed; + __entry->can_reserve = can_reserve; + __entry->early = early; + ), + + TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", + __entry->irq, __entry->is_managed, __entry->can_reserve, + __entry->early) +); + +#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ +DEFINE_EVENT_FN(vector_activate, name, \ + TP_PROTO(unsigned int irq, bool is_managed, \ + bool can_reserve, bool early), \ + TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); +DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); + +TRACE_EVENT(vector_teardown, + + TP_PROTO(unsigned int irq, bool is_managed, bool has_reserved), + + TP_ARGS(irq, is_managed, has_reserved), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_managed ) + __field( bool, has_reserved ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_managed = is_managed; + __entry->has_reserved = has_reserved; + ), + + TP_printk("irq=%u is_managed=%d has_reserved=%d", + __entry->irq, __entry->is_managed, __entry->has_reserved) +); + +TRACE_EVENT(vector_setup, + + TP_PROTO(unsigned int irq, bool is_legacy, int ret), + + TP_ARGS(irq, is_legacy, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_legacy ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_legacy = is_legacy; + __entry->ret = ret; + ), + + TP_printk("irq=%u is_legacy=%d ret=%d", + __entry->irq, __entry->is_legacy, __entry->ret) +); + +TRACE_EVENT(vector_free_moved, + + TP_PROTO(unsigned int irq, unsigned int vector, bool is_managed), + + TP_ARGS(irq, vector, is_managed), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( bool, is_managed ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = vector; + __entry->is_managed = is_managed; + ), + + TP_printk("irq=%u vector=%u is_managed=%d", + __entry->irq, __entry->vector, __entry->is_managed) +); + + 
#endif /* CONFIG_X86_LOCAL_APIC */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index acdc74df649d..a2761740d345 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -22,6 +22,8 @@ #include <asm/desc.h> #include <asm/irq_remapping.h> +#include <asm/trace/irq_vectors.h> + struct apic_chip_data { struct irq_cfg cfg; unsigned int cpu; From 69cde0004a4b5cfc7d1cec4ef9ce4cf4e26142f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:42 +0200 Subject: [PATCH 40/60] x86/vector: Use matrix allocator for vector assignment Replace the magic vector allocation code by a simple bitmap matrix allocator. This avoids loops and hoops over CPUs and vector arrays, so in case of densely used vector spaces it's way faster. This also gets rid of the magic 'spread the vectors across priority levels' heuristics in the current allocator: The comment in __assign_irq_vector says: * NOTE! The local APIC isn't very good at handling * multiple interrupts at the same interrupt level. * As the interrupt level is determined by taking the * vector number and shifting that right by 4, we * want to spread these out a bit so that they don't * all fall in the same interrupt level. After doing some palaeontological research, the following was found in the PPro Developer Manual Volume 3: "7.4.2. Valid Interrupts The local and I/O APICs support 240 distinct vectors in the range of 16 to 255. Interrupt priority is implied by its vector, according to the following relationship: priority = vector / 16 One is the lowest priority and 15 is the highest. Vectors 16 through 31 are reserved for exclusive use by the processor. The remaining vectors are for general use. The processor's local APIC includes an in-service entry and a holding entry for each priority level. To avoid losing interrupts, software should allocate no more than 2 interrupt vectors per priority." The current SDM tells nothing about that, instead it states: "If more than one interrupt is generated with the same vector number, the local APIC can set the bit for the vector both in the IRR and the ISR. This means that for the Pentium 4 and Intel Xeon processors, the IRR and ISR can queue two interrupts for each interrupt vector: one in the IRR and one in the ISR. Any additional interrupts issued for the same interrupt vector are collapsed into the single bit in the IRR. For the P6 family and Pentium processors, the IRR and ISR registers can queue no more than two interrupts per interrupt vector and will reject other interrupts that are received within the same vector." Which means that on P6/Pentium the APIC will reject a new message and tell the sender to retry, which increases the load on the APIC bus and nothing more. There is no affirmative answer from Intel on that, but it's a sane approach to remove that for the following reasons: 1) No other (relevant Open Source) operating system bothers to implement this or mentions it at all. 2) The current allocator has no enforcement for this and especially the legacy interrupts, which are the main source of interrupts on these P6 and older systems, are allocated linearly in the same priority level and just work. 3) The current machines have no problem with that at all as verified with some experiments. 4) AMD at least confirmed that such an issue is unknown. 5) P6 and older are dinosaurs almost 20 years EOL, so there is really no reason to worry about that too much.
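As an aside, the arithmetic behind the quoted rule is trivial; the following standalone sketch (illustrative only, not part of this patch, with simplified vector range constants) shows the priority level implied by a vector and why the old allocator stepped candidate vectors by 16, while the matrix allocator can simply fill the vector space linearly:

/* Illustration of "priority = vector / 16", i.e. the upper nibble.
 * Plain user space C; the range constants below are simplified
 * assumptions, not the kernel's definitions.
 */
#include <stdio.h>

#define FIRST_EXTERNAL_VECTOR	0x20	/* 0x00-0x1f: CPU exceptions */
#define FIRST_SYSTEM_VECTOR	0xec	/* simplified upper bound */

static unsigned int vector_priority(unsigned int vector)
{
	return vector >> 4;		/* same as vector / 16 */
}

int main(void)
{
	unsigned int v;

	/* Old scheme: step by 16 so that consecutive allocations land
	 * in different priority levels: 0x31, 0x41, 0x51, ...
	 */
	for (v = 0x31; v < FIRST_SYSTEM_VECTOR; v += 16)
		printf("spread: vector 0x%02x -> priority %u\n",
		       v, vector_priority(v));

	/* New scheme: linear allocation fills a level before moving on */
	for (v = FIRST_EXTERNAL_VECTOR; v < 0x30; v++)
		printf("linear: vector 0x%02x -> priority %u\n",
		       v, vector_priority(v));
	return 0;
}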
Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.443678104@linutronix.de --- arch/x86/kernel/apic/vector.c | 290 ++++++++++++++-------------------- 1 file changed, 117 insertions(+), 173 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a2761740d345..88219b80d9ec 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -28,16 +28,15 @@ struct apic_chip_data { struct irq_cfg cfg; unsigned int cpu; unsigned int prev_cpu; + unsigned int irq; struct hlist_node clist; - cpumask_var_t domain; - cpumask_var_t old_domain; u8 move_in_progress : 1; }; struct irq_domain *x86_vector_domain; EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask; +static cpumask_var_t vector_searchmask; static struct irq_chip lapic_controller; static struct irq_matrix *vector_matrix; #ifdef CONFIG_SMP @@ -101,194 +100,124 @@ static struct apic_chip_data *alloc_apic_chip_data(int node) struct apic_chip_data *apicd; apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node); - if (!apicd) - return NULL; - if (!zalloc_cpumask_var_node(&apicd->domain, GFP_KERNEL, node)) - goto out_data; - if (!zalloc_cpumask_var_node(&apicd->old_domain, GFP_KERNEL, node)) - goto out_domain; - INIT_HLIST_NODE(&apicd->clist); + if (apicd) + INIT_HLIST_NODE(&apicd->clist); return apicd; -out_domain: - free_cpumask_var(apicd->domain); -out_data: - kfree(apicd); - return NULL; } static void free_apic_chip_data(struct apic_chip_data *apicd) { - if (apicd) { - free_cpumask_var(apicd->domain); - free_cpumask_var(apicd->old_domain); - kfree(apicd); - } + kfree(apicd); } -static int __assign_irq_vector(int irq, struct apic_chip_data *d, - const struct cpumask *mask, - struct irq_data *irqd) +static void apic_update_irq_cfg(struct irq_data *irqd) { - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; - static int current_offset = VECTOR_OFFSET_START % 16; - int cpu, vector; + struct apic_chip_data *apicd = apic_chip_data(irqd); - /* - * If there is still a move in progress or the previous move has not - * been cleaned up completely, tell the caller to come back later. 
- */ - if (d->cfg.old_vector) - return -EBUSY; + lockdep_assert_held(&vector_lock); - /* Only try and allocate irqs on cpus that are present */ - cpumask_clear(d->old_domain); - cpumask_clear(searched_cpumask); - cpu = cpumask_first_and(mask, cpu_online_mask); - while (cpu < nr_cpu_ids) { - int new_cpu, offset; + apicd->cfg.dest_apicid = apic->calc_dest_apicid(apicd->cpu); + irq_data_update_effective_affinity(irqd, cpumask_of(apicd->cpu)); + trace_vector_config(irqd->irq, apicd->cfg.vector, apicd->cpu, + apicd->cfg.dest_apicid); +} - cpumask_copy(vector_cpumask, cpumask_of(cpu)); +static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, + unsigned int newcpu) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + struct irq_desc *desc = irq_data_to_desc(irqd); - /* - * Clear the offline cpus from @vector_cpumask for searching - * and verify whether the result overlaps with @mask. If true, - * then the call to apic->cpu_mask_to_apicid() will - * succeed as well. If not, no point in trying to find a - * vector in this mask. - */ - cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask); - if (!cpumask_intersects(vector_searchmask, mask)) - goto next_cpu; + lockdep_assert_held(&vector_lock); - if (cpumask_subset(vector_cpumask, d->domain)) { - if (cpumask_equal(vector_cpumask, d->domain)) - goto success; - /* - * Mark the cpus which are not longer in the mask for - * cleanup. - */ - cpumask_andnot(d->old_domain, d->domain, vector_cpumask); - vector = d->cfg.vector; - goto update; - } + trace_vector_update(irqd->irq, newvec, newcpu, apicd->cfg.vector, + apicd->cpu); - vector = current_vector; - offset = current_offset; -next: - vector += 16; - if (vector >= FIRST_SYSTEM_VECTOR) { - offset = (offset + 1) % 16; - vector = FIRST_EXTERNAL_VECTOR + offset; - } - - /* If the search wrapped around, try the next cpu */ - if (unlikely(current_vector == vector)) - goto next_cpu; - - if (test_bit(vector, system_vectors)) - goto next; - - for_each_cpu(new_cpu, vector_searchmask) { - if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector])) - goto next; - } - /* Found one! */ - current_vector = vector; - current_offset = offset; - /* Schedule the old vector for cleanup on all cpus */ - if (d->cfg.vector) - cpumask_copy(d->old_domain, d->domain); - for_each_cpu(new_cpu, vector_searchmask) - per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); - goto update; - -next_cpu: - /* - * We exclude the current @vector_cpumask from the requested - * @mask and try again with the next online cpu in the - * result. We cannot modify @mask, so we use @vector_cpumask - * as a temporary buffer here as it will be reassigned when - * calling apic->vector_allocation_domain() above. 
- */ - cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); - cpumask_andnot(vector_cpumask, mask, searched_cpumask); - cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); - continue; + /* Setup the vector move, if required */ + if (apicd->cfg.vector && cpu_online(apicd->cpu)) { + apicd->move_in_progress = true; + apicd->cfg.old_vector = apicd->cfg.vector; + apicd->prev_cpu = apicd->cpu; + } else { + apicd->cfg.old_vector = 0; } - return -ENOSPC; -update: + apicd->cfg.vector = newvec; + apicd->cpu = newcpu; + BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); + per_cpu(vector_irq, newcpu)[newvec] = desc; +} + +static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int vector = apicd->cfg.vector; + unsigned int cpu = apicd->cpu; + /* - * Exclude offline cpus from the cleanup mask and set the - * move_in_progress flag when the result is not empty. + * If the current target CPU is online and in the new requested + * affinity mask, there is no point in moving the interrupt from + * one CPU to another. */ - cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); - d->move_in_progress = !cpumask_empty(d->old_domain); - d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0; - d->prev_cpu = d->cpu; - d->cfg.vector = vector; - cpumask_copy(d->domain, vector_cpumask); -success: - /* - * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail - * as we already established, that mask & d->domain & cpu_online_mask - * is not empty. - * - * vector_searchmask is a subset of d->domain and has the offline - * cpus masked out. - */ - cpumask_and(vector_searchmask, vector_searchmask, mask); - BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqd, - &d->cfg.dest_apicid)); - d->cpu = cpumask_first(vector_searchmask); + if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) + return 0; + + vector = irq_matrix_alloc(vector_matrix, dest, false, &cpu); + if (vector > 0) + apic_update_vector(irqd, vector, cpu); + trace_vector_alloc(irqd->irq, vector, false, vector); + return vector; +} + +static int assign_vector_locked(struct irq_data *irqd, + const struct cpumask *dest) +{ + int vector = allocate_vector(irqd, dest); + + if (vector < 0) + return vector; + + apic_update_irq_cfg(irqd); return 0; } -static int assign_irq_vector(int irq, struct apic_chip_data *apicd, - const struct cpumask *mask, - struct irq_data *irqd) +static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest) { - int err; unsigned long flags; + int ret; raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, apicd, mask, irqd); + cpumask_and(vector_searchmask, dest, cpu_online_mask); + ret = assign_vector_locked(irqd, vector_searchmask); raw_spin_unlock_irqrestore(&vector_lock, flags); - return err; + return ret; } -static int assign_irq_vector_policy(int irq, int node, - struct apic_chip_data *apicd, - struct irq_alloc_info *info, - struct irq_data *irqd) +static int assign_irq_vector_policy(struct irq_data *irqd, + struct irq_alloc_info *info, int node) { if (info->mask) - return assign_irq_vector(irq, apicd, info->mask, irqd); + return assign_irq_vector(irqd, info->mask); if (node != NUMA_NO_NODE && - assign_irq_vector(irq, apicd, cpumask_of_node(node), irqd) == 0) + !assign_irq_vector(irqd, cpumask_of_node(node))) return 0; - return assign_irq_vector(irq, apicd, cpu_online_mask, irqd); + return assign_irq_vector(irqd, cpu_online_mask); } -static void 
clear_irq_vector(int irq, struct apic_chip_data *apicd) +static void clear_irq_vector(struct irq_data *irqd) { + struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned int vector = apicd->cfg.vector; + lockdep_assert_held(&vector_lock); if (!vector) return; + trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->cfg.old_vector, + apicd->prev_cpu); + per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; + irq_matrix_free(vector_matrix, apicd->cpu, vector, false); apicd->cfg.vector = 0; /* Clean up move in progress */ @@ -297,6 +226,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *apicd) return; per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; + irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, false); + apicd->cfg.old_vector = 0; apicd->move_in_progress = 0; hlist_del_init(&apicd->clist); } @@ -313,7 +244,7 @@ static void x86_vector_free_irqs(struct irq_domain *domain, irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i); if (irqd && irqd->chip_data) { raw_spin_lock_irqsave(&vector_lock, flags); - clear_irq_vector(virq + i, irqd->chip_data); + clear_irq_vector(irqd); apicd = irqd->chip_data; irq_domain_reset_irq_data(irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); @@ -328,6 +259,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, struct irq_alloc_info *info = arg; struct apic_chip_data *apicd; struct irq_data *irqd; + unsigned long flags; int i, err, node; if (disable_apic) @@ -348,23 +280,30 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, goto error; } + apicd->irq = virq + i; irqd->chip = &lapic_controller; irqd->chip_data = apicd; irqd->hwirq = virq + i; irqd_set_single_target(irqd); /* - * Make sure, that the legacy to IOAPIC transition stays on - * the same vector. This is required for check_timer() to - * work correctly as it might switch back to legacy mode. + * Legacy vectors are already assigned when the IOAPIC + * takes them over. They stay on the same vector. This is + * required for check_timer() to work correctly as it might + * switch back to legacy mode. Only update the hardware + * config. 
*/ if (info->flags & X86_IRQ_ALLOC_LEGACY) { apicd->cfg.vector = ISA_IRQ_VECTOR(virq + i); apicd->cpu = 0; - cpumask_copy(apicd->domain, cpumask_of(0)); + trace_vector_setup(virq + i, true, 0); + raw_spin_lock_irqsave(&vector_lock, flags); + apic_update_irq_cfg(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); + continue; } - err = assign_irq_vector_policy(virq + i, node, apicd, info, - irqd); + err = assign_irq_vector_policy(irqd, info, node); + trace_vector_setup(virq + i, false, err); if (err) goto error; } @@ -498,9 +437,7 @@ int __init arch_early_irq_init(void) arch_init_msi_domain(x86_vector_domain); arch_init_htirq_domain(x86_vector_domain); - BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); - BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); /* * Allocate the vector matrix allocator data structure and limit the @@ -523,8 +460,10 @@ static void vector_update_shutdown_irqs(void) struct irq_data *irqd = irq_desc_get_irq_data(desc); struct apic_chip_data *ad = apic_chip_data(irqd); - if (ad && ad->cfg.vector && ad->cpu == smp_processor_id()) - this_cpu_write(vector_irq[ad->cfg.vector], desc); + if (!ad || !ad->cfg.vector || ad->cpu != smp_processor_id()) + continue; + this_cpu_write(vector_irq[ad->cfg.vector], desc); + irq_matrix_assign(vector_matrix, ad->cfg.vector); } } @@ -600,8 +539,7 @@ void apic_ack_edge(struct irq_data *irqd) static int apic_set_affinity(struct irq_data *irqd, const struct cpumask *dest, bool force) { - struct apic_chip_data *apicd = irqd->chip_data; - int err, irq = irqd->irq; + int err; if (!IS_ENABLED(CONFIG_SMP)) return -EPERM; @@ -609,7 +547,7 @@ static int apic_set_affinity(struct irq_data *irqd, if (!cpumask_intersects(dest, cpu_online_mask)) return -EINVAL; - err = assign_irq_vector(irq, apicd, dest, irqd); + err = assign_irq_vector(irqd, dest); return err ? 
err : IRQ_SET_MASK_OK; } @@ -622,6 +560,19 @@ static struct irq_chip lapic_controller = { #ifdef CONFIG_SMP +static void free_moved_vector(struct apic_chip_data *apicd) +{ + unsigned int vector = apicd->cfg.old_vector; + unsigned int cpu = apicd->prev_cpu; + + trace_vector_free_moved(apicd->irq, vector, false); + irq_matrix_free(vector_matrix, cpu, vector, false); + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + hlist_del_init(&apicd->clist); + apicd->cfg.old_vector = 0; + apicd->move_in_progress = 0; +} + asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) { struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); @@ -649,9 +600,7 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); continue; } - hlist_del_init(&apicd->clist); - __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); - apicd->cfg.old_vector = 0; + free_moved_vector(apicd); } raw_spin_unlock(&vector_lock); @@ -786,12 +735,7 @@ void irq_force_complete_move(struct irq_desc *desc) pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", irqd->irq, vector); } - per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; - /* Cleanup the left overs of the (half finished) move */ - cpumask_clear(apicd->old_domain); - apicd->cfg.old_vector = 0; - apicd->move_in_progress = 0; - hlist_del_init(&apicd->clist); + free_moved_vector(apicd); unlock: raw_spin_unlock(&vector_lock); } From baab1e84b1124bfd3e40ef6c8e05b2a15136e3d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:43 +0200 Subject: [PATCH 41/60] x86/apic: Remove unused callbacks Now that the old allocator is gone, these apic functions are unused. Remove them. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. 
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.524662349@linutronix.de --- arch/x86/kernel/apic/apic_common.c | 48 --------------------------- arch/x86/kernel/apic/apic_flat_64.c | 4 --- arch/x86/kernel/apic/apic_noop.c | 10 ------ arch/x86/kernel/apic/apic_numachip.c | 4 --- arch/x86/kernel/apic/bigsmp_32.c | 2 -- arch/x86/kernel/apic/probe_32.c | 2 -- arch/x86/kernel/apic/x2apic_cluster.c | 48 --------------------------- arch/x86/kernel/apic/x2apic_phys.c | 2 -- arch/x86/kernel/apic/x2apic_uv_x.c | 14 -------- arch/x86/kernel/vsmp_64.c | 19 ----------- arch/x86/xen/apic.c | 2 -- 11 files changed, 155 deletions(-) diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index ddc6a4301588..a360801779ae 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -11,64 +11,16 @@ u32 apic_default_calc_apicid(unsigned int cpu) return per_cpu(x86_cpu_to_apicid, cpu); } -int default_cpu_mask_to_apicid(const struct cpumask *msk, struct irq_data *irqd, - unsigned int *apicid) -{ - unsigned int cpu = cpumask_first(msk); - - if (cpu >= nr_cpu_ids) - return -EINVAL; - *apicid = per_cpu(x86_cpu_to_apicid, cpu); - irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); - return 0; -} - u32 apic_flat_calc_apicid(unsigned int cpu) { return 1U << cpu; } -int flat_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqd, - unsigned int *apicid) - -{ - struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqd); - unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS; - - if (!cpu_mask) - return -EINVAL; - *apicid = (unsigned int)cpu_mask; - cpumask_bits(effmsk)[0] = cpu_mask; - return 0; -} - bool default_check_apicid_used(physid_mask_t *map, int apicid) { return physid_isset(apicid, *map); } -void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - /* - * Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. 
- */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -void default_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - cpumask_copy(retmask, cpumask_of(cpu)); -} - void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { *retmap = *phys_map; diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 697704443fda..aa85690e9b64 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -158,7 +158,6 @@ static struct apic apic_flat __ro_after_init = { .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -171,7 +170,6 @@ static struct apic apic_flat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = default_send_IPI_single, @@ -253,7 +251,6 @@ static struct apic apic_physflat __ro_after_init = { .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, @@ -267,7 +264,6 @@ static struct apic apic_physflat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = default_send_IPI_single_phys, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index d8c24e6f1a11..0285f28d531a 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -83,14 +83,6 @@ static int noop_apic_id_registered(void) return physid_isset(0, phys_cpu_present_map); } -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - if (cpu != 0) - pr_warning("APIC: Vector allocated for non-BSP cpu\n"); - cpumask_copy(retmask, cpumask_of(cpu)); -} - static u32 noop_apic_read(u32 reg) { WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); @@ -125,7 +117,6 @@ struct apic apic_noop __ro_after_init = { .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, - .vector_allocation_domain = noop_vector_allocation_domain, .init_apic_ldr = noop_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, @@ -141,7 +132,6 @@ struct apic apic_noop __ro_after_init = { .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = noop_send_IPI, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 4ec293b30eb8..134e04506ab4 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -253,7 +253,6 @@ static const struct apic apic_numachip1 __refconst = { .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -266,7 +265,6 @@ static const struct apic apic_numachip1 __refconst = { .get_apic_id = numachip1_get_apic_id, .set_apic_id = numachip1_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = numachip_send_IPI_one, @@ -304,7 +302,6 @@ static const struct apic 
apic_numachip2 __refconst = { .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -317,7 +314,6 @@ static const struct apic apic_numachip2 __refconst = { .get_apic_id = numachip2_get_apic_id, .set_apic_id = numachip2_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = numachip_send_IPI_one, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index de2e8597f2df..7b754c513fa5 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -158,7 +158,6 @@ static struct apic apic_bigsmp __ro_after_init = { .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, @@ -171,7 +170,6 @@ static struct apic apic_bigsmp __ro_after_init = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = default_send_IPI_single_phys, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 6a9020a3c243..fa22017de806 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -113,7 +113,6 @@ static struct apic apic_default __ro_after_init = { .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, @@ -126,7 +125,6 @@ static struct apic apic_default __ro_after_init = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = default_send_IPI_single, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 17bf63f580d7..3da94277140f 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -91,29 +91,6 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static int -x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, - unsigned int *apicid) -{ - struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); - struct cluster_mask *cmsk; - unsigned int cpu; - u32 dest = 0; - - cpu = cpumask_first(mask); - if (cpu >= nr_cpu_ids) - return -EINVAL; - - cmsk = per_cpu(cluster_masks, cpu); - cpumask_clear(effmsk); - for_each_cpu_and(cpu, &cmsk->mask, mask) { - dest |= per_cpu(x86_cpu_to_logical_apicid, cpu); - cpumask_set_cpu(cpu, effmsk); - } - *apicid = dest; - return 0; -} - static u32 x2apic_calc_apicid(unsigned int cpu) { return per_cpu(x86_cpu_to_logical_apicid, cpu); @@ -198,29 +175,6 @@ static int x2apic_cluster_probe(void) return 1; } -/* - * Each x2apic cluster is an allocation domain. - */ -static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); - - /* - * To minimize vector pressure, default case of boot, device bringup - * etc will use a single cpu for the interrupt destination. 
- * - * On explicit migration requests coming from irqbalance etc, - * interrupts will be routed to the x2apic cluster (cluster-id - * derived from the first cpu in the mask) members specified - * in the mask. - */ - if (cpumask_equal(mask, cpu_online_mask)) - cpumask_copy(retmask, cpumask_of(cpu)); - else - cpumask_and(retmask, mask, &cmsk->mask); -} - static struct apic apic_x2apic_cluster __ro_after_init = { .name = "cluster x2apic", @@ -236,7 +190,6 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -249,7 +202,6 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .calc_dest_apicid = x2apic_calc_apicid, .send_IPI = x2apic_send_IPI, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index ebad7ddbfdfc..17c2c5b0b7b9 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -151,7 +151,6 @@ static struct apic apic_x2apic_phys __ro_after_init = { .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -164,7 +163,6 @@ static struct apic apic_x2apic_phys __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = x2apic_send_IPI, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 99c3c039646d..5832df6d9c37 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -525,18 +525,6 @@ static void uv_init_apic_ldr(void) { } -static int -uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, - unsigned int *apicid) -{ - int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid); - - if (!ret) - *apicid |= uv_apicid_hibits; - - return ret; -} - static u32 apic_uv_calc_apicid(unsigned int cpu) { return apic_default_calc_apicid(cpu) | uv_apicid_hibits; @@ -593,7 +581,6 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -606,7 +593,6 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .calc_dest_apicid = apic_uv_calc_apicid, .send_IPI = uv_send_IPI_one, diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index b034b1b14b9c..44685fb2a192 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -26,9 +26,6 @@ #define TOPOLOGY_REGISTER_OFFSET 0x10 -/* Flag below is initialized once during vSMP PCI initialization. 
*/ -static int irq_routing_comply = 1; - #if defined CONFIG_PCI && defined CONFIG_PARAVIRT /* * Interrupt control on vSMPowered systems: @@ -105,9 +102,6 @@ static void __init set_vsmp_pv_ops(void) if (cap & ctl & BIT(8)) { ctl &= ~BIT(8); - /* Interrupt routing set to ignore */ - irq_routing_comply = 0; - #ifdef CONFIG_PROC_FS /* Don't let users change irq affinity via procfs */ no_irq_affinity = 1; @@ -211,23 +205,10 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) return hard_smp_processor_id() >> index_msb; } -/* - * In vSMP, all cpus should be capable of handling interrupts, regardless of - * the APIC used. - */ -static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - cpumask_setall(retmask); -} - static void vsmp_apic_post_init(void) { /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; - - if (!irq_routing_comply) - apic->vector_allocation_domain = fill_vector_allocation_domain; } void __init vsmp_init(void) diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index fb8522bed08c..4ba3fd7039b0 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -164,7 +164,6 @@ static struct apic xen_pv_apic = { /* .dest_logical - default_send_IPI_ use it but we use our own. */ .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */ - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = xen_noop, /* setup_local_APIC calls it */ .ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */ @@ -177,7 +176,6 @@ static struct apic xen_pv_apic = { .get_apic_id = xen_get_apic_id, .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .calc_dest_apicid = apic_flat_calc_apicid, #ifdef CONFIG_SMP From ba801640b10d87b1c4e26cbcbe414a001255404f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:44 +0200 Subject: [PATCH 42/60] x86/vector: Compile SMP only code conditionally No point in compiling this for UP. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.603191841@linutronix.de --- arch/x86/kernel/apic/vector.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 88219b80d9ec..17d7d7fd45d9 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -450,6 +450,7 @@ int __init arch_early_irq_init(void) return arch_early_ioapic_init(); } +#ifdef CONFIG_SMP /* Temporary hack to keep things working */ static void vector_update_shutdown_irqs(void) { @@ -517,6 +518,25 @@ void lapic_offline(void) unlock_vector_lock(); } +static int apic_set_affinity(struct irq_data *irqd, + const struct cpumask *dest, bool force) +{ + int err; + + if (!IS_ENABLED(CONFIG_SMP)) + return -EPERM; + + if (!cpumask_intersects(dest, cpu_online_mask)) + return -EINVAL; + + err = assign_irq_vector(irqd, dest); + return err ? 
err : IRQ_SET_MASK_OK; } + +#else +# define apic_set_affinity NULL +#endif + static int apic_retrigger_irq(struct irq_data *irqd) { struct apic_chip_data *apicd = apic_chip_data(irqd); @@ -536,21 +556,6 @@ void apic_ack_edge(struct irq_data *irqd) ack_APIC_irq(); } -static int apic_set_affinity(struct irq_data *irqd, - const struct cpumask *dest, bool force) -{ - int err; - - if (!IS_ENABLED(CONFIG_SMP)) - return -EPERM; - - if (!cpumask_intersects(dest, cpu_online_mask)) - return -EINVAL; - - err = assign_irq_vector(irqd, dest); - return err ? err : IRQ_SET_MASK_OK; -} - static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, From ba224feac8bb367edd62da33552353d4bdc3fe3a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:45 +0200 Subject: [PATCH 43/60] x86/vector: Untangle internal state from irq_cfg The vector management state is not required to live in irq_cfg. irq_cfg is only relevant for the dependent irq domains (IOAPIC, DMAR, MSI ...). The separation of the vector management state allows directing a shut down interrupt to a special shutdown vector w/o confusing the internal state of the vector management. Preparatory change for the rework of managed interrupts and the global vector reservation scheme. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.683712356@linutronix.de --- arch/x86/include/asm/hw_irq.h | 3 +- arch/x86/kernel/apic/vector.c | 88 +++++++++++++++++++---------------- 2 files changed, 49 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 386368890376..661540a93072 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -124,8 +124,7 @@ struct irq_alloc_info { struct irq_cfg { unsigned int dest_apicid; - u8 vector; - u8 old_vector; + unsigned int vector; }; extern struct irq_cfg *irq_cfg(unsigned int irq); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 17d7d7fd45d9..f08d44fabef4 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -25,7 +25,9 @@ #include <asm/trace/irq_vectors.h> struct apic_chip_data { - struct irq_cfg cfg; + struct irq_cfg hw_irq_cfg; + unsigned int vector; + unsigned int prev_vector; unsigned int cpu; unsigned int prev_cpu; unsigned int irq; @@ -86,7 +88,7 @@ struct irq_cfg *irqd_cfg(struct irq_data *irqd) { struct apic_chip_data *apicd = apic_chip_data(irqd); - return apicd ? &apicd->cfg : NULL; + return apicd ?
&apicd->hw_irq_cfg : NULL; } EXPORT_SYMBOL_GPL(irqd_cfg); @@ -110,16 +112,18 @@ static void free_apic_chip_data(struct apic_chip_data *apicd) kfree(apicd); } -static void apic_update_irq_cfg(struct irq_data *irqd) +static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector, + unsigned int cpu) { struct apic_chip_data *apicd = apic_chip_data(irqd); lockdep_assert_held(&vector_lock); - apicd->cfg.dest_apicid = apic->calc_dest_apicid(apicd->cpu); - irq_data_update_effective_affinity(irqd, cpumask_of(apicd->cpu)); - trace_vector_config(irqd->irq, apicd->cfg.vector, apicd->cpu, - apicd->cfg.dest_apicid); + apicd->hw_irq_cfg.vector = vector; + apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); + irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); + trace_vector_config(irqd->irq, vector, cpu, + apicd->hw_irq_cfg.dest_apicid); } static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, @@ -130,19 +134,19 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, lockdep_assert_held(&vector_lock); - trace_vector_update(irqd->irq, newvec, newcpu, apicd->cfg.vector, + trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, apicd->cpu); /* Setup the vector move, if required */ - if (apicd->cfg.vector && cpu_online(apicd->cpu)) { + if (apicd->vector && cpu_online(apicd->cpu)) { apicd->move_in_progress = true; - apicd->cfg.old_vector = apicd->cfg.vector; + apicd->prev_vector = apicd->vector; apicd->prev_cpu = apicd->cpu; } else { - apicd->cfg.old_vector = 0; + apicd->prev_vector = 0; } - apicd->cfg.vector = newvec; + apicd->vector = newvec; apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); per_cpu(vector_irq, newcpu)[newvec] = desc; @@ -151,8 +155,10 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) { struct apic_chip_data *apicd = apic_chip_data(irqd); - int vector = apicd->cfg.vector; unsigned int cpu = apicd->cpu; + int vector = apicd->vector; + + lockdep_assert_held(&vector_lock); /* * If the current target CPU is online and in the new requested @@ -172,12 +178,13 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) static int assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest) { + struct apic_chip_data *apicd = apic_chip_data(irqd); int vector = allocate_vector(irqd, dest); if (vector < 0) return vector; - apic_update_irq_cfg(irqd); + apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); return 0; } @@ -207,27 +214,28 @@ static int assign_irq_vector_policy(struct irq_data *irqd, static void clear_irq_vector(struct irq_data *irqd) { struct apic_chip_data *apicd = apic_chip_data(irqd); - unsigned int vector = apicd->cfg.vector; + unsigned int vector = apicd->vector; lockdep_assert_held(&vector_lock); + if (!vector) return; - trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->cfg.old_vector, + trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, apicd->prev_cpu); per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; irq_matrix_free(vector_matrix, apicd->cpu, vector, false); - apicd->cfg.vector = 0; + apicd->vector = 0; /* Clean up move in progress */ - vector = apicd->cfg.old_vector; + vector = apicd->prev_vector; if (!vector) return; per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, false); - apicd->cfg.old_vector = 0; + apicd->prev_vector = 0; 
apicd->move_in_progress = 0; hlist_del_init(&apicd->clist); } @@ -293,11 +301,11 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, * config. */ if (info->flags & X86_IRQ_ALLOC_LEGACY) { - apicd->cfg.vector = ISA_IRQ_VECTOR(virq + i); + apicd->vector = ISA_IRQ_VECTOR(virq + i); apicd->cpu = 0; trace_vector_setup(virq + i, true, 0); raw_spin_lock_irqsave(&vector_lock, flags); - apic_update_irq_cfg(irqd); + apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); raw_spin_unlock_irqrestore(&vector_lock, flags); continue; } @@ -319,7 +327,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) { - unsigned int cpu, vec, prev_cpu, prev_vec; + unsigned int cpu, vector, prev_cpu, prev_vector; struct apic_chip_data *apicd; unsigned long flags; int irq; @@ -344,14 +352,14 @@ void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, raw_spin_lock_irqsave(&vector_lock, flags); cpu = apicd->cpu; - vec = apicd->cfg.vector; + vector = apicd->vector; prev_cpu = apicd->prev_cpu; - prev_vec = apicd->cfg.old_vector; + prev_vector = apicd->prev_vector; raw_spin_unlock_irqrestore(&vector_lock, flags); - seq_printf(m, "%*sVector: %5u\n", ind, "", vec); + seq_printf(m, "%*sVector: %5u\n", ind, "", vector); seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); - if (prev_vec) { - seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vec); + if (prev_vector) { + seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector); seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); } } @@ -461,10 +469,10 @@ static void vector_update_shutdown_irqs(void) struct irq_data *irqd = irq_desc_get_irq_data(desc); struct apic_chip_data *ad = apic_chip_data(irqd); - if (!ad || !ad->cfg.vector || ad->cpu != smp_processor_id()) + if (!ad || !ad->vector || ad->cpu != smp_processor_id()) continue; - this_cpu_write(vector_irq[ad->cfg.vector], desc); - irq_matrix_assign(vector_matrix, ad->cfg.vector); + this_cpu_write(vector_irq[ad->vector], desc); + irq_matrix_assign(vector_matrix, ad->vector); } } @@ -543,7 +551,7 @@ static int apic_retrigger_irq(struct irq_data *irqd) unsigned long flags; raw_spin_lock_irqsave(&vector_lock, flags); - apic->send_IPI(apicd->cpu, apicd->cfg.vector); + apic->send_IPI(apicd->cpu, apicd->vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -567,14 +575,14 @@ static struct irq_chip lapic_controller = { static void free_moved_vector(struct apic_chip_data *apicd) { - unsigned int vector = apicd->cfg.old_vector; + unsigned int vector = apicd->prev_vector; unsigned int cpu = apicd->prev_cpu; trace_vector_free_moved(apicd->irq, vector, false); irq_matrix_free(vector_matrix, cpu, vector, false); __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); hlist_del_init(&apicd->clist); - apicd->cfg.old_vector = 0; + apicd->prev_vector = 0; apicd->move_in_progress = 0; } @@ -589,7 +597,7 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) raw_spin_lock(&vector_lock); hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { - unsigned int irr, vector = apicd->cfg.old_vector; + unsigned int irr, vector = apicd->prev_vector; /* * Paranoia: Check if the vector that needs to be cleaned @@ -623,7 +631,7 @@ static void __send_cleanup_vector(struct apic_chip_data *apicd) hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); } else { - apicd->cfg.old_vector 
= 0; + apicd->prev_vector = 0; } raw_spin_unlock(&vector_lock); } @@ -632,7 +640,7 @@ void send_cleanup_vector(struct irq_cfg *cfg) { struct apic_chip_data *apicd; - apicd = container_of(cfg, struct apic_chip_data, cfg); + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); if (apicd->move_in_progress) __send_cleanup_vector(apicd); } @@ -641,11 +649,11 @@ static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { struct apic_chip_data *apicd; - apicd = container_of(cfg, struct apic_chip_data, cfg); + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); if (likely(!apicd->move_in_progress)) return; - if (vector == apicd->cfg.vector && apicd->cpu == smp_processor_id()) + if (vector == apicd->vector && apicd->cpu == smp_processor_id()) __send_cleanup_vector(apicd); } @@ -683,9 +691,9 @@ void irq_force_complete_move(struct irq_desc *desc) goto unlock; /* - * If old_vector is empty, no action required. + * If prev_vector is empty, no action required. */ - vector = apicd->cfg.old_vector; + vector = apicd->prev_vector; if (!vector) goto unlock; From 2a85386a73fa57b114ba66421b57d3850dbcef9f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:46 +0200 Subject: [PATCH 44/60] x86/apic/msi: Force reactivation of interrupts at startup time MSI(X) interrupts need a valid vector configuration early at allocation time, i.e. before the PCI core enables MSI(X). With managed interrupts and the new global reservation scheme, the early configuration will not assign a real device vector, but a special shutdown vector. When the irq is started up, then the interrupt must be reconfigured. Tell the MSI irqdomain core about it. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.774066582@linutronix.de --- arch/x86/kernel/apic/msi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 9b18be764422..5b6dd1a85ec4 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -129,7 +129,7 @@ static struct msi_domain_ops pci_msi_domain_ops = { static struct msi_domain_info pci_msi_domain_info = { .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX, + MSI_FLAG_PCI_MSIX | MSI_FLAG_MUST_REACTIVATE, .ops = &pci_msi_domain_ops, .chip = &pci_msi_controller, .handler = handle_edge_irq, @@ -167,7 +167,8 @@ static struct irq_chip pci_msi_ir_controller = { static struct msi_domain_info pci_msi_ir_domain_info = { .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX | + MSI_FLAG_MUST_REACTIVATE, .ops = &pci_msi_domain_ops, .chip = &pci_msi_ir_controller, .handler = handle_edge_irq, From d491bdff888e8a287f6017c70a8dd10f46984851 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:47 +0200 Subject: [PATCH 45/60] iommu/vt-d: Reevaluate vector configuration on activate() With the upcoming reservation/management scheme, early activation will assign a special vector. 
The final activation at request_irq() assigns a real vector, which needs to be updated in the tables. Split out the reconfiguration code in set_affinity and use it for reactivation. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.853028808@linutronix.de --- drivers/iommu/intel_irq_remapping.c | 38 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 762d84713b7a..e274d9d12ba4 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1121,6 +1121,24 @@ struct irq_remap_ops intel_irq_remap_ops = { .get_irq_domain = intel_get_irq_domain, }; +static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) +{ + struct intel_ir_data *ir_data = irqd->chip_data; + struct irte *irte = &ir_data->irte_entry; + struct irq_cfg *cfg = irqd_cfg(irqd); + + /* + * Atomically updates the IRTE with the new destination, vector + * and flushes the interrupt entry cache. + */ + irte->vector = cfg->vector; + irte->dest_id = IRTE_DEST(cfg->dest_apicid); + + /* Update the hardware only if the interrupt is in remapped mode. */ + if (!force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) + modify_irte(&ir_data->irq_2_iommu, irte); +} + /* * Migrate the IO-APIC irq in the presence of intr-remapping. * @@ -1139,27 +1157,15 @@ static int intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct intel_ir_data *ir_data = data->chip_data; - struct irte *irte = &ir_data->irte_entry; - struct irq_cfg *cfg = irqd_cfg(data); struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); int ret; ret = parent->chip->irq_set_affinity(parent, mask, force); if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) return ret; - /* - * Atomically updates the IRTE with the new destination, vector - * and flushes the interrupt entry cache. - */ - irte->vector = cfg->vector; - irte->dest_id = IRTE_DEST(cfg->dest_apicid); - - /* Update the hardware only if the interrupt is in remapped mode. */ - if (ir_data->irq_2_iommu.mode == IRQ_REMAPPING) - modify_irte(&ir_data->irq_2_iommu, irte); - + intel_ir_reconfigure_irte(data, false); /* * After this point, all the interrupts will start arriving * at the new destination. So, time to cleanup the previous @@ -1392,9 +1398,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain, static int intel_irq_remapping_activate(struct irq_domain *domain, struct irq_data *irq_data, bool early) { - struct intel_ir_data *data = irq_data->chip_data; - - modify_irte(&data->irq_2_iommu, &data->irte_entry); + intel_ir_reconfigure_irte(irq_data, true); return 0; } From 5ba204a1817ba95a7b24dbe8ef2c7ddd4cea886e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:48 +0200 Subject: [PATCH 46/60] iommu/amd: Reevaluate vector configuration on activate() With the upcoming reservation/management scheme, early activation will assign a special vector. The final activation at request_irq() assigns a real vector, which needs to be updated in the tables. 
Split out the reconfiguration code in set_affinity and use it for reactivation. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213155.944883733@linutronix.de --- drivers/iommu/amd_iommu.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index ea03f4138f5f..a78fa34f113a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4170,16 +4170,25 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_common(domain, virq, nr_irqs); } +static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, + struct amd_ir_data *ir_data, + struct irq_2_irte *irte_info, + struct irq_cfg *cfg); + static int irq_remapping_activate(struct irq_domain *domain, struct irq_data *irq_data, bool early) { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct irq_cfg *cfg = irqd_cfg(irq_data); - if (iommu) - iommu->irte_ops->activate(data->entry, irte_info->devid, - irte_info->index); + if (!iommu) + return 0; + + iommu->irte_ops->activate(data->entry, irte_info->devid, + irte_info->index); + amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg); return 0; } @@ -4267,6 +4276,22 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data); } + +static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, + struct amd_ir_data *ir_data, + struct irq_2_irte *irte_info, + struct irq_cfg *cfg) +{ + + /* + * Atomically updates the IRTE with the new destination, vector + * and flushes the interrupt entry cache. + */ + iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid, + irte_info->index, cfg->vector, + cfg->dest_apicid); +} + static int amd_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -4284,13 +4309,7 @@ static int amd_ir_set_affinity(struct irq_data *data, if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) return ret; - /* - * Atomically updates the IRTE with the new destination, vector - * and flushes the interrupt entry cache. - */ - iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid, - irte_info->index, cfg->vector, cfg->dest_apicid); - + amd_ir_update_irte(data, iommu, ir_data, irte_info, cfg); /* * After this point, all the interrupts will start arriving * at the new destination. So, time to cleanup the previous From 90ad9e2d91067983f3328e21b306323877e5f48a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:49 +0200 Subject: [PATCH 47/60] x86/io_apic: Reevaluate vector configuration on activate() With the upcoming reservation/management scheme, early activation will assign a special vector. The final activation at request_irq() assigns a real vector, which needs to be updated in the ioapic. Split out the reconfiguration code in set_affinity and use it for reactivation. 
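All three activate() patches (iommu/vt-d, iommu/amd and this x86/io_apic one) follow the same shape. A minimal sketch of the shared pattern, with purely hypothetical names standing in for the per-driver details:

/* Pattern shared by patches 45-47 (sketch only, hypothetical names):
 * the hardware update is factored out of set_affinity() so that
 * activate() can reuse it when the real vector replaces the early
 * shutdown vector.
 */
struct hw_cfg {
	unsigned int vector;
	unsigned int dest_apicid;
};

static void hw_reconfigure(struct hw_cfg *cfg)
{
	/* Write cfg->vector and cfg->dest_apicid into the hardware
	 * entry; __ioapic_write_entry() or modify_irte() play this
	 * role in the real drivers.
	 */
}

static int example_set_affinity(struct hw_cfg *cfg)
{
	/* The parent vector domain has already picked the new target */
	hw_reconfigure(cfg);
	return 0;
}

static int example_activate(struct hw_cfg *cfg)
{
	/* Startup: commit the real vector over the shutdown vector */
	hw_reconfigure(cfg);
	return 0;
}

int main(void)
{
	struct hw_cfg cfg = { .vector = 0x21, .dest_apicid = 0 };

	example_activate(&cfg);		/* startup path */
	example_set_affinity(&cfg);	/* migration path */
	return 0;
}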
Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213156.025232175@linutronix.de --- arch/x86/kernel/apic/io_apic.c | 37 ++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a4b0c60ab8e1..18c6a4861586 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1862,26 +1862,36 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data) eoi_ioapic_pin(data->entry.vector, data); } +static void ioapic_configure_entry(struct irq_data *irqd) +{ + struct mp_chip_data *mpd = irqd->chip_data; + struct irq_cfg *cfg = irqd_cfg(irqd); + struct irq_pin_list *entry; + + /* + * Only update when the parent is the vector domain, don't touch it + * if the parent is the remapping domain. Check the installed + * ioapic chip to verify that. + */ + if (irqd->chip == &ioapic_chip) { + mpd->entry.dest = cfg->dest_apicid; + mpd->entry.vector = cfg->vector; + } + for_each_irq_pin(entry, mpd->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, mpd->entry); +} + static int ioapic_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force) { struct irq_data *parent = irq_data->parent_data; - struct mp_chip_data *data = irq_data->chip_data; - struct irq_pin_list *entry; - struct irq_cfg *cfg; unsigned long flags; int ret; ret = parent->chip->irq_set_affinity(parent, mask, force); raw_spin_lock_irqsave(&ioapic_lock, flags); - if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) { - cfg = irqd_cfg(irq_data); - data->entry.dest = cfg->dest_apicid; - data->entry.vector = cfg->vector; - for_each_irq_pin(entry, data->irq_2_pin) - __ioapic_write_entry(entry->apic, entry->pin, - data->entry); - } + if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) + ioapic_configure_entry(irq_data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; @@ -2980,12 +2990,9 @@ int mp_irqdomain_activate(struct irq_domain *domain, struct irq_data *irq_data, bool early) { unsigned long flags; - struct irq_pin_list *entry; - struct mp_chip_data *data = irq_data->chip_data; raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, data->irq_2_pin) - __ioapic_write_entry(entry->apic, entry->pin, data->entry); + ioapic_configure_entry(irq_data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); return 0; } From 2db1f959d9dc16035f2eb44ed5fdb2789b754d6a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:50 +0200 Subject: [PATCH 48/60] x86/vector: Handle managed interrupts proper Managed interrupts need to reserve interrupt vectors permanently, but as long as the interrupt is deactivated, the vector should not be active. Reserve a new system vector, which can be used to initially initialize MSI/DMAR/IOAPIC entries. In that situation the interrupts are disabled in the corresponding MSI/DMAR/IOAPIC devices. So the vector should never be sent to any CPU. When the managed interrupt is started up, a real vector is assigned from the managed vector space and configured in MSI/DMAR/IOAPIC. 
This allows a clear separation of inactive and active modes and simplifies the final decisions whether the global vector space is sufficient for CPU offline operations. The vector space can be reserved even on offline CPUs and will survive CPU offline/online operations. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213156.104616625@linutronix.de --- arch/x86/include/asm/irq_vectors.h | 8 +- arch/x86/kernel/apic/vector.c | 190 ++++++++++++++++++++++++++--- 2 files changed, 174 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index aaf8d28b5d00..1e9bd28f842d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,12 +101,8 @@ #define POSTED_INTR_NESTED_VECTOR 0xf0 #endif -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef +#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef +#define LOCAL_TIMER_VECTOR 0xee #define NR_VECTORS 256 diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f08d44fabef4..3f53572c89cb 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -32,7 +32,8 @@ struct apic_chip_data { unsigned int prev_cpu; unsigned int irq; struct hlist_node clist; - u8 move_in_progress : 1; + unsigned int move_in_progress : 1, + is_managed : 1; }; struct irq_domain *x86_vector_domain; @@ -152,6 +153,28 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, per_cpu(vector_irq, newcpu)[newvec] = desc; } +static void vector_assign_managed_shutdown(struct irq_data *irqd) +{ + unsigned int cpu = cpumask_first(cpu_online_mask); + + apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu); +} + +static int reserve_managed_vector(struct irq_data *irqd) +{ + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&vector_lock, flags); + apicd->is_managed = true; + ret = irq_matrix_reserve_managed(vector_matrix, affmsk); + raw_spin_unlock_irqrestore(&vector_lock, flags); + trace_vector_reserve_managed(irqd->irq, ret); + return ret; +} + static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) { struct apic_chip_data *apicd = apic_chip_data(irqd); @@ -200,20 +223,65 @@ static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest) return ret; } -static int assign_irq_vector_policy(struct irq_data *irqd, - struct irq_alloc_info *info, int node) +static int assign_irq_vector_any_locked(struct irq_data *irqd) { + int node = irq_data_get_node(irqd); + + if (node != NUMA_NO_NODE) { + if (!assign_vector_locked(irqd, cpumask_of_node(node))) + return 0; + } + return assign_vector_locked(irqd, cpu_online_mask); +} + +static int assign_irq_vector_any(struct irq_data *irqd) +{ + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&vector_lock, flags); + ret = assign_irq_vector_any_locked(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); 
+ return ret; +} + +static int +assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) +{ + if (irqd_affinity_is_managed(irqd)) + return reserve_managed_vector(irqd); if (info->mask) return assign_irq_vector(irqd, info->mask); - if (node != NUMA_NO_NODE && - !assign_irq_vector(irqd, cpumask_of_node(node))) + return assign_irq_vector_any(irqd); +} + +static int +assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) +{ + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + int vector, cpu; + + cpumask_and(vector_searchmask, vector_searchmask, affmsk); + cpu = cpumask_first(vector_searchmask); + if (cpu >= nr_cpu_ids) + return -EINVAL; + /* set_affinity might call here for nothing */ + if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask)) return 0; - return assign_irq_vector(irqd, cpu_online_mask); + vector = irq_matrix_alloc_managed(vector_matrix, cpu); + trace_vector_alloc_managed(irqd->irq, vector, vector); + if (vector < 0) + return vector; + apic_update_vector(irqd, vector, cpu); + apic_update_irq_cfg(irqd, vector, cpu); + return 0; } static void clear_irq_vector(struct irq_data *irqd) { struct apic_chip_data *apicd = apic_chip_data(irqd); + bool managed = irqd_affinity_is_managed(irqd); unsigned int vector = apicd->vector; lockdep_assert_held(&vector_lock); @@ -225,7 +293,7 @@ static void clear_irq_vector(struct irq_data *irqd) apicd->prev_cpu); per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; - irq_matrix_free(vector_matrix, apicd->cpu, vector, false); + irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); apicd->vector = 0; /* Clean up move in progress */ @@ -234,12 +302,86 @@ static void clear_irq_vector(struct irq_data *irqd) return; per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; - irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, false); + irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); apicd->prev_vector = 0; apicd->move_in_progress = 0; hlist_del_init(&apicd->clist); } +static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + + trace_vector_deactivate(irqd->irq, apicd->is_managed, + false, false); + + if (apicd->is_managed) + return; + + raw_spin_lock_irqsave(&vector_lock, flags); + clear_irq_vector(irqd); + vector_assign_managed_shutdown(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); +} + +static int activate_managed(struct irq_data *irqd) +{ + const struct cpumask *dest = irq_data_get_affinity_mask(irqd); + int ret; + + cpumask_and(vector_searchmask, dest, cpu_online_mask); + if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) { + /* Something in the core code broke! Survive gracefully */ + pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq); + return -EINVAL; + } + + ret = assign_managed_vector(irqd, vector_searchmask); + /* + * This should not happen. The vector reservation got buggered. Handle + * it gracefully.
+ */ + if (WARN_ON_ONCE(ret < 0)) { + pr_err("Managed startup irq %u, no vector available\n", + irqd->irq); + } + return ret; +} + +static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, + bool early) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + int ret = 0; + + trace_vector_activate(irqd->irq, apicd->is_managed, + false, early); + + if (!apicd->is_managed) + return 0; + + raw_spin_lock_irqsave(&vector_lock, flags); + if (early || irqd_is_managed_and_shutdown(irqd)) + vector_assign_managed_shutdown(irqd); + else + ret = activate_managed(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + +static void vector_free_reserved_and_managed(struct irq_data *irqd) +{ + const struct cpumask *dest = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + + trace_vector_teardown(irqd->irq, apicd->is_managed, false); + + if (apicd->is_managed) + irq_matrix_remove_managed(vector_matrix, dest); +} + static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { @@ -253,6 +395,7 @@ static void x86_vector_free_irqs(struct irq_domain *domain, if (irqd && irqd->chip_data) { raw_spin_lock_irqsave(&vector_lock, flags); clear_irq_vector(irqd); + vector_free_reserved_and_managed(irqd); apicd = irqd->chip_data; irq_domain_reset_irq_data(irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); @@ -310,7 +453,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, continue; } - err = assign_irq_vector_policy(irqd, info, node); + err = assign_irq_vector_policy(irqd, info); trace_vector_setup(virq + i, false, err); if (err) goto error; @@ -368,6 +511,8 @@ void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, static const struct irq_domain_ops x86_vector_domain_ops = { .alloc = x86_vector_alloc_irqs, .free = x86_vector_free_irqs, + .activate = x86_vector_activate, + .deactivate = x86_vector_deactivate, #ifdef CONFIG_GENERIC_IRQ_DEBUGFS .debug_show = x86_vector_debug_show, #endif @@ -531,13 +676,13 @@ static int apic_set_affinity(struct irq_data *irqd, { int err; - if (!IS_ENABLED(CONFIG_SMP)) - return -EPERM; - - if (!cpumask_intersects(dest, cpu_online_mask)) - return -EINVAL; - - err = assign_irq_vector(irqd, dest); + raw_spin_lock(&vector_lock); + cpumask_and(vector_searchmask, dest, cpu_online_mask); + if (irqd_affinity_is_managed(irqd)) + err = assign_managed_vector(irqd, vector_searchmask); + else + err = assign_vector_locked(irqd, vector_searchmask); + raw_spin_unlock(&vector_lock); return err ? err : IRQ_SET_MASK_OK; } @@ -577,9 +722,18 @@ static void free_moved_vector(struct apic_chip_data *apicd) { unsigned int vector = apicd->prev_vector; unsigned int cpu = apicd->prev_cpu; + bool managed = apicd->is_managed; - trace_vector_free_moved(apicd->irq, vector, false); - irq_matrix_free(vector_matrix, cpu, vector, false); + /* + * This should never happen. Managed interrupts are not + * migrated except on CPU down, which does not involve the + * cleanup vector. But try to keep the accounting correct + * nevertheless. 
+ */ + WARN_ON_ONCE(managed); + + trace_vector_free_moved(apicd->irq, vector, managed); + irq_matrix_free(vector_matrix, cpu, vector, managed); __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); hlist_del_init(&apicd->clist); apicd->prev_vector = 0; From 4900be83602b6be07366d3e69f756c1959f4169a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:51 +0200 Subject: [PATCH 49/60] x86/vector/msi: Switch to global reservation mode Devices with many queues allocate a huge number of interrupts and get assigned a vector for each of them, even if the queues are not active and the interrupts are never requested. This causes problems with the decision whether the global vector space is sufficient for CPU hot-unplug operations. Change it to a reservation scheme, which allows overcommitment. When the interrupt is allocated and initialized, the vector assignment merely updates the reservation request counter in the matrix allocator. This counter is used to emit warnings when the reservation exceeds the available vector space, but does not affect CPU offline operations. Like the managed interrupts, the corresponding MSI/DMAR/IOAPIC entries are directed to the special shutdown vector. When the interrupt is requested, the activation code tries to assign a real vector. If that succeeds, the interrupt is started up and functional. If that fails, request_irq() subsequently fails with -ENOSPC. This allows a clear separation of inactive and active modes and simplifies the final decisions whether the global vector space is sufficient for CPU offline operations. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y.
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213156.184211133@linutronix.de --- arch/x86/kernel/apic/vector.c | 97 +++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3f53572c89cb..46a9ae921819 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -33,7 +33,9 @@ struct apic_chip_data { unsigned int irq; struct hlist_node clist; unsigned int move_in_progress : 1, - is_managed : 1; + is_managed : 1, + can_reserve : 1, + has_reserved : 1; }; struct irq_domain *x86_vector_domain; @@ -175,9 +177,31 @@ static int reserve_managed_vector(struct irq_data *irqd) return ret; } +static void reserve_irq_vector_locked(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + + irq_matrix_reserve(vector_matrix); + apicd->can_reserve = true; + apicd->has_reserved = true; + trace_vector_reserve(irqd->irq, 0); + vector_assign_managed_shutdown(irqd); +} + +static int reserve_irq_vector(struct irq_data *irqd) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + reserve_irq_vector_locked(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return 0; +} + static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) { struct apic_chip_data *apicd = apic_chip_data(irqd); + bool resvd = apicd->has_reserved; unsigned int cpu = apicd->cpu; int vector = apicd->vector; @@ -191,10 +215,10 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) return 0; - vector = irq_matrix_alloc(vector_matrix, dest, false, &cpu); + vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); if (vector > 0) apic_update_vector(irqd, vector, cpu); - trace_vector_alloc(irqd->irq, vector, false, vector); + trace_vector_alloc(irqd->irq, vector, resvd, vector); return vector; } @@ -252,7 +276,11 @@ assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) return reserve_managed_vector(irqd); if (info->mask) return assign_irq_vector(irqd, info->mask); - return assign_irq_vector_any(irqd); + if (info->type != X86_IRQ_ALLOC_TYPE_MSI && + info->type != X86_IRQ_ALLOC_TYPE_MSIX) + return assign_irq_vector_any(irqd); + /* For MSI(X) make only a global reservation with no guarantee */ + return reserve_irq_vector(irqd); } static int @@ -314,17 +342,35 @@ static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd) unsigned long flags; trace_vector_deactivate(irqd->irq, apicd->is_managed, - false, false); + apicd->can_reserve, false); - if (apicd->is_managed) + /* Regular fixed assigned interrupt */ + if (!apicd->is_managed && !apicd->can_reserve) + return; + /* If the interrupt has a global reservation, nothing to do */ + if (apicd->has_reserved) return; raw_spin_lock_irqsave(&vector_lock, flags); clear_irq_vector(irqd); - vector_assign_managed_shutdown(irqd); + if (apicd->can_reserve) + reserve_irq_vector_locked(irqd); + else + vector_assign_managed_shutdown(irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); } +static int activate_reserved(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int ret; + + ret = assign_irq_vector_any_locked(irqd); + if (!ret) + apicd->has_reserved = false; + return ret; +} + static int activate_managed(struct irq_data *irqd) { const struct cpumask *dest = irq_data_get_affinity_mask(irqd); 
@@ -357,16 +403,19 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, int ret = 0; trace_vector_activate(irqd->irq, apicd->is_managed, - false, early); + apicd->can_reserve, early); - if (!apicd->is_managed) + /* Nothing to do for fixed assigned vectors */ + if (!apicd->can_reserve && !apicd->is_managed) return 0; raw_spin_lock_irqsave(&vector_lock, flags); if (early || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); - else + else if (apicd->is_managed) ret = activate_managed(irqd); + else if (apicd->has_reserved) + ret = activate_reserved(irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); return ret; } @@ -376,8 +425,11 @@ static void vector_free_reserved_and_managed(struct irq_data *irqd) const struct cpumask *dest = irq_data_get_affinity_mask(irqd); struct apic_chip_data *apicd = apic_chip_data(irqd); - trace_vector_teardown(irqd->irq, apicd->is_managed, false); + trace_vector_teardown(irqd->irq, apicd->is_managed, + apicd->has_reserved); + if (apicd->has_reserved) + irq_matrix_remove_reserved(vector_matrix); if (apicd->is_managed) irq_matrix_remove_managed(vector_matrix, dest); } @@ -604,22 +656,6 @@ int __init arch_early_irq_init(void) } #ifdef CONFIG_SMP -/* Temporary hack to keep things working */ -static void vector_update_shutdown_irqs(void) -{ - struct irq_desc *desc; - int irq; - - for_each_irq_desc(irq, desc) { - struct irq_data *irqd = irq_desc_get_irq_data(desc); - struct apic_chip_data *ad = apic_chip_data(irqd); - - if (!ad || !ad->vector || ad->cpu != smp_processor_id()) - continue; - this_cpu_write(vector_irq[ad->vector], desc); - irq_matrix_assign(vector_matrix, ad->vector); - } -} static struct irq_desc *__setup_vector_irq(int vector) { @@ -655,13 +691,6 @@ void lapic_online(void) */ for (vector = 0; vector < NR_VECTORS; vector++) this_cpu_write(vector_irq[vector], __setup_vector_irq(vector)); - - /* - * Until the rewrite of the managed interrupt management is in - * place it's necessary to walk the irq descriptors and check for - * interrupts which are targeted at this CPU. - */ - vector_update_shutdown_irqs(); } void lapic_offline(void) From 464d12309e1b5829597793db551ae8ecaecf4036 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:52 +0200 Subject: [PATCH 50/60] x86/vector: Switch IOAPIC to global reservation mode IOAPICs install and allocate vectors for inactive interrupts. This results in problems on CPU offline and wastes vector resources for nothing. Handle inactive IOAPIC interrupts in the same way as inactive MSI interrupts and switch them to the global reservation mode. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. 
Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213156.273454591@linutronix.de --- arch/x86/kernel/apic/vector.c | 56 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 46a9ae921819..5e58da8efe77 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -258,17 +258,6 @@ static int assign_irq_vector_any_locked(struct irq_data *irqd) return assign_vector_locked(irqd, cpu_online_mask); } -static int assign_irq_vector_any(struct irq_data *irqd) -{ - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&vector_lock, flags); - ret = assign_irq_vector_any_locked(irqd); - raw_spin_unlock_irqrestore(&vector_lock, flags); - return ret; -} - static int assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) { @@ -276,10 +265,10 @@ assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) return reserve_managed_vector(irqd); if (info->mask) return assign_irq_vector(irqd, info->mask); - if (info->type != X86_IRQ_ALLOC_TYPE_MSI && - info->type != X86_IRQ_ALLOC_TYPE_MSIX) - return assign_irq_vector_any(irqd); - /* For MSI(X) make only a global reservation with no guarantee */ + /* + * Make only a global reservation with no guarantee. A real vector + * is associated at activation time. + */ return reserve_irq_vector(irqd); } @@ -456,13 +445,39 @@ static void x86_vector_free_irqs(struct irq_domain *domain, } } +static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, + struct apic_chip_data *apicd) +{ + unsigned long flags; + bool realloc = false; + + apicd->vector = ISA_IRQ_VECTOR(virq); + apicd->cpu = 0; + + raw_spin_lock_irqsave(&vector_lock, flags); + /* + * If the interrupt is activated, then it must stay at this vector + * position. That's usually the timer interrupt (0). + */ + if (irqd_is_activated(irqd)) { + trace_vector_setup(virq, true, 0); + apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); + } else { + /* Release the vector */ + apicd->can_reserve = true; + clear_irq_vector(irqd); + realloc = true; + } + raw_spin_unlock_irqrestore(&vector_lock, flags); + return realloc; +} + static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { struct irq_alloc_info *info = arg; struct apic_chip_data *apicd; struct irq_data *irqd; - unsigned long flags; int i, err, node; if (disable_apic) @@ -496,13 +511,8 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, * config. */ if (info->flags & X86_IRQ_ALLOC_LEGACY) { - apicd->vector = ISA_IRQ_VECTOR(virq + i); - apicd->cpu = 0; - trace_vector_setup(virq + i, true, 0); - raw_spin_lock_irqsave(&vector_lock, flags); - apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); - raw_spin_unlock_irqrestore(&vector_lock, flags); - continue; + if (!vector_configure_legacy(virq + i, irqd, apicd)) + continue; } err = assign_irq_vector_policy(irqd, info); From 2cffad7bad83157f89332872015f4305d2ac09ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:53 +0200 Subject: [PATCH 51/60] x86/irq: Simplify hotplug vector accounting Before a CPU is taken offline the number of active interrupt vectors on the outgoing CPU and the number of vectors which are available on the other online CPUs are counted and compared. 
If the active vectors are more than the available vectors on the other CPUs then the CPU hot-unplug operation is aborted. This again uses a loop-based search and is inaccurate. The bitmap matrix allocator has accurate accounting information and can tell exactly whether the vector space is sufficient or not. Emit a message when the number of globally reserved (unallocated) vectors is larger than the number of available vectors after offlining a CPU, because after that point request_irq() might fail. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213156.351193962@linutronix.de --- arch/x86/include/asm/apic.h | 1 + arch/x86/include/asm/irq.h | 4 -- arch/x86/kernel/apic/vector.c | 32 ++++++++++- arch/x86/kernel/irq.c | 99 ----------------------------------- arch/x86/kernel/smpboot.c | 2 +- 5 files changed, 33 insertions(+), 105 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 7a8651921ed5..a9e57f08bfa6 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -386,6 +386,7 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; */ #ifdef CONFIG_SMP extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +extern int lapic_can_unplug_cpu(void); #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 9958ceea2fa3..1002a3e8fccc 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -25,11 +25,7 @@ extern void irq_ctx_init(int cpu); struct irq_desc; -#ifdef CONFIG_HOTPLUG_CPU -#include -extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); -#endif #ifdef CONFIG_HAVE_KVM extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 5e58da8efe77..14b21ca4483c 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -945,7 +945,37 @@ void irq_force_complete_move(struct irq_desc *desc) unlock: raw_spin_unlock(&vector_lock); } -#endif + +#ifdef CONFIG_HOTPLUG_CPU +/* + * Note, this is not accurate accounting, but at least good enough to + * prevent the actual interrupt move from running out of vectors. + */ +int lapic_can_unplug_cpu(void) +{ + unsigned int rsvd, avl, tomove, cpu = smp_processor_id(); + int ret = 0; + + raw_spin_lock(&vector_lock); + tomove = irq_matrix_allocated(vector_matrix); + avl = irq_matrix_available(vector_matrix, true); + if (avl < tomove) { + pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n", + cpu, tomove, avl); + ret = -ENOSPC; + goto out; + } + rsvd = irq_matrix_reserved(vector_matrix); + if (avl < rsvd) { + pr_warn("Reserved vectors %u > available %u. 
IRQ request may fail\n", + rsvd, avl); + } +out: + raw_spin_unlock(&vector_lock); + return ret; +} +#endif /* HOTPLUG_CPU */ +#endif /* SMP */ static void __init print_APIC_field(int base) { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 188990c3a514..49cfd9fe7589 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -333,105 +333,6 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU - -/* These two declarations are only used in check_irq_vectors_for_cpu_disable() - * below, which is protected by stop_machine(). Putting them on the stack - * results in a stack frame overflow. Dynamically allocating could result in a - * failure so declare these two cpumasks as global. - */ -static struct cpumask affinity_new, online_new; - -/* - * This cpu is going to be removed and its vectors migrated to the remaining - * online cpus. Check to see if there are enough vectors in the remaining cpus. - * This function is protected by stop_machine(). - */ -int check_irq_vectors_for_cpu_disable(void) -{ - unsigned int this_cpu, vector, this_count, count; - struct irq_desc *desc; - struct irq_data *data; - int cpu; - - this_cpu = smp_processor_id(); - cpumask_copy(&online_new, cpu_online_mask); - cpumask_clear_cpu(this_cpu, &online_new); - - this_count = 0; - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - desc = __this_cpu_read(vector_irq[vector]); - if (IS_ERR_OR_NULL(desc)) - continue; - /* - * Protect against concurrent action removal, affinity - * changes etc. - */ - raw_spin_lock(&desc->lock); - data = irq_desc_get_irq_data(desc); - cpumask_copy(&affinity_new, - irq_data_get_affinity_mask(data)); - cpumask_clear_cpu(this_cpu, &affinity_new); - - /* Do not count inactive or per-cpu irqs. */ - if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) { - raw_spin_unlock(&desc->lock); - continue; - } - - raw_spin_unlock(&desc->lock); - /* - * A single irq may be mapped to multiple cpu's - * vector_irq[] (for example IOAPIC cluster mode). In - * this case we have two possibilities: - * - * 1) the resulting affinity mask is empty; that is - * this the down'd cpu is the last cpu in the irq's - * affinity mask, or - * - * 2) the resulting affinity mask is no longer a - * subset of the online cpus but the affinity mask is - * not zero; that is the down'd cpu is the last online - * cpu in a user set affinity mask. - */ - if (cpumask_empty(&affinity_new) || - !cpumask_subset(&affinity_new, &online_new)) - this_count++; - } - /* No need to check any further. */ - if (!this_count) - return 0; - - count = 0; - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - /* - * We scan from FIRST_EXTERNAL_VECTOR to first system - * vector. If the vector is marked in the used vectors - * bitmap or an irq is assigned to it, we don't count - * it as available. - * - * As this is an inaccurate snapshot anyway, we can do - * this w/o holding vector_lock. - */ - for (vector = FIRST_EXTERNAL_VECTOR; - vector < FIRST_SYSTEM_VECTOR; vector++) { - if (!test_bit(vector, system_vectors) && - IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) { - if (++count == this_count) - return 0; - } - } - } - - if (count < this_count) { - pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", - this_cpu, this_count, count); - return -ERANGE; - } - return 0; -} - /* A cpu has been removed from cpu_online_mask. Reset irq affinities. 
*/ void fixup_irqs(void) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 86739f04701b..92aadfa30d61 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1525,7 +1525,7 @@ int native_cpu_disable(void) { int ret; - ret = check_irq_vectors_for_cpu_disable(); + ret = lapic_can_unplug_cpu(); if (ret) return ret; From d6ffc6ac83b1f9f12652d89b9cb5bcbfbea7796c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:54 +0200 Subject: [PATCH 52/60] x86/vector: Respect affinity mask in irq descriptor The interrupt descriptor has a preset affinity mask at allocation time, which is usually the default affinity mask. The current code does not respect that mask and places the vector at some random CPU, which gets corrected later by a set_affinity() call. That's silly because the vector allocation can respect the mask upfront and place the interrupt on a CPU which is in the mask. If that fails, then the affinity is broken and an interrupt is assigned to any online CPU. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213156.431670325@linutronix.de --- arch/x86/kernel/apic/vector.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 14b21ca4483c..6789e286def9 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -249,12 +249,25 @@ static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest) static int assign_irq_vector_any_locked(struct irq_data *irqd) { + /* Get the affinity mask - either irq_default_affinity or (user) set */ + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); int node = irq_data_get_node(irqd); - if (node != NUMA_NO_NODE) { - if (!assign_vector_locked(irqd, cpumask_of_node(node))) - return 0; - } + if (node == NUMA_NO_NODE) + goto all; + /* Try the intersection of @affmsk and node mask */ + cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + /* Try the node mask */ + if (!assign_vector_locked(irqd, cpumask_of_node(node))) + return 0; +all: + /* Try the full affinity mask */ + cpumask_and(vector_searchmask, affmsk, cpu_online_mask); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + /* Try the full online mask */ return assign_vector_locked(irqd, cpu_online_mask); } From 9c71206d060d4e84896f3bd680319b29fe88b8e8 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 13 Sep 2017 17:17:54 +0800 Subject: [PATCH 53/60] ACPI/init: Invoke early ACPI initialization earlier acpi_early_init() unmaps the temporary ACPI Table mappings which are used in the early startup code and prepares for permanent table mappings. Before the consolidation of the x86 APIC setup code the invocation of acpi_early_init() happened before the interrupt remapping unit was initialized. With the rework the remapping unit initialization moved in front of acpi_early_init() which causes an ACPI warning when the ACPI root tables get reallocated afterwards.
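For reference, a condensed before/after sketch of the relevant start_kernel() ordering (distilled from the changelog and the diff below, not a literal excerpt; all other calls elided):

	/* before (broken ordering): */
	if (late_time_init)
		late_time_init();	/* x86: interrupt mode setup, inits the
					 * remapping unit and maps the DMAR tables */
	acpi_early_init();		/* reallocates the ACPI root tables
					 * afterwards -> ACPI warning */

	/* after (this patch): */
	acpi_early_init();		/* permanent ACPI table mappings first */
	if (late_time_init)
		late_time_init();	/* DMAR table access hits valid mappings */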
Invoke acpi_early_init() before late_time_init(), which runs before the DMAR tables are accessed. Fixes: 935356cecda8 ("x86/apic: Initialize interrupt mode after timer init") Reported-by: Xiaolong Ye Signed-off-by: Dou Liyang Cc: Tony Luck Cc: linux-ia64@vger.kernel.org Cc: bhe@redhat.com Cc: Fenghua Yu Cc: Michael Ellerman Cc: "Rafael J. Wysocki" Cc: Will Deacon Cc: linux-acpi@vger.kernel.org Cc: bp@alien8.de Cc: "Lv Zheng" Cc: yinghai@kernel.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lkml.kernel.org/r/1505294274-441-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 0ee9c6866ada..2fb98a48b6ae 100644 --- a/init/main.c +++ b/init/main.c @@ -664,12 +664,12 @@ asmlinkage __visible void __init start_kernel(void) debug_objects_mem_init(); setup_per_cpu_pageset(); numa_policy_init(); + acpi_early_init(); if (late_time_init) late_time_init(); calibrate_delay(); pidmap_init(); anon_vma_init(); - acpi_early_init(); #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); From 1e66e2b86293ff1ded32104ac0ad26a7f08ec439 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 26 Sep 2017 19:08:45 +0200 Subject: [PATCH 54/60] x86/apic: Use dead_cpu instead of current CPU when cleaning up x2apic_dead_cpu() cleans up the leftovers of a CPU which got unplugged, but instead of clearing the dead cpu bit in the cluster mask it clears the current (alive) cpu bit. Noticed because smp_processor_id() is called in preemptible code and triggers a debug warning. [ tglx: Rewrote changelog ] Fixes: 023a611748fd ("x86/apic/x2apic: Simplify cluster management") Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20170926170845.13955-1-bp@alien8.de --- arch/x86/kernel/apic/x2apic_cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 3da94277140f..6050c5364bdc 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -156,7 +156,7 @@ static int x2apic_dead_cpu(unsigned int dead_cpu) { struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - cpumask_clear_cpu(smp_processor_id(), &cmsk->mask); + cpumask_clear_cpu(dead_cpu, &cmsk->mask); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } From 79761ce80aa0232157e428bde28c0cef6d43ac5f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 27 Sep 2017 11:22:23 +0100 Subject: [PATCH 55/60] x86/apic: Fix spelling mistake: "symmectic" -> "symmetric" Trivial fix to spelling mistakes in pr_info messages Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Cc: Dou Liyang Link: https://lkml.kernel.org/r/20170927102223.31920-1-colin.king@canonical.com --- arch/x86/kernel/apic/apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ca5ec3fddc49..a1ca2c08f532 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1296,11 +1296,11 @@ void __init apic_intr_mode_init(void) default_setup_apic_routing(); break; case APIC_SYMMETRIC_IO: - pr_info("APIC: Switch to symmectic I/O mode setup\n"); + pr_info("APIC: Switch to symmetric I/O mode setup\n"); default_setup_apic_routing(); break; case APIC_SYMMETRIC_IO_NO_ROUTING: - pr_info("APIC: Switch to symmectic I/O mode setup in no SMP routine\n"); +
pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n"); break; } From 02edee152d6ea325c88898f3a702f5db2d78de7a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Oct 2017 11:05:28 +0200 Subject: [PATCH 56/60] x86/apic/vector: Ignore set_affinity call for inactive interrupts The core interrupt code can call the affinity setter for inactive interrupts under certain circumstances. For inactive intererupts which use managed or reservation mode this is a pointless exercise as the activation will assign a vector which fits the destination mask. Check for this and return w/o going through the vector assignment. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6789e286def9..573538e0981e 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -726,8 +726,21 @@ void lapic_offline(void) static int apic_set_affinity(struct irq_data *irqd, const struct cpumask *dest, bool force) { + struct apic_chip_data *apicd = apic_chip_data(irqd); int err; + /* + * Core code can call here for inactive interrupts. For inactive + * interrupts which use managed or reservation mode there is no + * point in going through the vector assignment right now as the + * activation will assign a vector which fits the destination + * cpumask. Let the core code store the destination mask and be + * done with it. + */ + if (!irqd_is_activated(irqd) && + (apicd->is_managed || apicd->can_reserve)) + return IRQ_SET_MASK_OK; + raw_spin_lock(&vector_lock); cpumask_and(vector_searchmask, dest, cpu_online_mask); if (irqd_affinity_is_managed(irqd)) From 0696d059f23c05f2dbc3b19ef50e5bdd175b782b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Oct 2017 16:16:19 +0200 Subject: [PATCH 57/60] x86/vector: Use correct per cpu variable in free_moved_vector() free_moved_vector() accesses the per cpu vector array with this_cpu_write() to clear the vector. The function has two call sites: 1) The vector cleanup IPI 2) The force_complete_move() code path For #1 this_cpu_write() is correct as it runs on the CPU on which the vector needs to be freed. For #2 this_cpu_write() is wrong because the function is called from an outgoing CPU which is not necessarily the CPU on which the previous vector needs to be freed. As a result it sets the vector on the outgoing CPU to NULL, which is pointless as that CPU does not handle interrupts anymore. What's worse is that it leaves the vector on the previous target CPU in place which later on triggers the BUG_ON(vector) in the vector allocation code when the vector gets reused. That's possible because the bitmap allocator entry of that CPU is freed correctly. Always use the CPU to which the vector was associated and clear the vector entry on that CPU. Fixup the tracepoint as well so it tracks on which CPU the vector gets removed. Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment") Reported-by: Petri Latvala Signed-off-by: Thomas Gleixner Cc: Juergen Gross Cc: Tony Luck Cc: Len Brown Cc: Marc Zyngier Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Rui Zhang Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Boris Ostrovsky Cc: "K. Y. 
Srinivasan" Cc: Arjan van de Ven Cc: Alok Kataria Cc: Dan Williams Cc: Yu Chen Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1710161614430.1973@nanos --- arch/x86/include/asm/trace/irq_vectors.h | 12 ++++++++---- arch/x86/kernel/apic/vector.c | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index bc09c5cf6390..bfd480b827f5 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -360,24 +360,28 @@ TRACE_EVENT(vector_setup, TRACE_EVENT(vector_free_moved, - TP_PROTO(unsigned int irq, unsigned int vector, bool is_managed), + TP_PROTO(unsigned int irq, unsigned int cpu, unsigned int vector, + bool is_managed), - TP_ARGS(irq, vector, is_managed), + TP_ARGS(irq, cpu, vector, is_managed), TP_STRUCT__entry( __field( unsigned int, irq ) + __field( unsigned int, cpu ) __field( unsigned int, vector ) __field( bool, is_managed ) ), TP_fast_assign( __entry->irq = irq; + __entry->cpu = cpu; __entry->vector = vector; __entry->is_managed = is_managed; ), - TP_printk("irq=%u vector=%u is_managed=%d", - __entry->irq, __entry->vector, __entry->is_managed) + TP_printk("irq=%u cpu=%u vector=%u is_managed=%d", + __entry->irq, __entry->cpu, __entry->vector, + __entry->is_managed) ); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 573538e0981e..05c85e693a5d 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -797,9 +797,9 @@ static void free_moved_vector(struct apic_chip_data *apicd) */ WARN_ON_ONCE(managed); - trace_vector_free_moved(apicd->irq, vector, managed); + trace_vector_free_moved(apicd->irq, cpu, vector, managed); irq_matrix_free(vector_matrix, cpu, vector, managed); - __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; hlist_del_init(&apicd->clist); apicd->prev_vector = 0; apicd->move_in_progress = 0; From 2b5175c4fa974b6aa05bbd2ee8d443a8036a1714 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 17 Oct 2017 09:54:57 +0200 Subject: [PATCH 58/60] genirq: Add config option for reservation mode The interrupt reservation mode requires reactivation of PCI/MSI interrupts. Create a config option, so the PCI code can set the corresponding flag when required. Signed-off-by: Thomas Gleixner Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Link: https://lkml.kernel.org/r/20171017075600.369375409@linutronix.de --- kernel/irq/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index ac1a3e29d3b9..89e355866450 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -100,6 +100,9 @@ config IRQ_TIMINGS config GENERIC_IRQ_MATRIX_ALLOCATOR bool +config GENERIC_IRQ_RESERVATION_MODE + bool + config IRQ_DOMAIN_DEBUG bool "Expose hardware/virtual IRQ mapping via debugfs" depends on IRQ_DOMAIN && DEBUG_FS From 25e960efc63852b84d1c3739aef586285b177395 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 17 Oct 2017 09:54:58 +0200 Subject: [PATCH 59/60] PCI/MSI: Set MSI_FLAG_MUST_REACTIVATE in core code If interrupt reservation mode is enabled then the PCI/MSI interrupts must be reactivated after early activation. 
Make sure that all callers of pci_msi_create_irq_domain() have the MSI_FLAG_MUST_REACTIVATE flag set when reservation mode is enabled. Signed-off-by: Thomas Gleixner Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Link: https://lkml.kernel.org/r/20171017075600.448649905@linutronix.de --- drivers/pci/msi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 496ed9130600..e06607167858 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1441,6 +1441,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, pci_msi_domain_update_chip_ops(info); info->flags |= MSI_FLAG_ACTIVATE_EARLY; + if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) + info->flags |= MSI_FLAG_MUST_REACTIVATE; domain = msi_create_irq_domain(fwnode, info, parent); if (!domain) From c201c91799d687c0a6d8c3272950f51aad5ffebe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 17 Oct 2017 09:54:59 +0200 Subject: [PATCH 60/60] x86/vector/msi: Select CONFIG_GENERIC_IRQ_RESERVATION_MODE Select CONFIG_GENERIC_IRQ_RESERVATION_MODE so PCI/MSI domains get the MSI_FLAG_MUST_REACTIVATE flag set in pci_msi_create_irq_domain(). Remove the explicit setters of this flag in the apic/msi code as they are no longer required. Fixes: 4900be83602b ("x86/vector/msi: Switch to global reservation mode") Reported-and-tested-by: Dexuan Cui Signed-off-by: Thomas Gleixner Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Link: https://lkml.kernel.org/r/20171017075600.527569354@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/kernel/apic/msi.c | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 64e99d3c5169..ea4bedaba4b8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -95,6 +95,7 @@ config X86 select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC select GENERIC_IRQ_MIGRATION if SMP select GENERIC_IRQ_PROBE + select GENERIC_IRQ_RESERVATION_MODE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP select GENERIC_SMP_IDLE_THREAD diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 5b6dd1a85ec4..9b18be764422 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -129,7 +129,7 @@ static struct msi_domain_ops pci_msi_domain_ops = { static struct msi_domain_info pci_msi_domain_info = { .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX | MSI_FLAG_MUST_REACTIVATE, + MSI_FLAG_PCI_MSIX, .ops = &pci_msi_domain_ops, .chip = &pci_msi_controller, .handler = handle_edge_irq, @@ -167,8 +167,7 @@ static struct irq_chip pci_msi_ir_controller = { static struct msi_domain_info pci_msi_ir_domain_info = { .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX | - MSI_FLAG_MUST_REACTIVATE, + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, .ops = &pci_msi_domain_ops, .chip = &pci_msi_ir_controller, .handler = handle_edge_irq,