From 9f32d21c981bb638d0991ce5675a20337312066b Mon Sep 17 00:00:00 2001
From: Chris Lalancette <clalance@redhat.com>
Date: Thu, 23 Oct 2008 17:40:25 -0700
Subject: [PATCH 1/4] xen: fix Xen domU boot with batched mprotect

Impact: fix guest kernel boot crash on certain configs

Recent i686 2.6.27 kernels with a certain amount of memory (between
736 and 855MB) have a problem booting under a hypervisor that supports
batched mprotect (this includes the RHEL-5 Xen hypervisor as well as
any 3.3 or later Xen hypervisor).

The problem ends up being that xen_ptep_modify_prot_commit() is using
virt_to_machine to calculate which pfn to update.  However, this only
works for pages that are in the p2m list, and the pages coming from
change_pte_range() in mm/mprotect.c are kmap_atomic pages.  Because of
this, we can run into the situation where the lookup in the p2m table
returns an INVALID_MFN, which we then try to pass to the hypervisor,
which then (correctly) denies the request to a totally bogus pfn.

The right thing to do is to use arbitrary_virt_to_machine, so that we
can be sure we are modifying the right pfn.  This unfortunately
introduces a performance penalty because of a full page-table-walk,
but we can avoid that penalty for pages in the p2m list by checking if
virt_addr_valid is true, and if so, just doing the lookup in the p2m
table.

The attached patch implements this, and allows my 2.6.27 i686 based
guest with 768MB of memory to boot on a RHEL-5 hypervisor again.
Thanks to Jeremy for the suggestions about how to fix this particular
issue.

Signed-off-by: Chris Lalancette <clalance@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Chris Lalancette <clalance@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d4d52f5a1cf7..aba77b2b7d18 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -246,11 +246,21 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 {
 	unsigned long address = (unsigned long)vaddr;
 	unsigned int level;
-	pte_t *pte = lookup_address(address, &level);
-	unsigned offset = address & ~PAGE_MASK;
+	pte_t *pte;
+	unsigned offset;
 
+	/*
+	 * if the PFN is in the linear mapped vaddr range, we can just use
+	 * the (quick) virt_to_machine() p2m lookup
+	 */
+	if (virt_addr_valid(vaddr))
+		return virt_to_machine(vaddr);
+
+	/* otherwise we have to do a (slower) full page-table walk */
+
+	pte = lookup_address(address, &level);
 	BUG_ON(pte == NULL);
-
+	offset = address & ~PAGE_MASK;
 	return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
 }
 
@@ -410,7 +420,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 
 	xen_mc_batch();
 
-	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+	u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
 	u.val = pte_val_ma(pte);
 	xen_extend_mmu_update(&u);
 

From ef020ab0109aa5cd6eac2e93519b7641c9862828 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Thu, 23 Oct 2008 17:54:05 -0500
Subject: [PATCH 2/4] x86/uv: memory allocation at initialization

Impact: on SGI UV platforms, fix boot crash

UV initialization is currently called too late to call alloc_bootmem_pages().
The current sequence is:

 start_kernel()
   mem_init()
     free_all_bootmem()           <--- discard of bootmem
   rest_init()
     kernel_init()
       smp_prepare_cpus()
       native_smp_prepare_cpus()
         uv_system_init()         <--- uses alloc_bootmem_pages()

It should be calling kmalloc().

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genx2apic_uv_x.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 680a06557c5e..2c7dbdb98278 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -15,7 +15,6 @@
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/sched.h>
-#include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
 #include <asm/smp.h>
@@ -398,16 +397,16 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = alloc_bootmem_pages(bytes);
+	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
 
 	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
 
 	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
-	uv_node_to_blade = alloc_bootmem_pages(bytes);
+	uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
 	memset(uv_node_to_blade, 255, bytes);
 
 	bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
-	uv_cpu_to_blade = alloc_bootmem_pages(bytes);
+	uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
 	memset(uv_cpu_to_blade, 255, bytes);
 
 	blade = 0;

From 3afa39493de510c33c56ddc76e6e1af7f87c5392 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 25 Oct 2008 22:58:21 -0700
Subject: [PATCH 3/4] x86: keep the /proc/meminfo page count correct

Impact: get correct page count in /proc/meminfo

found page count in /proc/meminfo is nor correct on 1G system in VirtualBox 2.0.4

# cat /proc/meminfo
MemTotal:        1017508 kB
MemFree:          822700 kB
Buffers:            1456 kB
Cached:            26632 kB
SwapCached:            0 kB
...
Hugepagesize:       2048 kB
DirectMap4k:      4032 kB
DirectMap2M:  18446744073709549568 kB

with this patch get:
...
DirectMap4k:      4032 kB
DirectMap2M:   1044480 kB

which is consistent to kernel_page_tables
---[ Low Kernel Mapping ]---
0xffff880000000000-0xffff880000001000           4K     RW     PCD     GLB x  pte
0xffff880000001000-0xffff88000009f000         632K     RW             GLB x  pte
0xffff88000009f000-0xffff8800000a0000           4K     RW     PCD     GLB x  pte
0xffff8800000a0000-0xffff880000200000        1408K     RW             GLB x  pte
0xffff880000200000-0xffff88003fe00000        1020M     RW         PSE GLB x  pmd
0xffff88003fe00000-0xffff88003fff0000        1984K     RW             GLB NX pte
0xffff88003fff0000-0xffff880040000000          64K                           pte
0xffff880040000000-0xffff888000000000         511G                           pud
0xffff888000000000-0xffffc20000000000       58880G                           pgd

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b8e461d49412..c7a4c5a9a21b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -350,8 +350,10 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
 		 * pagetable pages as RO. So assume someone who pre-setup
 		 * these mappings are more intelligent.
 		 */
-		if (pte_val(*pte))
+		if (pte_val(*pte)) {
+			pages++;
 			continue;
+		}
 
 		if (0)
 			printk("   pte=%p addr=%lx pte=%016lx\n",
@@ -418,8 +420,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 			 * not differ with respect to page frame and
 			 * attributes.
 			 */
-			if (page_size_mask & (1 << PG_LEVEL_2M))
+			if (page_size_mask & (1 << PG_LEVEL_2M)) {
+				pages++;
 				continue;
+			}
 			new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
 		}
 
@@ -499,8 +503,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 			 * not differ with respect to page frame and
 			 * attributes.
 			 */
-			if (page_size_mask & (1 << PG_LEVEL_1G))
+			if (page_size_mask & (1 << PG_LEVEL_1G)) {
+				pages++;
 				continue;
+			}
 			prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
 		}
 

From 60817c9b31ef7897d60bca2f384cbc316a3fdd8b Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Mon, 27 Oct 2008 13:03:18 -0700
Subject: [PATCH 4/4] x86, memory hotplug: remove wrong -1 in calling
 init_memory_mapping()

Impact: fix crash with memory hotplug

Shuahua Li found:

| I just did some experiments on a desktop for memory hotplug and this bug
| triggered a crash in my test.
|
| Yinghai's suggestion also fixed the bug.

We don't need to round it, just remove that extra -1

Signed-off-by: Yinghai <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index c7a4c5a9a21b..f79a02f64d10 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -837,7 +837,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	last_mapped_pfn = init_memory_mapping(start, start + size-1);
+	last_mapped_pfn = init_memory_mapping(start, start + size);
 	if (last_mapped_pfn > max_pfn_mapped)
 		max_pfn_mapped = last_mapped_pfn;