From 9f32d21c981bb638d0991ce5675a20337312066b Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Thu, 23 Oct 2008 17:40:25 -0700 Subject: [PATCH 1/4] xen: fix Xen domU boot with batched mprotect Impact: fix guest kernel boot crash on certain configs Recent i686 2.6.27 kernels with a certain amount of memory (between 736 and 855MB) have a problem booting under a hypervisor that supports batched mprotect (this includes the RHEL-5 Xen hypervisor as well as any 3.3 or later Xen hypervisor). The problem ends up being that xen_ptep_modify_prot_commit() is using virt_to_machine to calculate which pfn to update. However, this only works for pages that are in the p2m list, and the pages coming from change_pte_range() in mm/mprotect.c are kmap_atomic pages. Because of this, we can run into the situation where the lookup in the p2m table returns an INVALID_MFN, which we then try to pass to the hypervisor, which then (correctly) denies the request to a totally bogus pfn. The right thing to do is to use arbitrary_virt_to_machine, so that we can be sure we are modifying the right pfn. This unfortunately introduces a performance penalty because of a full page-table-walk, but we can avoid that penalty for pages in the p2m list by checking if virt_addr_valid is true, and if so, just doing the lookup in the p2m table. The attached patch implements this, and allows my 2.6.27 i686 based guest with 768MB of memory to boot on a RHEL-5 hypervisor again. Thanks to Jeremy for the suggestions about how to fix this particular issue. 
Signed-off-by: Chris Lalancette Signed-off-by: Jeremy Fitzhardinge Cc: Chris Lalancette Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d4d52f5a1cf7..aba77b2b7d18 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -246,11 +246,21 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) { unsigned long address = (unsigned long)vaddr; unsigned int level; - pte_t *pte = lookup_address(address, &level); - unsigned offset = address & ~PAGE_MASK; + pte_t *pte; + unsigned offset; + /* + * if the PFN is in the linear mapped vaddr range, we can just use + * the (quick) virt_to_machine() p2m lookup + */ + if (virt_addr_valid(vaddr)) + return virt_to_machine(vaddr); + + /* otherwise we have to do a (slower) full page-table walk */ + + pte = lookup_address(address, &level); BUG_ON(pte == NULL); - + offset = address & ~PAGE_MASK; return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); } @@ -410,7 +420,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, xen_mc_batch(); - u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; + u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; u.val = pte_val_ma(pte); xen_extend_mmu_update(&u); From ef020ab0109aa5cd6eac2e93519b7641c9862828 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 23 Oct 2008 17:54:05 -0500 Subject: [PATCH 2/4] x86/uv: memory allocation at initialization Impact: on SGI UV platforms, fix boot crash UV initialization is currently called too late to call alloc_bootmem_pages(). The current sequence is: start_kernel() mem_init() free_all_bootmem() <--- discard of bootmem rest_init() kernel_init() smp_prepare_cpus() native_smp_prepare_cpus() uv_system_init() <--- uses alloc_bootmem_pages() It should be calling kmalloc(). 
Signed-off-by: Cliff Wickman Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 680a06557c5e..2c7dbdb98278 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -398,16 +397,16 @@ void __init uv_system_init(void) printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); - uv_blade_info = alloc_bootmem_pages(bytes); + uv_blade_info = kmalloc(bytes, GFP_KERNEL); get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); - uv_node_to_blade = alloc_bootmem_pages(bytes); + uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); memset(uv_node_to_blade, 255, bytes); bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); - uv_cpu_to_blade = alloc_bootmem_pages(bytes); + uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); memset(uv_cpu_to_blade, 255, bytes); blade = 0; From 3afa39493de510c33c56ddc76e6e1af7f87c5392 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 25 Oct 2008 22:58:21 -0700 Subject: [PATCH 3/4] x86: keep the /proc/meminfo page count correct Impact: get correct page count in /proc/meminfo found page count in /proc/meminfo is not correct on 1G system in VirtualBox 2.0.4 # cat /proc/meminfo MemTotal: 1017508 kB MemFree: 822700 kB Buffers: 1456 kB Cached: 26632 kB SwapCached: 0 kB ... Hugepagesize: 2048 kB DirectMap4k: 4032 kB DirectMap2M: 18446744073709549568 kB with this patch get: ... 
DirectMap4k: 4032 kB DirectMap2M: 1044480 kB which is consistent to kernel_page_tables ---[ Low Kernel Mapping ]--- 0xffff880000000000-0xffff880000001000 4K RW PCD GLB x pte 0xffff880000001000-0xffff88000009f000 632K RW GLB x pte 0xffff88000009f000-0xffff8800000a0000 4K RW PCD GLB x pte 0xffff8800000a0000-0xffff880000200000 1408K RW GLB x pte 0xffff880000200000-0xffff88003fe00000 1020M RW PSE GLB x pmd 0xffff88003fe00000-0xffff88003fff0000 1984K RW GLB NX pte 0xffff88003fff0000-0xffff880040000000 64K pte 0xffff880040000000-0xffff888000000000 511G pud 0xffff888000000000-0xffffc20000000000 58880G pgd Signed-off-by: Yinghai Lu Acked-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b8e461d49412..c7a4c5a9a21b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -350,8 +350,10 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * pagetable pages as RO. So assume someone who pre-setup * these mappings are more intelligent. */ - if (pte_val(*pte)) + if (pte_val(*pte)) { + pages++; continue; + } if (0) printk(" pte=%p addr=%lx pte=%016lx\n", @@ -418,8 +420,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, * not differ with respect to page frame and * attributes. */ - if (page_size_mask & (1 << PG_LEVEL_2M)) + if (page_size_mask & (1 << PG_LEVEL_2M)) { + pages++; continue; + } new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); } @@ -499,8 +503,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, * not differ with respect to page frame and * attributes. 
*/ - if (page_size_mask & (1 << PG_LEVEL_1G)) + if (page_size_mask & (1 << PG_LEVEL_1G)) { + pages++; continue; + } prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); } From 60817c9b31ef7897d60bca2f384cbc316a3fdd8b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 27 Oct 2008 13:03:18 -0700 Subject: [PATCH 4/4] x86, memory hotplug: remove wrong -1 in calling init_memory_mapping() Impact: fix crash with memory hotplug Shaohua Li found: | I just did some experiments on a desktop for memory hotplug and this bug | triggered a crash in my test. | | Yinghai's suggestion also fixed the bug. We don't need to round it, just remove that extra -1 Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c7a4c5a9a21b..f79a02f64d10 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -837,7 +837,7 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - last_mapped_pfn = init_memory_mapping(start, start + size-1); + last_mapped_pfn = init_memory_mapping(start, start + size); if (last_mapped_pfn > max_pfn_mapped) max_pfn_mapped = last_mapped_pfn;