diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 2a9cd74a841e..076327f2eff7 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -8,9 +8,9 @@ * On FSL-BookE we setup a 1:1 mapping which covers the first 2GiB of memory * and therefore we can only deal with memory within this range */ -#define KEXEC_SOURCE_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL) -#define KEXEC_DESTINATION_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL) -#define KEXEC_CONTROL_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL) +#define KEXEC_SOURCE_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL - 1) +#define KEXEC_DESTINATION_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL - 1) +#define KEXEC_CONTROL_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL - 1) #else diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 2102b214a87c..0e398cfee2c8 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -250,7 +250,9 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap) int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, unsigned int shift, unsigned int mmu_psize); - +extern void hash_failure_debug(unsigned long ea, unsigned long access, + unsigned long vsid, unsigned long trap, + int ssize, int psize, unsigned long pte); extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long pstart, unsigned long prot, int psize, int ssize); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9d3953983fb7..fed9bf6187d1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -414,7 +414,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) u64 base, size, memblock_size; unsigned int is_kexec_kdump = 0, rngs; - ls = of_get_flat_dt_prop(node, "ibm,memblock-size", &l); + ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) return 0; memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls); diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index a719f53921a5..3079f6b44cf5 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -68,9 +68,6 @@ _GLOBAL(__hash_page_4K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" @@ -347,9 +344,6 @@ _GLOBAL(__hash_page_4K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" @@ -687,9 +681,6 @@ _GLOBAL(__hash_page_64K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 98f262de5585..09dffe6efa46 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -871,6 +871,18 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea) } #endif +void hash_failure_debug(unsigned long ea, unsigned long access, + unsigned long vsid, unsigned long trap, + int ssize, int psize, unsigned long pte) +{ + if (!printk_ratelimit()) + return; + pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n", + ea, access, current->comm); + pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n", + trap, vsid, ssize, psize, pte); +} + /* Result code is: * 0 - handled * 1 - normal page fault @@ -955,6 +967,17 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) return 1; } + /* Add _PAGE_PRESENT to the required access perm */ + access |= _PAGE_PRESENT; + + /* Pre-check access permissions (will be re-checked atomically + * in __hash_page_XX but this pre-check is a fast path + */ + if (access & ~pte_val(*ptep)) { + DBG_LOW(" no access !\n"); + return 1; + } + #ifdef CONFIG_HUGETLB_PAGE if (hugeshift) return __hash_page_huge(ea, access, vsid, ptep, trap, local, @@ -967,14 +990,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep), pte_val(*(ptep + PTRS_PER_PTE))); #endif - /* Pre-check access permissions (will be re-checked atomically - * in __hash_page_XX but this pre-check is a fast path - */ - if (access & ~pte_val(*ptep)) { - DBG_LOW(" no access !\n"); - return 1; - } - /* Do actual hashing */ #ifdef CONFIG_PPC_64K_PAGES /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ @@ -1033,6 +1048,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) local, ssize, spp); } + /* Dump some info in case of hash insertion failure, they should + * never happen so it is really useful to know if/when they do + */ + if (rc == -1) + hash_failure_debug(ea, access, vsid, trap, ssize, psize, + pte_val(*ptep)); #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); #else @@ -1051,8 +1072,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, void *pgdir; pte_t *ptep; unsigned long flags; - int local = 0; - int ssize; + int rc, ssize, local = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1098,11 +1118,18 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(pgdir, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, + subpage_protection(pgdir, ea)); + + /* Dump some info in case of hash insertion failure, they should + * never happen so it is really useful to know if/when they do + */ + if (rc == -1) + hash_failure_debug(ea, access, vsid, trap, ssize, + mm->context.user_psize, pte_val(*ptep)); local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 199539882f92..cc5c273086cf 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -21,21 +21,13 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long old_pte, new_pte; unsigned long va, rflags, pa, sz; long slot; - int err = 1; BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); /* Search the Linux page table for a match with va */ va = hpt_va(ea, vsid, ssize); - /* - * Check the user's access rights to the page. If access should be - * prevented then send the problem up to do_page_fault. - */ - if (unlikely(access & ~pte_val(*ptep))) - goto out; - /* - * At this point, we have a pte (old_pte) which can be used to build + /* At this point, we have a pte (old_pte) which can be used to build * or update an HPTE. There are 2 cases: * * 1. There is a valid (present) pte with no associated HPTE (this is @@ -49,9 +41,17 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, do { old_pte = pte_val(*ptep); - if (old_pte & _PAGE_BUSY) - goto out; + /* If PTE busy, retry the access */ + if (unlikely(old_pte & _PAGE_BUSY)) + return 0; + /* If PTE permissions don't match, take page fault */ + if (unlikely(access & ~old_pte)) + return 1; + /* Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access */ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; + if (access & _PAGE_RW) + new_pte |= _PAGE_DIRTY; } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, old_pte, new_pte)); @@ -121,8 +121,16 @@ repeat: } } - if (unlikely(slot == -2)) - panic("hash_huge_page: pte_insert failed\n"); + /* + * Hypervisor failure. Restore old pte and return -1 + * similar to __hash_page_* + */ + if (unlikely(slot == -2)) { + *ptep = __pte(old_pte); + hash_failure_debug(ea, access, vsid, trap, ssize, + mmu_psize, old_pte); + return -1; + } new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); } @@ -131,9 +139,5 @@ repeat: * No need to use ldarx/stdcx here */ *ptep = __pte(new_pte & ~_PAGE_BUSY); - - err = 0; - - out: - return err; + return 0; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f47364585ecd..aa731af720c0 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -398,15 +398,15 @@ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) } /* - * Retreive and validate the ibm,memblock-size property for drconf memory + * Retreive and validate the ibm,lmb-size property for drconf memory * from the device tree. */ -static u64 of_get_memblock_size(struct device_node *memory) +static u64 of_get_lmb_size(struct device_node *memory) { const u32 *prop; u32 len; - prop = of_get_property(memory, "ibm,memblock-size", &len); + prop = of_get_property(memory, "ibm,lmb-size", &len); if (!prop || len < sizeof(unsigned int)) return 0; @@ -562,7 +562,7 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, static inline int __init read_usm_ranges(const u32 **usm) { /* - * For each memblock in ibm,dynamic-memory a corresponding + * For each lmb in ibm,dynamic-memory a corresponding * entry in linux,drconf-usable-memory property contains * a counter followed by that many (base, size) duple. * read the counter from linux,drconf-usable-memory @@ -578,7 +578,7 @@ static void __init parse_drconf_memory(struct device_node *memory) { const u32 *dm, *usm; unsigned int n, rc, ranges, is_kexec_kdump = 0; - unsigned long memblock_size, base, size, sz; + unsigned long lmb_size, base, size, sz; int nid; struct assoc_arrays aa; @@ -586,8 +586,8 @@ static void __init parse_drconf_memory(struct device_node *memory) if (!n) return; - memblock_size = of_get_memblock_size(memory); - if (!memblock_size) + lmb_size = of_get_lmb_size(memory); + if (!lmb_size) return; rc = of_get_assoc_arrays(memory, &aa); @@ -611,7 +611,7 @@ static void __init parse_drconf_memory(struct device_node *memory) continue; base = drmem.base_addr; - size = memblock_size; + size = lmb_size; ranges = 1; if (is_kexec_kdump) { @@ -1072,7 +1072,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, { const u32 *dm; unsigned int drconf_cell_cnt, rc; - unsigned long memblock_size; + unsigned long lmb_size; struct assoc_arrays aa; int nid = -1; @@ -1080,8 +1080,8 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, if (!drconf_cell_cnt) return -1; - memblock_size = of_get_memblock_size(memory); - if (!memblock_size) + lmb_size = of_get_lmb_size(memory); + if (!lmb_size) return -1; rc = of_get_assoc_arrays(memory, &aa); @@ -1100,7 +1100,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, continue; if ((scn_addr < drmem.base_addr) - || (scn_addr >= (drmem.base_addr + memblock_size))) + || (scn_addr >= (drmem.base_addr + lmb_size))) continue; nid = of_drconf_to_nid_single(&drmem, &aa); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index deab5f946090..bc8803664140 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -69,7 +69,7 @@ static int pseries_remove_memory(struct device_node *np) const char *type; const unsigned int *regs; unsigned long base; - unsigned int memblock_size; + unsigned int lmb_size; int ret = -EINVAL; /* @@ -87,9 +87,9 @@ static int pseries_remove_memory(struct device_node *np) return ret; base = *(unsigned long *)regs; - memblock_size = regs[3]; + lmb_size = regs[3]; - ret = pseries_remove_memblock(base, memblock_size); + ret = pseries_remove_memblock(base, lmb_size); return ret; } @@ -98,7 +98,7 @@ static int pseries_add_memory(struct device_node *np) const char *type; const unsigned int *regs; unsigned long base; - unsigned int memblock_size; + unsigned int lmb_size; int ret = -EINVAL; /* @@ -116,36 +116,36 @@ static int pseries_add_memory(struct device_node *np) return ret; base = *(unsigned long *)regs; - memblock_size = regs[3]; + lmb_size = regs[3]; /* * Update memory region to represent the memory add */ - ret = memblock_add(base, memblock_size); + ret = memblock_add(base, lmb_size); return (ret < 0) ? -EINVAL : 0; } static int pseries_drconf_memory(unsigned long *base, unsigned int action) { struct device_node *np; - const unsigned long *memblock_size; + const unsigned long *lmb_size; int rc; np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (!np) return -EINVAL; - memblock_size = of_get_property(np, "ibm,memblock-size", NULL); - if (!memblock_size) { + lmb_size = of_get_property(np, "ibm,lmb-size", NULL); + if (!lmb_size) { of_node_put(np); return -EINVAL; } if (action == PSERIES_DRCONF_MEM_ADD) { - rc = memblock_add(*base, *memblock_size); + rc = memblock_add(*base, *lmb_size); rc = (rc < 0) ? -EINVAL : 0; } else if (action == PSERIES_DRCONF_MEM_REMOVE) { - rc = pseries_remove_memblock(*base, *memblock_size); + rc = pseries_remove_memblock(*base, *lmb_size); } else { rc = -EINVAL; } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 4b5902ad0d5d..030a954ed292 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -445,7 +445,7 @@ */ #define INIT_TASK_DATA_SECTION(align) \ . = ALIGN(align); \ - .data..init_task : { \ + .data..init_task : AT(ADDR(.data..init_task) - LOAD_OFFSET) { \ INIT_TASK_DATA(align) \ }