Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc fixes from David Miller:
 "I've been auditing the THP support on sparc64 and found several bugs,
  hopefully most of which are fixed completely here.

  Also an RT kernel locking fix from Kirill Tkhai"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Give more detailed information in {pgd,pmd}_ERROR() and kill pte_ERROR().
  sparc64: Add basic validations to {pud,pmd}_bad().
  sparc64: Use 'ILOG2_4MB' instead of constant '22'.
  sparc64: Fix range check in kern_addr_valid().
  sparc64: Fix top-level fault handling bugs.
  sparc64: Handle 32-bit tasks properly in compute_effective_address().
  sparc64: Don't use _PAGE_PRESENT in pte_modify() mask.
  sparc64: Fix hex values in comment above pte_modify().
  sparc64: Fix bugs in get_user_pages_fast() wrt. THP.
  sparc64: Fix huge PMD invalidation.
  sparc64: Fix executable bit testing in set_pmd_at() paths.
  sparc64: Normalize NMI watchdog logging and behavior.
  sparc64: Make itc_sync_lock raw
  sparc64: Fix argument sign extension for compat_sys_futex().
This commit is contained in:
Linus Torvalds 2014-05-06 09:08:03 -07:00
commit 9bd29c56ad
12 changed files with 149 additions and 108 deletions

View file

@ -71,6 +71,23 @@
#include <linux/sched.h>
extern unsigned long sparc64_valid_addr_bitmap[];
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
static inline bool __kern_addr_valid(unsigned long paddr)
{
if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
return false;
return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
}
static inline bool kern_addr_valid(unsigned long addr)
{
unsigned long paddr = __pa(addr);
return __kern_addr_valid(paddr);
}
/* Entries per page directory level. */
#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3))
#define PTRS_PER_PMD (1UL << PMD_BITS)
@ -79,9 +96,12 @@
/* Kernel has a separate 44bit address space. */
#define FIRST_USER_ADDRESS 0
#define pte_ERROR(e) __builtin_trap()
#define pmd_ERROR(e) __builtin_trap()
#define pgd_ERROR(e) __builtin_trap()
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \
__FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0))
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \
__FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0))
#endif /* !(__ASSEMBLY__) */
@ -258,8 +278,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
{
unsigned long mask, tmp;
/* SUN4U: 0x600307ffffffecb8 (negated == 0x9ffcf80000001347)
* SUN4V: 0x30ffffffffffee17 (negated == 0xcf000000000011e8)
/* SUN4U: 0x630107ffffffec38 (negated == 0x9cfef800000013c7)
* SUN4V: 0x33ffffffffffee07 (negated == 0xcc000000000011f8)
*
* Even if we use negation tricks the result is still a 6
* instruction sequence, so don't try to play fancy and just
@ -289,10 +309,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
" .previous\n"
: "=r" (mask), "=r" (tmp)
: "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U |
_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U |
_PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U),
"i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V |
_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V |
_PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V));
return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
@ -633,7 +653,7 @@ static inline unsigned long pmd_large(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
return (pte_val(pte) & _PAGE_PMD_HUGE) && pte_present(pte);
return pte_val(pte) & _PAGE_PMD_HUGE;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@ -719,20 +739,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
return __pmd(pte_val(pte));
}
static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
unsigned long mask;
if (tlb_type == hypervisor)
mask = _PAGE_PRESENT_4V;
else
mask = _PAGE_PRESENT_4U;
pmd_val(pmd) &= ~mask;
return pmd;
}
static inline pmd_t pmd_mksplitting(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
@ -757,6 +763,20 @@ static inline int pmd_present(pmd_t pmd)
#define pmd_none(pmd) (!pmd_val(pmd))
/* pmd_bad() is only called on non-trans-huge PMDs. Our encoding is
* very simple, it's just the physical address. PTE tables are of
* size PAGE_SIZE so make sure the sub-PAGE_SIZE bits are clear and
* the top bits outside of the range of any physical address size we
* support are clear as well. We also validate the physical itself.
*/
#define pmd_bad(pmd) ((pmd_val(pmd) & ~PAGE_MASK) || \
!__kern_addr_valid(pmd_val(pmd)))
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) ((pud_val(pud) & ~PAGE_MASK) || \
!__kern_addr_valid(pud_val(pud)))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
@ -790,10 +810,7 @@ static inline unsigned long __pmd_page(pmd_t pmd)
#define pud_page_vaddr(pud) \
((unsigned long) __va(pud_val(pud)))
#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
#define pmd_bad(pmd) (0)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (0)
#define pud_present(pud) (pud_val(pud) != 0U)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
@ -893,6 +910,10 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
#define __HAVE_ARCH_PMDP_INVALIDATE
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
#define __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable);
@ -919,18 +940,6 @@ extern unsigned long pte_file(pte_t);
extern pte_t pgoff_to_pte(unsigned long);
#define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL)
extern unsigned long sparc64_valid_addr_bitmap[];
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
static inline bool kern_addr_valid(unsigned long addr)
{
unsigned long paddr = __pa(addr);
if ((paddr >> 41UL) != 0UL)
return false;
return test_bit(paddr >> 22, sparc64_valid_addr_bitmap);
}
extern int page_in_phys_avail(unsigned long paddr);
/*

View file

@ -171,7 +171,8 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
andcc REG1, REG2, %g0; \
be,pt %xcc, 700f; \
sethi %hi(4 * 1024 * 1024), REG2; \
andn REG1, REG2, REG1; \
brgez,pn REG1, FAIL_LABEL; \
andn REG1, REG2, REG1; \
and VADDR, REG2, REG2; \
brlz,pt REG1, PTE_LABEL; \
or REG1, REG2, REG1; \

View file

@ -282,8 +282,8 @@ sun4v_chip_type:
stx %l2, [%l4 + 0x0]
ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low
/* 4MB align */
srlx %l3, 22, %l3
sllx %l3, 22, %l3
srlx %l3, ILOG2_4MB, %l3
sllx %l3, ILOG2_4MB, %l3
stx %l3, [%l4 + 0x8]
/* Leave service as-is, "call-method" */

View file

@ -277,7 +277,7 @@ kvmap_dtlb_load:
#ifdef CONFIG_SPARSEMEM_VMEMMAP
kvmap_vmemmap:
sub %g4, %g5, %g5
srlx %g5, 22, %g5
srlx %g5, ILOG2_4MB, %g5
sethi %hi(vmemmap_table), %g1
sllx %g5, 3, %g5
or %g1, %lo(vmemmap_table), %g1

View file

@ -68,27 +68,16 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
int this_cpu = smp_processor_id();
if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
return;
console_verbose();
bust_spinlocks(1);
printk(KERN_EMERG "%s", str);
printk(" on CPU%d, ip %08lx, registers:\n",
smp_processor_id(), regs->tpc);
show_regs(regs);
dump_stack();
bust_spinlocks(0);
if (do_panic || panic_on_oops)
panic("Non maskable interrupt");
nmi_exit();
local_irq_enable();
do_exit(SIGBUS);
panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
else
WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
}
notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)

View file

@ -149,7 +149,7 @@ void cpu_panic(void)
#define NUM_ROUNDS 64 /* magic value */
#define NUM_ITERS 5 /* likewise */
static DEFINE_SPINLOCK(itc_sync_lock);
static DEFINE_RAW_SPINLOCK(itc_sync_lock);
static unsigned long go[SLAVE + 1];
#define DEBUG_TICK_SYNC 0
@ -257,7 +257,7 @@ static void smp_synchronize_one_tick(int cpu)
go[MASTER] = 0;
membar_safe("#StoreLoad");
spin_lock_irqsave(&itc_sync_lock, flags);
raw_spin_lock_irqsave(&itc_sync_lock, flags);
{
for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
while (!go[MASTER])
@ -268,7 +268,7 @@ static void smp_synchronize_one_tick(int cpu)
membar_safe("#StoreLoad");
}
}
spin_unlock_irqrestore(&itc_sync_lock, flags);
raw_spin_unlock_irqrestore(&itc_sync_lock, flags);
}
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)

View file

@ -44,7 +44,7 @@ SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1)
SIGN1(sys32_io_submit, compat_sys_io_submit, %o1)
SIGN1(sys32_mq_open, compat_sys_mq_open, %o1)
SIGN1(sys32_select, compat_sys_select, %o0)
SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5)
SIGN1(sys32_futex, compat_sys_futex, %o1)
SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0)
SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0)
SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0)

View file

@ -166,17 +166,23 @@ static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs)
unsigned long compute_effective_address(struct pt_regs *regs,
unsigned int insn, unsigned int rd)
{
int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
unsigned int rs1 = (insn >> 14) & 0x1f;
unsigned int rs2 = insn & 0x1f;
int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
unsigned long addr;
if (insn & 0x2000) {
maybe_flush_windows(rs1, 0, rd, from_kernel);
return (fetch_reg(rs1, regs) + sign_extend_imm13(insn));
addr = (fetch_reg(rs1, regs) + sign_extend_imm13(insn));
} else {
maybe_flush_windows(rs1, rs2, rd, from_kernel);
return (fetch_reg(rs1, regs) + fetch_reg(rs2, regs));
addr = (fetch_reg(rs1, regs) + fetch_reg(rs2, regs));
}
if (!from_kernel && test_thread_flag(TIF_32BIT))
addr &= 0xffffffff;
return addr;
}
/* This is just to make gcc think die_if_kernel does return... */

View file

@ -96,38 +96,51 @@ static unsigned int get_user_insn(unsigned long tpc)
pte_t *ptep, pte;
unsigned long pa;
u32 insn = 0;
unsigned long pstate;
if (pgd_none(*pgdp))
goto outret;
if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
goto out;
pudp = pud_offset(pgdp, tpc);
if (pud_none(*pudp))
goto outret;
pmdp = pmd_offset(pudp, tpc);
if (pmd_none(*pmdp))
goto outret;
/* This disables preemption for us as well. */
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
__asm__ __volatile__("wrpr %0, %1, %%pstate"
: : "r" (pstate), "i" (PSTATE_IE));
ptep = pte_offset_map(pmdp, tpc);
pte = *ptep;
if (!pte_present(pte))
if (pud_none(*pudp) || unlikely(pud_bad(*pudp)))
goto out;
pa = (pte_pfn(pte) << PAGE_SHIFT);
pa += (tpc & ~PAGE_MASK);
/* This disables preemption for us as well. */
local_irq_disable();
/* Use phys bypass so we don't pollute dtlb/dcache. */
__asm__ __volatile__("lduwa [%1] %2, %0"
: "=r" (insn)
: "r" (pa), "i" (ASI_PHYS_USE_EC));
pmdp = pmd_offset(pudp, tpc);
if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
goto out_irq_enable;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_trans_huge(*pmdp)) {
if (pmd_trans_splitting(*pmdp))
goto out_irq_enable;
pa = pmd_pfn(*pmdp) << PAGE_SHIFT;
pa += tpc & ~HPAGE_MASK;
/* Use phys bypass so we don't pollute dtlb/dcache. */
__asm__ __volatile__("lduwa [%1] %2, %0"
: "=r" (insn)
: "r" (pa), "i" (ASI_PHYS_USE_EC));
} else
#endif
{
ptep = pte_offset_map(pmdp, tpc);
pte = *ptep;
if (pte_present(pte)) {
pa = (pte_pfn(pte) << PAGE_SHIFT);
pa += (tpc & ~PAGE_MASK);
/* Use phys bypass so we don't pollute dtlb/dcache. */
__asm__ __volatile__("lduwa [%1] %2, %0"
: "=r" (insn)
: "r" (pa), "i" (ASI_PHYS_USE_EC));
}
pte_unmap(ptep);
}
out_irq_enable:
local_irq_enable();
out:
pte_unmap(ptep);
__asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate));
outret:
return insn;
}
@ -153,7 +166,8 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
}
static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
unsigned int insn, int fault_code)
unsigned long fault_addr, unsigned int insn,
int fault_code)
{
unsigned long addr;
siginfo_t info;
@ -161,10 +175,18 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
info.si_code = code;
info.si_signo = sig;
info.si_errno = 0;
if (fault_code & FAULT_CODE_ITLB)
if (fault_code & FAULT_CODE_ITLB) {
addr = regs->tpc;
else
addr = compute_effective_address(regs, insn, 0);
} else {
/* If we were able to probe the faulting instruction, use it
* to compute a precise fault address. Otherwise use the fault
* time provided address which may only have page granularity.
*/
if (insn)
addr = compute_effective_address(regs, insn, 0);
else
addr = fault_addr;
}
info.si_addr = (void __user *) addr;
info.si_trapno = 0;
@ -239,7 +261,7 @@ static void __kprobes do_kernel_fault(struct pt_regs *regs, int si_code,
/* The si_code was set to make clear whether
* this was a SEGV_MAPERR or SEGV_ACCERR fault.
*/
do_fault_siginfo(si_code, SIGSEGV, regs, insn, fault_code);
do_fault_siginfo(si_code, SIGSEGV, regs, address, insn, fault_code);
return;
}
@ -525,7 +547,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, insn, fault_code);
do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, address, insn, fault_code);
/* Kernel mode? Handle exceptions or die */
if (regs->tstate & TSTATE_PRIV)

View file

@ -73,7 +73,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
struct page *head, *page, *tail;
int refs;
if (!pmd_large(pmd))
if (!(pmd_val(pmd) & _PAGE_VALID))
return 0;
if (write && !pmd_write(pmd))

View file

@ -588,7 +588,7 @@ static void __init remap_kernel(void)
int i, tlb_ent = sparc64_highest_locked_tlbent();
tte_vaddr = (unsigned long) KERNBASE;
phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
tte_data = kern_large_tte(phys_page);
kern_locked_tte_data = tte_data;
@ -1881,7 +1881,7 @@ void __init paging_init(void)
BUILD_BUG_ON(NR_CPUS > 4096);
kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
kern_base = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
/* Invalidate both kernel TSBs. */
@ -1937,7 +1937,7 @@ void __init paging_init(void)
shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
real_end = (unsigned long)_end;
num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << 22);
num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << ILOG2_4MB);
printk("Kernel: Using %d locked TLB entries for main kernel image.\n",
num_kernel_image_mappings);
@ -2094,7 +2094,7 @@ static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
if (new_start <= old_start &&
new_end >= (old_start + PAGE_SIZE)) {
set_bit(old_start >> 22, bitmap);
set_bit(old_start >> ILOG2_4MB, bitmap);
goto do_next_page;
}
}
@ -2143,7 +2143,7 @@ void __init mem_init(void)
addr = PAGE_OFFSET + kern_base;
last = PAGE_ALIGN(kern_size) + addr;
while (addr < last) {
set_bit(__pa(addr) >> 22, sparc64_valid_addr_bitmap);
set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
addr += PAGE_SIZE;
}
@ -2267,7 +2267,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
void *block;
if (!(*vmem_pp & _PAGE_VALID)) {
block = vmemmap_alloc_block(1UL << 22, node);
block = vmemmap_alloc_block(1UL << ILOG2_4MB, node);
if (!block)
return -ENOMEM;

View file

@ -134,7 +134,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
pmd_t pmd, bool exec)
pmd_t pmd)
{
unsigned long end;
pte_t *pte;
@ -142,8 +142,11 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
pte = pte_offset_map(&pmd, vaddr);
end = vaddr + HPAGE_SIZE;
while (vaddr < end) {
if (pte_val(*pte) & _PAGE_VALID)
if (pte_val(*pte) & _PAGE_VALID) {
bool exec = pte_exec(*pte);
tlb_batch_add_one(mm, vaddr, exec);
}
pte++;
vaddr += PAGE_SIZE;
}
@ -177,19 +180,30 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
}
if (!pmd_none(orig)) {
pte_t orig_pte = __pte(pmd_val(orig));
bool exec = pte_exec(orig_pte);
addr &= HPAGE_MASK;
if (pmd_trans_huge(orig)) {
pte_t orig_pte = __pte(pmd_val(orig));
bool exec = pte_exec(orig_pte);
tlb_batch_add_one(mm, addr, exec);
tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
} else {
tlb_batch_pmd_scan(mm, addr, orig, exec);
tlb_batch_pmd_scan(mm, addr, orig);
}
}
}
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_t entry = *pmdp;
pmd_val(entry) &= ~_PAGE_VALID;
set_pmd_at(vma->vm_mm, address, pmdp, entry);
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable)
{