linux-stable/arch/arm64/mm/fault.c

606 lines
17 KiB
C
Raw Normal View History

/*
* Based on arch/arm/mm/fault.c
*
* Copyright (C) 1995 Linus Torvalds
* Copyright (C) 1995-2004 Russell King
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
static const char *fault_name(unsigned int esr);
/*
* Dump out the page tables associated with 'addr' in mm 'mm'.
*/
void show_pte(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
if (!mm)
mm = &init_mm;
pr_alert("pgd = %p\n", mm->pgd);
pgd = pgd_offset(mm, addr);
pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
do {
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (pgd_none(*pgd) || pgd_bad(*pgd))
break;
pud = pud_offset(pgd, addr);
arm64: mm: Implement 4 levels of translation tables This patch implements 4 levels of translation tables since 3 levels of page tables with 4KB pages cannot support 40-bit physical address space described in [1] due to the following issue. It is a restriction that kernel logical memory map with 4KB + 3 levels (0xffffffc000000000-0xffffffffffffffff) cannot cover RAM region from 544GB to 1024GB in [1]. Specifically, ARM64 kernel fails to create mapping for this region in map_mem function since __phys_to_virt for this region reaches to address overflow. If SoC design follows the document, [1], over 32GB RAM would be placed from 544GB. Even 64GB system is supposed to use the region from 544GB to 576GB for only 32GB RAM. Naturally, it would reach to enable 4 levels of page tables to avoid hacking __virt_to_phys and __phys_to_virt. However, it is recommended 4 levels of page table should be only enabled if memory map is too sparse or there is about 512GB RAM. References ---------- [1]: Principles of ARM Memory Maps, White Paper, Issue C Signed-off-by: Jungseok Lee <jays.lee@samsung.com> Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com> Acked-by: Kukjin Kim <kgene.kim@samsung.com> Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> Reviewed-by: Steve Capper <steve.capper@linaro.org> [catalin.marinas@arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE] [catalin.marinas@arm.com: early_ioremap_init() updated for 4 levels] [catalin.marinas@arm.com: 48-bit VA depends on BROKEN until KVM is fixed] Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Tested-by: Jungseok Lee <jungseoklee85@gmail.com>
2014-05-12 09:40:51 +00:00
printk(", *pud=%016llx", pud_val(*pud));
if (pud_none(*pud) || pud_bad(*pud))
break;
pmd = pmd_offset(pud, addr);
printk(", *pmd=%016llx", pmd_val(*pmd));
if (pmd_none(*pmd) || pmd_bad(*pmd))
break;
pte = pte_offset_map(pmd, addr);
printk(", *pte=%016llx", pte_val(*pte));
pte_unmap(pte);
} while(0);
printk("\n");
}
/*
* The kernel tried to access some page that wasn't present.
*/
static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
unsigned int esr, struct pt_regs *regs)
{
/*
* Are we prepared to handle this kernel fault?
*/
if (fixup_exception(regs))
return;
/*
* No handler, we'll have to terminate things with extreme prejudice.
*/
bust_spinlocks(1);
pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
(addr < PAGE_SIZE) ? "NULL pointer dereference" :
"paging request", addr);
show_pte(mm, addr);
die("Oops", regs, esr);
bust_spinlocks(0);
do_exit(SIGKILL);
}
/*
* Something tried to access memory that isn't in our memory map. User mode
* accesses just cause a SIGSEGV
*/
static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
unsigned int esr, unsigned int sig, int code,
struct pt_regs *regs)
{
struct siginfo si;
if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
addr, esr);
show_pte(tsk->mm, addr);
show_regs(regs);
}
tsk->thread.fault_address = addr;
tsk->thread.fault_code = esr;
si.si_signo = sig;
si.si_errno = 0;
si.si_code = code;
si.si_addr = (void __user *)addr;
force_sig_info(sig, &si, tsk);
}
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->active_mm;
/*
* If we are in kernel mode at this point, we have no context to
* handle this fault with.
*/
if (user_mode(regs))
__do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs);
else
__do_kernel_fault(mm, addr, esr, regs);
}
#define VM_FAULT_BADMAP 0x010000
#define VM_FAULT_BADACCESS 0x020000
#define ESR_LNX_EXEC (1 << 24)
static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
unsigned int mm_flags, unsigned long vm_flags,
struct task_struct *tsk)
{
struct vm_area_struct *vma;
int fault;
vma = find_vma(mm, addr);
fault = VM_FAULT_BADMAP;
if (unlikely(!vma))
goto out;
if (unlikely(vma->vm_start > addr))
goto check_stack;
/*
* Ok, we have a good vm_area for this memory access, so we can handle
* it.
*/
good_area:
/*
* Check that the permissions on the VMA allow for the fault which
* occurred. If we encountered a write or exec fault, we must have
* appropriate permissions, otherwise we allow any permission.
*/
if (!(vma->vm_flags & vm_flags)) {
fault = VM_FAULT_BADACCESS;
goto out;
}
return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags);
check_stack:
if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
goto good_area;
out:
return fault;
}
static inline int permission_fault(unsigned int esr)
{
unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT;
unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
}
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
tsk = current;
mm = tsk->mm;
/* Enable interrupts if they were enabled in the parent context. */
if (interrupts_enabled(regs))
local_irq_enable();
/*
* If we're in an interrupt or have no user context, we must not take
* the fault.
*/
mm/fault, arch: Use pagefault_disable() to check for disabled pagefaults in the handler Introduce faulthandler_disabled() and use it to check for irq context and disabled pagefaults (via pagefault_disable()) in the pagefault handlers. Please note that we keep the in_atomic() checks in place - to detect whether in irq context (in which case preemption is always properly disabled). In contrast, preempt_disable() should never be used to disable pagefaults. With !CONFIG_PREEMPT_COUNT, preempt_disable() doesn't modify the preempt counter, and therefore the result of in_atomic() differs. We validate that condition by using might_fault() checks when calling might_sleep(). Therefore, add a comment to faulthandler_disabled(), describing why this is needed. faulthandler_disabled() and pagefault_disable() are defined in linux/uaccess.h, so let's properly add that include to all relevant files. This patch is based on a patch from Thomas Gleixner. Reviewed-and-tested-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-7-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-05-11 15:52:11 +00:00
if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
mm_flags |= FAULT_FLAG_USER;
if (esr & ESR_LNX_EXEC) {
vm_flags = VM_EXEC;
} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
if (permission_fault(esr) && (addr < USER_DS)) {
if (get_fs() == KERNEL_DS)
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
if (!search_exception_tables(regs->pc))
die("Accessing user space memory outside uaccess.h routines", regs, esr);
}
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
* we can bug out early if this is from code which shouldn't.
*/
if (!down_read_trylock(&mm->mmap_sem)) {
if (!user_mode(regs) && !search_exception_tables(regs->pc))
goto no_context;
retry:
down_read(&mm->mmap_sem);
} else {
/*
* The above down_read_trylock() might have succeeded in which
* case, we'll have missed the might_sleep() from down_read().
*/
might_sleep();
#ifdef CONFIG_DEBUG_VM
if (!user_mode(regs) && !search_exception_tables(regs->pc))
goto no_context;
#endif
}
fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
/*
* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_sem because it
* would already be released in __lock_page_or_retry in mm/filemap.c.
*/
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
return 0;
/*
* Major/minor page fault accounting is only done on the initial
* attempt. If we go through a retry, it is extremely likely that the
* page will be found in page cache at that point.
*/
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
} else {
tsk->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
if (fault & VM_FAULT_RETRY) {
/*
* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
* starvation.
*/
mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
mm_flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
up_read(&mm->mmap_sem);
/*
* Handle the "normal" case first - VM_FAULT_MAJOR
*/
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
VM_FAULT_BADACCESS))))
return 0;
/*
* If we are in kernel mode at this point, we have no context to
* handle this fault with.
*/
if (!user_mode(regs))
goto no_context;
if (fault & VM_FAULT_OOM) {
/*
* We ran out of memory, call the OOM killer, and return to
* userspace (which will retry the fault, or kill us if we got
* oom-killed).
*/
pagefault_out_of_memory();
return 0;
}
if (fault & VM_FAULT_SIGBUS) {
/*
* We had some memory, but were unable to successfully fix up
* this page fault.
*/
sig = SIGBUS;
code = BUS_ADRERR;
} else {
/*
* Something tried to access memory that isn't in our memory
* map.
*/
sig = SIGSEGV;
code = fault == VM_FAULT_BADACCESS ?
SEGV_ACCERR : SEGV_MAPERR;
}
__do_user_fault(tsk, addr, esr, sig, code, regs);
return 0;
no_context:
__do_kernel_fault(mm, addr, esr, regs);
return 0;
}
/*
* First Level Translation Fault Handler
*
* We enter here because the first level page table doesn't contain a valid
* entry for the address.
*
* If the address is in kernel space (>= TASK_SIZE), then we are probably
* faulting in the vmalloc() area.
*
* If the init_task's first level page tables contains the relevant entry, we
* copy the it to this task. If not, we send the process a signal, fixup the
* exception, or oops the kernel.
*
* NOTE! We MUST NOT take any locks for this case. We may be in an interrupt
* or a critical region, and should only copy the information from the master
* page table, nothing more.
*/
static int __kprobes do_translation_fault(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
if (addr < TASK_SIZE)
return do_page_fault(addr, esr, regs);
do_bad_area(addr, esr, regs);
return 0;
}
static int do_alignment_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
do_bad_area(addr, esr, regs);
return 0;
}
/*
* This abort handler always returns "fault".
*/
static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
return 1;
}
static struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
int sig;
int code;
const char *name;
} fault_info[] = {
{ do_bad, SIGBUS, 0, "ttbr address size fault" },
{ do_bad, SIGBUS, 0, "level 1 address size fault" },
{ do_bad, SIGBUS, 0, "level 2 address size fault" },
{ do_bad, SIGBUS, 0, "level 3 address size fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
{ do_bad, SIGBUS, 0, "unknown 8" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
{ do_bad, SIGBUS, 0, "unknown 12" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_bad, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
{ do_bad, SIGBUS, 0, "unknown 34" },
{ do_bad, SIGBUS, 0, "unknown 35" },
{ do_bad, SIGBUS, 0, "unknown 36" },
{ do_bad, SIGBUS, 0, "unknown 37" },
{ do_bad, SIGBUS, 0, "unknown 38" },
{ do_bad, SIGBUS, 0, "unknown 39" },
{ do_bad, SIGBUS, 0, "unknown 40" },
{ do_bad, SIGBUS, 0, "unknown 41" },
{ do_bad, SIGBUS, 0, "unknown 42" },
{ do_bad, SIGBUS, 0, "unknown 43" },
{ do_bad, SIGBUS, 0, "unknown 44" },
{ do_bad, SIGBUS, 0, "unknown 45" },
{ do_bad, SIGBUS, 0, "unknown 46" },
{ do_bad, SIGBUS, 0, "unknown 47" },
{ do_bad, SIGBUS, 0, "TLB conflict abort" },
{ do_bad, SIGBUS, 0, "unknown 49" },
{ do_bad, SIGBUS, 0, "unknown 50" },
{ do_bad, SIGBUS, 0, "unknown 51" },
{ do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" },
{ do_bad, SIGBUS, 0, "implementation fault (unsupported exclusive)" },
{ do_bad, SIGBUS, 0, "unknown 54" },
{ do_bad, SIGBUS, 0, "unknown 55" },
{ do_bad, SIGBUS, 0, "unknown 56" },
{ do_bad, SIGBUS, 0, "unknown 57" },
{ do_bad, SIGBUS, 0, "unknown 58" },
{ do_bad, SIGBUS, 0, "unknown 59" },
{ do_bad, SIGBUS, 0, "unknown 60" },
{ do_bad, SIGBUS, 0, "section domain fault" },
{ do_bad, SIGBUS, 0, "page domain fault" },
{ do_bad, SIGBUS, 0, "unknown 63" },
};
static const char *fault_name(unsigned int esr)
{
const struct fault_info *inf = fault_info + (esr & 63);
return inf->name;
}
/*
* Dispatch a data abort to the relevant handler.
*/
asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
const struct fault_info *inf = fault_info + (esr & 63);
struct siginfo info;
if (!inf->fn(addr, esr, regs))
return;
pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",
inf->name, esr, addr);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
info.si_addr = (void __user *)addr;
arm64_notify_die("", regs, &info, esr);
}
/*
* Handle stack alignment exceptions.
*/
asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
struct siginfo info;
struct task_struct *tsk = current;
if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS))
pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n",
tsk->comm, task_pid_nr(tsk),
esr_get_class_string(esr), (void *)regs->pc,
(void *)regs->sp);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRALN;
info.si_addr = (void __user *)addr;
arm64_notify_die("Oops - SP/PC alignment exception", regs, &info, esr);
}
int __init early_brk64(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
/*
* __refdata because early_brk64 is __init, but the reference to it is
* clobbered at arch_initcall time.
* See traps.c and debug-monitors.c:debug_traps_init().
*/
static struct fault_info __refdata debug_fault_info[] = {
{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" },
{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" },
{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" },
{ do_bad, SIGBUS, 0, "unknown 3" },
{ do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" },
{ do_bad, SIGTRAP, 0, "aarch32 vector catch" },
{ early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" },
{ do_bad, SIGBUS, 0, "unknown 7" },
};
void __init hook_debug_fault_code(int nr,
int (*fn)(unsigned long, unsigned int, struct pt_regs *),
int sig, int code, const char *name)
{
BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
debug_fault_info[nr].fn = fn;
debug_fault_info[nr].sig = sig;
debug_fault_info[nr].code = code;
debug_fault_info[nr].name = name;
}
asmlinkage int __exception do_debug_exception(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
struct siginfo info;
arm64: mm: Add trace_irqflags annotations to do_debug_exception() With CONFIG_PROVE_LOCKING, CONFIG_DEBUG_LOCKDEP and CONFIG_TRACE_IRQFLAGS enabled, lockdep will compare current->hardirqs_enabled with the flags from local_irq_save(). When a debug exception occurs, interrupts are disabled in entry.S, but lockdep isn't told, resulting in: DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) ------------[ cut here ]------------ WARNING: at ../kernel/locking/lockdep.c:3523 Modules linked in: CPU: 3 PID: 1752 Comm: perf Not tainted 4.5.0-rc4+ #2204 Hardware name: ARM Juno development board (r1) (DT) task: ffffffc974868000 ti: ffffffc975f40000 task.ti: ffffffc975f40000 PC is at check_flags.part.35+0x17c/0x184 LR is at check_flags.part.35+0x17c/0x184 pc : [<ffffff80080fc93c>] lr : [<ffffff80080fc93c>] pstate: 600003c5 [...] ---[ end trace 74631f9305ef5020 ]--- Call trace: [<ffffff80080fc93c>] check_flags.part.35+0x17c/0x184 [<ffffff80080ffe30>] lock_acquire+0xa8/0xc4 [<ffffff8008093038>] breakpoint_handler+0x118/0x288 [<ffffff8008082434>] do_debug_exception+0x3c/0xa8 [<ffffff80080854b4>] el1_dbg+0x18/0x6c [<ffffff80081e82f4>] do_filp_open+0x64/0xdc [<ffffff80081d6e60>] do_sys_open+0x140/0x204 [<ffffff80081d6f58>] SyS_openat+0x10/0x18 [<ffffff8008085d30>] el0_svc_naked+0x24/0x28 possible reason: unannotated irqs-off. irq event stamp: 65857 hardirqs last enabled at (65857): [<ffffff80081fb1c0>] lookup_mnt+0xf4/0x1b4 hardirqs last disabled at (65856): [<ffffff80081fb188>] lookup_mnt+0xbc/0x1b4 softirqs last enabled at (65790): [<ffffff80080bdca4>] __do_softirq+0x1f8/0x290 softirqs last disabled at (65757): [<ffffff80080be038>] irq_exit+0x9c/0xd0 This patch adds the annotations to do_debug_exception(), while trying not to call trace_hardirqs_off() if el1_dbg() interrupted a task that already had irqs disabled. Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-04-13 12:40:00 +00:00
int rv;
arm64: mm: Add trace_irqflags annotations to do_debug_exception() With CONFIG_PROVE_LOCKING, CONFIG_DEBUG_LOCKDEP and CONFIG_TRACE_IRQFLAGS enabled, lockdep will compare current->hardirqs_enabled with the flags from local_irq_save(). When a debug exception occurs, interrupts are disabled in entry.S, but lockdep isn't told, resulting in: DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) ------------[ cut here ]------------ WARNING: at ../kernel/locking/lockdep.c:3523 Modules linked in: CPU: 3 PID: 1752 Comm: perf Not tainted 4.5.0-rc4+ #2204 Hardware name: ARM Juno development board (r1) (DT) task: ffffffc974868000 ti: ffffffc975f40000 task.ti: ffffffc975f40000 PC is at check_flags.part.35+0x17c/0x184 LR is at check_flags.part.35+0x17c/0x184 pc : [<ffffff80080fc93c>] lr : [<ffffff80080fc93c>] pstate: 600003c5 [...] ---[ end trace 74631f9305ef5020 ]--- Call trace: [<ffffff80080fc93c>] check_flags.part.35+0x17c/0x184 [<ffffff80080ffe30>] lock_acquire+0xa8/0xc4 [<ffffff8008093038>] breakpoint_handler+0x118/0x288 [<ffffff8008082434>] do_debug_exception+0x3c/0xa8 [<ffffff80080854b4>] el1_dbg+0x18/0x6c [<ffffff80081e82f4>] do_filp_open+0x64/0xdc [<ffffff80081d6e60>] do_sys_open+0x140/0x204 [<ffffff80081d6f58>] SyS_openat+0x10/0x18 [<ffffff8008085d30>] el0_svc_naked+0x24/0x28 possible reason: unannotated irqs-off. irq event stamp: 65857 hardirqs last enabled at (65857): [<ffffff80081fb1c0>] lookup_mnt+0xf4/0x1b4 hardirqs last disabled at (65856): [<ffffff80081fb188>] lookup_mnt+0xbc/0x1b4 softirqs last enabled at (65790): [<ffffff80080bdca4>] __do_softirq+0x1f8/0x290 softirqs last disabled at (65757): [<ffffff80080be038>] irq_exit+0x9c/0xd0 This patch adds the annotations to do_debug_exception(), while trying not to call trace_hardirqs_off() if el1_dbg() interrupted a task that already had irqs disabled. Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-04-13 12:40:00 +00:00
/*
* Tell lockdep we disabled irqs in entry.S. Do nothing if they were
* already disabled to preserve the last enabled/disabled addresses.
*/
if (interrupts_enabled(regs))
trace_hardirqs_off();
arm64: mm: Add trace_irqflags annotations to do_debug_exception() With CONFIG_PROVE_LOCKING, CONFIG_DEBUG_LOCKDEP and CONFIG_TRACE_IRQFLAGS enabled, lockdep will compare current->hardirqs_enabled with the flags from local_irq_save(). When a debug exception occurs, interrupts are disabled in entry.S, but lockdep isn't told, resulting in: DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) ------------[ cut here ]------------ WARNING: at ../kernel/locking/lockdep.c:3523 Modules linked in: CPU: 3 PID: 1752 Comm: perf Not tainted 4.5.0-rc4+ #2204 Hardware name: ARM Juno development board (r1) (DT) task: ffffffc974868000 ti: ffffffc975f40000 task.ti: ffffffc975f40000 PC is at check_flags.part.35+0x17c/0x184 LR is at check_flags.part.35+0x17c/0x184 pc : [<ffffff80080fc93c>] lr : [<ffffff80080fc93c>] pstate: 600003c5 [...] ---[ end trace 74631f9305ef5020 ]--- Call trace: [<ffffff80080fc93c>] check_flags.part.35+0x17c/0x184 [<ffffff80080ffe30>] lock_acquire+0xa8/0xc4 [<ffffff8008093038>] breakpoint_handler+0x118/0x288 [<ffffff8008082434>] do_debug_exception+0x3c/0xa8 [<ffffff80080854b4>] el1_dbg+0x18/0x6c [<ffffff80081e82f4>] do_filp_open+0x64/0xdc [<ffffff80081d6e60>] do_sys_open+0x140/0x204 [<ffffff80081d6f58>] SyS_openat+0x10/0x18 [<ffffff8008085d30>] el0_svc_naked+0x24/0x28 possible reason: unannotated irqs-off. irq event stamp: 65857 hardirqs last enabled at (65857): [<ffffff80081fb1c0>] lookup_mnt+0xf4/0x1b4 hardirqs last disabled at (65856): [<ffffff80081fb188>] lookup_mnt+0xbc/0x1b4 softirqs last enabled at (65790): [<ffffff80080bdca4>] __do_softirq+0x1f8/0x290 softirqs last disabled at (65757): [<ffffff80080be038>] irq_exit+0x9c/0xd0 This patch adds the annotations to do_debug_exception(), while trying not to call trace_hardirqs_off() if el1_dbg() interrupted a task that already had irqs disabled. Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-04-13 12:40:00 +00:00
if (!inf->fn(addr, esr, regs)) {
rv = 1;
} else {
pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
inf->name, esr, addr);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
info.si_addr = (void __user *)addr;
arm64_notify_die("", regs, &info, 0);
rv = 0;
}
arm64: mm: Add trace_irqflags annotations to do_debug_exception() With CONFIG_PROVE_LOCKING, CONFIG_DEBUG_LOCKDEP and CONFIG_TRACE_IRQFLAGS enabled, lockdep will compare current->hardirqs_enabled with the flags from local_irq_save(). When a debug exception occurs, interrupts are disabled in entry.S, but lockdep isn't told, resulting in: DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) ------------[ cut here ]------------ WARNING: at ../kernel/locking/lockdep.c:3523 Modules linked in: CPU: 3 PID: 1752 Comm: perf Not tainted 4.5.0-rc4+ #2204 Hardware name: ARM Juno development board (r1) (DT) task: ffffffc974868000 ti: ffffffc975f40000 task.ti: ffffffc975f40000 PC is at check_flags.part.35+0x17c/0x184 LR is at check_flags.part.35+0x17c/0x184 pc : [<ffffff80080fc93c>] lr : [<ffffff80080fc93c>] pstate: 600003c5 [...] ---[ end trace 74631f9305ef5020 ]--- Call trace: [<ffffff80080fc93c>] check_flags.part.35+0x17c/0x184 [<ffffff80080ffe30>] lock_acquire+0xa8/0xc4 [<ffffff8008093038>] breakpoint_handler+0x118/0x288 [<ffffff8008082434>] do_debug_exception+0x3c/0xa8 [<ffffff80080854b4>] el1_dbg+0x18/0x6c [<ffffff80081e82f4>] do_filp_open+0x64/0xdc [<ffffff80081d6e60>] do_sys_open+0x140/0x204 [<ffffff80081d6f58>] SyS_openat+0x10/0x18 [<ffffff8008085d30>] el0_svc_naked+0x24/0x28 possible reason: unannotated irqs-off. irq event stamp: 65857 hardirqs last enabled at (65857): [<ffffff80081fb1c0>] lookup_mnt+0xf4/0x1b4 hardirqs last disabled at (65856): [<ffffff80081fb188>] lookup_mnt+0xbc/0x1b4 softirqs last enabled at (65790): [<ffffff80080bdca4>] __do_softirq+0x1f8/0x290 softirqs last disabled at (65757): [<ffffff80080be038>] irq_exit+0x9c/0xd0 This patch adds the annotations to do_debug_exception(), while trying not to call trace_hardirqs_off() if el1_dbg() interrupted a task that already had irqs disabled. Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-04-13 12:40:00 +00:00
if (interrupts_enabled(regs))
trace_hardirqs_on();
arm64: mm: Add trace_irqflags annotations to do_debug_exception() With CONFIG_PROVE_LOCKING, CONFIG_DEBUG_LOCKDEP and CONFIG_TRACE_IRQFLAGS enabled, lockdep will compare current->hardirqs_enabled with the flags from local_irq_save(). When a debug exception occurs, interrupts are disabled in entry.S, but lockdep isn't told, resulting in: DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) ------------[ cut here ]------------ WARNING: at ../kernel/locking/lockdep.c:3523 Modules linked in: CPU: 3 PID: 1752 Comm: perf Not tainted 4.5.0-rc4+ #2204 Hardware name: ARM Juno development board (r1) (DT) task: ffffffc974868000 ti: ffffffc975f40000 task.ti: ffffffc975f40000 PC is at check_flags.part.35+0x17c/0x184 LR is at check_flags.part.35+0x17c/0x184 pc : [<ffffff80080fc93c>] lr : [<ffffff80080fc93c>] pstate: 600003c5 [...] ---[ end trace 74631f9305ef5020 ]--- Call trace: [<ffffff80080fc93c>] check_flags.part.35+0x17c/0x184 [<ffffff80080ffe30>] lock_acquire+0xa8/0xc4 [<ffffff8008093038>] breakpoint_handler+0x118/0x288 [<ffffff8008082434>] do_debug_exception+0x3c/0xa8 [<ffffff80080854b4>] el1_dbg+0x18/0x6c [<ffffff80081e82f4>] do_filp_open+0x64/0xdc [<ffffff80081d6e60>] do_sys_open+0x140/0x204 [<ffffff80081d6f58>] SyS_openat+0x10/0x18 [<ffffff8008085d30>] el0_svc_naked+0x24/0x28 possible reason: unannotated irqs-off. irq event stamp: 65857 hardirqs last enabled at (65857): [<ffffff80081fb1c0>] lookup_mnt+0xf4/0x1b4 hardirqs last disabled at (65856): [<ffffff80081fb188>] lookup_mnt+0xbc/0x1b4 softirqs last enabled at (65790): [<ffffff80080bdca4>] __do_softirq+0x1f8/0x290 softirqs last disabled at (65757): [<ffffff80080be038>] irq_exit+0x9c/0xd0 This patch adds the annotations to do_debug_exception(), while trying not to call trace_hardirqs_off() if el1_dbg() interrupted a task that already had irqs disabled. Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-04-13 12:40:00 +00:00
return rv;
}
#ifdef CONFIG_ARM64_PAN
arm64: Delay cpu feature capability checks At the moment we run through the arm64_features capability list for each CPU and set the capability if one of the CPU supports it. This could be problematic in a heterogeneous system with differing capabilities. Delay the CPU feature checks until all the enabled CPUs are up(i.e, smp_cpus_done(), so that we can make better decisions based on the overall system capability. Once we decide and advertise the capabilities the alternatives can be applied. From this state, we cannot roll back a feature to disabled based on the values from a new hotplugged CPU, due to the runtime patching and other reasons. So, for all new CPUs, we need to make sure that they have the established system capabilities. Failing which, we bring the CPU down, preventing it from turning online. Once the capabilities are decided, any new CPU booting up goes through verification to ensure that it has all the enabled capabilities and also invokes the respective enable() method on the CPU. The CPU errata checks are not delayed and is still executed per-CPU to detect the respective capabilities. If we ever come across a non-errata capability that needs to be checked on each-CPU, we could introduce them via a new capability table(or introduce a flag), which can be processed per CPU. The next patch will make the feature checks use the system wide safe value of a feature register. NOTE: The enable() methods associated with the capability is scheduled on all the CPUs (which is the only use case at the moment). If we need a different type of 'enable()' which only needs to be run once on any CPU, we should be able to handle that when needed. Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com> Tested-by: Dave Martin <Dave.Martin@arm.com> [catalin.marinas@arm.com: static variable and coding style fixes] Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2015-10-19 13:24:50 +00:00
void cpu_enable_pan(void *__unused)
{
config_sctlr_el1(SCTLR_EL1_SPAN, 0);
}
#endif /* CONFIG_ARM64_PAN */
#ifdef CONFIG_ARM64_UAO
/*
* Kernel threads have fs=KERNEL_DS by default, and don't need to call
* set_fs(), devtmpfs in particular relies on this behaviour.
* We need to enable the feature at runtime (instead of adding it to
* PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
*/
void cpu_enable_uao(void *__unused)
{
asm(SET_PSTATE_UAO(1));
}
#endif /* CONFIG_ARM64_UAO */