From 33d418da6f476b15e4510e0a590062583f63cd36 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 19 May 2023 15:13:11 +0200 Subject: [PATCH 1/6] riscv: Fix unused variable warning when BUILTIN_DTB is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ef69d2559fe9 ("riscv: Move early dtb mapping into the fixmap region") wrongly moved the #ifndef CONFIG_BUILTIN_DTB surrounding the pa variable definition in create_fdt_early_page_table(), so move it back to its right place to quiet the following warning: ../arch/riscv/mm/init.c: In function ‘create_fdt_early_page_table’: ../arch/riscv/mm/init.c:925:12: warning: unused variable ‘pa’ [-Wunused-variable] 925 | uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); Fixes: ef69d2559fe9 ("riscv: Move early dtb mapping into the fixmap region") Signed-off-by: Alexandre Ghiti Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230519131311.391960-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 747e5b1ef02d..c6bb966e4123 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -922,9 +922,9 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va, uintptr_t dtb_pa) { +#ifndef CONFIG_BUILTIN_DTB uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); -#ifndef CONFIG_BUILTIN_DTB /* Make sure the fdt fixmap address is always aligned on PMD size */ BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE)); From ed309ce522185583b163bd0c74f0d9f299fe1826 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 26 May 2023 11:59:08 +0100 Subject: [PATCH 2/6] RISC-V: mark hibernation as nonportable Hibernation support depends on firmware marking its reserved/PMP protected regions as not accessible from Linux. The latest versions of the de-facto SBI implementation (OpenSBI) do not do this, having dropped the no-map property to enable 1 GiB huge page mappings by the kernel. This was exposed by commit 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping"), which made the first 2 MiB of DRAM (where SBI typically resides) accessible by the kernel. Attempting to hibernate with either OpenSBI, or other implementations following its lead, will lead to a kernel panic ([1], [2]) as the hibernation process will attempt to save/restore any mapped regions, including the PMP protected regions in use by the SBI implementation. Mark hibernation as depending on "NONPORTABLE", as only a small subset of systems are capable of supporting it, until such time that an SBI implementation independent way to communicate what regions are in use has been agreed on. As hibernation support landed in v6.4-rc1, disabling it for most platforms does not constitute a regression. The alternative would have been reverting commit 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping"). Doing so would permit hibernation on platforms with these SBI implementations, but would limit the options we have to solve the protection of the region without causing a regression in hibernation support. Reported-by: Song Shuai Link: https://lore.kernel.org/all/CAAYs2=gQvkhTeioMmqRDVGjdtNF_vhB+vm_1dHJxPNi75YDQ_Q@mail.gmail.com/ [1] Reported-by: JeeHeng Sia Link: https://groups.google.com/a/groups.riscv.org/g/sw-dev/c/ITXwaKfA6z8 [2] Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230526-astride-detonator-9ae120051159@wendy Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 348c0fa1fc8c..2bb0c38419ff 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -799,8 +799,11 @@ menu "Power management options" source "kernel/power/Kconfig" +# Hibernation is only possible on systems where the SBI implementation has +# marked its reserved memory as not accessible from, or does not run +# from the same memory as, Linux config ARCH_HIBERNATION_POSSIBLE - def_bool y + def_bool NONPORTABLE config ARCH_HIBERNATION_HEADER def_bool HIBERNATION From 8dc2a7e8027fbeca0c7df81d4c82e735a59b5741 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 26 May 2023 17:46:30 +0200 Subject: [PATCH 3/6] riscv: Fix relocatable kernels with early alternatives using -fno-pie Early alternatives are called with the mmu disabled, and then should not access any global symbols through the GOT since it requires relocations, relocations that we do before but *virtually*. So only use medany code model for this early code. Signed-off-by: Alexandre Ghiti Tested-by: Conor Dooley # booted on nezha & unmatched Fixes: 39b33072941f ("riscv: Introduce CONFIG_RELOCATABLE") Link: https://lore.kernel.org/r/20230526154630.289374-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/Makefile | 4 ++++ arch/riscv/kernel/Makefile | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile index a1055965fbee..7b2637c8c332 100644 --- a/arch/riscv/errata/Makefile +++ b/arch/riscv/errata/Makefile @@ -1,2 +1,6 @@ +ifdef CONFIG_RELOCATABLE +KBUILD_CFLAGS += -fno-pie +endif + obj-$(CONFIG_ERRATA_SIFIVE) += sifive/ obj-$(CONFIG_ERRATA_THEAD) += thead/ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index fbdccc21418a..153864e4f399 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -23,6 +23,10 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE) endif +ifdef CONFIG_RELOCATABLE +CFLAGS_alternative.o += -fno-pie +CFLAGS_cpufeature.o += -fno-pie +endif ifdef CONFIG_KASAN KASAN_SANITIZE_alternative.o := n KASAN_SANITIZE_cpufeature.o := n From 9a7e8ec0d4cc64870ea449b4fce5779b77496cbb Mon Sep 17 00:00:00 2001 From: Ism Hong Date: Thu, 1 Jun 2023 17:53:55 +0800 Subject: [PATCH 4/6] riscv: perf: Fix callchain parse error with kernel tracepoint events For RISC-V, when tracing with tracepoint events, the IP and status are set to 0, preventing the perf code parsing the callchain and resolving the symbols correctly. ./ply 'tracepoint:kmem/kmem_cache_alloc { @[stack]=count(); }' @: { }: 1 The fix is to implement perf_arch_fetch_caller_regs for riscv, which fills several necessary registers used for callchain unwinding, including epc, sp, s0 and status. It's similar to commit b3eac0265bf6 ("arm: perf: Fix callchain parse error with kernel tracepoint events") and commit 5b09a094f2fb ("arm64: perf: Fix callchain parse error with kernel tracepoint events"). With this patch, callchain can be parsed correctly as: ./ply 'tracepoint:kmem/kmem_cache_alloc { @[stack]=count(); }' @: { __traceiter_kmem_cache_alloc+68 __traceiter_kmem_cache_alloc+68 kmem_cache_alloc+354 __sigqueue_alloc+94 __send_signal_locked+646 send_signal_locked+154 do_send_sig_info+84 __kill_pgrp_info+130 kill_pgrp+60 isig+150 n_tty_receive_signal_char+36 n_tty_receive_buf_standard+2214 n_tty_receive_buf_common+280 n_tty_receive_buf2+26 tty_ldisc_receive_buf+34 tty_port_default_receive_buf+62 flush_to_ldisc+158 process_one_work+458 worker_thread+138 kthread+178 riscv_cpufeature_patch_func+832 }: 1 Signed-off-by: Ism Hong Link: https://lore.kernel.org/r/20230601095355.1168910-1-ism.hong@gmail.com Fixes: 178e9fc47aae ("perf: riscv: preliminary RISC-V support") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index d42c901f9a97..665bbc9b2f84 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -10,4 +10,11 @@ #include #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs + +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->epc = (__ip); \ + (regs)->s0 = (unsigned long) __builtin_frame_address(0); \ + (regs)->sp = current_stack_pointer; \ + (regs)->status = SR_PP; \ +} #endif /* _ASM_RISCV_PERF_EVENT_H */ From 835e5ac3f98e78eca5c512bd48bd1880b90c4eb1 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 28 Apr 2023 14:01:19 +0200 Subject: [PATCH 5/6] riscv: Fix huge_ptep_set_wrprotect when PTE is a NAPOT We need to avoid inconsistencies across the PTEs that form a NAPOT region, so when we write protect such a region, we should clear and flush all the PTEs to make sure that any of those PTEs is not cached which would result in such inconsistencies (arm64 does the same). Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230428120120.21620-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/hugetlbpage.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index a163a3e0f0d4..238d00bdac14 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -218,6 +218,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, { pte_t pte = ptep_get(ptep); unsigned long order; + pte_t orig_pte; int i, pte_num; if (!pte_napot(pte)) { @@ -228,9 +229,12 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, order = napot_cont_order(pte); pte_num = napot_pte_num(order); ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); + + orig_pte = pte_wrprotect(orig_pte); for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) - ptep_set_wrprotect(mm, addr, ptep); + set_pte_at(mm, addr, ptep, orig_pte); } pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, From 6966d7988c4fb6af3e395868e9800c07f9e98a30 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 28 Apr 2023 14:01:20 +0200 Subject: [PATCH 6/6] riscv: Implement missing huge_ptep_get huge_ptep_get must be reimplemented in order to go through all the PTEs of a NAPOT region: this is needed because the HW can update the A/D bits of any of the PTE that constitutes the NAPOT region. Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230428120120.21620-2-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hugetlb.h | 3 +++ arch/riscv/mm/hugetlbpage.c | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index fe6f23006641..ce1ebda1a49a 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -36,6 +36,9 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); +#define __HAVE_ARCH_HUGE_PTEP_GET +pte_t huge_ptep_get(pte_t *ptep); + pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); #define arch_make_huge_pte arch_make_huge_pte diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 238d00bdac14..e0ef56dc57b9 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -3,6 +3,30 @@ #include #ifdef CONFIG_RISCV_ISA_SVNAPOT +pte_t huge_ptep_get(pte_t *ptep) +{ + unsigned long pte_num; + int i; + pte_t orig_pte = ptep_get(ptep); + + if (!pte_present(orig_pte) || !pte_napot(orig_pte)) + return orig_pte; + + pte_num = napot_pte_num(napot_cont_order(orig_pte)); + + for (i = 0; i < pte_num; i++, ptep++) { + pte_t pte = ptep_get(ptep); + + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); + } + + return orig_pte; +} + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr,