membarrier: riscv: Provide core serializing command

RISC-V uses xRET instructions on return from interrupt and to go back
to user-space; the xRET instruction is not core serializing.

Use FENCE.I for providing core serialization as follows:

 - by calling sync_core_before_usermode() on return from interrupt (cf.
   ipi_sync_core()),

 - via switch_mm() and sync_core_before_usermode() (respectively, for
   uthread->uthread and kthread->uthread transitions) before returning
   to user-space.

On RISC-V, the serialization in switch_mm() is activated by resetting
the icache_stale_mask of the mm at prepare_sync_core_cmd().

Suggested-by: Palmer Dabbelt <palmer@dabbelt.com>
Signed-off-by: Andrea Parri <parri.andrea@gmail.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://lore.kernel.org/r/20240131144936.29190-5-parri.andrea@gmail.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
Andrea Parri 2024-01-31 15:49:36 +01:00 committed by Palmer Dabbelt
parent 4ff4c745a1
commit cd9b29014d
No known key found for this signature in database
GPG Key ID: 2E1319F35FBB1889
7 changed files with 77 additions and 1 deletions

View File

@ -10,6 +10,22 @@
# Rely on implicit context synchronization as a result of exception return
# when returning from IPI handler, and when returning to user-space.
#
# * riscv
#
# riscv uses xRET as return from interrupt and to return to user-space.
#
# Given that xRET is not core serializing, we rely on FENCE.I for providing
# core serialization:
#
# - by calling sync_core_before_usermode() on return from interrupt (cf.
# ipi_sync_core()),
#
# - via switch_mm() and sync_core_before_usermode() (respectively, for
# uthread->uthread and kthread->uthread transitions) before returning
# to user-space.
#
# The serialization in switch_mm() is activated by prepare_sync_core_cmd().
#
# * x86
#
# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
@ -43,7 +59,7 @@
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
| riscv: | TODO |
| riscv: | ok |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |

View File

@ -14041,6 +14041,7 @@ L: linux-kernel@vger.kernel.org
S: Supported
F: Documentation/scheduler/membarrier.rst
F: arch/*/include/asm/membarrier.h
F: arch/*/include/asm/sync_core.h
F: include/uapi/linux/membarrier.h
F: kernel/sched/membarrier.c

View File

@ -28,14 +28,17 @@ config RISCV
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MMIOWB
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API
select ARCH_HAS_PREPARE_SYNC_CORE_CMD
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_DIRECT_MAP if MMU
select ARCH_HAS_SET_MEMORY if MMU
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL

View File

@ -22,6 +22,25 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
/*
* The membarrier system call requires a full memory barrier
* after storing to rq->curr, before going back to user-space.
*
* This barrier is also needed for the SYNC_CORE command when
* switching between processes; in particular, on a transition
* from a thread belonging to another mm to a thread belonging
* to the mm for which a membarrier SYNC_CORE is done on CPU0:
*
* - [CPU0] sets all bits in the mm icache_stale_mask (in
* prepare_sync_core_cmd());
*
* - [CPU1] stores to rq->curr (by the scheduler);
*
* - [CPU0] loads rq->curr within membarrier and observes
* cpu_rq(1)->curr->mm != mm, so the IPI is skipped on
* CPU1; this means membarrier relies on switch_mm() to
* issue the sync-core;
*
* - [CPU1] switch_mm() loads icache_stale_mask; if the bit
* is zero, switch_mm() may incorrectly skip the sync-core.
*
* Matches a full barrier in the proximity of the membarrier
* system call entry.
*/

View File

@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_SYNC_CORE_H
#define _ASM_RISCV_SYNC_CORE_H
/*
* RISC-V implements return to user-space through an xRET instruction,
* which is not core serializing.
*/
static inline void sync_core_before_usermode(void)
{
asm volatile ("fence.i" ::: "memory");
}
#ifdef CONFIG_SMP
/*
* Ensure the next switch_mm() on every CPU issues a core serializing
* instruction for the given @mm.
*/
static inline void prepare_sync_core_cmd(struct mm_struct *mm)
{
cpumask_setall(&mm->context.icache_stale_mask);
}
#else
static inline void prepare_sync_core_cmd(struct mm_struct *mm)
{
}
#endif /* CONFIG_SMP */
#endif /* _ASM_RISCV_SYNC_CORE_H */

View File

@ -6721,6 +6721,10 @@ static void __sched notrace __schedule(unsigned int sched_mode)
*
* The barrier matches a full barrier in the proximity of
* the membarrier system call entry.
*
* On RISC-V, this barrier pairing is also needed for the
* SYNC_CORE command when switching between processes, cf.
* the inline comments in membarrier_arch_switch_mm().
*/
++*switch_count;

View File

@ -342,6 +342,10 @@ static int membarrier_private_expedited(int flags, int cpu_id)
/*
* Matches memory barriers after rq->curr modification in
* scheduler.
*
* On RISC-V, this barrier pairing is also needed for the
* SYNC_CORE command when switching between processes, cf.
* the inline comments in membarrier_arch_switch_mm().
*/
smp_mb(); /* system call entry is not a mb. */