2023-04-07 23:10:58 +00:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
|
|
/*
|
|
|
|
* Copyright 2022-2023 Rivos, Inc
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _ASM_CPUFEATURE_H
|
|
|
|
#define _ASM_CPUFEATURE_H
|
|
|
|
|
2023-05-09 18:25:02 +00:00
|
|
|
#include <linux/bitmap.h>
|
2023-10-31 06:45:52 +00:00
|
|
|
#include <linux/jump_label.h>
|
2023-05-09 18:25:02 +00:00
|
|
|
#include <asm/hwcap.h>
|
2023-10-31 06:45:52 +00:00
|
|
|
#include <asm/alternative-macros.h>
|
|
|
|
#include <asm/errno.h>
|
2023-05-09 18:25:02 +00:00
|
|
|
|
2023-04-07 23:10:58 +00:00
|
|
|
/*
|
|
|
|
* These are probed via a device_initcall(), via either the SBI or directly
|
|
|
|
* from the corresponding CSRs.
|
|
|
|
*/
|
|
|
|
struct riscv_cpuinfo {
	unsigned long mvendorid;	/* vendor ID (mvendorid CSR or SBI equivalent) */
	unsigned long marchid;		/* architecture ID (marchid CSR or SBI equivalent) */
	unsigned long mimpid;		/* implementation ID (mimpid CSR or SBI equivalent) */
};
|
|
|
|
|
2023-05-09 18:25:02 +00:00
|
|
|
/* Per-hart set of supported ISA extensions. */
struct riscv_isainfo {
	/* One bit per RISCV_ISA_EXT_* id. */
	DECLARE_BITMAP(isa, RISCV_ISA_EXT_MAX);
};
|
|
|
|
|
2023-04-07 23:10:58 +00:00
|
|
|
DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
|
|
|
|
|
2023-04-07 23:11:01 +00:00
|
|
|
DECLARE_PER_CPU(long, misaligned_access_speed);
|
|
|
|
|
2023-05-09 18:25:02 +00:00
|
|
|
/* Per-cpu ISA extensions. */
|
|
|
|
extern struct riscv_isainfo hart_isa[NR_CPUS];
|
|
|
|
|
/*
 * Enable usermode-visible extensions (e.g. cbo.zero when Zicboz is
 * present) by setting the respective senvcfg bit.  The bit is set
 * permanently per hart when it is brought online, rather than per task.
 */
|
|
|
void riscv_user_isa_enable(void);
|
/*
 * Misaligned-access handling.  Access speed is probed per hart and
 * reported via hwprobe as FAST or SLOW (left UNKNOWN if it cannot be
 * measured reliably); systems whose traps are emulated by firmware are
 * currently also reported as SLOW.
 */
|
|
|
|
2023-10-04 15:14:04 +00:00
|
|
|
#ifdef CONFIG_RISCV_MISALIGNED
/* True if userspace may control unaligned-access handling. */
bool unaligned_ctl_available(void);
/* Probe whether unaligned accesses on @cpu are emulated; true if so. */
bool check_unaligned_access_emulated(int cpu);
/* Finish unaligned-access emulation setup after all harts were probed. */
void unaligned_emulation_finish(void);
#else
/* Stubs for !CONFIG_RISCV_MISALIGNED: no emulation control is available. */
static inline bool unaligned_ctl_available(void)
{
	return false;
}

static inline bool check_unaligned_access_emulated(int cpu)
{
	return false;
}

static inline void unaligned_emulation_finish(void) {}
#endif
|
|
|
|
|
2023-10-31 06:45:52 +00:00
|
|
|
unsigned long riscv_get_elf_hwcap(void);
|
|
|
|
|
|
|
|
/* Describes one ISA extension known to the kernel. */
struct riscv_isa_ext_data {
	const unsigned int id;		/* RISCV_ISA_EXT_* id for this extension */
	const char *name;		/* extension name as printed (e.g. in cpuinfo) */
	const char *property;		/* devicetree property string for this extension */
	/*
	 * Extensions implied by this one (e.g. a shorthand such as Zk
	 * bundling several others), or NULL when there are none.
	 */
	const unsigned int *subset_ext_ids;
	const unsigned int subset_ext_size;	/* number of entries in subset_ext_ids */
};
|
|
|
|
|
|
|
|
extern const struct riscv_isa_ext_data riscv_isa_ext[];
|
|
|
|
extern const size_t riscv_isa_ext_count;
|
|
|
|
extern bool riscv_isa_fallback;
|
|
|
|
|
|
|
|
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
|
|
|
|
|
/*
 * Test a single extension bit in @isa_bitmap; a NULL bitmap selects the
 * system-wide (lowest-common-denominator) ISA bitmap.
 */
|
|
|
bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
|
2023-10-31 06:45:52 +00:00
|
|
|
/*
 * Convenience wrapper: test extension suffix @ext (e.g. ZICBOZ) in
 * @isa_bitmap by expanding it to its RISCV_ISA_EXT_* id.
 */
#define riscv_isa_extension_available(isa_bitmap, ext)	\
	__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
|
|
|
|
|
|
|
|
/*
 * Returns true if extension @ext is available system-wide.  "likely"
 * variant: with alternatives enabled, the code is patched so that the
 * present-case falls straight through (the jump to l_no is NOPed out).
 */
static __always_inline bool
riscv_has_extension_likely(const unsigned long ext)
{
	compiletime_assert(ext < RISCV_ISA_EXT_MAX,
			   "ext must be < RISCV_ISA_EXT_MAX");

	if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
		/*
		 * Boot-time patched check: ALTERNATIVE replaces the
		 * "j l_no" with a nop when @ext is detected, so no
		 * bitmap search happens at runtime.
		 */
		asm goto(
		ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1)
		:
		: [ext] "i" (ext)
		:
		: l_no);
	} else {
		/* No alternatives: fall back to searching the system ISA bitmap. */
		if (!__riscv_isa_extension_available(NULL, ext))
			goto l_no;
	}

	return true;
l_no:
	return false;
}
|
|
|
|
|
|
|
|
/*
 * Returns true if extension @ext is available system-wide.  "unlikely"
 * variant: with alternatives enabled, the code is patched so that the
 * absent-case falls straight through (the jump to l_yes is inserted
 * only when the extension is detected).
 */
static __always_inline bool
riscv_has_extension_unlikely(const unsigned long ext)
{
	compiletime_assert(ext < RISCV_ISA_EXT_MAX,
			   "ext must be < RISCV_ISA_EXT_MAX");

	if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
		/*
		 * Boot-time patched check: the nop is replaced by
		 * "j l_yes" when @ext is detected, so no bitmap
		 * search happens at runtime.
		 */
		asm goto(
		ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1)
		:
		: [ext] "i" (ext)
		:
		: l_yes);
	} else {
		/* No alternatives: fall back to searching the system ISA bitmap. */
		if (__riscv_isa_extension_available(NULL, ext))
			goto l_yes;
	}

	return false;
l_yes:
	return true;
}
|
|
|
|
|
|
|
|
/*
 * Returns true if @cpu supports extension @ext.  Tries the system-wide
 * check first, but only when alternatives make it a patched, search-free
 * test; otherwise that would just add a second bitmap search on top of
 * the per-hart lookup below.
 */
static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
{
	bool present_on_all = IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) &&
			      riscv_has_extension_likely(ext);

	return present_on_all ||
	       __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
}
|
|
|
|
|
|
|
|
/*
 * Returns true if @cpu supports extension @ext.  Same structure as
 * riscv_cpu_has_extension_likely(), but laid out for the case where the
 * extension is expected to be absent.
 */
static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
{
	if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
		/* Patched system-wide check: free when alternatives are in use. */
		if (riscv_has_extension_unlikely(ext))
			return true;
	}

	/* Fall back to this hart's own ISA bitmap. */
	return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
}
|
|
|
|
|
2024-01-08 23:57:03 +00:00
|
|
|
DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
|
|
|
|
|
2023-04-07 23:10:58 +00:00
|
|
|
#endif
|