RISC-V Patches for the 5.18 Merge Window, Part 1

* Support for Sv57-based virtual memory.
 * Various improvements for the MicroChip PolarFire SOC and the
   associated Icicle dev board, which should allow upstream kernels to
   boot without any additional modifications.
 * An improved memmove() implementation.
 * Support for the new Ssconfpmf and SBI PMU extensions, which allows for
   a much more useful perf implementation on RISC-V systems.
 * Support for restartable sequences.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEKzw3R0RoQ7JKlDp6LhMZ81+7GIkFAmI96FcTHHBhbG1lckBk
 YWJiZWx0LmNvbQAKCRAuExnzX7sYiQBFD/425+6xmoOru6Wiki3Ja0fqQToNrQyW
 IbmE/8AxUP7UxMvJSNzvQm8deXgklzvmegXCtnjwZZins971vMzzDSI83k/zn8I7
 m5thVC9z01BjodV+pvIp/44hS6FesolOLzkVHksX0Zh6h0iidrc34Qf5HrqvvNfN
 CZ/4K1+E9ig5r9qZp4WdvocCXj+FzwF/30GjKoW9vwA599CEG/dCo+TNN9GKD6XS
 k+xiUGwlIRA+kCLSPFCi7ev9XPr1tCmQB7uB8Igcvr7Y3mWl8HKfajQVXBnXNRC3
 ifbDxpx1elJiLPyf7Rza8jIDwDhLQdxBiwPgDgP9h9R4x0uF4efq8PzLzFlFmaE+
 9Z9thfykBb5dXYDFDje9bAOXvKnGk7Iqxdsz0qWo/ChEQawX1+11bJb0TNN8QTT9
 YvlQfUXgb1dmEcj5yG2uVE1Y8L7YNLRMsZU3W3FbmPJZoavSOuU4x0yCGeLyv597
 76af3nuBJ5v80Db97gu6St+HIACeevKflsZUf/8GS/p7d1DlvmrWzQUMEycxPTG9
 UZpZak58jh7AqQ9JbLnavhwmeacY50vpZOw6QHGAHSN+8daCPlOHDG7Ver7Z+kNj
 +srJ7iKMvLnnaEjGNgavfxdqTOme1gv4LWs/JdHYMkpphqVN92xBDJnhXTPRVZiQ
 0x39vK86qtB46A==
 =Omc6
 -----END PGP SIGNATURE-----

Merge tag 'riscv-for-linus-5.18-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull RISC-V updates from Palmer Dabbelt:

 - Support for Sv57-based virtual memory.

 - Various improvements for the MicroChip PolarFire SOC and the
   associated Icicle dev board, which should allow upstream kernels to
   boot without any additional modifications.

 - An improved memmove() implementation.

 - Support for the new Ssconfpmf and SBI PMU extensions, which allows
   for a much more useful perf implementation on RISC-V systems.

 - Support for restartable sequences.

* tag 'riscv-for-linus-5.18-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (36 commits)
  rseq/selftests: Add support for RISC-V
  RISC-V: Add support for restartable sequence
  MAINTAINERS: Add entry for RISC-V PMU drivers
  Documentation: riscv: Remove the old documentation
  RISC-V: Add sscofpmf extension support
  RISC-V: Add perf platform driver based on SBI PMU extension
  RISC-V: Add RISC-V SBI PMU extension definitions
  RISC-V: Add a simple platform driver for RISC-V legacy perf
  RISC-V: Add a perf core library for pmu drivers
  RISC-V: Add CSR encodings for all HPMCOUNTERS
  RISC-V: Remove the current perf implementation
  RISC-V: Improve /proc/cpuinfo output for ISA extensions
  RISC-V: Do no continue isa string parsing without correct XLEN
  RISC-V: Implement multi-letter ISA extension probing framework
  RISC-V: Extract multi-letter extension names from "riscv, isa"
  RISC-V: Minimal parser for "riscv, isa" strings
  RISC-V: Correctly print supported extensions
  riscv: Fixed misaligned memory access. Fixed pointer comparison.
  MAINTAINERS: update riscv/microchip entry
  riscv: dts: microchip: add new peripherals to icicle kit device tree
  ...
This commit is contained in:
Linus Torvalds 2022-03-25 10:11:38 -07:00
commit aa5b537b0e
44 changed files with 3897 additions and 1052 deletions

View file

@ -0,0 +1,58 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/clock/microchip,mpfs.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Microchip PolarFire Clock Control Module Binding
maintainers:
- Daire McNamara <daire.mcnamara@microchip.com>
description: |
Microchip PolarFire clock control (CLKCFG) is an integrated clock controller,
which gates and enables all peripheral clocks.
This device tree binding describes 33 gate clocks. Clocks are referenced by
user nodes by the CLKCFG node phandle and the clock index in the group, from
0 to 32.
properties:
compatible:
const: microchip,mpfs-clkcfg
reg:
maxItems: 1
clocks:
maxItems: 1
'#clock-cells':
const: 1
description: |
The clock consumer should specify the desired clock by having the clock
ID in its "clocks" phandle cell. See include/dt-bindings/clock/microchip,mpfs-clock.h
for the full list of PolarFire clock IDs.
required:
- compatible
- reg
- clocks
- '#clock-cells'
additionalProperties: false
examples:
# Clock Config node:
- |
#include <dt-bindings/clock/microchip,mpfs-clock.h>
soc {
#address-cells = <2>;
#size-cells = <2>;
clkcfg: clock-controller@20002000 {
compatible = "microchip,mpfs-clkcfg";
reg = <0x0 0x20002000 0x0 0x1000>;
clocks = <&ref>;
#clock-cells = <1>;
};
};

View file

@ -0,0 +1,79 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/gpio/microchip,mpfs-gpio.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Microchip MPFS GPIO Controller Device Tree Bindings
maintainers:
- Conor Dooley <conor.dooley@microchip.com>
properties:
compatible:
items:
- enum:
- microchip,mpfs-gpio
reg:
maxItems: 1
interrupts:
description:
Interrupt mapping, one per GPIO. Maximum 32 GPIOs.
minItems: 1
maxItems: 32
interrupt-controller: true
clocks:
maxItems: 1
"#gpio-cells":
const: 2
"#interrupt-cells":
const: 1
ngpios:
description:
The number of GPIOs available.
minimum: 1
maximum: 32
default: 32
gpio-controller: true
required:
- compatible
- reg
- interrupts
- "#interrupt-cells"
- interrupt-controller
- "#gpio-cells"
- gpio-controller
- clocks
additionalProperties: false
examples:
- |
gpio@20122000 {
compatible = "microchip,mpfs-gpio";
reg = <0x20122000 0x1000>;
clocks = <&clkcfg 25>;
interrupt-parent = <&plic>;
gpio-controller;
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <1>;
interrupts = <53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>;
};
...

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: "http://devicetree.org/schemas/mailbox/microchip,polarfire-soc-mailbox.yaml#"
$id: "http://devicetree.org/schemas/mailbox/microchip,mpfs-mailbox.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) mailbox controller
@ -11,7 +11,7 @@ maintainers:
properties:
compatible:
const: microchip,polarfire-soc-mailbox
const: microchip,mpfs-mailbox
reg:
items:
@ -38,7 +38,7 @@ examples:
#address-cells = <2>;
#size-cells = <2>;
mbox: mailbox@37020000 {
compatible = "microchip,polarfire-soc-mailbox";
compatible = "microchip,mpfs-mailbox";
reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318c 0x0 0x40>;
interrupt-parent = <&L1>;
interrupts = <96>;

View file

@ -0,0 +1,81 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/pwm/microchip,corepwm.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Microchip IP corePWM controller bindings
maintainers:
- Conor Dooley <conor.dooley@microchip.com>
description: |
corePWM is an 16 channel pulse width modulator FPGA IP
https://www.microsemi.com/existing-parts/parts/152118
allOf:
- $ref: pwm.yaml#
properties:
compatible:
items:
- const: microchip,corepwm-rtl-v4
reg:
maxItems: 1
clocks:
maxItems: 1
"#pwm-cells":
const: 2
microchip,sync-update-mask:
description: |
Depending on how the IP is instantiated, there are two modes of operation.
In synchronous mode, all channels are updated at the beginning of the PWM period,
and in asynchronous mode updates happen as the control registers are written.
A 16 bit wide "SHADOW_REG_EN" parameter of the IP core controls whether synchronous
mode is possible for each channel, and is set by the bitstream programmed to the
FPGA. If the IP core is instantiated with SHADOW_REG_ENx=1, both registers that
control the duty cycle for channel x have a second "shadow"/buffer reg synthesised.
At runtime a bit wide register exposed to APB can be used to toggle on/off
synchronised mode for all channels it has been synthesised for.
Each bit of "microchip,sync-update-mask" corresponds to a PWM channel & represents
whether synchronous mode is possible for the PWM channel.
$ref: /schemas/types.yaml#/definitions/uint32
default: 0
microchip,dac-mode-mask:
description: |
Optional, per-channel Low Ripple DAC mode is possible on this IP core. It creates
a minimum period pulse train whose High/Low average is that of the chosen duty
cycle. This "DAC" will have far better bandwidth and ripple performance than the
standard PWM algorithm can achieve. A 16 bit DAC_MODE module parameter of the IP
core, set at instantiation and by the bitstream programmed to the FPGA, determines
whether a given channel operates in regular PWM or DAC mode.
Each bit corresponds to a PWM channel & represents whether DAC mode is enabled
for that channel.
$ref: /schemas/types.yaml#/definitions/uint32
default: 0
required:
- compatible
- reg
- clocks
additionalProperties: false
examples:
- |
pwm@41000000 {
compatible = "microchip,corepwm-rtl-v4";
microchip,sync-update-mask = /bits/ 32 <0>;
clocks = <&clkcfg 30>;
reg = <0x41000000 0xF0>;
#pwm-cells = <2>;
};

View file

@ -0,0 +1,58 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/rtc/microchip,mfps-rtc.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Microchip PolarFire Soc (MPFS) RTC Device Tree Bindings
allOf:
- $ref: rtc.yaml#
maintainers:
- Daire McNamara <daire.mcnamara@microchip.com>
- Lewis Hanly <lewis.hanly@microchip.com>
properties:
compatible:
enum:
- microchip,mpfs-rtc
reg:
maxItems: 1
interrupts:
items:
- description: |
RTC_WAKEUP interrupt
- description: |
RTC_MATCH, asserted when the content of the Alarm register is equal
to that of the RTC's count register.
clocks:
maxItems: 1
clock-names:
items:
- const: rtc
required:
- compatible
- reg
- interrupts
- clocks
- clock-names
additionalProperties: false
examples:
- |
rtc@20124000 {
compatible = "microchip,mpfs-rtc";
reg = <0x20124000 0x1000>;
clocks = <&clkcfg 21>;
clock-names = "rtc";
interrupts = <80>, <81>;
};
...

View file

@ -0,0 +1,40 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: "http://devicetree.org/schemas/soc/microchip/microchip,mpfs-sys-controller.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) system controller
maintainers:
- Conor Dooley <conor.dooley@microchip.com>
description: |
PolarFire SoC devices include a microcontroller acting as the system controller,
which provides "services" to the main processor and to the FPGA fabric. These
services include hardware rng, reprogramming of the FPGA and verfification of the
eNVM contents etc. More information on these services can be found online, at
https://onlinedocs.microchip.com/pr/GUID-1409CF11-8EF9-4C24-A94E-70979A688632-en-US-1/index.html
Communication with the system controller is done via a mailbox, of which the client
portion is documented here.
properties:
mboxes:
maxItems: 1
compatible:
const: microchip,mpfs-sys-controller
required:
- compatible
- mboxes
additionalProperties: false
examples:
- |
syscontroller {
compatible = "microchip,mpfs-sys-controller";
mboxes = <&mbox 0>;
};

View file

@ -1,35 +0,0 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: "http://devicetree.org/schemas/soc/microchip/microchip,polarfire-soc-sys-controller.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) system controller
maintainers:
- Conor Dooley <conor.dooley@microchip.com>
description: |
The PolarFire SoC system controller is communicated with via a mailbox.
This document describes the bindings for the client portion of that mailbox.
properties:
mboxes:
maxItems: 1
compatible:
const: microchip,polarfire-soc-sys-controller
required:
- compatible
- mboxes
additionalProperties: false
examples:
- |
syscontroller: syscontroller {
compatible = "microchip,polarfire-soc-sys-controller";
mboxes = <&mbox 0>;
};

View file

@ -1,255 +0,0 @@
===================================
Supporting PMUs on RISC-V platforms
===================================
Alan Kao <alankao@andestech.com>, Mar 2018
Introduction
------------
As of this writing, perf_event-related features mentioned in The RISC-V ISA
Privileged Version 1.10 are as follows:
(please check the manual for more details)
* [m|s]counteren
* mcycle[h], cycle[h]
* minstret[h], instret[h]
* mhpeventx, mhpcounterx[h]
With such function set only, porting perf would require a lot of work, due to
the lack of the following general architectural performance monitoring features:
* Enabling/Disabling counters
Counters are just free-running all the time in our case.
* Interrupt caused by counter overflow
No such feature in the spec.
* Interrupt indicator
It is not possible to have many interrupt ports for all counters, so an
interrupt indicator is required for software to tell which counter has
just overflowed.
* Writing to counters
There will be an SBI to support this since the kernel cannot modify the
counters [1]. Alternatively, some vendor considers to implement
hardware-extension for M-S-U model machines to write counters directly.
This document aims to provide developers a quick guide on supporting their
PMUs in the kernel. The following sections briefly explain perf' mechanism
and todos.
You may check previous discussions here [1][2]. Also, it might be helpful
to check the appendix for related kernel structures.
1. Initialization
-----------------
*riscv_pmu* is a global pointer of type *struct riscv_pmu*, which contains
various methods according to perf's internal convention and PMU-specific
parameters. One should declare such instance to represent the PMU. By default,
*riscv_pmu* points to a constant structure *riscv_base_pmu*, which has very
basic support to a baseline QEMU model.
Then he/she can either assign the instance's pointer to *riscv_pmu* so that
the minimal and already-implemented logic can be leveraged, or invent his/her
own *riscv_init_platform_pmu* implementation.
In other words, existing sources of *riscv_base_pmu* merely provide a
reference implementation. Developers can flexibly decide how many parts they
can leverage, and in the most extreme case, they can customize every function
according to their needs.
2. Event Initialization
-----------------------
When a user launches a perf command to monitor some events, it is first
interpreted by the userspace perf tool into multiple *perf_event_open*
system calls, and then each of them calls to the body of *event_init*
member function that was assigned in the previous step. In *riscv_base_pmu*'s
case, it is *riscv_event_init*.
The main purpose of this function is to translate the event provided by user
into bitmap, so that HW-related control registers or counters can directly be
manipulated. The translation is based on the mappings and methods provided in
*riscv_pmu*.
Note that some features can be done in this stage as well:
(1) interrupt setting, which is stated in the next section;
(2) privilege level setting (user space only, kernel space only, both);
(3) destructor setting. Normally it is sufficient to apply *riscv_destroy_event*;
(4) tweaks for non-sampling events, which will be utilized by functions such as
*perf_adjust_period*, usually something like the follows::
if (!is_sampling_event(event)) {
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
}
In the case of *riscv_base_pmu*, only (3) is provided for now.
3. Interrupt
------------
3.1. Interrupt Initialization
This often occurs at the beginning of the *event_init* method. In common
practice, this should be a code segment like::
int x86_reserve_hardware(void)
{
int err = 0;
if (!atomic_inc_not_zero(&pmc_refcount)) {
mutex_lock(&pmc_reserve_mutex);
if (atomic_read(&pmc_refcount) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
else
reserve_ds_buffers();
}
if (!err)
atomic_inc(&pmc_refcount);
mutex_unlock(&pmc_reserve_mutex);
}
return err;
}
And the magic is in *reserve_pmc_hardware*, which usually does atomic
operations to make implemented IRQ accessible from some global function pointer.
*release_pmc_hardware* serves the opposite purpose, and it is used in event
destructors mentioned in previous section.
(Note: From the implementations in all the architectures, the *reserve/release*
pair are always IRQ settings, so the *pmc_hardware* seems somehow misleading.
It does NOT deal with the binding between an event and a physical counter,
which will be introduced in the next section.)
3.2. IRQ Structure
Basically, a IRQ runs the following pseudo code::
for each hardware counter that triggered this overflow
get the event of this counter
// following two steps are defined as *read()*,
// check the section Reading/Writing Counters for details.
count the delta value since previous interrupt
update the event->count (# event occurs) by adding delta, and
event->hw.period_left by subtracting delta
if the event overflows
sample data
set the counter appropriately for the next overflow
if the event overflows again
too frequently, throttle this event
fi
fi
end for
However as of this writing, none of the RISC-V implementations have designed an
interrupt for perf, so the details are to be completed in the future.
4. Reading/Writing Counters
---------------------------
They seem symmetric but perf treats them quite differently. For reading, there
is a *read* interface in *struct pmu*, but it serves more than just reading.
According to the context, the *read* function not only reads the content of the
counter (event->count), but also updates the left period to the next interrupt
(event->hw.period_left).
But the core of perf does not need direct write to counters. Writing counters
is hidden behind the abstraction of 1) *pmu->start*, literally start counting so one
has to set the counter to a good value for the next interrupt; 2) inside the IRQ
it should set the counter to the same resonable value.
Reading is not a problem in RISC-V but writing would need some effort, since
counters are not allowed to be written by S-mode.
5. add()/del()/start()/stop()
-----------------------------
Basic idea: add()/del() adds/deletes events to/from a PMU, and start()/stop()
starts/stop the counter of some event in the PMU. All of them take the same
arguments: *struct perf_event *event* and *int flag*.
Consider perf as a state machine, then you will find that these functions serve
as the state transition process between those states.
Three states (event->hw.state) are defined:
* PERF_HES_STOPPED: the counter is stopped
* PERF_HES_UPTODATE: the event->count is up-to-date
* PERF_HES_ARCH: arch-dependent usage ... we don't need this for now
A normal flow of these state transitions are as follows:
* A user launches a perf event, resulting in calling to *event_init*.
* When being context-switched in, *add* is called by the perf core, with a flag
PERF_EF_START, which means that the event should be started after it is added.
At this stage, a general event is bound to a physical counter, if any.
The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, because it is now
stopped, and the (software) event count does not need updating.
- *start* is then called, and the counter is enabled.
With flag PERF_EF_RELOAD, it writes an appropriate value to the counter (check
previous section for detail).
Nothing is written if the flag does not contain PERF_EF_RELOAD.
The state now is reset to none, because it is neither stopped nor updated
(the counting already started)
* When being context-switched out, *del* is called. It then checks out all the
events in the PMU and calls *stop* to update their counts.
- *stop* is called by *del*
and the perf core with flag PERF_EF_UPDATE, and it often shares the same
subroutine as *read* with the same logic.
The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, again.
- Life cycle of these two pairs: *add* and *del* are called repeatedly as
tasks switch in-and-out; *start* and *stop* is also called when the perf core
needs a quick stop-and-start, for instance, when the interrupt period is being
adjusted.
Current implementation is sufficient for now and can be easily extended to
features in the future.
A. Related Structures
---------------------
* struct pmu: include/linux/perf_event.h
* struct riscv_pmu: arch/riscv/include/asm/perf_event.h
Both structures are designed to be read-only.
*struct pmu* defines some function pointer interfaces, and most of them take
*struct perf_event* as a main argument, dealing with perf events according to
perf's internal state machine (check kernel/events/core.c for details).
*struct riscv_pmu* defines PMU-specific parameters. The naming follows the
convention of all other architectures.
* struct perf_event: include/linux/perf_event.h
* struct hw_perf_event
The generic structure that represents perf events, and the hardware-related
details.
* struct riscv_hw_events: arch/riscv/include/asm/perf_event.h
The structure that holds the status of events, has two fixed members:
the number of events and the array of the events.
References
----------
[1] https://github.com/riscv/riscv-linux/pull/124
[2] https://groups.google.com/a/groups.riscv.org/forum/#!topic/sw-dev/f19TmCNP6yA

View file

@ -16707,6 +16707,15 @@ S: Maintained
F: drivers/mtd/nand/raw/r852.c
F: drivers/mtd/nand/raw/r852.h
RISC-V PMU DRIVERS
M: Atish Patra <atishp@atishpatra.org>
R: Anup Patel <anup@brainfault.org>
L: linux-riscv@lists.infradead.org
S: Supported
F: drivers/perf/riscv_pmu.c
F: drivers/perf/riscv_pmu_legacy.c
F: drivers/perf/riscv_pmu_sbi.c
RISC-V ARCHITECTURE
M: Paul Walmsley <paul.walmsley@sifive.com>
M: Palmer Dabbelt <palmer@dabbelt.com>
@ -16721,8 +16730,10 @@ K: riscv
RISC-V/MICROCHIP POLARFIRE SOC SUPPORT
M: Lewis Hanly <lewis.hanly@microchip.com>
M: Conor Dooley <conor.dooley@microchip.com>
L: linux-riscv@lists.infradead.org
S: Supported
F: arch/riscv/boot/dts/microchip/
F: drivers/mailbox/mailbox-mpfs.c
F: drivers/soc/microchip/
F: include/soc/microchip/mpfs.h

View file

@ -102,6 +102,7 @@ config RISCV
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_RSEQ
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA if MODULES
@ -152,7 +153,7 @@ config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT
default 0x80000000 if 64BIT && !MMU
default 0xffffaf8000000000 if 64BIT
default 0xff60000000000000 if 64BIT
config KASAN_SHADOW_OFFSET
hex
@ -200,7 +201,7 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
default 4 if 64BIT
default 5 if 64BIT
default 2
config LOCKDEP_SUPPORT
@ -331,19 +332,6 @@ config RISCV_ISA_C
If you don't know what to do here, say Y.
menu "supported PMU type"
depends on PERF_EVENTS
config RISCV_BASE_PMU
bool "Base Performance Monitoring Unit"
def_bool y
help
A base PMU that serves as a reference implementation and has limited
feature of perf. It can run on any RISC-V machines so serves as the
fallback, but this option can also be disable to reduce kernel size.
endmenu
config FPU
bool "FPU support"
default y

View file

@ -0,0 +1,25 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Copyright (c) 2020-2021 Microchip Technology Inc */
/ {
core_pwm0: pwm@41000000 {
compatible = "microchip,corepwm-rtl-v4";
reg = <0x0 0x41000000 0x0 0xF0>;
microchip,sync-update-mask = /bits/ 32 <0>;
#pwm-cells = <2>;
clocks = <&clkcfg CLK_FIC3>;
status = "disabled";
};
i2c2: i2c@44000000 {
compatible = "microchip,corei2c-rtl-v7";
reg = <0x0 0x44000000 0x0 0x1000>;
#address-cells = <1>;
#size-cells = <0>;
clocks = <&clkcfg CLK_FIC3>;
interrupt-parent = <&plic>;
interrupts = <122>;
clock-frequency = <100000>;
status = "disabled";
};
};

View file

@ -1,5 +1,5 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Copyright (c) 2020 Microchip Technology Inc */
/* Copyright (c) 2020-2021 Microchip Technology Inc */
/dts-v1/;
@ -13,25 +13,34 @@ / {
compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs";
aliases {
ethernet0 = &emac1;
serial0 = &serial0;
serial1 = &serial1;
serial2 = &serial2;
serial3 = &serial3;
ethernet0 = &mac1;
serial0 = &mmuart0;
serial1 = &mmuart1;
serial2 = &mmuart2;
serial3 = &mmuart3;
serial4 = &mmuart4;
};
chosen {
stdout-path = "serial0:115200n8";
stdout-path = "serial1:115200n8";
};
cpus {
timebase-frequency = <RTCCLK_FREQ>;
};
memory@80000000 {
ddrc_cache_lo: memory@80000000 {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x40000000>;
clocks = <&clkcfg 26>;
reg = <0x0 0x80000000 0x0 0x2e000000>;
clocks = <&clkcfg CLK_DDRC>;
status = "okay";
};
ddrc_cache_hi: memory@1000000000 {
device_type = "memory";
reg = <0x10 0x0 0x0 0x40000000>;
clocks = <&clkcfg CLK_DDRC>;
status = "okay";
};
};
@ -39,19 +48,19 @@ &refclk {
clock-frequency = <600000000>;
};
&serial0 {
&mmuart1 {
status = "okay";
};
&serial1 {
&mmuart2 {
status = "okay";
};
&serial2 {
&mmuart3 {
status = "okay";
};
&serial3 {
&mmuart4 {
status = "okay";
};
@ -61,28 +70,92 @@ &mmc {
bus-width = <4>;
disable-wp;
cap-sd-highspeed;
cap-mmc-highspeed;
card-detect-delay = <200>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
sd-uhs-sdr12;
sd-uhs-sdr25;
sd-uhs-sdr50;
sd-uhs-sdr104;
};
&emac0 {
phy-mode = "sgmii";
phy-handle = <&phy0>;
phy0: ethernet-phy@8 {
reg = <8>;
ti,fifo-depth = <0x01>;
};
&spi0 {
status = "okay";
};
&emac1 {
&spi1 {
status = "okay";
};
&qspi {
status = "okay";
};
&i2c0 {
status = "okay";
};
&i2c1 {
status = "okay";
};
&i2c2 {
status = "okay";
};
&mac0 {
phy-mode = "sgmii";
phy-handle = <&phy0>;
};
&mac1 {
status = "okay";
phy-mode = "sgmii";
phy-handle = <&phy1>;
phy1: ethernet-phy@9 {
reg = <9>;
ti,fifo-depth = <0x01>;
ti,fifo-depth = <0x1>;
};
phy0: ethernet-phy@8 {
reg = <8>;
ti,fifo-depth = <0x1>;
};
};
&gpio2 {
interrupts = <53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>,
<53>, <53>, <53>, <53>;
status = "okay";
};
&rtc {
status = "okay";
};
&usb {
status = "okay";
dr_mode = "host";
};
&mbox {
status = "okay";
};
&syscontroller {
status = "okay";
};
&pcie {
status = "okay";
};
&core_pwm0 {
status = "okay";
};

View file

@ -1,7 +1,9 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Copyright (c) 2020 Microchip Technology Inc */
/* Copyright (c) 2020-2021 Microchip Technology Inc */
/dts-v1/;
#include "dt-bindings/clock/microchip,mpfs-clock.h"
#include "microchip-mpfs-fabric.dtsi"
/ {
#address-cells = <2>;
@ -13,8 +15,7 @@ cpus {
#address-cells = <1>;
#size-cells = <0>;
cpu@0 {
clock-frequency = <0>;
cpu0: cpu@0 {
compatible = "sifive,e51", "sifive,rocket0", "riscv";
device_type = "cpu";
i-cache-block-size = <64>;
@ -22,6 +23,7 @@ cpu@0 {
i-cache-size = <16384>;
reg = <0>;
riscv,isa = "rv64imac";
clocks = <&clkcfg CLK_CPU>;
status = "disabled";
cpu0_intc: interrupt-controller {
@ -31,8 +33,7 @@ cpu0_intc: interrupt-controller {
};
};
cpu@1 {
clock-frequency = <0>;
cpu1: cpu@1 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@ -48,6 +49,7 @@ cpu@1 {
mmu-type = "riscv,sv39";
reg = <1>;
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
@ -58,8 +60,7 @@ cpu1_intc: interrupt-controller {
};
};
cpu@2 {
clock-frequency = <0>;
cpu2: cpu@2 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@ -75,6 +76,7 @@ cpu@2 {
mmu-type = "riscv,sv39";
reg = <2>;
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
@ -85,8 +87,7 @@ cpu2_intc: interrupt-controller {
};
};
cpu@3 {
clock-frequency = <0>;
cpu3: cpu@3 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@ -102,6 +103,7 @@ cpu@3 {
mmu-type = "riscv,sv39";
reg = <3>;
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
@ -112,8 +114,7 @@ cpu3_intc: interrupt-controller {
};
};
cpu@4 {
clock-frequency = <0>;
cpu4: cpu@4 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@ -129,6 +130,7 @@ cpu@4 {
mmu-type = "riscv,sv39";
reg = <4>;
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
cpu4_intc: interrupt-controller {
@ -150,8 +152,9 @@ soc {
compatible = "simple-bus";
ranges;
cache-controller@2010000 {
cctrllr: cache-controller@2010000 {
compatible = "sifive,fu540-c000-ccache", "cache";
reg = <0x0 0x2010000 0x0 0x1000>;
cache-block-size = <64>;
cache-level = <2>;
cache-sets = <1024>;
@ -159,10 +162,9 @@ cache-controller@2010000 {
cache-unified;
interrupt-parent = <&plic>;
interrupts = <1>, <2>, <3>;
reg = <0x0 0x2010000 0x0 0x1000>;
};
clint@2000000 {
clint: clint@2000000 {
compatible = "sifive,fu540-c000-clint", "sifive,clint0";
reg = <0x0 0x2000000 0x0 0xC000>;
interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
@ -186,15 +188,6 @@ plic: interrupt-controller@c000000 {
riscv,ndev = <186>;
};
dma@3000000 {
compatible = "sifive,fu540-c000-pdma";
reg = <0x0 0x3000000 0x0 0x8000>;
interrupt-parent = <&plic>;
interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
<30>;
#dma-cells = <1>;
};
clkcfg: clkcfg@20002000 {
compatible = "microchip,mpfs-clkcfg";
reg = <0x0 0x20002000 0x0 0x1000>;
@ -202,7 +195,7 @@ clkcfg: clkcfg@20002000 {
#clock-cells = <1>;
};
serial0: serial@20000000 {
mmuart0: serial@20000000 {
compatible = "ns16550a";
reg = <0x0 0x20000000 0x0 0x400>;
reg-io-width = <4>;
@ -210,11 +203,11 @@ serial0: serial@20000000 {
interrupt-parent = <&plic>;
interrupts = <90>;
current-speed = <115200>;
clocks = <&clkcfg 8>;
status = "disabled";
clocks = <&clkcfg CLK_MMUART0>;
status = "disabled"; /* Reserved for the HSS */
};
serial1: serial@20100000 {
mmuart1: serial@20100000 {
compatible = "ns16550a";
reg = <0x0 0x20100000 0x0 0x400>;
reg-io-width = <4>;
@ -222,11 +215,11 @@ serial1: serial@20100000 {
interrupt-parent = <&plic>;
interrupts = <91>;
current-speed = <115200>;
clocks = <&clkcfg 9>;
clocks = <&clkcfg CLK_MMUART1>;
status = "disabled";
};
serial2: serial@20102000 {
mmuart2: serial@20102000 {
compatible = "ns16550a";
reg = <0x0 0x20102000 0x0 0x400>;
reg-io-width = <4>;
@ -234,11 +227,11 @@ serial2: serial@20102000 {
interrupt-parent = <&plic>;
interrupts = <92>;
current-speed = <115200>;
clocks = <&clkcfg 10>;
clocks = <&clkcfg CLK_MMUART2>;
status = "disabled";
};
serial3: serial@20104000 {
mmuart3: serial@20104000 {
compatible = "ns16550a";
reg = <0x0 0x20104000 0x0 0x400>;
reg-io-width = <4>;
@ -246,7 +239,19 @@ serial3: serial@20104000 {
interrupt-parent = <&plic>;
interrupts = <93>;
current-speed = <115200>;
clocks = <&clkcfg 11>;
clocks = <&clkcfg CLK_MMUART3>;
status = "disabled";
};
mmuart4: serial@20106000 {
compatible = "ns16550a";
reg = <0x0 0x20106000 0x0 0x400>;
reg-io-width = <4>;
reg-shift = <2>;
interrupt-parent = <&plic>;
interrupts = <94>;
clocks = <&clkcfg CLK_MMUART4>;
current-speed = <115200>;
status = "disabled";
};
@ -255,37 +260,196 @@ mmc: mmc@20008000 {
compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc";
reg = <0x0 0x20008000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupts = <88>, <89>;
clocks = <&clkcfg 6>;
interrupts = <88>;
clocks = <&clkcfg CLK_MMC>;
max-frequency = <200000000>;
status = "disabled";
};
emac0: ethernet@20110000 {
spi0: spi@20108000 {
compatible = "microchip,mpfs-spi";
#address-cells = <1>;
#size-cells = <0>;
reg = <0x0 0x20108000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupts = <54>;
clocks = <&clkcfg CLK_SPI0>;
spi-max-frequency = <25000000>;
status = "disabled";
};
spi1: spi@20109000 {
compatible = "microchip,mpfs-spi";
#address-cells = <1>;
#size-cells = <0>;
reg = <0x0 0x20109000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupts = <55>;
clocks = <&clkcfg CLK_SPI1>;
spi-max-frequency = <25000000>;
status = "disabled";
};
qspi: spi@21000000 {
compatible = "microchip,mpfs-qspi";
#address-cells = <1>;
#size-cells = <0>;
reg = <0x0 0x21000000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupts = <85>;
clocks = <&clkcfg CLK_QSPI>;
spi-max-frequency = <25000000>;
status = "disabled";
};
i2c0: i2c@2010a000 {
compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7";
reg = <0x0 0x2010a000 0x0 0x1000>;
#address-cells = <1>;
#size-cells = <0>;
interrupt-parent = <&plic>;
interrupts = <58>;
clocks = <&clkcfg CLK_I2C0>;
clock-frequency = <100000>;
status = "disabled";
};
i2c1: i2c@2010b000 {
compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7";
reg = <0x0 0x2010b000 0x0 0x1000>;
#address-cells = <1>;
#size-cells = <0>;
interrupt-parent = <&plic>;
interrupts = <61>;
clocks = <&clkcfg CLK_I2C1>;
clock-frequency = <100000>;
status = "disabled";
};
mac0: ethernet@20110000 {
compatible = "cdns,macb";
reg = <0x0 0x20110000 0x0 0x2000>;
interrupt-parent = <&plic>;
interrupts = <64>, <65>, <66>, <67>;
local-mac-address = [00 00 00 00 00 00];
clocks = <&clkcfg 4>, <&clkcfg 2>;
clock-names = "pclk", "hclk";
status = "disabled";
#address-cells = <1>;
#size-cells = <0>;
interrupt-parent = <&plic>;
interrupts = <64>, <65>, <66>, <67>, <68>, <69>;
local-mac-address = [00 00 00 00 00 00];
clocks = <&clkcfg CLK_MAC0>, <&clkcfg CLK_AHB>;
clock-names = "pclk", "hclk";
status = "disabled";
};
emac1: ethernet@20112000 {
mac1: ethernet@20112000 {
compatible = "cdns,macb";
reg = <0x0 0x20112000 0x0 0x2000>;
interrupt-parent = <&plic>;
interrupts = <70>, <71>, <72>, <73>;
local-mac-address = [00 00 00 00 00 00];
clocks = <&clkcfg 5>, <&clkcfg 2>;
status = "disabled";
clock-names = "pclk", "hclk";
#address-cells = <1>;
#size-cells = <0>;
interrupt-parent = <&plic>;
interrupts = <70>, <71>, <72>, <73>, <74>, <75>;
local-mac-address = [00 00 00 00 00 00];
clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>;
clock-names = "pclk", "hclk";
status = "disabled";
};
gpio0: gpio@20120000 {
compatible = "microchip,mpfs-gpio";
reg = <0x0 0x20120000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupt-controller;
#interrupt-cells = <1>;
clocks = <&clkcfg CLK_GPIO0>;
gpio-controller;
#gpio-cells = <2>;
status = "disabled";
};
gpio1: gpio@20121000 {
compatible = "microchip,mpfs-gpio";
reg = <000 0x20121000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupt-controller;
#interrupt-cells = <1>;
clocks = <&clkcfg CLK_GPIO1>;
gpio-controller;
#gpio-cells = <2>;
status = "disabled";
};
gpio2: gpio@20122000 {
compatible = "microchip,mpfs-gpio";
reg = <0x0 0x20122000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupt-controller;
#interrupt-cells = <1>;
clocks = <&clkcfg CLK_GPIO2>;
gpio-controller;
#gpio-cells = <2>;
status = "disabled";
};
rtc: rtc@20124000 {
compatible = "microchip,mpfs-rtc";
reg = <0x0 0x20124000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupts = <80>, <81>;
clocks = <&clkcfg CLK_RTC>;
clock-names = "rtc";
status = "disabled";
};
usb: usb@20201000 {
compatible = "microchip,mpfs-musb";
reg = <0x0 0x20201000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupts = <86>, <87>;
clocks = <&clkcfg CLK_USB>;
interrupt-names = "dma","mc";
status = "disabled";
};
pcie: pcie@2000000000 {
compatible = "microchip,pcie-host-1.0";
#address-cells = <0x3>;
#interrupt-cells = <0x1>;
#size-cells = <0x2>;
device_type = "pci";
reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>;
reg-names = "cfg", "apb";
bus-range = <0x0 0x7f>;
interrupt-parent = <&plic>;
interrupts = <119>;
interrupt-map = <0 0 0 1 &pcie_intc 0>,
<0 0 0 2 &pcie_intc 1>,
<0 0 0 3 &pcie_intc 2>,
<0 0 0 4 &pcie_intc 3>;
interrupt-map-mask = <0 0 0 7>;
clocks = <&clkcfg CLK_FIC0>, <&clkcfg CLK_FIC1>, <&clkcfg CLK_FIC3>;
clock-names = "fic0", "fic1", "fic3";
ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>;
msi-parent = <&pcie>;
msi-controller;
microchip,axi-m-atr0 = <0x10 0x0>;
status = "disabled";
pcie_intc: legacy-interrupt-controller {
#address-cells = <0>;
#interrupt-cells = <1>;
interrupt-controller;
};
};
mbox: mailbox@37020000 {
compatible = "microchip,mpfs-mailbox";
reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318C 0x0 0x40>;
interrupt-parent = <&plic>;
interrupts = <96>;
#mbox-cells = <1>;
status = "disabled";
};
syscontroller: syscontroller {
compatible = "microchip,mpfs-sys-controller";
mboxes = <&mbox 0>;
};
};
};

View file

@ -108,6 +108,7 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y

View file

@ -100,6 +100,7 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y

View file

@ -47,6 +47,7 @@
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
#define SATP_MODE_48 _AC(0x9000000000000000, UL)
#define SATP_MODE_57 _AC(0xa000000000000000, UL)
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
@ -65,6 +66,7 @@
#define IRQ_S_EXT 9
#define IRQ_VS_EXT 10
#define IRQ_M_EXT 11
#define IRQ_PMU_OVF 13
/* Exception causes */
#define EXC_INST_MISALIGNED 0
@ -150,9 +152,69 @@
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
#define CSR_INSTRET 0xc02
#define CSR_HPMCOUNTER3 0xc03
#define CSR_HPMCOUNTER4 0xc04
#define CSR_HPMCOUNTER5 0xc05
#define CSR_HPMCOUNTER6 0xc06
#define CSR_HPMCOUNTER7 0xc07
#define CSR_HPMCOUNTER8 0xc08
#define CSR_HPMCOUNTER9 0xc09
#define CSR_HPMCOUNTER10 0xc0a
#define CSR_HPMCOUNTER11 0xc0b
#define CSR_HPMCOUNTER12 0xc0c
#define CSR_HPMCOUNTER13 0xc0d
#define CSR_HPMCOUNTER14 0xc0e
#define CSR_HPMCOUNTER15 0xc0f
#define CSR_HPMCOUNTER16 0xc10
#define CSR_HPMCOUNTER17 0xc11
#define CSR_HPMCOUNTER18 0xc12
#define CSR_HPMCOUNTER19 0xc13
#define CSR_HPMCOUNTER20 0xc14
#define CSR_HPMCOUNTER21 0xc15
#define CSR_HPMCOUNTER22 0xc16
#define CSR_HPMCOUNTER23 0xc17
#define CSR_HPMCOUNTER24 0xc18
#define CSR_HPMCOUNTER25 0xc19
#define CSR_HPMCOUNTER26 0xc1a
#define CSR_HPMCOUNTER27 0xc1b
#define CSR_HPMCOUNTER28 0xc1c
#define CSR_HPMCOUNTER29 0xc1d
#define CSR_HPMCOUNTER30 0xc1e
#define CSR_HPMCOUNTER31 0xc1f
#define CSR_CYCLEH 0xc80
#define CSR_TIMEH 0xc81
#define CSR_INSTRETH 0xc82
#define CSR_HPMCOUNTER3H 0xc83
#define CSR_HPMCOUNTER4H 0xc84
#define CSR_HPMCOUNTER5H 0xc85
#define CSR_HPMCOUNTER6H 0xc86
#define CSR_HPMCOUNTER7H 0xc87
#define CSR_HPMCOUNTER8H 0xc88
#define CSR_HPMCOUNTER9H 0xc89
#define CSR_HPMCOUNTER10H 0xc8a
#define CSR_HPMCOUNTER11H 0xc8b
#define CSR_HPMCOUNTER12H 0xc8c
#define CSR_HPMCOUNTER13H 0xc8d
#define CSR_HPMCOUNTER14H 0xc8e
#define CSR_HPMCOUNTER15H 0xc8f
#define CSR_HPMCOUNTER16H 0xc90
#define CSR_HPMCOUNTER17H 0xc91
#define CSR_HPMCOUNTER18H 0xc92
#define CSR_HPMCOUNTER19H 0xc93
#define CSR_HPMCOUNTER20H 0xc94
#define CSR_HPMCOUNTER21H 0xc95
#define CSR_HPMCOUNTER22H 0xc96
#define CSR_HPMCOUNTER23H 0xc97
#define CSR_HPMCOUNTER24H 0xc98
#define CSR_HPMCOUNTER25H 0xc99
#define CSR_HPMCOUNTER26H 0xc9a
#define CSR_HPMCOUNTER27H 0xc9b
#define CSR_HPMCOUNTER28H 0xc9c
#define CSR_HPMCOUNTER29H 0xc9d
#define CSR_HPMCOUNTER30H 0xc9e
#define CSR_HPMCOUNTER31H 0xc9f
#define CSR_SSCOUNTOVF 0xda0
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
@ -240,7 +302,10 @@
# define RV_IRQ_SOFT IRQ_S_SOFT
# define RV_IRQ_TIMER IRQ_S_TIMER
# define RV_IRQ_EXT IRQ_S_EXT
#endif /* CONFIG_RISCV_M_MODE */
# define RV_IRQ_PMU IRQ_PMU_OVF
# define SIP_LCOFIP (_AC(0x1, UL) << IRQ_PMU_OVF)
#endif /* !CONFIG_RISCV_M_MODE */
/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
#define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT)

View file

@ -25,6 +25,7 @@ enum fixed_addresses {
FIX_PTE,
FIX_PMD,
FIX_PUD,
FIX_P4D,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,

View file

@ -34,7 +34,33 @@ extern unsigned long elf_hwcap;
#define RISCV_ISA_EXT_s ('s' - 'a')
#define RISCV_ISA_EXT_u ('u' - 'a')
/*
* Increse this to higher value as kernel support more ISA extensions.
*/
#define RISCV_ISA_EXT_MAX 64
#define RISCV_ISA_EXT_NAME_LEN_MAX 32
/* The base ID for multi-letter ISA extensions */
#define RISCV_ISA_EXT_BASE 26
/*
* This enum represent the logical ID for each multi-letter RISC-V ISA extension.
* The logical ID should start from RISCV_ISA_EXT_BASE and must not exceed
* RISCV_ISA_EXT_MAX. 0-25 range is reserved for single letter
* extensions while all the multi-letter extensions should define the next
* available logical extension id.
*/
enum riscv_isa_ext_id {
RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
};
struct riscv_isa_ext_data {
/* Name of the extension displayed to userspace via /proc/cpuinfo */
char uprop[RISCV_ISA_EXT_NAME_LEN_MAX];
/* The logical ISA extension ID */
unsigned int isa_ext_id;
};
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);

View file

@ -41,6 +41,7 @@
* By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
* define the PAGE_OFFSET value for SV39.
*/
#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL)
#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)

View file

@ -9,77 +9,5 @@
#define _ASM_RISCV_PERF_EVENT_H
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/interrupt.h>
#ifdef CONFIG_RISCV_BASE_PMU
#define RISCV_BASE_COUNTERS 2
/*
* The RISCV_MAX_COUNTERS parameter should be specified.
*/
#define RISCV_MAX_COUNTERS 2
/*
* These are the indexes of bits in counteren register *minus* 1,
* except for cycle. It would be coherent if it can directly mapped
* to counteren bit definition, but there is a *time* register at
* counteren[1]. Per-cpu structure is scarce resource here.
*
* According to the spec, an implementation can support counter up to
* mhpmcounter31, but many high-end processors has at most 6 general
* PMCs, we give the definition to MHPMCOUNTER8 here.
*/
#define RISCV_PMU_CYCLE 0
#define RISCV_PMU_INSTRET 1
#define RISCV_PMU_MHPMCOUNTER3 2
#define RISCV_PMU_MHPMCOUNTER4 3
#define RISCV_PMU_MHPMCOUNTER5 4
#define RISCV_PMU_MHPMCOUNTER6 5
#define RISCV_PMU_MHPMCOUNTER7 6
#define RISCV_PMU_MHPMCOUNTER8 7
#define RISCV_OP_UNSUPP (-EOPNOTSUPP)
struct cpu_hw_events {
/* # currently enabled events*/
int n_events;
/* currently enabled events */
struct perf_event *events[RISCV_MAX_COUNTERS];
/* vendor-defined PMU data */
void *platform;
};
struct riscv_pmu {
struct pmu *pmu;
/* generic hw/cache events table */
const int *hw_events;
const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
/* method used to map hw/cache events */
int (*map_hw_event)(u64 config);
int (*map_cache_event)(u64 config);
/* max generic hw events in map */
int max_events;
/* number total counters, 2(base) + x(general) */
int num_counters;
/* the width of the counter */
int counter_width;
/* vendor-defined PMU features */
void *platform;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
int irq;
};
#endif
#ifdef CONFIG_PERF_EVENTS
#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
#endif
#endif /* _ASM_RISCV_PERF_EVENT_H */

View file

@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
}
}
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
if (pgtable_l5_enabled) {
unsigned long pfn = virt_to_pfn(p4d);
set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}
static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
p4d_t *p4d)
{
if (pgtable_l5_enabled) {
unsigned long pfn = virt_to_pfn(p4d);
set_pgd_safe(pgd,
__pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}
#define pud_alloc_one pud_alloc_one
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
}
#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
#define p4d_alloc_one p4d_alloc_one
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
{
if (pgtable_l5_enabled) {
gfp_t gfp = GFP_PGTABLE_USER;
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
return (p4d_t *)get_zeroed_page(gfp);
}
return NULL;
}
static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
{
BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
free_page((unsigned long)p4d);
}
#define p4d_free p4d_free
static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
{
if (pgtable_l5_enabled)
__p4d_free(mm, p4d);
}
#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)

View file

@ -9,16 +9,24 @@
#include <linux/const.h>
extern bool pgtable_l4_enabled;
extern bool pgtable_l5_enabled;
#define PGDIR_SHIFT_L3 30
#define PGDIR_SHIFT_L4 39
#define PGDIR_SHIFT_L5 48
#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
#define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
(pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/* p4d is folded into pgd in case of 4-level page table */
#define P4D_SHIFT 39
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE - 1))
/* pud is folded into pgd in case of 3-level page table */
#define PUD_SHIFT 30
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
@ -29,6 +37,15 @@ extern bool pgtable_l4_enabled;
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
/* Page 4th Directory entry */
typedef struct {
unsigned long p4d;
} p4d_t;
#define p4d_val(x) ((x).p4d)
#define __p4d(x) ((p4d_t) { (x) })
#define PTRS_PER_P4D (PAGE_SIZE / sizeof(p4d_t))
/* Page Upper Directory entry */
typedef struct {
unsigned long pud;
@ -99,6 +116,15 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
#define mm_p4d_folded mm_p4d_folded
static inline bool mm_p4d_folded(struct mm_struct *mm)
{
if (pgtable_l5_enabled)
return false;
return true;
}
#define mm_pud_folded mm_pud_folded
static inline bool mm_pud_folded(struct mm_struct *mm)
{
@ -128,6 +154,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
#define p4d_ERROR(e) \
pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
if (pgtable_l4_enabled)
@ -166,6 +195,16 @@ static inline void p4d_clear(p4d_t *p4d)
set_p4d(p4d, __p4d(0));
}
static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
{
return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}
static inline unsigned long _p4d_pfn(p4d_t p4d)
{
return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
}
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
if (pgtable_l4_enabled)
@ -173,6 +212,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
}
#define p4d_page_vaddr(p4d) ((unsigned long)p4d_pgtable(p4d))
static inline struct page *p4d_page(p4d_t p4d)
{
@ -190,4 +230,68 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
return (pud_t *)p4d;
}
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
if (pgtable_l5_enabled)
*pgdp = pgd;
else
set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
}
static inline int pgd_none(pgd_t pgd)
{
if (pgtable_l5_enabled)
return (pgd_val(pgd) == 0);
return 0;
}
static inline int pgd_present(pgd_t pgd)
{
if (pgtable_l5_enabled)
return (pgd_val(pgd) & _PAGE_PRESENT);
return 1;
}
static inline int pgd_bad(pgd_t pgd)
{
if (pgtable_l5_enabled)
return !pgd_present(pgd);
return 0;
}
static inline void pgd_clear(pgd_t *pgd)
{
if (pgtable_l5_enabled)
set_pgd(pgd, __pgd(0));
}
static inline p4d_t *pgd_pgtable(pgd_t pgd)
{
if (pgtable_l5_enabled)
return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
}
#define pgd_page_vaddr(pgd) ((unsigned long)pgd_pgtable(pgd))
static inline struct page *pgd_page(pgd_t pgd)
{
return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
}
#define pgd_page(pgd) pgd_page(pgd)
#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
#define p4d_offset p4d_offset
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
if (pgtable_l5_enabled)
return pgd_pgtable(*pgd) + p4d_index(address);
return (p4d_t *)pgd;
}
#endif /* _ASM_RISCV_PGTABLE_64_H */

View file

@ -63,7 +63,8 @@
* position vmemmap directly below the VMALLOC region.
*/
#ifdef CONFIG_64BIT
#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
#define VA_BITS (pgtable_l5_enabled ? \
57 : (pgtable_l4_enabled ? 48 : 39))
#else
#define VA_BITS 32
#endif
@ -103,7 +104,6 @@
#ifndef __ASSEMBLY__
#include <asm-generic/pgtable-nop4d.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
@ -134,6 +134,8 @@ struct pt_alloc_ops {
phys_addr_t (*alloc_pmd)(uintptr_t va);
pud_t *(*get_pud_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pud)(uintptr_t va);
p4d_t *(*get_p4d_virt)(phys_addr_t pa);
phys_addr_t (*alloc_p4d)(uintptr_t va);
#endif
};

View file

@ -29,6 +29,7 @@ enum sbi_ext_id {
SBI_EXT_RFENCE = 0x52464E43,
SBI_EXT_HSM = 0x48534D,
SBI_EXT_SRST = 0x53525354,
SBI_EXT_PMU = 0x504D55,
/* Experimentals extensions must lie within this range */
SBI_EXT_EXPERIMENTAL_START = 0x08000000,
@ -112,6 +113,98 @@ enum sbi_srst_reset_reason {
SBI_SRST_RESET_REASON_SYS_FAILURE,
};
enum sbi_ext_pmu_fid {
SBI_EXT_PMU_NUM_COUNTERS = 0,
SBI_EXT_PMU_COUNTER_GET_INFO,
SBI_EXT_PMU_COUNTER_CFG_MATCH,
SBI_EXT_PMU_COUNTER_START,
SBI_EXT_PMU_COUNTER_STOP,
SBI_EXT_PMU_COUNTER_FW_READ,
};
#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(55, 0)
#define RISCV_PMU_RAW_EVENT_IDX 0x20000
/** General pmu event codes specified in SBI PMU extension */
enum sbi_pmu_hw_generic_events_t {
SBI_PMU_HW_NO_EVENT = 0,
SBI_PMU_HW_CPU_CYCLES = 1,
SBI_PMU_HW_INSTRUCTIONS = 2,
SBI_PMU_HW_CACHE_REFERENCES = 3,
SBI_PMU_HW_CACHE_MISSES = 4,
SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
SBI_PMU_HW_BRANCH_MISSES = 6,
SBI_PMU_HW_BUS_CYCLES = 7,
SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
SBI_PMU_HW_REF_CPU_CYCLES = 10,
SBI_PMU_HW_GENERAL_MAX,
};
/**
* Special "firmware" events provided by the firmware, even if the hardware
* does not support performance events. These events are encoded as a raw
* event type in Linux kernel perf framework.
*/
enum sbi_pmu_fw_generic_events_t {
SBI_PMU_FW_MISALIGNED_LOAD = 0,
SBI_PMU_FW_MISALIGNED_STORE = 1,
SBI_PMU_FW_ACCESS_LOAD = 2,
SBI_PMU_FW_ACCESS_STORE = 3,
SBI_PMU_FW_ILLEGAL_INSN = 4,
SBI_PMU_FW_SET_TIMER = 5,
SBI_PMU_FW_IPI_SENT = 6,
SBI_PMU_FW_IPI_RECVD = 7,
SBI_PMU_FW_FENCE_I_SENT = 8,
SBI_PMU_FW_FENCE_I_RECVD = 9,
SBI_PMU_FW_SFENCE_VMA_SENT = 10,
SBI_PMU_FW_SFENCE_VMA_RCVD = 11,
SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12,
SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13,
SBI_PMU_FW_HFENCE_GVMA_SENT = 14,
SBI_PMU_FW_HFENCE_GVMA_RCVD = 15,
SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16,
SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17,
SBI_PMU_FW_HFENCE_VVMA_SENT = 18,
SBI_PMU_FW_HFENCE_VVMA_RCVD = 19,
SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20,
SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21,
SBI_PMU_FW_MAX,
};
/* SBI PMU event types */
enum sbi_pmu_event_type {
SBI_PMU_EVENT_TYPE_HW = 0x0,
SBI_PMU_EVENT_TYPE_CACHE = 0x1,
SBI_PMU_EVENT_TYPE_RAW = 0x2,
SBI_PMU_EVENT_TYPE_FW = 0xf,
};
/* SBI PMU event types */
enum sbi_pmu_ctr_type {
SBI_PMU_CTR_TYPE_HW = 0x0,
SBI_PMU_CTR_TYPE_FW,
};
/* Flags defined for config matching function */
#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0)
#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1)
#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2)
#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3)
#define SBI_PMU_CFG_FLAG_SET_VSNH (1 << 4)
#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5)
#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6)
#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7)
/* Flags defined for counter start function */
#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0)
/* Flags defined for counter stop function */
#define SBI_PMU_STOP_FLAG_RESET (1 << 0)
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
@ -125,6 +218,8 @@ enum sbi_srst_reset_reason {
#define SBI_ERR_DENIED -4
#define SBI_ERR_INVALID_ADDRESS -5
#define SBI_ERR_ALREADY_AVAILABLE -6
#define SBI_ERR_ALREADY_STARTED -7
#define SBI_ERR_ALREADY_STOPPED -8
extern unsigned long sbi_spec_version;
struct sbiret {

View file

@ -53,7 +53,6 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
obj-$(CONFIG_RISCV_SBI) += sbi.o

View file

@ -6,6 +6,7 @@
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/of.h>
#include <asm/hwcap.h>
#include <asm/smp.h>
#include <asm/pgtable.h>
@ -63,12 +64,73 @@ int riscv_of_parent_hartid(struct device_node *node)
}
#ifdef CONFIG_PROC_FS
#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \
{ \
.uprop = #UPROP, \
.isa_ext_id = EXTID, \
}
/**
* Here are the ordering rules of extension naming defined by RISC-V
* specification :
* 1. All extensions should be separated from other multi-letter extensions
* from other multi-letter extensions by an underscore.
* 2. The first letter following the 'Z' conventionally indicates the most
* closely related alphabetical extension category, IMAFDQLCBKJTPVH.
* If multiple 'Z' extensions are named, they should be ordered first
* by category, then alphabetically within a category.
* 3. Standard supervisor-level extensions (starts with 'S') should be
* listed after standard unprivileged extensions. If multiple
* supervisor-level extensions are listed, they should be ordered
* alphabetically.
* 4. Non-standard extensions (starts with 'X') must be listed after all
* standard extensions. They must be separated from other multi-letter
* extensions by an underscore.
*/
static struct riscv_isa_ext_data isa_ext_arr[] = {
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
};
static void print_isa_ext(struct seq_file *f)
{
struct riscv_isa_ext_data *edata;
int i = 0, arr_sz;
arr_sz = ARRAY_SIZE(isa_ext_arr) - 1;
/* No extension support available */
if (arr_sz <= 0)
return;
for (i = 0; i <= arr_sz; i++) {
edata = &isa_ext_arr[i];
if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id))
continue;
seq_printf(f, "_%s", edata->uprop);
}
}
/**
* These are the only valid base (single letter) ISA extensions as per the spec.
* It also specifies the canonical order in which it appears in the spec.
* Some of the extension may just be a place holder for now (B, K, P, J).
* This should be updated once corresponding extensions are ratified.
*/
static const char base_riscv_exts[13] = "imafdqcbkjpvh";
static void print_isa(struct seq_file *f, const char *isa)
{
/* Print the entire ISA as it is */
int i;
seq_puts(f, "isa\t\t: ");
seq_write(f, isa, strlen(isa));
/* Print the rv[64/32] part */
seq_write(f, isa, 4);
for (i = 0; i < sizeof(base_riscv_exts); i++) {
if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a'))
/* Print only enabled the base ISA extensions */
seq_write(f, &base_riscv_exts[i], 1);
}
print_isa_ext(f);
seq_puts(f, "\n");
}
@ -79,7 +141,9 @@ static void print_mmu(struct seq_file *f)
#if defined(CONFIG_32BIT)
strncpy(sv_type, "sv32", 5);
#elif defined(CONFIG_64BIT)
if (pgtable_l4_enabled)
if (pgtable_l5_enabled)
strncpy(sv_type, "sv57", 5);
else if (pgtable_l4_enabled)
strncpy(sv_type, "sv48", 5);
else
strncpy(sv_type, "sv39", 5);

View file

@ -7,12 +7,15 @@
*/
#include <linux/bitmap.h>
#include <linux/ctype.h>
#include <linux/of.h>
#include <asm/processor.h>
#include <asm/hwcap.h>
#include <asm/smp.h>
#include <asm/switch_to.h>
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */
@ -63,8 +66,8 @@ void __init riscv_fill_hwcap(void)
{
struct device_node *node;
const char *isa;
char print_str[BITS_PER_LONG + 1];
size_t i, j, isa_len;
char print_str[NUM_ALPHA_EXTS + 1];
int i, j;
static unsigned long isa2hwcap[256] = {0};
isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@ -80,7 +83,8 @@ void __init riscv_fill_hwcap(void)
for_each_of_cpu_node(node) {
unsigned long this_hwcap = 0;
unsigned long this_isa = 0;
DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX);
const char *temp;
if (riscv_of_processor_hartid(node) < 0)
continue;
@ -90,23 +94,106 @@ void __init riscv_fill_hwcap(void)
continue;
}
i = 0;
isa_len = strlen(isa);
temp = isa;
#if IS_ENABLED(CONFIG_32BIT)
if (!strncmp(isa, "rv32", 4))
i += 4;
isa += 4;
#elif IS_ENABLED(CONFIG_64BIT)
if (!strncmp(isa, "rv64", 4))
i += 4;
isa += 4;
#endif
for (; i < isa_len; ++i) {
this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
/*
* TODO: X, Y and Z extension parsing for Host ISA
* bitmap will be added in-future.
*/
if ('a' <= isa[i] && isa[i] < 'x')
this_isa |= (1UL << (isa[i] - 'a'));
/* The riscv,isa DT property must start with rv64 or rv32 */
if (temp == isa)
continue;
bitmap_zero(this_isa, RISCV_ISA_EXT_MAX);
for (; *isa; ++isa) {
const char *ext = isa++;
const char *ext_end = isa;
bool ext_long = false, ext_err = false;
switch (*ext) {
case 's':
/**
* Workaround for invalid single-letter 's' & 'u'(QEMU).
* No need to set the bit in riscv_isa as 's' & 'u' are
* not valid ISA extensions. It works until multi-letter
* extension starting with "Su" appears.
*/
if (ext[-1] != '_' && ext[1] == 'u') {
++isa;
ext_err = true;
break;
}
fallthrough;
case 'x':
case 'z':
ext_long = true;
/* Multi-letter extension must be delimited */
for (; *isa && *isa != '_'; ++isa)
if (unlikely(!islower(*isa)
&& !isdigit(*isa)))
ext_err = true;
/* Parse backwards */
ext_end = isa;
if (unlikely(ext_err))
break;
if (!isdigit(ext_end[-1]))
break;
/* Skip the minor version */
while (isdigit(*--ext_end))
;
if (ext_end[0] != 'p'
|| !isdigit(ext_end[-1])) {
/* Advance it to offset the pre-decrement */
++ext_end;
break;
}
/* Skip the major version */
while (isdigit(*--ext_end))
;
++ext_end;
break;
default:
if (unlikely(!islower(*ext))) {
ext_err = true;
break;
}
/* Find next extension */
if (!isdigit(*isa))
break;
/* Skip the minor version */
while (isdigit(*++isa))
;
if (*isa != 'p')
break;
if (!isdigit(*++isa)) {
--isa;
break;
}
/* Skip the major version */
while (isdigit(*++isa))
;
break;
}
if (*isa != '_')
--isa;
#define SET_ISA_EXT_MAP(name, bit) \
do { \
if ((ext_end - ext == sizeof(name) - 1) && \
!memcmp(ext, name, sizeof(name) - 1)) \
set_bit(bit, this_isa); \
} while (false) \
if (unlikely(ext_err))
continue;
if (!ext_long) {
this_hwcap |= isa2hwcap[(unsigned char)(*ext)];
set_bit(*ext - 'a', this_isa);
} else {
SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
}
#undef SET_ISA_EXT_MAP
}
/*
@ -119,10 +206,11 @@ void __init riscv_fill_hwcap(void)
else
elf_hwcap = this_hwcap;
if (riscv_isa[0])
riscv_isa[0] &= this_isa;
if (bitmap_weight(riscv_isa, RISCV_ISA_EXT_MAX))
bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
else
riscv_isa[0] = this_isa;
bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
}
/* We don't support systems with F but without D, so mask those out
@ -133,13 +221,13 @@ void __init riscv_fill_hwcap(void)
}
memset(print_str, 0, sizeof(print_str));
for (i = 0, j = 0; i < BITS_PER_LONG; i++)
for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
if (riscv_isa[0] & BIT_MASK(i))
print_str[j++] = (char)('a' + i);
pr_info("riscv: ISA extensions %s\n", print_str);
pr_info("riscv: base ISA extensions %s\n", print_str);
memset(print_str, 0, sizeof(print_str));
for (i = 0, j = 0; i < BITS_PER_LONG; i++)
for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
if (elf_hwcap & BIT_MASK(i))
print_str[j++] = (char)('a' + i);
pr_info("riscv: ELF capabilities %s\n", print_str);

View file

@ -225,6 +225,10 @@ ret_from_syscall:
* (If it was configured with SECCOMP_RET_ERRNO/TRACE)
*/
ret_from_syscall_rejected:
#ifdef CONFIG_DEBUG_RSEQ
move a0, sp
call rseq_syscall
#endif
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_WORK

View file

@ -1,485 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
* Copyright (C) 2009 Google, Inc., Stephane Eranian
* Copyright 2014 Tilera Corporation. All Rights Reserved.
* Copyright (C) 2018 Andes Technology Corporation
*
* Perf_events support for RISC-V platforms.
*
* Since the spec. (as of now, Priv-Spec 1.10) does not provide enough
* functionality for perf event to fully work, this file provides
* the very basic framework only.
*
* For platform portings, please check Documentations/riscv/pmu.txt.
*
* The Copyright line includes x86 and tile ones.
*/
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/bitmap.h>
#include <linux/irq.h>
#include <linux/perf_event.h>
#include <linux/atomic.h>
#include <linux/of.h>
#include <asm/perf_event.h>
static const struct riscv_pmu *riscv_pmu __read_mostly;
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
/*
* Hardware & cache maps and their methods
*/
static const int riscv_hw_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE,
[PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET,
[PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP,
};
#define C(x) PERF_COUNT_HW_CACHE_##x
static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
};
static int riscv_map_hw_event(u64 config)
{
if (config >= riscv_pmu->max_events)
return -EINVAL;
return riscv_pmu->hw_events[config];
}
static int riscv_map_cache_decode(u64 config, unsigned int *type,
unsigned int *op, unsigned int *result)
{
return -ENOENT;
}
static int riscv_map_cache_event(u64 config)
{
unsigned int type, op, result;
int err = -ENOENT;
int code;
err = riscv_map_cache_decode(config, &type, &op, &result);
if (!riscv_pmu->cache_events || err)
return err;
if (type >= PERF_COUNT_HW_CACHE_MAX ||
op >= PERF_COUNT_HW_CACHE_OP_MAX ||
result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
code = (*riscv_pmu->cache_events)[type][op][result];
if (code == RISCV_OP_UNSUPP)
return -EINVAL;
return code;
}
/*
* Low-level functions: reading/writing counters
*/
static inline u64 read_counter(int idx)
{
u64 val = 0;
switch (idx) {
case RISCV_PMU_CYCLE:
val = csr_read(CSR_CYCLE);
break;
case RISCV_PMU_INSTRET:
val = csr_read(CSR_INSTRET);
break;
default:
WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
return -EINVAL;
}
return val;
}
static inline void write_counter(int idx, u64 value)
{
/* currently not supported */
WARN_ON_ONCE(1);
}
/*
* pmu->read: read and update the counter
*
* Other architectures' implementation often have a xxx_perf_event_update
* routine, which can return counter values when called in the IRQ, but
* return void when being called by the pmu->read method.
*/
static void riscv_pmu_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
u64 prev_raw_count, new_raw_count;
u64 oldval;
int idx = hwc->idx;
u64 delta;
do {
prev_raw_count = local64_read(&hwc->prev_count);
new_raw_count = read_counter(idx);
oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count);
} while (oldval != prev_raw_count);
/*
* delta is the value to update the counter we maintain in the kernel.
*/
delta = (new_raw_count - prev_raw_count) &
((1ULL << riscv_pmu->counter_width) - 1);
local64_add(delta, &event->count);
/*
* Something like local64_sub(delta, &hwc->period_left) here is
* needed if there is an interrupt for perf.
*/
}
/*
* State transition functions:
*
* stop()/start() & add()/del()
*/
/*
* pmu->stop: stop the counter
*/
static void riscv_pmu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
riscv_pmu->pmu->read(event);
hwc->state |= PERF_HES_UPTODATE;
}
}
/*
* pmu->start: start the event.
*/
static void riscv_pmu_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
if (flags & PERF_EF_RELOAD) {
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
/*
* Set the counter to the period to the next interrupt here,
* if you have any.
*/
}
hwc->state = 0;
perf_event_update_userpage(event);
/*
* Since we cannot write to counters, this serves as an initialization
* to the delta-mechanism in pmu->read(); otherwise, the delta would be
* wrong when pmu->read is called for the first time.
*/
local64_set(&hwc->prev_count, read_counter(hwc->idx));
}
/*
* pmu->add: add the event to PMU.
*/
static int riscv_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
if (cpuc->n_events == riscv_pmu->num_counters)
return -ENOSPC;
/*
* We don't have general conunters, so no binding-event-to-counter
* process here.
*
* Indexing using hwc->config generally not works, since config may
* contain extra information, but here the only info we have in
* hwc->config is the event index.
*/
hwc->idx = hwc->config;
cpuc->events[hwc->idx] = event;
cpuc->n_events++;
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (flags & PERF_EF_START)
riscv_pmu->pmu->start(event, PERF_EF_RELOAD);
return 0;
}
/*
* pmu->del: delete the event from PMU.
*/
static void riscv_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
cpuc->events[hwc->idx] = NULL;
cpuc->n_events--;
riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
perf_event_update_userpage(event);
}
/*
* Interrupt: a skeletion for reference.
*/
static DEFINE_MUTEX(pmc_reserve_mutex);
static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
{
return IRQ_NONE;
}
static int reserve_pmc_hardware(void)
{
int err = 0;
mutex_lock(&pmc_reserve_mutex);
if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) {
err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq,
IRQF_PERCPU, "riscv-base-perf", NULL);
}
mutex_unlock(&pmc_reserve_mutex);
return err;
}
static void release_pmc_hardware(void)
{
mutex_lock(&pmc_reserve_mutex);
if (riscv_pmu->irq >= 0)
free_irq(riscv_pmu->irq, NULL);
mutex_unlock(&pmc_reserve_mutex);
}
/*
* Event Initialization/Finalization
*/
static atomic_t riscv_active_events = ATOMIC_INIT(0);
static void riscv_event_destroy(struct perf_event *event)
{
if (atomic_dec_return(&riscv_active_events) == 0)
release_pmc_hardware();
}
static int riscv_event_init(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
int err;
int code;
if (atomic_inc_return(&riscv_active_events) == 1) {
err = reserve_pmc_hardware();
if (err) {
pr_warn("PMC hardware not available\n");
atomic_dec(&riscv_active_events);
return -EBUSY;
}
}
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
code = riscv_pmu->map_hw_event(attr->config);
break;
case PERF_TYPE_HW_CACHE:
code = riscv_pmu->map_cache_event(attr->config);
break;
case PERF_TYPE_RAW:
return -EOPNOTSUPP;
default:
return -ENOENT;
}
event->destroy = riscv_event_destroy;
if (code < 0) {
event->destroy(event);
return code;
}
/*
* idx is set to -1 because the index of a general event should not be
* decided until binding to some counter in pmu->add().
*
* But since we don't have such support, later in pmu->add(), we just
* use hwc->config as the index instead.
*/
hwc->config = code;
hwc->idx = -1;
return 0;
}
/*
* Initialization
*/
static struct pmu min_pmu = {
.name = "riscv-base",
.event_init = riscv_event_init,
.add = riscv_pmu_add,
.del = riscv_pmu_del,
.start = riscv_pmu_start,
.stop = riscv_pmu_stop,
.read = riscv_pmu_read,
};
static const struct riscv_pmu riscv_base_pmu = {
.pmu = &min_pmu,
.max_events = ARRAY_SIZE(riscv_hw_event_map),
.map_hw_event = riscv_map_hw_event,
.hw_events = riscv_hw_event_map,
.map_cache_event = riscv_map_cache_event,
.cache_events = &riscv_cache_event_map,
.counter_width = 63,
.num_counters = RISCV_BASE_COUNTERS + 0,
.handle_irq = &riscv_base_pmu_handle_irq,
/* This means this PMU has no IRQ. */
.irq = -1,
};
static const struct of_device_id riscv_pmu_of_ids[] = {
{.compatible = "riscv,base-pmu", .data = &riscv_base_pmu},
{ /* sentinel value */ }
};
static int __init init_hw_perf_events(void)
{
struct device_node *node = of_find_node_by_type(NULL, "pmu");
const struct of_device_id *of_id;
riscv_pmu = &riscv_base_pmu;
if (node) {
of_id = of_match_node(riscv_pmu_of_ids, node);
if (of_id)
riscv_pmu = of_id->data;
of_node_put(node);
}
perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW);
return 0;
}
arch_initcall(init_hw_perf_events);

View file

@ -258,6 +258,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
}
}
rseq_signal_deliver(ksig, regs);
/* Set up the stack frame */
ret = setup_rt_frame(ksig, oldset, regs);

View file

@ -1,64 +1,316 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2022 Michael T. Kloos <michael@michaelkloos.com>
*/
#include <linux/linkage.h>
#include <asm/asm.h>
ENTRY(__memmove)
WEAK(memmove)
move t0, a0
move t1, a1
SYM_FUNC_START(__memmove)
SYM_FUNC_START_WEAK(memmove)
/*
* Returns
* a0 - dest
*
* Parameters
* a0 - Inclusive first byte of dest
* a1 - Inclusive first byte of src
* a2 - Length of copy n
*
* Because the return matches the parameter register a0,
* we will not clobber or modify that register.
*
* Note: This currently only works on little-endian.
* To port to big-endian, reverse the direction of shifts
* in the 2 misaligned fixup copy loops.
*/
beq a0, a1, exit_memcpy
beqz a2, exit_memcpy
srli t2, a2, 0x2
/* Return if nothing to do */
beq a0, a1, return_from_memmove
beqz a2, return_from_memmove
slt t3, a0, a1
beqz t3, do_reverse
/*
* Register Uses
* Forward Copy: a1 - Index counter of src
* Reverse Copy: a4 - Index counter of src
* Forward Copy: t3 - Index counter of dest
* Reverse Copy: t4 - Index counter of dest
* Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest
* Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest
* Both Copy Modes: t0 - Link / Temporary for load-store
* Both Copy Modes: t1 - Temporary for load-store
* Both Copy Modes: t2 - Temporary for load-store
* Both Copy Modes: a5 - dest to src alignment offset
* Both Copy Modes: a6 - Shift ammount
* Both Copy Modes: a7 - Inverse Shift ammount
* Both Copy Modes: a2 - Alternate breakpoint for unrolled loops
*/
andi a2, a2, 0x3
li t4, 1
beqz t2, byte_copy
/*
* Solve for some register values now.
* Byte copy does not need t5 or t6.
*/
mv t3, a0
add t4, a0, a2
add a4, a1, a2
word_copy:
lw t3, 0(a1)
addi t2, t2, -1
addi a1, a1, 4
sw t3, 0(a0)
addi a0, a0, 4
bnez t2, word_copy
beqz a2, exit_memcpy
j byte_copy
/*
* Byte copy if copying less than (2 * SZREG) bytes. This can
* cause problems with the bulk copy implementation and is
* small enough not to bother.
*/
andi t0, a2, -(2 * SZREG)
beqz t0, byte_copy
do_reverse:
add a0, a0, a2
add a1, a1, a2
andi a2, a2, 0x3
li t4, -1
beqz t2, reverse_byte_copy
/*
* Now solve for t5 and t6.
*/
andi t5, t3, -SZREG
andi t6, t4, -SZREG
/*
* If dest(Register t3) rounded down to the nearest naturally
* aligned SZREG address, does not equal dest, then add SZREG
* to find the low-bound of SZREG alignment in the dest memory
* region. Note that this could overshoot the dest memory
* region if n is less than SZREG. This is one reason why
* we always byte copy if n is less than SZREG.
* Otherwise, dest is already naturally aligned to SZREG.
*/
beq t5, t3, 1f
addi t5, t5, SZREG
1:
reverse_word_copy:
addi a1, a1, -4
addi t2, t2, -1
lw t3, 0(a1)
addi a0, a0, -4
sw t3, 0(a0)
bnez t2, reverse_word_copy
beqz a2, exit_memcpy
/*
* If the dest and src are co-aligned to SZREG, then there is
* no need for the full rigmarole of a full misaligned fixup copy.
* Instead, do a simpler co-aligned copy.
*/
xor t0, a0, a1
andi t1, t0, (SZREG - 1)
beqz t1, coaligned_copy
/* Fall through to misaligned fixup copy */
reverse_byte_copy:
addi a0, a0, -1
addi a1, a1, -1
misaligned_fixup_copy:
bltu a1, a0, misaligned_fixup_copy_reverse
misaligned_fixup_copy_forward:
jal t0, byte_copy_until_aligned_forward
andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
sub a5, a1, t3 /* Find the difference between src and dest */
andi a1, a1, -SZREG /* Align the src pointer */
addi a2, t6, SZREG /* The other breakpoint for the unrolled loop*/
/*
* Compute The Inverse Shift
* a7 = XLEN - a6 = XLEN + -a6
* 2s complement negation to find the negative: -a6 = ~a6 + 1
* Add that to XLEN. XLEN = SZREG * 8.
*/
not a7, a6
addi a7, a7, (SZREG * 8 + 1)
/*
* Fix Misalignment Copy Loop - Forward
* load_val0 = load_ptr[0];
* do {
* load_val1 = load_ptr[1];
* store_ptr += 2;
* store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7});
*
* if (store_ptr == {a2})
* break;
*
* load_val0 = load_ptr[2];
* load_ptr += 2;
* store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7});
*
* } while (store_ptr != store_ptr_end);
* store_ptr = store_ptr_end;
*/
REG_L t0, (0 * SZREG)(a1)
1:
REG_L t1, (1 * SZREG)(a1)
addi t3, t3, (2 * SZREG)
srl t0, t0, a6
sll t2, t1, a7
or t2, t0, t2
REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3)
beq t3, a2, 2f
REG_L t0, (2 * SZREG)(a1)
addi a1, a1, (2 * SZREG)
srl t1, t1, a6
sll t2, t0, a7
or t2, t1, t2
REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3)
bne t3, t6, 1b
2:
mv t3, t6 /* Fix the dest pointer in case the loop was broken */
add a1, t3, a5 /* Restore the src pointer */
j byte_copy_forward /* Copy any remaining bytes */
misaligned_fixup_copy_reverse:
jal t0, byte_copy_until_aligned_reverse
andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
sub a5, a4, t4 /* Find the difference between src and dest */
andi a4, a4, -SZREG /* Align the src pointer */
addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop*/
/*
* Compute The Inverse Shift
* a7 = XLEN - a6 = XLEN + -a6
* 2s complement negation to find the negative: -a6 = ~a6 + 1
* Add that to XLEN. XLEN = SZREG * 8.
*/
not a7, a6
addi a7, a7, (SZREG * 8 + 1)
/*
* Fix Misalignment Copy Loop - Reverse
* load_val1 = load_ptr[0];
* do {
* load_val0 = load_ptr[-1];
* store_ptr -= 2;
* store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7});
*
* if (store_ptr == {a2})
* break;
*
* load_val1 = load_ptr[-2];
* load_ptr -= 2;
* store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7});
*
* } while (store_ptr != store_ptr_end);
* store_ptr = store_ptr_end;
*/
REG_L t1, ( 0 * SZREG)(a4)
1:
REG_L t0, (-1 * SZREG)(a4)
addi t4, t4, (-2 * SZREG)
sll t1, t1, a7
srl t2, t0, a6
or t2, t1, t2
REG_S t2, ( 1 * SZREG)(t4)
beq t4, a2, 2f
REG_L t1, (-2 * SZREG)(a4)
addi a4, a4, (-2 * SZREG)
sll t0, t0, a7
srl t2, t1, a6
or t2, t0, t2
REG_S t2, ( 0 * SZREG)(t4)
bne t4, t5, 1b
2:
mv t4, t5 /* Fix the dest pointer in case the loop was broken */
add a4, t4, a5 /* Restore the src pointer */
j byte_copy_reverse /* Copy any remaining bytes */
/*
* Simple copy loops for SZREG co-aligned memory locations.
* These also make calls to do byte copies for any unaligned
* data at their terminations.
*/
coaligned_copy:
bltu a1, a0, coaligned_copy_reverse
coaligned_copy_forward:
jal t0, byte_copy_until_aligned_forward
1:
REG_L t1, ( 0 * SZREG)(a1)
addi a1, a1, SZREG
addi t3, t3, SZREG
REG_S t1, (-1 * SZREG)(t3)
bne t3, t6, 1b
j byte_copy_forward /* Copy any remaining bytes */
coaligned_copy_reverse:
jal t0, byte_copy_until_aligned_reverse
1:
REG_L t1, (-1 * SZREG)(a4)
addi a4, a4, -SZREG
addi t4, t4, -SZREG
REG_S t1, ( 0 * SZREG)(t4)
bne t4, t5, 1b
j byte_copy_reverse /* Copy any remaining bytes */
/*
* These are basically sub-functions within the function. They
* are used to byte copy until the dest pointer is in alignment.
* At which point, a bulk copy method can be used by the
* calling code. These work on the same registers as the bulk
* copy loops. Therefore, the register values can be picked
* up from where they were left and we avoid code duplication
* without any overhead except the call in and return jumps.
*/
byte_copy_until_aligned_forward:
beq t3, t5, 2f
1:
lb t1, 0(a1)
addi a1, a1, 1
addi t3, t3, 1
sb t1, -1(t3)
bne t3, t5, 1b
2:
jalr zero, 0x0(t0) /* Return to multibyte copy loop */
byte_copy_until_aligned_reverse:
beq t4, t6, 2f
1:
lb t1, -1(a4)
addi a4, a4, -1
addi t4, t4, -1
sb t1, 0(t4)
bne t4, t6, 1b
2:
jalr zero, 0x0(t0) /* Return to multibyte copy loop */
/*
* Simple byte copy loops.
* These will byte copy until they reach the end of data to copy.
* At that point, they will call to return from memmove.
*/
byte_copy:
lb t3, 0(a1)
addi a2, a2, -1
sb t3, 0(a0)
add a1, a1, t4
add a0, a0, t4
bnez a2, byte_copy
bltu a1, a0, byte_copy_reverse
exit_memcpy:
move a0, t0
move a1, t1
ret
END(__memmove)
byte_copy_forward:
beq t3, t4, 2f
1:
lb t1, 0(a1)
addi a1, a1, 1
addi t3, t3, 1
sb t1, -1(t3)
bne t3, t4, 1b
2:
ret
byte_copy_reverse:
beq t4, t3, 2f
1:
lb t1, -1(a4)
addi a4, a4, -1
addi t4, t4, -1
sb t1, 0(t4)
bne t4, t3, 1b
2:
return_from_memmove:
ret
SYM_FUNC_END(memmove)
SYM_FUNC_END(__memmove)

View file

@ -38,14 +38,16 @@ EXPORT_SYMBOL(kernel_map);
#endif
#ifdef CONFIG_64BIT
u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
#else
u64 satp_mode = SATP_MODE_32;
u64 satp_mode __ro_after_init = SATP_MODE_32;
#endif
EXPORT_SYMBOL(satp_mode);
bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
EXPORT_SYMBOL(pgtable_l5_enabled);
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
@ -227,6 +229,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
@ -318,6 +321,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */
static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
#define fixmap_p4d ((p4d_t *)XIP_FIXUP(fixmap_p4d))
#define early_p4d ((p4d_t *)XIP_FIXUP(early_p4d))
#endif /* CONFIG_XIP_KERNEL */
static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
@ -432,6 +445,44 @@ static phys_addr_t alloc_pud_late(uintptr_t va)
return __pa(vaddr);
}
static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
{
return (p4d_t *)((uintptr_t)pa);
}
static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
{
clear_fixmap(FIX_P4D);
return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
}
static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
{
return (p4d_t *)__va(pa);
}
static phys_addr_t __init alloc_p4d_early(uintptr_t va)
{
/* Only one P4D is available for early mapping */
BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
return (uintptr_t)early_p4d;
}
static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
{
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
static phys_addr_t alloc_p4d_late(uintptr_t va)
{
unsigned long vaddr;
vaddr = __get_free_page(GFP_KERNEL);
BUG_ON(!vaddr);
return __pa(vaddr);
}
static void __init create_pud_mapping(pud_t *pudp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
@ -459,21 +510,55 @@ static void __init create_pud_mapping(pud_t *pudp,
create_pmd_mapping(nextp, va, pa, sz, prot);
}
#define pgd_next_t pud_t
#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \
pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \
pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
static void __init create_p4d_mapping(p4d_t *p4dp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
pud_t *nextp;
phys_addr_t next_phys;
uintptr_t p4d_index = p4d_index(va);
if (sz == P4D_SIZE) {
if (p4d_val(p4dp[p4d_index]) == 0)
p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
return;
}
if (p4d_val(p4dp[p4d_index]) == 0) {
next_phys = pt_ops.alloc_pud(va);
p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
nextp = pt_ops.get_pud_virt(next_phys);
memset(nextp, 0, PAGE_SIZE);
} else {
next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
nextp = pt_ops.get_pud_virt(next_phys);
}
create_pud_mapping(nextp, va, pa, sz, prot);
}
#define pgd_next_t p4d_t
#define alloc_pgd_next(__va) (pgtable_l5_enabled ? \
pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ? \
pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
#define get_pgd_next_virt(__pa) (pgtable_l5_enabled ? \
pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ? \
pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
(pgtable_l5_enabled ? \
create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
(pgtable_l4_enabled ? \
create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \
create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
#define fixmap_pgd_next (pgtable_l4_enabled ? \
(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
#define trampoline_pgd_next (pgtable_l4_enabled ? \
(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
#define early_dtb_pgd_next (pgtable_l4_enabled ? \
(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) : \
create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
#define fixmap_pgd_next (pgtable_l5_enabled ? \
(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ? \
(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
#define trampoline_pgd_next (pgtable_l5_enabled ? \
(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \
(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
#define early_dtb_pgd_next (pgtable_l5_enabled ? \
(uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \
(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
@ -482,6 +567,7 @@ static void __init create_pud_mapping(pud_t *pudp,
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
#endif /* __PAGETABLE_PMD_FOLDED */
@ -575,6 +661,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
#endif /* CONFIG_STRICT_KERNEL_RWX */
#ifdef CONFIG_64BIT
static void __init disable_pgtable_l5(void)
{
pgtable_l5_enabled = false;
kernel_map.page_offset = PAGE_OFFSET_L4;
satp_mode = SATP_MODE_48;
}
static void __init disable_pgtable_l4(void)
{
pgtable_l4_enabled = false;
@ -591,12 +684,12 @@ static void __init disable_pgtable_l4(void)
static __init void set_satp_mode(void)
{
u64 identity_satp, hw_satp;
uintptr_t set_satp_mode_pmd;
uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
bool check_l4 = false;
set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
create_pgd_mapping(early_pg_dir,
set_satp_mode_pmd, (uintptr_t)early_pud,
PGDIR_SIZE, PAGE_TABLE);
create_p4d_mapping(early_p4d,
set_satp_mode_pmd, (uintptr_t)early_pud,
P4D_SIZE, PAGE_TABLE);
create_pud_mapping(early_pud,
set_satp_mode_pmd, (uintptr_t)early_pmd,
PUD_SIZE, PAGE_TABLE);
@ -608,6 +701,11 @@ static __init void set_satp_mode(void)
set_satp_mode_pmd + PMD_SIZE,
set_satp_mode_pmd + PMD_SIZE,
PMD_SIZE, PAGE_KERNEL_EXEC);
retry:
create_pgd_mapping(early_pg_dir,
set_satp_mode_pmd,
check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
PGDIR_SIZE, PAGE_TABLE);
identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
@ -616,10 +714,17 @@ static __init void set_satp_mode(void)
hw_satp = csr_swap(CSR_SATP, 0ULL);
local_flush_tlb_all();
if (hw_satp != identity_satp)
if (hw_satp != identity_satp) {
if (!check_l4) {
disable_pgtable_l5();
check_l4 = true;
goto retry;
}
disable_pgtable_l4();
}
memset(early_pg_dir, 0, PAGE_SIZE);
memset(early_p4d, 0, PAGE_SIZE);
memset(early_pud, 0, PAGE_SIZE);
memset(early_pmd, 0, PAGE_SIZE);
}
@ -693,10 +798,13 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
PGDIR_SIZE,
IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
if (pgtable_l4_enabled) {
if (pgtable_l5_enabled)
create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
(uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
if (pgtable_l4_enabled)
create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
(uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
}
if (IS_ENABLED(CONFIG_64BIT)) {
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
@ -732,6 +840,8 @@ void __init pt_ops_set_early(void)
pt_ops.get_pmd_virt = get_pmd_virt_early;
pt_ops.alloc_pud = alloc_pud_early;
pt_ops.get_pud_virt = get_pud_virt_early;
pt_ops.alloc_p4d = alloc_p4d_early;
pt_ops.get_p4d_virt = get_p4d_virt_early;
#endif
}
@ -752,6 +862,8 @@ void __init pt_ops_set_fixmap(void)
pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap);
pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap);
#endif
}
@ -768,6 +880,8 @@ void __init pt_ops_set_late(void)
pt_ops.get_pmd_virt = get_pmd_virt_late;
pt_ops.alloc_pud = alloc_pud_late;
pt_ops.get_pud_virt = get_pud_virt_late;
pt_ops.alloc_p4d = alloc_p4d_late;
pt_ops.get_p4d_virt = get_p4d_virt_late;
#endif
}
@ -828,6 +942,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
/* Setup fixmap P4D and PUD */
if (pgtable_l5_enabled)
create_p4d_mapping(fixmap_p4d, FIXADDR_START,
(uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
/* Setup fixmap PUD and PMD */
if (pgtable_l4_enabled)
create_pud_mapping(fixmap_pud, FIXADDR_START,
@ -837,6 +955,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
/* Setup trampoline PGD and PMD */
create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
if (pgtable_l5_enabled)
create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
(uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
if (pgtable_l4_enabled)
create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
(uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
@ -938,6 +1059,7 @@ static void __init setup_vm_final(void)
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
clear_fixmap(FIX_PUD);
clear_fixmap(FIX_P4D);
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);

View file

@ -111,6 +111,8 @@ static void __init kasan_populate_pud(pgd_t *pgd,
* pt_ops facility.
*/
base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
} else if (pgd_none(*pgd)) {
base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
} else {
base_pud = (pud_t *)pgd_page_vaddr(*pgd);
if (base_pud == lm_alias(kasan_early_shadow_pud)) {
@ -152,13 +154,72 @@ static void __init kasan_populate_pud(pgd_t *pgd,
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
}
#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \
static void __init kasan_populate_p4d(pgd_t *pgd,
unsigned long vaddr, unsigned long end,
bool early)
{
phys_addr_t phys_addr;
p4d_t *p4dp, *base_p4d;
unsigned long next;
if (early) {
/*
* We can't use pgd_page_vaddr here as it would return a linear
* mapping address but it is not mapped yet, but when populating
* early_pg_dir, we need the physical address and when populating
* swapper_pg_dir, we need the kernel virtual address so use
* pt_ops facility.
*/
base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd)));
} else {
base_p4d = (p4d_t *)pgd_page_vaddr(*pgd);
if (base_p4d == lm_alias(kasan_early_shadow_p4d))
base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE);
}
p4dp = base_p4d + p4d_index(vaddr);
do {
next = p4d_addr_end(vaddr, end);
if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) {
if (early) {
phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud));
set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE));
continue;
} else {
phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE);
if (phys_addr) {
set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL));
continue;
}
}
}
kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early);
} while (p4dp++, vaddr = next, vaddr != end);
/*
* Wait for the whole P4D to be populated before setting the P4D in
* the page table, otherwise, if we did set the P4D before populating
* it entirely, memblock could allocate a page at a physical address
* where KASAN is not populated yet and then we'd get a page fault.
*/
if (!early)
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE));
}
#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \
(uintptr_t)kasan_early_shadow_p4d : \
(pgtable_l4_enabled ? \
(uintptr_t)kasan_early_shadow_pud : \
(uintptr_t)kasan_early_shadow_pmd)
(uintptr_t)kasan_early_shadow_pmd))
#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \
(pgtable_l5_enabled ? \
kasan_populate_p4d(pgdp, vaddr, next, early) : \
(pgtable_l4_enabled ? \
kasan_populate_pud(pgdp, vaddr, next, early) : \
kasan_populate_pmd((pud_t *)pgdp, vaddr, next))
kasan_populate_pmd((pud_t *)pgdp, vaddr, next)))
static void __init kasan_populate_pgd(pgd_t *pgdp,
unsigned long vaddr, unsigned long end,
@ -221,6 +282,14 @@ asmlinkage void __init kasan_early_init(void)
PAGE_TABLE));
}
if (pgtable_l5_enabled) {
for (i = 0; i < PTRS_PER_P4D; ++i)
set_p4d(kasan_early_shadow_p4d + i,
pfn_p4d(PFN_DOWN
(__pa(((uintptr_t)kasan_early_shadow_pud))),
PAGE_TABLE));
}
kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
KASAN_SHADOW_START, KASAN_SHADOW_END, true);
@ -246,9 +315,27 @@ static void __init kasan_populate(void *start, void *end)
memset(start, KASAN_SHADOW_INIT, end - start);
}
static void __init kasan_shallow_populate_pmd(pgd_t *pgdp,
unsigned long vaddr, unsigned long end)
{
unsigned long next;
pmd_t *pmdp, *base_pmd;
bool is_kasan_pte;
base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp);
pmdp = base_pmd + pmd_index(vaddr);
do {
next = pmd_addr_end(vaddr, end);
is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte));
if (is_kasan_pte)
pmd_clear(pmdp);
} while (pmdp++, vaddr = next, vaddr != end);
}
static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
unsigned long vaddr, unsigned long end,
bool kasan_populate)
unsigned long vaddr, unsigned long end)
{
unsigned long next;
pud_t *pudp, *base_pud;
@ -258,21 +345,60 @@ static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
pudp = base_pud + pud_index(vaddr);
if (kasan_populate)
memcpy(base_pud, (void *)kasan_early_shadow_pgd_next,
sizeof(pud_t) * PTRS_PER_PUD);
do {
next = pud_addr_end(vaddr, end);
is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
if (is_kasan_pmd) {
base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
}
if (!is_kasan_pmd)
continue;
base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE)
continue;
memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE);
kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next);
} while (pudp++, vaddr = next, vaddr != end);
}
static void __init kasan_shallow_populate_p4d(pgd_t *pgdp,
unsigned long vaddr, unsigned long end)
{
unsigned long next;
p4d_t *p4dp, *base_p4d;
pud_t *base_pud;
bool is_kasan_pud;
base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp);
p4dp = base_p4d + p4d_index(vaddr);
do {
next = p4d_addr_end(vaddr, end);
is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud));
if (!is_kasan_pud)
continue;
base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE)
continue;
memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE);
kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next);
} while (p4dp++, vaddr = next, vaddr != end);
}
#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next) \
(pgtable_l5_enabled ? \
kasan_shallow_populate_p4d(pgdp, vaddr, next) : \
(pgtable_l4_enabled ? \
kasan_shallow_populate_pud(pgdp, vaddr, next) : \
kasan_shallow_populate_pmd(pgdp, vaddr, next)))
static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
{
unsigned long next;
@ -293,7 +419,8 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long
if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
continue;
kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next);
memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE);
kasan_shallow_populate_pgd_next(pgd_k, vaddr, next);
} while (pgd_k++, vaddr = next, vaddr != end);
}

View file

@ -56,6 +56,36 @@ config ARM_PMU
Say y if you want to use CPU performance monitors on ARM-based
systems.
config RISCV_PMU
depends on RISCV
bool "RISC-V PMU framework"
default y
help
Say y if you want to use CPU performance monitors on RISCV-based
systems. This provides the core PMU framework that abstracts common
PMU functionalities in a core library so that different PMU drivers
can reuse it.
config RISCV_PMU_LEGACY
depends on RISCV_PMU
bool "RISC-V legacy PMU implementation"
default y
help
Say y if you want to use the legacy CPU performance monitor
implementation on RISC-V based systems. This only allows counting
of cycle/instruction counter and doesn't support counter overflow,
or programmable counters. It will be removed in future.
config RISCV_PMU_SBI
depends on RISCV_PMU && RISCV_SBI
bool "RISC-V PMU based on SBI PMU extension"
default y
help
Say y if you want to use the CPU performance monitor
using SBI PMU extension on RISC-V based systems. This option provides
full perf feature support i.e. counter overflow, privilege mode
filtering, counter configuration.
config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y

View file

@ -10,6 +10,9 @@ obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o
obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o
obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o

324
drivers/perf/riscv_pmu.c Normal file
View file

@ -0,0 +1,324 @@
// SPDX-License-Identifier: GPL-2.0
/*
* RISC-V performance counter support.
*
* Copyright (C) 2021 Western Digital Corporation or its affiliates.
*
* This implementation is based on old RISC-V perf and ARM perf event code
* which are in turn based on sparc64 and x86 code.
*/
#include <linux/cpumask.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>
#include <asm/sbi.h>
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val) {\
case __csr_num: \
__val = csr_read(__csr_num); \
break; }
#define switchcase_csr_read_2(__csr_num, __val) {\
switchcase_csr_read(__csr_num + 0, __val) \
switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val) {\
switchcase_csr_read_2(__csr_num + 0, __val) \
switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val) {\
switchcase_csr_read_4(__csr_num + 0, __val) \
switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val) {\
switchcase_csr_read_8(__csr_num + 0, __val) \
switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val) {\
switchcase_csr_read_16(__csr_num + 0, __val) \
switchcase_csr_read_16(__csr_num + 16, __val)}
unsigned long ret = 0;
switch (csr_num) {
switchcase_csr_read_32(CSR_CYCLE, ret)
switchcase_csr_read_32(CSR_CYCLEH, ret)
default :
break;
}
return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}
/*
* Read the CSR of a corresponding counter.
*/
unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
{
if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
(csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
pr_err("Invalid performance counter csr %lx\n", csr);
return -EINVAL;
}
return csr_read_num(csr);
}
u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
{
int cwidth;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
if (!rvpmu->ctr_get_width)
/**
* If the pmu driver doesn't support counter width, set it to default
* maximum allowed by the specification.
*/
cwidth = 63;
else {
if (hwc->idx == -1)
/* Handle init case where idx is not initialized yet */
cwidth = rvpmu->ctr_get_width(0);
else
cwidth = rvpmu->ctr_get_width(hwc->idx);
}
return GENMASK_ULL(cwidth, 0);
}
u64 riscv_pmu_event_update(struct perf_event *event)
{
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
u64 prev_raw_count, new_raw_count;
unsigned long cmask;
u64 oldval, delta;
if (!rvpmu->ctr_read)
return 0;
cmask = riscv_pmu_ctr_get_width_mask(event);
do {
prev_raw_count = local64_read(&hwc->prev_count);
new_raw_count = rvpmu->ctr_read(event);
oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count);
} while (oldval != prev_raw_count);
delta = (new_raw_count - prev_raw_count) & cmask;
local64_add(delta, &event->count);
local64_sub(delta, &hwc->period_left);
return delta;
}
static void riscv_pmu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
if (!(hwc->state & PERF_HES_STOPPED)) {
if (rvpmu->ctr_stop) {
rvpmu->ctr_stop(event, 0);
hwc->state |= PERF_HES_STOPPED;
}
riscv_pmu_event_update(event);
hwc->state |= PERF_HES_UPTODATE;
}
}
int riscv_pmu_event_set_period(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int overflow = 0;
uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
if (unlikely(left <= -period)) {
left = period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
overflow = 1;
}
if (unlikely(left <= 0)) {
left += period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
overflow = 1;
}
/*
* Limit the maximum period to prevent the counter value
* from overtaking the one we are about to program. In
* effect we are reducing max_period to account for
* interrupt latency (and we are being very conservative).
*/
if (left > (max_period >> 1))
left = (max_period >> 1);
local64_set(&hwc->prev_count, (u64)-left);
perf_event_update_userpage(event);
return overflow;
}
static void riscv_pmu_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
u64 init_val;
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
if (flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
hwc->state = 0;
riscv_pmu_event_set_period(event);
init_val = local64_read(&hwc->prev_count) & max_period;
rvpmu->ctr_start(event, init_val);
perf_event_update_userpage(event);
}
static int riscv_pmu_add(struct perf_event *event, int flags)
{
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx;
idx = rvpmu->ctr_get_idx(event);
if (idx < 0)
return idx;
hwc->idx = idx;
cpuc->events[idx] = event;
cpuc->n_events++;
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (flags & PERF_EF_START)
riscv_pmu_start(event, PERF_EF_RELOAD);
/* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
return 0;
}
static void riscv_pmu_del(struct perf_event *event, int flags)
{
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
struct hw_perf_event *hwc = &event->hw;
riscv_pmu_stop(event, PERF_EF_UPDATE);
cpuc->events[hwc->idx] = NULL;
/* The firmware need to reset the counter mapping */
if (rvpmu->ctr_stop)
rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
cpuc->n_events--;
if (rvpmu->ctr_clear_idx)
rvpmu->ctr_clear_idx(event);
perf_event_update_userpage(event);
hwc->idx = -1;
}
static void riscv_pmu_read(struct perf_event *event)
{
riscv_pmu_event_update(event);
}
static int riscv_pmu_event_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
int mapped_event;
u64 event_config = 0;
uint64_t cmask;
hwc->flags = 0;
mapped_event = rvpmu->event_map(event, &event_config);
if (mapped_event < 0) {
pr_debug("event %x:%llx not supported\n", event->attr.type,
event->attr.config);
return mapped_event;
}
/*
* idx is set to -1 because the index of a general event should not be
* decided until binding to some counter in pmu->add().
* config will contain the information about counter CSR
* the idx will contain the counter index
*/
hwc->config = event_config;
hwc->idx = -1;
hwc->event_base = mapped_event;
if (!is_sampling_event(event)) {
/*
* For non-sampling runs, limit the sample_period to half
* of the counter width. That way, the new counter value
* is far less likely to overtake the previous one unless
* you have some serious IRQ latency issues.
*/
cmask = riscv_pmu_ctr_get_width_mask(event);
hwc->sample_period = cmask >> 1;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
}
return 0;
}
struct riscv_pmu *riscv_pmu_alloc(void)
{
struct riscv_pmu *pmu;
int cpuid, i;
struct cpu_hw_events *cpuc;
pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
if (!pmu)
goto out;
pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
if (!pmu->hw_events) {
pr_info("failed to allocate per-cpu PMU data.\n");
goto out_free_pmu;
}
for_each_possible_cpu(cpuid) {
cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
cpuc->n_events = 0;
for (i = 0; i < RISCV_MAX_COUNTERS; i++)
cpuc->events[i] = NULL;
}
pmu->pmu = (struct pmu) {
.event_init = riscv_pmu_event_init,
.add = riscv_pmu_add,
.del = riscv_pmu_del,
.start = riscv_pmu_start,
.stop = riscv_pmu_stop,
.read = riscv_pmu_read,
};
return pmu;
out_free_pmu:
kfree(pmu);
out:
return NULL;
}

View file

@ -0,0 +1,142 @@
// SPDX-License-Identifier: GPL-2.0
/*
* RISC-V performance counter support.
*
* Copyright (C) 2021 Western Digital Corporation or its affiliates.
*
* This implementation is based on old RISC-V perf and ARM perf event code
* which are in turn based on sparc64 and x86 code.
*/
#include <linux/mod_devicetable.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/platform_device.h>
#define RISCV_PMU_LEGACY_CYCLE 0
#define RISCV_PMU_LEGACY_INSTRET 1
#define RISCV_PMU_LEGACY_NUM_CTR 2
static bool pmu_init_done;
static int pmu_legacy_ctr_get_idx(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
if (event->attr.type != PERF_TYPE_HARDWARE)
return -EOPNOTSUPP;
if (attr->config == PERF_COUNT_HW_CPU_CYCLES)
return RISCV_PMU_LEGACY_CYCLE;
else if (attr->config == PERF_COUNT_HW_INSTRUCTIONS)
return RISCV_PMU_LEGACY_INSTRET;
else
return -EOPNOTSUPP;
}
/* For legacy config & counter index are same */
static int pmu_legacy_event_map(struct perf_event *event, u64 *config)
{
return pmu_legacy_ctr_get_idx(event);
}
static u64 pmu_legacy_read_ctr(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
u64 val;
if (idx == RISCV_PMU_LEGACY_CYCLE) {
val = riscv_pmu_ctr_read_csr(CSR_CYCLE);
if (IS_ENABLED(CONFIG_32BIT))
val = (u64)riscv_pmu_ctr_read_csr(CSR_CYCLEH) << 32 | val;
} else if (idx == RISCV_PMU_LEGACY_INSTRET) {
val = riscv_pmu_ctr_read_csr(CSR_INSTRET);
if (IS_ENABLED(CONFIG_32BIT))
val = ((u64)riscv_pmu_ctr_read_csr(CSR_INSTRETH)) << 32 | val;
} else
return 0;
return val;
}
static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival)
{
struct hw_perf_event *hwc = &event->hw;
u64 initial_val = pmu_legacy_read_ctr(event);
/**
* The legacy method doesn't really have a start/stop method.
* It also can not update the counter with a initial value.
* But we still need to set the prev_count so that read() can compute
* the delta. Just use the current counter value to set the prev_count.
*/
local64_set(&hwc->prev_count, initial_val);
}
/**
* This is just a simple implementation to allow legacy implementations
* compatible with new RISC-V PMU driver framework.
* This driver only allows reading two counters i.e CYCLE & INSTRET.
* However, it can not start or stop the counter. Thus, it is not very useful
* will be removed in future.
*/
static void pmu_legacy_init(struct riscv_pmu *pmu)
{
pr_info("Legacy PMU implementation is available\n");
pmu->num_counters = RISCV_PMU_LEGACY_NUM_CTR;
pmu->ctr_start = pmu_legacy_ctr_start;
pmu->ctr_stop = NULL;
pmu->event_map = pmu_legacy_event_map;
pmu->ctr_get_idx = pmu_legacy_ctr_get_idx;
pmu->ctr_get_width = NULL;
pmu->ctr_clear_idx = NULL;
pmu->ctr_read = pmu_legacy_read_ctr;
perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
}
static int pmu_legacy_device_probe(struct platform_device *pdev)
{
struct riscv_pmu *pmu = NULL;
pmu = riscv_pmu_alloc();
if (!pmu)
return -ENOMEM;
pmu_legacy_init(pmu);
return 0;
}
static struct platform_driver pmu_legacy_driver = {
.probe = pmu_legacy_device_probe,
.driver = {
.name = RISCV_PMU_LEGACY_PDEV_NAME,
},
};
static int __init riscv_pmu_legacy_devinit(void)
{
int ret;
struct platform_device *pdev;
if (likely(pmu_init_done))
return 0;
ret = platform_driver_register(&pmu_legacy_driver);
if (ret)
return ret;
pdev = platform_device_register_simple(RISCV_PMU_LEGACY_PDEV_NAME, -1, NULL, 0);
if (IS_ERR(pdev)) {
platform_driver_unregister(&pmu_legacy_driver);
return PTR_ERR(pdev);
}
return ret;
}
late_initcall(riscv_pmu_legacy_devinit);
void riscv_pmu_legacy_skip_init(void)
{
pmu_init_done = true;
}

View file

@ -0,0 +1,790 @@
// SPDX-License-Identifier: GPL-2.0
/*
* RISC-V performance counter support.
*
* Copyright (C) 2021 Western Digital Corporation or its affiliates.
*
* This code is based on ARM perf event code which is in turn based on
* sparc64 and x86 code.
*/
#define pr_fmt(fmt) "riscv-pmu-sbi: " fmt
#include <linux/mod_devicetable.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/platform_device.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of_irq.h>
#include <linux/of.h>
#include <asm/sbi.h>
#include <asm/hwcap.h>
union sbi_pmu_ctr_info {
unsigned long value;
struct {
unsigned long csr:12;
unsigned long width:6;
#if __riscv_xlen == 32
unsigned long reserved:13;
#else
unsigned long reserved:45;
#endif
unsigned long type:1;
};
};
/**
* RISC-V doesn't have hetergenous harts yet. This need to be part of
* per_cpu in case of harts with different pmu counters
*/
static union sbi_pmu_ctr_info *pmu_ctr_list;
static unsigned int riscv_pmu_irq;
struct sbi_pmu_event_data {
union {
union {
struct hw_gen_event {
uint32_t event_code:16;
uint32_t event_type:4;
uint32_t reserved:12;
} hw_gen_event;
struct hw_cache_event {
uint32_t result_id:1;
uint32_t op_id:2;
uint32_t cache_id:13;
uint32_t event_type:4;
uint32_t reserved:12;
} hw_cache_event;
};
uint32_t event_idx;
};
};
static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = {
SBI_PMU_HW_CPU_CYCLES,
SBI_PMU_EVENT_TYPE_HW, 0}},
[PERF_COUNT_HW_INSTRUCTIONS] = {.hw_gen_event = {
SBI_PMU_HW_INSTRUCTIONS,
SBI_PMU_EVENT_TYPE_HW, 0}},
[PERF_COUNT_HW_CACHE_REFERENCES] = {.hw_gen_event = {
SBI_PMU_HW_CACHE_REFERENCES,
SBI_PMU_EVENT_TYPE_HW, 0}},
[PERF_COUNT_HW_CACHE_MISSES] = {.hw_gen_event = {
SBI_PMU_HW_CACHE_MISSES,
SBI_PMU_EVENT_TYPE_HW, 0}},
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {.hw_gen_event = {
SBI_PMU_HW_BRANCH_INSTRUCTIONS,
SBI_PMU_EVENT_TYPE_HW, 0}},
[PERF_COUNT_HW_BRANCH_MISSES] = {.hw_gen_event = {
SBI_PMU_HW_BRANCH_MISSES,
SBI_PMU_EVENT_TYPE_HW, 0}},
[PERF_COUNT_HW_BUS_CYCLES] = {.hw_gen_event = {
SBI_PMU_HW_BUS_CYCLES,
SBI_PMU_EVENT_TYPE_HW, 0}},
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {.hw_gen_event = {
SBI_PMU_HW_STALLED_CYCLES_FRONTEND,
SBI_PMU_EVENT_TYPE_HW, 0}},
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {.hw_gen_event = {
SBI_PMU_HW_STALLED_CYCLES_BACKEND,
SBI_PMU_EVENT_TYPE_HW, 0}},
[PERF_COUNT_HW_REF_CPU_CYCLES] = {.hw_gen_event = {
SBI_PMU_HW_REF_CPU_CYCLES,
SBI_PMU_EVENT_TYPE_HW, 0}},
};
#define C(x) PERF_COUNT_HW_CACHE_##x
static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), C(OP_READ),
C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
},
},
};
static int pmu_sbi_ctr_get_width(int idx)
{
return pmu_ctr_list[idx].width;
}
static bool pmu_sbi_ctr_is_fw(int cidx)
{
union sbi_pmu_ctr_info *info;
info = &pmu_ctr_list[cidx];
if (!info)
return false;
return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false;
}
static int pmu_sbi_ctr_get_idx(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
struct sbiret ret;
int idx;
uint64_t cbase = 0;
uint64_t cmask = GENMASK_ULL(rvpmu->num_counters - 1, 0);
unsigned long cflags = 0;
if (event->attr.exclude_kernel)
cflags |= SBI_PMU_CFG_FLAG_SET_SINH;
if (event->attr.exclude_user)
cflags |= SBI_PMU_CFG_FLAG_SET_UINH;
/* retrieve the available counter index */
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask,
cflags, hwc->event_base, hwc->config, 0);
if (ret.error) {
pr_debug("Not able to find a counter for event %lx config %llx\n",
hwc->event_base, hwc->config);
return sbi_err_map_linux_errno(ret.error);
}
idx = ret.value;
if (idx >= rvpmu->num_counters || !pmu_ctr_list[idx].value)
return -ENOENT;
/* Additional sanity check for the counter id */
if (pmu_sbi_ctr_is_fw(idx)) {
if (!test_and_set_bit(idx, cpuc->used_fw_ctrs))
return idx;
} else {
if (!test_and_set_bit(idx, cpuc->used_hw_ctrs))
return idx;
}
return -ENOENT;
}
static void pmu_sbi_ctr_clear_idx(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
int idx = hwc->idx;
if (pmu_sbi_ctr_is_fw(idx))
clear_bit(idx, cpuc->used_fw_ctrs);
else
clear_bit(idx, cpuc->used_hw_ctrs);
}
static int pmu_event_find_cache(u64 config)
{
unsigned int cache_type, cache_op, cache_result, ret;
cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return -EINVAL;
cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return -EINVAL;
cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx;
return ret;
}
static bool pmu_sbi_is_fw_event(struct perf_event *event)
{
u32 type = event->attr.type;
u64 config = event->attr.config;
if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1))
return true;
else
return false;
}
static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
{
u32 type = event->attr.type;
u64 config = event->attr.config;
int bSoftware;
u64 raw_config_val;
int ret;
switch (type) {
case PERF_TYPE_HARDWARE:
if (config >= PERF_COUNT_HW_MAX)
return -EINVAL;
ret = pmu_hw_event_map[event->attr.config].event_idx;
break;
case PERF_TYPE_HW_CACHE:
ret = pmu_event_find_cache(config);
break;
case PERF_TYPE_RAW:
/*
* As per SBI specification, the upper 16 bits must be unused for
* a raw event. Use the MSB (63b) to distinguish between hardware
* raw event and firmware events.
*/
bSoftware = config >> 63;
raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
if (bSoftware) {
if (raw_config_val < SBI_PMU_FW_MAX)
ret = (raw_config_val & 0xFFFF) |
(SBI_PMU_EVENT_TYPE_FW << 16);
else
return -EINVAL;
} else {
ret = RISCV_PMU_RAW_EVENT_IDX;
*econfig = raw_config_val;
}
break;
default:
ret = -EINVAL;
break;
}
return ret;
}
static u64 pmu_sbi_ctr_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
struct sbiret ret;
union sbi_pmu_ctr_info info;
u64 val = 0;
if (pmu_sbi_is_fw_event(event)) {
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
hwc->idx, 0, 0, 0, 0, 0);
if (!ret.error)
val = ret.value;
} else {
info = pmu_ctr_list[idx];
val = riscv_pmu_ctr_read_csr(info.csr);
if (IS_ENABLED(CONFIG_32BIT))
val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val;
}
return val;
}
static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
{
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
1, flag, ival, ival >> 32, 0);
if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
pr_err("Starting counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
}
static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
{
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
flag != SBI_PMU_STOP_FLAG_RESET)
pr_err("Stopping counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
}
static int pmu_sbi_find_num_ctrs(void)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
if (!ret.error)
return ret.value;
else
return sbi_err_map_linux_errno(ret.error);
}
static int pmu_sbi_get_ctrinfo(int nctr)
{
struct sbiret ret;
int i, num_hw_ctr = 0, num_fw_ctr = 0;
union sbi_pmu_ctr_info cinfo;
pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL);
if (!pmu_ctr_list)
return -ENOMEM;
for (i = 0; i <= nctr; i++) {
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
if (ret.error)
/* The logical counter ids are not expected to be contiguous */
continue;
cinfo.value = ret.value;
if (cinfo.type == SBI_PMU_CTR_TYPE_FW)
num_fw_ctr++;
else
num_hw_ctr++;
pmu_ctr_list[i].value = cinfo.value;
}
pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr);
return 0;
}
static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
{
/**
* No need to check the error because we are disabling all the counters
* which may include counters that are not enabled yet.
*/
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0);
}
static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
{
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
/* No need to check the error here as we can't do anything about the error */
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
}
/**
* This function starts all the used counters in two step approach.
* Any counter that did not overflow can be start in a single step
* while the overflowed counters need to be started with updated initialization
* value.
*/
static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
unsigned long ctr_ovf_mask)
{
int idx = 0;
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
struct perf_event *event;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
unsigned long ctr_start_mask = 0;
uint64_t max_period;
struct hw_perf_event *hwc;
u64 init_val = 0;
ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;
/* Start all the counters that did not overflow in a single shot */
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
0, 0, 0, 0);
/* Reinitialize and start all the counter that overflowed */
while (ctr_ovf_mask) {
if (ctr_ovf_mask & 0x01) {
event = cpu_hw_evt->events[idx];
hwc = &event->hw;
max_period = riscv_pmu_ctr_get_width_mask(event);
init_val = local64_read(&hwc->prev_count) & max_period;
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
flag, init_val, 0, 0);
}
ctr_ovf_mask = ctr_ovf_mask >> 1;
idx++;
}
}
static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
{
struct perf_sample_data data;
struct pt_regs *regs;
struct hw_perf_event *hw_evt;
union sbi_pmu_ctr_info *info;
int lidx, hidx, fidx;
struct riscv_pmu *pmu;
struct perf_event *event;
unsigned long overflow;
unsigned long overflowed_ctrs = 0;
struct cpu_hw_events *cpu_hw_evt = dev;
if (WARN_ON_ONCE(!cpu_hw_evt))
return IRQ_NONE;
/* Firmware counter don't support overflow yet */
fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
event = cpu_hw_evt->events[fidx];
if (!event) {
csr_clear(CSR_SIP, SIP_LCOFIP);
return IRQ_NONE;
}
pmu = to_riscv_pmu(event->pmu);
pmu_sbi_stop_hw_ctrs(pmu);
/* Overflow status register should only be read after counter are stopped */
overflow = csr_read(CSR_SSCOUNTOVF);
/**
* Overflow interrupt pending bit should only be cleared after stopping
* all the counters to avoid any race condition.
*/
csr_clear(CSR_SIP, SIP_LCOFIP);
/* No overflow bit is set */
if (!overflow)
return IRQ_NONE;
regs = get_irq_regs();
for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
struct perf_event *event = cpu_hw_evt->events[lidx];
/* Skip if invalid event or user did not request a sampling */
if (!event || !is_sampling_event(event))
continue;
info = &pmu_ctr_list[lidx];
/* Do a sanity check */
if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
continue;
/* compute hardware counter index */
hidx = info->csr - CSR_CYCLE;
/* check if the corresponding bit is set in sscountovf */
if (!(overflow & (1 << hidx)))
continue;
/*
* Keep a track of overflowed counters so that they can be started
* with updated initial value.
*/
overflowed_ctrs |= 1 << lidx;
hw_evt = &event->hw;
riscv_pmu_event_update(event);
perf_sample_data_init(&data, 0, hw_evt->last_period);
if (riscv_pmu_event_set_period(event)) {
/*
* Unlike other ISAs, RISC-V don't have to disable interrupts
* to avoid throttling here. As per the specification, the
* interrupt remains disabled until the OF bit is set.
* Interrupts are enabled again only during the start.
* TODO: We will need to stop the guest counters once
* virtualization support is added.
*/
perf_event_overflow(event, &data, regs);
}
}
pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
return IRQ_HANDLED;
}
static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
/* Enable the access for TIME csr only from the user mode now */
csr_write(CSR_SCOUNTEREN, 0x2);
/* Stop all the counters so that they can be enabled from perf */
pmu_sbi_stop_all(pmu);
if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
cpu_hw_evt->irq = riscv_pmu_irq;
csr_clear(CSR_IP, BIT(RV_IRQ_PMU));
csr_set(CSR_IE, BIT(RV_IRQ_PMU));
enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
}
return 0;
}
static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
{
if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
disable_percpu_irq(riscv_pmu_irq);
csr_clear(CSR_IE, BIT(RV_IRQ_PMU));
}
/* Disable all counters access for user mode now */
csr_write(CSR_SCOUNTEREN, 0x0);
return 0;
}
static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
{
int ret;
struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
struct device_node *cpu, *child;
struct irq_domain *domain = NULL;
if (!riscv_isa_extension_available(NULL, SSCOFPMF))
return -EOPNOTSUPP;
for_each_of_cpu_node(cpu) {
child = of_get_compatible_child(cpu, "riscv,cpu-intc");
if (!child) {
pr_err("Failed to find INTC node\n");
return -ENODEV;
}
domain = irq_find_host(child);
of_node_put(child);
if (domain)
break;
}
if (!domain) {
pr_err("Failed to find INTC IRQ root domain\n");
return -ENODEV;
}
riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU);
if (!riscv_pmu_irq) {
pr_err("Failed to map PMU interrupt for node\n");
return -ENODEV;
}
ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
if (ret) {
pr_err("registering percpu irq failed [%d]\n", ret);
return ret;
}
return 0;
}
static int pmu_sbi_device_probe(struct platform_device *pdev)
{
struct riscv_pmu *pmu = NULL;
int num_counters;
int ret = -ENODEV;
pr_info("SBI PMU extension is available\n");
pmu = riscv_pmu_alloc();
if (!pmu)
return -ENOMEM;
num_counters = pmu_sbi_find_num_ctrs();
if (num_counters < 0) {
pr_err("SBI PMU extension doesn't provide any counters\n");
goto out_free;
}
/* cache all the information about counters now */
if (pmu_sbi_get_ctrinfo(num_counters))
goto out_free;
ret = pmu_sbi_setup_irqs(pmu, pdev);
if (ret < 0) {
pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
}
pmu->num_counters = num_counters;
pmu->ctr_start = pmu_sbi_ctr_start;
pmu->ctr_stop = pmu_sbi_ctr_stop;
pmu->event_map = pmu_sbi_event_map;
pmu->ctr_get_idx = pmu_sbi_ctr_get_idx;
pmu->ctr_get_width = pmu_sbi_ctr_get_width;
pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
pmu->ctr_read = pmu_sbi_ctr_read;
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
if (ret)
return ret;
ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
if (ret) {
cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
return ret;
}
return 0;
out_free:
kfree(pmu);
return ret;
}
static struct platform_driver pmu_sbi_driver = {
.probe = pmu_sbi_device_probe,
.driver = {
.name = RISCV_PMU_PDEV_NAME,
},
};
static int __init pmu_sbi_devinit(void)
{
int ret;
struct platform_device *pdev;
if (sbi_spec_version < sbi_mk_version(0, 3) ||
sbi_probe_extension(SBI_EXT_PMU) <= 0) {
return 0;
}
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
"perf/riscv/pmu:starting",
pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
if (ret) {
pr_err("CPU hotplug notifier could not be registered: %d\n",
ret);
return ret;
}
ret = platform_driver_register(&pmu_sbi_driver);
if (ret)
return ret;
pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0);
if (IS_ERR(pdev)) {
platform_driver_unregister(&pmu_sbi_driver);
return PTR_ERR(pdev);
}
/* Notify legacy implementation that SBI pmu is available*/
riscv_pmu_legacy_skip_init();
return ret;
}
device_initcall(pmu_sbi_devinit)

View file

@ -0,0 +1,45 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/*
* Daire McNamara,<daire.mcnamara@microchip.com>
* Copyright (C) 2020 Microchip Technology Inc. All rights reserved.
*/
#ifndef _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_
#define _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_
#define CLK_CPU 0
#define CLK_AXI 1
#define CLK_AHB 2
#define CLK_ENVM 3
#define CLK_MAC0 4
#define CLK_MAC1 5
#define CLK_MMC 6
#define CLK_TIMER 7
#define CLK_MMUART0 8
#define CLK_MMUART1 9
#define CLK_MMUART2 10
#define CLK_MMUART3 11
#define CLK_MMUART4 12
#define CLK_SPI0 13
#define CLK_SPI1 14
#define CLK_I2C0 15
#define CLK_I2C1 16
#define CLK_CAN0 17
#define CLK_CAN1 18
#define CLK_USB 19
#define CLK_RESERVED 20
#define CLK_RTC 21
#define CLK_QSPI 22
#define CLK_GPIO0 23
#define CLK_GPIO1 24
#define CLK_GPIO2 25
#define CLK_DDRC 26
#define CLK_FIC0 27
#define CLK_FIC1 28
#define CLK_FIC2 29
#define CLK_FIC3 30
#define CLK_ATHENA 31
#define CLK_CFM 32
#endif /* _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ */

View file

@ -166,6 +166,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
CPUHP_AP_PERF_ARM_ACPI_STARTING,
CPUHP_AP_PERF_ARM_STARTING,
CPUHP_AP_PERF_RISCV_STARTING,
CPUHP_AP_ARM_L2X0_STARTING,
CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,

View file

@ -0,0 +1,75 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2018 SiFive
* Copyright (C) 2018 Andes Technology Corporation
* Copyright (C) 2021 Western Digital Corporation or its affiliates.
*
*/
#ifndef _ASM_RISCV_PERF_EVENT_H
#define _ASM_RISCV_PERF_EVENT_H
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/interrupt.h>
#ifdef CONFIG_RISCV_PMU
/*
* The RISCV_MAX_COUNTERS parameter should be specified.
*/
#define RISCV_MAX_COUNTERS 64
#define RISCV_OP_UNSUPP (-EOPNOTSUPP)
#define RISCV_PMU_PDEV_NAME "riscv-pmu"
#define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy"
#define RISCV_PMU_STOP_FLAG_RESET 1
struct cpu_hw_events {
/* currently enabled events */
int n_events;
/* Counter overflow interrupt */
int irq;
/* currently enabled events */
struct perf_event *events[RISCV_MAX_COUNTERS];
/* currently enabled hardware counters */
DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS);
/* currently enabled firmware counters */
DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS);
};
struct riscv_pmu {
struct pmu pmu;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
int num_counters;
u64 (*ctr_read)(struct perf_event *event);
int (*ctr_get_idx)(struct perf_event *event);
int (*ctr_get_width)(int idx);
void (*ctr_clear_idx)(struct perf_event *event);
void (*ctr_start)(struct perf_event *event, u64 init_val);
void (*ctr_stop)(struct perf_event *event, unsigned long flag);
int (*event_map)(struct perf_event *event, u64 *config);
struct cpu_hw_events __percpu *hw_events;
struct hlist_node node;
};
#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu))
unsigned long riscv_pmu_ctr_read_csr(unsigned long csr);
int riscv_pmu_event_set_period(struct perf_event *event);
uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event);
u64 riscv_pmu_event_update(struct perf_event *event);
#ifdef CONFIG_RISCV_PMU_LEGACY
void riscv_pmu_legacy_skip_init(void);
#else
static inline void riscv_pmu_legacy_skip_init(void) {};
#endif
struct riscv_pmu *riscv_pmu_alloc(void);
#endif /* CONFIG_RISCV_PMU */
#endif /* _ASM_RISCV_PERF_EVENT_H */

View file

@ -207,6 +207,29 @@ unsigned int yield_mod_cnt, nr_abort;
"addiu " INJECT_ASM_REG ", -1\n\t" \
"bnez " INJECT_ASM_REG ", 222b\n\t" \
"333:\n\t"
#elif defined(__riscv)
#define RSEQ_INJECT_INPUT \
, [loop_cnt_1]"m"(loop_cnt[1]) \
, [loop_cnt_2]"m"(loop_cnt[2]) \
, [loop_cnt_3]"m"(loop_cnt[3]) \
, [loop_cnt_4]"m"(loop_cnt[4]) \
, [loop_cnt_5]"m"(loop_cnt[5]) \
, [loop_cnt_6]"m"(loop_cnt[6])
#define INJECT_ASM_REG "t1"
#define RSEQ_INJECT_CLOBBER \
, INJECT_ASM_REG
#define RSEQ_INJECT_ASM(n) \
"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
"beqz " INJECT_ASM_REG ", 333f\n\t" \
"222:\n\t" \
"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
"bnez " INJECT_ASM_REG ", 222b\n\t" \
"333:\n\t"
#else
#error unsupported target

View file

@ -0,0 +1,677 @@
/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
/*
* Select the instruction "csrw mhartid, x0" as the RSEQ_SIG. Unlike
* other architectures, the ebreak instruction has no immediate field for
* distinguishing purposes. Hence, ebreak is not suitable as RSEQ_SIG.
* "csrw mhartid, x0" can also satisfy the RSEQ requirement because it
* is an uncommon instruction and will raise an illegal instruction
* exception when executed in all modes.
*/
#include <endian.h>
#if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN)
#define RSEQ_SIG 0xf1401073 /* csrr mhartid, x0 */
#else
#error "Currently, RSEQ only supports Little-Endian version"
#endif
#if __riscv_xlen == 64
#define __REG_SEL(a, b) a
#elif __riscv_xlen == 32
#define __REG_SEL(a, b) b
#endif
#define REG_L __REG_SEL("ld ", "lw ")
#define REG_S __REG_SEL("sd ", "sw ")
#define RISCV_FENCE(p, s) \
__asm__ __volatile__ ("fence " #p "," #s : : : "memory")
#define rseq_smp_mb() RISCV_FENCE(rw, rw)
#define rseq_smp_rmb() RISCV_FENCE(r, r)
#define rseq_smp_wmb() RISCV_FENCE(w, w)
#define RSEQ_ASM_TMP_REG_1 "t6"
#define RSEQ_ASM_TMP_REG_2 "t5"
#define RSEQ_ASM_TMP_REG_3 "t4"
#define RSEQ_ASM_TMP_REG_4 "t3"
#define rseq_smp_load_acquire(p) \
__extension__ ({ \
__typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
RISCV_FENCE(r, rw) \
____p1; \
})
#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
#define rseq_smp_store_release(p, v) \
do { \
RISCV_FENCE(rw, w); \
RSEQ_WRITE_ONCE(*(p), v); \
} while (0)
#ifdef RSEQ_SKIP_FASTPATH
#include "rseq-skip.h"
#else /* !RSEQ_SKIP_FASTPATH */
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
post_commit_offset, abort_ip) \
".pushsection __rseq_cs, \"aw\"\n" \
".balign 32\n" \
__rseq_str(label) ":\n" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n" \
".quad " __rseq_str(start_ip) ", " \
__rseq_str(post_commit_offset) ", " \
__rseq_str(abort_ip) "\n" \
".popsection\n\t" \
".pushsection __rseq_cs_ptr_array, \"aw\"\n" \
".quad " __rseq_str(label) "b\n" \
".popsection\n"
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
((post_commit_ip) - (start_ip)), abort_ip)
/*
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
* post-commit IP are already part of the __rseq_cs section and should not be
* explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
".pushsection __rseq_exit_point_array, \"aw\"\n" \
".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n" \
".popsection\n"
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
"la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \
REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \
__rseq_str(label) ":\n"
#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
"j 222f\n" \
".balign 4\n" \
".long " __rseq_str(RSEQ_SIG) "\n" \
__rseq_str(label) ":\n" \
"j %l[" __rseq_str(abort_label) "]\n" \
"222:\n"
#define RSEQ_ASM_OP_STORE(value, var) \
REG_S "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
"bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \
"lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
"bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_OP_CMPNE(var, expect, label) \
REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
"beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
RSEQ_INJECT_ASM(2) \
RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)
#define RSEQ_ASM_OP_R_LOAD(var) \
REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
#define RSEQ_ASM_OP_R_STORE(var) \
REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \
"add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \
RSEQ_ASM_TMP_REG_1 "\n" \
REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n"
#define RSEQ_ASM_OP_R_ADD(count) \
"add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
", %[" __rseq_str(count) "]\n"
#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
RSEQ_ASM_OP_STORE(value, var) \
__rseq_str(post_commit_label) ":\n"
#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \
"fence rw, w\n" \
RSEQ_ASM_OP_STORE(value, var) \
__rseq_str(post_commit_label) ":\n"
#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
__rseq_str(post_commit_label) ":\n"
#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \
"beqz %[" __rseq_str(len) "], 333f\n" \
"mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "]\n" \
"mv " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "]\n" \
"mv " RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "]\n" \
"222:\n" \
"lb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n" \
"sb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_3 ")\n" \
"addi " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n" \
"addi " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n" \
"addi " RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n" \
"bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \
"333:\n"
#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \
"mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \
RSEQ_ASM_OP_R_ADD(off) \
REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \
RSEQ_ASM_OP_R_ADD(inc) \
__rseq_str(post_commit_label) ":\n"
static inline __always_inline
int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
{
RSEQ_INJECT_C(9)
__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
#endif
RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
RSEQ_INJECT_ASM(5)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
[current_cpu_id] "m" (__rseq_abi.cpu_id),
[rseq_cs] "m" (__rseq_abi.rseq_cs),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1
RSEQ_INJECT_CLOBBER
: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
, error1, error2
#endif
);
return 0;
abort:
RSEQ_INJECT_FAILED
return -1;
cmpfail:
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
rseq_bug("cpu_id comparison failed");
error2:
rseq_bug("expected value comparison failed");
#endif
}
static inline __always_inline
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
off_t voffp, intptr_t *load, int cpu)
{
RSEQ_INJECT_C(9)
__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]")
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]")
#endif
RSEQ_ASM_OP_R_LOAD(v)
RSEQ_ASM_OP_R_STORE(load)
RSEQ_ASM_OP_R_LOAD_OFF(voffp)
RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
RSEQ_INJECT_ASM(5)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
[current_cpu_id] "m" (__rseq_abi.cpu_id),
[rseq_cs] "m" (__rseq_abi.rseq_cs),
[v] "m" (*v),
[expectnot] "r" (expectnot),
[load] "m" (*load),
[voffp] "r" (voffp)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1
RSEQ_INJECT_CLOBBER
: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
, error1, error2
#endif
);
return 0;
abort:
RSEQ_INJECT_FAILED
return -1;
cmpfail:
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
rseq_bug("cpu_id comparison failed");
error2:
rseq_bug("expected value comparison failed");
#endif
}
static inline __always_inline
int rseq_addv(intptr_t *v, intptr_t count, int cpu)
{
RSEQ_INJECT_C(9)
__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
#endif
RSEQ_ASM_OP_R_LOAD(v)
RSEQ_ASM_OP_R_ADD(count)
RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
RSEQ_INJECT_ASM(4)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
[current_cpu_id] "m" (__rseq_abi.cpu_id),
[rseq_cs] "m" (__rseq_abi.rseq_cs),
[v] "m" (*v),
[count] "r" (count)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1
RSEQ_INJECT_CLOBBER
: abort
#ifdef RSEQ_COMPARE_TWICE
, error1
#endif
);
return 0;
abort:
RSEQ_INJECT_FAILED
return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
rseq_bug("cpu_id comparison failed");
#endif
}
static inline __always_inline
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t newv2,
intptr_t newv, int cpu)
{
RSEQ_INJECT_C(9)
__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
#endif
RSEQ_ASM_OP_STORE(newv2, v2)
RSEQ_INJECT_ASM(5)
RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
RSEQ_INJECT_ASM(6)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
[current_cpu_id] "m" (__rseq_abi.cpu_id),
[rseq_cs] "m" (__rseq_abi.rseq_cs),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
[v2] "m" (*v2),
[newv2] "r" (newv2)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1
RSEQ_INJECT_CLOBBER
: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
, error1, error2
#endif
);
return 0;
abort:
RSEQ_INJECT_FAILED
return -1;
cmpfail:
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
rseq_bug("cpu_id comparison failed");
error2:
rseq_bug("expected value comparison failed");
#endif
}
static inline __always_inline
int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t newv2,
intptr_t newv, int cpu)
{
RSEQ_INJECT_C(9)
__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
#endif
RSEQ_ASM_OP_STORE(newv2, v2)
RSEQ_INJECT_ASM(5)
RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
RSEQ_INJECT_ASM(6)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
[current_cpu_id] "m" (__rseq_abi.cpu_id),
[rseq_cs] "m" (__rseq_abi.rseq_cs),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
[v2] "m" (*v2),
[newv2] "r" (newv2)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1
RSEQ_INJECT_CLOBBER
: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
, error1, error2
#endif
);
return 0;
abort:
RSEQ_INJECT_FAILED
return -1;
cmpfail:
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
rseq_bug("cpu_id comparison failed");
error2:
rseq_bug("expected value comparison failed");
#endif
}
static inline __always_inline
int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t expect2,
intptr_t newv, int cpu)
{
RSEQ_INJECT_C(9)
__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]")
#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
RSEQ_INJECT_ASM(4)
RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]")
RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]")
#endif
RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
RSEQ_INJECT_ASM(6)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
[current_cpu_id] "m" (__rseq_abi.cpu_id),
[rseq_cs] "m" (__rseq_abi.rseq_cs),
[v] "m" (*v),
[expect] "r" (expect),
[v2] "m" (*v2),
[expect2] "r" (expect2),
[newv] "r" (newv)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1
RSEQ_INJECT_CLOBBER
: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
, error1, error2, error3
#endif
);
return 0;
abort:
RSEQ_INJECT_FAILED
return -1;
cmpfail:
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
rseq_bug("cpu_id comparison failed");
error2:
rseq_bug("expected value comparison failed");
error3:
rseq_bug("2nd expected value comparison failed");
#endif
}
static inline __always_inline
int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
void *dst, void *src, size_t len,
intptr_t newv, int cpu)
{
RSEQ_INJECT_C(9)
__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
#endif
RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
RSEQ_INJECT_ASM(5)
RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
RSEQ_INJECT_ASM(6)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
[current_cpu_id] "m" (__rseq_abi.cpu_id),
[rseq_cs] "m" (__rseq_abi.rseq_cs),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
[dst] "r" (dst),
[src] "r" (src),
[len] "r" (len)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2,
RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4
RSEQ_INJECT_CLOBBER
: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
, error1, error2
#endif
);
return 0;
abort:
RSEQ_INJECT_FAILED
return -1;
cmpfail:
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
rseq_bug("cpu_id comparison failed");
error2:
rseq_bug("expected value comparison failed");
#endif
}
static inline __always_inline
int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
void *dst, void *src, size_t len,
intptr_t newv, int cpu)
{
RSEQ_INJECT_C(9)
__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
#endif
RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
RSEQ_INJECT_ASM(5)
RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
RSEQ_INJECT_ASM(6)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
[current_cpu_id] "m" (__rseq_abi.cpu_id),
[rseq_cs] "m" (__rseq_abi.rseq_cs),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
[dst] "r" (dst),
[src] "r" (src),
[len] "r" (len)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2,
RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4
RSEQ_INJECT_CLOBBER
: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
, error1, error2
#endif
);
return 0;
abort:
RSEQ_INJECT_FAILED
return -1;
cmpfail:
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
rseq_bug("cpu_id comparison failed");
error2:
rseq_bug("expected value comparison failed");
#endif
}
#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
/*
* pval = *(ptr+off)
* *pval += inc;
*/
static inline __always_inline
int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
{
RSEQ_INJECT_C(9)
__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
#endif
RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3)
RSEQ_INJECT_ASM(4)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
[current_cpu_id] "m" (__rseq_abi.cpu_id),
[rseq_cs] "m" (__rseq_abi.rseq_cs),
[ptr] "r" (ptr),
[off] "er" (off),
[inc] "er" (inc)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1
RSEQ_INJECT_CLOBBER
: abort
#ifdef RSEQ_COMPARE_TWICE
, error1
#endif
);
return 0;
abort:
RSEQ_INJECT_FAILED
return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
rseq_bug("cpu_id comparison failed");
#endif
}
#endif /* !RSEQ_SKIP_FASTPATH */

View file

@ -93,6 +93,8 @@ static inline struct rseq_abi *rseq_get_abi(void)
#include <rseq-mips.h>
#elif defined(__s390__)
#include <rseq-s390.h>
#elif defined(__riscv)
#include <rseq-riscv.h>
#else
#error unsupported target
#endif