arm64 updates for 6.9:

* Reorganise the arm64 kernel VA space and add support for LPA2 (at
   stage 1, KVM stage 2 was merged earlier) - 52-bit VA/PA address range
   with 4KB and 16KB pages
 
 * Enable Rust on arm64
 
 * Support for the 2023 dpISA extensions (data processing ISA), host only
 
 * arm64 perf updates:
 
   - StarFive's StarLink (integrates one or more CPU cores with a shared
     L3 memory system) PMU support
 
   - Enable HiSilicon Erratum 162700402 quirk for HIP09
 
   - Several updates for the HiSilicon PCIe PMU driver
 
   - Arm CoreSight PMU support
 
   - Convert all drivers under drivers/perf/ to use .remove_new()
 
 * Miscellaneous:
 
   - Don't enable workarounds for "rare" errata by default
 
   - Clean up the DAIF flags handling for EL0 returns (in preparation for
     NMI support)
 
   - Kselftest update for ptrace()
 
   - Update some of the sysreg field definitions
 
   - Slight improvement in the code generation for inline asm I/O
     accessors to permit offset addressing
 
   - kretprobes: acquire regs via a BRK exception (previously done via a
     trampoline handler)
 
   - SVE/SME cleanups, comment updates
 
   - Allow CALL_OPS+CC_OPTIMIZE_FOR_SIZE with clang (previously disabled
     due to gcc silently ignoring -falign-functions=N)
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE5RElWfyWxS+3PLO2a9axLQDIXvEFAmXxiSgACgkQa9axLQDI
 XvHd7hAAjQrQqxJogPT2ahM5/gxct8qTrXpIgX0B1Y7bb5R8ztvOUN9MJNuDyRsj
 0s28SSZw387LReM5OUu+U6G/iahcuNAyP/8d9qeac32Tidd255fV3KPEh4C4eC+u
 0HeOqLBZ+stmNoa71tBC2K6SmchizhYyYduvRnri8km8K4OMDawHWqWRTXl0PNRT
 RMVJvZTDJMPfMBFeD4+B7EnSFOoP14tKCw9MZvlbpT2PEV0kINjhCQiojW2jJgqv
 w36vm/dhwsg1avSzT1xhy3KE+m+7n28+IC/wr1HB7c1WumvYKv7Z84ieCp3PlO3Z
 owvVO7dKJC6X3RkoY6Kge5p2RHU6poDerDVHYiAvG+Zi57nrDmHyAubskThsGTGR
 AibSEeJ5nQ0yM6hx7zAIQa5XEo4l0svD1ZM7NynY+5JR44W9cdAH3SnEsvIBMGIf
 /ja+iZ1W4ZQnIESQXD5uDPSxILfqQ8Ebhdorpw+Qg3rB7OhdTdGSSGQCi6V2PcJH
 d/ErFO+i0lFRBPJtBbUAN4EEu3HJcVYEoEnVJYQahC+6KyNGLxO+7L6sH0YO7Pag
 P1LRa6h8ktuBMrbCrOPWdmJYNDYCbb5rRtmcCwO0ItZ4g5tYWp9djFc8pyctCaNB
 MZxxRrUCNwXTOcFTDiYzyk+JCvpf3EvXfvj8AH+P8BMjFWgqHqw=
 =KTD/
 -----END PGP SIGNATURE-----

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:
 "The major features are support for LPA2 (52-bit VA/PA with 4K and 16K
  pages), the dpISA extension and Rust enabled on arm64. The changes are
  mostly contained within the usual arch/arm64/, drivers/perf, the arm64
  Documentation and kselftests. The exception is the Rust support which
  touches some generic build files.

  Summary:

   - Reorganise the arm64 kernel VA space and add support for LPA2 (at
     stage 1, KVM stage 2 was merged earlier) - 52-bit VA/PA address
     range with 4KB and 16KB pages

   - Enable Rust on arm64

   - Support for the 2023 dpISA extensions (data processing ISA), host
     only

   - arm64 perf updates:

      - StarFive's StarLink (integrates one or more CPU cores with a
        shared L3 memory system) PMU support

      - Enable HiSilicon Erratum 162700402 quirk for HIP09

      - Several updates for the HiSilicon PCIe PMU driver

      - Arm CoreSight PMU support

      - Convert all drivers under drivers/perf/ to use .remove_new()

   - Miscellaneous:

      - Don't enable workarounds for "rare" errata by default

      - Clean up the DAIF flags handling for EL0 returns (in preparation
        for NMI support)

      - Kselftest update for ptrace()

      - Update some of the sysreg field definitions

      - Slight improvement in the code generation for inline asm I/O
        accessors to permit offset addressing

      - kretprobes: acquire regs via a BRK exception (previously done
        via a trampoline handler)

      - SVE/SME cleanups, comment updates

      - Allow CALL_OPS+CC_OPTIMIZE_FOR_SIZE with clang (previously
        disabled due to gcc silently ignoring -falign-functions=N)"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (134 commits)
  Revert "mm: add arch hook to validate mmap() prot flags"
  Revert "arm64: mm: add support for WXN memory translation attribute"
  Revert "ARM64: Dynamically allocate cpumasks and increase supported CPUs to 512"
  ARM64: Dynamically allocate cpumasks and increase supported CPUs to 512
  kselftest/arm64: Add 2023 DPISA hwcap test coverage
  kselftest/arm64: Add basic FPMR test
  kselftest/arm64: Handle FPMR context in generic signal frame parser
  arm64/hwcap: Define hwcaps for 2023 DPISA features
  arm64/ptrace: Expose FPMR via ptrace
  arm64/signal: Add FPMR signal handling
  arm64/fpsimd: Support FEAT_FPMR
  arm64/fpsimd: Enable host kernel access to FPMR
  arm64/cpufeature: Hook new identification registers up to cpufeature
  docs: perf: Fix build warning of hisi-pcie-pmu.rst
  perf: starfive: Only allow COMPILE_TEST for 64-bit architectures
  MAINTAINERS: Add entry for StarFive StarLink PMU
  docs: perf: Add description for StarFive's StarLink PMU
  dt-bindings: perf: starfive: Add JH8100 StarLink PMU
  perf: starfive: Add StarLink PMU support
  docs: perf: Update usage for target filter of hisi-pcie-pmu
  ...
This commit is contained in:
Linus Torvalds 2024-03-14 15:35:42 -07:00
commit 6d75c6f40a
137 changed files with 5513 additions and 1583 deletions

View File

@ -37,9 +37,21 @@ Example usage of perf::
hisi_pcie0_core0/rx_mwr_cnt/ [kernel PMU event]
------------------------------------------
$# perf stat -e hisi_pcie0_core0/rx_mwr_latency/
$# perf stat -e hisi_pcie0_core0/rx_mwr_cnt/
$# perf stat -g -e hisi_pcie0_core0/rx_mwr_latency/ -e hisi_pcie0_core0/rx_mwr_cnt/
$# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0xffff/
$# perf stat -e hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/
The related events usually used to calculate the bandwidth, latency or others.
They need to start and end counting at the same time, therefore related events
are best used in the same event group to get the expected value. There are two
ways to know if they are related events:
a) By event name, such as the latency events "xxx_latency, xxx_cnt" or
bandwidth events "xxx_flux, xxx_time".
b) By event type, such as "event=0xXXXX, event=0x1XXXX".
Example usage of perf group::
$# perf stat -e "{hisi_pcie0_core0/rx_mwr_latency,port=0xffff/,hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/}"
The current driver does not support sampling. So "perf record" is unsupported.
Also attach to a task is unsupported for PCIe PMU.
@ -51,8 +63,12 @@ Filter options
PMU could only monitor the performance of traffic downstream target Root
Ports or downstream target Endpoint. PCIe PMU driver support "port" and
"bdf" interfaces for users, and these two interfaces aren't supported at the
same time.
"bdf" interfaces for users.
Please notice that, one of these two interfaces must be set, and these two
interfaces aren't supported at the same time. If they are both set, only
"port" filter is valid.
If "port" filter not being set or is set explicitly to zero (default), the
"bdf" filter will be in effect, because "bdf=0" meaning 0000:000:00.0.
- port
@ -95,7 +111,7 @@ Filter options
Example usage of perf::
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,trig_len=0x4,trig_mode=1/ sleep 5
3. Threshold filter
@ -109,7 +125,7 @@ Filter options
Example usage of perf::
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,thr_len=0x4,thr_mode=1/ sleep 5
4. TLP Length filter
@ -127,4 +143,4 @@ Filter options
Example usage of perf::
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,len_mode=0x1/ sleep 5

View File

@ -13,6 +13,7 @@ Performance monitor support
imx-ddr
qcom_l2_pmu
qcom_l3_pmu
starfive_starlink_pmu
arm-ccn
arm-cmn
xgene-pmu

View File

@ -0,0 +1,46 @@
================================================
StarFive StarLink Performance Monitor Unit (PMU)
================================================
StarFive StarLink Performance Monitor Unit (PMU) exists within the
StarLink Coherent Network on Chip (CNoC) that connects multiple CPU
clusters with an L3 memory system.
The uncore PMU supports overflow interrupt, up to 16 programmable 64bit
event counters, and an independent 64bit cycle counter.
The PMU can only be accessed via Memory Mapped I/O and are common to the
cores connected to the same PMU.
Driver exposes supported PMU events in sysfs "events" directory under::
/sys/bus/event_source/devices/starfive_starlink_pmu/events/
Driver exposes cpu used to handle PMU events in sysfs "cpumask" directory
under::
/sys/bus/event_source/devices/starfive_starlink_pmu/cpumask/
Driver describes the format of config (event ID) in sysfs "format" directory
under::
/sys/bus/event_source/devices/starfive_starlink_pmu/format/
Example of perf usage::
$ perf list
starfive_starlink_pmu/cycles/ [Kernel PMU event]
starfive_starlink_pmu/read_hit/ [Kernel PMU event]
starfive_starlink_pmu/read_miss/ [Kernel PMU event]
starfive_starlink_pmu/read_request/ [Kernel PMU event]
starfive_starlink_pmu/release_request/ [Kernel PMU event]
starfive_starlink_pmu/write_hit/ [Kernel PMU event]
starfive_starlink_pmu/write_miss/ [Kernel PMU event]
starfive_starlink_pmu/write_request/ [Kernel PMU event]
starfive_starlink_pmu/writeback/ [Kernel PMU event]
$ perf stat -a -e /starfive_starlink_pmu/cycles/ sleep 1
Sampling is not supported. As a result, "perf record" is not supported.
Attaching to a task is not supported, only system-wide counting is supported.

View File

@ -317,6 +317,55 @@ HWCAP2_LRCPC3
HWCAP2_LSE128
Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0011.
HWCAP2_FPMR
Functionality implied by ID_AA64PFR2_EL1.FMR == 0b0001.
HWCAP2_LUT
Functionality implied by ID_AA64ISAR2_EL1.LUT == 0b0001.
HWCAP2_FAMINMAX
Functionality implied by ID_AA64ISAR3_EL1.FAMINMAX == 0b0001.
HWCAP2_F8CVT
Functionality implied by ID_AA64FPFR0_EL1.F8CVT == 0b1.
HWCAP2_F8FMA
Functionality implied by ID_AA64FPFR0_EL1.F8FMA == 0b1.
HWCAP2_F8DP4
Functionality implied by ID_AA64FPFR0_EL1.F8DP4 == 0b1.
HWCAP2_F8DP2
Functionality implied by ID_AA64FPFR0_EL1.F8DP2 == 0b1.
HWCAP2_F8E4M3
Functionality implied by ID_AA64FPFR0_EL1.F8E4M3 == 0b1.
HWCAP2_F8E5M2
Functionality implied by ID_AA64FPFR0_EL1.F8E5M2 == 0b1.
HWCAP2_SME_LUTV2
Functionality implied by ID_AA64SMFR0_EL1.LUTv2 == 0b1.
HWCAP2_SME_F8F16
Functionality implied by ID_AA64SMFR0_EL1.F8F16 == 0b1.
HWCAP2_SME_F8F32
Functionality implied by ID_AA64SMFR0_EL1.F8F32 == 0b1.
HWCAP2_SME_SF8FMA
Functionality implied by ID_AA64SMFR0_EL1.SF8FMA == 0b1.
HWCAP2_SME_SF8DP4
Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
HWCAP2_SME_SF8DP2
Functionality implied by ID_AA64SMFR0_EL1.SF8DP2 == 0b1.
HWCAP2_SME_SF8DP4
Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
4. Unused AT_HWCAP bits
-----------------------

View File

@ -35,8 +35,9 @@ can be triggered by Linux).
For software workarounds that may adversely impact systems unaffected by
the erratum in question, a Kconfig entry is added under "Kernel
Features" -> "ARM errata workarounds via the alternatives framework".
These are enabled by default and patched in at runtime when an affected
CPU is detected. For less-intrusive workarounds, a Kconfig option is not
With the exception of workarounds for errata deemed "rare" by Arm, these
are enabled by default and patched in at runtime when an affected CPU is
detected. For less-intrusive workarounds, a Kconfig option is not
available and the code is structured (preferably with a comment) in such
a way that the erratum will not be hit.

View File

@ -75,7 +75,7 @@ model features for SME is included in Appendix A.
2. Vector lengths
------------------
SME defines a second vector length similar to the SVE vector length which is
SME defines a second vector length similar to the SVE vector length which
controls the size of the streaming mode SVE vectors and the ZA matrix array.
The ZA matrix is square with each side having as many bytes as a streaming
mode SVE vector.
@ -238,12 +238,12 @@ prctl(PR_SME_SET_VL, unsigned long arg)
bits of Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
unspecified, including both streaming and non-streaming SVE state.
Calling PR_SME_SET_VL with vl equal to the thread's current vector
length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
does not constitute a change to the vector length for this purpose.
* Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
Calling PR_SME_SET_VL with vl equal to the thread's current vector
length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
does not constitute a change to the vector length for this purpose.
@ -379,9 +379,8 @@ The regset data starts with struct user_za_header, containing:
/proc/sys/abi/sme_default_vector_length
Writing the text representation of an integer to this file sets the system
default vector length to the specified value, unless the value is greater
than the maximum vector length supported by the system in which case the
default vector length is set to that maximum.
default vector length to the specified value rounded to a supported value
using the same rules as for setting vector length via PR_SME_SET_VL.
The result can be determined by reopening the file and reading its
contents.

View File

@ -117,11 +117,6 @@ the SVE instruction set architecture.
* The SVE registers are not used to pass arguments to or receive results from
any syscall.
* In practice the affected registers/bits will be preserved or will be replaced
with zeros on return from a syscall, but userspace should not make
assumptions about this. The kernel behaviour may vary on a case-by-case
basis.
* All other SVE state of a thread, including the currently configured vector
length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector
length (if any), is preserved across all syscalls, subject to the specific
@ -428,9 +423,8 @@ The regset data starts with struct user_sve_header, containing:
/proc/sys/abi/sve_default_vector_length
Writing the text representation of an integer to this file sets the system
default vector length to the specified value, unless the value is greater
than the maximum vector length supported by the system in which case the
default vector length is set to that maximum.
default vector length to the specified value rounded to a supported value
using the same rules as for setting vector length via PR_SVE_SET_VL.
The result can be determined by reopening the file and reading its
contents.

View File

@ -0,0 +1,39 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/perf/arm,coresight-pmu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Arm Coresight Performance Monitoring Unit Architecture
maintainers:
- Robin Murphy <robin.murphy@arm.com>
properties:
compatible:
const: arm,coresight-pmu
reg:
items:
- description: Register page 0
- description: Register page 1, if the PMU implements the dual-page extension
minItems: 1
interrupts:
items:
- description: Overflow interrupt
cpus:
description: If the PMU is associated with a particular CPU or subset of CPUs,
array of phandles to the appropriate CPU node(s)
reg-io-width:
description: Granularity at which PMU register accesses are single-copy atomic
default: 4
enum: [4, 8]
required:
- compatible
- reg
additionalProperties: false

View File

@ -0,0 +1,46 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/perf/starfive,jh8100-starlink-pmu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: StarFive JH8100 StarLink PMU
maintainers:
- Ji Sheng Teoh <jisheng.teoh@starfivetech.com>
description:
StarFive's JH8100 StarLink PMU integrates one or more CPU cores with a
shared L3 memory system. The PMU support overflow interrupt, up to
16 programmable 64bit event counters, and an independent 64bit cycle
counter. StarFive's JH8100 StarLink PMU is accessed via MMIO.
properties:
compatible:
const: starfive,jh8100-starlink-pmu
reg:
maxItems: 1
interrupts:
maxItems: 1
required:
- compatible
- reg
- interrupts
additionalProperties: false
examples:
- |
soc {
#address-cells = <2>;
#size-cells = <2>;
pmu@12900000 {
compatible = "starfive,jh8100-starlink-pmu";
reg = <0x0 0x12900000 0x0 0x10000>;
interrupts = <34>;
};
};

View File

@ -15,6 +15,7 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file.
============= ================ ==============================================
Architecture Level of support Constraints
============= ================ ==============================================
``arm64`` Maintained Little Endian only.
``loongarch`` Maintained -
``um`` Maintained ``x86_64`` only.
``x86`` Maintained ``x86_64`` only.

View File

@ -20974,6 +20974,13 @@ S: Maintained
T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
F: Documentation/devicetree/bindings/soc/starfive/
STARFIVE STARLINK PMU DRIVER
M: Ji Sheng Teoh <jisheng.teoh@starfivetech.com>
S: Maintained
F: Documentation/admin-guide/perf/starfive_starlink_pmu.rst
F: Documentation/devicetree/bindings/perf/starfive,jh8100-starlink-pmu.yaml
F: drivers/perf/starfive_starlink_pmu.c
STARFIVE TRNG DRIVER
M: Jia Jie Ho <jiajie.ho@starfivetech.com>
S: Supported

View File

@ -561,7 +561,6 @@ KBUILD_CFLAGS += -fno-strict-aliasing
KBUILD_CPPFLAGS := -D__KERNEL__
KBUILD_RUSTFLAGS := $(rust_common_flags) \
--target=$(objtree)/scripts/target.json \
-Cpanic=abort -Cembed-bitcode=n -Clto=n \
-Cforce-unwind-tables=n -Ccodegen-units=1 \
-Csymbol-mangling-version=v0 \

View File

@ -164,7 +164,7 @@ config ARM64
select HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
select HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
@ -198,7 +198,7 @@ config ARM64
if DYNAMIC_FTRACE_WITH_ARGS && DYNAMIC_FTRACE_WITH_CALL_OPS
select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG && \
!CC_OPTIMIZE_FOR_SIZE)
(CC_IS_CLANG || !CC_OPTIMIZE_FOR_SIZE))
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
if DYNAMIC_FTRACE_WITH_ARGS
select HAVE_SAMPLE_FTRACE_DIRECT
@ -229,6 +229,7 @@ config ARM64
select HAVE_FUNCTION_ARG_ACCESS_API
select MMU_GATHER_RCU_TABLE_FREE
select HAVE_RSEQ
select HAVE_RUST if CPU_LITTLE_ENDIAN
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
@ -362,7 +363,9 @@ config PGTABLE_LEVELS
default 3 if ARM64_64K_PAGES && (ARM64_VA_BITS_48 || ARM64_VA_BITS_52)
default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
default 4 if ARM64_16K_PAGES && (ARM64_VA_BITS_48 || ARM64_VA_BITS_52)
default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
default 5 if ARM64_4K_PAGES && ARM64_VA_BITS_52
config ARCH_SUPPORTS_UPROBES
def_bool y
@ -390,13 +393,13 @@ config BUILTIN_RETURN_ADDRESS_STRIPS_PAC
config KASAN_SHADOW_OFFSET
hex
depends on KASAN_GENERIC || KASAN_SW_TAGS
default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
default 0xdfff800000000000 if (ARM64_VA_BITS_48 || (ARM64_VA_BITS_52 && !ARM64_16K_PAGES)) && !KASAN_SW_TAGS
default 0xdfffc00000000000 if (ARM64_VA_BITS_47 || ARM64_VA_BITS_52) && ARM64_16K_PAGES && !KASAN_SW_TAGS
default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
default 0xdfffffc000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
default 0xdffffff800000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
default 0xefff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
default 0xefffc00000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
default 0xefff800000000000 if (ARM64_VA_BITS_48 || (ARM64_VA_BITS_52 && !ARM64_16K_PAGES)) && KASAN_SW_TAGS
default 0xefffc00000000000 if (ARM64_VA_BITS_47 || ARM64_VA_BITS_52) && ARM64_16K_PAGES && KASAN_SW_TAGS
default 0xeffffe0000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
default 0xefffffc000000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
@ -541,9 +544,8 @@ config ARM64_ERRATUM_832075
If unsure, say Y.
config ARM64_ERRATUM_834220
bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault"
bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault (rare)"
depends on KVM
default y
help
This option adds an alternative code sequence to work around ARM
erratum 834220 on Cortex-A57 parts up to r1p2.
@ -559,7 +561,7 @@ config ARM64_ERRATUM_834220
as it depends on the alternative framework, which will only patch
the kernel if an affected CPU is detected.
If unsure, say Y.
If unsure, say N.
config ARM64_ERRATUM_1742098
bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence"
@ -686,8 +688,7 @@ config ARM64_WORKAROUND_REPEAT_TLBI
bool
config ARM64_ERRATUM_2441007
bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
default y
bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI (rare)"
select ARM64_WORKAROUND_REPEAT_TLBI
help
This option adds a workaround for ARM Cortex-A55 erratum #2441007.
@ -700,11 +701,10 @@ config ARM64_ERRATUM_2441007
Work around this by adding the affected CPUs to the list that needs
TLB sequences to be done twice.
If unsure, say Y.
If unsure, say N.
config ARM64_ERRATUM_1286807
bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
default y
bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation (rare)"
select ARM64_WORKAROUND_REPEAT_TLBI
help
This option adds a workaround for ARM Cortex-A76 erratum 1286807.
@ -718,6 +718,8 @@ config ARM64_ERRATUM_1286807
invalidated has been observed by other observers. The
workaround repeats the TLBI+DSB operation.
If unsure, say N.
config ARM64_ERRATUM_1463225
bool "Cortex-A76: Software Step might prevent interrupt recognition"
default y
@ -737,8 +739,7 @@ config ARM64_ERRATUM_1463225
If unsure, say Y.
config ARM64_ERRATUM_1542419
bool "Neoverse-N1: workaround mis-ordering of instruction fetches"
default y
bool "Neoverse-N1: workaround mis-ordering of instruction fetches (rare)"
help
This option adds a workaround for ARM Neoverse-N1 erratum
1542419.
@ -750,7 +751,7 @@ config ARM64_ERRATUM_1542419
Workaround the issue by hiding the DIC feature from EL0. This
forces user-space to perform cache maintenance.
If unsure, say Y.
If unsure, say N.
config ARM64_ERRATUM_1508412
bool "Cortex-A77: 1508412: workaround deadlock on sequence of NC/Device load and store exclusive or PAR read"
@ -925,8 +926,7 @@ config ARM64_ERRATUM_2224489
If unsure, say Y.
config ARM64_ERRATUM_2441009
bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
default y
bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI (rare)"
select ARM64_WORKAROUND_REPEAT_TLBI
help
This option adds a workaround for ARM Cortex-A510 erratum #2441009.
@ -939,7 +939,7 @@ config ARM64_ERRATUM_2441009
Work around this by adding the affected CPUs to the list that needs
TLB sequences to be done twice.
If unsure, say Y.
If unsure, say N.
config ARM64_ERRATUM_2064142
bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled"
@ -1278,9 +1278,7 @@ endchoice
choice
prompt "Virtual address space size"
default ARM64_VA_BITS_39 if ARM64_4K_PAGES
default ARM64_VA_BITS_47 if ARM64_16K_PAGES
default ARM64_VA_BITS_42 if ARM64_64K_PAGES
default ARM64_VA_BITS_52
help
Allows choosing one of multiple possible virtual address
space sizes. The level of translation table is determined by
@ -1307,7 +1305,7 @@ config ARM64_VA_BITS_48
config ARM64_VA_BITS_52
bool "52-bit"
depends on ARM64_64K_PAGES && (ARM64_PAN || !ARM64_SW_TTBR0_PAN)
depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
help
Enable 52-bit virtual addressing for userspace when explicitly
requested via a hint to mmap(). The kernel will also use 52-bit
@ -1354,10 +1352,11 @@ choice
config ARM64_PA_BITS_48
bool "48-bit"
depends on ARM64_64K_PAGES || !ARM64_VA_BITS_52
config ARM64_PA_BITS_52
bool "52-bit (ARMv8.2)"
depends on ARM64_64K_PAGES
bool "52-bit"
depends on ARM64_64K_PAGES || ARM64_VA_BITS_52
depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
help
Enable support for a 52-bit physical address space, introduced as
@ -1374,6 +1373,10 @@ config ARM64_PA_BITS
default 48 if ARM64_PA_BITS_48
default 52 if ARM64_PA_BITS_52
config ARM64_LPA2
def_bool y
depends on ARM64_PA_BITS_52 && !ARM64_64K_PAGES
choice
prompt "Endianness"
default CPU_LITTLE_ENDIAN

View File

@ -41,6 +41,8 @@ KBUILD_CFLAGS += -mgeneral-regs-only \
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
KBUILD_AFLAGS += $(compat_vdso)
KBUILD_RUSTFLAGS += --target=aarch64-unknown-none -Ctarget-feature="-neon"
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
@ -65,7 +67,9 @@ endif
ifeq ($(CONFIG_ARM64_BTI_KERNEL),y)
KBUILD_CFLAGS += -mbranch-protection=pac-ret+bti
KBUILD_RUSTFLAGS += -Zbranch-protection=bti,pac-ret
else ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
KBUILD_RUSTFLAGS += -Zbranch-protection=pac-ret
ifeq ($(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET),y)
KBUILD_CFLAGS += -mbranch-protection=pac-ret
else

View File

@ -76,7 +76,6 @@ CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_VISCONTI=y
CONFIG_ARCH_XGENE=y
CONFIG_ARCH_ZYNQMP=y
CONFIG_ARM64_VA_BITS_48=y
CONFIG_SCHED_MC=y
CONFIG_SCHED_SMT=y
CONFIG_NUMA=y

View File

@ -129,6 +129,4 @@ static inline bool __init __early_cpu_has_rndr(void)
return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
}
u64 kaslr_early_init(void *fdt);
#endif /* _ASM_ARCHRANDOM_H */

View File

@ -38,10 +38,6 @@
msr daifset, #0xf
.endm
.macro enable_daif
msr daifclr, #0xf
.endm
/*
* Save/restore interrupts.
*/
@ -345,20 +341,6 @@ alternative_cb_end
bfi \valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
.endm
/*
* idmap_get_t0sz - get the T0SZ value needed to cover the ID map
*
* Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
* entire ID map region can be mapped. As T0SZ == (64 - #bits used),
* this number conveniently equals the number of leading zeroes in
* the physical address of _end.
*/
.macro idmap_get_t0sz, reg
adrp \reg, _end
orr \reg, \reg, #(1 << VA_BITS_MIN) - 1
clz \reg, \reg
.endm
/*
* tcr_compute_pa_size - set TCR.(I)PS to the highest supported
* ID_AA64MMFR0_EL1.PARange value
@ -590,18 +572,27 @@ alternative_endif
.endm
/*
* Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
* If the kernel is built for 52-bit virtual addressing but the hardware only
* supports 48 bits, we cannot program the pgdir address into TTBR1 directly,
* but we have to add an offset so that the TTBR1 address corresponds with the
* pgdir entry that covers the lowest 48-bit addressable VA.
*
* Note that this trick is only used for LVA/64k pages - LPA2/4k pages uses an
* additional paging level, and on LPA2/16k pages, we would end up with a root
* level table with only 2 entries, which is suboptimal in terms of TLB
* utilization, so there we fall back to 47 bits of translation if LPA2 is not
* supported.
*
* orr is used as it can cover the immediate value (and is idempotent).
* In future this may be nop'ed out when dealing with 52-bit kernel VAs.
* ttbr: Value of ttbr to set, modified.
*/
.macro offset_ttbr1, ttbr, tmp
#ifdef CONFIG_ARM64_VA_BITS_52
mrs_s \tmp, SYS_ID_AA64MMFR2_EL1
and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
cbnz \tmp, .Lskipoffs_\@
orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
.Lskipoffs_\@ :
#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
mrs \tmp, tcr_el1
and \tmp, \tmp, #TCR_T1SZ_MASK
cmp \tmp, #TCR_T1SZ(VA_BITS_MIN)
orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET
csel \ttbr, \tmp, \ttbr, eq
#endif
.endm
@ -623,25 +614,13 @@ alternative_endif
.macro phys_to_pte, pte, phys
#ifdef CONFIG_ARM64_PA_BITS_52
/*
* We assume \phys is 64K aligned and this is guaranteed by only
* supporting this configuration with 64K pages.
*/
orr \pte, \phys, \phys, lsr #36
and \pte, \pte, #PTE_ADDR_MASK
orr \pte, \phys, \phys, lsr #PTE_ADDR_HIGH_SHIFT
and \pte, \pte, #PHYS_TO_PTE_ADDR_MASK
#else
mov \pte, \phys
#endif
.endm
.macro pte_to_phys, phys, pte
and \phys, \pte, #PTE_ADDR_MASK
#ifdef CONFIG_ARM64_PA_BITS_52
orr \phys, \phys, \phys, lsl #PTE_ADDR_HIGH_SHIFT
and \phys, \phys, GENMASK_ULL(PHYS_MASK_SHIFT - 1, PAGE_SHIFT)
#endif
.endm
/*
* tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU.
*/

View File

@ -11,6 +11,7 @@
* 0x004: for installing kprobes
* 0x005: for installing uprobes
* 0x006: for kprobe software single-step
* 0x007: for kretprobe return
* Allowed values for kgdb are 0x400 - 0x7ff
* 0x100: for triggering a fault on purpose (reserved)
* 0x400: for dynamic BRK instruction
@ -23,6 +24,7 @@
#define KPROBES_BRK_IMM 0x004
#define UPROBES_BRK_IMM 0x005
#define KPROBES_BRK_SS_IMM 0x006
#define KRETPROBES_BRK_IMM 0x007
#define FAULT_BRK_IMM 0x100
#define KGDB_DYN_DBG_BRK_IMM 0x400
#define KGDB_COMPILED_DBG_BRK_IMM 0x401

View File

@ -52,14 +52,17 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64isar0;
u64 reg_id_aa64isar1;
u64 reg_id_aa64isar2;
u64 reg_id_aa64isar3;
u64 reg_id_aa64mmfr0;
u64 reg_id_aa64mmfr1;
u64 reg_id_aa64mmfr2;
u64 reg_id_aa64mmfr3;
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
u64 reg_id_aa64pfr2;
u64 reg_id_aa64zfr0;
u64 reg_id_aa64smfr0;
u64 reg_id_aa64fpfr0;
struct cpuinfo_32bit aarch32;
};

View File

@ -17,6 +17,7 @@
#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
#define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8
#ifndef __ASSEMBLY__
@ -768,6 +769,11 @@ static __always_inline bool system_supports_tpidr2(void)
return system_supports_sme();
}
static __always_inline bool system_supports_fpmr(void)
{
return alternative_has_cap_unlikely(ARM64_HAS_FPMR);
}
static __always_inline bool system_supports_cnp(void)
{
return alternative_has_cap_unlikely(ARM64_HAS_CNP);
@ -905,7 +911,9 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
extern struct arm64_ftr_override id_aa64mmfr0_override;
extern struct arm64_ftr_override id_aa64mmfr1_override;
extern struct arm64_ftr_override id_aa64mmfr2_override;
extern struct arm64_ftr_override id_aa64pfr0_override;
extern struct arm64_ftr_override id_aa64pfr1_override;
extern struct arm64_ftr_override id_aa64zfr0_override;
@ -915,9 +923,114 @@ extern struct arm64_ftr_override id_aa64isar2_override;
extern struct arm64_ftr_override arm64_sw_feature_override;
static inline
u64 arm64_apply_feature_override(u64 val, int feat, int width,
const struct arm64_ftr_override *override)
{
u64 oval = override->val;
/*
* When it encounters an invalid override (e.g., an override that
* cannot be honoured due to a missing CPU feature), the early idreg
* override code will set the mask to 0x0 and the value to non-zero for
* the field in question. In order to determine whether the override is
* valid or not for the field we are interested in, we first need to
* disregard bits belonging to other fields.
*/
oval &= GENMASK_ULL(feat + width - 1, feat);
/*
* The override is valid if all value bits are accounted for in the
* mask. If so, replace the masked bits with the override value.
*/
if (oval == (oval & override->mask)) {
val &= ~override->mask;
val |= oval;
}
/* Extract the field from the updated value */
return cpuid_feature_extract_unsigned_field(val, feat);
}
static inline bool arm64_test_sw_feature_override(int feat)
{
/*
* Software features are pseudo CPU features that have no underlying
* CPUID system register value to apply the override to.
*/
return arm64_apply_feature_override(0, feat, 4,
&arm64_sw_feature_override);
}
static inline bool kaslr_disabled_cmdline(void)
{
return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOKASLR);
}
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);
static inline bool cpu_has_bti(void)
{
if (!IS_ENABLED(CONFIG_ARM64_BTI))
return false;
return arm64_apply_feature_override(read_cpuid(ID_AA64PFR1_EL1),
ID_AA64PFR1_EL1_BT_SHIFT, 4,
&id_aa64pfr1_override);
}
static inline bool cpu_has_pac(void)
{
u64 isar1, isar2;
if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
return false;
isar1 = read_cpuid(ID_AA64ISAR1_EL1);
isar2 = read_cpuid(ID_AA64ISAR2_EL1);
if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_APA_SHIFT, 4,
&id_aa64isar1_override))
return true;
if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_API_SHIFT, 4,
&id_aa64isar1_override))
return true;
return arm64_apply_feature_override(isar2, ID_AA64ISAR2_EL1_APA3_SHIFT, 4,
&id_aa64isar2_override);
}
static inline bool cpu_has_lva(void)
{
u64 mmfr2;
mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
mmfr2 &= ~id_aa64mmfr2_override.mask;
mmfr2 |= id_aa64mmfr2_override.val;
return cpuid_feature_extract_unsigned_field(mmfr2,
ID_AA64MMFR2_EL1_VARange_SHIFT);
}
static inline bool cpu_has_lpa2(void)
{
#ifdef CONFIG_ARM64_LPA2
u64 mmfr0;
int feat;
mmfr0 = read_sysreg(id_aa64mmfr0_el1);
mmfr0 &= ~id_aa64mmfr0_override.mask;
mmfr0 |= id_aa64mmfr0_override.val;
feat = cpuid_feature_extract_signed_field(mmfr0,
ID_AA64MMFR0_EL1_TGRAN_SHIFT);
return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
#else
return false;
#endif
}
#endif /* __ASSEMBLY__ */
#endif

View File

@ -201,16 +201,16 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#define COMPAT_ELF_PLATFORM ("v8l")
#endif
#ifdef CONFIG_COMPAT
/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL
/* AArch32 registers. */
#define COMPAT_ELF_NGREG 18
typedef unsigned int compat_elf_greg_t;
typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
#ifdef CONFIG_COMPAT
/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL
/* AArch32 EABI. */
#define EF_ARM_EABI_MASK 0xff000000
int compat_elf_check_arch(const struct elf32_hdr *);

View File

@ -117,15 +117,9 @@
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
#define ESR_ELx_FSC_SEA_TTW0 (0x14)
#define ESR_ELx_FSC_SEA_TTW1 (0x15)
#define ESR_ELx_FSC_SEA_TTW2 (0x16)
#define ESR_ELx_FSC_SEA_TTW3 (0x17)
#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n))
#define ESR_ELx_FSC_SECC (0x18)
#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)
@ -394,6 +388,9 @@ static inline bool esr_is_data_abort(unsigned long esr)
static inline bool esr_fsc_is_translation_fault(unsigned long esr)
{
/* Translation fault, level -1 */
if ((esr & ESR_ELx_FSC) == 0b101011)
return true;
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
}

View File

@ -74,7 +74,7 @@ void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
void do_el0_mops(struct pt_regs *regs, unsigned long esr);
void do_serror(struct pt_regs *regs, unsigned long esr);
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
void do_signal(struct pt_regs *regs);
void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
#endif /* __ASM_EXCEPTION_H */

View File

@ -87,6 +87,7 @@ enum fixed_addresses {
FIX_PTE,
FIX_PMD,
FIX_PUD,
FIX_P4D,
FIX_PGD,
__end_of_fixed_addresses
@ -100,7 +101,6 @@ enum fixed_addresses {
#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
void __init early_fixmap_init(void);
void __init fixmap_copy(pgd_t *pgdir);
#define __early_set_fixmap __set_fixmap

View File

@ -21,7 +21,6 @@
#include <linux/stddef.h>
#include <linux/types.h>
#ifdef CONFIG_COMPAT
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
#define VFP_FPSCR_CTRL_MASK 0x07f79f00
@ -30,7 +29,6 @@
* control/status register.
*/
#define VFP_STATE_SIZE ((32 * 8) + 4)
#endif
static inline unsigned long cpacr_save_enable_kernel_sve(void)
{
@ -89,6 +87,7 @@ struct cpu_fp_state {
void *sve_state;
void *sme_state;
u64 *svcr;
u64 *fpmr;
unsigned int sve_vl;
unsigned int sme_vl;
enum fp_type *fp_type;
@ -154,6 +153,7 @@ extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);
extern u64 read_smcr_features(void);

View File

@ -59,7 +59,6 @@ static inline void decode_ctrl_reg(u32 reg,
/* Watchpoints */
#define ARM_BREAKPOINT_LOAD 1
#define ARM_BREAKPOINT_STORE 2
#define AARCH64_ESR_ACCESS_MASK (1 << 6)
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1

View File

@ -142,6 +142,21 @@
#define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16)
#define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3)
#define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128)
#define KERNEL_HWCAP_FPMR __khwcap2_feature(FPMR)
#define KERNEL_HWCAP_LUT __khwcap2_feature(LUT)
#define KERNEL_HWCAP_FAMINMAX __khwcap2_feature(FAMINMAX)
#define KERNEL_HWCAP_F8CVT __khwcap2_feature(F8CVT)
#define KERNEL_HWCAP_F8FMA __khwcap2_feature(F8FMA)
#define KERNEL_HWCAP_F8DP4 __khwcap2_feature(F8DP4)
#define KERNEL_HWCAP_F8DP2 __khwcap2_feature(F8DP2)
#define KERNEL_HWCAP_F8E4M3 __khwcap2_feature(F8E4M3)
#define KERNEL_HWCAP_F8E5M2 __khwcap2_feature(F8E5M2)
#define KERNEL_HWCAP_SME_LUTV2 __khwcap2_feature(SME_LUTV2)
#define KERNEL_HWCAP_SME_F8F16 __khwcap2_feature(SME_F8F16)
#define KERNEL_HWCAP_SME_F8F32 __khwcap2_feature(SME_F8F32)
#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA)
#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4)
#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2)
/*
* This yields a mask that user programs can use to figure out what

View File

@ -24,25 +24,29 @@
#define __raw_writeb __raw_writeb
static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
volatile u8 __iomem *ptr = addr;
asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writew __raw_writew
static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
volatile u16 __iomem *ptr = addr;
asm volatile("strh %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writel __raw_writel
static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
volatile u32 __iomem *ptr = addr;
asm volatile("str %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writeq __raw_writeq
static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
{
asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
volatile u64 __iomem *ptr = addr;
asm volatile("str %x0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_readb __raw_readb

View File

@ -17,11 +17,9 @@
asmlinkage void kasan_early_init(void);
void kasan_init(void);
void kasan_copy_shadow(pgd_t *pgdir);
#else
static inline void kasan_init(void) { }
static inline void kasan_copy_shadow(pgd_t *pgdir) { }
#endif
#endif

View File

@ -13,28 +13,27 @@
#include <asm/sparsemem.h>
/*
* The linear mapping and the start of memory are both 2M aligned (per
* the arm64 booting.txt requirements). Hence we can use section mapping
* with 4K (section size = 2M) but not with 16K (section size = 32M) or
* 64K (section size = 512M).
* The physical and virtual addresses of the start of the kernel image are
* equal modulo 2 MiB (per the arm64 booting.txt requirements). Hence we can
* use section mapping with 4K (section size = 2M) but not with 16K (section
* size = 32M) or 64K (section size = 512M).
*/
/*
* The idmap and swapper page tables need some space reserved in the kernel
* image. Both require pgd, pud (4 levels only) and pmd tables to (section)
* map the kernel. With the 64K page configuration, swapper and idmap need to
* map to pte level. The swapper also maps the FDT (see __create_page_tables
* for more information). Note that the number of ID map translation levels
* could be increased on the fly if system RAM is out of reach for the default
* VA range, so pages required to map highest possible PA are reserved in all
* cases.
*/
#ifdef CONFIG_ARM64_4K_PAGES
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
#if defined(PMD_SIZE) && PMD_SIZE <= MIN_KIMG_ALIGN
#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
#define SWAPPER_SKIP_LEVEL 1
#else
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
#define SWAPPER_SKIP_LEVEL 0
#endif
#define SWAPPER_BLOCK_SIZE (UL(1) << SWAPPER_BLOCK_SHIFT)
#define SWAPPER_TABLE_SHIFT (SWAPPER_BLOCK_SHIFT + PAGE_SHIFT - 3)
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL)
#define INIT_IDMAP_PGTABLE_LEVELS (IDMAP_LEVELS - SWAPPER_SKIP_LEVEL)
#define IDMAP_VA_BITS 48
#define IDMAP_LEVELS ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS)
#define IDMAP_ROOT_LEVEL (4 - IDMAP_LEVELS)
/*
* A relocatable kernel may execute from an address that differs from the one at
@ -50,57 +49,39 @@
#define EARLY_ENTRIES(vstart, vend, shift, add) \
(SPAN_NR_ENTRIES(vstart, vend, shift) + (add))
#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add))
#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
(lvls > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0)
#if SWAPPER_PGTABLE_LEVELS > 3
#define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add))
#else
#define EARLY_PUDS(vstart, vend, add) (0)
#endif
#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \
+ EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ EARLY_LEVEL(2, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ EARLY_LEVEL(1, (lvls), (vstart), (vend), add))/* each entry needs a next level page table */
#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
+ EARLY_SEGMENT_EXTRA_PAGES))
#if SWAPPER_PGTABLE_LEVELS > 2
#define EARLY_PMDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT, add))
#else
#define EARLY_PMDS(vstart, vend, add) (0)
#endif
#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1))
#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR page */ \
+ EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \
+ EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \
+ EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */
#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE))
#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
#define INIT_IDMAP_FDT_SIZE ((INIT_IDMAP_FDT_PAGES + EARLY_IDMAP_EXTRA_FDT_PAGES) * PAGE_SIZE)
/* the initial ID map may need two extra pages if it needs to be extended */
#if VA_BITS < 48
#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
#else
#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
#endif
#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1)
/* Initial memory map size */
#ifdef CONFIG_ARM64_4K_PAGES
#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
#define SWAPPER_BLOCK_SIZE PMD_SIZE
#define SWAPPER_TABLE_SHIFT PUD_SHIFT
#else
#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
#define SWAPPER_BLOCK_SIZE PAGE_SIZE
#define SWAPPER_TABLE_SHIFT PMD_SHIFT
#endif
/* The number of segments in the kernel image (text, rodata, inittext, initdata, data+bss) */
#define KERNEL_SEGMENT_COUNT 5
#if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN
#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1)
/*
* Initial memory map attributes.
* The initial ID map consists of the kernel image, mapped as two separate
* segments, and may appear misaligned wrt the swapper block size. This means
* we need 3 additional pages. The DT could straddle a swapper block boundary,
* so it may need 2.
*/
#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN)
#ifdef CONFIG_ARM64_4K_PAGES
#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE)
#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
#define EARLY_IDMAP_EXTRA_PAGES 3
#define EARLY_IDMAP_EXTRA_FDT_PAGES 2
#else
#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
#define EARLY_SEGMENT_EXTRA_PAGES 0
#define EARLY_IDMAP_EXTRA_PAGES 0
#define EARLY_IDMAP_EXTRA_FDT_PAGES 0
#endif
#endif /* __ASM_KERNEL_PGTABLE_H */

View File

@ -105,7 +105,7 @@
#define HCRX_GUEST_FLAGS \
(HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
(cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
/* TCR_EL2 Registers bits */
#define TCR_EL2_DS (1UL << 32)

View File

@ -425,15 +425,9 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
case ESR_ELx_FSC_EXTABT:
case ESR_ELx_FSC_SEA_TTW0:
case ESR_ELx_FSC_SEA_TTW1:
case ESR_ELx_FSC_SEA_TTW2:
case ESR_ELx_FSC_SEA_TTW3:
case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3):
case ESR_ELx_FSC_SECC:
case ESR_ELx_FSC_SECC_TTW0:
case ESR_ELx_FSC_SECC_TTW1:
case ESR_ELx_FSC_SECC_TTW2:
case ESR_ELx_FSC_SECC_TTW3:
case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3):
return true;
default:
return false;

View File

@ -543,6 +543,7 @@ struct kvm_vcpu_arch {
enum fp_type fp_type;
unsigned int sve_max_vl;
u64 svcr;
u64 fpmr;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;

View File

@ -30,8 +30,8 @@
* keep a constant PAGE_OFFSET and "fallback" to using the higher end
* of the VMEMMAP where 52-bit support is not available in hardware.
*/
#define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)
#define VMEMMAP_RANGE (_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET)
#define VMEMMAP_SIZE ((VMEMMAP_RANGE >> PAGE_SHIFT) * sizeof(struct page))
/*
* PAGE_OFFSET - the virtual address of the start of the linear map, at the
@ -47,14 +47,18 @@
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
#define MODULES_VSIZE (SZ_2G)
#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
#define PCI_IO_END (VMEMMAP_START - SZ_8M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
#define FIXADDR_TOP (VMEMMAP_START - SZ_32M)
#define VMEMMAP_START (VMEMMAP_END - VMEMMAP_SIZE)
#define VMEMMAP_END (-UL(SZ_1G))
#define PCI_IO_START (VMEMMAP_END + SZ_8M)
#define PCI_IO_END (PCI_IO_START + PCI_IO_SIZE)
#define FIXADDR_TOP (-UL(SZ_8M))
#if VA_BITS > 48
#ifdef CONFIG_ARM64_16K_PAGES
#define VA_BITS_MIN (47)
#else
#define VA_BITS_MIN (48)
#endif
#else
#define VA_BITS_MIN (VA_BITS)
#endif
@ -209,9 +213,20 @@
#include <asm/boot.h>
#include <asm/bug.h>
#include <asm/sections.h>
#include <asm/sysreg.h>
static inline u64 __pure read_tcr(void)
{
u64 tcr;
// read_sysreg() uses asm volatile, so avoid it here
asm("mrs %0, tcr_el1" : "=r"(tcr));
return tcr;
}
#if VA_BITS > 48
extern u64 vabits_actual;
// For reasons of #include hell, we can't use TCR_T1SZ_OFFSET/TCR_T1SZ_MASK here
#define vabits_actual (64 - ((read_tcr() >> 16) & 63))
#else
#define vabits_actual ((u64)VA_BITS)
#endif

View File

@ -71,10 +71,46 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
pgprot_t prot, bool page_mappings_only);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
extern void mark_linear_text_alias_ro(void);
extern bool kaslr_requires_kpti(void);
/*
* This check is triggered during the early boot before the cpufeature
* is initialised. Checking the status on the local CPU allows the boot
* CPU to detect the need for non-global mappings and thus avoiding a
* pagetable re-write after all the CPUs are booted. This check will be
* anyway run on individual CPUs, allowing us to get the consistent
* state once the SMP CPUs are up and thus make the switch to non-global
* mappings if required.
*/
static inline bool kaslr_requires_kpti(void)
{
/*
* E0PD does a similar job to KPTI so can be used instead
* where available.
*/
if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
if (cpuid_feature_extract_unsigned_field(mmfr2,
ID_AA64MMFR2_EL1_E0PD_SHIFT))
return false;
}
/*
* Systems affected by Cavium erratum 24756 are incompatible
* with KPTI.
*/
if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
extern const struct midr_range cavium_erratum_27456_cpus[];
if (is_midr_in_range_list(read_cpuid_id(),
cavium_erratum_27456_cpus))
return false;
}
return true;
}
#define INIT_MM_CONTEXT(name) \
.pgd = init_pg_dir,
.pgd = swapper_pg_dir,
#endif /* !__ASSEMBLY__ */
#endif

View File

@ -61,11 +61,9 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
}
/*
* TCR.T0SZ value to use when the ID map is active. Usually equals
* TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
* physical memory, in which case it will be smaller.
* TCR.T0SZ value to use when the ID map is active.
*/
extern int idmap_t0sz;
#define idmap_t0sz TCR_T0SZ(IDMAP_VA_BITS)
/*
* Ensure TCR.T0SZ is set to the provided value.
@ -110,18 +108,13 @@ static inline void cpu_uninstall_idmap(void)
cpu_switch_mm(mm->pgd, mm);
}
static inline void __cpu_install_idmap(pgd_t *idmap)
static inline void cpu_install_idmap(void)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
cpu_set_idmap_tcr_t0sz();
cpu_switch_mm(lm_alias(idmap), &init_mm);
}
static inline void cpu_install_idmap(void)
{
__cpu_install_idmap(idmap_pg_dir);
cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
/*
@ -148,51 +141,21 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
isb();
}
/*
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
static inline void __cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap, bool cnp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
unsigned long daif;
/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
if (cnp)
ttbr1 |= TTBR_CNP_BIT;
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
__cpu_install_idmap(idmap);
/*
* We really don't want to take *any* exceptions while TTBR1 is
* in the process of being replaced so mask everything.
*/
daif = local_daif_save();
replace_phys(ttbr1);
local_daif_restore(daif);
cpu_uninstall_idmap();
}
void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp);
static inline void cpu_enable_swapper_cnp(void)
{
__cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir, true);
__cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true);
}
static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
/*
* Only for early TTBR1 replacement before cpucaps are finalized and
* before we've decided whether to use CNP.
*/
WARN_ON(system_capabilities_finalized());
__cpu_replace_ttbr1(pgdp, idmap, false);
__cpu_replace_ttbr1(pgdp, false);
}
/*

View File

@ -14,6 +14,7 @@
#include <asm/tlbflush.h>
#define __HAVE_ARCH_PGD_FREE
#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
@ -43,7 +44,8 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
if (pgtable_l4_enabled())
set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
}
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
@ -53,6 +55,13 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN;
__p4d_populate(p4dp, __pa(pudp), p4dval);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
if (!pgtable_l4_enabled())
return;
__pud_free(mm, pud);
}
#else
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
@ -60,6 +69,47 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#if CONFIG_PGTABLE_LEVELS > 4
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
{
if (pgtable_l5_enabled())
set_pgd(pgdp, __pgd(__phys_to_pgd_val(p4dp) | prot));
}
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp)
{
pgdval_t pgdval = PGD_TYPE_TABLE;
pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN;
__pgd_populate(pgdp, __pa(p4dp), pgdval);
}
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
{
gfp_t gfp = GFP_PGTABLE_USER;
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
return (p4d_t *)get_zeroed_page(gfp);
}
static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
{
if (!pgtable_l5_enabled())
return;
BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
free_page((unsigned long)p4d);
}
#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
#else
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
{
BUILD_BUG();
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);

View File

@ -26,10 +26,10 @@
#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
/*
* Size mapped by an entry at level n ( 0 <= n <= 3)
* Size mapped by an entry at level n ( -1 <= n <= 3)
* We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
* the architecture is 4. Hence, starting at level n, we have further
* the architecture is 5. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
@ -62,9 +62,16 @@
#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3))
#endif
#if CONFIG_PGTABLE_LEVELS > 4
#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0)
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE-1))
#define PTRS_PER_P4D (1 << (PAGE_SHIFT - 3))
#endif
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map
* (depending on the configuration, this level can be 0, 1 or 2).
* (depending on the configuration, this level can be -1, 0, 1 or 2).
*/
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
@ -87,6 +94,15 @@
/*
* Hardware page table definitions.
*
* Level -1 descriptor (PGD).
*/
#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0)
#define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59)
#define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60)
/*
* Level 0 descriptor (P4D).
*/
#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
@ -155,13 +171,17 @@
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#ifdef CONFIG_ARM64_PA_BITS_52
#ifdef CONFIG_ARM64_64K_PAGES
#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
#define PTE_ADDR_HIGH_SHIFT 36
#define PHYS_TO_PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
#else
#define PTE_ADDR_MASK PTE_ADDR_LOW
#define PTE_ADDR_HIGH (_AT(pteval_t, 0x3) << 8)
#define PTE_ADDR_HIGH_SHIFT 42
#define PHYS_TO_PTE_ADDR_MASK GENMASK_ULL(49, 8)
#endif
#endif
/*
@ -284,6 +304,7 @@
#define TCR_E0PD1 (UL(1) << 56)
#define TCR_TCMA0 (UL(1) << 57)
#define TCR_TCMA1 (UL(1) << 58)
#define TCR_DS (UL(1) << 59)
/*
* TTBR.

View File

@ -30,8 +30,8 @@
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_MAYBE_NG | PTE_MAYBE_SHARED | PTE_AF)
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_MAYBE_NG | PMD_MAYBE_SHARED | PMD_SECT_AF)
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
@ -57,10 +57,6 @@
#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
#ifdef __ASSEMBLY__
#define PTE_MAYBE_NG 0
#endif
#ifndef __ASSEMBLY__
#include <asm/cpufeature.h>
@ -71,7 +67,19 @@ extern bool arm64_use_ng_mappings;
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
#ifndef CONFIG_ARM64_LPA2
#define lpa2_is_enabled() false
#define PTE_MAYBE_SHARED PTE_SHARED
#define PMD_MAYBE_SHARED PMD_SECT_S
#else
static inline bool __pure lpa2_is_enabled(void)
{
return read_tcr() & TCR_DS;
}
#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED)
#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S)
#endif
/*
* If we have userspace only BTI we don't want to mark kernel pages

View File

@ -36,6 +36,12 @@ typedef struct { pudval_t pud; } pud_t;
#define __pud(x) ((pud_t) { (x) } )
#endif
#if CONFIG_PGTABLE_LEVELS > 4
typedef struct { p4dval_t p4d; } p4d_t;
#define p4d_val(x) ((x).p4d)
#define __p4d(x) ((p4d_t) { (x) } )
#endif
typedef struct { pgdval_t pgd; } pgd_t;
#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) } )

View File

@ -18,11 +18,15 @@
* VMALLOC range.
*
* VMALLOC_START: beginning of the kernel vmalloc space
* VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
* and fixed mappings
* VMALLOC_END: extends to the available space below vmemmap
*/
#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (VMEMMAP_START - SZ_256M)
#if VA_BITS == VA_BITS_MIN
#define VMALLOC_END (VMEMMAP_START - SZ_8M)
#else
#define VMEMMAP_UNUSED_NPAGES ((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT)
#define VMALLOC_END (VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M)
#endif
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
@ -76,15 +80,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#ifdef CONFIG_ARM64_PA_BITS_52
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
pte_val(pte) &= ~PTE_MAYBE_SHARED;
return (pte_val(pte) & PTE_ADDR_LOW) |
((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
}
static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK;
return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW)
#define __phys_to_pte_val(phys) (phys)
#endif
@ -616,12 +621,12 @@ static inline bool pud_table(pud_t pud) { return true; }
PUD_TYPE_TABLE)
#endif
extern pgd_t init_pg_dir[PTRS_PER_PGD];
extern pgd_t init_pg_dir[];
extern pgd_t init_pg_end[];
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
extern pgd_t swapper_pg_dir[];
extern pgd_t idmap_pg_dir[];
extern pgd_t tramp_pg_dir[];
extern pgd_t reserved_pg_dir[];
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
@ -694,14 +699,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_user(pud) pte_user(pud_pte(pud))
#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
static inline bool pgtable_l4_enabled(void);
static inline void set_pud(pud_t *pudp, pud_t pud)
{
#ifdef __PAGETABLE_PUD_FOLDED
if (in_swapper_pgdir(pudp)) {
if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {
set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
return;
}
#endif /* __PAGETABLE_PUD_FOLDED */
WRITE_ONCE(*pudp, pud);
@ -754,12 +759,27 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#if CONFIG_PGTABLE_LEVELS > 3
static __always_inline bool pgtable_l4_enabled(void)
{
if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2))
return true;
if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
return vabits_actual == VA_BITS;
return alternative_has_cap_unlikely(ARM64_HAS_VA52);
}
static inline bool mm_pud_folded(const struct mm_struct *mm)
{
return !pgtable_l4_enabled();
}
#define mm_pud_folded mm_pud_folded
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
#define p4d_none(p4d) (!p4d_val(p4d))
#define p4d_bad(p4d) (!(p4d_val(p4d) & 2))
#define p4d_present(p4d) (p4d_val(p4d))
#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d))
#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & 2))
#define p4d_present(p4d) (!p4d_none(p4d))
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
@ -775,7 +795,8 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
static inline void p4d_clear(p4d_t *p4dp)
{
set_p4d(p4dp, __p4d(0));
if (pgtable_l4_enabled())
set_p4d(p4dp, __p4d(0));
}
static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
@ -783,27 +804,75 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
return __p4d_to_phys(p4d);
}
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr)
{
return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr);
}
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
return (pud_t *)__va(p4d_page_paddr(p4d));
}
/* Find an entry in the first-level page table. */
#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
{
BUG_ON(!pgtable_l4_enabled());
#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr))
#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
}
static inline
pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr)
{
if (!pgtable_l4_enabled())
return p4d_to_folded_pud(p4dp, addr);
return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr);
}
#define pud_offset_lockless pud_offset_lockless
static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr)
{
return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr);
}
#define pud_offset pud_offset
static inline pud_t *pud_set_fixmap(unsigned long addr)
{
if (!pgtable_l4_enabled())
return NULL;
return (pud_t *)set_fixmap_offset(FIX_PUD, addr);
}
static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr)
{
if (!pgtable_l4_enabled())
return p4d_to_folded_pud(p4dp, addr);
return pud_set_fixmap(pud_offset_phys(p4dp, addr));
}
static inline void pud_clear_fixmap(void)
{
if (pgtable_l4_enabled())
clear_fixmap(FIX_PUD);
}
/* use ONLY for statically allocated translation tables */
static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr)
{
if (!pgtable_l4_enabled())
return p4d_to_folded_pud(p4dp, addr);
return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr));
}
#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
/* use ONLY for statically allocated translation tables */
#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
#else
static inline bool pgtable_l4_enabled(void) { return false; }
#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;})
#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;})
/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
#define pud_set_fixmap(addr) NULL
@ -814,6 +883,122 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#if CONFIG_PGTABLE_LEVELS > 4
static __always_inline bool pgtable_l5_enabled(void)
{
if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
return vabits_actual == VA_BITS;
return alternative_has_cap_unlikely(ARM64_HAS_VA52);
}
static inline bool mm_p4d_folded(const struct mm_struct *mm)
{
return !pgtable_l5_enabled();
}
#define mm_p4d_folded mm_p4d_folded
#define p4d_ERROR(e) \
pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd))
#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & 2))
#define pgd_present(pgd) (!pgd_none(pgd))
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
if (in_swapper_pgdir(pgdp)) {
set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
return;
}
WRITE_ONCE(*pgdp, pgd);
dsb(ishst);
isb();
}
static inline void pgd_clear(pgd_t *pgdp)
{
if (pgtable_l5_enabled())
set_pgd(pgdp, __pgd(0));
}
static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
{
return __pgd_to_phys(pgd);
}
#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
{
return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
}
static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
{
BUG_ON(!pgtable_l5_enabled());
return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
}
static inline
p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
{
if (!pgtable_l5_enabled())
return pgd_to_folded_p4d(pgdp, addr);
return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr);
}
#define p4d_offset_lockless p4d_offset_lockless
static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
{
return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr);
}
static inline p4d_t *p4d_set_fixmap(unsigned long addr)
{
if (!pgtable_l5_enabled())
return NULL;
return (p4d_t *)set_fixmap_offset(FIX_P4D, addr);
}
static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr)
{
if (!pgtable_l5_enabled())
return pgd_to_folded_p4d(pgdp, addr);
return p4d_set_fixmap(p4d_offset_phys(pgdp, addr));
}
static inline void p4d_clear_fixmap(void)
{
if (pgtable_l5_enabled())
clear_fixmap(FIX_P4D);
}
/* use ONLY for statically allocated translation tables */
static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
{
if (!pgtable_l5_enabled())
return pgd_to_folded_p4d(pgdp, addr);
return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr));
}
#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
#else
static inline bool pgtable_l5_enabled(void) { return false; }
/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
#define p4d_set_fixmap(addr) NULL
#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp)
#define p4d_clear_fixmap()
#define p4d_offset_kimg(dir,addr) ((p4d_t *)dir)
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))

View File

@ -155,6 +155,8 @@ struct thread_struct {
struct {
unsigned long tp_value; /* TLS register */
unsigned long tp2_value;
u64 fpmr;
unsigned long pad;
struct user_fpsimd_state fpsimd_state;
} uw;
@ -253,6 +255,8 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
sizeof_field(struct thread_struct, uw.tp_value) +
sizeof_field(struct thread_struct, uw.tp2_value) +
sizeof_field(struct thread_struct, uw.fpmr) +
sizeof_field(struct thread_struct, uw.pad) +
sizeof_field(struct thread_struct, uw.fpsimd_state));
*offset = offsetof(struct thread_struct, uw);

View File

@ -33,37 +33,11 @@
#include <asm/cpufeature.h>
#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
static inline bool should_patch_pac_into_scs(void)
{
u64 reg;
/*
* We only enable the shadow call stack dynamically if we are running
* on a system that does not implement PAC or BTI. PAC and SCS provide
* roughly the same level of protection, and BTI relies on the PACIASP
* instructions serving as landing pads, preventing us from patching
* those instructions into something else.
*/
reg = read_sysreg_s(SYS_ID_AA64ISAR1_EL1);
if (SYS_FIELD_GET(ID_AA64ISAR1_EL1, APA, reg) |
SYS_FIELD_GET(ID_AA64ISAR1_EL1, API, reg))
return false;
reg = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
if (SYS_FIELD_GET(ID_AA64ISAR2_EL1, APA3, reg))
return false;
if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
reg = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
if (reg & (0xf << ID_AA64PFR1_EL1_BT_SHIFT))
return false;
}
return true;
}
static inline void dynamic_scs_init(void)
{
if (should_patch_pac_into_scs()) {
extern bool __pi_dynamic_scs_is_enabled;
if (__pi_dynamic_scs_is_enabled) {
pr_info("Enabling dynamic shadow call stack\n");
static_branch_enable(&dynamic_scs_enabled);
}
@ -72,8 +46,8 @@ static inline void dynamic_scs_init(void)
static inline void dynamic_scs_init(void) {}
#endif
int scs_patch(const u8 eh_frame[], int size);
asmlinkage void scs_patch_vmlinux(void);
int __pi_scs_patch(const u8 eh_frame[], int size);
asmlinkage void __pi_scs_patch_vmlinux(void);
#endif /* __ASSEMBLY __ */

View File

@ -7,9 +7,6 @@
#include <uapi/asm/setup.h>
void *get_early_fdt_ptr(void);
void early_fdt_map(u64 dt_phys);
/*
* These two variables are used in the head.S file.
*/

View File

@ -103,6 +103,9 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
{
struct ptdesc *ptdesc = virt_to_ptdesc(pudp);
if (!pgtable_l4_enabled())
return;
pagetable_pud_dtor(ptdesc);
tlb_remove_ptdesc(tlb, ptdesc);
}

View File

@ -107,5 +107,20 @@
#define HWCAP2_SVE_B16B16 (1UL << 45)
#define HWCAP2_LRCPC3 (1UL << 46)
#define HWCAP2_LSE128 (1UL << 47)
#define HWCAP2_FPMR (1UL << 48)
#define HWCAP2_LUT (1UL << 49)
#define HWCAP2_FAMINMAX (1UL << 50)
#define HWCAP2_F8CVT (1UL << 51)
#define HWCAP2_F8FMA (1UL << 52)
#define HWCAP2_F8DP4 (1UL << 53)
#define HWCAP2_F8DP2 (1UL << 54)
#define HWCAP2_F8E4M3 (1UL << 55)
#define HWCAP2_F8E5M2 (1UL << 56)
#define HWCAP2_SME_LUTV2 (1UL << 57)
#define HWCAP2_SME_F8F16 (1UL << 58)
#define HWCAP2_SME_F8F32 (1UL << 59)
#define HWCAP2_SME_SF8FMA (1UL << 60)
#define HWCAP2_SME_SF8DP4 (1UL << 61)
#define HWCAP2_SME_SF8DP2 (1UL << 62)
#endif /* _UAPI__ASM_HWCAP_H */

View File

@ -152,6 +152,14 @@ struct tpidr2_context {
__u64 tpidr2;
};
/* FPMR context */
#define FPMR_MAGIC 0x46504d52
struct fpmr_context {
struct _aarch64_ctx head;
__u64 fpmr;
};
#define ZA_MAGIC 0x54366345
struct za_context {

View File

@ -13,6 +13,17 @@
#define __SVE_VQ_BYTES 16 /* number of bytes per quadword */
/*
* Yes, __SVE_VQ_MAX is 512 QUADWORDS.
*
* To help ensure forward portability, this is much larger than the
* current maximum value defined by the SVE architecture. While arrays
* or static allocations can be sized based on this value, watch out!
* It will waste a surprisingly large amount of memory.
*
* Dynamic sizing based on the actual runtime vector length is likely to
* be preferable for most purposes.
*/
#define __SVE_VQ_MIN 1
#define __SVE_VQ_MAX 512

View File

@ -33,8 +33,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
syscall.o proton-pack.o idreg-override.o idle.o \
patching.o
syscall.o proton-pack.o idle.o patching.o pi/
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
@ -57,7 +56,7 @@ obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
@ -72,14 +71,6 @@ obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o
# We need to prevent the SCS patching code from patching itself. Using
# -mbranch-protection=none here to avoid the patchable PAC opcodes from being
# generated triggers an issue with full LTO on Clang, which stops emitting PAC
# instructions altogether. So disable LTO as well for the compilation unit.
CFLAGS_patch-scs.o += -mbranch-protection=none
CFLAGS_REMOVE_patch-scs.o += $(CC_FLAGS_LTO)
# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so

View File

@ -75,8 +75,8 @@ int main(void)
DEFINE(S_FP, offsetof(struct pt_regs, regs[29]));
DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP, offsetof(struct pt_regs, sp));
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_PC, offsetof(struct pt_regs, pc));
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));

View File

@ -220,6 +220,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_LUT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0),
@ -234,6 +235,11 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64isar3[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV2_SHIFT, 4, 0),
@ -267,6 +273,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0),
@ -294,6 +305,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_LUTv2_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
@ -306,6 +319,10 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F16_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
@ -316,6 +333,22 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8FMA_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP4_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP2_SHIFT, 1, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8CVT_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8FMA_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP4_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP2_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E4M3_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E5M2_SHIFT, 1, 0),
ARM64_FTR_END,
};
@ -655,13 +688,15 @@ static const struct arm64_ftr_bits ftr_raz[] = {
#define ARM64_FTR_REG(id, table) \
__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
struct arm64_ftr_override id_aa64mmfr0_override;
struct arm64_ftr_override id_aa64mmfr1_override;
struct arm64_ftr_override id_aa64mmfr2_override;
struct arm64_ftr_override id_aa64pfr0_override;
struct arm64_ftr_override id_aa64pfr1_override;
struct arm64_ftr_override id_aa64zfr0_override;
struct arm64_ftr_override id_aa64smfr0_override;
struct arm64_ftr_override id_aa64isar1_override;
struct arm64_ftr_override id_aa64isar2_override;
struct arm64_ftr_override arm64_sw_feature_override;
@ -702,10 +737,12 @@ static const struct __ftr_reg_entry {
&id_aa64pfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
&id_aa64pfr1_override),
ARM64_FTR_REG(SYS_ID_AA64PFR2_EL1, ftr_id_aa64pfr2),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0,
&id_aa64zfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0,
&id_aa64smfr0_override),
ARM64_FTR_REG(SYS_ID_AA64FPFR0_EL1, ftr_id_aa64fpfr0),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@ -717,12 +754,15 @@ static const struct __ftr_reg_entry {
&id_aa64isar1_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
&id_aa64isar2_override),
ARM64_FTR_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0,
&id_aa64mmfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
&id_aa64mmfr1_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2,
&id_aa64mmfr2_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
/* Op1 = 1, CRn = 0, CRm = 0 */
@ -1043,14 +1083,17 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2);
init_cpu_ftr_reg(SYS_ID_AA64ISAR3_EL1, info->reg_id_aa64isar3);
init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
init_cpu_ftr_reg(SYS_ID_AA64PFR2_EL1, info->reg_id_aa64pfr2);
init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
init_cpu_ftr_reg(SYS_ID_AA64FPFR0_EL1, info->reg_id_aa64fpfr0);
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
init_32bit_cpu_features(&info->aarch32);
@ -1272,6 +1315,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu,
info->reg_id_aa64isar2, boot->reg_id_aa64isar2);
taint |= check_update_ftr_reg(SYS_ID_AA64ISAR3_EL1, cpu,
info->reg_id_aa64isar3, boot->reg_id_aa64isar3);
/*
* Differing PARange support is fine as long as all peripherals and
@ -1291,6 +1336,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR2_EL1, cpu,
info->reg_id_aa64pfr2, boot->reg_id_aa64pfr2);
taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
@ -1298,6 +1345,9 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64FPFR0_EL1, cpu,
info->reg_id_aa64fpfr0, boot->reg_id_aa64fpfr0);
/* Probe vector lengths */
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
@ -1410,8 +1460,10 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
read_sysreg_case(SYS_ID_AA64PFR0_EL1);
read_sysreg_case(SYS_ID_AA64PFR1_EL1);
read_sysreg_case(SYS_ID_AA64PFR2_EL1);
read_sysreg_case(SYS_ID_AA64ZFR0_EL1);
read_sysreg_case(SYS_ID_AA64SMFR0_EL1);
read_sysreg_case(SYS_ID_AA64FPFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR1_EL1);
read_sysreg_case(SYS_ID_AA64MMFR0_EL1);
@ -1421,6 +1473,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
read_sysreg_case(SYS_ID_AA64ISAR3_EL1);
read_sysreg_case(SYS_CNTFRQ_EL0);
read_sysreg_case(SYS_CTR_EL0);
@ -1620,46 +1673,6 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
return has_cpuid_feature(entry, scope);
}
/*
* This check is triggered during the early boot before the cpufeature
* is initialised. Checking the status on the local CPU allows the boot
* CPU to detect the need for non-global mappings and thus avoiding a
* pagetable re-write after all the CPUs are booted. This check will be
* anyway run on individual CPUs, allowing us to get the consistent
* state once the SMP CPUs are up and thus make the switch to non-global
* mappings if required.
*/
bool kaslr_requires_kpti(void)
{
if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
return false;
/*
* E0PD does a similar job to KPTI so can be used instead
* where available.
*/
if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
if (cpuid_feature_extract_unsigned_field(mmfr2,
ID_AA64MMFR2_EL1_E0PD_SHIFT))
return false;
}
/*
* Systems affected by Cavium erratum 24756 are incompatible
* with KPTI.
*/
if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
extern const struct midr_range cavium_erratum_27456_cpus[];
if (is_midr_in_range_list(read_cpuid_id(),
cavium_erratum_27456_cpus))
return false;
}
return kaslr_enabled();
}
static bool __meltdown_safe = true;
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
@ -1712,7 +1725,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
}
/* Useful for KASLR robustness */
if (kaslr_requires_kpti()) {
if (kaslr_enabled() && kaslr_requires_kpti()) {
if (!__kpti_forced) {
str = "KASLR";
__kpti_forced = 1;
@ -1801,6 +1814,11 @@ static int __init __kpti_install_ng_mappings(void *__unused)
pgd_t *kpti_ng_temp_pgd;
u64 alloc = 0;
if (levels == 5 && !pgtable_l5_enabled())
levels = 4;
else if (levels == 4 && !pgtable_l4_enabled())
levels = 3;
remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
if (!cpu) {
@ -1814,9 +1832,9 @@ static int __init __kpti_install_ng_mappings(void *__unused)
//
// The physical pages are laid out as follows:
//
// +--------+-/-------+-/------ +-\\--------+
// : PTE[] : | PMD[] : | PUD[] : || PGD[] :
// +--------+-\-------+-\------ +-//--------+
// +--------+-/-------+-/------ +-/------ +-\\\--------+
// : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] :
// +--------+-\-------+-\------ +-\------ +-///--------+
// ^
// The first page is mapped into this hierarchy at a PMD_SHIFT
// aligned virtual address, so that we can manipulate the PTE
@ -2042,14 +2060,7 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
int __unused)
{
u64 val;
val = read_sysreg(id_aa64mmfr1_el1);
if (!cpuid_feature_extract_unsigned_field(val, ID_AA64MMFR1_EL1_VH_SHIFT))
return false;
val = arm64_sw_feature_override.val & arm64_sw_feature_override.mask;
return cpuid_feature_extract_unsigned_field(val, ARM64_SW_FEATURE_OVERRIDE_HVHE);
return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
}
#ifdef CONFIG_ARM64_PAN
@ -2739,6 +2750,32 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_lpa2,
},
{
.desc = "FPMR",
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_HAS_FPMR,
.matches = has_cpuid_feature,
.cpu_enable = cpu_enable_fpmr,
ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP)
},
#ifdef CONFIG_ARM64_VA_BITS_52
{
.capability = ARM64_HAS_VA52,
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.matches = has_cpuid_feature,
#ifdef CONFIG_ARM64_64K_PAGES
.desc = "52-bit Virtual Addressing (LVA)",
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, VARange, 52)
#else
.desc = "52-bit Virtual Addressing (LPA2)",
#ifdef CONFIG_ARM64_4K_PAGES
ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)
#else
ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)
#endif
#endif
},
#endif
{},
};
@ -2822,6 +2859,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, FP16, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
HWCAP_CAP(ID_AA64PFR0_EL1, DIT, IMP, CAP_HWCAP, KERNEL_HWCAP_DIT),
HWCAP_CAP(ID_AA64PFR2_EL1, FPMR, IMP, CAP_HWCAP, KERNEL_HWCAP_FPMR),
HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, IMP, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, DPB2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
HWCAP_CAP(ID_AA64ISAR1_EL1, JSCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
@ -2835,6 +2873,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16),
HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH),
HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM),
HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT),
HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX),
HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
@ -2875,6 +2915,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
@ -2882,12 +2923,23 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
#endif /* CONFIG_ARM64_SME */
HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2),
{},
};
@ -3052,13 +3104,9 @@ static void __init enable_cpu_capabilities(u16 scope_mask)
boot_scope = !!(scope_mask & SCOPE_BOOT_CPU);
for (i = 0; i < ARM64_NCAPS; i++) {
unsigned int num;
caps = cpucap_ptrs[i];
if (!caps || !(caps->type & scope_mask))
continue;
num = caps->capability;
if (!cpus_have_cap(num))
if (!caps || !(caps->type & scope_mask) ||
!cpus_have_cap(caps->capability))
continue;
if (boot_scope && caps->cpu_enable)

View File

@ -128,6 +128,21 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SVE_B16B16] = "sveb16b16",
[KERNEL_HWCAP_LRCPC3] = "lrcpc3",
[KERNEL_HWCAP_LSE128] = "lse128",
[KERNEL_HWCAP_FPMR] = "fpmr",
[KERNEL_HWCAP_LUT] = "lut",
[KERNEL_HWCAP_FAMINMAX] = "faminmax",
[KERNEL_HWCAP_F8CVT] = "f8cvt",
[KERNEL_HWCAP_F8FMA] = "f8fma",
[KERNEL_HWCAP_F8DP4] = "f8dp4",
[KERNEL_HWCAP_F8DP2] = "f8dp2",
[KERNEL_HWCAP_F8E4M3] = "f8e4m3",
[KERNEL_HWCAP_F8E5M2] = "f8e5m2",
[KERNEL_HWCAP_SME_LUTV2] = "smelutv2",
[KERNEL_HWCAP_SME_F8F16] = "smef8f16",
[KERNEL_HWCAP_SME_F8F32] = "smef8f32",
[KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma",
[KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4",
[KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2",
};
#ifdef CONFIG_COMPAT
@ -443,14 +458,17 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1);
info->reg_id_aa64isar3 = read_cpuid(ID_AA64ISAR3_EL1);
info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
info->reg_id_aa64pfr2 = read_cpuid(ID_AA64PFR2_EL1);
info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
info->reg_id_aa64fpfr0 = read_cpuid(ID_AA64FPFR0_EL1);
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
info->reg_gmid = read_cpuid(GMID_EL1);

View File

@ -10,6 +10,7 @@
#include <linux/linkage.h>
#include <linux/lockdep.h>
#include <linux/ptrace.h>
#include <linux/resume_user_mode.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/thread_info.h>
@ -126,16 +127,49 @@ static __always_inline void __exit_to_user_mode(void)
lockdep_hardirqs_on(CALLER_ADDR0);
}
static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
{
do {
local_irq_enable();
if (thread_flags & _TIF_NEED_RESCHED)
schedule();
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
clear_thread_flag(TIF_MTE_ASYNC_FAULT);
send_sig_fault(SIGSEGV, SEGV_MTEAERR,
(void __user *)NULL, current);
}
if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
do_signal(regs);
if (thread_flags & _TIF_NOTIFY_RESUME)
resume_user_mode_work(regs);
if (thread_flags & _TIF_FOREIGN_FPSTATE)
fpsimd_restore_current_state();
local_irq_disable();
thread_flags = read_thread_flags();
} while (thread_flags & _TIF_WORK_MASK);
}
static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long flags;
local_daif_mask();
local_irq_disable();
flags = read_thread_flags();
if (unlikely(flags & _TIF_WORK_MASK))
do_notify_resume(regs, flags);
local_daif_mask();
lockdep_sys_exit();
}

View File

@ -359,6 +359,9 @@ static void task_fpsimd_load(void)
WARN_ON(preemptible());
WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
if (system_supports_fpmr())
write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
@ -446,6 +449,9 @@ static void fpsimd_save_user_state(void)
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
if (system_supports_fpmr())
*(last->fpmr) = read_sysreg_s(SYS_FPMR);
/*
* If a task is in a syscall the ABI allows us to only
* preserve the state shared with FPSIMD so don't bother
@ -688,6 +694,12 @@ static void sve_to_fpsimd(struct task_struct *task)
}
}
void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
SYS_SCTLR_EL1);
}
#ifdef CONFIG_ARM64_SVE
/*
* Call __sve_free() directly only if you know task can't be scheduled
@ -1134,6 +1146,8 @@ void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
isb();
write_sysreg_s(0, SYS_ZCR_EL1);
}
void __init sve_setup(void)
@ -1245,6 +1259,9 @@ void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
isb();
/* Ensure all bits in SMCR are set to known values */
write_sysreg_s(0, SYS_SMCR_EL1);
/* Allow EL0 to access TPIDR2 */
write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
isb();
@ -1696,6 +1713,7 @@ static void fpsimd_bind_task_to_cpu(void)
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr;
last->fpmr = &current->thread.uw.fpmr;
last->fp_type = &current->thread.fp_type;
last->to_save = FP_STATE_CURRENT;
current->thread.fpsimd_cpu = smp_processor_id();

View File

@ -80,28 +80,42 @@
* x19 primary_entry() .. start_kernel() whether we entered with the MMU on
* x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
* x22 create_idmap() .. start_kernel() ID map VA of the DT blob
* x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
* x24 __primary_switch() linear map KASLR seed
* x25 primary_entry() .. start_kernel() supported VA size
* x28 create_idmap() callee preserved temp register
*/
SYM_CODE_START(primary_entry)
bl record_mmu_state
bl preserve_boot_args
bl create_idmap
adrp x1, early_init_stack
mov sp, x1
mov x29, xzr
adrp x0, init_idmap_pg_dir
mov x1, xzr
bl __pi_create_init_idmap
/*
* If the page tables have been populated with non-cacheable
* accesses (MMU disabled), invalidate those tables again to
* remove any speculatively loaded cache lines.
*/
cbnz x19, 0f
dmb sy
mov x1, x0 // end of used region
adrp x0, init_idmap_pg_dir
adr_l x2, dcache_inval_poc
blr x2
b 1f
/*
* If we entered with the MMU and caches on, clean the ID mapped part
* of the primary boot code to the PoC so we can safely execute it with
* the MMU off.
*/
cbz x19, 0f
adrp x0, __idmap_text_start
0: adrp x0, __idmap_text_start
adr_l x1, __idmap_text_end
adr_l x2, dcache_clean_poc
blr x2
0: mov x0, x19
1: mov x0, x19
bl init_kernel_el // w0=cpu_boot_mode
mov x20, x0
@ -111,14 +125,6 @@ SYM_CODE_START(primary_entry)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
#if VA_BITS > 48
mrs_s x0, SYS_ID_AA64MMFR2_EL1
tst x0, ID_AA64MMFR2_EL1_VARange_MASK
mov x0, #VA_BITS
mov x25, #VA_BITS_MIN
csel x25, x25, x0, eq
mov x0, x25
#endif
bl __cpu_setup // initialise processor
b __primary_switch
SYM_CODE_END(primary_entry)
@ -177,267 +183,6 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
ret
SYM_CODE_END(preserve_boot_args)
SYM_FUNC_START_LOCAL(clear_page_tables)
/*
* Clear the init page tables.
*/
adrp x0, init_pg_dir
adrp x1, init_pg_end
sub x2, x1, x0
mov x1, xzr
b __pi_memset // tail call
SYM_FUNC_END(clear_page_tables)
/*
* Macro to populate page table entries, these entries can be pointers to the next level
* or last level entries pointing to physical memory.
*
* tbl: page table address
* rtbl: pointer to page table or physical memory
* index: start index to write
* eindex: end index to write - [index, eindex] written to
* flags: flags for pagetable entry to or in
* inc: increment to rtbl between each entry
* tmp1: temporary variable
*
* Preserves: tbl, eindex, flags, inc
* Corrupts: index, tmp1
* Returns: rtbl
*/
.macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
.Lpe\@: phys_to_pte \tmp1, \rtbl
orr \tmp1, \tmp1, \flags // tmp1 = table entry
str \tmp1, [\tbl, \index, lsl #3]
add \rtbl, \rtbl, \inc // rtbl = pa next level
add \index, \index, #1
cmp \index, \eindex
b.ls .Lpe\@
.endm
/*
* Compute indices of table entries from virtual address range. If multiple entries
* were needed in the previous page table level then the next page table level is assumed
* to be composed of multiple pages. (This effectively scales the end index).
*
* vstart: virtual address of start of range
* vend: virtual address of end of range - we map [vstart, vend]
* shift: shift used to transform virtual address into index
* order: #imm 2log(number of entries in page table)
* istart: index in table corresponding to vstart
* iend: index in table corresponding to vend
* count: On entry: how many extra entries were required in previous level, scales
* our end index.
* On exit: returns how many extra entries required for next page table level
*
* Preserves: vstart, vend
* Returns: istart, iend, count
*/
.macro compute_indices, vstart, vend, shift, order, istart, iend, count
ubfx \istart, \vstart, \shift, \order
ubfx \iend, \vend, \shift, \order
add \iend, \iend, \count, lsl \order
sub \count, \iend, \istart
.endm
/*
* Map memory for specified virtual address range. Each level of page table needed supports
* multiple entries. If a level requires n entries the next page table level is assumed to be
* formed from n pages.
*
* tbl: location of page table
* rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
* vstart: virtual address of start of range
* vend: virtual address of end of range - we map [vstart, vend - 1]
* flags: flags to use to map last level entries
* phys: physical address corresponding to vstart - physical memory is contiguous
* order: #imm 2log(number of entries in PGD table)
*
* If extra_shift is set, an extra level will be populated if the end address does
* not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
*
* Temporaries: istart, iend, tmp, count, sv - these need to be different registers
* Preserves: vstart, flags
* Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
*/
.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
sub \vend, \vend, #1
add \rtbl, \tbl, #PAGE_SIZE
mov \count, #0
.ifnb \extra_shift
tst \vend, #~((1 << (\extra_shift)) - 1)
b.eq .L_\@
compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
.endif
.L_\@:
compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
#if SWAPPER_PGTABLE_LEVELS > 3
compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
#endif
#if SWAPPER_PGTABLE_LEVELS > 2
compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
#endif
compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
.endm
/*
* Remap a subregion created with the map_memory macro with modified attributes
* or output address. The entire remapped region must have been covered in the
* invocation of map_memory.
*
* x0: last level table address (returned in first argument to map_memory)
* x1: start VA of the existing mapping
* x2: start VA of the region to update
* x3: end VA of the region to update (exclusive)
* x4: start PA associated with the region to update
* x5: attributes to set on the updated region
* x6: order of the last level mappings
*/
SYM_FUNC_START_LOCAL(remap_region)
sub x3, x3, #1 // make end inclusive
// Get the index offset for the start of the last level table
lsr x1, x1, x6
bfi x1, xzr, #0, #PAGE_SHIFT - 3
// Derive the start and end indexes into the last level table
// associated with the provided region
lsr x2, x2, x6
lsr x3, x3, x6
sub x2, x2, x1
sub x3, x3, x1
mov x1, #1
lsl x6, x1, x6 // block size at this level
populate_entries x0, x4, x2, x3, x5, x6, x7
ret
SYM_FUNC_END(remap_region)
SYM_FUNC_START_LOCAL(create_idmap)
mov x28, lr
/*
* The ID map carries a 1:1 mapping of the physical address range
* covered by the loaded image, which could be anywhere in DRAM. This
* means that the required size of the VA (== PA) space is decided at
* boot time, and could be more than the configured size of the VA
* space for ordinary kernel and user space mappings.
*
* There are three cases to consider here:
* - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
* the placement of the image. In this case, we configure one extra
* level of translation on the fly for the ID map only. (This case
* also covers 42-bit VA/52-bit PA on 64k pages).
*
* - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
* only happen when using 64k pages, in which case we need to extend
* the root level table rather than add a level. Note that we can
* treat this case as 'always extended' as long as we take care not
* to program an unsupported T0SZ value into the TCR register.
*
* - Combinations that would require two additional levels of
* translation are not supported, e.g., VA_BITS==36 on 16k pages, or
* VA_BITS==39/4k pages with 5-level paging, where the input address
* requires more than 47 or 48 bits, respectively.
*/
#if (VA_BITS < 48)
#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT)
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
/*
* If VA_BITS < 48, we have to configure an additional table level.
* First, we have to verify our assumption that the current value of
* VA_BITS was chosen such that all translation levels are fully
* utilised, and that lowering T0SZ will always result in an additional
* translation level to be configured.
*/
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif
#else
#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT)
#define EXTRA_SHIFT
/*
* If VA_BITS == 48, we don't have to configure an additional
* translation level, but the top-level table has more entries.
*/
#endif
adrp x0, init_idmap_pg_dir
adrp x3, _text
adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
mov_q x7, SWAPPER_RX_MMUFLAGS
map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
/* Remap the kernel page tables r/w in the ID map */
adrp x1, _text
adrp x2, init_pg_dir
adrp x3, init_pg_end
bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
mov_q x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
bl remap_region
/* Remap the FDT after the kernel image */
adrp x1, _text
adrp x22, _end + SWAPPER_BLOCK_SIZE
bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
mov_q x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
bl remap_region
/*
* Since the page tables have been populated with non-cacheable
* accesses (MMU disabled), invalidate those tables again to
* remove any speculatively loaded cache lines.
*/
cbnz x19, 0f // skip cache invalidation if MMU is on
dmb sy
adrp x0, init_idmap_pg_dir
adrp x1, init_idmap_pg_end
bl dcache_inval_poc
0: ret x28
SYM_FUNC_END(create_idmap)
SYM_FUNC_START_LOCAL(create_kernel_mapping)
adrp x0, init_pg_dir
mov_q x5, KIMAGE_VADDR // compile time __va(_text)
#ifdef CONFIG_RELOCATABLE
add x5, x5, x23 // add KASLR displacement
#endif
adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
mov_q x7, SWAPPER_RW_MMUFLAGS
map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
dsb ishst // sync with page table walker
ret
SYM_FUNC_END(create_kernel_mapping)
/*
* Initialize CPU registers with task-specific and cpu-specific context.
*
@ -489,33 +234,8 @@ SYM_FUNC_START_LOCAL(__primary_switched)
mov x0, x20
bl set_cpu_boot_mode_flag
// Clear BSS
adr_l x0, __bss_start
mov x1, xzr
adr_l x2, __bss_stop
sub x2, x2, x0
bl __pi_memset
dsb ishst // Make zero page visible to PTW
#if VA_BITS > 48
adr_l x8, vabits_actual // Set this early so KASAN early init
str x25, [x8] // ... observes the correct value
dc civac, x8 // Make visible to booting secondaries
#endif
#ifdef CONFIG_RANDOMIZE_BASE
adrp x5, memstart_offset_seed // Save KASLR linear map seed
strh w24, [x5, :lo12:memstart_offset_seed]
#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
mov x0, x21 // pass FDT address in x0
bl early_fdt_map // Try mapping the FDT early
mov x0, x20 // pass the full boot status
bl init_feature_override // Parse cpu feature overrides
#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
bl scs_patch_vmlinux
#endif
mov x0, x20
bl finalise_el2 // Prefer VHE if possible
@ -643,10 +363,13 @@ SYM_FUNC_START_LOCAL(secondary_startup)
* Common entry point for secondary CPUs.
*/
mov x20, x0 // preserve boot mode
#ifdef CONFIG_ARM64_VA_BITS_52
alternative_if ARM64_HAS_VA52
bl __cpu_secondary_check52bitva
#if VA_BITS > 48
ldr_l x0, vabits_actual
alternative_else_nop_endif
#endif
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
adrp x2, idmap_pg_dir
@ -749,15 +472,18 @@ SYM_FUNC_START(__enable_mmu)
ret
SYM_FUNC_END(__enable_mmu)
#ifdef CONFIG_ARM64_VA_BITS_52
SYM_FUNC_START(__cpu_secondary_check52bitva)
#if VA_BITS > 48
ldr_l x0, vabits_actual
cmp x0, #52
b.ne 2f
#ifndef CONFIG_ARM64_LPA2
mrs_s x0, SYS_ID_AA64MMFR2_EL1
and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK
cbnz x0, 2f
#else
mrs x0, id_aa64mmfr0_el1
sbfx x0, x0, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
cmp x0, #ID_AA64MMFR0_EL1_TGRAN_LPA2
b.ge 2f
#endif
update_early_cpu_boot_status \
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
@ -765,9 +491,9 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
wfi
b 1b
#endif
2: ret
SYM_FUNC_END(__cpu_secondary_check52bitva)
#endif
SYM_FUNC_START_LOCAL(__no_granule_support)
/* Indicate that this CPU can't boot and is stuck in the kernel */
@ -779,123 +505,18 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
b 1b
SYM_FUNC_END(__no_granule_support)
#ifdef CONFIG_RELOCATABLE
SYM_FUNC_START_LOCAL(__relocate_kernel)
/*
* Iterate over each entry in the relocation table, and apply the
* relocations in place.
*/
adr_l x9, __rela_start
adr_l x10, __rela_end
mov_q x11, KIMAGE_VADDR // default virtual offset
add x11, x11, x23 // actual virtual offset
0: cmp x9, x10
b.hs 1f
ldp x12, x13, [x9], #24
ldr x14, [x9, #-8]
cmp w13, #R_AARCH64_RELATIVE
b.ne 0b
add x14, x14, x23 // relocate
str x14, [x12, x23]
b 0b
1:
#ifdef CONFIG_RELR
/*
* Apply RELR relocations.
*
* RELR is a compressed format for storing relative relocations. The
* encoded sequence of entries looks like:
* [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
*
* i.e. start with an address, followed by any number of bitmaps. The
* address entry encodes 1 relocation. The subsequent bitmap entries
* encode up to 63 relocations each, at subsequent offsets following
* the last address entry.
*
* The bitmap entries must have 1 in the least significant bit. The
* assumption here is that an address cannot have 1 in lsb. Odd
* addresses are not supported. Any odd addresses are stored in the RELA
* section, which is handled above.
*
* Excluding the least significant bit in the bitmap, each non-zero
* bit in the bitmap represents a relocation to be applied to
* a corresponding machine word that follows the base address
* word. The second least significant bit represents the machine
* word immediately following the initial address, and each bit
* that follows represents the next word, in linear order. As such,
* a single bitmap can encode up to 63 relocations in a 64-bit object.
*
* In this implementation we store the address of the next RELR table
* entry in x9, the address being relocated by the current address or
* bitmap entry in x13 and the address being relocated by the current
* bit in x14.
*/
adr_l x9, __relr_start
adr_l x10, __relr_end
2: cmp x9, x10
b.hs 7f
ldr x11, [x9], #8
tbnz x11, #0, 3f // branch to handle bitmaps
add x13, x11, x23
ldr x12, [x13] // relocate address entry
add x12, x12, x23
str x12, [x13], #8 // adjust to start of bitmap
b 2b
3: mov x14, x13
4: lsr x11, x11, #1
cbz x11, 6f
tbz x11, #0, 5f // skip bit if not set
ldr x12, [x14] // relocate bit
add x12, x12, x23
str x12, [x14]
5: add x14, x14, #8 // move to next bit's address
b 4b
6: /*
* Move to the next bitmap's address. 8 is the word size, and 63 is the
* number of significant bits in a bitmap entry.
*/
add x13, x13, #(8 * 63)
b 2b
7:
#endif
ret
SYM_FUNC_END(__relocate_kernel)
#endif
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
adrp x2, init_idmap_pg_dir
bl __enable_mmu
#ifdef CONFIG_RELOCATABLE
adrp x23, KERNEL_START
and x23, x23, MIN_KIMG_ALIGN - 1
#ifdef CONFIG_RANDOMIZE_BASE
mov x0, x22
adrp x1, init_pg_end
adrp x1, early_init_stack
mov sp, x1
mov x29, xzr
bl __pi_kaslr_early_init
and x24, x0, #SZ_2M - 1 // capture memstart offset seed
bic x0, x0, #SZ_2M - 1
orr x23, x23, x0 // record kernel offset
#endif
#endif
bl clear_page_tables
bl create_kernel_mapping
mov x0, x20 // pass the full boot status
mov x1, x21 // pass the FDT
bl __pi_early_map_kernel // Map and relocate the kernel
adrp x1, init_pg_dir
load_ttbr1 x1, x1, x2
#ifdef CONFIG_RELOCATABLE
bl __relocate_kernel
#endif
ldr x8, =__primary_switched
adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8

View File

@ -21,6 +21,7 @@
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/cputype.h>
@ -779,7 +780,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
* Check that the access type matches.
* 0 => load, otherwise => store
*/
access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
access = (esr & ESR_ELx_WNR) ? HW_BREAKPOINT_W :
HW_BREAKPOINT_R;
if (!(access & hw_breakpoint_type(wp)))
continue;

View File

@ -36,6 +36,41 @@ PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
PROVIDE(__pi_id_aa64isar1_override = id_aa64isar1_override);
PROVIDE(__pi_id_aa64isar2_override = id_aa64isar2_override);
PROVIDE(__pi_id_aa64mmfr0_override = id_aa64mmfr0_override);
PROVIDE(__pi_id_aa64mmfr1_override = id_aa64mmfr1_override);
PROVIDE(__pi_id_aa64mmfr2_override = id_aa64mmfr2_override);
PROVIDE(__pi_id_aa64pfr0_override = id_aa64pfr0_override);
PROVIDE(__pi_id_aa64pfr1_override = id_aa64pfr1_override);
PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override);
PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override);
PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override);
PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings);
#ifdef CONFIG_CAVIUM_ERRATUM_27456
PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus);
#endif
PROVIDE(__pi__ctype = _ctype);
PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed);
PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir);
PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end);
PROVIDE(__pi_init_pg_dir = init_pg_dir);
PROVIDE(__pi_init_pg_end = init_pg_end);
PROVIDE(__pi_swapper_pg_dir = swapper_pg_dir);
PROVIDE(__pi__text = _text);
PROVIDE(__pi__stext = _stext);
PROVIDE(__pi__etext = _etext);
PROVIDE(__pi___start_rodata = __start_rodata);
PROVIDE(__pi___inittext_begin = __inittext_begin);
PROVIDE(__pi___inittext_end = __inittext_end);
PROVIDE(__pi___initdata_begin = __initdata_begin);
PROVIDE(__pi___initdata_end = __initdata_end);
PROVIDE(__pi__data = _data);
PROVIDE(__pi___bss_start = __bss_start);
PROVIDE(__pi__end = _end);
#ifdef CONFIG_KVM
/*

View File

@ -16,9 +16,7 @@ bool __ro_after_init __kaslr_is_enabled = false;
void __init kaslr_init(void)
{
if (cpuid_feature_extract_unsigned_field(arm64_sw_feature_override.val &
arm64_sw_feature_override.mask,
ARM64_SW_FEATURE_OVERRIDE_NOKASLR)) {
if (kaslr_disabled_cmdline()) {
pr_info("KASLR disabled on command line\n");
return;
}

View File

@ -595,7 +595,7 @@ int module_finalize(const Elf_Ehdr *hdr,
if (scs_is_dynamic()) {
s = find_section(hdr, sechdrs, ".init.eh_frame");
if (s)
scs_patch((void *)s->sh_addr, s->sh_size);
__pi_scs_patch((void *)s->sh_addr, s->sh_size);
}
return module_init_ftrace_plt(hdr, sechdrs, me);

3
arch/arm64/kernel/pi/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
relacheck

View File

@ -11,6 +11,9 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-fno-asynchronous-unwind-tables -fno-unwind-tables \
$(call cc-option,-fno-addrsig)
# this code may run with the MMU off so disable unaligned accesses
CFLAGS_map_range.o += -mstrict-align
# remove SCS flags from all objects in this directory
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
# disable LTO
@ -22,14 +25,26 @@ KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
hostprogs := relacheck
quiet_cmd_piobjcopy = $(quiet_cmd_objcopy)
cmd_piobjcopy = $(cmd_objcopy) && $(obj)/relacheck $(@) $(<)
$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
--remove-section=.note.gnu.property \
--prefix-alloc-sections=.init
$(obj)/%.pi.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
--remove-section=.note.gnu.property
$(obj)/%.pi.o: $(obj)/%.o $(obj)/relacheck FORCE
$(call if_changed,piobjcopy)
# ensure that all the lib- code ends up as __init code and data
$(obj)/lib-%.pi.o: OBJCOPYFLAGS += --prefix-alloc-sections=.init
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
obj-y := idreg-override.pi.o \
map_kernel.pi.o map_range.pi.o \
lib-fdt.pi.o lib-fdt_ro.pi.o
obj-$(CONFIG_RELOCATABLE) += relocate.pi.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o
obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o
extra-y := $(patsubst %.pi.o,%.o,$(obj-y))

View File

@ -14,6 +14,8 @@
#include <asm/cpufeature.h>
#include <asm/setup.h>
#include "pi.h"
#define FTR_DESC_NAME_LEN 20
#define FTR_DESC_FIELD_LEN 10
#define FTR_ALIAS_NAME_LEN 30
@ -21,15 +23,6 @@
static u64 __boot_status __initdata;
// temporary __prel64 related definitions
// to be removed when this code is moved under pi/
#define __prel64_initconst __initconst
#define PREL64(type, name) union { type *name; }
#define prel64_pointer(__d) (__d)
typedef bool filter_t(u64 val);
struct ftr_set_desc {
@ -66,6 +59,35 @@ static const struct ftr_set_desc mmfr1 __prel64_initconst = {
},
};
static bool __init mmfr2_varange_filter(u64 val)
{
int __maybe_unused feat;
if (val)
return false;
#ifdef CONFIG_ARM64_LPA2
feat = cpuid_feature_extract_signed_field(read_sysreg(id_aa64mmfr0_el1),
ID_AA64MMFR0_EL1_TGRAN_SHIFT);
if (feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2) {
id_aa64mmfr0_override.val |=
(ID_AA64MMFR0_EL1_TGRAN_LPA2 - 1) << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
}
#endif
return true;
}
static const struct ftr_set_desc mmfr2 __prel64_initconst = {
.name = "id_aa64mmfr2",
.override = &id_aa64mmfr2_override,
.fields = {
FIELD("varange", ID_AA64MMFR2_EL1_VARange_SHIFT, mmfr2_varange_filter),
{}
},
};
static bool __init pfr0_sve_filter(u64 val)
{
/*
@ -166,6 +188,7 @@ static const struct ftr_set_desc sw_features __prel64_initconst = {
.fields = {
FIELD("nokaslr", ARM64_SW_FEATURE_OVERRIDE_NOKASLR, NULL),
FIELD("hvhe", ARM64_SW_FEATURE_OVERRIDE_HVHE, hvhe_filter),
FIELD("rodataoff", ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF, NULL),
{}
},
};
@ -173,6 +196,7 @@ static const struct ftr_set_desc sw_features __prel64_initconst = {
static const
PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
{ &mmfr1 },
{ &mmfr2 },
{ &pfr0 },
{ &pfr1 },
{ &isar1 },
@ -197,6 +221,8 @@ static const struct {
{ "arm64.nomops", "id_aa64isar2.mops=0" },
{ "arm64.nomte", "id_aa64pfr1.mte=0" },
{ "nokaslr", "arm64_sw.nokaslr=1" },
{ "rodata=off", "arm64_sw.rodataoff=1" },
{ "arm64.nolva", "id_aa64mmfr2.varange=0" },
};
static int __init parse_hexdigit(const char *p, u64 *v)
@ -313,42 +339,35 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
} while (1);
}
static __init const u8 *get_bootargs_cmdline(void)
static __init const u8 *get_bootargs_cmdline(const void *fdt, int node)
{
static char const bootargs[] __initconst = "bootargs";
const u8 *prop;
void *fdt;
int node;
fdt = get_early_fdt_ptr();
if (!fdt)
return NULL;
node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
return NULL;
prop = fdt_getprop(fdt, node, "bootargs", NULL);
prop = fdt_getprop(fdt, node, bootargs, NULL);
if (!prop)
return NULL;
return strlen(prop) ? prop : NULL;
}
static __init void parse_cmdline(void)
static __init void parse_cmdline(const void *fdt, int chosen)
{
const u8 *prop = get_bootargs_cmdline();
static char const cmdline[] __initconst = CONFIG_CMDLINE;
const u8 *prop = get_bootargs_cmdline(fdt, chosen);
if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
__parse_cmdline(CONFIG_CMDLINE, true);
__parse_cmdline(cmdline, true);
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
__parse_cmdline(prop, true);
}
/* Keep checkers quiet */
void init_feature_override(u64 boot_status);
asmlinkage void __init init_feature_override(u64 boot_status)
void __init init_feature_override(u64 boot_status, const void *fdt,
int chosen)
{
struct arm64_ftr_override *override;
const struct ftr_set_desc *reg;
@ -364,7 +383,7 @@ asmlinkage void __init init_feature_override(u64 boot_status)
__boot_status = boot_status;
parse_cmdline();
parse_cmdline(fdt, chosen);
for (i = 0; i < ARRAY_SIZE(regs); i++) {
reg = prel64_pointer(regs[i].reg);
@ -373,3 +392,10 @@ asmlinkage void __init init_feature_override(u64 boot_status)
(unsigned long)(override + 1));
}
}
char * __init skip_spaces(const char *str)
{
while (isspace(*str))
++str;
return (char *)str;
}

View File

@ -14,69 +14,23 @@
#include <asm/archrandom.h>
#include <asm/memory.h>
#include <asm/pgtable.h>
/* taken from lib/string.c */
static char *__strstr(const char *s1, const char *s2)
#include "pi.h"
extern u16 memstart_offset_seed;
static u64 __init get_kaslr_seed(void *fdt, int node)
{
size_t l1, l2;
l2 = strlen(s2);
if (!l2)
return (char *)s1;
l1 = strlen(s1);
while (l1 >= l2) {
l1--;
if (!memcmp(s1, s2, l2))
return (char *)s1;
s1++;
}
return NULL;
}
static bool cmdline_contains_nokaslr(const u8 *cmdline)
{
const u8 *str;
str = __strstr(cmdline, "nokaslr");
return str == cmdline || (str > cmdline && *(str - 1) == ' ');
}
static bool is_kaslr_disabled_cmdline(void *fdt)
{
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
int node;
const u8 *prop;
node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
goto out;
prop = fdt_getprop(fdt, node, "bootargs", NULL);
if (!prop)
goto out;
if (cmdline_contains_nokaslr(prop))
return true;
if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
goto out;
return false;
}
out:
return cmdline_contains_nokaslr(CONFIG_CMDLINE);
}
static u64 get_kaslr_seed(void *fdt)
{
int node, len;
static char const seed_str[] __initconst = "kaslr-seed";
fdt64_t *prop;
u64 ret;
int len;
node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
return 0;
prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
prop = fdt_getprop_w(fdt, node, seed_str, &len);
if (!prop || len != sizeof(u64))
return 0;
@ -85,26 +39,28 @@ static u64 get_kaslr_seed(void *fdt)
return ret;
}
asmlinkage u64 kaslr_early_init(void *fdt)
u64 __init kaslr_early_init(void *fdt, int chosen)
{
u64 seed;
u64 seed, range;
if (is_kaslr_disabled_cmdline(fdt))
if (kaslr_disabled_cmdline())
return 0;
seed = get_kaslr_seed(fdt);
seed = get_kaslr_seed(fdt, chosen);
if (!seed) {
if (!__early_cpu_has_rndr() ||
!__arm64_rndr((unsigned long *)&seed))
return 0;
}
memstart_offset_seed = seed & U16_MAX;
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
* kernel image offset from the seed. Let's place the kernel in the
* middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
* the lower and upper quarters to avoid colliding with other
* allocations.
* 'middle' half of the VMALLOC area, and stay clear of the lower and
* upper quarters to avoid colliding with other allocations.
*/
return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
range = (VMALLOC_END - KIMAGE_VADDR) / 2;
return range / 2 + (((__uint128_t)range * seed) >> 64);
}

View File

@ -0,0 +1,253 @@
// SPDX-License-Identifier: GPL-2.0-only
// Copyright 2023 Google LLC
// Author: Ard Biesheuvel <ardb@google.com>
#include <linux/init.h>
#include <linux/libfdt.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/sizes.h>
#include <linux/string.h>
#include <asm/memory.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include "pi.h"
extern const u8 __eh_frame_start[], __eh_frame_end[];
extern void idmap_cpu_replace_ttbr1(void *pgdir);
static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
void *start, void *end, pgprot_t prot,
bool may_use_cont, int root_level)
{
map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
prot, root_level, (pte_t *)pg_dir, may_use_cont, 0);
}
static void __init unmap_segment(pgd_t *pg_dir, u64 va_offset, void *start,
void *end, int root_level)
{
map_segment(pg_dir, NULL, va_offset, start, end, __pgprot(0),
false, root_level);
}
static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
{
bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
pgprot_t text_prot = PAGE_KERNEL_ROX;
pgprot_t data_prot = PAGE_KERNEL;
pgprot_t prot;
/*
* External debuggers may need to write directly to the text mapping to
* install SW breakpoints. Allow this (only) when explicitly requested
* with rodata=off.
*/
if (arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF))
text_prot = PAGE_KERNEL_EXEC;
/*
* We only enable the shadow call stack dynamically if we are running
* on a system that does not implement PAC or BTI. PAC and SCS provide
* roughly the same level of protection, and BTI relies on the PACIASP
* instructions serving as landing pads, preventing us from patching
* those instructions into something else.
*/
if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && cpu_has_pac())
enable_scs = false;
if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && cpu_has_bti()) {
enable_scs = false;
/*
* If we have a CPU that supports BTI and a kernel built for
* BTI then mark the kernel executable text as guarded pages
* now so we don't have to rewrite the page tables later.
*/
text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
}
/* Map all code read-write on the first pass if needed */
twopass |= enable_scs;
prot = twopass ? data_prot : text_prot;
map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
!twopass, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
__inittext_begin, data_prot, false, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
__inittext_end, prot, false, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
__initdata_end, data_prot, false, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
true, root_level);
dsb(ishst);
idmap_cpu_replace_ttbr1(init_pg_dir);
if (twopass) {
if (IS_ENABLED(CONFIG_RELOCATABLE))
relocate_kernel(kaslr_offset);
if (enable_scs) {
scs_patch(__eh_frame_start + va_offset,
__eh_frame_end - __eh_frame_start);
asm("ic ialluis");
dynamic_scs_is_enabled = true;
}
/*
* Unmap the text region before remapping it, to avoid
* potential TLB conflicts when creating the contiguous
* descriptors.
*/
unmap_segment(init_pg_dir, va_offset, _stext, _etext,
root_level);
dsb(ishst);
isb();
__tlbi(vmalle1);
isb();
/*
* Remap these segments with different permissions
* No new page table allocations should be needed
*/
map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
text_prot, true, root_level);
map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
__inittext_end, text_prot, false, root_level);
}
/* Copy the root page table to its final location */
memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
dsb(ishst);
idmap_cpu_replace_ttbr1(swapper_pg_dir);
}
static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
{
u64 sctlr = read_sysreg(sctlr_el1);
u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
asm(" msr sctlr_el1, %0 ;"
" isb ;"
" msr ttbr0_el1, %1 ;"
" msr tcr_el1, %2 ;"
" isb ;"
" tlbi vmalle1 ;"
" dsb nsh ;"
" isb ;"
" msr sctlr_el1, %3 ;"
" isb ;"
:: "r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
}
static void __init remap_idmap_for_lpa2(void)
{
/* clear the bits that change meaning once LPA2 is turned on */
pteval_t mask = PTE_SHARED;
/*
* We have to clear bits [9:8] in all block or page descriptors in the
* initial ID map, as otherwise they will be (mis)interpreted as
* physical address bits once we flick the LPA2 switch (TCR.DS). Since
* we cannot manipulate live descriptors in that way without creating
* potential TLB conflicts, let's create another temporary ID map in a
* LPA2 compatible fashion, and update the initial ID map while running
* from that.
*/
create_init_idmap(init_pg_dir, mask);
dsb(ishst);
set_ttbr0_for_lpa2((u64)init_pg_dir);
/*
* Recreate the initial ID map with the same granularity as before.
* Don't bother with the FDT, we no longer need it after this.
*/
memset(init_idmap_pg_dir, 0,
(u64)init_idmap_pg_dir - (u64)init_idmap_pg_end);
create_init_idmap(init_idmap_pg_dir, mask);
dsb(ishst);
/* switch back to the updated initial ID map */
set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
/* wipe the temporary ID map from memory */
memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
}
static void __init map_fdt(u64 fdt)
{
static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
u64 efdt = fdt + MAX_FDT_SIZE;
u64 ptep = (u64)ptes;
/*
* Map up to MAX_FDT_SIZE bytes, but avoid overlap with
* the kernel image.
*/
map_range(&ptep, fdt, (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
(pte_t *)init_idmap_pg_dir, false, 0);
dsb(ishst);
}
asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
{
static char const chosen_str[] __initconst = "/chosen";
u64 va_base, pa_base = (u64)&_text;
u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
int root_level = 4 - CONFIG_PGTABLE_LEVELS;
int va_bits = VA_BITS;
int chosen;
map_fdt((u64)fdt);
/* Clear BSS and the initial page tables */
memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
/* Parse the command line for CPU feature overrides */
chosen = fdt_path_offset(fdt, chosen_str);
init_feature_override(boot_status, fdt, chosen);
if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
va_bits = VA_BITS_MIN;
} else if (IS_ENABLED(CONFIG_ARM64_LPA2) && !cpu_has_lpa2()) {
va_bits = VA_BITS_MIN;
root_level++;
}
if (va_bits > VA_BITS_MIN)
sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));
/*
* The virtual KASLR displacement modulo 2MiB is decided by the
* physical placement of the image, as otherwise, we might not be able
* to create the early kernel mapping using 2 MiB block descriptors. So
* take the low bits of the KASLR offset from the physical address, and
* fill in the high bits from the seed.
*/
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
u64 kaslr_seed = kaslr_early_init(fdt, chosen);
if (kaslr_seed && kaslr_requires_kpti())
arm64_use_ng_mappings = true;
kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
}
if (IS_ENABLED(CONFIG_ARM64_LPA2) && va_bits > VA_BITS_MIN)
remap_idmap_for_lpa2();
va_base = KIMAGE_VADDR + kaslr_offset;
map_kernel(kaslr_offset, va_base - pa_base, root_level);
}

View File

@ -0,0 +1,105 @@
// SPDX-License-Identifier: GPL-2.0-only
// Copyright 2023 Google LLC
// Author: Ard Biesheuvel <ardb@google.com>
#include <linux/types.h>
#include <linux/sizes.h>
#include <asm/memory.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include "pi.h"
/**
* map_range - Map a contiguous range of physical pages into virtual memory
*
* @pte: Address of physical pointer to array of pages to
* allocate page tables from
* @start: Virtual address of the start of the range
* @end: Virtual address of the end of the range (exclusive)
* @pa: Physical address of the start of the range
* @prot: Access permissions of the range
* @level: Translation level for the mapping
* @tbl: The level @level page table to create the mappings in
* @may_use_cont: Whether the use of the contiguous attribute is allowed
* @va_offset: Offset between a physical page and its current mapping
* in the VA space
*/
void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
{
u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
int lshift = (3 - level) * (PAGE_SHIFT - 3);
u64 lmask = (PAGE_SIZE << lshift) - 1;
start &= PAGE_MASK;
pa &= PAGE_MASK;
/* Advance tbl to the entry that covers start */
tbl += (start >> (lshift + PAGE_SHIFT)) % PTRS_PER_PTE;
/*
* Set the right block/page bits for this level unless we are
* clearing the mapping
*/
if (protval)
protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
while (start < end) {
u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
if (level < 3 && (start | next | pa) & lmask) {
/*
* This chunk needs a finer grained mapping. Create a
* table mapping if necessary and recurse.
*/
if (pte_none(*tbl)) {
*tbl = __pte(__phys_to_pte_val(*pte) |
PMD_TYPE_TABLE | PMD_TABLE_UXN);
*pte += PTRS_PER_PTE * sizeof(pte_t);
}
map_range(pte, start, next, pa, prot, level + 1,
(pte_t *)(__pte_to_phys(*tbl) + va_offset),
may_use_cont, va_offset);
} else {
/*
* Start a contiguous range if start and pa are
* suitably aligned
*/
if (((start | pa) & cmask) == 0 && may_use_cont)
protval |= PTE_CONT;
/*
* Clear the contiguous attribute if the remaining
* range does not cover a contiguous block
*/
if ((end & ~cmask) <= start)
protval &= ~PTE_CONT;
/* Put down a block or page mapping */
*tbl = __pte(__phys_to_pte_val(pa) | protval);
}
pa += next - start;
start = next;
tbl++;
}
}
asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
{
u64 ptep = (u64)pg_dir + PAGE_SIZE;
pgprot_t text_prot = PAGE_KERNEL_ROX;
pgprot_t data_prot = PAGE_KERNEL;
pgprot_val(text_prot) &= ~clrmask;
pgprot_val(data_prot) &= ~clrmask;
map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
return ptep;
}

View File

@ -4,16 +4,17 @@
* Author: Ard Biesheuvel <ardb@google.com>
*/
#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/printk.h>
#include <linux/types.h>
#include <asm/cacheflush.h>
#include <asm/scs.h>
#include "pi.h"
bool dynamic_scs_is_enabled;
//
// This minimal DWARF CFI parser is partially based on the code in
// arch/arc/kernel/unwind.c, and on the document below:
@ -49,8 +50,6 @@
#define DW_CFA_GNU_negative_offset_extended 0x2f
#define DW_CFA_hi_user 0x3f
extern const u8 __eh_frame_start[], __eh_frame_end[];
enum {
PACIASP = 0xd503233f,
AUTIASP = 0xd50323bf,
@ -81,7 +80,11 @@ static void __always_inline scs_patch_loc(u64 loc)
*/
return;
}
dcache_clean_pou(loc, loc + sizeof(u32));
if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_CLEAN_CACHE))
asm("dc civac, %0" :: "r"(loc));
else
asm(ALTERNATIVE("dc cvau, %0", "nop", ARM64_HAS_CACHE_IDC)
:: "r"(loc));
}
/*
@ -128,10 +131,10 @@ struct eh_frame {
};
};
static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
bool fde_has_augmentation_data,
int code_alignment_factor,
bool dry_run)
static int scs_handle_fde_frame(const struct eh_frame *frame,
bool fde_has_augmentation_data,
int code_alignment_factor,
bool dry_run)
{
int size = frame->size - offsetof(struct eh_frame, opcodes) + 4;
u64 loc = (u64)offset_to_ptr(&frame->initial_loc);
@ -198,14 +201,13 @@ static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
break;
default:
pr_err("unhandled opcode: %02x in FDE frame %lx\n", opcode[-1], (uintptr_t)frame);
return -ENOEXEC;
}
}
return 0;
}
int noinstr scs_patch(const u8 eh_frame[], int size)
int scs_patch(const u8 eh_frame[], int size)
{
const u8 *p = eh_frame;
@ -250,13 +252,3 @@ int noinstr scs_patch(const u8 eh_frame[], int size)
}
return 0;
}
asmlinkage void __init scs_patch_vmlinux(void)
{
if (!should_patch_pac_into_scs())
return;
WARN_ON(scs_patch(__eh_frame_start, __eh_frame_end - __eh_frame_start));
icache_inval_all_pou();
isb();
}

36
arch/arm64/kernel/pi/pi.h Normal file
View File

@ -0,0 +1,36 @@
// SPDX-License-Identifier: GPL-2.0-only
// Copyright 2023 Google LLC
// Author: Ard Biesheuvel <ardb@google.com>
#include <linux/types.h>
#define __prel64_initconst __section(".init.rodata.prel64")
#define PREL64(type, name) union { type *name; prel64_t name ## _prel; }
#define prel64_pointer(__d) (typeof(__d))prel64_to_pointer(&__d##_prel)
typedef volatile signed long prel64_t;
static inline void *prel64_to_pointer(const prel64_t *offset)
{
if (!*offset)
return NULL;
return (void *)offset + *offset;
}
extern bool dynamic_scs_is_enabled;
extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
void init_feature_override(u64 boot_status, const void *fdt, int chosen);
u64 kaslr_early_init(void *fdt, int chosen);
void relocate_kernel(u64 offset);
int scs_patch(const u8 eh_frame[], int size);
void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);

View File

@ -0,0 +1,130 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2023 - Google LLC
* Author: Ard Biesheuvel <ardb@google.com>
*/
#include <elf.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define HOST_ORDER ELFDATA2LSB
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define HOST_ORDER ELFDATA2MSB
#endif
static Elf64_Ehdr *ehdr;
static Elf64_Shdr *shdr;
static const char *strtab;
static bool swap;
static uint64_t swab_elfxword(uint64_t val)
{
return swap ? __builtin_bswap64(val) : val;
}
static uint32_t swab_elfword(uint32_t val)
{
return swap ? __builtin_bswap32(val) : val;
}
static uint16_t swab_elfhword(uint16_t val)
{
return swap ? __builtin_bswap16(val) : val;
}
int main(int argc, char *argv[])
{
struct stat stat;
int fd, ret;
if (argc < 3) {
fprintf(stderr, "file arguments missing\n");
exit(EXIT_FAILURE);
}
fd = open(argv[1], O_RDWR);
if (fd < 0) {
fprintf(stderr, "failed to open %s\n", argv[1]);
exit(EXIT_FAILURE);
}
ret = fstat(fd, &stat);
if (ret < 0) {
fprintf(stderr, "failed to stat() %s\n", argv[1]);
exit(EXIT_FAILURE);
}
ehdr = mmap(0, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (ehdr == MAP_FAILED) {
fprintf(stderr, "failed to mmap() %s\n", argv[1]);
exit(EXIT_FAILURE);
}
swap = ehdr->e_ident[EI_DATA] != HOST_ORDER;
shdr = (void *)ehdr + swab_elfxword(ehdr->e_shoff);
strtab = (void *)ehdr +
swab_elfxword(shdr[swab_elfhword(ehdr->e_shstrndx)].sh_offset);
for (int i = 0; i < swab_elfhword(ehdr->e_shnum); i++) {
unsigned long info, flags;
bool prel64 = false;
Elf64_Rela *rela;
int numrela;
if (swab_elfword(shdr[i].sh_type) != SHT_RELA)
continue;
/* only consider RELA sections operating on data */
info = swab_elfword(shdr[i].sh_info);
flags = swab_elfxword(shdr[info].sh_flags);
if ((flags & (SHF_ALLOC | SHF_EXECINSTR)) != SHF_ALLOC)
continue;
/*
* We generally don't permit ABS64 relocations in the code that
* runs before relocation processing occurs. If statically
* initialized absolute symbol references are unavoidable, they
* may be emitted into a *.rodata.prel64 section and they will
* be converted to place-relative 64-bit references. This
* requires special handling in the referring code.
*/
if (strstr(strtab + swab_elfword(shdr[info].sh_name),
".rodata.prel64")) {
prel64 = true;
}
rela = (void *)ehdr + swab_elfxword(shdr[i].sh_offset);
numrela = swab_elfxword(shdr[i].sh_size) / sizeof(*rela);
for (int j = 0; j < numrela; j++) {
uint64_t info = swab_elfxword(rela[j].r_info);
if (ELF64_R_TYPE(info) != R_AARCH64_ABS64)
continue;
if (prel64) {
/* convert ABS64 into PREL64 */
info ^= R_AARCH64_ABS64 ^ R_AARCH64_PREL64;
rela[j].r_info = swab_elfxword(info);
} else {
fprintf(stderr,
"Unexpected absolute relocations detected in %s\n",
argv[2]);
close(fd);
unlink(argv[1]);
exit(EXIT_FAILURE);
}
}
}
close(fd);
return 0;
}

View File

@ -0,0 +1,64 @@
// SPDX-License-Identifier: GPL-2.0-only
// Copyright 2023 Google LLC
// Authors: Ard Biesheuvel <ardb@google.com>
// Peter Collingbourne <pcc@google.com>
#include <linux/elf.h>
#include <linux/init.h>
#include <linux/types.h>
#include "pi.h"
extern const Elf64_Rela rela_start[], rela_end[];
extern const u64 relr_start[], relr_end[];
void __init relocate_kernel(u64 offset)
{
u64 *place = NULL;
for (const Elf64_Rela *rela = rela_start; rela < rela_end; rela++) {
if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
continue;
*(u64 *)(rela->r_offset + offset) = rela->r_addend + offset;
}
if (!IS_ENABLED(CONFIG_RELR) || !offset)
return;
/*
* Apply RELR relocations.
*
* RELR is a compressed format for storing relative relocations. The
* encoded sequence of entries looks like:
* [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
*
* i.e. start with an address, followed by any number of bitmaps. The
* address entry encodes 1 relocation. The subsequent bitmap entries
* encode up to 63 relocations each, at subsequent offsets following
* the last address entry.
*
* The bitmap entries must have 1 in the least significant bit. The
* assumption here is that an address cannot have 1 in lsb. Odd
* addresses are not supported. Any odd addresses are stored in the
* RELA section, which is handled above.
*
* With the exception of the least significant bit, each bit in the
* bitmap corresponds with a machine word that follows the base address
* word, and the bit value indicates whether or not a relocation needs
* to be applied to it. The second least significant bit represents the
* machine word immediately following the initial address, and each bit
* that follows represents the next word, in linear order. As such, a
* single bitmap can encode up to 63 relocations in a 64-bit object.
*/
for (const u64 *relr = relr_start; relr < relr_end; relr++) {
if ((*relr & 1) == 0) {
place = (u64 *)(*relr + offset);
*place++ += offset;
} else {
for (u64 *p = place, r = *relr >> 1; r; p++, r >>= 1)
if (r & 1)
*p += offset;
place += 63;
}
}
}

View File

@ -371,6 +371,21 @@ static struct break_hook kprobes_break_ss_hook = {
.fn = kprobe_breakpoint_ss_handler,
};
static int __kprobes
kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
{
if (regs->pc != (unsigned long)__kretprobe_trampoline)
return DBG_HOOK_ERROR;
regs->pc = kretprobe_trampoline_handler(regs, (void *)regs->regs[29]);
return DBG_HOOK_HANDLED;
}
static struct break_hook kretprobes_break_hook = {
.imm = KRETPROBES_BRK_IMM,
.fn = kretprobe_breakpoint_handler,
};
/*
* Provide a blacklist of symbols identifying ranges which cannot be kprobed.
* This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
@ -396,11 +411,6 @@ int __init arch_populate_kprobe_blacklist(void)
return ret;
}
void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
{
return (void *)kretprobe_trampoline_handler(regs, (void *)regs->regs[29]);
}
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@ -420,6 +430,7 @@ int __init arch_init_kprobes(void)
{
register_kernel_break_hook(&kprobes_break_hook);
register_kernel_break_hook(&kprobes_break_ss_hook);
register_kernel_break_hook(&kretprobes_break_hook);
return 0;
}

View File

@ -4,83 +4,17 @@
*/
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/asm-bug.h>
#include <asm/assembler.h>
.text
.macro save_all_base_regs
stp x0, x1, [sp, #S_X0]
stp x2, x3, [sp, #S_X2]
stp x4, x5, [sp, #S_X4]
stp x6, x7, [sp, #S_X6]
stp x8, x9, [sp, #S_X8]
stp x10, x11, [sp, #S_X10]
stp x12, x13, [sp, #S_X12]
stp x14, x15, [sp, #S_X14]
stp x16, x17, [sp, #S_X16]
stp x18, x19, [sp, #S_X18]
stp x20, x21, [sp, #S_X20]
stp x22, x23, [sp, #S_X22]
stp x24, x25, [sp, #S_X24]
stp x26, x27, [sp, #S_X26]
stp x28, x29, [sp, #S_X28]
add x0, sp, #PT_REGS_SIZE
stp lr, x0, [sp, #S_LR]
/*
* Construct a useful saved PSTATE
*/
mrs x0, nzcv
mrs x1, daif
orr x0, x0, x1
mrs x1, CurrentEL
orr x0, x0, x1
mrs x1, SPSel
orr x0, x0, x1
stp xzr, x0, [sp, #S_PC]
.endm
.macro restore_all_base_regs
ldr x0, [sp, #S_PSTATE]
and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT)
msr nzcv, x0
ldp x0, x1, [sp, #S_X0]
ldp x2, x3, [sp, #S_X2]
ldp x4, x5, [sp, #S_X4]
ldp x6, x7, [sp, #S_X6]
ldp x8, x9, [sp, #S_X8]
ldp x10, x11, [sp, #S_X10]
ldp x12, x13, [sp, #S_X12]
ldp x14, x15, [sp, #S_X14]
ldp x16, x17, [sp, #S_X16]
ldp x18, x19, [sp, #S_X18]
ldp x20, x21, [sp, #S_X20]
ldp x22, x23, [sp, #S_X22]
ldp x24, x25, [sp, #S_X24]
ldp x26, x27, [sp, #S_X26]
ldp x28, x29, [sp, #S_X28]
.endm
SYM_CODE_START(__kretprobe_trampoline)
sub sp, sp, #PT_REGS_SIZE
save_all_base_regs
/* Setup a frame pointer. */
add x29, sp, #S_FP
mov x0, sp
bl trampoline_probe_handler
/*
* Replace trampoline address in lr with actual orig_ret_addr return
* address.
* Trigger a breakpoint exception. The PC will be adjusted by
* kretprobe_breakpoint_handler(), and no subsequent instructions will
* be executed from the trampoline.
*/
mov lr, x0
/* The frame pointer (x29) is restored with other registers. */
restore_all_base_regs
add sp, sp, #PT_REGS_SIZE
ret
brk #KRETPROBES_BRK_IMM
ASM_BUG()
SYM_CODE_END(__kretprobe_trampoline)

View File

@ -290,9 +290,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
fpsimd_preserve_current_state();
*dst = *src;
/* We rely on the above assignment to initialize dst's thread_flags: */
BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK));
/*
* Detach src's sve_state (if any) from dst so that it does not
* get erroneously used or freed prematurely. dst's copies

View File

@ -174,7 +174,6 @@ static void ptrace_hbptriggered(struct perf_event *bp,
struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp);
const char *desc = "Hardware breakpoint trap (ptrace)";
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
int si_errno = 0;
int i;
@ -196,7 +195,7 @@ static void ptrace_hbptriggered(struct perf_event *bp,
desc);
return;
}
#endif
arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
}
@ -698,6 +697,39 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
return ret;
}
static int fpmr_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
if (!system_supports_fpmr())
return -EINVAL;
if (target == current)
fpsimd_preserve_current_state();
return membuf_store(&to, target->thread.uw.fpmr);
}
static int fpmr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
int ret;
unsigned long fpmr;
if (!system_supports_fpmr())
return -EINVAL;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count);
if (ret)
return ret;
target->thread.uw.fpmr = fpmr;
fpsimd_flush_task_state(target);
return 0;
}
static int system_call_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
@ -1419,6 +1451,7 @@ enum aarch64_regset {
REGSET_HW_BREAK,
REGSET_HW_WATCH,
#endif
REGSET_FPMR,
REGSET_SYSTEM_CALL,
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
@ -1497,6 +1530,14 @@ static const struct user_regset aarch64_regsets[] = {
.regset_get = system_call_get,
.set = system_call_set,
},
[REGSET_FPMR] = {
.core_note_type = NT_ARM_FPMR,
.n = 1,
.size = sizeof(u64),
.align = sizeof(u64),
.regset_get = fpmr_get,
.set = fpmr_set,
},
#ifdef CONFIG_ARM64_SVE
[REGSET_SVE] = { /* Scalable Vector Extension */
.core_note_type = NT_ARM_SVE,
@ -1597,7 +1638,6 @@ static const struct user_regset_view user_aarch64_view = {
.regsets = aarch64_regsets, .n = ARRAY_SIZE(aarch64_regsets)
};
#ifdef CONFIG_COMPAT
enum compat_regset {
REGSET_COMPAT_GPR,
REGSET_COMPAT_VFP,
@ -1854,6 +1894,7 @@ static const struct user_regset_view user_aarch32_ptrace_view = {
.regsets = aarch32_ptrace_regsets, .n = ARRAY_SIZE(aarch32_ptrace_regsets)
};
#ifdef CONFIG_COMPAT
static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
compat_ulong_t __user *ret)
{
@ -2115,7 +2156,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_COMPAT
/*
* Core dumping of 32-bit tasks or compat ptrace requests must use the
* user_aarch32_view compatible with arm32. Native ptrace requests on
@ -2126,7 +2166,7 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
return &user_aarch32_view;
else if (is_compat_thread(task_thread_info(task)))
return &user_aarch32_ptrace_view;
#endif
return &user_aarch64_view;
}

View File

@ -166,21 +166,6 @@ static void __init smp_build_mpidr_hash(void)
pr_warn("Large number of MPIDR hash buckets detected\n");
}
static void *early_fdt_ptr __initdata;
void __init *get_early_fdt_ptr(void)
{
return early_fdt_ptr;
}
asmlinkage void __init early_fdt_map(u64 dt_phys)
{
int fdt_size;
early_fixmap_init();
early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL);
}
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;
@ -298,13 +283,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
kaslr_init();
/*
* If know now we are going to need KPTI then use non-global
* mappings from the start, avoiding the cost of rewriting
* everything later.
*/
arm64_use_ng_mappings = kaslr_requires_kpti();
early_fixmap_init();
early_ioremap_init();
@ -320,9 +298,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
dynamic_scs_init();
/*
* Unmask asynchronous aborts and fiq after bringing up possible
* earlycon. (Report possible System Errors once we can report this
* occurred).
* Unmask SError as soon as possible after initializing earlycon so
* that we can report any SErrors immediately.
*/
local_daif_restore(DAIF_PROCCTX_NOIRQ);

View File

@ -16,8 +16,8 @@
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/string.h>
#include <linux/resume_user_mode.h>
#include <linux/ratelimit.h>
#include <linux/rseq.h>
#include <linux/syscalls.h>
#include <asm/daifflags.h>
@ -60,6 +60,7 @@ struct rt_sigframe_user_layout {
unsigned long tpidr2_offset;
unsigned long za_offset;
unsigned long zt_offset;
unsigned long fpmr_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@ -182,6 +183,8 @@ struct user_ctxs {
u32 za_size;
struct zt_context __user *zt;
u32 zt_size;
struct fpmr_context __user *fpmr;
u32 fpmr_size;
};
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
@ -227,6 +230,33 @@ static int restore_fpsimd_context(struct user_ctxs *user)
return err ? -EFAULT : 0;
}
static int preserve_fpmr_context(struct fpmr_context __user *ctx)
{
int err = 0;
current->thread.uw.fpmr = read_sysreg_s(SYS_FPMR);
__put_user_error(FPMR_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
__put_user_error(current->thread.uw.fpmr, &ctx->fpmr, err);
return err;
}
static int restore_fpmr_context(struct user_ctxs *user)
{
u64 fpmr;
int err = 0;
if (user->fpmr_size != sizeof(*user->fpmr))
return -EINVAL;
__get_user_error(fpmr, &user->fpmr->fpmr, err);
if (!err)
write_sysreg_s(fpmr, SYS_FPMR);
return err;
}
#ifdef CONFIG_ARM64_SVE
@ -590,6 +620,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->tpidr2 = NULL;
user->za = NULL;
user->zt = NULL;
user->fpmr = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@ -684,6 +715,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->zt_size = size;
break;
case FPMR_MAGIC:
if (!system_supports_fpmr())
goto invalid;
if (user->fpmr)
goto invalid;
user->fpmr = (struct fpmr_context __user *)head;
user->fpmr_size = size;
break;
case EXTRA_MAGIC:
if (have_extra_context)
goto invalid;
@ -806,6 +848,9 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0 && system_supports_tpidr2() && user.tpidr2)
err = restore_tpidr2_context(&user);
if (err == 0 && system_supports_fpmr() && user.fpmr)
err = restore_fpmr_context(&user);
if (err == 0 && system_supports_sme() && user.za)
err = restore_za_context(&user);
@ -928,6 +973,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
}
}
if (system_supports_fpmr()) {
err = sigframe_alloc(user, &user->fpmr_offset,
sizeof(struct fpmr_context));
if (err)
return err;
}
return sigframe_alloc_end(user);
}
@ -983,6 +1035,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_tpidr2_context(tpidr2_ctx);
}
/* FPMR if supported */
if (system_supports_fpmr() && err == 0) {
struct fpmr_context __user *fpmr_ctx =
apply_user_offset(user, user->fpmr_offset);
err |= preserve_fpmr_context(fpmr_ctx);
}
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
@ -1207,7 +1266,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* the kernel can handle, and then we build all the user-level signal handling
* stack-frames in one go after that.
*/
static void do_signal(struct pt_regs *regs)
void do_signal(struct pt_regs *regs)
{
unsigned long continue_addr = 0, restart_addr = 0;
int retval = 0;
@ -1278,41 +1337,6 @@ static void do_signal(struct pt_regs *regs)
restore_saved_sigmask();
}
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
{
do {
if (thread_flags & _TIF_NEED_RESCHED) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);
schedule();
} else {
local_daif_restore(DAIF_PROCCTX);
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
clear_thread_flag(TIF_MTE_ASYNC_FAULT);
send_sig_fault(SIGSEGV, SEGV_MTEAERR,
(void __user *)NULL, current);
}
if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
do_signal(regs);
if (thread_flags & _TIF_NOTIFY_RESUME)
resume_user_mode_work(regs);
if (thread_flags & _TIF_FOREIGN_FPSTATE)
fpsimd_restore_current_state();
}
local_daif_mask();
thread_flags = read_thread_flags();
} while (thread_flags & _TIF_WORK_MASK);
}
unsigned long __ro_after_init signal_minsigstksz;
/*

View File

@ -102,9 +102,6 @@ SYM_CODE_START(cpu_resume)
mov x0, xzr
bl init_kernel_el
mov x19, x0 // preserve boot mode
#if VA_BITS > 48
ldr_l x0, vabits_actual
#endif
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir

View File

@ -20,14 +20,11 @@ long sys_ni_syscall(void);
static long do_ni_syscall(struct pt_regs *regs, int scno)
{
#ifdef CONFIG_COMPAT
long ret;
if (is_compat_task()) {
ret = compat_arm_syscall(regs, scno);
long ret = compat_arm_syscall(regs, scno);
if (ret != -ENOSYS)
return ret;
}
#endif
return sys_ni_syscall();
}

View File

@ -126,9 +126,9 @@ jiffies = jiffies_64;
#ifdef CONFIG_UNWIND_TABLES
#define UNWIND_DATA_SECTIONS \
.eh_frame : { \
__eh_frame_start = .; \
__pi___eh_frame_start = .; \
*(.eh_frame) \
__eh_frame_end = .; \
__pi___eh_frame_end = .; \
}
#else
#define UNWIND_DATA_SECTIONS
@ -270,15 +270,15 @@ SECTIONS
HYPERVISOR_RELOC_SECTION
.rela.dyn : ALIGN(8) {
__rela_start = .;
__pi_rela_start = .;
*(.rela .rela*)
__rela_end = .;
__pi_rela_end = .;
}
.relr.dyn : ALIGN(8) {
__relr_start = .;
__pi_relr_start = .;
*(.relr.dyn)
__relr_end = .;
__pi_relr_end = .;
}
. = ALIGN(SEGMENT_ALIGN);
@ -311,12 +311,17 @@ SECTIONS
__pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin);
_edata = .;
/* start of zero-init region */
BSS_SECTION(SBSS_ALIGN, 0, 0)
. = ALIGN(PAGE_SIZE);
init_pg_dir = .;
. += INIT_DIR_SIZE;
init_pg_end = .;
/* end of zero-init region */
. += SZ_4K; /* stack for the early C runtime */
early_init_stack = .;
. = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);

View File

@ -153,6 +153,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.sve_vl = vcpu->arch.sve_max_vl;
fp_state.sme_state = NULL;
fp_state.svcr = &vcpu->arch.svcr;
fp_state.fpmr = &vcpu->arch.fpmr;
fp_state.fp_type = &vcpu->arch.fp_type;
if (vcpu_has_sve(vcpu))

View File

@ -805,7 +805,7 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
.pgd = (kvm_pteref_t)kvm->mm->pgd,
.ia_bits = vabits_actual,
.start_level = (KVM_PGTABLE_LAST_LEVEL -
CONFIG_PGTABLE_LEVELS + 1),
ARM64_HW_PGTABLE_LEVELS(pgt.ia_bits) + 1),
.mm_ops = &kvm_user_mm_ops,
};
unsigned long flags;
@ -1874,16 +1874,9 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
/*
* The ID map may be configured to use an extended virtual address
* range. This is only the case if system RAM is out of range for the
* currently configured page size and VA_BITS_MIN, in which case we will
* also need the extended virtual range for the HYP ID map, or we won't
* be able to enable the EL2 MMU.
*
* However, in some cases the ID map may be configured for fewer than
* the number of VA bits used by the regular kernel stage 1. This
* happens when VA_BITS=52 and the kernel image is placed in PA space
* below 48 bits.
* The ID map is always configured for 48 bits of translation, which
* may be fewer than the number of VA bits used by the regular kernel
* stage 1, when VA_BITS=52.
*
* At EL2, there is only one TTBR register, and we can't switch between
* translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom
@ -1894,7 +1887,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
* 1 VA bits to assure that the hypervisor can both ID map its code page
* and map any kernel memory.
*/
idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
idmap_bits = IDMAP_VA_BITS;
kernel_bits = vabits_actual;
*hyp_va_bits = max(idmap_bits, kernel_bits);

View File

@ -257,16 +257,14 @@ static bool is_el1_data_abort(unsigned long esr)
static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
unsigned long fsc_type = esr & ESR_ELx_FSC_TYPE;
if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr))
return false;
if (fsc_type == ESR_ELx_FSC_PERM)
if (esr_fsc_is_permission_fault(esr))
return true;
if (is_ttbr0_addr(addr) && system_uses_ttbr0_pan())
return fsc_type == ESR_ELx_FSC_FAULT &&
return esr_fsc_is_translation_fault(esr) &&
(regs->pstate & PSR_PAN_BIT);
return false;
@ -279,8 +277,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
unsigned long flags;
u64 par, dfsc;
if (!is_el1_data_abort(esr) ||
(esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
if (!is_el1_data_abort(esr) || !esr_fsc_is_translation_fault(esr))
return false;
local_irq_save(flags);
@ -301,7 +298,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
* treat the translation fault as spurious.
*/
dfsc = FIELD_GET(SYS_PAR_EL1_FST, par);
return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT;
return !esr_fsc_is_translation_fault(dfsc);
}
static void die_kernel_fault(const char *msg, unsigned long addr,
@ -368,11 +365,6 @@ static bool is_el1_mte_sync_tag_check_fault(unsigned long esr)
return false;
}
static bool is_translation_fault(unsigned long esr)
{
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
}
static void __do_kernel_fault(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
@ -405,7 +397,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
} else if (addr < PAGE_SIZE) {
msg = "NULL pointer dereference";
} else {
if (is_translation_fault(esr) &&
if (esr_fsc_is_translation_fault(esr) &&
kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
return;
@ -782,18 +774,18 @@ static const struct fault_info fault_info[] = {
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 8" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 0 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 12" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 0 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" },
{ do_tag_check_fault, SIGSEGV, SEGV_MTESERR, "synchronous tag check fault" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 18" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 19" },
{ do_sea, SIGKILL, SI_KERNEL, "level -1 (translation table walk)" },
{ do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" },
{ do_sea, SIGKILL, SI_KERNEL, "level 1 (translation table walk)" },
{ do_sea, SIGKILL, SI_KERNEL, "level 2 (translation table walk)" },
@ -801,7 +793,7 @@ static const struct fault_info fault_info[] = {
{ do_sea, SIGBUS, BUS_OBJERR, "synchronous parity or ECC error" }, // Reserved when RAS is implemented
{ do_bad, SIGKILL, SI_KERNEL, "unknown 25" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 26" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 27" },
{ do_sea, SIGKILL, SI_KERNEL, "level -1 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
{ do_sea, SIGKILL, SI_KERNEL, "level 0 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
{ do_sea, SIGKILL, SI_KERNEL, "level 1 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
{ do_sea, SIGKILL, SI_KERNEL, "level 2 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
@ -815,9 +807,9 @@ static const struct fault_info fault_info[] = {
{ do_bad, SIGKILL, SI_KERNEL, "unknown 38" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 39" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 40" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 41" },
{ do_bad, SIGKILL, SI_KERNEL, "level -1 address size fault" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 42" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 43" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level -1 translation fault" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 44" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 45" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 46" },

View File

@ -16,6 +16,9 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
/* ensure that the fixmap region does not grow down into the PCI I/O region */
static_assert(FIXADDR_TOT_START > PCI_IO_END);
#define NR_BM_PTE_TABLES \
SPAN_NR_ENTRIES(FIXADDR_TOT_START, FIXADDR_TOP, PMD_SHIFT)
#define NR_BM_PMD_TABLES \
@ -101,7 +104,7 @@ void __init early_fixmap_init(void)
unsigned long end = FIXADDR_TOP;
pgd_t *pgdp = pgd_offset_k(addr);
p4d_t *p4dp = p4d_offset(pgdp, addr);
p4d_t *p4dp = p4d_offset_kimg(pgdp, addr);
early_fixmap_init_pud(p4dp, addr, end);
}
@ -167,37 +170,3 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
return dt_virt;
}
/*
* Copy the fixmap region into a new pgdir.
*/
void __init fixmap_copy(pgd_t *pgdir)
{
if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdir, FIXADDR_TOT_START)))) {
/*
* The fixmap falls in a separate pgd to the kernel, and doesn't
* live in the carveout for the swapper_pg_dir. We can simply
* re-use the existing dir for the fixmap.
*/
set_pgd(pgd_offset_pgd(pgdir, FIXADDR_TOT_START),
READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START)));
} else if (CONFIG_PGTABLE_LEVELS > 3) {
pgd_t *bm_pgdp;
p4d_t *bm_p4dp;
pud_t *bm_pudp;
/*
* The fixmap shares its top level pgd entry with the kernel
* mapping. This can really only occur when we are running
* with 16k/4 levels, so we can simply reuse the pud level
* entry instead.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
bm_pgdp = pgd_offset_pgd(pgdir, FIXADDR_TOT_START);
bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START);
bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START);
pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd));
pud_clear_fixmap();
} else {
BUG();
}
}

View File

@ -238,7 +238,7 @@ void __init arm64_memblock_init(void)
* physical address of PAGE_OFFSET, we have to *subtract* from it.
*/
if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52))
memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52);
memstart_addr -= _PAGE_OFFSET(vabits_actual) - _PAGE_OFFSET(52);
/*
* Apply the memory limit if it was set. Since the kernel may be loaded

View File

@ -23,7 +23,7 @@
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
static pgd_t tmp_pg_dir[PTRS_PER_PTE] __initdata __aligned(PAGE_SIZE);
/*
* The p*d_populate functions call virt_to_phys implicitly so they can't be used
@ -99,6 +99,19 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node,
return early ? pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr);
}
static p4d_t *__init kasan_p4d_offset(pgd_t *pgdp, unsigned long addr, int node,
bool early)
{
if (pgd_none(READ_ONCE(*pgdp))) {
phys_addr_t p4d_phys = early ?
__pa_symbol(kasan_early_shadow_p4d)
: kasan_alloc_zeroed_page(node);
__pgd_populate(pgdp, p4d_phys, PGD_TYPE_TABLE);
}
return early ? p4d_offset_kimg(pgdp, addr) : p4d_offset(pgdp, addr);
}
static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
unsigned long end, int node, bool early)
{
@ -144,12 +157,12 @@ static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
p4d_t *p4dp = p4d_offset(pgdp, addr);
p4d_t *p4dp = kasan_p4d_offset(pgdp, addr, node, early);
do {
next = p4d_addr_end(addr, end);
kasan_pud_populate(p4dp, addr, next, node, early);
} while (p4dp++, addr = next, addr != end);
} while (p4dp++, addr = next, addr != end && p4d_none(READ_ONCE(*p4dp)));
}
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
@ -165,19 +178,48 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
} while (pgdp++, addr = next, addr != end);
}
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS > 4
#define SHADOW_ALIGN P4D_SIZE
#else
#define SHADOW_ALIGN PUD_SIZE
#endif
/*
* Return whether 'addr' is aligned to the size covered by a root level
* descriptor.
*/
static bool __init root_level_aligned(u64 addr)
{
int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * (PAGE_SHIFT - 3);
return (addr % (PAGE_SIZE << shift)) == 0;
}
/* The early shadow maps everything to a single page of zeroes */
asmlinkage void __init kasan_early_init(void)
{
BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
/*
* We cannot check the actual value of KASAN_SHADOW_START during build,
* as it depends on vabits_actual. As a best-effort approach, check
* potential values calculated based on VA_BITS and VA_BITS_MIN.
*/
BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), PGDIR_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), PGDIR_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), SHADOW_ALIGN));
BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), SHADOW_ALIGN));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, SHADOW_ALIGN));
if (!root_level_aligned(KASAN_SHADOW_START)) {
/*
* The start address is misaligned, and so the next level table
* will be shared with the linear region. This can happen with
* 4 or 5 level paging, so install a generic pte_t[] as the
* next level. This prevents the kasan_pgd_populate call below
* from inserting an entry that refers to the shared KASAN zero
* shadow pud_t[]/p4d_t[], which could end up getting corrupted
* when the linear region is mapped.
*/
static pte_t tbl[PTRS_PER_PTE] __page_aligned_bss;
pgd_t *pgdp = pgd_offset_k(KASAN_SHADOW_START);
set_pgd(pgdp, __pgd(__pa_symbol(tbl) | PGD_TYPE_TABLE));
}
kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE,
true);
}
@ -190,34 +232,74 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
}
/*
* Copy the current shadow region into a new pgdir.
* Return the descriptor index of 'addr' in the root level table
*/
void __init kasan_copy_shadow(pgd_t *pgdir)
{
pgd_t *pgdp, *pgdp_new, *pgdp_end;
pgdp = pgd_offset_k(KASAN_SHADOW_START);
pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
pgdp_new = pgd_offset_pgd(pgdir, KASAN_SHADOW_START);
do {
set_pgd(pgdp_new, READ_ONCE(*pgdp));
} while (pgdp++, pgdp_new++, pgdp != pgdp_end);
}
static void __init clear_pgds(unsigned long start,
unsigned long end)
static int __init root_level_idx(u64 addr)
{
/*
* Remove references to kasan page tables from
* swapper_pg_dir. pgd_clear() can't be used
* here because it's nop on 2,3-level pagetable setups
* On 64k pages, the TTBR1 range root tables are extended for 52-bit
* virtual addressing, and TTBR1 will simply point to the pgd_t entry
* that covers the start of the 48-bit addressable VA space if LVA is
* not implemented. This means we need to index the table as usual,
* instead of masking off bits based on vabits_actual.
*/
for (; start < end; start += PGDIR_SIZE)
set_pgd(pgd_offset_k(start), __pgd(0));
u64 vabits = IS_ENABLED(CONFIG_ARM64_64K_PAGES) ? VA_BITS
: vabits_actual;
int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * (PAGE_SHIFT - 3);
return (addr & ~_PAGE_OFFSET(vabits)) >> (shift + PAGE_SHIFT);
}
/*
* Clone a next level table from swapper_pg_dir into tmp_pg_dir
*/
static void __init clone_next_level(u64 addr, pgd_t *tmp_pg_dir, pud_t *pud)
{
int idx = root_level_idx(addr);
pgd_t pgd = READ_ONCE(swapper_pg_dir[idx]);
pud_t *pudp = (pud_t *)__phys_to_kimg(__pgd_to_phys(pgd));
memcpy(pud, pudp, PAGE_SIZE);
tmp_pg_dir[idx] = __pgd(__phys_to_pgd_val(__pa_symbol(pud)) |
PUD_TYPE_TABLE);
}
/*
* Return the descriptor index of 'addr' in the next level table
*/
static int __init next_level_idx(u64 addr)
{
int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * (PAGE_SHIFT - 3);
return (addr >> (shift + PAGE_SHIFT)) % PTRS_PER_PTE;
}
/*
* Dereference the table descriptor at 'pgd_idx' and clear the entries from
* 'start' to 'end' (exclusive) from the table.
*/
static void __init clear_next_level(int pgd_idx, int start, int end)
{
pgd_t pgd = READ_ONCE(swapper_pg_dir[pgd_idx]);
pud_t *pudp = (pud_t *)__phys_to_kimg(__pgd_to_phys(pgd));
memset(&pudp[start], 0, (end - start) * sizeof(pud_t));
}
static void __init clear_shadow(u64 start, u64 end)
{
int l = root_level_idx(start), m = root_level_idx(end);
if (!root_level_aligned(start))
clear_next_level(l++, next_level_idx(start), PTRS_PER_PTE);
if (!root_level_aligned(end))
clear_next_level(m, 0, next_level_idx(end));
memset(&swapper_pg_dir[l], 0, (m - l) * sizeof(pgd_t));
}
static void __init kasan_init_shadow(void)
{
static pud_t pud[2][PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
u64 kimg_shadow_start, kimg_shadow_end;
u64 mod_shadow_start;
u64 vmalloc_shadow_end;
@ -239,10 +321,23 @@ static void __init kasan_init_shadow(void)
* setup will be finished.
*/
memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
dsb(ishst);
cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir);
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
/*
* If the start or end address of the shadow region is not aligned to
* the root level size, we have to allocate a temporary next-level table
* in each case, clone the next level of descriptors, and install the
* table into tmp_pg_dir. Note that with 5 levels of paging, the next
* level will in fact be p4d_t, but that makes no difference in this
* case.
*/
if (!root_level_aligned(KASAN_SHADOW_START))
clone_next_level(KASAN_SHADOW_START, tmp_pg_dir, pud[0]);
if (!root_level_aligned(KASAN_SHADOW_END))
clone_next_level(KASAN_SHADOW_END, tmp_pg_dir, pud[1]);
dsb(ishst);
cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
clear_shadow(KASAN_SHADOW_START, KASAN_SHADOW_END);
kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START))));
@ -276,7 +371,7 @@ static void __init kasan_init_shadow(void)
PAGE_KERNEL_RO));
memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
}
static void __init kasan_init_depth(void)

View File

@ -73,6 +73,10 @@ static int __init adjust_protection_map(void)
protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
}
if (lpa2_is_enabled())
for (int i = 0; i < ARRAY_SIZE(protection_map); i++)
pgprot_val(protection_map[i]) &= ~PTE_SHARED;
return 0;
}
arch_initcall(adjust_protection_map);

View File

@ -45,18 +45,13 @@
#define NO_CONT_MAPPINGS BIT(1)
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
int idmap_t0sz __ro_after_init;
#if VA_BITS > 48
u64 vabits_actual __ro_after_init = VA_BITS_MIN;
EXPORT_SYMBOL(vabits_actual);
#endif
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
static bool rodata_is_rw __ro_after_init = true;
/*
* The booting CPU updates the failed status @__early_cpu_boot_status,
* with MMU turned off.
@ -73,10 +68,21 @@ EXPORT_SYMBOL(empty_zero_page);
static DEFINE_SPINLOCK(swapper_pgdir_lock);
static DEFINE_MUTEX(fixmap_lock);
void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
void noinstr set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
pgd_t *fixmap_pgdp;
/*
* Don't bother with the fixmap if swapper_pg_dir is still mapped
* writable in the kernel mapping.
*/
if (rodata_is_rw) {
WRITE_ONCE(*pgdp, pgd);
dsb(ishst);
isb();
return;
}
spin_lock(&swapper_pgdir_lock);
fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp));
WRITE_ONCE(*fixmap_pgdp, pgd);
@ -307,15 +313,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
} while (addr = next, addr != end);
}
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(int),
int flags)
{
unsigned long next;
pud_t *pudp;
p4d_t *p4dp = p4d_offset(pgdp, addr);
p4d_t p4d = READ_ONCE(*p4dp);
pud_t *pudp;
if (p4d_none(p4d)) {
p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN;
@ -363,6 +368,46 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
pud_clear_fixmap();
}
static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(int),
int flags)
{
unsigned long next;
pgd_t pgd = READ_ONCE(*pgdp);
p4d_t *p4dp;
if (pgd_none(pgd)) {
pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN;
phys_addr_t p4d_phys;
if (flags & NO_EXEC_MAPPINGS)
pgdval |= PGD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
p4d_phys = pgtable_alloc(P4D_SHIFT);
__pgd_populate(pgdp, p4d_phys, pgdval);
pgd = READ_ONCE(*pgdp);
}
BUG_ON(pgd_bad(pgd));
p4dp = p4d_set_fixmap_offset(pgdp, addr);
do {
p4d_t old_p4d = READ_ONCE(*p4dp);
next = p4d_addr_end(addr, end);
alloc_init_pud(p4dp, addr, next, phys, prot,
pgtable_alloc, flags);
BUG_ON(p4d_val(old_p4d) != 0 &&
p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp)));
phys += next - addr;
} while (p4dp++, addr = next, addr != end);
p4d_clear_fixmap();
}
static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
@ -385,7 +430,7 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
do {
next = pgd_addr_end(addr, end);
alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc,
flags);
phys += next - addr;
} while (pgdp++, addr = next, addr != end);
@ -576,8 +621,12 @@ static void __init map_mem(pgd_t *pgdp)
* entries at any level are being shared between the linear region and
* the vmalloc region. Check whether this is true for the PGD level, in
* which case it is guaranteed to be true for all other levels as well.
* (Unless we are running with support for LPA2, in which case the
* entire reduced VA space is covered by a single pgd_t which will have
* been populated without the PXNTable attribute by the time we get here.)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end) &&
pgd_index(_PAGE_OFFSET(VA_BITS_MIN)) != PTRS_PER_PGD - 1);
early_kfence_pool = arm64_kfence_alloc_pool();
@ -630,15 +679,16 @@ void mark_rodata_ro(void)
* to cover NOTES and EXCEPTION_TABLE.
*/
section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
WRITE_ONCE(rodata_is_rw, false);
update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
debug_checkwx();
}
static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
pgprot_t prot, struct vm_struct *vma,
int flags, unsigned long vm_flags)
static void __init declare_vma(struct vm_struct *vma,
void *va_start, void *va_end,
unsigned long vm_flags)
{
phys_addr_t pa_start = __pa_symbol(va_start);
unsigned long size = va_end - va_start;
@ -646,9 +696,6 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
BUG_ON(!PAGE_ALIGNED(pa_start));
BUG_ON(!PAGE_ALIGNED(size));
__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
early_pgtable_alloc, flags);
if (!(vm_flags & VM_NO_GUARD))
size += PAGE_SIZE;
@ -661,12 +708,12 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
vm_area_add_early(vma);
}
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static pgprot_t kernel_exec_prot(void)
{
return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
}
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __init map_entry_trampoline(void)
{
int i;
@ -701,80 +748,36 @@ core_initcall(map_entry_trampoline);
#endif
/*
* Open coded check for BTI, only for use to determine configuration
* for early mappings for before the cpufeature code has run.
* Declare the VMA areas for the kernel
*/
static bool arm64_early_this_cpu_has_bti(void)
static void __init declare_kernel_vmas(void)
{
u64 pfr1;
static struct vm_struct vmlinux_seg[KERNEL_SEGMENT_COUNT];
if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
return false;
pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1);
return cpuid_feature_extract_unsigned_field(pfr1,
ID_AA64PFR1_EL1_BT_SHIFT);
declare_vma(&vmlinux_seg[0], _stext, _etext, VM_NO_GUARD);
declare_vma(&vmlinux_seg[1], __start_rodata, __inittext_begin, VM_NO_GUARD);
declare_vma(&vmlinux_seg[2], __inittext_begin, __inittext_end, VM_NO_GUARD);
declare_vma(&vmlinux_seg[3], __initdata_begin, __initdata_end, VM_NO_GUARD);
declare_vma(&vmlinux_seg[4], _data, _end, 0);
}
/*
* Create fine-grained mappings for the kernel.
*/
static void __init map_kernel(pgd_t *pgdp)
{
static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
vmlinux_initdata, vmlinux_data;
void __pi_map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
/*
* External debuggers may need to write directly to the text
* mapping to install SW breakpoints. Allow this (only) when
* explicitly requested with rodata=off.
*/
pgprot_t text_prot = kernel_exec_prot();
/*
* If we have a CPU that supports BTI and a kernel built for
* BTI then mark the kernel executable text as guarded pages
* now so we don't have to rewrite the page tables later.
*/
if (arm64_early_this_cpu_has_bti())
text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
/*
* Only rodata will be remapped with different permissions later on,
* all other segments are allowed to use contiguous mappings.
*/
map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0,
VM_NO_GUARD);
map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
&vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
&vmlinux_inittext, 0, VM_NO_GUARD);
map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
&vmlinux_initdata, 0, VM_NO_GUARD);
map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
fixmap_copy(pgdp);
kasan_copy_shadow(pgdp);
}
static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
kpti_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
static void __init create_idmap(void)
{
u64 start = __pa_symbol(__idmap_text_start);
u64 size = __pa_symbol(__idmap_text_end) - start;
pgd_t *pgd = idmap_pg_dir;
u64 pgd_phys;
u64 end = __pa_symbol(__idmap_text_end);
u64 ptep = __pa_symbol(idmap_ptes);
/* check if we need an additional level of translation */
if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
set_pgd(&idmap_pg_dir[start >> VA_BITS],
__pgd(pgd_phys | P4D_TYPE_TABLE));
pgd = __va(pgd_phys);
}
__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
early_pgtable_alloc, 0);
__pi_map_range(&ptep, start, end, start, PAGE_KERNEL_ROX,
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings) {
extern u32 __idmap_kpti_flag;
u64 pa = __pa_symbol(&__idmap_kpti_flag);
@ -782,32 +785,21 @@ static void __init create_idmap(void)
* The KPTI G-to-nG conversion code needs a read-write mapping
* of its synchronization flag in the ID map.
*/
__create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
early_pgtable_alloc, 0);
ptep = __pa_symbol(kpti_ptes);
__pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
}
}
void __init paging_init(void)
{
pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
extern pgd_t init_idmap_pg_dir[];
idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
map_kernel(pgdp);
map_mem(pgdp);
pgd_clear_fixmap();
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
init_mm.pgd = swapper_pg_dir;
memblock_phys_free(__pa_symbol(init_pg_dir),
__pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
map_mem(swapper_pg_dir);
memblock_allow_resize();
create_idmap();
declare_kernel_vmas();
}
#ifdef CONFIG_MEMORY_HOTPLUG
@ -1073,10 +1065,10 @@ static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr,
free_empty_pmd_table(pudp, addr, next, floor, ceiling);
} while (addr = next, addr < end);
if (CONFIG_PGTABLE_LEVELS <= 3)
if (!pgtable_l4_enabled())
return;
if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
if (!pgtable_range_aligned(start, end, floor, ceiling, P4D_MASK))
return;
/*
@ -1099,8 +1091,8 @@ static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
unsigned long end, unsigned long floor,
unsigned long ceiling)
{
unsigned long next;
p4d_t *p4dp, p4d;
unsigned long i, next, start = addr;
do {
next = p4d_addr_end(addr, end);
@ -1112,6 +1104,27 @@ static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
WARN_ON(!p4d_present(p4d));
free_empty_pud_table(p4dp, addr, next, floor, ceiling);
} while (addr = next, addr < end);
if (!pgtable_l5_enabled())
return;
if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
return;
/*
* Check whether we can free the p4d page if the rest of the
* entries are empty. Overlap with other regions have been
* handled by the floor/ceiling check.
*/
p4dp = p4d_offset(pgdp, 0UL);
for (i = 0; i < PTRS_PER_P4D; i++) {
if (!p4d_none(READ_ONCE(p4dp[i])))
return;
}
pgd_clear(pgdp);
__flush_tlb_kernel_pgtable(start);
free_hotplug_pgtable_page(virt_to_page(p4dp));
}
static void free_empty_tables(unsigned long addr, unsigned long end,
@ -1196,6 +1209,12 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
return 1;
}
#ifndef __PAGETABLE_P4D_FOLDED
void p4d_clear_huge(p4d_t *p4dp)
{
}
#endif
int pud_clear_huge(pud_t *pudp)
{
if (!pud_sect(READ_ONCE(*pudp)))
@ -1486,3 +1505,35 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte
{
set_pte_at(vma->vm_mm, addr, ptep, pte);
}
/*
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
unsigned long daif;
/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
if (cnp)
ttbr1 |= TTBR_CNP_BIT;
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
cpu_install_idmap();
/*
* We really don't want to take *any* exceptions while TTBR1 is
* in the process of being replaced so mask everything.
*/
daif = local_daif_save();
replace_phys(ttbr1);
local_daif_restore(daif);
cpu_uninstall_idmap();
}

View File

@ -17,11 +17,22 @@
static struct kmem_cache *pgd_cache __ro_after_init;
static bool pgdir_is_page_size(void)
{
if (PGD_SIZE == PAGE_SIZE)
return true;
if (CONFIG_PGTABLE_LEVELS == 4)
return !pgtable_l4_enabled();
if (CONFIG_PGTABLE_LEVELS == 5)
return !pgtable_l5_enabled();
return false;
}
pgd_t *pgd_alloc(struct mm_struct *mm)
{
gfp_t gfp = GFP_PGTABLE_USER;
if (PGD_SIZE == PAGE_SIZE)
if (pgdir_is_page_size())
return (pgd_t *)__get_free_page(gfp);
else
return kmem_cache_alloc(pgd_cache, gfp);
@ -29,7 +40,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
if (PGD_SIZE == PAGE_SIZE)
if (pgdir_is_page_size())
free_page((unsigned long)pgd);
else
kmem_cache_free(pgd_cache, pgd);
@ -37,7 +48,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
void __init pgtable_cache_init(void)
{
if (PGD_SIZE == PAGE_SIZE)
if (pgdir_is_page_size())
return;
#ifdef CONFIG_ARM64_PA_BITS_52

View File

@ -195,27 +195,36 @@ SYM_TYPED_FUNC_START(idmap_cpu_replace_ttbr1)
ret
SYM_FUNC_END(idmap_cpu_replace_ttbr1)
SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
.popsection
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | PTE_TYPE_PAGE | \
PTE_AF | PTE_SHARED | PTE_UXN | PTE_WRITE)
.pushsection ".idmap.text", "a"
.macro pte_to_phys, phys, pte
and \phys, \pte, #PTE_ADDR_LOW
#ifdef CONFIG_ARM64_PA_BITS_52
and \pte, \pte, #PTE_ADDR_HIGH
orr \phys, \phys, \pte, lsl #PTE_ADDR_HIGH_SHIFT
#endif
.endm
.macro kpti_mk_tbl_ng, type, num_entries
add end_\type\()p, cur_\type\()p, #\num_entries * 8
.Ldo_\type:
ldr \type, [cur_\type\()p] // Load the entry
ldr \type, [cur_\type\()p], #8 // Load the entry and advance
tbz \type, #0, .Lnext_\type // Skip invalid and
tbnz \type, #11, .Lnext_\type // non-global entries
orr \type, \type, #PTE_NG // Same bit for blocks and pages
str \type, [cur_\type\()p] // Update the entry
str \type, [cur_\type\()p, #-8] // Update the entry
.ifnc \type, pte
tbnz \type, #1, .Lderef_\type
.endif
.Lnext_\type:
add cur_\type\()p, cur_\type\()p, #8
cmp cur_\type\()p, end_\type\()p
b.ne .Ldo_\type
.endm
@ -225,18 +234,18 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
* fixmap slot associated with the current level.
*/
.macro kpti_map_pgtbl, type, level
str xzr, [temp_pte, #8 * (\level + 1)] // break before make
str xzr, [temp_pte, #8 * (\level + 2)] // break before make
dsb nshst
add pte, temp_pte, #PAGE_SIZE * (\level + 1)
add pte, temp_pte, #PAGE_SIZE * (\level + 2)
lsr pte, pte, #12
tlbi vaae1, pte
dsb nsh
isb
phys_to_pte pte, cur_\type\()p
add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1)
add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 2)
orr pte, pte, pte_flags
str pte, [temp_pte, #8 * (\level + 1)]
str pte, [temp_pte, #8 * (\level + 2)]
dsb nshst
.endm
@ -269,6 +278,8 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
end_ptep .req x15
pte .req x16
valid .req x17
cur_p4dp .req x19
end_p4dp .req x20
mov x5, x3 // preserve temp_pte arg
mrs swapper_ttb, ttbr1_el1
@ -276,6 +287,12 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
cbnz cpu, __idmap_kpti_secondary
#if CONFIG_PGTABLE_LEVELS > 4
stp x29, x30, [sp, #-32]!
mov x29, sp
stp x19, x20, [sp, #16]
#endif
/* We're the boot CPU. Wait for the others to catch up */
sevl
1: wfe
@ -293,9 +310,32 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
mov_q pte_flags, KPTI_NG_PTE_FLAGS
/* Everybody is enjoying the idmap, so we can rewrite swapper. */
#ifdef CONFIG_ARM64_LPA2
/*
* If LPA2 support is configured, but 52-bit virtual addressing is not
* enabled at runtime, we will fall back to one level of paging less,
* and so we have to walk swapper_pg_dir as if we dereferenced its
* address from a PGD level entry, and terminate the PGD level loop
* right after.
*/
adrp pgd, swapper_pg_dir // walk &swapper_pg_dir at the next level
mov cur_pgdp, end_pgdp // must be equal to terminate the PGD loop
alternative_if_not ARM64_HAS_VA52
b .Lderef_pgd // skip to the next level
alternative_else_nop_endif
/*
* LPA2 based 52-bit virtual addressing requires 52-bit physical
* addressing to be enabled as well. In this case, the shareability
* bits are repurposed as physical address bits, and should not be
* set in pte_flags.
*/
bic pte_flags, pte_flags, #PTE_SHARED
#endif
/* PGD */
adrp cur_pgdp, swapper_pg_dir
kpti_map_pgtbl pgd, 0
kpti_map_pgtbl pgd, -1
kpti_mk_tbl_ng pgd, PTRS_PER_PGD
/* Ensure all the updated entries are visible to secondary CPUs */
@ -308,16 +348,33 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
/* Set the flag to zero to indicate that we're all done */
str wzr, [flag_ptr]
#if CONFIG_PGTABLE_LEVELS > 4
ldp x19, x20, [sp, #16]
ldp x29, x30, [sp], #32
#endif
ret
.Lderef_pgd:
/* P4D */
.if CONFIG_PGTABLE_LEVELS > 4
p4d .req x30
pte_to_phys cur_p4dp, pgd
kpti_map_pgtbl p4d, 0
kpti_mk_tbl_ng p4d, PTRS_PER_P4D
b .Lnext_pgd
.else /* CONFIG_PGTABLE_LEVELS <= 4 */
p4d .req pgd
.set .Lnext_p4d, .Lnext_pgd
.endif
.Lderef_p4d:
/* PUD */
.if CONFIG_PGTABLE_LEVELS > 3
pud .req x10
pte_to_phys cur_pudp, pgd
pte_to_phys cur_pudp, p4d
kpti_map_pgtbl pud, 1
kpti_mk_tbl_ng pud, PTRS_PER_PUD
b .Lnext_pgd
b .Lnext_p4d
.else /* CONFIG_PGTABLE_LEVELS <= 3 */
pud .req pgd
.set .Lnext_pud, .Lnext_pgd
@ -361,6 +418,9 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
.unreq end_ptep
.unreq pte
.unreq valid
.unreq cur_p4dp
.unreq end_p4dp
.unreq p4d
/* Secondary CPUs end up here */
__idmap_kpti_secondary:
@ -395,8 +455,6 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
*
* Initialise the processor for turning the MMU on.
*
* Input:
* x0 - actual number of VA bits (ignored unless VA_BITS > 48)
* Output:
* Return in x0 the value of the SCTLR_EL1 register.
*/
@ -420,20 +478,21 @@ SYM_FUNC_START(__cpu_setup)
mair .req x17
tcr .req x16
mov_q mair, MAIR_EL1_SET
mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
mov_q tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS_MIN) | TCR_CACHE_FLAGS | \
TCR_SMP_FLAGS | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
tcr_clear_errata_bits tcr, x9, x5
#ifdef CONFIG_ARM64_VA_BITS_52
sub x9, xzr, x0
add x9, x9, #64
mov x9, #64 - VA_BITS
alternative_if ARM64_HAS_VA52
tcr_set_t1sz tcr, x9
#else
idmap_get_t0sz x9
#ifdef CONFIG_ARM64_LPA2
orr tcr, tcr, #TCR_DS
#endif
alternative_else_nop_endif
#endif
tcr_set_t0sz tcr, x9
/*
* Set the IPS bits in TCR_EL1.
@ -458,11 +517,26 @@ SYM_FUNC_START(__cpu_setup)
ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
cbz x1, .Lskip_indirection
/*
* The PROT_* macros describing the various memory types may resolve to
* C expressions if they include the PTE_MAYBE_* macros, and so they
* can only be used from C code. The PIE_E* constants below are also
* defined in terms of those macros, but will mask out those
* PTE_MAYBE_* constants, whether they are set or not. So #define them
* as 0x0 here so we can evaluate the PIE_E* constants in asm context.
*/
#define PTE_MAYBE_NG 0
#define PTE_MAYBE_SHARED 0
mov_q x0, PIE_E0
msr REG_PIRE0_EL1, x0
mov_q x0, PIE_E1
msr REG_PIR_EL1, x0
#undef PTE_MAYBE_NG
#undef PTE_MAYBE_SHARED
mov x0, TCR2_EL1x_PIE
msr REG_TCR2_EL1, x0

View File

@ -26,34 +26,6 @@
#include <asm/ptdump.h>
enum address_markers_idx {
PAGE_OFFSET_NR = 0,
PAGE_END_NR,
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
KASAN_START_NR,
#endif
};
static struct addr_marker address_markers[] = {
{ PAGE_OFFSET, "Linear Mapping start" },
{ 0 /* PAGE_END */, "Linear Mapping end" },
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
{ 0 /* KASAN_SHADOW_START */, "Kasan shadow start" },
{ KASAN_SHADOW_END, "Kasan shadow end" },
#endif
{ MODULES_VADDR, "Modules start" },
{ MODULES_END, "Modules end" },
{ VMALLOC_START, "vmalloc() area" },
{ VMALLOC_END, "vmalloc() end" },
{ FIXADDR_TOT_START, "Fixmap start" },
{ FIXADDR_TOP, "Fixmap end" },
{ PCI_IO_START, "PCI I/O start" },
{ PCI_IO_END, "PCI I/O end" },
{ VMEMMAP_START, "vmemmap start" },
{ VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" },
{ -1, NULL },
};
#define pt_dump_seq_printf(m, fmt, args...) \
({ \
if (m) \
@ -76,6 +48,7 @@ struct pg_state {
struct ptdump_state ptdump;
struct seq_file *seq;
const struct addr_marker *marker;
const struct mm_struct *mm;
unsigned long start_address;
int level;
u64 current_prot;
@ -172,12 +145,12 @@ static const struct prot_bits pte_bits[] = {
struct pg_level {
const struct prot_bits *bits;
const char *name;
size_t num;
char name[4];
int num;
u64 mask;
};
static struct pg_level pg_level[] = {
static struct pg_level pg_level[] __ro_after_init = {
{ /* pgd */
.name = "PGD",
.bits = pte_bits,
@ -187,11 +160,11 @@ static struct pg_level pg_level[] = {
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* pud */
.name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
.name = "PUD",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* pmd */
.name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
.name = "PMD",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* pte */
@ -255,6 +228,11 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
static const char units[] = "KMGTPE";
u64 prot = 0;
/* check if the current level has been folded dynamically */
if ((level == 1 && mm_p4d_folded(st->mm)) ||
(level == 2 && mm_pud_folded(st->mm)))
level = 0;
if (level >= 0)
prot = val & pg_level[level].mask;
@ -316,6 +294,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
st = (struct pg_state){
.seq = s,
.marker = info->markers,
.mm = info->mm,
.level = -1,
.ptdump = {
.note_page = note_page,
@ -339,10 +318,8 @@ static void __init ptdump_initialize(void)
pg_level[i].mask |= pg_level[i].bits[j].mask;
}
static struct ptdump_info kernel_ptdump_info = {
static struct ptdump_info kernel_ptdump_info __ro_after_init = {
.mm = &init_mm,
.markers = address_markers,
.base_addr = PAGE_OFFSET,
};
void ptdump_check_wx(void)
@ -358,7 +335,7 @@ void ptdump_check_wx(void)
.ptdump = {
.note_page = note_page,
.range = (struct ptdump_range[]) {
{PAGE_OFFSET, ~0UL},
{_PAGE_OFFSET(vabits_actual), ~0UL},
{0, 0}
}
}
@ -375,10 +352,32 @@ void ptdump_check_wx(void)
static int __init ptdump_init(void)
{
address_markers[PAGE_END_NR].start_address = PAGE_END;
u64 page_offset = _PAGE_OFFSET(vabits_actual);
u64 vmemmap_start = (u64)virt_to_page((void *)page_offset);
struct addr_marker m[] = {
{ PAGE_OFFSET, "Linear Mapping start" },
{ PAGE_END, "Linear Mapping end" },
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START;
{ KASAN_SHADOW_START, "Kasan shadow start" },
{ KASAN_SHADOW_END, "Kasan shadow end" },
#endif
{ MODULES_VADDR, "Modules start" },
{ MODULES_END, "Modules end" },
{ VMALLOC_START, "vmalloc() area" },
{ VMALLOC_END, "vmalloc() end" },
{ vmemmap_start, "vmemmap start" },
{ VMEMMAP_END, "vmemmap end" },
{ PCI_IO_START, "PCI I/O start" },
{ PCI_IO_END, "PCI I/O end" },
{ FIXADDR_TOT_START, "Fixmap start" },
{ FIXADDR_TOP, "Fixmap end" },
{ -1, NULL },
};
static struct addr_marker address_markers[ARRAY_SIZE(m)] __ro_after_init;
kernel_ptdump_info.markers = memcpy(address_markers, m, sizeof(m));
kernel_ptdump_info.base_addr = page_offset;
ptdump_initialize();
ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
return 0;

View File

@ -26,6 +26,7 @@ HAS_ECV
HAS_ECV_CNTPOFF
HAS_EPAN
HAS_EVT
HAS_FPMR
HAS_FGT
HAS_FPSIMD
HAS_GENERIC_AUTH
@ -50,6 +51,7 @@ HAS_STAGE2_FWB
HAS_TCR2
HAS_TIDCP1
HAS_TLB_RANGE
HAS_VA52
HAS_VIRT_HOST_EXTN
HAS_WFXT
HW_DBM

View File

@ -200,6 +200,7 @@ UnsignedEnum 27:24 PerfMon
0b0110 PMUv3p5
0b0111 PMUv3p7
0b1000 PMUv3p8
0b1001 PMUv3p9
0b1111 IMPDEF
EndEnum
Enum 23:20 MProfDbg
@ -231,6 +232,7 @@ Enum 3:0 CopDbg
0b1000 Debugv8p2
0b1001 Debugv8p4
0b1010 Debugv8p8
0b1011 Debugv8p9
EndEnum
EndSysreg
@ -1221,6 +1223,7 @@ UnsignedEnum 35:32 PMSVer
0b0010 V1P1
0b0011 V1P2
0b0100 V1P3
0b0101 V1P4
EndEnum
Field 31:28 CTX_CMPs
Res0 27:24
@ -1247,11 +1250,41 @@ UnsignedEnum 3:0 DebugVer
0b1000 V8P2
0b1001 V8P4
0b1010 V8P8
0b1011 V8P9
EndEnum
EndSysreg
Sysreg ID_AA64DFR1_EL1 3 0 0 5 1
Res0 63:0
Field 63:56 ABL_CMPs
UnsignedEnum 55:52 DPFZS
0b0000 IGNR
0b0001 FRZN
EndEnum
UnsignedEnum 51:48 EBEP
0b0000 NI
0b0001 IMP
EndEnum
UnsignedEnum 47:44 ITE
0b0000 NI
0b0001 IMP
EndEnum
UnsignedEnum 43:40 ABLE
0b0000 NI
0b0001 IMP
EndEnum
UnsignedEnum 39:36 PMICNTR
0b0000 NI
0b0001 IMP
EndEnum
UnsignedEnum 35:32 SPMU
0b0000 NI
0b0001 IMP
0b0010 IMP_SPMZR
EndEnum
Field 31:24 CTX_CMPs
Field 23:16 WRPs
Field 15:8 BRPs
Field 7:0 SYSPMUID
EndSysreg
Sysreg ID_AA64AFR0_EL1 3 0 0 5 4
@ -1540,16 +1573,16 @@ Enum 35:32 TGRAN16_2
0b0010 IMP
0b0011 52_BIT
EndEnum
Enum 31:28 TGRAN4
SignedEnum 31:28 TGRAN4
0b0000 IMP
0b0001 52_BIT
0b1111 NI
EndEnum
Enum 27:24 TGRAN64
SignedEnum 27:24 TGRAN64
0b0000 IMP
0b1111 NI
EndEnum
Enum 23:20 TGRAN16
UnsignedEnum 23:20 TGRAN16
0b0000 NI
0b0001 IMP
0b0010 52_BIT
@ -1697,7 +1730,7 @@ Enum 23:20 CCIDX
0b0000 32
0b0001 64
EndEnum
Enum 19:16 VARange
UnsignedEnum 19:16 VARange
0b0000 48
0b0001 52
EndEnum

View File

@ -82,6 +82,7 @@ KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs
KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
endif
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS_MODULE += -Crelocation-model=pic
ifeq ($(CONFIG_RELOCATABLE),y)

View File

@ -71,6 +71,7 @@ export BITS
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
#
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
ifeq ($(CONFIG_X86_KERNEL_IBT),y)

View File

@ -86,6 +86,15 @@ config RISCV_PMU_SBI
full perf feature support i.e. counter overflow, privilege mode
filtering, counter configuration.
config STARFIVE_STARLINK_PMU
depends on ARCH_STARFIVE || (COMPILE_TEST && 64BIT)
bool "StarFive StarLink PMU"
help
Provide support for StarLink Performance Monitor Unit.
StarLink Performance Monitor Unit integrates one or more cores with
an L3 memory system. The L3 cache events are added into perf event
subsystem, allowing monitoring of various L3 cache perf events.
config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y

View File

@ -15,6 +15,7 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o
obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o
obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o
obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o

View File

@ -729,7 +729,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev)
return ret;
}
static int ali_drw_pmu_remove(struct platform_device *pdev)
static void ali_drw_pmu_remove(struct platform_device *pdev)
{
struct ali_drw_pmu *drw_pmu = platform_get_drvdata(pdev);
@ -739,8 +739,6 @@ static int ali_drw_pmu_remove(struct platform_device *pdev)
ali_drw_pmu_uninit_irq(drw_pmu);
perf_pmu_unregister(&drw_pmu->pmu);
return 0;
}
static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
@ -795,7 +793,7 @@ static struct platform_driver ali_drw_pmu_driver = {
.acpi_match_table = ali_drw_acpi_match,
},
.probe = ali_drw_pmu_probe,
.remove = ali_drw_pmu_remove,
.remove_new = ali_drw_pmu_remove,
};
static int __init ali_drw_pmu_init(void)

View File

@ -355,11 +355,9 @@ static int g12_ddr_pmu_probe(struct platform_device *pdev)
return meson_ddr_pmu_create(pdev);
}
static int g12_ddr_pmu_remove(struct platform_device *pdev)
static void g12_ddr_pmu_remove(struct platform_device *pdev)
{
meson_ddr_pmu_remove(pdev);
return 0;
}
static const struct of_device_id meson_ddr_pmu_dt_match[] = {
@ -381,7 +379,7 @@ MODULE_DEVICE_TABLE(of, meson_ddr_pmu_dt_match);
static struct platform_driver g12_ddr_pmu_driver = {
.probe = g12_ddr_pmu_probe,
.remove = g12_ddr_pmu_remove,
.remove_new = g12_ddr_pmu_remove,
.driver = {
.name = "meson-g12-ddr-pmu",

View File

@ -1697,16 +1697,14 @@ error_pmu_init:
return ret;
}
static int cci_pmu_remove(struct platform_device *pdev)
static void cci_pmu_remove(struct platform_device *pdev)
{
if (!g_cci_pmu)
return 0;
return;
cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE);
perf_pmu_unregister(&g_cci_pmu->pmu);
g_cci_pmu = NULL;
return 0;
}
static struct platform_driver cci_pmu_driver = {
@ -1716,7 +1714,7 @@ static struct platform_driver cci_pmu_driver = {
.suppress_bind_attrs = true,
},
.probe = cci_pmu_probe,
.remove = cci_pmu_remove,
.remove_new = cci_pmu_remove,
};
module_platform_driver(cci_pmu_driver);

View File

@ -1515,13 +1515,11 @@ static int arm_ccn_probe(struct platform_device *pdev)
return arm_ccn_pmu_init(ccn);
}
static int arm_ccn_remove(struct platform_device *pdev)
static void arm_ccn_remove(struct platform_device *pdev)
{
struct arm_ccn *ccn = platform_get_drvdata(pdev);
arm_ccn_pmu_cleanup(ccn);
return 0;
}
static const struct of_device_id arm_ccn_match[] = {
@ -1539,7 +1537,7 @@ static struct platform_driver arm_ccn_driver = {
.suppress_bind_attrs = true,
},
.probe = arm_ccn_probe,
.remove = arm_ccn_remove,
.remove_new = arm_ccn_remove,
};
static int __init arm_ccn_init(void)

View File

@ -493,6 +493,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
for (dn = cmn->dns; dn->type; dn++) {
struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
int pad = dn->logid < 10;
if (dn->type == CMN_TYPE_XP)
continue;
@ -503,7 +504,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
continue;
seq_printf(s, " #%-2d |", dn->logid);
seq_printf(s, " %*c#%-*d |", pad + 1, ' ', 3 - pad, dn->logid);
return;
}
seq_puts(s, " |");
@ -516,7 +517,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
seq_puts(s, " X");
for (x = 0; x < cmn->mesh_x; x++)
seq_printf(s, " %d ", x);
seq_printf(s, " %-2d ", x);
seq_puts(s, "\nY P D+");
y = cmn->mesh_y;
while (y--) {
@ -526,13 +527,13 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
for (x = 0; x < cmn->mesh_x; x++)
seq_puts(s, "--------+");
seq_printf(s, "\n%d |", y);
seq_printf(s, "\n%-2d |", y);
for (x = 0; x < cmn->mesh_x; x++) {
struct arm_cmn_node *xp = cmn->xps + xp_base + x;
for (p = 0; p < CMN_MAX_PORTS; p++)
port[p][x] = arm_cmn_device_connect_info(cmn, xp, p);
seq_printf(s, " XP #%-2d |", xp_base + x);
seq_printf(s, " XP #%-3d|", xp_base + x);
}
seq_puts(s, "\n |");
@ -2515,7 +2516,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
return err;
}
static int arm_cmn_remove(struct platform_device *pdev)
static void arm_cmn_remove(struct platform_device *pdev)
{
struct arm_cmn *cmn = platform_get_drvdata(pdev);
@ -2524,7 +2525,6 @@ static int arm_cmn_remove(struct platform_device *pdev)
perf_pmu_unregister(&cmn->pmu);
cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node);
debugfs_remove(cmn->debug);
return 0;
}
#ifdef CONFIG_OF
@ -2555,7 +2555,7 @@ static struct platform_driver arm_cmn_driver = {
.acpi_match_table = ACPI_PTR(arm_cmn_acpi_match),
},
.probe = arm_cmn_probe,
.remove = arm_cmn_remove,
.remove_new = arm_cmn_remove,
};
static int __init arm_cmn_init(void)

Some files were not shown because too many files have changed in this diff Show More