diff --git a/Documentation/devicetree/bindings/spi/fsl-spi.txt b/Documentation/devicetree/bindings/spi/fsl-spi.txt index 411375eac54d..0654380eb751 100644 --- a/Documentation/devicetree/bindings/spi/fsl-spi.txt +++ b/Documentation/devicetree/bindings/spi/fsl-spi.txt @@ -15,13 +15,13 @@ Required properties: - clock-frequency : input clock frequency to non FSL_SOC cores Optional properties: -- gpios : specifies the gpio pins to be used for chipselects. +- cs-gpios : specifies the gpio pins to be used for chipselects. The gpios will be referred to as reg = in the SPI child nodes. If unspecified, a single SPI device without a chip select can be used. - fsl,spisel_boot : for the MPC8306 and MPC8309, specifies that the SPISEL_BOOT signal is used as chip select for a slave device. Use reg = in the corresponding child node, i.e. 0 if - the gpios property is not present. + the cs-gpios property is not present. Example: spi@4c0 { @@ -31,8 +31,8 @@ Example: interrupts = <82 0>; interrupt-parent = <700>; mode = "cpu"; - gpios = <&gpio 18 1 // device reg=<0> - &gpio 19 1>; // device reg=<1> + cs-gpios = <&gpio 18 1 // device reg=<0> + &gpio 19 1>; // device reg=<1> }; diff --git a/Documentation/powerpc/imc.rst b/Documentation/powerpc/imc.rst new file mode 100644 index 000000000000..633bcee7dc85 --- /dev/null +++ b/Documentation/powerpc/imc.rst @@ -0,0 +1,199 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _imc: + +=================================== +IMC (In-Memory Collection Counters) +=================================== + +Anju T Sudhakar, 10 May 2019 + +.. contents:: + :depth: 3 + + +Basic overview +============== + +IMC (In-Memory collection counters) is a hardware monitoring facility that +collects large numbers of hardware performance events at Nest level (these are +on-chip but off-core), Core level and Thread level. + +The Nest PMU counters are handled by a Nest IMC microcode which runs in the OCC +(On-Chip Controller) complex. The microcode collects the counter data and moves +the nest IMC counter data to memory. + +The Core and Thread IMC PMU counters are handled in the core. Core level PMU +counters give us the IMC counters' data per core and thread level PMU counters +give us the IMC counters' data per CPU thread. + +OPAL obtains the IMC PMU and supported events information from the IMC Catalog +and passes on to the kernel via the device tree. The event's information +contains: + +- Event name +- Event Offset +- Event description + +and possibly also: + +- Event scale +- Event unit + +Some PMUs may have a common scale and unit values for all their supported +events. For those cases, the scale and unit properties for those events must be +inherited from the PMU. + +The event offset in the memory is where the counter data gets accumulated. + +IMC catalog is available at: + https://github.com/open-power/ima-catalog + +The kernel discovers the IMC counters information in the device tree at the +`imc-counters` device node which has a compatible field +`ibm,opal-in-memory-counters`. From the device tree, the kernel parses the PMUs +and their event's information and register the PMU and its attributes in the +kernel. + +IMC example usage +================= + +.. code-block:: sh + + # perf list + [...] + nest_mcs01/PM_MCS01_64B_RD_DISP_PORT01/ [Kernel PMU event] + nest_mcs01/PM_MCS01_64B_RD_DISP_PORT23/ [Kernel PMU event] + [...] + core_imc/CPM_0THRD_NON_IDLE_PCYC/ [Kernel PMU event] + core_imc/CPM_1THRD_NON_IDLE_INST/ [Kernel PMU event] + [...] + thread_imc/CPM_0THRD_NON_IDLE_PCYC/ [Kernel PMU event] + thread_imc/CPM_1THRD_NON_IDLE_INST/ [Kernel PMU event] + +To see per chip data for nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0/: + +.. code-block:: sh + + # ./perf stat -e "nest_mcs01/PM_MCS01_64B_WR_DISP_PORT01/" -a --per-socket + +To see non-idle instructions for core 0: + +.. code-block:: sh + + # ./perf stat -e "core_imc/CPM_NON_IDLE_INST/" -C 0 -I 1000 + +To see non-idle instructions for a "make": + +.. code-block:: sh + + # ./perf stat -e "thread_imc/CPM_NON_IDLE_PCYC/" make + + +IMC Trace-mode +=============== + +POWER9 supports two modes for IMC which are the Accumulation mode and Trace +mode. In Accumulation mode, event counts are accumulated in system Memory. +Hypervisor then reads the posted counts periodically or when requested. In IMC +Trace mode, the 64 bit trace SCOM value is initialized with the event +information. The CPMCxSEL and CPMC_LOAD in the trace SCOM, specifies the event +to be monitored and the sampling duration. On each overflow in the CPMCxSEL, +hardware snapshots the program counter along with event counts and writes into +memory pointed by LDBAR. + +LDBAR is a 64 bit special purpose per thread register, it has bits to indicate +whether hardware is configured for accumulation or trace mode. + +LDBAR Register Layout +--------------------- + + +-------+----------------------+ + | 0 | Enable/Disable | + +-------+----------------------+ + | 1 | 0: Accumulation Mode | + | +----------------------+ + | | 1: Trace Mode | + +-------+----------------------+ + | 2:3 | Reserved | + +-------+----------------------+ + | 4-6 | PB scope | + +-------+----------------------+ + | 7 | Reserved | + +-------+----------------------+ + | 8:50 | Counter Address | + +-------+----------------------+ + | 51:63 | Reserved | + +-------+----------------------+ + +TRACE_IMC_SCOM bit representation +--------------------------------- + + +-------+------------+ + | 0:1 | SAMPSEL | + +-------+------------+ + | 2:33 | CPMC_LOAD | + +-------+------------+ + | 34:40 | CPMC1SEL | + +-------+------------+ + | 41:47 | CPMC2SEL | + +-------+------------+ + | 48:50 | BUFFERSIZE | + +-------+------------+ + | 51:63 | RESERVED | + +-------+------------+ + +CPMC_LOAD contains the sampling duration. SAMPSEL and CPMCxSEL determines the +event to count. BUFFERSIZE indicates the memory range. On each overflow, +hardware snapshots the program counter along with event counts and updates the +memory and reloads the CMPC_LOAD value for the next sampling duration. IMC +hardware does not support exceptions, so it quietly wraps around if memory +buffer reaches the end. + +*Currently the event monitored for trace-mode is fixed as cycle.* + +Trace IMC example usage +======================= + +.. code-block:: sh + + # perf list + [....] + trace_imc/trace_cycles/ [Kernel PMU event] + +To record an application/process with trace-imc event: + +.. code-block:: sh + + # perf record -e trace_imc/trace_cycles/ yes > /dev/null + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.012 MB perf.data (21 samples) ] + +The `perf.data` generated, can be read using perf report. + +Benefits of using IMC trace-mode +================================ + +PMI (Performance Monitoring Interrupts) interrupt handling is avoided, since IMC +trace mode snapshots the program counter and updates to the memory. And this +also provide a way for the operating system to do instruction sampling in real +time without PMI processing overhead. + +Performance data using `perf top` with and without trace-imc event. + +PMI interrupts count when `perf top` command is executed without trace-imc event. + +.. code-block:: sh + + # grep PMI /proc/interrupts + PMI: 0 0 0 0 Performance monitoring interrupts + # ./perf top + ... + # grep PMI /proc/interrupts + PMI: 39735 8710 17338 17801 Performance monitoring interrupts + # ./perf top -e trace_imc/trace_cycles/ + ... + # grep PMI /proc/interrupts + PMI: 39735 8710 17338 17801 Performance monitoring interrupts + + +That is, the PMI interrupt counts do not increment when using the `trace_imc` event. diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst index ba5edb3211c0..0d45f0fc8e57 100644 --- a/Documentation/powerpc/index.rst +++ b/Documentation/powerpc/index.rst @@ -18,9 +18,11 @@ powerpc elfnote firmware-assisted-dump hvcs + imc isa-versions kaslr-booke32 mpc52xx + papr_hcalls pci_iov_resource_on_powernv pmu-ebb ptrace diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst new file mode 100644 index 000000000000..3493631a60f8 --- /dev/null +++ b/Documentation/powerpc/papr_hcalls.rst @@ -0,0 +1,250 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +Hypercall Op-codes (hcalls) +=========================== + +Overview +========= + +Virtualization on 64-bit Power Book3S Platforms is based on the PAPR +specification [1]_ which describes the run-time environment for a guest +operating system and how it should interact with the hypervisor for +privileged operations. Currently there are two PAPR compliant hypervisors: + +- **IBM PowerVM (PHYP)**: IBM's proprietary hypervisor that supports AIX, + IBM-i and Linux as supported guests (termed as Logical Partitions + or LPARS). It supports the full PAPR specification. + +- **Qemu/KVM**: Supports PPC64 linux guests running on a PPC64 linux host. + Though it only implements a subset of PAPR specification called LoPAPR [2]_. + +On PPC64 arch a guest kernel running on top of a PAPR hypervisor is called +a *pSeries guest*. A pseries guest runs in a supervisor mode (HV=0) and must +issue hypercalls to the hypervisor whenever it needs to perform an action +that is hypervisor priviledged [3]_ or for other services managed by the +hypervisor. + +Hence a Hypercall (hcall) is essentially a request by the pseries guest +asking hypervisor to perform a privileged operation on behalf of the guest. The +guest issues a with necessary input operands. The hypervisor after performing +the privilege operation returns a status code and output operands back to the +guest. + +HCALL ABI +========= +The ABI specification for a hcall between a pseries guest and PAPR hypervisor +is covered in section 14.5.3 of ref [2]_. Switch to the Hypervisor context is +done via the instruction **HVCS** that expects the Opcode for hcall is set in *r3* +and any in-arguments for the hcall are provided in registers *r4-r12*. If values +have to be passed through a memory buffer, the data stored in that buffer should be +in Big-endian byte order. + +Once control is returns back to the guest after hypervisor has serviced the +'HVCS' instruction the return value of the hcall is available in *r3* and any +out values are returned in registers *r4-r12*. Again like in case of in-arguments, +any out values stored in a memory buffer will be in Big-endian byte order. + +Powerpc arch code provides convenient wrappers named **plpar_hcall_xxx** defined +in a arch specific header [4]_ to issue hcalls from the linux kernel +running as pseries guest. + +Register Conventions +==================== + +Any hcall should follow same register convention as described in section 2.2.1.1 +of "64-Bit ELF V2 ABI Specification: Power Architecture"[5]_. Table below +summarizes these conventions: + ++----------+----------+-------------------------------------------+ +| Register |Volatile | Purpose | +| Range |(Y/N) | | ++==========+==========+===========================================+ +| r0 | Y | Optional-usage | ++----------+----------+-------------------------------------------+ +| r1 | N | Stack Pointer | ++----------+----------+-------------------------------------------+ +| r2 | N | TOC | ++----------+----------+-------------------------------------------+ +| r3 | Y | hcall opcode/return value | ++----------+----------+-------------------------------------------+ +| r4-r10 | Y | in and out values | ++----------+----------+-------------------------------------------+ +| r11 | Y | Optional-usage/Environmental pointer | ++----------+----------+-------------------------------------------+ +| r12 | Y | Optional-usage/Function entry address at | +| | | global entry point | ++----------+----------+-------------------------------------------+ +| r13 | N | Thread-Pointer | ++----------+----------+-------------------------------------------+ +| r14-r31 | N | Local Variables | ++----------+----------+-------------------------------------------+ +| LR | Y | Link Register | ++----------+----------+-------------------------------------------+ +| CTR | Y | Loop Counter | ++----------+----------+-------------------------------------------+ +| XER | Y | Fixed-point exception register. | ++----------+----------+-------------------------------------------+ +| CR0-1 | Y | Condition register fields. | ++----------+----------+-------------------------------------------+ +| CR2-4 | N | Condition register fields. | ++----------+----------+-------------------------------------------+ +| CR5-7 | Y | Condition register fields. | ++----------+----------+-------------------------------------------+ +| Others | N | | ++----------+----------+-------------------------------------------+ + +DRC & DRC Indexes +================= +:: + + DR1 Guest + +--+ +------------+ +---------+ + | | <----> | | | User | + +--+ DRC1 | | DRC | Space | + | PAPR | Index +---------+ + DR2 | Hypervisor | | | + +--+ | | <-----> | Kernel | + | | <----> | | Hcall | | + +--+ DRC2 +------------+ +---------+ + +PAPR hypervisor terms shared hardware resources like PCI devices, NVDIMMs etc +available for use by LPARs as Dynamic Resource (DR). When a DR is allocated to +an LPAR, PHYP creates a data-structure called Dynamic Resource Connector (DRC) +to manage LPAR access. An LPAR refers to a DRC via an opaque 32-bit number +called DRC-Index. The DRC-index value is provided to the LPAR via device-tree +where its present as an attribute in the device tree node associated with the +DR. + +HCALL Return-values +=================== + +After servicing the hcall, hypervisor sets the return-value in *r3* indicating +success or failure of the hcall. In case of a failure an error code indicates +the cause for error. These codes are defined and documented in arch specific +header [4]_. + +In some cases a hcall can potentially take a long time and need to be issued +multiple times in order to be completely serviced. These hcalls will usually +accept an opaque value *continue-token* within there argument list and a +return value of *H_CONTINUE* indicates that hypervisor hasn't still finished +servicing the hcall yet. + +To make such hcalls the guest need to set *continue-token == 0* for the +initial call and use the hypervisor returned value of *continue-token* +for each subsequent hcall until hypervisor returns a non *H_CONTINUE* +return value. + +HCALL Op-codes +============== + +Below is a partial list of HCALLs that are supported by PHYP. For the +corresponding opcode values please look into the arch specific header [4]_: + +**H_SCM_READ_METADATA** + +| Input: *drcIndex, offset, buffer-address, numBytesToRead* +| Out: *numBytesRead* +| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_Hardware* + +Given a DRC Index of an NVDIMM, read N-bytes from the the metadata area +associated with it, at a specified offset and copy it to provided buffer. +The metadata area stores configuration information such as label information, +bad-blocks etc. The metadata area is located out-of-band of NVDIMM storage +area hence a separate access semantics is provided. + +**H_SCM_WRITE_METADATA** + +| Input: *drcIndex, offset, data, numBytesToWrite* +| Out: *None* +| Return Value: *H_Success, H_Parameter, H_P2, H_P4, H_Hardware* + +Given a DRC Index of an NVDIMM, write N-bytes to the metadata area +associated with it, at the specified offset and from the provided buffer. + +**H_SCM_BIND_MEM** + +| Input: *drcIndex, startingScmBlockIndex, numScmBlocksToBind,* +| *targetLogicalMemoryAddress, continue-token* +| Out: *continue-token, targetLogicalMemoryAddress, numScmBlocksToBound* +| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_P4, H_Overlap,* +| *H_Too_Big, H_P5, H_Busy* + +Given a DRC-Index of an NVDIMM, map a continuous SCM blocks range +*(startingScmBlockIndex, startingScmBlockIndex+numScmBlocksToBind)* to the guest +at *targetLogicalMemoryAddress* within guest physical address space. In +case *targetLogicalMemoryAddress == 0xFFFFFFFF_FFFFFFFF* then hypervisor +assigns a target address to the guest. The HCALL can fail if the Guest has +an active PTE entry to the SCM block being bound. + +**H_SCM_UNBIND_MEM** +| Input: drcIndex, startingScmLogicalMemoryAddress, numScmBlocksToUnbind +| Out: numScmBlocksUnbound +| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_In_Use, H_Overlap,* +| *H_Busy, H_LongBusyOrder1mSec, H_LongBusyOrder10mSec* + +Given a DRC-Index of an NVDimm, unmap *numScmBlocksToUnbind* SCM blocks starting +at *startingScmLogicalMemoryAddress* from guest physical address space. The +HCALL can fail if the Guest has an active PTE entry to the SCM block being +unbound. + +**H_SCM_QUERY_BLOCK_MEM_BINDING** + +| Input: *drcIndex, scmBlockIndex* +| Out: *Guest-Physical-Address* +| Return Value: *H_Success, H_Parameter, H_P2, H_NotFound* + +Given a DRC-Index and an SCM Block index return the guest physical address to +which the SCM block is mapped to. + +**H_SCM_QUERY_LOGICAL_MEM_BINDING** + +| Input: *Guest-Physical-Address* +| Out: *drcIndex, scmBlockIndex* +| Return Value: *H_Success, H_Parameter, H_P2, H_NotFound* + +Given a guest physical address return which DRC Index and SCM block is mapped +to that address. + +**H_SCM_UNBIND_ALL** + +| Input: *scmTargetScope, drcIndex* +| Out: *None* +| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_In_Use, H_Busy,* +| *H_LongBusyOrder1mSec, H_LongBusyOrder10mSec* + +Depending on the Target scope unmap all SCM blocks belonging to all NVDIMMs +or all SCM blocks belonging to a single NVDIMM identified by its drcIndex +from the LPAR memory. + +**H_SCM_HEALTH** + +| Input: drcIndex +| Out: *health-bitmap, health-bit-valid-bitmap* +| Return Value: *H_Success, H_Parameter, H_Hardware* + +Given a DRC Index return the info on predictive failure and overall health of +the NVDIMM. The asserted bits in the health-bitmap indicate a single predictive +failure and health-bit-valid-bitmap indicate which bits in health-bitmap are +valid. + +**H_SCM_PERFORMANCE_STATS** + +| Input: drcIndex, resultBuffer Addr +| Out: None +| Return Value: *H_Success, H_Parameter, H_Unsupported, H_Hardware, H_Authority, H_Privilege* + +Given a DRC Index collect the performance statistics for NVDIMM and copy them +to the resultBuffer. + +References +========== +.. [1] "Power Architecture Platform Reference" + https://en.wikipedia.org/wiki/Power_Architecture_Platform_Reference +.. [2] "Linux on Power Architecture Platform Reference" + https://members.openpowerfoundation.org/document/dl/469 +.. [3] "Definitions and Notation" Book III-Section 14.5.3 + https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0 +.. [4] arch/powerpc/include/asm/hvcall.h +.. [5] "64-Bit ELF V2 ABI Specification: Power Architecture" + https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-power-architecture diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bf2b538aba12..497b7d0b2d7e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1,10 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 source "arch/powerpc/platforms/Kconfig.cputype" -config PPC32 - bool - default y if !PPC64 - config 32BIT bool default y if PPC32 @@ -133,7 +129,7 @@ config PPC select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 - select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UACCESS_MCSAFE if PPC64 @@ -173,6 +169,7 @@ config PPC select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if PPC32 + select HAVE_ARCH_KASAN_VMALLOC if PPC32 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT @@ -482,7 +479,7 @@ config MPROFILE_KERNEL config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ - PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE) + PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE) help Say Y here to be able to disable and re-enable individual CPUs at runtime on SMP machines. diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 4e1d39847462..0b063830eea8 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -371,7 +371,7 @@ config PPC_PTDUMP config PPC_DEBUG_WX bool "Warn on W+X mappings at boot" - depends on PPC_PTDUMP + depends on PPC_PTDUMP && STRICT_KERNEL_RWX help Generate a warning if any W+X mappings are found at boot. diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index 134f12f89b92..2268396ff4bb 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -17,11 +17,11 @@ quiet_cmd_head_check = CHKHEAD $@ quiet_cmd_relocs_check = CHKREL $@ ifdef CONFIG_PPC_BOOK3S_64 cmd_relocs_check = \ - $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" ; \ + $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \ $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@" else cmd_relocs_check = \ - $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" + $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" endif # `@true` prevents complaint when there is nothing to be done diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c index 1699e9531552..00c4d843a023 100644 --- a/arch/powerpc/boot/4xx.c +++ b/arch/powerpc/boot/4xx.c @@ -228,7 +228,7 @@ void ibm4xx_denali_fixup_memsize(void) dpath = 8; /* 64 bits */ /* get address pins (rows) */ - val = SDRAM0_READ(DDR0_42); + val = SDRAM0_READ(DDR0_42); row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT); if (row > max_row) diff --git a/arch/powerpc/boot/dts/mgcoge.dts b/arch/powerpc/boot/dts/mgcoge.dts index a2dd5f1da621..7de068991bde 100644 --- a/arch/powerpc/boot/dts/mgcoge.dts +++ b/arch/powerpc/boot/dts/mgcoge.dts @@ -224,7 +224,7 @@ spi@11aa0 { reg = <0x11a80 0x40 0x89fc 0x2>; interrupts = <2 8>; interrupt-parent = <&PIC>; - gpios = < &cpm2_pio_d 19 0>; + cs-gpios = < &cpm2_pio_d 19 0>; #address-cells = <1>; #size-cells = <0>; ds3106@1 { diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts index b6257186528e..ecebc27a2898 100644 --- a/arch/powerpc/boot/dts/mpc832x_rdb.dts +++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts @@ -249,7 +249,7 @@ spi@4c0 { reg = <0x4c0 0x40>; interrupts = <2>; interrupt-parent = <&qeic>; - gpios = <&qe_pio_d 13 0>; + cs-gpios = <&qe_pio_d 13 0>; mode = "cpu-qe"; mmc-slot@0 { diff --git a/arch/powerpc/boot/dts/mpc8610_hpcd.dts b/arch/powerpc/boot/dts/mpc8610_hpcd.dts index 1a8321ac105a..33bbe58c1ad0 100644 --- a/arch/powerpc/boot/dts/mpc8610_hpcd.dts +++ b/arch/powerpc/boot/dts/mpc8610_hpcd.dts @@ -200,7 +200,7 @@ spi@7000 { interrupts = <59 2>; interrupt-parent = <&mpic>; mode = "cpu"; - gpios = <&sdcsr_pio 7 0>; + cs-gpios = <&sdcsr_pio 7 0>; sleep = <&pmc 0x00000800 0>; mmc-slot@0 { diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index f0c8a07cc274..7705a5c3f4ea 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -59,7 +59,6 @@ CONFIG_BLK_DEV_SD=y # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set # CONFIG_NET_VENDOR_EXAR is not set -# CONFIG_NET_VENDOR_HP is not set CONFIG_IBM_EMAC=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index ed02f12dbd54..22dc0dadf576 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_EBONY is not set CONFIG_SAM440EP=y CONFIG_CMDLINE_BOOL=y diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig index fdb11daeb688..789622ffd844 100644 --- a/arch/powerpc/configs/52xx/pcm030_defconfig +++ b/arch/powerpc/configs/52xx/pcm030_defconfig @@ -14,8 +14,6 @@ CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_PPC_CHRP is not set CONFIG_PPC_MPC52xx=y CONFIG_PPC_MPC5200_SIMPLE=y diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig index 648c6b3dccf9..24bf1bde1bb4 100644 --- a/arch/powerpc/configs/83xx/kmeter1_defconfig +++ b/arch/powerpc/configs/83xx/kmeter1_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y # CONFIG_MSDOS_PARTITION is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 510f7fd1f6a3..f55e23cb176c 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -9,7 +9,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y -# CONFIG_IOSCHED_CFQ is not set CONFIG_PPC_ADDER875=y CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig index 6e08d9502d89..00d69965f898 100644 --- a/arch/powerpc/configs/ep8248e_defconfig +++ b/arch/powerpc/configs/ep8248e_defconfig @@ -6,7 +6,6 @@ CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y CONFIG_PARTITION_ADVANCED=y -# CONFIG_IOSCHED_CFQ is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_82xx=y diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index 9c1bf60f1e19..0e2e5e81a359 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -11,7 +11,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y -# CONFIG_IOSCHED_CFQ is not set CONFIG_PPC_EP88XC=y CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig index 6ce4f206eac7..dcc8dccf54f3 100644 --- a/arch/powerpc/configs/mgcoge_defconfig +++ b/arch/powerpc/configs/mgcoge_defconfig @@ -12,7 +12,6 @@ CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_SLAB=y CONFIG_PARTITION_ADVANCED=y -# CONFIG_IOSCHED_CFQ is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_82xx=y CONFIG_MGCOGE=y diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index 1f3a045ab081..e39346b3dc3b 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -9,7 +9,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y -# CONFIG_IOSCHED_CFQ is not set # CONFIG_PPC_CHRP is not set CONFIG_PPC_MPC512x=y CONFIG_MPC512x_LPBFIFO=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 0327a329316f..82a008c04eae 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -11,7 +11,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y -# CONFIG_IOSCHED_CFQ is not set CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_100=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 32841456a573..71749377d164 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -181,7 +181,6 @@ CONFIG_MLX5_FPGA=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MYRI10GE=m -CONFIG_QLGE=m CONFIG_NETXEN_NIC=m CONFIG_USB_NET_DRIVERS=m # CONFIG_WLAN is not set diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index b250e6f5a7ca..7e68cb222c7b 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -189,7 +189,6 @@ CONFIG_MLX4_EN=m CONFIG_MYRI10GE=m CONFIG_S2IO=m CONFIG_PASEMI_MAC=y -CONFIG_QLGE=m CONFIG_NETXEN_NIC=m CONFIG_SUNGEM=y CONFIG_GELIC_NET=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 7e28919041cf..3e2f44f38ac5 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -507,7 +507,6 @@ CONFIG_FORCEDETH=m CONFIG_HAMACHI=m CONFIG_YELLOWFIN=m CONFIG_QLA3XXX=m -CONFIG_QLGE=m CONFIG_NETXEN_NIC=m CONFIG_8139CP=m CONFIG_8139TOO=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 26126b4d4de3..6b68109e248f 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -169,7 +169,6 @@ CONFIG_IXGBE=m CONFIG_I40E=m CONFIG_MLX4_EN=m CONFIG_MYRI10GE=m -CONFIG_QLGE=m CONFIG_NETXEN_NIC=m CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 069f67f12731..1b6bdad36b13 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -1,8 +1,3 @@ -CONFIG_PPC64=y -CONFIG_ALTIVEC=y -CONFIG_VSX=y -CONFIG_NR_CPUS=2048 -CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_KERNEL_XZ=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y @@ -28,17 +23,15 @@ CONFIG_EXPERT=y # CONFIG_AIO is not set CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set +# CONFIG_SLAB_MERGE_DEFAULT is not set +CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y -CONFIG_JUMP_LABEL=y -CONFIG_STRICT_KERNEL_RWX=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_SIG=y -CONFIG_MODULE_SIG_FORCE=y -CONFIG_MODULE_SIG_SHA512=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MQ_IOSCHED_DEADLINE is not set -# CONFIG_MQ_IOSCHED_KYBER is not set +CONFIG_PPC64=y +CONFIG_ALTIVEC=y +CONFIG_VSX=y +CONFIG_NR_CPUS=2048 +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_PANIC_TIMEOUT=30 # CONFIG_PPC_VAS is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set @@ -46,17 +39,27 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_IDLE=y CONFIG_HZ_100=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_PRESERVE_FA_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y -# CONFIG_COMPACTION is not set -# CONFIG_MIGRATION is not set CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet" # CONFIG_SECCOMP is not set # CONFIG_PPC_MEM_KEYS is not set +CONFIG_JUMP_LABEL=y +CONFIG_STRICT_KERNEL_RWX=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_SIG_FORCE=y +CONFIG_MODULE_SIG_SHA512=y +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MQ_IOSCHED_DEADLINE is not set +# CONFIG_MQ_IOSCHED_KYBER is not set +# CONFIG_COMPACTION is not set +# CONFIG_MIGRATION is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -138,7 +141,6 @@ CONFIG_TIGON3=m CONFIG_BNX2X=m # CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CADENCE is not set -# CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_CAVIUM is not set CONFIG_CHELSIO_T1=m # CONFIG_NET_VENDOR_CISCO is not set @@ -147,7 +149,6 @@ CONFIG_CHELSIO_T1=m # CONFIG_NET_VENDOR_DLINK is not set CONFIG_BE2NET=m # CONFIG_NET_VENDOR_EZCHIP is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_HUAWEI is not set CONFIG_E1000=m CONFIG_E1000E=m @@ -155,7 +156,6 @@ CONFIG_IGB=m CONFIG_IXGB=m CONFIG_IXGBE=m CONFIG_I40E=m -CONFIG_S2IO=m # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m # CONFIG_MLX4_CORE_GEN2 is not set @@ -166,12 +166,12 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_MICROSEMI is not set CONFIG_MYRI10GE=m # CONFIG_NET_VENDOR_NATSEMI is not set +CONFIG_S2IO=m # CONFIG_NET_VENDOR_NETRONOME is not set # CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set -CONFIG_QLGE=m CONFIG_NETXEN_NIC=m CONFIG_QED=m CONFIG_QEDE=m @@ -238,7 +238,6 @@ CONFIG_HID_CYPRESS=y CONFIG_HID_EZKEY=y CONFIG_HID_ITE=y CONFIG_HID_KENSINGTON=y -CONFIG_HID_LOGITECH=y CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y CONFIG_USB_HIDDEV=y @@ -275,6 +274,18 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y +CONFIG_ENCRYPTED_KEYS=y +CONFIG_SECURITY=y +CONFIG_HARDENED_USERCOPY=y +# CONFIG_HARDENED_USERCOPY_FALLBACK is not set +CONFIG_HARDENED_USERCOPY_PAGESPAN=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y +CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y +# CONFIG_INTEGRITY is not set +CONFIG_LSM="yama,loadpin,safesetid,integrity" +# CONFIG_CRYPTO_HW is not set CONFIG_CRC16=y CONFIG_CRC_ITU_T=y CONFIG_LIBCRC32C=y @@ -285,17 +296,20 @@ CONFIG_LIBCRC32C=y # CONFIG_XZ_DEC_SPARC is not set CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y +CONFIG_SLUB_DEBUG_ON=y +CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_PANIC_ON_OOPS=y CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y CONFIG_WQ_WATCHDOG=y # CONFIG_SCHED_DEBUG is not set +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y +CONFIG_DEBUG_CREDENTIALS=y # CONFIG_FTRACE is not set -# CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_XMON=y -CONFIG_XMON_DEFAULT=y -CONFIG_ENCRYPTED_KEYS=y -# CONFIG_CRYPTO_ECHAINIV is not set -# CONFIG_CRYPTO_HW is not set +# CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index 29b19ec7e5d7..b964084e4056 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -77,5 +77,4 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_CRC_T10DIF=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index ffed2b4256d6..eda8bfb2d0a3 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y -# CONFIG_IOSCHED_CFQ is not set CONFIG_TQM8XX=y CONFIG_8xx_COPYBACK=y # CONFIG_8xx_CPU15 is not set diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index f9dc597b0b86..3c0ba22dc360 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -102,41 +102,91 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) isync(); /* Context sync required after mtsrin() */ } -static inline void allow_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void allow_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { u32 addr, end; - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + BUILD_BUG_ON(dir == KUAP_CURRENT); + + if (!(dir & KUAP_WRITE)) return; addr = (__force u32)to; - if (!addr || addr >= TASK_SIZE || !size) + if (unlikely(addr >= TASK_SIZE || !size)) return; end = min(addr + size, TASK_SIZE); + current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf); kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ } -static inline void prevent_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void prevent_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { - u32 addr = (__force u32)to; - u32 end = min(addr + size, TASK_SIZE); + u32 addr, end; - if (!addr || addr >= TASK_SIZE || !size) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + + if (dir == KUAP_CURRENT) { + u32 kuap = current->thread.kuap; + + if (unlikely(!kuap)) + return; + + addr = kuap & 0xf0000000; + end = kuap << 28; + } else if (dir & KUAP_WRITE) { + addr = (__force u32)to; + end = min(addr + size, TASK_SIZE); + + if (unlikely(addr >= TASK_SIZE || !size)) + return; + } else { return; + } current->thread.kuap = 0; kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline unsigned long prevent_user_access_return(void) { + unsigned long flags = current->thread.kuap; + unsigned long addr = flags & 0xf0000000; + unsigned long end = flags << 28; + void __user *to = (__force void __user *)addr; + + if (flags) + prevent_user_access(to, to, end - addr, KUAP_READ_WRITE); + + return flags; +} + +static inline void restore_user_access(unsigned long flags) +{ + unsigned long addr = flags & 0xf0000000; + unsigned long end = flags << 28; + void __user *to = (__force void __user *)addr; + + if (flags) + allow_user_access(to, to, end - addr, KUAP_READ_WRITE); +} + +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + unsigned long begin = regs->kuap & 0xf0000000; + unsigned long end = regs->kuap << 28; + if (!is_write) return false; - return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !"); + return WARN(address < begin || address >= end, + "Bug: write fault blocked by segment registers !"); } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 0796533d37dd..5b39c11e884a 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -193,7 +193,12 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) #endif + +#ifdef CONFIG_KASAN_VMALLOC +#define VMALLOC_END _ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#else #define VMALLOC_END ioremap_bot +#endif #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index f254de956d6a..90dd3a3fc8c7 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -63,6 +63,14 @@ * because that would require an expensive read/modify write of the AMR. */ +static inline unsigned long get_kuap(void) +{ + if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) + return 0; + + return mfspr(SPRN_AMR); +} + static inline void set_kuap(unsigned long value) { if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) @@ -77,25 +85,43 @@ static inline void set_kuap(unsigned long value) isync(); } -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { // This is written so we can resolve to a single case at build time - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (dir == KUAP_READ) set_kuap(AMR_KUAP_BLOCK_WRITE); - else if (__builtin_constant_p(from) && from == NULL) + else if (dir == KUAP_WRITE) set_kuap(AMR_KUAP_BLOCK_READ); - else + else if (dir == KUAP_READ_WRITE) set_kuap(0); + else + BUILD_BUG(); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline unsigned long prevent_user_access_return(void) +{ + unsigned long flags = get_kuap(); + + set_kuap(AMR_KUAP_BLOCKED); + + return flags; +} + +static inline void restore_user_access(unsigned long flags) +{ + set_kuap(flags); +} + +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index cf00ff0d121d..40a4d3c6fd99 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -212,6 +212,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000) #define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000) #define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000) +#define CPU_FTR_P9_RADIX_PREFETCH_BUG LONG_ASM_CONST(0x0002000000000000) #ifndef __ASSEMBLY__ @@ -459,8 +460,10 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR) -#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 -#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1) +#define CPU_FTRS_POWER9_DD2_0 (CPU_FTRS_POWER9 | CPU_FTR_P9_RADIX_PREFETCH_BUG) +#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | \ + CPU_FTR_P9_RADIX_PREFETCH_BUG | \ + CPU_FTR_POWER9_DD2_1) #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \ CPU_FTR_P9_TM_HV_ASSIST | \ CPU_FTR_P9_TM_XER_SO_BUG) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index b3e214a97f3a..ca33f4ef6cb4 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -33,7 +33,7 @@ #define FW_FEATURE_LLAN ASM_CONST(0x0000000000010000) #define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000000020000) #define FW_FEATURE_XDABR ASM_CONST(0x0000000000040000) -#define FW_FEATURE_MULTITCE ASM_CONST(0x0000000000080000) +#define FW_FEATURE_PUT_TCE_IND ASM_CONST(0x0000000000080000) #define FW_FEATURE_SPLPAR ASM_CONST(0x0000000000100000) #define FW_FEATURE_LPAR ASM_CONST(0x0000000000400000) #define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000) @@ -51,6 +51,7 @@ #define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000) #define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000) #define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000) +#define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) #ifndef __ASSEMBLY__ @@ -63,7 +64,8 @@ enum { FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR | - FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | + FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE | + FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO | FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 27ac6f5d2891..f2f8d8aa8e3b 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -34,7 +34,11 @@ struct arch_hw_breakpoint { #define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \ HW_BRK_TYPE_HYP) +#ifdef CONFIG_PPC_8xx +#define HW_BREAKPOINT_ALIGN 0x3 +#else #define HW_BREAKPOINT_ALIGN 0x7 +#endif #define DABR_MAX_LEN 8 #define DAWR_MAX_LEN 512 diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 296e51c2f066..fbff9ff9032e 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -31,9 +31,11 @@ void kasan_early_init(void); void kasan_mmu_init(void); void kasan_init(void); +void kasan_late_init(void); #else static inline void kasan_init(void) { } static inline void kasan_mmu_init(void) { } +static inline void kasan_late_init(void) { } #endif #endif /* __ASSEMBLY */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 5b5e39643a27..92bcd1a26d73 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -2,6 +2,16 @@ #ifndef _ASM_POWERPC_KUP_H_ #define _ASM_POWERPC_KUP_H_ +#define KUAP_READ 1 +#define KUAP_WRITE 2 +#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) +/* + * For prevent_user_access() only. + * Use the current saved situation instead of the to/from/size params. + * Used on book3s/32 + */ +#define KUAP_CURRENT 4 + #ifdef CONFIG_PPC64 #include #endif @@ -42,32 +52,55 @@ void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) { } + unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) { } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } + unsigned long size, unsigned long dir) { } +static inline unsigned long prevent_user_access_return(void) { return 0UL; } +static inline void restore_user_access(unsigned long flags) { } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return false; +} #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) { - allow_user_access(NULL, from, size); + allow_user_access(NULL, from, size, KUAP_READ); } static inline void allow_write_to_user(void __user *to, unsigned long size) { - allow_user_access(to, NULL, size); + allow_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void allow_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + allow_user_access(to, from, size, KUAP_READ_WRITE); } static inline void prevent_read_from_user(const void __user *from, unsigned long size) { - prevent_user_access(NULL, from, size); + prevent_user_access(NULL, from, size, KUAP_READ); } static inline void prevent_write_to_user(void __user *to, unsigned long size) { - prevent_user_access(to, NULL, size); + prevent_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void prevent_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + prevent_user_access(to, from, size, KUAP_READ_WRITE); +} + +static inline void prevent_current_access_user(void) +{ + prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT); } #endif /* !__ASSEMBLY__ */ -#endif /* _ASM_POWERPC_KUP_H_ */ +#endif /* _ASM_POWERPC_KUAP_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 1006a427e99c..85ed2390fb99 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -35,18 +35,33 @@ #include static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_INIT); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_KUAP); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline unsigned long prevent_user_access_return(void) +{ + unsigned long flags = mfspr(SPRN_MD_AP); + + mtspr(SPRN_MD_AP, MD_APG_KUAP); + + return flags; +} + +static inline void restore_user_access(unsigned long flags) +{ + mtspr(SPRN_MD_AP, flags); +} + +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), "Bug: fault blocked by AP register !"); diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 552b96eef0c8..60c4d829152e 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -114,7 +114,12 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) #endif + +#ifdef CONFIG_KASAN_VMALLOC +#define VMALLOC_END _ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#else #define VMALLOC_END ioremap_bot +#endif /* * Bits in a linux-style PTE. These match the bits in the diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 7f1fd41e3065..86332080399a 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -209,7 +209,7 @@ static inline bool pfn_valid(unsigned long pfn) */ #if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE) #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET)) -#define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET) +#define __pa(x) ((phys_addr_t)(unsigned long)(x) - VIRT_PHYS_OFFSET) #else #ifdef CONFIG_PPC64 /* diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index ea6ec65970ef..69f4cb3b7c56 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -223,12 +223,15 @@ struct pci_dn { extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus, int devfn); extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev); -extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev); -extern void remove_dev_pci_data(struct pci_dev *pdev); extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, struct device_node *dn); extern void pci_remove_device_node_info(struct device_node *dn); +#ifdef CONFIG_PCI_IOV +struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev); +void remove_sriov_vf_pdns(struct pci_dev *pdev); +#endif + static inline int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) { diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 327567b8f7d6..63ed7e3b0ba3 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -113,7 +113,6 @@ extern pgprot_t pci_phys_mem_access_prot(struct file *file, pgprot_t prot); extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose); -extern void pcibios_setup_bus_devices(struct pci_bus *bus); extern void pcibios_setup_bus_self(struct pci_bus *bus); extern void pcibios_setup_phb_io_space(struct pci_controller *hose); extern void pcibios_scan_phb(struct pci_controller *hose); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 0e4ec8cc37b7..8cc543ed114c 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -94,12 +94,6 @@ void mark_initmem_nx(void); static inline void mark_initmem_nx(void) { } #endif -#ifdef CONFIG_PPC_DEBUG_WX -void ptdump_check_wx(void); -#else -static inline void ptdump_check_wx(void) { } -#endif - /* * When used, PTE_FRAG_NR is defined in subarch pgtable.h * so we are sure it is included when arriving here. diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index edcb1fc50aeb..d0ee0ede5767 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -15,6 +15,7 @@ #define PCI_SLOT_ID_PREFIX (1UL << 63) #define PCI_SLOT_ID(phb_id, bdfn) \ (PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id)) +#define PCI_PHB_SLOT_ID(phb_id) (phb_id) extern int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id); extern int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index a9993e7a443b..8387698bd5b6 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -162,6 +162,12 @@ struct thread_struct { #endif #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) unsigned long kuap; /* opened segments for user access */ +#endif +#ifdef CONFIG_VMAP_STACK + unsigned long srr0; + unsigned long srr1; + unsigned long dar; + unsigned long dsisr; #endif /* Debug Registers */ struct debug_reg debug; @@ -412,6 +418,9 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32) extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); extern unsigned long isa206_idle_insn_mayloss(unsigned long type); +#ifdef CONFIG_PPC_970_NAP +extern void power4_idle_nap(void); +#endif extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index 07df35ee8cbc..299ee7be0f67 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -35,7 +35,21 @@ #define SPRN_CMPE 152 #define SPRN_CMPF 153 #define SPRN_LCTRL1 156 +#define LCTRL1_CTE_GT 0xc0000000 +#define LCTRL1_CTF_LT 0x14000000 +#define LCTRL1_CRWE_RW 0x00000000 +#define LCTRL1_CRWE_RO 0x00040000 +#define LCTRL1_CRWE_WO 0x000c0000 +#define LCTRL1_CRWF_RW 0x00000000 +#define LCTRL1_CRWF_RO 0x00010000 +#define LCTRL1_CRWF_WO 0x00030000 #define SPRN_LCTRL2 157 +#define LCTRL2_LW0EN 0x80000000 +#define LCTRL2_LW0LA_E 0x00000000 +#define LCTRL2_LW0LA_F 0x04000000 +#define LCTRL2_LW0LA_EandF 0x08000000 +#define LCTRL2_LW0LADC 0x02000000 +#define LCTRL2_SLW0EN 0x00000002 #ifdef CONFIG_PPC_8xx #define SPRN_ICTRL 158 #endif diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 8e1d0195ac36..a2270749b282 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -10,13 +10,31 @@ #define _ASM_POWERPC_THREAD_INFO_H #include +#include #ifdef __KERNEL__ +#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT +#define THREAD_SHIFT PAGE_SHIFT +#else #define THREAD_SHIFT CONFIG_THREAD_SHIFT +#endif #define THREAD_SIZE (1 << THREAD_SHIFT) +/* + * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by + * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry + * assembly. + */ +#ifdef CONFIG_VMAP_STACK +#define THREAD_ALIGN_SHIFT (THREAD_SHIFT + 1) +#else +#define THREAD_ALIGN_SHIFT THREAD_SHIFT +#endif + +#define THREAD_ALIGN (1 << THREAD_ALIGN_SHIFT) + #ifndef __ASSEMBLY__ #include #include diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c92fe7fe9692..2f500debae21 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -91,9 +91,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size, __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) #define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) #define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true) + +#define __get_user_allowed(x, ptr) \ + __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) +#define __put_user_allowed(x, ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false) #define __get_user_inatomic(x, ptr) \ __get_user_nosleep((x), (ptr), sizeof(*(ptr))) @@ -138,10 +143,9 @@ extern long __put_user_bad(void); : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ -#define __put_user_size(x, ptr, size, retval) \ +#define __put_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ - allow_write_to_user(ptr, size); \ switch (size) { \ case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ @@ -149,17 +153,26 @@ do { \ case 8: __put_user_asm2(x, ptr, retval); break; \ default: __put_user_bad(); \ } \ +} while (0) + +#define __put_user_size(x, ptr, size, retval) \ +do { \ + allow_write_to_user(ptr, size); \ + __put_user_size_allowed(x, ptr, size, retval); \ prevent_write_to_user(ptr, size); \ } while (0) -#define __put_user_nocheck(x, ptr, size) \ +#define __put_user_nocheck(x, ptr, size, do_allow) \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ might_fault(); \ __chk_user_ptr(ptr); \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + if (do_allow) \ + __put_user_size((x), __pu_addr, (size), __pu_err); \ + else \ + __put_user_size_allowed((x), __pu_addr, (size), __pu_err); \ __pu_err; \ }) @@ -236,13 +249,12 @@ extern long __get_user_bad(void); : "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ -#define __get_user_size(x, ptr, size, retval) \ +#define __get_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ __chk_user_ptr(ptr); \ if (size > sizeof(x)) \ (x) = __get_user_bad(); \ - allow_read_from_user(ptr, size); \ switch (size) { \ case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ @@ -250,6 +262,12 @@ do { \ case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ +} while (0) + +#define __get_user_size(x, ptr, size, retval) \ +do { \ + allow_read_from_user(ptr, size); \ + __get_user_size_allowed(x, ptr, size, retval); \ prevent_read_from_user(ptr, size); \ } while (0) @@ -260,7 +278,7 @@ do { \ #define __long_type(x) \ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) -#define __get_user_nocheck(x, ptr, size) \ +#define __get_user_nocheck(x, ptr, size, do_allow) \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ @@ -269,7 +287,10 @@ do { \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + if (do_allow) \ + __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + else \ + __get_user_size_allowed(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) @@ -313,9 +334,9 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) unsigned long ret; barrier_nospec(); - allow_user_access(to, from, n); + allow_read_write_user(to, from, n); ret = __copy_tofrom_user(to, from, n); - prevent_user_access(to, from, n); + prevent_read_write_user(to, from, n); return ret; } #endif /* __powerpc64__ */ @@ -356,33 +377,40 @@ static inline unsigned long raw_copy_from_user(void *to, return ret; } -static inline unsigned long raw_copy_to_user(void __user *to, - const void *from, unsigned long n) +static inline unsigned long +raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n) { - unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - ret = 1; + unsigned long ret = 1; switch (n) { case 1: - __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret); + __put_user_size_allowed(*(u8 *)from, (u8 __user *)to, 1, ret); break; case 2: - __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret); + __put_user_size_allowed(*(u16 *)from, (u16 __user *)to, 2, ret); break; case 4: - __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret); + __put_user_size_allowed(*(u32 *)from, (u32 __user *)to, 4, ret); break; case 8: - __put_user_size(*(u64 *)from, (u64 __user *)to, 8, ret); + __put_user_size_allowed(*(u64 *)from, (u64 __user *)to, 8, ret); break; } if (ret == 0) return 0; } + return __copy_tofrom_user(to, (__force const void __user *)from, n); +} + +static inline unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + unsigned long ret; + allow_write_to_user(to, n); - ret = __copy_tofrom_user(to, (__force const void __user *)from, n); + ret = raw_copy_to_user_allowed(to, from, n); prevent_write_to_user(to, n); return ret; } @@ -428,4 +456,22 @@ extern long __copy_from_user_flushcache(void *dst, const void __user *src, extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len); +static __must_check inline bool user_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr, len))) + return false; + allow_read_write_user((void __user *)ptr, ptr, len); + return true; +} +#define user_access_begin user_access_begin +#define user_access_end prevent_current_access_user +#define user_access_save prevent_user_access_return +#define user_access_restore restore_user_access + +#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) +#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) +#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e) +#define unsafe_copy_to_user(d, s, l, e) \ + unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e) + #endif /* _ARCH_POWERPC_UACCESS_H */ diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 40f13f3626d3..b9ef6cf50ea5 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -108,16 +108,22 @@ struct vdso_data { __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ - __u32 dcache_block_size; /* L1 d-cache block size */ - __u32 icache_block_size; /* L1 i-cache block size */ - __u32 dcache_log_block_size; /* L1 d-cache log block size */ - __u32 icache_log_block_size; /* L1 i-cache log block size */ }; #endif /* CONFIG_PPC64 */ extern struct vdso_data *vdso_data; +#else /* __ASSEMBLY__ */ + +.macro get_datapage ptr, tmp + bcl 20, 31, .+4 + mflr \ptr + addi \ptr, \ptr, (__kernel_datapage_offset - (.-4))@l + lwz \tmp, 0(\ptr) + add \ptr, \tmp, \ptr +.endm + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 24cdf97376c4..93f982dbb3d4 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -87,56 +87,56 @@ extern bool __xive_enabled; static inline bool xive_enabled(void) { return __xive_enabled; } -extern bool xive_spapr_init(void); -extern bool xive_native_init(void); -extern void xive_smp_probe(void); -extern int xive_smp_prepare_cpu(unsigned int cpu); -extern void xive_smp_setup_cpu(void); -extern void xive_smp_disable_cpu(void); -extern void xive_teardown_cpu(void); -extern void xive_shutdown(void); -extern void xive_flush_interrupt(void); +bool xive_spapr_init(void); +bool xive_native_init(void); +void xive_smp_probe(void); +int xive_smp_prepare_cpu(unsigned int cpu); +void xive_smp_setup_cpu(void); +void xive_smp_disable_cpu(void); +void xive_teardown_cpu(void); +void xive_shutdown(void); +void xive_flush_interrupt(void); /* xmon hook */ -extern void xmon_xive_do_dump(int cpu); -extern int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d); +void xmon_xive_do_dump(int cpu); +int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d); /* APIs used by KVM */ -extern u32 xive_native_default_eq_shift(void); -extern u32 xive_native_alloc_vp_block(u32 max_vcpus); -extern void xive_native_free_vp_block(u32 vp_base); -extern int xive_native_populate_irq_data(u32 hw_irq, - struct xive_irq_data *data); -extern void xive_cleanup_irq_data(struct xive_irq_data *xd); -extern u32 xive_native_alloc_irq(void); -extern void xive_native_free_irq(u32 irq); -extern int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq); +u32 xive_native_default_eq_shift(void); +u32 xive_native_alloc_vp_block(u32 max_vcpus); +void xive_native_free_vp_block(u32 vp_base); +int xive_native_populate_irq_data(u32 hw_irq, + struct xive_irq_data *data); +void xive_cleanup_irq_data(struct xive_irq_data *xd); +u32 xive_native_alloc_irq(void); +void xive_native_free_irq(u32 irq); +int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq); -extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, - __be32 *qpage, u32 order, bool can_escalate); -extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); +int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, + __be32 *qpage, u32 order, bool can_escalate); +void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); -extern void xive_native_sync_source(u32 hw_irq); -extern void xive_native_sync_queue(u32 hw_irq); -extern bool is_xive_irq(struct irq_chip *chip); -extern int xive_native_enable_vp(u32 vp_id, bool single_escalation); -extern int xive_native_disable_vp(u32 vp_id); -extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); -extern bool xive_native_has_single_escalation(void); +void xive_native_sync_source(u32 hw_irq); +void xive_native_sync_queue(u32 hw_irq); +bool is_xive_irq(struct irq_chip *chip); +int xive_native_enable_vp(u32 vp_id, bool single_escalation); +int xive_native_disable_vp(u32 vp_id); +int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); +bool xive_native_has_single_escalation(void); -extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio, - u64 *out_qpage, - u64 *out_qsize, - u64 *out_qeoi_page, - u32 *out_escalate_irq, - u64 *out_qflags); +int xive_native_get_queue_info(u32 vp_id, uint32_t prio, + u64 *out_qpage, + u64 *out_qsize, + u64 *out_qeoi_page, + u32 *out_escalate_irq, + u64 *out_qflags); -extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle, - u32 *qindex); -extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, - u32 qindex); -extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state); -extern bool xive_native_has_queue_state_support(void); +int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle, + u32 *qindex); +int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, + u32 qindex); +int xive_native_get_vp_state(u32 vp_id, u64 *out_state); +bool xive_native_has_queue_state_support(void); #else diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 157b0147921f..78a1b22d4fd8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -62,8 +62,7 @@ obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o -obj-$(CONFIG_PPC_970_NAP) += idle_power4.o -obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o +obj-$(CONFIG_PPC_BOOK3S_IDLE) += idle_book3s.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 3d47aec7becf..c25e562f1cd9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -127,6 +127,12 @@ int main(void) OFFSET(KSP_VSID, thread_struct, ksp_vsid); #else /* CONFIG_PPC64 */ OFFSET(PGDIR, thread_struct, pgdir); +#ifdef CONFIG_VMAP_STACK + OFFSET(SRR0, thread_struct, srr0); + OFFSET(SRR1, thread_struct, srr1); + OFFSET(DAR, thread_struct, dar); + OFFSET(DSISR, thread_struct, dsisr); +#endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); @@ -389,11 +395,11 @@ int main(void) OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec); OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res); +#ifdef CONFIG_PPC64 OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size); -#ifdef CONFIG_PPC64 OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64); OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec); OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec); @@ -413,7 +419,10 @@ int main(void) DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); + DEFINE(CLOCK_MAX, CLOCK_TAI); DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); + DEFINE(EINVAL, EINVAL); + DEFINE(KTIME_LOW_RES, KTIME_LOW_RES); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 180b3a5d1001..182b4047c1ef 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -727,17 +727,20 @@ static __init void cpufeatures_cpu_quirks(void) /* * Not all quirks can be derived from the cpufeatures device tree. */ - if ((version & 0xffffefff) == 0x004e0200) - ; /* DD2.0 has no feature flag */ - else if ((version & 0xffffefff) == 0x004e0201) + if ((version & 0xffffefff) == 0x004e0200) { + /* DD2.0 has no feature flag */ + cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG; + } else if ((version & 0xffffefff) == 0x004e0201) { cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; - else if ((version & 0xffffefff) == 0x004e0202) { + cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG; + } else if ((version & 0xffffefff) == 0x004e0202) { cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST; cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG; cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; - } else if ((version & 0xffff0000) == 0x004e0000) + } else if ((version & 0xffff0000) == 0x004e0000) { /* DD2.1 and up have DD2_1 */ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; + } if ((version & 0xffff0000) == 0x004e0000) { cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index bc8a551013be..17cb3e9b5697 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -503,7 +503,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) rc = 1; if (pe->state & EEH_PE_ISOLATED) { pe->check_count++; - if (pe->check_count % EEH_MAX_FAILS == 0) { + if (pe->check_count == EEH_MAX_FAILS) { dn = pci_device_to_OF_node(dev); if (dn) location = of_get_property(dn, "ibm,loc-code", @@ -1191,7 +1191,6 @@ void eeh_add_device_late(struct pci_dev *dev) eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); - edev->mode &= ~EEH_DEV_SYSFS; /* * We definitely should have the PCI device removed @@ -1295,6 +1294,23 @@ void eeh_remove_device(struct pci_dev *dev) */ edev->pdev = NULL; + /* + * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to + * remove the sysfs files before clearing dev.archdata.edev + */ + if (edev->mode & EEH_DEV_SYSFS) + eeh_sysfs_remove_device(dev); + + /* + * We're removing from the PCI subsystem, that means + * the PCI device driver can't support EEH or not + * well. So we rely on hotplug completely to do recovery + * for the specific PCI device. + */ + edev->mode |= EEH_DEV_NO_HANDLER; + + eeh_addr_cache_rmv_dev(dev); + /* * The flag "in_error" is used to trace EEH devices for VFs * in error state or not. It's set in eeh_report_error(). If @@ -1307,18 +1323,6 @@ void eeh_remove_device(struct pci_dev *dev) eeh_rmv_from_parent_pe(edev); else edev->mode |= EEH_DEV_DISCONNECTED; - - /* - * We're removing from the PCI subsystem, that means - * the PCI device driver can't support EEH or not - * well. So we rely on hotplug completely to do recovery - * for the specific PCI device. - */ - edev->mode |= EEH_DEV_NO_HANDLER; - - eeh_addr_cache_rmv_dev(dev); - eeh_sysfs_remove_device(dev); - edev->mode &= ~EEH_DEV_SYSFS; } int eeh_unfreeze_pe(struct eeh_pe *pe) diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index cf11277ebd02..6b50bf15d8c1 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -159,18 +159,10 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo, static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) { - struct pci_dn *pdn; struct eeh_dev *edev; int i; - pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); - if (!pdn) { - pr_warn("PCI: no pci dn found for dev=%s\n", - pci_name(dev)); - return; - } - - edev = pdn_to_eeh_dev(pdn); + edev = pci_dev_to_eeh_dev(dev); if (!edev) { pr_warn("PCI: no EEH dev found for %s\n", pci_name(dev)); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 3dd1a422fc29..a1eaffe868de 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -525,12 +525,6 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata) pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); edev->pdev = NULL; - - /* - * We have to set the VF PE number to invalid one, which is - * required to plug the VF successfully. - */ - pdn->pe_number = IODA_INVALID_PE; #endif if (rmv_data) list_add(&edev->rmv_entry, &rmv_data->removed_vf_list); diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index ab44d965a53c..4fb0f1e1017a 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -14,7 +14,7 @@ /** * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic * @_name: name of file in sysfs directory - * @_memb: name of member in struct pci_dn to access + * @_memb: name of member in struct eeh_dev to access * @_format: printf format for display * * All of the attributes look very similar, so just @@ -75,7 +75,7 @@ static ssize_t eeh_pe_state_store(struct device *dev, static DEVICE_ATTR_RW(eeh_pe_state); -#ifdef CONFIG_PCI_IOV +#if defined(CONFIG_PCI_IOV) && defined(CONFIG_PPC_PSERIES) static ssize_t eeh_notify_resume_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -86,7 +86,6 @@ static ssize_t eeh_notify_resume_show(struct device *dev, if (!edev || !edev->pe) return -ENODEV; - pdn = pci_get_pdn(pdev); return sprintf(buf, "%d\n", pdn->last_allow_rc); } @@ -132,7 +131,7 @@ static void eeh_notify_resume_remove(struct pci_dev *pdev) #else static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; } static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { } -#endif /* CONFIG_PCI_IOV */ +#endif /* CONFIG_PCI_IOV && CONFIG PPC_PSERIES*/ void eeh_sysfs_add_device(struct pci_dev *pdev) { @@ -160,22 +159,23 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + if (!edev) { + WARN_ON(eeh_enabled()); + return; + } + + edev->mode &= ~EEH_DEV_SYSFS; + /* * The parent directory might have been removed. We needn't * continue for that case. */ - if (!pdev->dev.kobj.sd) { - if (edev) - edev->mode &= ~EEH_DEV_SYSFS; + if (!pdev->dev.kobj.sd) return; - } device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state); eeh_notify_resume_remove(pdev); - - if (edev) - edev->mode &= ~EEH_DEV_SYSFS; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e1a4c39b83b8..77abbc34bbe0 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -140,6 +140,7 @@ transfer_to_handler: stw r12,_CTR(r11) stw r2,_XER(r11) mfspr r12,SPRN_SPRG_THREAD + tovirt_vmstack r12, r12 beq 2f /* if from user, fix up THREAD.regs */ addi r2, r12, -THREAD addi r11,r1,STACK_FRAME_OVERHEAD @@ -179,11 +180,13 @@ transfer_to_handler: 2: /* if from kernel, check interrupted DOZE/NAP mode and * check for stack overflow */ - kuap_save_and_lock r11, r12, r9, r2, r0 + kuap_save_and_lock r11, r12, r9, r2, r6 addi r2, r12, -THREAD +#ifndef CONFIG_VMAP_STACK lwz r9,KSP_LIMIT(r12) cmplw r1,r9 /* if r1 <= ksp_limit */ ble- stack_ovf /* then the kernel stack overflowed */ +#endif 5: #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) lwz r12,TI_LOCAL_FLAGS(r2) @@ -195,7 +198,8 @@ transfer_to_handler: transfer_to_handler_cont: 3: mflr r9 - tovirt(r2, r2) /* set r2 to current */ + tovirt_novmstack r2, r2 /* set r2 to current */ + tovirt_vmstack r9, r9 lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) @@ -284,9 +288,11 @@ reenable_mmu: rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ kuap_restore r11, r2, r3, r4, r5 + lwz r2, GPR2(r11) b fast_exception_return #endif +#ifndef CONFIG_VMAP_STACK /* * On kernel stack overflow, load up an initial stack pointer * and call StackOverflow(regs), which should not return. @@ -312,6 +318,7 @@ stack_ovf: mtspr SPRN_SRR1,r10 SYNC RFI +#endif #ifdef CONFIG_TRACE_IRQFLAGS trace_syscall_entry_irq_off: @@ -397,7 +404,7 @@ ret_from_syscall: LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */ /* Note: We don't bother telling lockdep about it */ SYNC - MTMSRD(r10) + mtmsr r10 lwz r9,TI_FLAGS(r2) li r8,-MAX_ERRNO andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) @@ -554,7 +561,7 @@ syscall_exit_work: */ ori r10,r10,MSR_EE SYNC - MTMSRD(r10) + mtmsr r10 /* Save NVGPRS if they're not saved already */ lwz r4,_TRAP(r1) @@ -621,7 +628,6 @@ ppc_swapcontext: */ .globl handle_page_fault handle_page_fault: - stw r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_BOOK3S_32 andis. r0,r5,DSISR_DABRMATCH@h @@ -697,7 +703,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) and. r0,r0,r11 /* FP or altivec or SPE enabled? */ beq+ 1f andc r11,r11,r0 - MTMSRD(r11) + mtmsr r11 isync 1: stw r11,_MSR(r1) mfcr r10 @@ -831,7 +837,7 @@ ret_from_except: /* Note: We don't bother telling lockdep about it */ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) SYNC /* Some chip revs have problems here... */ - MTMSRD(r10) /* disable interrupts */ + mtmsr r10 /* disable interrupts */ lwz r3,_MSR(r1) /* Returning to user mode? */ andi. r0,r3,MSR_PR @@ -998,7 +1004,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) */ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI) SYNC - MTMSRD(r10) /* clear the RI bit */ + mtmsr r10 /* clear the RI bit */ .globl exc_exit_restart exc_exit_restart: lwz r12,_NIP(r1) @@ -1234,7 +1240,7 @@ do_resched: /* r10 contains MSR_KERNEL here */ #endif ori r10,r10,MSR_EE SYNC - MTMSRD(r10) /* hard-enable interrupts */ + mtmsr r10 /* hard-enable interrupts */ bl schedule recheck: /* Note: And we don't tell it we are disabling them again @@ -1243,7 +1249,7 @@ recheck: */ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) SYNC - MTMSRD(r10) /* disable interrupts */ + mtmsr r10 /* disable interrupts */ lwz r9,TI_FLAGS(r2) andi. r0,r9,_TIF_NEED_RESCHED bne- do_resched @@ -1252,7 +1258,7 @@ recheck: do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE SYNC - MTMSRD(r10) /* hard-enable interrupts */ + mtmsr r10 /* hard-enable interrupts */ /* save r13-r31 in the exception frame, if not already done */ lwz r3,_TRAP(r1) andi. r0,r3,1 @@ -1334,14 +1340,14 @@ _GLOBAL(enter_rtas) lis r6,1f@ha /* physical return address for rtas */ addi r6,r6,1f@l tophys(r6,r6) - tophys(r7,r1) + tophys_novmstack r7, r1 lwz r8,RTASENTRY(r4) lwz r4,RTASBASE(r4) mfmsr r9 stw r9,8(r1) LOAD_REG_IMMEDIATE(r0,MSR_KERNEL) SYNC /* disable interrupts so SRR0/1 */ - MTMSRD(r0) /* don't get trashed */ + mtmsr r0 /* don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlr r6 stw r7, THREAD + RTAS_SP(r2) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index a9a1d3cdb523..6ba675b0cf7d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -597,8 +597,7 @@ _GLOBAL(_switch) std r0,16(r1) stdu r1,-SWITCH_FRAME_SIZE(r1) /* r3-r13 are caller saved -- Cort */ - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_NVGPRS(r1) std r0,_NIP(r1) /* Return to switch caller */ mfcr r23 std r23,_CCR(r1) @@ -722,8 +721,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtcrf 0xFF,r6 /* r3-r13 are destroyed -- Cort */ - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_NVGPRS(r1) /* convert old thread to its task_struct for return value */ addi r3,r3,-THREAD @@ -1155,8 +1153,7 @@ _GLOBAL(enter_rtas) */ SAVE_GPR(2, r1) /* Save the TOC */ SAVE_GPR(13, r1) /* Save paca */ - SAVE_8GPRS(14, r1) /* Save the non-volatiles */ - SAVE_10GPRS(22, r1) /* ditto */ + SAVE_NVGPRS(r1) /* Save the non-volatiles */ mfcr r4 std r4,_CCR(r1) @@ -1263,8 +1260,7 @@ rtas_restore_regs: /* relocation is on at this point */ REST_GPR(2, r1) /* Restore the TOC */ REST_GPR(13, r1) /* Restore paca */ - REST_8GPRS(14, r1) /* Restore the non-volatiles */ - REST_10GPRS(22, r1) /* ditto */ + REST_NVGPRS(r1) /* Restore the non-volatiles */ GET_PACA(r13) @@ -1298,8 +1294,7 @@ _GLOBAL(enter_prom) */ SAVE_GPR(2, r1) SAVE_GPR(13, r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_NVGPRS(r1) mfcr r10 mfmsr r11 std r10,_CCR(r1) @@ -1343,8 +1338,7 @@ _GLOBAL(enter_prom) /* Restore other registers */ REST_GPR(2, r1) REST_GPR(13, r1) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_NVGPRS(r1) ld r4,_CCR(r1) mtcr r4 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 46508b148e16..ffc15f4f079d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1408,22 +1408,9 @@ EXC_VIRT_NONE(0x4b00, 0x100) * * Call convention: * - * syscall register convention is in Documentation/powerpc/syscall64-abi.rst - * - * For hypercalls, the register convention is as follows: - * r0 volatile - * r1-2 nonvolatile - * r3 volatile parameter and return value for status - * r4-r10 volatile input and output value - * r11 volatile hypercall number and output value - * r12 volatile input and output value - * r13-r31 nonvolatile - * LR nonvolatile - * CTR volatile - * XER volatile - * CR0-1 CR5-7 volatile - * CR2-4 nonvolatile - * Other registers nonvolatile + * syscall and hypercalls register conventions are documented in + * Documentation/powerpc/syscall64-abi.rst and + * Documentation/powerpc/papr_hcalls.rst respectively. * * The intersection of volatile registers that don't contain possible * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry @@ -2208,11 +2195,20 @@ __end_interrupts: DEFINE_FIXED_SYMBOL(__end_interrupts) #ifdef CONFIG_PPC_970_NAP + /* + * Called by exception entry code if _TLF_NAPPING was set, this clears + * the NAPPING flag, and redirects the exception exit to + * power4_fixup_nap_return. + */ + .globl power4_fixup_nap EXC_COMMON_BEGIN(power4_fixup_nap) andc r9,r9,r10 std r9,TI_LOCAL_FLAGS(r11) - ld r10,_LINK(r1) /* make idle task do the */ - std r10,_NIP(r1) /* equivalent of a blr */ + LOAD_REG_ADDR(r10, power4_idle_nap_return) + std r10,_NIP(r1) + blr + +power4_idle_nap_return: blr #endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 0bb991ddd264..3235a8da6af7 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -94,6 +94,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* enable use of FP after return */ #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ +#ifdef CONFIG_VMAP_STACK + tovirt(r5, r5) +#endif lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 4a24f8f026c7..0493fcac6409 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -272,16 +272,21 @@ __secondary_hold_acknowledge: */ . = 0x200 DO_KVM 0x200 - mtspr SPRN_SPRG_SCRATCH0,r10 - mtspr SPRN_SPRG_SCRATCH1,r11 - mfcr r10 +MachineCheck: + EXCEPTION_PROLOG_0 +#ifdef CONFIG_VMAP_STACK + li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r11 + isync +#endif #ifdef CONFIG_PPC_CHRP mfspr r11, SPRN_SPRG_THREAD + tovirt_vmstack r11, r11 lwz r11, RTAS_SP(r11) cmpwi cr1, r11, 0 bne cr1, 7f #endif /* CONFIG_PPC_CHRP */ - EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_1 for_rtas=1 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP @@ -296,24 +301,21 @@ __secondary_hold_acknowledge: . = 0x300 DO_KVM 0x300 DataAccess: - EXCEPTION_PROLOG - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) -#ifdef CONFIG_PPC_KUAP - andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else - andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h -#endif - bne 1f /* if not, try to put a PTE */ - mfspr r4,SPRN_DAR /* into the hash table */ - rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ + EXCEPTION_PROLOG handle_dar_dsisr=1 + get_and_save_dar_dsisr_on_stack r4, r5, r11 BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_PPC_KUAP + andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else + andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h +#endif + bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ + rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ bl hash_page -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -1: lwz r5,_DSISR(r11) /* get DSISR value */ - mfspr r4,SPRN_DAR - EXC_XFER_LITE(0x300, handle_page_fault) - + b handle_page_fault_tramp_1 +FTR_SECTION_ELSE + b handle_page_fault_tramp_2 +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) /* Instruction access exception. */ . = 0x400 @@ -329,6 +331,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) 1: mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ + stw r4, _DAR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ @@ -338,11 +341,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) . = 0x600 DO_KVM 0x600 Alignment: - EXCEPTION_PROLOG - mfspr r4,SPRN_DAR - stw r4,_DAR(r11) - mfspr r5,SPRN_DSISR - stw r5,_DSISR(r11) + EXCEPTION_PROLOG handle_dar_dsisr=1 + save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) @@ -645,6 +645,16 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) . = 0x3000 +handle_page_fault_tramp_1: + lwz r4, _DAR(r11) + lwz r5, _DSISR(r11) + /* fall through */ +handle_page_fault_tramp_2: + EXC_XFER_LITE(0x300, handle_page_fault) + +stack_overflow: + vmap_stack_overflow_exception + AltiVecUnavailable: EXCEPTION_PROLOG #ifdef CONFIG_ALTIVEC @@ -917,6 +927,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) ori r4,r4,2f@l tophys(r4,r4) li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) + + .align 4 mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 SYNC @@ -1058,6 +1070,8 @@ _ENTRY(update_bats) rlwinm r0, r6, 0, ~MSR_RI rlwinm r0, r0, 0, ~MSR_EE mtmsr r0 + + .align 4 mtspr SPRN_SRR0, r4 mtspr SPRN_SRR1, r3 SYNC @@ -1097,6 +1111,8 @@ mmu_off: andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ beqlr andc r3,r3,r0 + + .align 4 mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 sync diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 8abc7783dbe5..a6a5fbbf8504 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -10,28 +10,60 @@ * We assume sprg3 has the physical address of the current * task's thread_struct. */ +.macro EXCEPTION_PROLOG handle_dar_dsisr=0 + EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 handle_dar_dsisr=\handle_dar_dsisr +.endm -.macro EXCEPTION_PROLOG +.macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0 mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 +#ifdef CONFIG_VMAP_STACK + mfspr r10, SPRN_SPRG_THREAD + .if \handle_dar_dsisr + mfspr r11, SPRN_DAR + stw r11, DAR(r10) + mfspr r11, SPRN_DSISR + stw r11, DSISR(r10) + .endif + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r10) +#endif + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ +#ifdef CONFIG_VMAP_STACK + stw r11, SRR1(r10) +#endif mfcr r10 - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 + andi. r11, r11, MSR_PR .endm -.macro EXCEPTION_PROLOG_1 - mfspr r11,SPRN_SRR1 /* check whether user or kernel */ - andi. r11,r11,MSR_PR +.macro EXCEPTION_PROLOG_1 for_rtas=0 +#ifdef CONFIG_VMAP_STACK + .ifeq \for_rtas + li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r11 + isync + .endif + subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ +#else tophys(r11,r1) /* use tophys(r1) if kernel */ + subi r11, r11, INT_FRAME_SIZE /* alloc exc. frame */ +#endif beq 1f mfspr r11,SPRN_SPRG_THREAD + tovirt_vmstack r11, r11 lwz r11,TASK_STACK-THREAD(r11) - addi r11,r11,THREAD_SIZE - tophys(r11,r11) -1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ + addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE + tophys_novmstack r11, r11 +1: +#ifdef CONFIG_VMAP_STACK + mtcrf 0x7f, r11 + bt 32 - THREAD_ALIGN_SHIFT, stack_overflow +#endif .endm -.macro EXCEPTION_PROLOG_2 +.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 stw r10,_CCR(r11) /* save registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) @@ -41,16 +73,33 @@ stw r12,GPR11(r11) mflr r10 stw r10,_LINK(r11) +#ifdef CONFIG_VMAP_STACK + mfspr r12, SPRN_SPRG_THREAD + tovirt(r12, r12) + .if \handle_dar_dsisr + lwz r10, DAR(r12) + stw r10, _DAR(r11) + lwz r10, DSISR(r12) + stw r10, _DSISR(r11) + .endif + lwz r9, SRR1(r12) + lwz r12, SRR0(r12) +#else mfspr r12,SPRN_SRR0 mfspr r9,SPRN_SRR1 +#endif stw r1,GPR1(r11) stw r1,0(r11) - tovirt(r1,r11) /* set new kernel sp */ + tovirt_novmstack r1, r11 /* set new kernel sp */ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#else +#ifdef CONFIG_VMAP_STACK + li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */ #else li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */ - MTMSRD(r10) /* (except for mach check in rtas) */ +#endif + mtmsr r10 /* (except for mach check in rtas) */ #endif stw r0,GPR0(r11) lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ @@ -62,25 +111,46 @@ .macro SYSCALL_ENTRY trapno mfspr r12,SPRN_SPRG_THREAD +#ifdef CONFIG_VMAP_STACK + mfspr r9, SPRN_SRR0 + mfspr r11, SPRN_SRR1 + stw r9, SRR0(r12) + stw r11, SRR1(r12) +#endif mfcr r10 lwz r11,TASK_STACK-THREAD(r12) - mflr r9 - addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ - tophys(r11,r11) + addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE +#ifdef CONFIG_VMAP_STACK + li r9, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r9 + isync +#endif + tovirt_vmstack r12, r12 + tophys_novmstack r11, r11 + mflr r9 stw r10,_CCR(r11) /* save registers */ + stw r9, _LINK(r11) +#ifdef CONFIG_VMAP_STACK + lwz r10, SRR0(r12) + lwz r9, SRR1(r12) +#else mfspr r10,SPRN_SRR0 - stw r9,_LINK(r11) mfspr r9,SPRN_SRR1 +#endif stw r1,GPR1(r11) stw r1,0(r11) - tovirt(r1,r11) /* set new kernel sp */ + tovirt_novmstack r1, r11 /* set new kernel sp */ stw r10,_NIP(r11) #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#else +#ifdef CONFIG_VMAP_STACK + LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */ #else LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ - MTMSRD(r10) /* (except for mach check in rtas) */ +#endif + mtmsr r10 /* (except for mach check in rtas) */ #endif lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ stw r2,GPR2(r11) @@ -118,7 +188,7 @@ #endif 3: - tovirt(r2, r2) /* set r2 to current */ + tovirt_novmstack r2, r2 /* set r2 to current */ lis r11, transfer_to_syscall@h ori r11, r11, transfer_to_syscall@l #ifdef CONFIG_TRACE_IRQFLAGS @@ -141,6 +211,54 @@ RFI /* jump to handler, enable MMU */ .endm +.macro save_dar_dsisr_on_stack reg1, reg2, sp +#ifndef CONFIG_VMAP_STACK + mfspr \reg1, SPRN_DAR + mfspr \reg2, SPRN_DSISR + stw \reg1, _DAR(\sp) + stw \reg2, _DSISR(\sp) +#endif +.endm + +.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp +#ifdef CONFIG_VMAP_STACK + lwz \reg1, _DAR(\sp) + lwz \reg2, _DSISR(\sp) +#else + save_dar_dsisr_on_stack \reg1, \reg2, \sp +#endif +.endm + +.macro tovirt_vmstack dst, src +#ifdef CONFIG_VMAP_STACK + tovirt(\dst, \src) +#else + .ifnc \dst, \src + mr \dst, \src + .endif +#endif +.endm + +.macro tovirt_novmstack dst, src +#ifndef CONFIG_VMAP_STACK + tovirt(\dst, \src) +#else + .ifnc \dst, \src + mr \dst, \src + .endif +#endif +.endm + +.macro tophys_novmstack dst, src +#ifndef CONFIG_VMAP_STACK + tophys(\dst, \src) +#else + .ifnc \dst, \src + mr \dst, \src + .endif +#endif +.endm + /* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). @@ -187,4 +305,28 @@ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ ret_from_except) +.macro vmap_stack_overflow_exception +#ifdef CONFIG_VMAP_STACK +#ifdef CONFIG_SMP + mfspr r11, SPRN_SPRG_THREAD + tovirt(r11, r11) + lwz r11, TASK_CPU - THREAD(r11) + slwi r11, r11, 3 + addis r11, r11, emergency_ctx@ha +#else + lis r11, emergency_ctx@ha +#endif + lwz r11, emergency_ctx@l(r11) + cmpwi cr1, r11, 0 + bne cr1, 1f + lis r11, init_thread_union@ha + addi r11, r11, init_thread_union@l +1: addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE + EXCEPTION_PROLOG_2 + SAVE_NVGPRS(r11) + addi r3, r1, STACK_FRAME_OVERHEAD + EXC_XFER_STD(0, stack_overflow_exception) +#endif +.endm + #endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 585ea1976550..9bb663977e84 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -313,6 +313,7 @@ _ENTRY(saved_ksp_limit) START_EXCEPTION(0x0400, InstructionAccess) EXCEPTION_PROLOG mr r4,r12 /* Pass SRR0 as arg2 */ + stw r4, _DEAR(r11) li r5,0 /* Pass zero as arg3 */ EXC_XFER_LITE(0x400, handle_page_fault) @@ -676,6 +677,7 @@ DataAccess: mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + stw r4, _DEAR(r11) EXC_XFER_LITE(0x300, handle_page_fault) /* Other PowerPC processors, namely those derived from the 6xx-series diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 19f583e18402..9922306ae512 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -127,56 +127,36 @@ instruction_counter: /* Machine check */ . = 0x200 MachineCheck: - EXCEPTION_PROLOG - mfspr r4,SPRN_DAR - stw r4,_DAR(r11) - li r5,RPN_PATTERN - mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ - mfspr r5,SPRN_DSISR - stw r5,_DSISR(r11) + EXCEPTION_PROLOG handle_dar_dsisr=1 + save_dar_dsisr_on_stack r4, r5, r11 + li r6, RPN_PATTERN + mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x200, machine_check_exception) -/* Data access exception. - * This is "never generated" by the MPC8xx. - */ - . = 0x300 -DataAccess: - -/* Instruction access exception. - * This is "never generated" by the MPC8xx. - */ - . = 0x400 -InstructionAccess: - /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) /* Alignment exception */ . = 0x600 Alignment: - EXCEPTION_PROLOG - mfspr r4,SPRN_DAR - stw r4,_DAR(r11) - li r5,RPN_PATTERN - mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ - mfspr r5,SPRN_DSISR - stw r5,_DSISR(r11) + EXCEPTION_PROLOG handle_dar_dsisr=1 + save_dar_dsisr_on_stack r4, r5, r11 + li r6, RPN_PATTERN + mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x600, alignment_exception) + b .Lalignment_exception_ool /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) -/* No FPU on MPC8xx. This exception is not supposed to happen. -*/ - EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD) - /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) + /* With VMAP_STACK there's not enough room for this at 0x600 */ + . = 0xa00 +.Lalignment_exception_ool: + EXC_XFER_STD(0x600, alignment_exception) /* System call */ . = 0xc00 @@ -185,25 +165,12 @@ SystemCall: /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) -/* Called from DataStoreTLBMiss when perf TLB misses events are activated */ -#ifdef CONFIG_PERF_EVENTS - patch_site 0f, patch__dtlbmiss_perf -0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - addi r10, r10, 1 - stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 - rfi -#endif - . = 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction @@ -343,8 +310,8 @@ ITLBMissLinear: . = 0x1200 DataStoreTLBMiss: - mtspr SPRN_SPRG_SCRATCH0, r10 - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_DAR, r10 + mtspr SPRN_M_TW, r11 mfcr r11 /* If we are faulting a kernel address, we have to use the @@ -409,10 +376,10 @@ DataStoreTLBMiss: mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ /* Restore registers */ - mtspr SPRN_DAR, r11 /* Tag DAR */ -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 +0: mfspr r10, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__dtlbmiss_exit_1 @@ -428,10 +395,10 @@ DTLBMissIMMR: mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ li r11, RPN_PATTERN - mtspr SPRN_DAR, r11 /* Tag DAR */ -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 +0: mfspr r10, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__dtlbmiss_exit_2 @@ -465,10 +432,10 @@ DTLBMissLinear: mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ li r11, RPN_PATTERN - mtspr SPRN_DAR, r11 /* Tag DAR */ -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 +0: mfspr r10, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__dtlbmiss_exit_3 @@ -486,6 +453,7 @@ InstructionTLBError: tlbie r4 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ .Litlbie: + stw r4, _DAR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to @@ -494,58 +462,69 @@ InstructionTLBError: */ . = 0x1400 DataTLBError: - mtspr SPRN_SPRG_SCRATCH0, r10 - mtspr SPRN_SPRG_SCRATCH1, r11 - mfcr r10 - + EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_DAR - cmpwi cr0, r11, RPN_PATTERN - beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ + cmpwi cr1, r11, RPN_PATTERN + beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ +#ifdef CONFIG_VMAP_STACK + li r11, RPN_PATTERN + mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ +#endif EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 - mfspr r5,SPRN_DSISR - stw r5,_DSISR(r11) - mfspr r4,SPRN_DAR + EXCEPTION_PROLOG_2 handle_dar_dsisr=1 + get_and_save_dar_dsisr_on_stack r4, r5, r11 andis. r10,r5,DSISR_NOHPTE@h beq+ .Ldtlbie tlbie r4 .Ldtlbie: +#ifndef CONFIG_VMAP_STACK li r10,RPN_PATTERN mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ +#endif /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) +/* Called from DataStoreTLBMiss when perf TLB misses events are activated */ +#ifdef CONFIG_PERF_EVENTS + patch_site 0f, patch__dtlbmiss_perf +0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + addi r10, r10, 1 + stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + mfspr r10, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_M_TW + rfi +#endif + +stack_overflow: + vmap_stack_overflow_exception /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to * using them. */ - . = 0x1c00 -DataBreakpoint: - mtspr SPRN_SPRG_SCRATCH0, r10 - mtspr SPRN_SPRG_SCRATCH1, r11 - mfcr r10 - mfspr r11, SPRN_SRR0 - cmplwi cr0, r11, (.Ldtlbie - PAGE_OFFSET)@l - cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l - beq- cr0, 11f - beq- cr7, 11f +do_databreakpoint: EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 + EXCEPTION_PROLOG_2 handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_BAR stw r4,_DAR(r11) +#ifdef CONFIG_VMAP_STACK + lwz r5,_DSISR(r11) +#else mfspr r5,SPRN_DSISR +#endif EXC_XFER_STD(0x1c00, do_break) -11: + + . = 0x1c00 +DataBreakpoint: + EXCEPTION_PROLOG_0 handle_dar_dsisr=1 + mfspr r11, SPRN_SRR0 + cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l + cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l + cror 4*cr1+eq, 4*cr1+eq, 4*cr7+eq + bne cr1, do_databreakpoint mtcr r10 mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 @@ -581,9 +560,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r10, SPRN_SRR0 mtspr SPRN_MD_EPN, r10 rlwinm r11, r10, 16, 0xfff8 - cmpli cr0, r11, PAGE_OFFSET@h + cmpli cr1, r11, PAGE_OFFSET@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ - blt+ 3f + blt+ cr1, 3f rlwinm r11, r10, 16, 0xfff8 0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h @@ -598,7 +577,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ 3: lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ mtspr SPRN_MD_TWC, r11 - mtcr r11 + mtcrf 0x01, r11 mfspr r11, SPRN_MD_TWC lwz r11, 0(r11) /* Get the pte */ bt 28,200f /* bit 28 = Large page (8M) */ @@ -611,16 +590,16 @@ FixupDAR:/* Entry point for dcbx workaround. */ * no need to include them here */ xoris r10, r11, 0x7c00 /* check if major OP code is 31 */ rlwinm r10, r10, 0, 21, 5 - cmpwi cr0, r10, 2028 /* Is dcbz? */ - beq+ 142f - cmpwi cr0, r10, 940 /* Is dcbi? */ - beq+ 142f - cmpwi cr0, r10, 108 /* Is dcbst? */ - beq+ 144f /* Fix up store bit! */ - cmpwi cr0, r10, 172 /* Is dcbf? */ - beq+ 142f - cmpwi cr0, r10, 1964 /* Is icbi? */ - beq+ 142f + cmpwi cr1, r10, 2028 /* Is dcbz? */ + beq+ cr1, 142f + cmpwi cr1, r10, 940 /* Is dcbi? */ + beq+ cr1, 142f + cmpwi cr1, r10, 108 /* Is dcbst? */ + beq+ cr1, 144f /* Fix up store bit! */ + cmpwi cr1, r10, 172 /* Is dcbf? */ + beq+ cr1, 142f + cmpwi cr1, r10, 1964 /* Is icbi? */ + beq+ cr1, 142f 141: mfspr r10,SPRN_M_TW b DARFixed /* Nope, go back to normal TLB processing */ @@ -679,8 +658,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ add r10, r10, r30 ;b 151f add r10, r10, r31 151: - rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */ - beq 152f /* if reg RA is zero, don't add it */ + rlwinm r11,r11,19,24,28 /* offset into jump table for reg RA */ + cmpwi cr1, r11, 0 + beq cr1, 152f /* if reg RA is zero, don't add it */ addi r11, r11, 150b@l /* add start of table */ mtctr r11 /* load ctr with jump address */ rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */ @@ -688,7 +668,14 @@ FixupDAR:/* Entry point for dcbx workaround. */ 152: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ +#ifdef CONFIG_VMAP_STACK + mfspr r11, SPRN_SPRG_THREAD + stw r10, DAR(r11) + mfspr r10, SPRN_DSISR + stw r10, DSISR(r11) +#else mtdar r10 /* save fault EA to DAR */ +#endif mfspr r10,SPRN_M_TW b DARFixed /* Go back to normal TLB handling */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 2ae635df9026..37fc84ed90e3 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -467,6 +467,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ + stw r4, _DEAR(r11); \ EXC_XFER_LITE(0x0300, handle_page_fault) #define INSTRUCTION_STORAGE_EXCEPTION \ @@ -475,6 +476,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mr r4,r12; /* Pass SRR0 as arg2 */ \ + stw r4, _DEAR(r11); \ li r5,0; /* Pass zero as arg3 */ \ EXC_XFER_LITE(0x0400, handle_page_fault) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 6f7a3a7162c5..840af004041e 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -378,6 +378,7 @@ interrupt_base: mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ andis. r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f + stw r4, _DEAR(r11) EXC_XFER_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 58ce3d37c2a3..2462cd7c565c 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -160,6 +160,9 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) /* DAWR region can't cross 512 bytes boundary */ if ((start_addr >> 9) != (end_addr >> 9)) return -EINVAL; + } else if (IS_ENABLED(CONFIG_PPC_8xx)) { + /* 8xx can setup a range without limitation */ + max_len = U16_MAX; } if (hw_len > max_len) @@ -328,13 +331,11 @@ int hw_breakpoint_handler(struct die_args *args) } info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (IS_ENABLED(CONFIG_PPC_8xx)) { - if (!dar_within_range(regs->dar, info)) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - } else { - if (!stepping_handler(regs, bp, info)) - goto out; - } + if (!dar_within_range(regs->dar, info)) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + + if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info)) + goto out; /* * As a policy, the callback is invoked in a 'trigger-after-execute' diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index a36fd053c3db..422e31d2f5a2 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -77,6 +77,31 @@ void arch_cpu_idle(void) int powersave_nap; +#ifdef CONFIG_PPC_970_NAP +void power4_idle(void) +{ + if (!cpu_has_feature(CPU_FTR_CAN_NAP)) + return; + + if (!powersave_nap) + return; + + if (!prep_irq_for_idle()) + return; + + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + asm volatile("DSSALL ; sync" ::: "memory"); + + power4_idle_nap(); + + /* + * power4_idle_nap returns with interrupts enabled (soft and hard). + * to our caller with interrupts enabled (soft and hard). Our caller + * can cope with either interrupts disabled or enabled upon return. + */ +} +#endif + #ifdef CONFIG_SYSCTL /* * Register the sysctl to set/clear powersave_nap. diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index d32751994a62..22f249b6f58d 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -15,7 +15,9 @@ #include #include #include +#include /* TLF_NAPPING */ +#ifdef CONFIG_PPC_P7_NAP /* * Desired PSSCR in r3 * @@ -181,4 +183,22 @@ _GLOBAL(isa206_idle_insn_mayloss) bne 2f IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) 2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) +#endif +#ifdef CONFIG_PPC_970_NAP +_GLOBAL(power4_idle_nap) + LOAD_REG_IMMEDIATE(r7, MSR_KERNEL|MSR_EE|MSR_POW) + ld r9,PACA_THREAD_INFO(r13) + ld r8,TI_LOCAL_FLAGS(r9) + ori r8,r8,_TLF_NAPPING + std r8,TI_LOCAL_FLAGS(r9) + /* + * NAPPING bit is set, from this point onward power4_fixup_nap + * will cause exceptions to return to power4_idle_nap_return. + */ +1: sync + isync + mtmsrd r7 + isync + b 1b +#endif diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S deleted file mode 100644 index 33c625329078..000000000000 --- a/arch/powerpc/kernel/idle_power4.S +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file contains the power_save function for 970-family CPUs. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#undef DEBUG - - .text - -_GLOBAL(power4_idle) -BEGIN_FTR_SECTION - blr -END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) - /* Now check if user or arch enabled NAP mode */ - LOAD_REG_ADDRBASE(r3,powersave_nap) - lwz r4,ADDROFF(powersave_nap)(r3) - cmpwi 0,r4,0 - beqlr - - /* This sequence is similar to prep_irq_for_idle() */ - - /* Hard disable interrupts */ - mfmsr r7 - rldicl r0,r7,48,1 - rotldi r0,r0,16 - mtmsrd r0,1 - - /* Check if something happened while soft-disabled */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - bne- 2f - - /* - * Soft-enable interrupts. This will make power4_fixup_nap return - * to our caller with interrupts enabled (soft and hard). The caller - * can cope with either interrupts disabled or enabled upon return. - */ -#ifdef CONFIG_TRACE_IRQFLAGS - /* Tell the tracer interrupts are on, because idle responds to them. */ - mflr r0 - std r0,16(r1) - stdu r1,-128(r1) - bl trace_hardirqs_on - addi r1,r1,128 - ld r0,16(r1) - mtlr r0 - mfmsr r7 -#endif /* CONFIG_TRACE_IRQFLAGS */ - - li r0,IRQS_ENABLED - stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */ -BEGIN_FTR_SECTION - DSSALL - sync -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - ld r9, PACA_THREAD_INFO(r13) - ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */ - ori r8,r8,_TLF_NAPPING /* so when we take an exception */ - std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */ - ori r7,r7,MSR_EE - oris r7,r7,MSR_POW@h -1: sync - isync - mtmsrd r7 - isync - b 1b - -2: /* Return if an interrupt had happened while soft disabled */ - /* Set the HARD_DIS flag because interrupts are now hard disabled */ - ori r0,r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) - blr diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index add67498c126..5c9b11878555 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -664,8 +665,29 @@ void do_IRQ(struct pt_regs *regs) set_irq_regs(old_regs); } +static void *__init alloc_vm_stack(void) +{ + return __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, + VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL, + 0, NUMA_NO_NODE, (void*)_RET_IP_); +} + +static void __init vmap_irqstack_init(void) +{ + int i; + + for_each_possible_cpu(i) { + softirq_ctx[i] = alloc_vm_stack(); + hardirq_ctx[i] = alloc_vm_stack(); + } +} + + void __init init_IRQ(void) { + if (IS_ENABLED(CONFIG_VMAP_STACK)) + vmap_irqstack_init(); + if (ppc_md.init_IRQ) ppc_md.init_IRQ(); } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 1c448cf25506..c6c03416a151 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -261,12 +261,6 @@ int pcibios_sriov_disable(struct pci_dev *pdev) #endif /* CONFIG_PCI_IOV */ -void pcibios_bus_add_device(struct pci_dev *pdev) -{ - if (ppc_md.pcibios_bus_add_device) - ppc_md.pcibios_bus_add_device(pdev); -} - static resource_size_t pcibios_io_size(const struct pci_controller *hose) { #ifdef CONFIG_PPC64 @@ -964,7 +958,7 @@ void pcibios_setup_bus_self(struct pci_bus *bus) phb->controller_ops.dma_bus_setup(bus); } -static void pcibios_setup_device(struct pci_dev *dev) +void pcibios_bus_add_device(struct pci_dev *dev) { struct pci_controller *phb; /* Fixup NUMA node as it may not be setup yet by the generic @@ -985,17 +979,13 @@ static void pcibios_setup_device(struct pci_dev *dev) pci_read_irq_line(dev); if (ppc_md.pci_irq_fixup) ppc_md.pci_irq_fixup(dev); + + if (ppc_md.pcibios_bus_add_device) + ppc_md.pcibios_bus_add_device(dev); } int pcibios_add_device(struct pci_dev *dev) { - /* - * We can only call pcibios_setup_device() after bus setup is complete, - * since some of the platform specific DMA setup code depends on it. - */ - if (dev->bus->is_added) - pcibios_setup_device(dev); - #ifdef CONFIG_PCI_IOV if (ppc_md.pcibios_fixup_sriov) ppc_md.pcibios_fixup_sriov(dev); @@ -1004,24 +994,6 @@ int pcibios_add_device(struct pci_dev *dev) return 0; } -void pcibios_setup_bus_devices(struct pci_bus *bus) -{ - struct pci_dev *dev; - - pr_debug("PCI: Fixup bus devices %d (%s)\n", - bus->number, bus->self ? pci_name(bus->self) : "PHB"); - - list_for_each_entry(dev, &bus->devices, bus_list) { - /* Cardbus can call us to add new devices to a bus, so ignore - * those who are already fully discovered - */ - if (pci_dev_is_added(dev)) - continue; - - pcibios_setup_device(dev); - } -} - void pcibios_set_master(struct pci_dev *dev) { /* No special bus mastering setup handling */ @@ -1037,19 +1009,9 @@ void pcibios_fixup_bus(struct pci_bus *bus) /* Now fixup the bus bus */ pcibios_setup_bus_self(bus); - - /* Now fixup devices on that bus */ - pcibios_setup_bus_devices(bus); } EXPORT_SYMBOL(pcibios_fixup_bus); -void pci_fixup_cardbus(struct pci_bus *bus) -{ - /* Now fixup devices on that bus */ - pcibios_setup_bus_devices(bus); -} - - static int skip_isa_ioresource_align(struct pci_dev *dev) { if (pci_has_flag(PCI_CAN_SKIP_ISA_ALIGN) && diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index fc62c4bc47b1..d6a67f814983 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -134,7 +134,6 @@ void pci_hp_add_devices(struct pci_bus *bus) */ slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); - pcibios_setup_bus_devices(bus); max = bus->busn_res.start; /* * Scan bridges that are already configured. We don't touch diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 9524009ca1ae..4e654df55969 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -125,7 +125,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) } #ifdef CONFIG_PCI_IOV -static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, +static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent, int vf_index, int busno, int devfn) { @@ -151,17 +151,15 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, return pdn; } -#endif -struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) +struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev) { -#ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; int i; /* Only support IOV for now */ - if (!pdev->is_physfn) - return pci_get_pdn(pdev); + if (WARN_ON(!pdev->is_physfn)) + return NULL; /* Check if VFs have been populated */ pdn = pci_get_pdn(pdev); @@ -176,7 +174,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { struct eeh_dev *edev __maybe_unused; - pdn = add_one_dev_pci_data(parent, i, + pdn = add_one_sriov_vf_pdn(parent, i, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); if (!pdn) { @@ -192,31 +190,17 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) edev->physfn = pdev; #endif /* CONFIG_EEH */ } -#endif /* CONFIG_PCI_IOV */ - return pci_get_pdn(pdev); } -void remove_dev_pci_data(struct pci_dev *pdev) +void remove_sriov_vf_pdns(struct pci_dev *pdev) { -#ifdef CONFIG_PCI_IOV struct pci_dn *parent; struct pci_dn *pdn, *tmp; int i; - /* - * VF and VF PE are created/released dynamically, so we need to - * bind/unbind them. Otherwise the VF and VF PE would be mismatched - * when re-enabling SR-IOV. - */ - if (pdev->is_virtfn) { - pdn = pci_get_pdn(pdev); - pdn->pe_number = IODA_INVALID_PE; - return; - } - /* Only support IOV PF for now */ - if (!pdev->is_physfn) + if (WARN_ON(!pdev->is_physfn)) return; /* Check if VFs have been populated */ @@ -244,9 +228,22 @@ void remove_dev_pci_data(struct pci_dev *pdev) continue; #ifdef CONFIG_EEH - /* Release EEH device for the VF */ + /* + * Release EEH state for this VF. The PCI core + * has already torn down the pci_dev for this VF, but + * we're responsible to removing the eeh_dev since it + * has the same lifetime as the pci_dn that spawned it. + */ edev = pdn_to_eeh_dev(pdn); if (edev) { + /* + * We allocate pci_dn's for the totalvfs count, + * but only only the vfs that were activated + * have a configured PE. + */ + if (edev->pe) + eeh_rmv_from_parent_pe(edev); + pdn->edev = NULL; kfree(edev); } @@ -258,8 +255,8 @@ void remove_dev_pci_data(struct pci_dev *pdev) kfree(pdn); } } -#endif /* CONFIG_PCI_IOV */ } +#endif /* CONFIG_PCI_IOV */ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, struct device_node *dn) diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index f91d7e94872e..c3024f104765 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -414,7 +414,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, */ if (!rescan_existing) pcibios_setup_bus_self(bus); - pcibios_setup_bus_devices(bus); /* Now scan child busses */ for_each_pci_bridge(dev, bus) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4df94b6e2f32..fad50db9dcf2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -740,28 +740,6 @@ static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) mtspr(SPRN_DABRX, dabrx); return 0; } -#elif defined(CONFIG_PPC_8xx) -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - unsigned long addr = dabr & ~HW_BRK_TYPE_DABR; - unsigned long lctrl1 = 0x90000000; /* compare type: equal on E & F */ - unsigned long lctrl2 = 0x8e000002; /* watchpoint 1 on cmp E | F */ - - if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ) - lctrl1 |= 0xa0000; - else if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE) - lctrl1 |= 0xf0000; - else if ((dabr & HW_BRK_TYPE_RDWR) == 0) - lctrl2 = 0; - - mtspr(SPRN_LCTRL2, 0); - mtspr(SPRN_CMPE, addr); - mtspr(SPRN_CMPF, addr + 4); - mtspr(SPRN_LCTRL1, lctrl1); - mtspr(SPRN_LCTRL2, lctrl2); - - return 0; -} #else static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) { @@ -782,6 +760,39 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) return __set_dabr(dabr, dabrx); } +static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) +{ + unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW | + LCTRL1_CRWF_RW; + unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN; + unsigned long start_addr = brk->address & ~HW_BREAKPOINT_ALIGN; + unsigned long end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN; + + if (start_addr == 0) + lctrl2 |= LCTRL2_LW0LA_F; + else if (end_addr == ~0U) + lctrl2 |= LCTRL2_LW0LA_E; + else + lctrl2 |= LCTRL2_LW0LA_EandF; + + mtspr(SPRN_LCTRL2, 0); + + if ((brk->type & HW_BRK_TYPE_RDWR) == 0) + return 0; + + if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ) + lctrl1 |= LCTRL1_CRWE_RO | LCTRL1_CRWF_RO; + if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE) + lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO; + + mtspr(SPRN_CMPE, start_addr - 1); + mtspr(SPRN_CMPF, end_addr + 1); + mtspr(SPRN_LCTRL1, lctrl1); + mtspr(SPRN_LCTRL2, lctrl2); + + return 0; +} + void __set_breakpoint(struct arch_hw_breakpoint *brk) { memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); @@ -789,6 +800,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) if (dawr_enabled()) // Power8 or later set_dawr(brk); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + set_breakpoint_8xx(brk); else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) // Power7 or earlier set_dabr(brk); @@ -1264,16 +1277,6 @@ void show_user_instructions(struct pt_regs *regs) pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); - /* - * Make sure the NIP points at userspace, not kernel text/data or - * elsewhere. - */ - if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) { - pr_info("%s[%d]: Bad NIP, not dumping instructions.\n", - current->comm, current->pid); - return; - } - seq_buf_init(&s, buf, sizeof(buf)); while (n) { @@ -1284,7 +1287,7 @@ void show_user_instructions(struct pt_regs *regs) for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) { int instr; - if (probe_kernel_address((const void *)pc, instr)) { + if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) { seq_buf_printf(&s, "XXXXXXXX "); continue; } diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index c82577c4b15d..2dd0d9cb5a20 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -35,7 +35,7 @@ void exc_lvl_early_init(void); static inline void exc_lvl_early_init(void) { }; #endif -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) || defined(CONFIG_VMAP_STACK) void emergency_stack_init(void); #else static inline void emergency_stack_init(void) { }; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index dcffe927f5b9..5b49b26eb154 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -140,7 +140,7 @@ arch_initcall(ppc_init); static void *__init alloc_stack(void) { - void *ptr = memblock_alloc(THREAD_SIZE, THREAD_SIZE); + void *ptr = memblock_alloc(THREAD_SIZE, THREAD_ALIGN); if (!ptr) panic("cannot allocate %d bytes for stack at %pS\n", @@ -153,6 +153,9 @@ void __init irqstack_early_init(void) { unsigned int i; + if (IS_ENABLED(CONFIG_VMAP_STACK)) + return; + /* interrupt stacks must be in lowmem, we get that for free on ppc32 * as the memblock is limited to lowmem by default */ for_each_possible_cpu(i) { @@ -161,6 +164,18 @@ void __init irqstack_early_init(void) } } +#ifdef CONFIG_VMAP_STACK +void *emergency_ctx[NR_CPUS] __ro_after_init; + +void __init emergency_stack_init(void) +{ + unsigned int i; + + for_each_possible_cpu(i) + emergency_ctx[i] = alloc_stack(); +} +#endif + #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) void __init exc_lvl_early_init(void) { diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6104917a282d..e05e6dd67ae6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -633,7 +633,7 @@ static void *__init alloc_stack(unsigned long limit, int cpu) BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16); - ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE, + ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN, MEMBLOCK_LOW_LIMIT, limit, early_cpu_to_node(cpu)); if (!ptr) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 014ff0701f24..82a3438300fd 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1637,6 +1637,15 @@ void StackOverflow(struct pt_regs *regs) panic("kernel stack overflow"); } +void stack_overflow_exception(struct pt_regs *regs) +{ + enum ctx_state prev_state = exception_enter(); + + die("Kernel stack overflow", regs, SIGSEGV); + + exception_exit(prev_state); +} + void kernel_fp_unavailable_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index eae9ddaecbcf..b9a108411c0d 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -728,11 +728,6 @@ static int __init vdso_init(void) */ vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages); -#else - vdso_data->dcache_block_size = L1_CACHE_BYTES; - vdso_data->dcache_log_block_size = L1_CACHE_SHIFT; - vdso_data->icache_block_size = L1_CACHE_BYTES; - vdso_data->icache_log_block_size = L1_CACHE_SHIFT; #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 06f54d947057..e147bbdc12cd 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -2,9 +2,7 @@ # List of files in the vdso, has to be asm only for now -obj-vdso32-$(CONFIG_PPC64) = getcpu.o -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \ - $(obj-vdso32-y) +obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o # Build rules diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index 7f882e7b9f43..3440ddf21c8b 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -8,7 +8,9 @@ #include #include #include +#include #include +#include .text @@ -22,42 +24,62 @@ */ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc +#ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - mr r11,r3 - bl __get_datapage@local + get_datapage r10, r0 mtlr r12 - mr r10,r3 +#endif +#ifdef CONFIG_PPC64 lwz r7,CFG_DCACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ +#else + li r5, L1_CACHE_BYTES - 1 +#endif + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ +#ifdef CONFIG_PPC64 lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) srw. r8,r8,r9 /* compute line count */ +#else + srwi. r8, r8, L1_CACHE_SHIFT + mr r7, r6 +#endif crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 1: dcbst 0,r6 +#ifdef CONFIG_PPC64 add r6,r6,r7 +#else + addi r6, r6, L1_CACHE_BYTES +#endif bdnz 1b sync /* Now invalidate the instruction cache */ +#ifdef CONFIG_PPC64 lwz r7,CFG_ICACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) srw. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ +#endif mtctr r8 +#ifdef CONFIG_PPC64 2: icbi 0,r6 add r6,r6,r7 +#else +2: icbi 0, r7 + addi r7, r7, L1_CACHE_BYTES +#endif bdnz 2b isync li r3,0 diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 6c7401bd284e..217bb630f8f9 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -10,35 +10,13 @@ #include #include #include +#include .text .global __kernel_datapage_offset; __kernel_datapage_offset: .long 0 -V_FUNCTION_BEGIN(__get_datapage) - .cfi_startproc - /* We don't want that exposed or overridable as we want other objects - * to be able to bl directly to here - */ - .protected __get_datapage - .hidden __get_datapage - - mflr r0 - .cfi_register lr,r0 - - bcl 20,31,data_page_branch -data_page_branch: - mflr r3 - mtlr r0 - addi r3, r3, __kernel_datapage_offset-data_page_branch - lwz r0,0(r3) - .cfi_restore lr - add r3,r0,r3 - blr - .cfi_endproc -V_FUNCTION_END(__get_datapage) - /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; * @@ -52,11 +30,10 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) .cfi_startproc mflr r12 .cfi_register lr,r12 - mr r4,r3 - bl __get_datapage@local + mr. r4,r3 + get_datapage r3, r0 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP32 - cmpli cr0,r4,0 beqlr li r0,NR_syscalls stw r0,0(r4) @@ -75,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - bl __get_datapage@local + get_datapage r3, r0 lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) lwz r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index 63e914539e1a..ff5e214fec41 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -15,6 +15,7 @@ * int __kernel_getcpu(unsigned *cpu, unsigned *node); * */ +#if defined(CONFIG_PPC64) V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc mfspr r5,SPRN_SPRG_VDSO_READ @@ -24,10 +25,26 @@ V_FUNCTION_BEGIN(__kernel_getcpu) rlwinm r7,r5,16,31-15,31-0 beq cr0,1f stw r6,0(r3) -1: beq cr1,2f - stw r7,0(r4) -2: crclr cr0*4+so +1: crclr cr0*4+so li r3,0 /* always success */ + beqlr cr1 + stw r7,0(r4) blr .cfi_endproc V_FUNCTION_END(__kernel_getcpu) +#elif !defined(CONFIG_SMP) +V_FUNCTION_BEGIN(__kernel_getcpu) + .cfi_startproc + cmpwi cr0, r3, 0 + cmpwi cr1, r4, 0 + li r5, 0 + beq cr0, 1f + stw r5, 0(r3) +1: li r3, 0 /* always success */ + crclr cr0*4+so + beqlr cr1 + stw r5, 0(r4) + blr + .cfi_endproc +V_FUNCTION_END(__kernel_getcpu) +#endif diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 3306672f57a9..a3951567118a 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -31,28 +32,26 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) mflr r12 .cfi_register lr,r12 - mr r10,r3 /* r10 saves tv */ + mr. r10,r3 /* r10 saves tv */ mr r11,r4 /* r11 saves tz */ - bl __get_datapage@local /* get data page */ - mr r9, r3 /* datapage ptr in r9 */ - cmplwi r10,0 /* check if tv is NULL */ + get_datapage r9, r0 beq 3f - lis r7,1000000@ha /* load up USEC_PER_SEC */ - addi r7,r7,1000000@l /* so we get microseconds in r4 */ + LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */ bl __do_get_tspec@local /* get sec/usec from tb & kernel */ stw r3,TVAL32_TV_SEC(r10) stw r4,TVAL32_TV_USEC(r10) 3: cmplwi r11,0 /* check if tz is NULL */ - beq 1f + mtlr r12 + crclr cr0*4+so + li r3,0 + beqlr + lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ lwz r5,CFG_TZ_DSTTIME(r9) stw r4,TZONE_TZ_MINWEST(r11) stw r5,TZONE_TZ_DSTTIME(r11) -1: mtlr r12 - crclr cr0*4+so - li r3,0 blr .cfi_endproc V_FUNCTION_END(__kernel_gettimeofday) @@ -69,17 +68,23 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) cmpli cr0,r3,CLOCK_REALTIME cmpli cr1,r3,CLOCK_MONOTONIC cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f + + cmpli cr5,r3,CLOCK_REALTIME_COARSE + cmpli cr6,r3,CLOCK_MONOTONIC_COARSE + cror cr5*4+eq,cr5*4+eq,cr6*4+eq + + cror cr0*4+eq,cr0*4+eq,cr5*4+eq + bne cr0, .Lgettime_fallback mflr r12 /* r12 saves lr */ .cfi_register lr,r12 mr r11,r4 /* r11 saves tp */ - bl __get_datapage@local /* get data page */ - mr r9,r3 /* datapage ptr in r9 */ - lis r7,NSEC_PER_SEC@h /* want nanoseconds */ - ori r7,r7,NSEC_PER_SEC@l -50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ - bne cr1,80f /* not monotonic -> all done */ + get_datapage r9, r0 + LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */ + beq cr5, .Lcoarse_clocks +.Lprecise_clocks: + bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ + bne cr1, .Lfinish /* not monotonic -> all done */ /* * CLOCK_MONOTONIC @@ -103,12 +108,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) add r9,r9,r0 lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) cmpl cr0,r8,r0 /* check if updated */ - bne- 50b + bne- .Lprecise_clocks + b .Lfinish_monotonic + + /* + * For coarse clocks we get data directly from the vdso data page, so + * we don't need to call __do_get_tspec, but we still need to do the + * counter trick. + */ +.Lcoarse_clocks: + lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) + andi. r0,r8,1 /* pending update ? loop */ + bne- .Lcoarse_clocks + add r9,r9,r0 /* r0 is already 0 */ + + /* + * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE + * too + */ + lwz r3,STAMP_XTIME_SEC+LOPART(r9) + lwz r4,STAMP_XTIME_NSEC+LOPART(r9) + bne cr6,1f + + /* CLOCK_MONOTONIC_COARSE */ + lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) + lwz r6,WTOM_CLOCK_NSEC(r9) + + /* check if counter has updated */ + or r0,r6,r5 +1: or r0,r0,r3 + or r0,r0,r4 + xor r0,r0,r0 + add r3,r3,r0 + lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9) + cmpl cr0,r0,r8 /* check if updated */ + bne- .Lcoarse_clocks + + /* Counter has not updated, so continue calculating proper values for + * sec and nsec if monotonic coarse, or just return with the proper + * values for realtime. + */ + bne cr6, .Lfinish /* Calculate and store result. Note that this mimics the C code, * which may cause funny results if nsec goes negative... is that * possible at all ? */ +.Lfinish_monotonic: add r3,r3,r5 add r4,r4,r6 cmpw cr0,r4,r7 @@ -116,11 +162,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) blt 1f subf r4,r7,r4 addi r3,r3,1 -1: bge cr1,80f +1: bge cr1, .Lfinish addi r3,r3,-1 add r4,r4,r7 -80: stw r3,TSPC32_TV_SEC(r11) +.Lfinish: + stw r3,TSPC32_TV_SEC(r11) stw r4,TSPC32_TV_NSEC(r11) mtlr r12 @@ -131,7 +178,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) /* * syscall fallback */ -99: +.Lgettime_fallback: li r0,__NR_clock_gettime .cfi_restore lr sc @@ -149,17 +196,20 @@ V_FUNCTION_END(__kernel_clock_gettime) V_FUNCTION_BEGIN(__kernel_clock_getres) .cfi_startproc /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f + cmplwi cr0, r3, CLOCK_MAX + cmpwi cr1, r3, CLOCK_REALTIME_COARSE + cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE + bgt cr0, 99f + LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES) + beq cr1, 1f + beq cr7, 1f mflr r12 .cfi_register lr,r12 - bl __get_datapage@local /* get data page */ + get_datapage r3, r0 lwz r5, CLOCK_HRTIMER_RES(r3) mtlr r12 - li r3,0 +1: li r3,0 cmpli cr0,r4,0 crclr cr0*4+so beqlr @@ -168,11 +218,11 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) blr /* - * syscall fallback + * invalid clock */ 99: - li r0,__NR_clock_getres - sc + li r3, EINVAL + crset so blr .cfi_endproc V_FUNCTION_END(__kernel_clock_getres) @@ -190,16 +240,15 @@ V_FUNCTION_BEGIN(__kernel_time) .cfi_register lr,r12 mr r11,r3 /* r11 holds t */ - bl __get_datapage@local - mr r9, r3 /* datapage ptr in r9 */ + get_datapage r9, r0 lwz r3,STAMP_XTIME_SEC+LOPART(r9) cmplwi r11,0 /* check if t is NULL */ - beq 2f - stw r3,0(r11) /* store result at *t */ -2: mtlr r12 + mtlr r12 crclr cr0*4+so + beqlr + stw r3,0(r11) /* store result at *t */ blr .cfi_endproc V_FUNCTION_END(__kernel_time) diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 00c025ba4a92..5206c2eb2a1d 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -155,7 +155,7 @@ VERSION __kernel_sync_dicache_p5; __kernel_sigtramp32; __kernel_sigtramp_rt32; -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) || !defined(CONFIG_SMP) __kernel_getcpu; #endif diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 8eb867dbad5f..25c14a0981bf 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -67,6 +67,9 @@ _GLOBAL(load_up_altivec) #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ oris r9,r9,MSR_VEC@h +#ifdef CONFIG_VMAP_STACK + tovirt(r5, r5) +#endif #else ld r4,PACACURRENT(r13) addi r5,r4,THREAD /* Get THREAD */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8834220036a5..b4c89a1acebb 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -323,7 +323,7 @@ SECTIONS #endif /* The initial task and kernel stack */ - INIT_TASK_DATA_SECTION(THREAD_SIZE) + INIT_TASK_DATA_SECTION(THREAD_ALIGN) .data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) { PAGE_ALIGNED_DATA(PAGE_SIZE) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 744dba98e5d1..803940d79b73 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -63,12 +63,10 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, } isync(); - pagefault_disable(); if (is_load) - ret = raw_copy_from_user(to, from, n); + ret = probe_user_read(to, (const void __user *)from, n); else - ret = raw_copy_to_user(to, from, n); - pagefault_enable(); + ret = probe_user_write((void __user *)to, from, n); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c6fbbd29bd87..dbc2fecc37f0 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1801,6 +1801,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) tlbsync ptesync +BEGIN_FTR_SECTION /* Radix: Handle the case where the guest used an illegal PID */ LOAD_REG_ADDR(r4, mmu_base_pid) lwz r3, VCPU_GUEST_PID(r9) @@ -1830,6 +1831,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) addi r7,r7,0x1000 bdnz 1b ptesync +END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG) 2: #endif /* CONFIG_PPC_RADIX_MMU */ diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 66858b7d3c6b..85215e79db42 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -484,7 +484,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, kvmppc_xive_select_irq(state, &hw_num, &xd); /* - * See command in xive_lock_and_mask() concerning masking + * See comment in xive_lock_and_mask() concerning masking * via firmware. */ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 8bbbd9775c8a..c11b0a005196 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -25,6 +25,12 @@ #include #include +#ifdef CONFIG_VMAP_STACK +#define ADDR_OFFSET 0 +#else +#define ADDR_OFFSET PAGE_OFFSET +#endif + #ifdef CONFIG_SMP .section .bss .align 2 @@ -47,8 +53,8 @@ mmu_hash_lock: .text _GLOBAL(hash_page) #ifdef CONFIG_SMP - lis r8, (mmu_hash_lock - PAGE_OFFSET)@h - ori r8, r8, (mmu_hash_lock - PAGE_OFFSET)@l + lis r8, (mmu_hash_lock - ADDR_OFFSET)@h + ori r8, r8, (mmu_hash_lock - ADDR_OFFSET)@l lis r0,0x0fff b 10f 11: lwz r6,0(r8) @@ -66,9 +72,12 @@ _GLOBAL(hash_page) cmplw 0,r4,r0 ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ +#ifdef CONFIG_VMAP_STACK + tovirt(r5, r5) +#endif blt+ 112f /* assume user more likely */ - lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + lis r5, (swapper_pg_dir - ADDR_OFFSET)@ha /* if kernel address, use */ + addi r5 ,r5 ,(swapper_pg_dir - ADDR_OFFSET)@l /* kernel page table */ rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ 112: #ifndef CONFIG_PTE_64BIT @@ -80,6 +89,9 @@ _GLOBAL(hash_page) lwzx r8,r8,r5 /* Get L1 entry */ rlwinm. r8,r8,0,0,20 /* extract pt base address */ #endif +#ifdef CONFIG_VMAP_STACK + tovirt(r8, r8) +#endif #ifdef CONFIG_SMP beq- hash_page_out /* return if no mapping */ #else @@ -137,9 +149,9 @@ retry: #ifdef CONFIG_SMP eieio - lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha + lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha li r0,0 - stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) + stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8) #endif /* Return from the exception */ @@ -152,9 +164,9 @@ retry: #ifdef CONFIG_SMP hash_page_out: eieio - lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha + lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha li r0,0 - stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) + stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8) blr #endif /* CONFIG_SMP */ @@ -329,7 +341,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) patch_site 1f, patch__hash_page_A1 patch_site 2f, patch__hash_page_A2 /* Get the address of the primary PTE group in the hash table (r3) */ -0: lis r0, (Hash_base - PAGE_OFFSET)@h /* base address of hash table */ +0: lis r0, (Hash_base - ADDR_OFFSET)@h /* base address of hash table */ 1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ 2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ xor r3,r3,r0 /* make primary hash */ @@ -343,10 +355,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ 10f /* no PTE: go look for an empty slot */ tlbie r4 - lis r4, (htab_hash_searches - PAGE_OFFSET)@ha - lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) + lis r4, (htab_hash_searches - ADDR_OFFSET)@ha + lwz r6, (htab_hash_searches - ADDR_OFFSET)@l(r4) addi r6,r6,1 /* count how many searches we do */ - stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) + stw r6, (htab_hash_searches - ADDR_OFFSET)@l(r4) /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ mtctr r0 @@ -378,10 +390,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ found_empty /* update counter of times that the primary PTEG is full */ - lis r4, (primary_pteg_full - PAGE_OFFSET)@ha - lwz r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) + lis r4, (primary_pteg_full - ADDR_OFFSET)@ha + lwz r6, (primary_pteg_full - ADDR_OFFSET)@l(r4) addi r6,r6,1 - stw r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) + stw r6, (primary_pteg_full - ADDR_OFFSET)@l(r4) patch_site 0f, patch__hash_page_C /* Search the secondary PTEG for an empty slot */ @@ -415,8 +427,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) * lockup here but that shouldn't happen */ -1: lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ - lwz r6, (next_slot - PAGE_OFFSET)@l(r4) +1: lis r4, (next_slot - ADDR_OFFSET)@ha /* get next evict slot */ + lwz r6, (next_slot - ADDR_OFFSET)@l(r4) addi r6,r6,HPTE_SIZE /* search for candidate */ andi. r6,r6,7*HPTE_SIZE stw r6,next_slot@l(r4) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 69b2419accef..0a1c65a2c565 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -413,6 +413,7 @@ void __init MMU_init_hw(void) void __init MMU_init_hw_patch(void) { unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); + unsigned int hash; if (ppc_md.progress) ppc_md.progress("hash:patch", 0x345); @@ -424,8 +425,12 @@ void __init MMU_init_hw_patch(void) /* * Patch up the instructions in hashtable.S:create_hpte */ - modify_instruction_site(&patch__hash_page_A0, 0xffff, - ((unsigned int)Hash - PAGE_OFFSET) >> 16); + if (IS_ENABLED(CONFIG_VMAP_STACK)) + hash = (unsigned int)Hash; + else + hash = (unsigned int)Hash - PAGE_OFFSET; + + modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16); modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6); modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6); modify_instruction_site(&patch__hash_page_B, 0xffff, hmask); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index b30435c7d804..523d4d39d11e 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -652,6 +652,7 @@ static void init_hpte_page_sizes(void) static void __init htab_init_page_sizes(void) { + bool aligned = true; init_hpte_page_sizes(); if (!debug_pagealloc_enabled()) { @@ -659,7 +660,15 @@ static void __init htab_init_page_sizes(void) * Pick a size for the linear mapping. Currently, we only * support 16M, 1M and 4K which is the default */ - if (mmu_psize_defs[MMU_PAGE_16M].shift) + if (IS_ENABLED(STRICT_KERNEL_RWX) && + (unsigned long)_stext % 0x1000000) { + if (mmu_psize_defs[MMU_PAGE_16M].shift) + pr_warn("Kernel not 16M aligned, " + "disabling 16M linear map alignment"); + aligned = false; + } + + if (mmu_psize_defs[MMU_PAGE_16M].shift && aligned) mmu_linear_psize = MMU_PAGE_16M; else if (mmu_psize_defs[MMU_PAGE_1M].shift) mmu_linear_psize = MMU_PAGE_1M; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 974109bb85db..dd1bea45325c 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -337,7 +337,11 @@ static void __init radix_init_pgtable(void) } /* Find out how many PID bits are supported */ - if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { + if (!mmu_pid_bits) + mmu_pid_bits = 20; + mmu_base_pid = 1; + } else if (cpu_has_feature(CPU_FTR_HVMODE)) { if (!mmu_pid_bits) mmu_pid_bits = 20; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index a95175c0972b..03f43c924e00 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -1161,6 +1161,9 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) if (unlikely(pid == MMU_NO_CONTEXT)) return; + if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) + return; + /* * If this context hasn't run on that CPU before and KVM is * around, there's a slim chance that the guest on another diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b5047f9b5dec..8db0507619e2 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -233,7 +233,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, // Read/write fault in a valid region (the exception table search passed // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, is_write)) + if (bad_kuap_fault(regs, address, is_write)) return true; // What's left? Kernel fault on user in well defined regions (extable @@ -279,12 +279,8 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && access_ok(nip, sizeof(*nip))) { unsigned int inst; - int res; - pagefault_disable(); - res = __get_user_inatomic(inst, nip); - pagefault_enable(); - if (!res) + if (!probe_user_read(&inst, nip, sizeof(inst))) return !store_updates_sp(inst); *must_retry = true; } @@ -354,6 +350,9 @@ static void sanity_check_fault(bool is_write, bool is_user, * Userspace trying to access kernel address, we get PROTFAULT for that. */ if (is_user && address >= TASK_SIZE) { + if ((long)address == -1) + return; + pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n", current->comm, current->pid, address, from_kuid(&init_user_ns, current_uid())); diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 0e6ed4413eea..16dd95bd0749 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -12,7 +12,7 @@ #include #include -static pgprot_t kasan_prot_ro(void) +static pgprot_t __init kasan_prot_ro(void) { if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) return PAGE_READONLY; @@ -20,7 +20,7 @@ static pgprot_t kasan_prot_ro(void) return PAGE_KERNEL_RO; } -static void kasan_populate_pte(pte_t *ptep, pgprot_t prot) +static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot) { unsigned long va = (unsigned long)kasan_early_shadow_page; phys_addr_t pa = __pa(kasan_early_shadow_page); @@ -30,29 +30,25 @@ static void kasan_populate_pte(pte_t *ptep, pgprot_t prot) __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); } -static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) +static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) { pmd_t *pmd; unsigned long k_cur, k_next; - pgprot_t prot = slab_is_available() ? kasan_prot_ro() : PAGE_KERNEL; + pte_t *new = NULL; pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { - pte_t *new; - k_next = pgd_addr_end(k_cur, k_end); if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) continue; - if (slab_is_available()) - new = pte_alloc_one_kernel(&init_mm); - else + if (!new) new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); if (!new) return -ENOMEM; - kasan_populate_pte(new, prot); + kasan_populate_pte(new, PAGE_KERNEL); smp_wmb(); /* See comment in __pte_alloc */ @@ -63,39 +59,27 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l new = NULL; } spin_unlock(&init_mm.page_table_lock); - - if (new && slab_is_available()) - pte_free_kernel(&init_mm, new); } return 0; } -static void __ref *kasan_get_one_page(void) -{ - if (slab_is_available()) - return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - - return memblock_alloc(PAGE_SIZE, PAGE_SIZE); -} - -static int __ref kasan_init_region(void *start, size_t size) +static int __init kasan_init_region(void *start, size_t size) { unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); unsigned long k_cur; int ret; - void *block = NULL; + void *block; ret = kasan_init_shadow_page_tables(k_start, k_end); if (ret) return ret; - if (!slab_is_available()) - block = memblock_alloc(k_end - k_start, PAGE_SIZE); + block = memblock_alloc(k_end - k_start, PAGE_SIZE); for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); - void *va = block ? block + k_cur - k_start : kasan_get_one_page(); + void *va = block + k_cur - k_start; pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); if (!va) @@ -129,6 +113,31 @@ static void __init kasan_remap_early_shadow_ro(void) flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); } +static void __init kasan_unmap_early_shadow_vmalloc(void) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END); + unsigned long k_cur; + phys_addr_t pa = __pa(kasan_early_shadow_page); + + if (!early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + int ret = kasan_init_shadow_page_tables(k_start, k_end); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } + for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + pte_t *ptep = pte_offset_kernel(pmd, k_cur); + + if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + continue; + + __set_pte_at(&init_mm, k_cur, ptep, __pte(0), 0); + } + flush_tlb_kernel_range(k_start, k_end); +} + void __init kasan_mmu_init(void) { int ret; @@ -165,34 +174,22 @@ void __init kasan_init(void) pr_info("KASAN init done\n"); } -#ifdef CONFIG_MODULES -void *module_alloc(unsigned long size) +void __init kasan_late_init(void) { - void *base; - - base = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); - - if (!base) - return NULL; - - if (!kasan_init_region(base, size)) - return base; - - vfree(base); - - return NULL; + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) + kasan_unmap_early_shadow_vmalloc(); } -#endif #ifdef CONFIG_PPC_BOOK3S_32 u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0}; static void __init kasan_early_hash_table(void) { - modify_instruction_site(&patch__hash_page_A0, 0xffff, __pa(early_hash) >> 16); - modify_instruction_site(&patch__flush_hash_A0, 0xffff, __pa(early_hash) >> 16); + unsigned int hash = IS_ENABLED(CONFIG_VMAP_STACK) ? (unsigned int)early_hash : + __pa(early_hash); + + modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16); + modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16); Hash = (struct hash_pte *)early_hash; } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f5535eae637f..ef7b1119b2e2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -301,6 +302,9 @@ void __init mem_init(void) high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); set_max_mapnr(max_pfn); + + kasan_late_init(); + memblock_free_all(); #ifdef CONFIG_HIGHMEM diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 8e99649c24fc..7097e07a209a 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -181,3 +181,9 @@ void mmu_mark_rodata_ro(void); static inline void mmu_mark_initmem_nx(void) { } static inline void mmu_mark_rodata_ro(void) { } #endif + +#ifdef CONFIG_PPC_DEBUG_WX +void ptdump_check_wx(void); +#else +static inline void ptdump_check_wx(void) { } +#endif diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 96eb8e43f39b..3189308dece4 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -21,33 +21,34 @@ extern int __map_without_ltlbs; static unsigned long block_mapped_ram; /* - * Return PA for this VA if it is in an area mapped with LTLBs. + * Return PA for this VA if it is in an area mapped with LTLBs or fixmap. * Otherwise, returns 0 */ phys_addr_t v_block_mapped(unsigned long va) { unsigned long p = PHYS_IMMR_BASE; - if (__map_without_ltlbs) - return 0; if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) return p + va - VIRT_IMMR_BASE; + if (__map_without_ltlbs) + return 0; if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram) return __pa(va); return 0; } /* - * Return VA for a given PA mapped with LTLBs or 0 if not mapped + * Return VA for a given PA mapped with LTLBs or fixmap + * Return 0 if not mapped */ unsigned long p_block_mapped(phys_addr_t pa) { unsigned long p = PHYS_IMMR_BASE; - if (__map_without_ltlbs) - return 0; if (pa >= p && pa < p + IMMR_SIZE) return VIRT_IMMR_BASE + pa - p; + if (__map_without_ltlbs) + return 0; if (pa < block_mapped_ram) return (unsigned long)__va(pa); return 0; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 73b84166d06a..5fb90edd865e 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -218,6 +218,7 @@ void mark_rodata_ro(void) if (v_block_mapped((unsigned long)_sinittext)) { mmu_mark_rodata_ro(); + ptdump_check_wx(); return; } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 2f9ddc29c535..206156255247 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -24,6 +24,8 @@ #include #include +#include + #include "ptdump.h" /* @@ -173,10 +175,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr) static void note_prot_wx(struct pg_state *st, unsigned long addr) { + pte_t pte = __pte(st->current_flags); + if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx) return; - if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) + if (!pte_write(pte) || !pte_exec(pte)) return; WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n", diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6ffcb80cf844..6f347fa29f41 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -28,15 +28,12 @@ static unsigned int user_getsp32(unsigned int sp, int is_first) unsigned int stack_frame[2]; void __user *p = compat_ptr(sp); - if (!access_ok(p, sizeof(stack_frame))) - return 0; - /* * The most likely reason for this is that we returned -EFAULT, * which means that we've done all that we can do from * interrupt context. */ - if (__copy_from_user_inatomic(stack_frame, p, sizeof(stack_frame))) + if (probe_user_read(stack_frame, (void __user *)p, sizeof(stack_frame))) return 0; if (!is_first) @@ -54,11 +51,7 @@ static unsigned long user_getsp64(unsigned long sp, int is_first) { unsigned long stack_frame[3]; - if (!access_ok((void __user *)sp, sizeof(stack_frame))) - return 0; - - if (__copy_from_user_inatomic(stack_frame, (void __user *)sp, - sizeof(stack_frame))) + if (probe_user_read(stack_frame, (void __user *)sp, sizeof(stack_frame))) return 0; if (!is_first) @@ -103,7 +96,6 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) first_frame = 0; } } else { - pagefault_disable(); #ifdef CONFIG_PPC64 if (!is_32bit_task()) { while (depth--) { @@ -112,7 +104,6 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) break; first_frame = 0; } - pagefault_enable(); return; } #endif @@ -123,6 +114,5 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) break; first_frame = 0; } - pagefault_enable(); } } diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 19124b0b171a..1ad03c55c88c 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -157,10 +157,6 @@ static void mpc8xx_pmu_read(struct perf_event *event) static void mpc8xx_pmu_del(struct perf_event *event, int flags) { - /* mfspr r10, SPRN_SPRG_SCRATCH0 */ - unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_SPRG_SCRATCH0); - mpc8xx_pmu_read(event); /* If it was the last user, stop counting to avoid useles overhead */ @@ -173,6 +169,10 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) break; case PERF_8xx_ID_ITLB_LOAD_MISS: if (atomic_dec_return(&itlb_miss_ref) == 0) { + /* mfspr r10, SPRN_SPRG_SCRATCH0 */ + unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) | + __PPC_SPR(SPRN_SPRG_SCRATCH0); + patch_instruction_site(&patch__itlbmiss_exit_1, insn); #ifndef CONFIG_PIN_TLB_TEXT patch_instruction_site(&patch__itlbmiss_exit_2, insn); @@ -181,6 +181,10 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) break; case PERF_8xx_ID_DTLB_LOAD_MISS: if (atomic_dec_return(&dtlb_miss_ref) == 0) { + /* mfspr r10, SPRN_DAR */ + unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) | + __PPC_SPR(SPRN_DAR); + patch_instruction_site(&patch__dtlbmiss_exit_1, insn); patch_instruction_site(&patch__dtlbmiss_exit_2, insn); patch_instruction_site(&patch__dtlbmiss_exit_3, insn); diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 35d542515faf..cbc251981209 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -155,12 +155,8 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) ((unsigned long)ptr & 7)) return -EFAULT; - pagefault_disable(); - if (!__get_user_inatomic(*ret, ptr)) { - pagefault_enable(); + if (!probe_user_read(ret, ptr, sizeof(*ret))) return 0; - } - pagefault_enable(); return read_user_stack_slow(ptr, ret, 8); } @@ -171,12 +167,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) ((unsigned long)ptr & 3)) return -EFAULT; - pagefault_disable(); - if (!__get_user_inatomic(*ret, ptr)) { - pagefault_enable(); + if (!probe_user_read(ret, ptr, sizeof(*ret))) return 0; - } - pagefault_enable(); return read_user_stack_slow(ptr, ret, 4); } @@ -293,17 +285,11 @@ static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, */ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) { - int rc; - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || ((unsigned long)ptr & 3)) return -EFAULT; - pagefault_disable(); - rc = __get_user_inatomic(*ret, ptr); - pagefault_enable(); - - return rc; + return probe_user_read(ret, ptr, sizeof(*ret)); } static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 48604625ab31..3086055bf681 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -415,7 +415,6 @@ static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) static __u64 power_pmu_bhrb_to(u64 addr) { unsigned int instr; - int ret; __u64 target; if (is_kernel_addr(addr)) { @@ -426,13 +425,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Userspace: need copy instruction here then translate it */ - pagefault_disable(); - ret = __get_user_inatomic(instr, (unsigned int __user *)addr); - if (ret) { - pagefault_enable(); + if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr))) return 0; - } - pagefault_enable(); target = branch_target(&instr); if ((!target) || (instr & BRANCH_ABSOLUTE)) diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c index 13631f35cd14..04bf6ecf7d55 100644 --- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c +++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c @@ -434,9 +434,9 @@ static int mpc512x_lpbfifo_probe(struct platform_device *pdev) memset(&lpbfifo, 0, sizeof(struct lpbfifo_data)); spin_lock_init(&lpbfifo.lock); - lpbfifo.chan = dma_request_slave_channel(&pdev->dev, "rx-tx"); - if (lpbfifo.chan == NULL) - return -EPROBE_DEFER; + lpbfifo.chan = dma_request_chan(&pdev->dev, "rx-tx"); + if (IS_ERR(lpbfifo.chan)) + return PTR_ERR(lpbfifo.chan); if (of_address_to_resource(pdev->dev.of_node, 0, &r) != 0) { dev_err(&pdev->dev, "bad 'reg' in 'sclpc' device tree node\n"); diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index 273145aed90a..b0d5471f620d 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -64,7 +64,7 @@ static void quirk_mpc8360e_qe_enet10(void) return; } - base = ioremap(res.start, res.end - res.start + 1); + base = ioremap(res.start, resource_size(&res)); /* * set output delay adjustments to default values according diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 8c7ea2486bc0..48f7d96ae37d 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -252,6 +252,15 @@ static int smp_85xx_start_cpu(int cpu) out_be64((u64 *)(&spin_table->addr_h), __pa(ppc_function_entry(generic_secondary_smp_init))); #else +#ifdef CONFIG_PHYS_ADDR_T_64BIT + /* + * We need also to write addr_h to spin table for systems + * in which their physical memory start address was configured + * to above 4G, otherwise the secondary core can not get + * correct entry to start from. + */ + out_be32(&spin_table->addr_h, __pa(__early_start) >> 32); +#endif out_be32(&spin_table->addr_l, __pa(__early_start)); #endif flush_spin_table(spin_table); diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c index 6c3c0cdaee9a..b301ef9d6ce7 100644 --- a/arch/powerpc/platforms/85xx/twr_p102x.c +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -60,10 +60,6 @@ static void __init twr_p1025_pic_init(void) */ static void __init twr_p1025_setup_arch(void) { -#ifdef CONFIG_QUICC_ENGINE - struct device_node *np; -#endif - if (ppc_md.progress) ppc_md.progress("twr_p1025_setup_arch()", 0); @@ -77,6 +73,7 @@ static void __init twr_p1025_setup_arch(void) #if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE) if (machine_is(twr_p1025)) { struct ccsr_guts __iomem *guts; + struct device_node *np; np = of_find_compatible_node(NULL, NULL, "fsl,p1021-guts"); if (np) { diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index e28df298df56..1f8025383caa 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -177,6 +177,10 @@ config PPC_970_NAP config PPC_P7_NAP bool +config PPC_BOOK3S_IDLE + def_bool y + depends on (PPC_970_NAP || PPC_P7_NAP) + config PPC_INDIRECT_PIO bool select GENERIC_IOMAP diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 8d7f9c3dc771..6caedc88474f 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,4 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 +config PPC32 + bool + default y if !PPC64 + select KASAN_VMALLOC if KASAN && MODULES + config PPC64 bool "64-bit kernel" select ZLIB_DEFLATE @@ -31,12 +36,14 @@ config PPC_BOOK3S_6xx select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP + select HAVE_ARCH_VMAP_STACK config PPC_BOOK3S_601 bool "PowerPC 601" select PPC_BOOK3S_32 select PPC_FPU select PPC_HAVE_KUAP + select HAVE_ARCH_VMAP_STACK config PPC_85xx bool "Freescale 85xx" @@ -49,6 +56,7 @@ config PPC_8xx select PPC_HAVE_KUEP select PPC_HAVE_KUAP select PPC_MM_SLICES if HUGETLB_PAGE + select HAVE_ARCH_VMAP_STACK config 40x bool "AMCC 40x" diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 47f73103ef74..6f019df37916 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -229,7 +229,7 @@ static void __init maple_init_IRQ(void) root = of_find_node_by_path("/"); naddr = of_n_addr_cells(root); opprop = of_get_property(root, "platform-open-pic", &opplen); - if (opprop != 0) { + if (opprop) { openpic_addr = of_read_number(opprop, naddr); has_isus = (opplen > naddr); printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n", diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index a6ee08009f0f..2b3dfd0b6cdd 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -790,42 +790,6 @@ static int opal_sysfs_init(void) return 0; } -static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t count) -{ - return memory_read_from_buffer(buf, count, &off, bin_attr->private, - bin_attr->size); -} - -static struct bin_attribute symbol_map_attr = { - .attr = {.name = "symbol_map", .mode = 0400}, - .read = symbol_map_read -}; - -static void opal_export_symmap(void) -{ - const __be64 *syms; - unsigned int size; - struct device_node *fw; - int rc; - - fw = of_find_node_by_path("/ibm,opal/firmware"); - if (!fw) - return; - syms = of_get_property(fw, "symbol-map", &size); - if (!syms || size != 2 * sizeof(__be64)) - return; - - /* Setup attributes */ - symbol_map_attr.private = __va(be64_to_cpu(syms[0])); - symbol_map_attr.size = be64_to_cpu(syms[1]); - - rc = sysfs_create_bin_file(opal_kobj, &symbol_map_attr); - if (rc) - pr_warn("Error %d creating OPAL symbols file\n", rc); -} - static ssize_t export_attr_read(struct file *fp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) @@ -834,6 +798,75 @@ static ssize_t export_attr_read(struct file *fp, struct kobject *kobj, bin_attr->size); } +static int opal_add_one_export(struct kobject *parent, const char *export_name, + struct device_node *np, const char *prop_name) +{ + struct bin_attribute *attr = NULL; + const char *name = NULL; + u64 vals[2]; + int rc; + + rc = of_property_read_u64_array(np, prop_name, &vals[0], 2); + if (rc) + goto out; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + name = kstrdup(export_name, GFP_KERNEL); + if (!name) { + rc = -ENOMEM; + goto out; + } + + sysfs_bin_attr_init(attr); + attr->attr.name = name; + attr->attr.mode = 0400; + attr->read = export_attr_read; + attr->private = __va(vals[0]); + attr->size = vals[1]; + + rc = sysfs_create_bin_file(parent, attr); +out: + if (rc) { + kfree(name); + kfree(attr); + } + + return rc; +} + +static void opal_add_exported_attrs(struct device_node *np, + struct kobject *kobj) +{ + struct device_node *child; + struct property *prop; + + for_each_property_of_node(np, prop) { + int rc; + + if (!strcmp(prop->name, "name") || + !strcmp(prop->name, "phandle")) + continue; + + rc = opal_add_one_export(kobj, prop->name, np, prop->name); + if (rc) { + pr_warn("Unable to add export %pOF/%s, rc = %d!\n", + np, prop->name, rc); + } + } + + for_each_child_of_node(np, child) { + struct kobject *child_kobj; + + child_kobj = kobject_create_and_add(child->name, kobj); + if (!child_kobj) { + pr_err("Unable to create export dir for %pOF\n", child); + continue; + } + + opal_add_exported_attrs(child, child_kobj); + } +} + /* * opal_export_attrs: creates a sysfs node for each property listed in * the device-tree under /ibm,opal/firmware/exports/ @@ -843,11 +876,8 @@ static ssize_t export_attr_read(struct file *fp, struct kobject *kobj, */ static void opal_export_attrs(void) { - struct bin_attribute *attr; struct device_node *np; - struct property *prop; struct kobject *kobj; - u64 vals[2]; int rc; np = of_find_node_by_path("/ibm,opal/firmware/exports"); @@ -861,41 +891,16 @@ static void opal_export_attrs(void) return; } - for_each_property_of_node(np, prop) { - if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle")) - continue; + opal_add_exported_attrs(np, kobj); - if (of_property_read_u64_array(np, prop->name, &vals[0], 2)) - continue; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - - if (attr == NULL) { - pr_warn("Failed kmalloc for bin_attribute!"); - continue; - } - - sysfs_bin_attr_init(attr); - attr->attr.name = kstrdup(prop->name, GFP_KERNEL); - attr->attr.mode = 0400; - attr->read = export_attr_read; - attr->private = __va(vals[0]); - attr->size = vals[1]; - - if (attr->attr.name == NULL) { - pr_warn("Failed kstrdup for bin_attribute attr.name"); - kfree(attr); - continue; - } - - rc = sysfs_create_bin_file(kobj, attr); - if (rc) { - pr_warn("Error %d creating OPAL sysfs exports/%s file\n", - rc, prop->name); - kfree(attr->attr.name); - kfree(attr); - } - } + /* + * NB: symbol_map existed before the generic export interface so it + * lives under the top level opal_kobj. + */ + rc = opal_add_one_export(opal_kobj, "symbol_map", + np->parent, "symbol-map"); + if (rc) + pr_warn("Error %d creating OPAL symbols file\n", rc); of_node_put(np); } @@ -1042,8 +1047,6 @@ static int __init opal_init(void) /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { - /* Export symbol map to userspace */ - opal_export_symmap(); /* Setup dump region interface */ opal_dump_region_init(); /* Setup error log interface */ @@ -1056,11 +1059,10 @@ static int __init opal_init(void) opal_sys_param_init(); /* Setup message log sysfs interface. */ opal_msglog_sysfs_init(); + /* Add all export properties*/ + opal_export_attrs(); } - /* Export all properties */ - opal_export_attrs(); - /* Initialize platform devices: IPMI backend, PRD & flash interface */ opal_pdev_init("ibm,opal-ipmi"); opal_pdev_init("ibm,opal-flash"); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index da1068a9c263..22c22cd7bd82 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -188,7 +188,7 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) unsigned int pe_num = pe->pe_number; WARN_ON(pe->pdev); - WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */ + WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */ kfree(pe->npucomp); memset(pe, 0, sizeof(struct pnv_ioda_pe)); clear_bit(pe_num, phb->ioda.pe_alloc); @@ -777,6 +777,34 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, return 0; } +static void pnv_ioda_unset_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + struct pci_dev *parent) +{ + int64_t rc; + + while (parent) { + struct pci_dn *pdn = pci_get_pdn(parent); + + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, + pe->pe_number, + OPAL_REMOVE_PE_FROM_DOMAIN); + /* XXX What to do in case of error ? */ + } + parent = parent->bus->self; + } + + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + + /* Disassociate PE in PELT */ + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + if (rc) + pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); +} + static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -792,7 +820,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; parent = pe->pbus->self; if (pe->flags & PNV_IODA_PE_BUS_ALL) - count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; + count = resource_size(&pe->pbus->busn_res); else count = 1; @@ -827,25 +855,13 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) for (rid = pe->rid; rid < rid_end; rid++) phb->ioda.pe_rmap[rid] = IODA_INVALID_PE; - /* Release from all parents PELT-V */ - while (parent) { - struct pci_dn *pdn = pci_get_pdn(parent); - if (pdn && pdn->pe_number != IODA_INVALID_PE) { - rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, - pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); - /* XXX What to do in case of error ? */ - } - parent = parent->bus->self; - } + /* + * Release from all parents PELT-V. NPUs don't have a PELTV + * table + */ + if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI) + pnv_ioda_unset_peltv(phb, pe, parent); - opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - - /* Disassociate PE in PELT */ - rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, - pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); - if (rc) - pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, bcomp, dcomp, fcomp, OPAL_UNMAP_PE); if (rc) @@ -874,7 +890,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; parent = pe->pbus->self; if (pe->flags & PNV_IODA_PE_BUS_ALL) - count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; + count = resource_size(&pe->pbus->busn_res); else count = 1; @@ -1062,20 +1078,20 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) return NULL; } - /* NOTE: We get only one ref to the pci_dev for the pdn, not for the - * pointer in the PE data structure, both should be destroyed at the - * same time. However, this needs to be looked at more closely again - * once we actually start removing things (Hotplug, SR-IOV, ...) + /* NOTE: We don't get a reference for the pointer in the PE + * data structure, both the device and PE structures should be + * destroyed at the same time. However, removing nvlink + * devices will need some work. * * At some point we want to remove the PDN completely anyways */ - pci_dev_get(dev); pdn->pe_number = pe->pe_number; pe->flags = PNV_IODA_PE_DEV; pe->pdev = dev; pe->pbus = NULL; pe->mve_number = -1; pe->rid = dev->bus->number << 8 | pdn->devfn; + pe->device_count++; pe_info(pe, "Associated device to PE\n"); @@ -1084,13 +1100,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) pnv_ioda_free_pe(pe); pdn->pe_number = IODA_INVALID_PE; pe->pdev = NULL; - pci_dev_put(dev); return NULL; } /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); list_add_tail(&pe->list, &phb->ioda.pe_list); - + mutex_unlock(&phb->ioda.pe_list_mutex); return pe; } @@ -1205,6 +1221,14 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); struct pnv_phb *phb = hose->private_data; + /* + * Intentionally leak a reference on the npu device (for + * nvlink only; this is not an opencapi path) to make sure it + * never goes away, as it's been the case all along and some + * work is needed otherwise. + */ + pci_dev_get(npu_pdev); + /* * Due to a hardware errata PE#0 on the NPU is reserved for * error handling. This means we only have three PEs remaining @@ -1228,11 +1252,11 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) */ dev_info(&npu_pdev->dev, "Associating to existing PE %x\n", pe_num); - pci_dev_get(npu_pdev); npu_pdn = pci_get_pdn(npu_pdev); rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; npu_pdn->pe_number = pe_num; phb->ioda.pe_rmap[rid] = pe->pe_number; + pe->device_count++; /* Map the PE to this link */ rc = opal_pci_set_pe(phb->opal_id, pe_num, rid, @@ -1268,8 +1292,6 @@ static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose; struct pnv_phb *phb; - struct pci_bus *bus; - struct pci_dev *pdev; struct pnv_ioda_pe *pe; list_for_each_entry(hose, &hose_list, list_node) { @@ -1281,11 +1303,6 @@ static void pnv_pci_ioda_setup_PEs(void) if (phb->model == PNV_PHB_MODEL_NPU2) WARN_ON_ONCE(pnv_npu2_init(hose)); } - if (phb->type == PNV_PHB_NPU_OCAPI) { - bus = hose->bus; - list_for_each_entry(pdev, &bus->devices, bus_list) - pnv_ioda_setup_dev_PE(pdev); - } } list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; @@ -1558,6 +1575,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + if (pdn->m64_single_mode) pe_num = pdn->pe_num_map[vf_index]; else @@ -1570,13 +1591,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->pbus = NULL; pe->parent_dev = pdev; pe->mve_number = -1; - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | - pci_iov_virtfn_devfn(pdev, vf_index); + pe->rid = (vf_bus << 8) | vf_devfn; pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, pdev->bus->number, - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1590,6 +1609,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + pnv_pci_ioda2_setup_dma_pe(phb, pe); #ifdef CONFIG_IOMMU_API iommu_register_group(&pe->table_group, @@ -1719,21 +1747,23 @@ int pnv_pcibios_sriov_disable(struct pci_dev *pdev) pnv_pci_sriov_disable(pdev); /* Release PCI data */ - remove_dev_pci_data(pdev); + remove_sriov_vf_pdns(pdev); return 0; } int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ - add_dev_pci_data(pdev); + add_sriov_vf_pdns(pdev); return pnv_pci_sriov_enable(pdev, num_vfs); } #endif /* CONFIG_PCI_IOV */ -static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) +static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; @@ -2889,9 +2919,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) struct pci_dn *pdn; int mul, total_vfs; - if (!pdev->is_physfn || pci_dev_is_added(pdev)) - return; - pdn = pci_get_pdn(pdev); pdn->vfs_expanded = 0; pdn->m64_single_mode = false; @@ -2966,6 +2993,30 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res->end = res->start - 1; } } + +static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) +{ + if (WARN_ON(pci_dev_is_added(pdev))) + return; + + if (pdev->is_virtfn) { + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); + + /* + * VF PEs are single-device PEs so their pdev pointer needs to + * be set. The pdev doesn't exist when the PE is allocated (in + * (pcibios_sriov_enable()) so we fix it up here. + */ + pe->pdev = pdev; + WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); + } else if (pdev->is_physfn) { + /* + * For PFs adjust their allocated IOV resources to match what + * the PHB can support using it's M64 BAR table. + */ + pnv_pci_ioda_fixup_iov_resources(pdev); + } +} #endif /* CONFIG_PCI_IOV */ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, @@ -3062,19 +3113,9 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) #ifdef CONFIG_DEBUG_FS static int pnv_pci_diag_data_set(void *data, u64 val) { - struct pci_controller *hose; - struct pnv_phb *phb; + struct pnv_phb *phb = data; s64 ret; - if (val != 1ULL) - return -EINVAL; - - hose = (struct pci_controller *)data; - if (!hose || !hose->private_data) - return -ENODEV; - - phb = hose->private_data; - /* Retrieve the diag data from firmware */ ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data, phb->diag_data_size); @@ -3089,6 +3130,33 @@ static int pnv_pci_diag_data_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set, "%llu\n"); +static int pnv_pci_ioda_pe_dump(void *data, u64 val) +{ + struct pnv_phb *phb = data; + int pe_num; + + for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) { + struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_num]; + + if (!test_bit(pe_num, phb->ioda.pe_alloc)) + continue; + + pe_warn(pe, "rid: %04x dev count: %2d flags: %s%s%s%s%s%s\n", + pe->rid, pe->device_count, + (pe->flags & PNV_IODA_PE_DEV) ? "dev " : "", + (pe->flags & PNV_IODA_PE_BUS) ? "bus " : "", + (pe->flags & PNV_IODA_PE_BUS_ALL) ? "all " : "", + (pe->flags & PNV_IODA_PE_MASTER) ? "master " : "", + (pe->flags & PNV_IODA_PE_SLAVE) ? "slave " : "", + (pe->flags & PNV_IODA_PE_VF) ? "vf " : ""); + } + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_ioda_pe_dump_fops, NULL, + pnv_pci_ioda_pe_dump, "%llu\n"); + #endif /* CONFIG_DEBUG_FS */ static void pnv_pci_ioda_create_dbgfs(void) @@ -3113,7 +3181,9 @@ static void pnv_pci_ioda_create_dbgfs(void) } debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs, - hose, &pnv_pci_diag_data_fops); + phb, &pnv_pci_diag_data_fops); + debugfs_create_file_unsafe("dump_ioda_pe_state", 0200, phb->dbgfs, + phb, &pnv_pci_ioda_pe_dump_fops); } #endif /* CONFIG_DEBUG_FS */ } @@ -3383,6 +3453,28 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; } +static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn; + struct pnv_ioda_pe *pe; + + if (!phb->initialized) + return true; + + pdn = pci_get_pdn(dev); + if (!pdn) + return false; + + if (pdn->pe_number == IODA_INVALID_PE) { + pe = pnv_ioda_setup_dev_PE(dev); + if (!pe) + return false; + } + return true; +} + static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group, int num) { @@ -3512,7 +3604,10 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *slave, *tmp; + mutex_lock(&phb->ioda.pe_list_mutex); list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + switch (phb->type) { case PNV_PHB_IODA1: pnv_pci_ioda1_release_pe_dma(pe); @@ -3520,6 +3615,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) case PNV_PHB_IODA2: pnv_pci_ioda2_release_pe_dma(pe); break; + case PNV_PHB_NPU_OCAPI: + break; default: WARN_ON(1); } @@ -3594,9 +3691,29 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose) OPAL_ASSERT_RESET); } +static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))) + continue; + + if (!pe->pbus) + continue; + + if (bus->number == ((pe->rid >> 8) & 0xFF)) { + pe->pbus = bus; + break; + } + } +} + static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, - .dma_bus_setup = pnv_pci_dma_bus_setup, + .dma_dev_setup = pnv_pci_ioda_dma_dev_setup, + .dma_bus_setup = pnv_pci_ioda_dma_bus_setup, .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported, .setup_msi_irqs = pnv_setup_msi_irqs, .teardown_msi_irqs = pnv_teardown_msi_irqs, @@ -3609,7 +3726,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { }; static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, .setup_msi_irqs = pnv_setup_msi_irqs, .teardown_msi_irqs = pnv_teardown_msi_irqs, .enable_device_hook = pnv_pci_enable_device_hook, @@ -3620,7 +3736,8 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { }; static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { - .enable_device_hook = pnv_pci_enable_device_hook, + .enable_device_hook = pnv_ocapi_enable_device_hook, + .release_device = pnv_pci_release_device, .window_alignment = pnv_pci_window_alignment, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .shutdown = pnv_pci_ioda_shutdown, @@ -3855,14 +3972,13 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; break; default: - phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; hose->controller_ops = pnv_pci_ioda_controller_ops; } ppc_md.pcibios_default_alignment = pnv_pci_default_alignment; #ifdef CONFIG_PCI_IOV - ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; + ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov; ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable; ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index c0bea75ac27b..5bf818246339 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -38,7 +38,7 @@ static DEFINE_MUTEX(tunnel_mutex); int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) { - struct device_node *parent = np; + struct device_node *node = np; u32 bdfn; u64 phbid; int ret; @@ -48,25 +48,29 @@ int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) return -ENXIO; bdfn = ((bdfn & 0x00ffff00) >> 8); - while ((parent = of_get_parent(parent))) { - if (!PCI_DN(parent)) { - of_node_put(parent); + for (node = np; node; node = of_get_parent(node)) { + if (!PCI_DN(node)) { + of_node_put(node); break; } - if (!of_device_is_compatible(parent, "ibm,ioda2-phb") && - !of_device_is_compatible(parent, "ibm,ioda3-phb")) { - of_node_put(parent); + if (!of_device_is_compatible(node, "ibm,ioda2-phb") && + !of_device_is_compatible(node, "ibm,ioda3-phb") && + !of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb")) { + of_node_put(node); continue; } - ret = of_property_read_u64(parent, "ibm,opal-phbid", &phbid); + ret = of_property_read_u64(node, "ibm,opal-phbid", &phbid); if (ret) { - of_node_put(parent); + of_node_put(node); return -ENXIO; } - *id = PCI_SLOT_ID(phbid, bdfn); + if (of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb")) + *id = PCI_PHB_SLOT_ID(phbid); + else + *id = PCI_SLOT_ID(phbid, bdfn); return 0; } @@ -810,53 +814,6 @@ struct iommu_table *pnv_pci_table_alloc(int nid) return tbl; } -void pnv_pci_dma_dev_setup(struct pci_dev *pdev) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; -#ifdef CONFIG_PCI_IOV - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - - /* Fix the VF pdn PE number */ - if (pdev->is_virtfn) { - pdn = pci_get_pdn(pdev); - WARN_ON(pdn->pe_number != IODA_INVALID_PE); - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - if (pe->rid == ((pdev->bus->number << 8) | - (pdev->devfn & 0xff))) { - pdn->pe_number = pe->pe_number; - pe->pdev = pdev; - break; - } - } - } -#endif /* CONFIG_PCI_IOV */ - - if (phb && phb->dma_dev_setup) - phb->dma_dev_setup(phb, pdev); -} - -void pnv_pci_dma_bus_setup(struct pci_bus *bus) -{ - struct pci_controller *hose = bus->sysdata; - struct pnv_phb *phb = hose->private_data; - struct pnv_ioda_pe *pe; - - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))) - continue; - - if (!pe->pbus) - continue; - - if (bus->number == ((pe->rid >> 8) & 0xFF)) { - pe->pbus = bus; - break; - } - } -} - struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index f914f0b14e4e..d3bbdeab3a32 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -108,7 +108,6 @@ struct pnv_phb { int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg); - void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); int (*init_m64)(struct pnv_phb *phb); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); @@ -189,8 +188,6 @@ extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); -extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); -extern void pnv_pci_dma_bus_setup(struct pci_bus *bus); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 83498604d322..11fdae81b5dd 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -233,6 +233,10 @@ static void __noreturn pnv_restart(char *cmd) rc = opal_cec_reboot(); else if (strcmp(cmd, "full") == 0) rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL); + else if (strcmp(cmd, "mpipl") == 0) + rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL); + else if (strcmp(cmd, "error") == 0) + rc = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL); else rc = OPAL_UNSUPPORTED; diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 595e9f8a6539..24c18362e5ea 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -21,7 +21,6 @@ config PPC_PSERIES select PPC_DOORBELL select HOTPLUG_CPU select ARCH_RANDOM - select PPC_DOORBELL select FORCE_SMP select SWIOTLB default y diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index d4a8f1702417..3e49cc23a97a 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "pseries.h" @@ -55,7 +56,8 @@ hypertas_fw_features_table[] = { {FW_FEATURE_LLAN, "hcall-lLAN"}, {FW_FEATURE_BULK_REMOVE, "hcall-bulk"}, {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE, + "hcall-multi-tce"}, {FW_FEATURE_SPLPAR, "hcall-splpar"}, {FW_FEATURE_VPHN, "hcall-vphn"}, {FW_FEATURE_SET_MODE, "hcall-set-mode"}, @@ -100,6 +102,12 @@ static void __init fw_hypertas_feature_init(const char *hypertas, } } + if (is_secure_guest() && + (powerpc_firmware_features & FW_FEATURE_PUT_TCE_IND)) { + powerpc_firmware_features &= ~FW_FEATURE_PUT_TCE_IND; + pr_debug("SVM: disabling PUT_TCE_IND firmware feature\n"); + } + pr_debug(" <- fw_hypertas_feature_init()\n"); } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index c126b94d1943..a4d40a3ceea3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -360,8 +360,10 @@ static bool lmb_is_removable(struct drmem_lmb *lmb) for (i = 0; i < scns_per_block; i++) { pfn = PFN_DOWN(phys_addr); - if (!pfn_present(pfn)) + if (!pfn_present(pfn)) { + phys_addr += MIN_MEMORY_BLOCK_SIZE; continue; + } rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION); phys_addr += MIN_MEMORY_BLOCK_SIZE; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6ba081dd61c9..2e0a8eab5588 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -36,7 +36,6 @@ #include #include #include -#include #include "pseries.h" @@ -133,10 +132,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } -static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_free_pSeriesLP(unsigned long liobn, long, long); static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); -static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, +static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, long npages, unsigned long uaddr, enum dma_data_direction direction, unsigned long attrs) @@ -147,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; long tcenum_start = tcenum, npages_start = npages; - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tceshift; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; while (npages--) { - tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift; + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; - tce_free_pSeriesLP(tbl, tcenum_start, + tce_free_pSeriesLP(liobn, tcenum_start, (npages_start - (npages + 1))); break; } if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); printk("\ttce val = 0x%llx\n", tce ); dump_stack(); @@ -193,8 +192,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; unsigned long flags; - if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, npages, uaddr, direction, attrs); } @@ -210,8 +210,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, /* If allocation fails, fall back to the loop implementation */ if (!tcep) { local_irq_restore(flags); - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, - direction, attrs); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, + npages, uaddr, direction, attrs); } __this_cpu_write(tce_page, tcep); } @@ -262,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages) { u64 rc; while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); + rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); dump_stack(); } @@ -285,8 +286,8 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n { u64 rc; - if (!firmware_has_feature(FW_FEATURE_MULTITCE)) - return tce_free_pSeriesLP(tbl, tcenum, npages); + if (!firmware_has_feature(FW_FEATURE_STUFF_TCE)) + return tce_free_pSeriesLP(tbl->it_index, tcenum, npages); rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); @@ -401,6 +402,19 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, u64 rc = 0; long l, limit; + if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { + unsigned long tceshift = be32_to_cpu(maprange->tce_shift); + unsigned long dmastart = (start_pfn << PAGE_SHIFT) + + be64_to_cpu(maprange->dma_base); + unsigned long tcenum = dmastart >> tceshift; + unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; + void *uaddr = __va(start_pfn << PAGE_SHIFT); + + return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), + tcenum, tceshift, npages, (unsigned long) uaddr, + DMA_BIDIRECTIONAL, 0); + } + local_irq_disable(); /* to protect tcep and the page behind it */ tcep = __this_cpu_read(tce_page); @@ -1320,24 +1334,18 @@ void iommu_init_early_pSeries(void) of_reconfig_notifier_register(&iommu_reconfig_nb); register_memory_notifier(&iommu_mem_nb); - /* - * Secure guest memory is inacessible to devices so regular DMA isn't - * possible. - * - * In that case keep devices' dma_map_ops as NULL so that the generic - * DMA code path will use SWIOTLB to bounce buffers for DMA. - */ - if (!is_secure_guest()) - set_pci_dma_ops(&dma_iommu_ops); + set_pci_dma_ops(&dma_iommu_ops); } static int __init disable_multitce(char *str) { if (strcmp(str, "off") == 0 && firmware_has_feature(FW_FEATURE_LPAR) && - firmware_has_feature(FW_FEATURE_MULTITCE)) { + (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) || + firmware_has_feature(FW_FEATURE_STUFF_TCE))) { printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); - powerpc_firmware_features &= ~FW_FEATURE_MULTITCE; + powerpc_firmware_features &= + ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE); } return 1; } diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index f43e778dd7c8..b8d28ab88178 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -435,10 +435,10 @@ static void maxmem_data(struct seq_file *m) { unsigned long maxmem = 0; - maxmem += drmem_info->n_lmbs * drmem_info->lmb_size; + maxmem += (unsigned long)drmem_info->n_lmbs * drmem_info->lmb_size; maxmem += hugetlb_total_pages() * PAGE_SIZE; - seq_printf(m, "MaxMem=%ld\n", maxmem); + seq_printf(m, "MaxMem=%lu\n", maxmem); } static int pseries_lparcfg_data(struct seq_file *m, void *v) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index c2ef320ba1bf..0b4467e378e5 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -69,7 +69,8 @@ static int drc_pmem_bind(struct papr_scm_priv *p) return rc; p->bound_addr = saved; - dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res); + dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", + p->drc_index, (unsigned long)saved); return rc; } @@ -133,7 +134,7 @@ static int drc_pmem_query_n_bind(struct papr_scm_priv *p) goto err_out; p->bound_addr = start_addr; - dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res); + dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index, start_addr); return rc; err_out: @@ -322,6 +323,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) p->bus = nvdimm_bus_register(NULL, &p->bus_desc); if (!p->bus) { dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); + kfree(p->bus_desc.provider_name); return -ENXIO; } @@ -356,7 +358,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ndr_desc.mapping = &mapping; ndr_desc.num_mappings = 1; ndr_desc.nd_set = &p->nd_set; - set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); if (p->is_volatile) p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); @@ -477,6 +478,7 @@ static int papr_scm_remove(struct platform_device *pdev) nvdimm_bus_unregister(p->bus); drc_pmem_unbind(p); + kfree(p->bus_desc.provider_name); kfree(p); return 0; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 722830978639..911534b89c85 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -192,7 +192,7 @@ int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ - add_dev_pci_data(pdev); + add_sriov_vf_pdns(pdev); return pseries_pci_sriov_enable(pdev, num_vfs); } @@ -204,7 +204,7 @@ int pseries_pcibios_sriov_disable(struct pci_dev *pdev) /* Releasing pe_num_map */ kfree(pdn->pe_num_map); /* Release PCI data */ - remove_dev_pci_data(pdev); + remove_sriov_vf_pdns(pdev); pci_vf_drivers_autoprobe(pdev, true); return 0; } diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 79e2287991db..f682b7babc09 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1176,6 +1176,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) if (tbl == NULL) return NULL; + kref_init(&tbl->it_kref); + of_parse_dma_window(dev->dev.of_node, dma_window, &tbl->it_index, &offset, &size); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 617a443d673d..4a8874bc1057 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1065,13 +1065,11 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) addr += mfspr(SPRN_MCAR); if (is_in_pci_mem_space(addr)) { - if (user_mode(regs)) { - pagefault_disable(); - ret = get_user(inst, (__u32 __user *)regs->nip); - pagefault_enable(); - } else { + if (user_mode(regs)) + ret = probe_user_read(&inst, (void __user *)regs->nip, + sizeof(inst)); + else ret = probe_kernel_address((void *)regs->nip, inst); - } if (!ret && mcheck_handle_load(regs, inst)) { regs->nip += 4; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 934a77324f6b..a3a72b780e67 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -964,7 +964,7 @@ static struct irq_chip mpic_irq_chip = { }; #ifdef CONFIG_SMP -static struct irq_chip mpic_ipi_chip = { +static const struct irq_chip mpic_ipi_chip = { .irq_mask = mpic_mask_ipi, .irq_unmask = mpic_unmask_ipi, .irq_eoi = mpic_end_ipi, @@ -978,7 +978,7 @@ static struct irq_chip mpic_tm_chip = { }; #ifdef CONFIG_MPIC_U3_HT_IRQS -static struct irq_chip mpic_irq_ht_chip = { +static const struct irq_chip mpic_irq_ht_chip = { .irq_startup = mpic_startup_ht_irq, .irq_shutdown = mpic_shutdown_ht_irq, .irq_mask = mpic_mask_irq, diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index 7b9fe0a567cf..014e00e74d2b 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -10,14 +10,21 @@ # based on relocs_check.pl # Copyright © 2009 IBM Corporation -if [ $# -lt 2 ]; then - echo "$0 [path to objdump] [path to vmlinux]" 1>&2 +if [ $# -lt 3 ]; then + echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2 exit 1 fi -# Have Kbuild supply the path to objdump so we handle cross compilation. +# Have Kbuild supply the path to objdump and nm so we handle cross compilation. objdump="$1" -vmlinux="$2" +nm="$2" +vmlinux="$3" + +# Remove from the bad relocations those that match an undefined weak symbol +# which will result in an absolute relocation to 0. +# Weak unresolved symbols are of that form in nm output: +# " w _binary__btf_vmlinux_bin_end" +undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }') bad_relocs=$( $objdump -R "$vmlinux" | @@ -26,8 +33,6 @@ $objdump -R "$vmlinux" | # These relocations are okay # On PPC64: # R_PPC64_RELATIVE, R_PPC64_NONE - # R_PPC64_ADDR64 mach_ - # R_PPC64_ADDR64 __crc_ # On PPC: # R_PPC_RELATIVE, R_PPC_ADDR16_HI, # R_PPC_ADDR16_HA,R_PPC_ADDR16_LO, @@ -39,8 +44,7 @@ R_PPC_ADDR16_HI R_PPC_ADDR16_HA R_PPC_RELATIVE R_PPC_NONE' | - grep -E -v '\pdev ? pci_warn((sl)->pdev, x) : dev_warn(&(sl)->bus->dev, x)) + struct pnv_php_event { bool added; struct pnv_php_slot *php_slot; @@ -151,17 +154,11 @@ static void pnv_php_rmv_pdns(struct device_node *dn) static void pnv_php_detach_device_nodes(struct device_node *parent) { struct device_node *dn; - int refcount; for_each_child_of_node(parent, dn) { pnv_php_detach_device_nodes(dn); of_node_put(dn); - refcount = kref_read(&dn->kobj.kref); - if (refcount != 1) - pr_warn("Invalid refcount %d on <%pOF>\n", - refcount, dn); - of_detach_node(dn); } } @@ -271,7 +268,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000); if (ret) { - pci_warn(php_slot->pdev, "Error %d getting FDT blob\n", ret); + SLOT_WARN(php_slot, "Error %d getting FDT blob\n", ret); goto free_fdt1; } @@ -285,7 +282,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL); if (!dt) { ret = -EINVAL; - pci_warn(php_slot->pdev, "Cannot unflatten FDT\n"); + SLOT_WARN(php_slot, "Cannot unflatten FDT\n"); goto free_fdt; } @@ -295,15 +292,15 @@ static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn); if (ret) { pnv_php_reverse_nodes(php_slot->dn); - pci_warn(php_slot->pdev, "Error %d populating changeset\n", - ret); + SLOT_WARN(php_slot, "Error %d populating changeset\n", + ret); goto free_dt; } php_slot->dn->child = NULL; ret = of_changeset_apply(&php_slot->ocs); if (ret) { - pci_warn(php_slot->pdev, "Error %d applying changeset\n", ret); + SLOT_WARN(php_slot, "Error %d applying changeset\n", ret); goto destroy_changeset; } @@ -342,18 +339,19 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot, ret = pnv_pci_set_power_state(php_slot->id, state, &msg); if (ret > 0) { if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle || - be64_to_cpu(msg.params[2]) != state || - be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) { - pci_warn(php_slot->pdev, "Wrong msg (%lld, %lld, %lld)\n", - be64_to_cpu(msg.params[1]), - be64_to_cpu(msg.params[2]), - be64_to_cpu(msg.params[3])); + be64_to_cpu(msg.params[2]) != state) { + SLOT_WARN(php_slot, "Wrong msg (%lld, %lld, %lld)\n", + be64_to_cpu(msg.params[1]), + be64_to_cpu(msg.params[2]), + be64_to_cpu(msg.params[3])); return -ENOMSG; } + if (be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) { + ret = -ENODEV; + goto error; + } } else if (ret < 0) { - pci_warn(php_slot->pdev, "Error %d powering %s\n", - ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off"); - return ret; + goto error; } if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE) @@ -362,6 +360,11 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot, ret = pnv_php_add_devtree(php_slot); return ret; + +error: + SLOT_WARN(php_slot, "Error %d powering %s\n", + ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off"); + return ret; } EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state); @@ -378,8 +381,8 @@ static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state) */ ret = pnv_pci_get_power_state(php_slot->id, &power_state); if (ret) { - pci_warn(php_slot->pdev, "Error %d getting power status\n", - ret); + SLOT_WARN(php_slot, "Error %d getting power status\n", + ret); } else { *state = power_state; } @@ -402,7 +405,7 @@ static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) *state = presence; ret = 0; } else { - pci_warn(php_slot->pdev, "Error %d getting presence\n", ret); + SLOT_WARN(php_slot, "Error %d getting presence\n", ret); } return ret; @@ -566,7 +569,13 @@ static int pnv_php_disable_slot(struct hotplug_slot *slot) struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); int ret; - if (php_slot->state != PNV_PHP_STATE_POPULATED) + /* + * Allow to disable a slot already in the registered state to + * cover cases where the slot couldn't be enabled and never + * reached the populated state + */ + if (php_slot->state != PNV_PHP_STATE_POPULATED && + php_slot->state != PNV_PHP_STATE_REGISTERED) return 0; /* Remove all devices behind the slot */ @@ -675,7 +684,7 @@ static int pnv_php_register_slot(struct pnv_php_slot *php_slot) ret = pci_hp_register(&php_slot->slot, php_slot->bus, php_slot->slot_no, php_slot->name); if (ret) { - pci_warn(php_slot->pdev, "Error %d registering slot\n", ret); + SLOT_WARN(php_slot, "Error %d registering slot\n", ret); return ret; } @@ -728,7 +737,7 @@ static int pnv_php_enable_msix(struct pnv_php_slot *php_slot) /* Enable MSIx */ ret = pci_enable_msix_exact(pdev, &entry, 1); if (ret) { - pci_warn(pdev, "Error %d enabling MSIx\n", ret); + SLOT_WARN(php_slot, "Error %d enabling MSIx\n", ret); return ret; } @@ -778,8 +787,9 @@ static irqreturn_t pnv_php_interrupt(int irq, void *data) (sts & PCI_EXP_SLTSTA_PDC)) { ret = pnv_pci_get_presence_state(php_slot->id, &presence); if (ret) { - pci_warn(pdev, "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n", - php_slot->name, ret, sts); + SLOT_WARN(php_slot, + "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n", + php_slot->name, ret, sts); return IRQ_HANDLED; } @@ -809,8 +819,9 @@ static irqreturn_t pnv_php_interrupt(int irq, void *data) */ event = kzalloc(sizeof(*event), GFP_ATOMIC); if (!event) { - pci_warn(pdev, "PCI slot [%s] missed hotplug event 0x%04x\n", - php_slot->name, sts); + SLOT_WARN(php_slot, + "PCI slot [%s] missed hotplug event 0x%04x\n", + php_slot->name, sts); return IRQ_HANDLED; } @@ -834,7 +845,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) /* Allocate workqueue */ php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name); if (!php_slot->wq) { - pci_warn(pdev, "Cannot alloc workqueue\n"); + SLOT_WARN(php_slot, "Cannot alloc workqueue\n"); pnv_php_disable_irq(php_slot, true); return; } @@ -858,7 +869,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) php_slot->name, php_slot); if (ret) { pnv_php_disable_irq(php_slot, true); - pci_warn(pdev, "Error %d enabling IRQ %d\n", ret, irq); + SLOT_WARN(php_slot, "Error %d enabling IRQ %d\n", ret, irq); return; } @@ -894,7 +905,7 @@ static void pnv_php_enable_irq(struct pnv_php_slot *php_slot) ret = pci_enable_device(pdev); if (ret) { - pci_warn(pdev, "Error %d enabling device\n", ret); + SLOT_WARN(php_slot, "Error %d enabling device\n", ret); return; } @@ -1009,6 +1020,8 @@ static int __init pnv_php_init(void) for_each_compatible_node(dn, NULL, "ibm,ioda3-phb") pnv_php_register(dn); + for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb") + pnv_php_register_one(dn); /* slot directly under the PHB */ return 0; } @@ -1021,6 +1034,9 @@ static void __exit pnv_php_exit(void) for_each_compatible_node(dn, NULL, "ibm,ioda3-phb") pnv_php_unregister(dn); + + for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb") + pnv_php_unregister_one(dn); /* slot directly under the PHB */ } module_init(pnv_php_init); diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh index 26112ab5cdf4..f52ed92b53e7 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh @@ -53,9 +53,13 @@ eeh_one_dev() { # is a no-op. echo $dev >/sys/kernel/debug/powerpc/eeh_dev_check - # Enforce a 30s timeout for recovery. Even the IPR, which is infamously - # slow to reset, should recover within 30s. - max_wait=30 + # Default to a 60s timeout when waiting for a device to recover. This + # is an arbitrary default which can be overridden by setting the + # EEH_MAX_WAIT environmental variable when required. + + # The current record holder for longest recovery time is: + # "Adaptec Series 8 12G SAS/PCIe 3" at 39 seconds + max_wait=${EEH_MAX_WAIT:=60} for i in `seq 0 ${max_wait}` ; do if pe_ok $dev ; then diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 7101ffd08d66..0ebeaea22641 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -5,3 +5,4 @@ prot_sao segv_errors wild_bctr large_vm_fork_separation +bad_accesses diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index ed1565809d2b..b9103c4bb414 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -3,7 +3,7 @@ noarg: $(MAKE) -C ../ TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ - large_vm_fork_separation + large_vm_fork_separation bad_accesses TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile @@ -16,6 +16,7 @@ $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 +$(OUTPUT)/bad_accesses: CFLAGS += -m64 $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c new file mode 100644 index 000000000000..adc465f499ef --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2019, Michael Ellerman, IBM Corp. +// +// Test that out-of-bounds reads/writes behave as expected. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +// Old distros (Ubuntu 16.04 at least) don't define this +#ifndef SEGV_BNDERR +#define SEGV_BNDERR 3 +#endif + +// 64-bit kernel is always here +#define PAGE_OFFSET (0xcul << 60) + +static unsigned long kernel_virt_end; + +static volatile int fault_code; +static volatile unsigned long fault_addr; +static jmp_buf setjmp_env; + +static void segv_handler(int n, siginfo_t *info, void *ctxt_v) +{ + fault_code = info->si_code; + fault_addr = (unsigned long)info->si_addr; + siglongjmp(setjmp_env, 1); +} + +int bad_access(char *p, bool write) +{ + char x; + + fault_code = 0; + fault_addr = 0; + + if (sigsetjmp(setjmp_env, 1) == 0) { + if (write) + *p = 1; + else + x = *p; + + printf("Bad - no SEGV! (%c)\n", x); + return 1; + } + + // If we see MAPERR that means we took a page fault rather than an SLB + // miss. We only expect to take page faults for addresses within the + // valid kernel range. + FAIL_IF(fault_code == SEGV_MAPERR && \ + (fault_addr < PAGE_OFFSET || fault_addr >= kernel_virt_end)); + + FAIL_IF(fault_code != SEGV_MAPERR && fault_code != SEGV_BNDERR); + + return 0; +} + +static int using_hash_mmu(bool *using_hash) +{ + char line[128]; + FILE *f; + int rc; + + f = fopen("/proc/cpuinfo", "r"); + FAIL_IF(!f); + + rc = 0; + while (fgets(line, sizeof(line), f) != NULL) { + if (strcmp(line, "MMU : Hash\n") == 0) { + *using_hash = true; + goto out; + } + + if (strcmp(line, "MMU : Radix\n") == 0) { + *using_hash = false; + goto out; + } + } + + rc = -1; +out: + fclose(f); + return rc; +} + +static int test(void) +{ + unsigned long i, j, addr, region_shift, page_shift, page_size; + struct sigaction sig; + bool hash_mmu; + + sig = (struct sigaction) { + .sa_sigaction = segv_handler, + .sa_flags = SA_SIGINFO, + }; + + FAIL_IF(sigaction(SIGSEGV, &sig, NULL) != 0); + + FAIL_IF(using_hash_mmu(&hash_mmu)); + + page_size = sysconf(_SC_PAGESIZE); + if (page_size == (64 * 1024)) + page_shift = 16; + else + page_shift = 12; + + if (page_size == (64 * 1024) || !hash_mmu) { + region_shift = 52; + + // We have 7 512T regions (4 kernel linear, vmalloc, io, vmemmap) + kernel_virt_end = PAGE_OFFSET + (7 * (512ul << 40)); + } else if (page_size == (4 * 1024) && hash_mmu) { + region_shift = 46; + + // We have 7 64T regions (4 kernel linear, vmalloc, io, vmemmap) + kernel_virt_end = PAGE_OFFSET + (7 * (64ul << 40)); + } else + FAIL_IF(true); + + printf("Using %s MMU, PAGE_SIZE = %dKB start address 0x%016lx\n", + hash_mmu ? "hash" : "radix", + (1 << page_shift) >> 10, + 1ul << region_shift); + + // This generates access patterns like: + // 0x0010000000000000 + // 0x0010000000010000 + // 0x0010000000020000 + // ... + // 0x0014000000000000 + // 0x0018000000000000 + // 0x0020000000000000 + // 0x0020000000010000 + // 0x0020000000020000 + // ... + // 0xf400000000000000 + // 0xf800000000000000 + + for (i = 1; i <= ((0xful << 60) >> region_shift); i++) { + for (j = page_shift - 1; j < 60; j++) { + unsigned long base, delta; + + base = i << region_shift; + delta = 1ul << j; + + if (delta >= base) + break; + + addr = (base | delta) & ~((1 << page_shift) - 1); + + FAIL_IF(bad_access((char *)addr, false)); + FAIL_IF(bad_access((char *)addr, true)); + } + } + + return 0; +} + +int main(void) +{ + return test_harness(test, "bad_accesses"); +} diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 7deedbc16b0b..fc477dfe86a2 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -455,9 +455,8 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) { test_sethwdebug_exact(child_pid); - if (!is_8xx) - test_sethwdebug_range_aligned(child_pid); - if (dawr && !is_8xx) { + test_sethwdebug_range_aligned(child_pid); + if (dawr || is_8xx) { test_sethwdebug_range_unaligned(child_pid); test_sethwdebug_range_unaligned_dar(child_pid); test_sethwdebug_dawr_max_range(child_pid);