Fix AMDTEE memory leak in amdtee_open_session()

-----BEGIN PGP SIGNATURE-----
 
 iQJOBAABCgA4FiEEK3Mh2f1G6lqRPfDvpvbt8nX18hUFAl5X7PcaHGplbnMud2lr
 bGFuZGVyQGxpbmFyby5vcmcACgkQpvbt8nX18hXjKA/9HGAnIBnn2poGnrrIQ+b9
 +Txa2DawaS/HgMm3WsCqWWY5pkSeB4tVfyOA+k7NcAlW8XtX5jvDlZRHz/t2wIj5
 7ZS/HpBmOzpdeEfvKe/+lOyL81Ugvwitpxy/Az2KbtZtoTNekpCP2XqBvxr7WCkt
 nS28OWpSJZlY7bhahIsb20rwL/QIpkhiQNITlmzTuaeoWYvwrApuOeSKHbBNhqez
 cBP3aYK1aNvCKfCV6m08LW/09qrZ4c+rk9ZfA5LxPZUIsMKgpez/AkU7lhtcYh0U
 YS3FLpo6w8CG91H3lBihoTj8mh5WlNj2H00Vd6wuPfwOOkAtDxyOAHi5MasArmIK
 eeI+W1NKiAWavZ2ULlPHb6g1ekSZ/r0zuffe7MAw37yFWg0AuZq3e4tQ/o6i3e6F
 CznIJqh0tFmDsdF387XPsIuWYJeTQ0rQvxRDjYeIGmnLGFXb2gfsaU06bMCzO4Mb
 fXoGsFmr77NuL3a1kdnbOuODkMCvHArKmn/hjzCksDE1DWeQVFux1LZEV/VbRBVR
 zNrWcDzi+fVgpoPcqJ3IRoTVdDYGPWv/F8182CNAwdOFmgAmZWtHosJse+zTLUJL
 xKIYw2nNhC3/OoAt3DZ+Dq7GVrvH+GrmAS0C+CJf08naawYcKCB5C478yaw85umt
 fJt0s+9N02Q3g+rsUalSZ9Y=
 =8Bsc
 -----END PGP SIGNATURE-----

Merge tag 'tee-amdtee-fix-for-5.6' of https://git.linaro.org/people/jens.wiklander/linux-tee into arm/fixes

Fix AMDTEE memory leak in amdtee_open_session()

* tag 'tee-amdtee-fix-for-5.6' of https://git.linaro.org/people/jens.wiklander/linux-tee: (344 commits)
  tee: amdtee: fix memory leak in amdtee_open_session()
  Linux 5.6-rc2
  ext4: improve explanation of a mount failure caused by a misconfigured kernel
  Input: cyapa - replace zero-length array with flexible-array member
  Input: tca6416-keypad - replace zero-length array with flexible-array member
  Input: gpio_keys_polled - replace zero-length array with flexible-array member
  IB/mlx5: Use div64_u64 for num_var_hw_entries calculation
  nvme: fix the parameter order for nvme_get_log in nvme_get_fw_slot_info
  nvme/pci: move cqe check after device shutdown
  nvme: prevent warning triggered by nvme_stop_keep_alive
  nvme/tcp: fix bug on double requeue when send fails
  cifs: make sure we do not overflow the max EA buffer size
  cifs: enable change notification for SMB2.1 dialect
  netdevice.h: fix all kernel-doc and Sphinx warnings
  net: dsa: tag_ar9331: Make sure there is headroom for tag
  net: dsa: tag_qca: Make sure there is headroom for tag
  net, ip6_tunnel: enhance tunnel locate with link check
  net/smc: no peer ID in CLC decline for SMCD
  net/smc: transfer fasync_list in case of fallback
  net: hns3: fix a copying IPv6 address error in hclge_fd_get_flow_tuples()
  ...

Link: https://lore.kernel.org/r/20200227165205.GA7926@jade
Signed-off-by: Olof Johansson <olof@lixom.net>
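As context, a hedged sketch of the bug class named in the title: a session
object allocated in amdtee_open_session() must be released on its error
paths, or it leaks. All identifiers except amdtee_open_session() are
hypothetical; the actual fix is the "tee: amdtee:" commit listed above:

	/* hypothetical reconstruction of the leak pattern, not the driver code */
	sess = kzalloc(sizeof(*sess), GFP_KERNEL);
	if (!sess)
		return -ENOMEM;

	rc = open_session_with_tee(sess);	/* hypothetical helper */
	if (rc) {
		kfree(sess);	/* without this kfree(), sess leaked on failure */
		return rc;
	}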
commit f9a15f39e5 (Olof Johansson, 2020-02-27 10:07:47 -08:00): 343 changed files with 8859 additions and 5602 deletions


@@ -1,9 +1,10 @@
Ilitek ILI210x/ILI2117/ILI251x touchscreen controller
Ilitek ILI210x/ILI2117/ILI2120/ILI251x touchscreen controller
Required properties:
- compatible:
ilitek,ili210x for ILI210x
ilitek,ili2117 for ILI2117
ilitek,ili2120 for ILI2120
ilitek,ili251x for ILI251x
- reg: The I2C address of the device


@@ -71,9 +71,13 @@ b) Example for device tree::
ipmb@10 {
compatible = "ipmb-dev";
reg = <0x10>;
i2c-protocol;
};
};
If data is to be transmitted using raw I2C block transfers rather than SMBus,
then "i2c-protocol" needs to be defined as above.
2) Manually from Linux::
modprobe ipmb-dev-int


@@ -1,9 +1,11 @@
==================
Guest halt polling
==================
The cpuidle_haltpoll driver, with the haltpoll governor, allows
the guest vcpus to poll for a specified amount of time before
halting.
This provides the following benefits to host side polling:
1) The POLL flag is set while polling is performed, which allows
@@ -29,18 +31,21 @@ Module Parameters
The haltpoll governor has 5 tunable module parameters:
1) guest_halt_poll_ns:
Maximum amount of time, in nanoseconds, that polling is
performed before halting.
Default: 200000
2) guest_halt_poll_shrink:
Division factor used to shrink per-cpu guest_halt_poll_ns when
wakeup event occurs after the global guest_halt_poll_ns.
Default: 2
3) guest_halt_poll_grow:
Multiplication factor used to grow per-cpu guest_halt_poll_ns
when event occurs after per-cpu guest_halt_poll_ns
but before global guest_halt_poll_ns.
@@ -48,6 +53,7 @@ but before global guest_halt_poll_ns.
Default: 2
4) guest_halt_poll_grow_start:
The per-cpu guest_halt_poll_ns eventually reaches zero
in case of an idle system. This value sets the initial
per-cpu guest_halt_poll_ns when growing. This can
@@ -66,7 +72,7 @@ high once achieves global guest_halt_poll_ns value).
Default: Y
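Taken together, the shrink/grow rules above amount to roughly the following
(a simplified sketch of the assumed governor logic; the predicate names are
hypothetical)::

    /* sketch: per-cpu poll window adaptation */
    if (woke_after_global_window)                  /* hypothetical flag */
            poll_ns /= guest_halt_poll_shrink;     /* default: divide by 2 */
    else if (woke_inside_grow_window)              /* hypothetical flag */
            poll_ns = poll_ns ? poll_ns * guest_halt_poll_grow
                              : guest_halt_poll_grow_start;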
The module parameters can be set from the debugfs files in:
The module parameters can be set from the debugfs files in::
/sys/module/haltpoll/parameters/
@@ -74,5 +80,5 @@ Further Notes
=============
- Care should be taken when setting the guest_halt_poll_ns parameter as a
large value has the potential to drive the cpu usage to 100% on a machine which
would be almost entirely idle otherwise.
large value has the potential to drive the cpu usage to 100% on a machine
which would be almost entirely idle otherwise.


@@ -8,7 +8,9 @@ Linux Virtualization Support
:maxdepth: 2
kvm/index
uml/user_mode_linux
paravirt_ops
guest-halt-polling
.. only:: html and subproject


@@ -1,4 +1,8 @@
* Internal ABI between the kernel and HYP
.. SPDX-License-Identifier: GPL-2.0
=======================================
Internal ABI between the kernel and HYP
=======================================
This file documents the interaction between the Linux kernel and the
hypervisor layer when running Linux as a hypervisor (for example
@@ -19,25 +23,31 @@ and only act on individual CPUs.
Unless specified otherwise, any built-in hypervisor must implement
these functions (see arch/arm{,64}/include/asm/virt.h):
* r0/x0 = HVC_SET_VECTORS
r1/x1 = vectors
* ::
r0/x0 = HVC_SET_VECTORS
r1/x1 = vectors
Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
must be a physical address, and respect the alignment requirements
of the architecture. Only implemented by the initial stubs, not by
Linux hypervisors.
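As a hedged illustration, an arm64 caller in kernel context could issue this
call roughly as follows (vectors_pa is an assumed variable holding the
physical vector address; HVC_SET_VECTORS comes from asm/virt.h)::

    /* sketch: hand a new vector base to the EL2 stub */
    register unsigned long x0 asm("x0") = HVC_SET_VECTORS;
    register unsigned long x1 asm("x1") = vectors_pa;

    asm volatile("hvc #0" : "+r" (x0) : "r" (x1) : "memory");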
* r0/x0 = HVC_RESET_VECTORS
* ::
r0/x0 = HVC_RESET_VECTORS
Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initial
stubs' exception vector value. This effectively disables an existing
hypervisor.
* r0/x0 = HVC_SOFT_RESTART
r1/x1 = restart address
x2 = x0's value when entering the next payload (arm64)
x3 = x1's value when entering the next payload (arm64)
x4 = x2's value when entering the next payload (arm64)
* ::
r0/x0 = HVC_SOFT_RESTART
r1/x1 = restart address
x2 = x0's value when entering the next payload (arm64)
x3 = x1's value when entering the next payload (arm64)
x4 = x2's value when entering the next payload (arm64)
Mask all exceptions, disable the MMU, move the arguments into place
(arm64 only), and jump to the restart address while at HYP/EL2. This


@@ -0,0 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0
===
ARM
===
.. toctree::
:maxdepth: 2
hyp-abi
psci
pvtime


@@ -1,3 +1,9 @@
.. SPDX-License-Identifier: GPL-2.0
=========================================
Power State Coordination Interface (PSCI)
=========================================
KVM implements the PSCI (Power State Coordination Interface)
specification in order to provide services such as CPU on/off, reset
and power-off to the guest.
@@ -30,32 +36,42 @@ The following register is defined:
- Affects the whole VM (even if the register view is per-vcpu)
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
Holds the state of the firmware support to mitigate CVE-2017-5715, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_1 in [1].
Holds the state of the firmware support to mitigate CVE-2017-5715, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_1 in [1].
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
KVM does not offer
firmware support for the workaround. The mitigation status for the
guest is unknown.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
The workaround HVC call is
available to the guest and required for the mitigation.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
The workaround HVC call
is available to the guest, but it is not needed on this VCPU.
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
Holds the state of the firmware support to mitigate CVE-2018-3639, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_2 in [1].
Holds the state of the firmware support to mitigate CVE-2018-3639, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_2 in [1]_.
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
A workaround is not
available. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
The workaround state is
unknown. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
The workaround is available,
and can be disabled by a vCPU. If
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
this vCPU.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
always active on this vCPU or it is not needed.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
The workaround is always active on this vCPU or it is not needed.
[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
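As a hedged userspace sketch (not part of this document), such a firmware
register can be read with the standard KVM_GET_ONE_REG ioctl; vcpu_fd is an
assumed open vCPU file descriptor and error handling is elided::

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    __u64 val = 0;
    struct kvm_one_reg reg = {
            .id   = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1,
            .addr = (__u64)&val,
    };

    /* val then holds one of the NOT_AVAIL/AVAIL/NOT_REQUIRED values above */
    ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);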


@@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
===============================================
ARM Virtual Interrupt Translation Service (ITS)
===============================================
@@ -12,22 +15,32 @@ There can be multiple ITS controllers per guest, each of them has to have
a separate, non-overlapping MMIO region.
Groups:
KVM_DEV_ARM_VGIC_GRP_ADDR
Groups
======
KVM_DEV_ARM_VGIC_GRP_ADDR
-------------------------
Attributes:
KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
Base address in the guest physical address space of the GICv3 ITS
control register frame.
This address needs to be 64K aligned and the region covers 128K.
Errors:
-E2BIG: Address outside of addressable IPA range
-EINVAL: Incorrectly aligned address
-EEXIST: Address already configured
-EFAULT: Invalid user pointer for attr->addr.
-ENODEV: Incorrect attribute or the ITS is not supported.
======= =================================================
-E2BIG Address outside of addressable IPA range
-EINVAL Incorrectly aligned address
-EEXIST Address already configured
-EFAULT Invalid user pointer for attr->addr.
-ENODEV Incorrect attribute or the ITS is not supported.
======= =================================================
KVM_DEV_ARM_VGIC_GRP_CTRL
KVM_DEV_ARM_VGIC_GRP_CTRL
-------------------------
Attributes:
KVM_DEV_ARM_VGIC_CTRL_INIT
request the initialization of the ITS, no additional parameter in
@@ -58,16 +71,21 @@ Groups:
"ITS Restore Sequence".
Errors:
-ENXIO: ITS not properly configured as required prior to setting
this attribute
-ENOMEM: Memory shortage when allocating ITS internal data
-EINVAL: Inconsistent restored data
-EFAULT: Invalid guest ram access
-EBUSY: One or more VCPUS are running
-EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
state is not available
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
======= ==========================================================
-ENXIO ITS not properly configured as required prior to setting
this attribute
-ENOMEM Memory shortage when allocating ITS internal data
-EINVAL Inconsistent restored data
-EFAULT Invalid guest ram access
-EBUSY One or more VCPUS are running
-EACCES The virtual ITS is backed by a physical GICv4 ITS, and the
state is not available
======= ==========================================================
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
-----------------------------
Attributes:
The attr field of kvm_device_attr encodes the offset of the
ITS register, relative to the ITS control frame base address
@@ -78,6 +96,7 @@ Groups:
be accessed with full length.
Writes to read-only registers are ignored by the kernel except for:
- GITS_CREADR. It must be restored otherwise commands in the queue
will be re-executed after restoring CWRITER. GITS_CREADR must be
restored before restoring the GITS_CTLR which is likely to enable the
@@ -91,30 +110,36 @@ Groups:
For other registers, getting or setting a register has the same
effect as reading/writing the register on real hardware.
Errors:
-ENXIO: Offset does not correspond to any supported register
-EFAULT: Invalid user pointer for attr->addr
-EINVAL: Offset is not 64-bit aligned
-EBUSY: one or more VCPUS are running
ITS Restore Sequence:
-------------------------
Errors:
======= ====================================================
-ENXIO Offset does not correspond to any supported register
-EFAULT Invalid user pointer for attr->addr
-EINVAL Offset is not 64-bit aligned
-EBUSY one or more VCPUS are running
======= ====================================================
ITS Restore Sequence:
---------------------
The following ordering must be followed when restoring the GIC and the ITS:
a) restore all guest memory and create vcpus
b) restore all redistributors
c) provide the ITS base address
(KVM_DEV_ARM_VGIC_GRP_ADDR)
d) restore the ITS in the following order:
1. Restore GITS_CBASER
2. Restore all other GITS_ registers, except GITS_CTLR!
3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
4. Restore GITS_CTLR
1. Restore GITS_CBASER
2. Restore all other ``GITS_`` registers, except GITS_CTLR!
3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
4. Restore GITS_CTLR
Then vcpus can be started.
ITS Table ABI REV0:
-------------------
ITS Table ABI REV0:
-------------------
Revision 0 of the ABI only supports the features of a virtual GICv3, and does
not support a virtual GICv4 with support for direct injection of virtual
@@ -125,12 +150,13 @@ Then vcpus can be started.
entries in the collection are listed in no particular order.
All entries are 8 bytes.
Device Table Entry (DTE):
Device Table Entry (DTE)::
bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
values: | V | next | ITT_addr | Size |
bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
values: | V | next | ITT_addr | Size |
where:
where:
- V indicates whether the entry is valid. If not, other fields
are not meaningful.
- next: equals to 0 if this entry is the last one; otherwise it
@@ -140,32 +166,34 @@ Then vcpus can be started.
- Size specifies the supported number of bits for the EventID,
minus one
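As a hedged sketch, a DTE with this layout could be assembled as follows
(valid, next, itt_addr and size are assumed variables)::

    /* sketch: pack a REV0 Device Table Entry from the layout above */
    __u64 dte = ((__u64)valid << 63) |
                (((__u64)next & 0x3fff) << 49) |       /* bits [62:49] */
                (itt_addr & 0x0001ffffffffffe0ULL) |   /* bits [48:5]  */
                (size & 0x1f);                         /* bits [4:0]   */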
Collection Table Entry (CTE):
Collection Table Entry (CTE)::
bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
values: | V | RES0 | RDBase | ICID |
bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
values: | V | RES0 | RDBase | ICID |
where:
- V indicates whether the entry is valid. If not, other fields are
not meaningful.
- RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
- RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
- ICID is the collection ID
Interrupt Translation Entry (ITE):
Interrupt Translation Entry (ITE)::
bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
values: | next | pINTID | ICID |
bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
values: | next | pINTID | ICID |
where:
- next: equals to 0 if this entry is the last one; otherwise it corresponds
to the EventID offset to the next ITE capped by 2^16 -1.
- pINTID is the physical LPI ID; if zero, it means the entry is not valid
and other fields are not meaningful.
- ICID is the collection ID
ITS Reset State:
----------------
ITS Reset State:
----------------
RESET returns the ITS to the same state that it was when first created and
initialized. When the RESET command returns, the following things are


@@ -1,9 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0
==============================================================
ARM Virtual Generic Interrupt Controller v3 and later (VGICv3)
==============================================================
Device types supported:
KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
- KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
Only one VGIC instance may be instantiated through this API. The created VGIC
will act as the VM interrupt controller, requiring emulated user-space devices
@@ -15,7 +18,8 @@ Creating a guest GICv3 device requires a host GICv3 as well.
Groups:
KVM_DEV_ARM_VGIC_GRP_ADDR
Attributes:
Attributes:
KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
Base address in the guest physical address space of the GICv3 distributor
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
@@ -29,21 +33,25 @@ Groups:
This address needs to be 64K aligned.
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
The attribute data pointed to by kvm_device_attr.addr is a __u64 value:
bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
values: | count | base | flags | index
The attribute data pointed to by kvm_device_attr.addr is a __u64 value::
bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
values: | count | base | flags | index
- index encodes the unique redistributor region index
- flags: reserved for future use, currently 0
- base field encodes bits [51:16] of the guest physical base address
of the first redistributor in the region.
- count encodes the number of redistributors in the region. Must be
greater than 0.
There are two 64K pages for each redistributor in the region and
redistributors are laid out contiguously within the region. Regions
are filled with redistributors in the index order. The sum of all
region count fields must be greater than or equal to the number of
VCPUs. Redistributor regions must be registered in the incremental
index order, starting from index 0.
The characteristics of a specific redistributor region can be read
by presetting the index field in the attr data.
Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
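As a hedged sketch, the __u64 attribute word could be packed from the layout
above (count, base_gpa and index are assumed variables; flags stays 0)::

    /* sketch: pack a redistributor region descriptor */
    __u64 attr = ((__u64)count << 52) |
                 (base_gpa & 0x000fffffffff0000ULL) |   /* bits [51:16] of base */
                 (index & 0xfff);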
@@ -52,23 +60,27 @@ Groups:
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
Errors:
-E2BIG: Address outside of addressable IPA range
-EINVAL: Incorrectly aligned address, bad redistributor region
======= =============================================================
-E2BIG Address outside of addressable IPA range
-EINVAL Incorrectly aligned address, bad redistributor region
count/index, mixed redistributor region attribute usage
-EEXIST: Address already configured
-ENOENT: Attempt to read the characteristics of a non existing
-EEXIST Address already configured
-ENOENT Attempt to read the characteristics of a non existing
redistributor region
-ENXIO: The group or attribute is unknown/unsupported for this device
-ENXIO The group or attribute is unknown/unsupported for this device
or hardware support is missing.
-EFAULT: Invalid user pointer for attr->addr.
-EFAULT Invalid user pointer for attr->addr.
======= =============================================================
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
Attributes:
The attr field of kvm_device_attr encodes two values:
bits: | 63 .... 32 | 31 .... 0 |
values: | mpidr | offset |
KVM_DEV_ARM_VGIC_GRP_DIST_REGS, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
Attributes:
The attr field of kvm_device_attr encodes two values::
bits: | 63 .... 32 | 31 .... 0 |
values: | mpidr | offset |
All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a
__u32 value. 64-bit registers must be accessed by separately accessing the
@@ -93,7 +105,8 @@ Groups:
redistributor is accessed. The mpidr is ignored for the distributor.
The mpidr encoding is based on the affinity information in the
architecture defined MPIDR, and the field is encoded as follows:
architecture defined MPIDR, and the field is encoded as follows::
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
| Aff3 | Aff2 | Aff1 | Aff0 |
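As a hedged sketch, the attr value for such an access could be built as
follows (aff0..aff3 and offset are assumed variables)::

    /* sketch: mpidr field in the high word, register offset below */
    __u64 mpidr = ((__u64)aff3 << 24) | (aff2 << 16) | (aff1 << 8) | aff0;
    __u64 attr  = (mpidr << 32) | offset;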
@@ -148,24 +161,30 @@ Groups:
ignored.
Errors:
-ENXIO: Getting or setting this register is not yet supported
-EBUSY: One or more VCPUs are running
====== =====================================================
-ENXIO Getting or setting this register is not yet supported
-EBUSY One or more VCPUs are running
====== =====================================================
KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
Attributes:
The attr field of kvm_device_attr encodes two values:
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
values: | mpidr | RES | instr |
Attributes:
The attr field of kvm_device_attr encodes two values::
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
values: | mpidr | RES | instr |
The mpidr field encodes the CPU ID based on the affinity information in the
architecture defined MPIDR, and the field is encoded as follows:
architecture defined MPIDR, and the field is encoded as follows::
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
| Aff3 | Aff2 | Aff1 | Aff0 |
The instr field encodes the system register to access based on the fields
defined in the A64 instruction set encoding for system register access
(RES means the bits are reserved for future use and should be zero):
(RES means the bits are reserved for future use and should be zero)::
| 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
| Op 0 | Op1 | CRn | CRm | Op2 |
@@ -178,26 +197,35 @@ Groups:
CPU interface registers access is not implemented for AArch32 mode.
Error -ENXIO is returned when accessed in AArch32 mode.
Errors:
-ENXIO: Getting or setting this register is not yet supported
-EBUSY: VCPU is running
-EINVAL: Invalid mpidr or register value supplied
======= =====================================================
-ENXIO Getting or setting this register is not yet supported
-EBUSY VCPU is running
-EINVAL Invalid mpidr or register value supplied
======= =====================================================
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
Attributes:
Attributes:
A value describing the number of interrupts (SGI, PPI and SPI) for
this GIC instance, ranging from 64 to 1024, in increments of 32.
kvm_device_attr.addr points to a __u32 value.
Errors:
-EINVAL: Value set is out of the expected range
-EBUSY: Value has already been set.
======= ======================================
-EINVAL Value set is out of the expected range
-EBUSY Value has already been set.
======= ======================================
KVM_DEV_ARM_VGIC_GRP_CTRL
Attributes:
Attributes:
KVM_DEV_ARM_VGIC_CTRL_INIT
request the initialization of the VGIC, no additional parameter in
kvm_device_attr.addr.
@@ -205,20 +233,26 @@ Groups:
save all LPI pending bits into guest RAM pending tables.
The first kB of the pending table is not altered by this operation.
Errors:
-ENXIO: VGIC not properly configured as required prior to calling
this attribute
-ENODEV: no online VCPU
-ENOMEM: memory shortage when allocating vgic internal data
-EFAULT: Invalid guest ram access
-EBUSY: One or more VCPUS are running
======= ========================================================
-ENXIO VGIC not properly configured as required prior to calling
this attribute
-ENODEV no online VCPU
-ENOMEM memory shortage when allocating vgic internal data
-EFAULT Invalid guest ram access
-EBUSY One or more VCPUS are running
======= ========================================================
KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
Attributes:
The attr field of kvm_device_attr encodes the following values:
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
values: | mpidr | info | vINTID |
Attributes:
The attr field of kvm_device_attr encodes the following values::
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
values: | mpidr | info | vINTID |
The vINTID specifies which set of IRQs is reported on.
@@ -228,6 +262,7 @@ Groups:
VGIC_LEVEL_INFO_LINE_LEVEL:
Get/Set the input level of the IRQ line for a set of 32 contiguously
numbered interrupts.
vINTID must be a multiple of 32.
kvm_device_attr.addr points to a __u32 value which will contain a
@@ -243,9 +278,14 @@ Groups:
reported with the same value regardless of the mpidr specified.
The mpidr field encodes the CPU ID based on the affinity information in the
architecture defined MPIDR, and the field is encoded as follows:
architecture defined MPIDR, and the field is encoded as follows::
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
| Aff3 | Aff2 | Aff1 | Aff0 |
Errors:
-EINVAL: vINTID is not multiple of 32 or
info field is not VGIC_LEVEL_INFO_LINE_LEVEL
======= =============================================
-EINVAL vINTID is not multiple of 32 or info field is
not VGIC_LEVEL_INFO_LINE_LEVEL
======= =============================================


@@ -1,8 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0
==================================================
ARM Virtual Generic Interrupt Controller v2 (VGIC)
==================================================
Device types supported:
KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
- KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
Only one VGIC instance may be instantiated through either this API or the
legacy KVM_CREATE_IRQCHIP API. The created VGIC will act as the VM interrupt
@@ -17,7 +21,8 @@ create both a GICv3 and GICv2 device on the same VM.
Groups:
KVM_DEV_ARM_VGIC_GRP_ADDR
Attributes:
Attributes:
KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
Base address in the guest physical address space of the GIC distributor
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
@@ -27,19 +32,25 @@ Groups:
Base address in the guest physical address space of the GIC virtual cpu
interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
This address needs to be 4K aligned and the region covers 4 KByte.
Errors:
-E2BIG: Address outside of addressable IPA range
-EINVAL: Incorrectly aligned address
-EEXIST: Address already configured
-ENXIO: The group or attribute is unknown/unsupported for this device
======= =============================================================
-E2BIG Address outside of addressable IPA range
-EINVAL Incorrectly aligned address
-EEXIST Address already configured
-ENXIO The group or attribute is unknown/unsupported for this device
or hardware support is missing.
-EFAULT: Invalid user pointer for attr->addr.
-EFAULT Invalid user pointer for attr->addr.
======= =============================================================
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
Attributes:
The attr field of kvm_device_attr encodes two values:
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
values: | reserved | vcpu_index | offset |
Attributes:
The attr field of kvm_device_attr encodes two values::
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
values: | reserved | vcpu_index | offset |
All distributor regs are (rw, 32-bit)
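A hedged sketch of a full access using this encoding (vcpu_index, offset and
the vgic_fd device descriptor are assumed; error handling elided)::

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    __u32 regval = 0;
    struct kvm_device_attr attr = {
            .group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
            .attr  = ((__u64)vcpu_index << 32) | offset,  /* layout above */
            .addr  = (__u64)&regval,
    };

    ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);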
@@ -58,16 +69,22 @@ Groups:
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
the expected behavior. Unless GICD_IIDR has been set from userspace, writes
to the interrupt group registers (GICD_IGROUPR) are ignored.
Errors:
-ENXIO: Getting or setting this register is not yet supported
-EBUSY: One or more VCPUs are running
-EINVAL: Invalid vcpu_index supplied
======= =====================================================
-ENXIO Getting or setting this register is not yet supported
-EBUSY One or more VCPUs are running
-EINVAL Invalid vcpu_index supplied
======= =====================================================
KVM_DEV_ARM_VGIC_GRP_CPU_REGS
Attributes:
The attr field of kvm_device_attr encodes two values:
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
values: | reserved | vcpu_index | offset |
Attributes:
The attr field of kvm_device_attr encodes two values::
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
values: | reserved | vcpu_index | offset |
All CPU interface regs are (rw, 32-bit)
@@ -101,27 +118,39 @@ Groups:
value left by 3 places to obtain the actual priority mask level.
Errors:
-ENXIO: Getting or setting this register is not yet supported
-EBUSY: One or more VCPUs are running
-EINVAL: Invalid vcpu_index supplied
======= =====================================================
-ENXIO Getting or setting this register is not yet supported
-EBUSY One or more VCPUs are running
-EINVAL Invalid vcpu_index supplied
======= =====================================================
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
Attributes:
Attributes:
A value describing the number of interrupts (SGI, PPI and SPI) for
this GIC instance, ranging from 64 to 1024, in increments of 32.
Errors:
-EINVAL: Value set is out of the expected range
-EBUSY: Value has already been set, or GIC has already been initialized
with default values.
======= =============================================================
-EINVAL Value set is out of the expected range
-EBUSY Value has already been set, or GIC has already been initialized
with default values.
======= =============================================================
KVM_DEV_ARM_VGIC_GRP_CTRL
Attributes:
Attributes:
KVM_DEV_ARM_VGIC_CTRL_INIT
request the initialization of the VGIC or ITS, no additional parameter
in kvm_device_attr.addr.
Errors:
-ENXIO: VGIC not properly configured as required prior to calling
this attribute
-ENODEV: no online VCPU
-ENOMEM: memory shortage when allocating vgic internal data
======= =========================================================
-ENXIO VGIC not properly configured as required prior to calling
this attribute
-ENODEV no online VCPU
-ENOMEM memory shortage when allocating vgic internal data
======= =========================================================


@@ -0,0 +1,19 @@
.. SPDX-License-Identifier: GPL-2.0
=======
Devices
=======
.. toctree::
:maxdepth: 2
arm-vgic-its
arm-vgic
arm-vgic-v3
mpic
s390_flic
vcpu
vfio
vm
xics
xive


@@ -1,9 +1,13 @@
.. SPDX-License-Identifier: GPL-2.0
=========================
MPIC interrupt controller
=========================
Device types supported:
KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
- KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
- KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
Only one MPIC instance, of any type, may be instantiated. The created
MPIC will act as the system interrupt controller, connecting to each
@@ -11,7 +15,8 @@ vcpu's interrupt inputs.
Groups:
KVM_DEV_MPIC_GRP_MISC
Attributes:
Attributes:
KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
Base address of the 256 KiB MPIC register space. Must be
naturally aligned. A value of zero disables the mapping.


@@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
====================================
FLIC (floating interrupt controller)
====================================
@@ -31,8 +34,10 @@ Groups:
Copies all floating interrupts into a buffer provided by userspace.
When the buffer is too small it returns -ENOMEM, which is the indication
for userspace to try again with a bigger buffer.
-ENOBUFS is returned when the allocation of a kernelspace buffer has
failed.
-EFAULT is returned when copying data to userspace failed.
All interrupts remain pending, i.e. are not deleted from the list of
currently pending interrupts.
@@ -60,38 +65,41 @@ Groups:
KVM_DEV_FLIC_ADAPTER_REGISTER
Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
describing the adapter to register:
describing the adapter to register::
struct kvm_s390_io_adapter {
__u32 id;
__u8 isc;
__u8 maskable;
__u8 swap;
__u8 flags;
};
struct kvm_s390_io_adapter {
__u32 id;
__u8 isc;
__u8 maskable;
__u8 swap;
__u8 flags;
};
id contains the unique id for the adapter, isc the I/O interruption subclass
to use, maskable whether this adapter may be masked (interrupts turned off),
swap whether the indicators need to be byte swapped, and flags contains
further characteristics of the adapter.
Currently defined values for 'flags' are:
- KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
(adapter-interrupt-suppression) facility. This flag only has an effect if
the AIS capability is enabled.
Unknown flag values are ignored.
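A hedged sketch of registering an adapter through this group (flic_fd is an
assumed FLIC device fd; the field values are examples only)::

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    struct kvm_s390_io_adapter adapter = {
            .id       = 0,          /* example values, assumed */
            .isc      = 3,
            .maskable = 1,
    };
    struct kvm_device_attr attr = {
            .group = KVM_DEV_FLIC_ADAPTER_REGISTER,
            .addr  = (__u64)&adapter,
    };

    ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);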
KVM_DEV_FLIC_ADAPTER_MODIFY
Modifies attributes of an existing I/O adapter interrupt source. Takes
a kvm_s390_io_adapter_req specifying the adapter and the operation:
a kvm_s390_io_adapter_req specifying the adapter and the operation::
struct kvm_s390_io_adapter_req {
__u32 id;
__u8 type;
__u8 mask;
__u16 pad0;
__u64 addr;
};
struct kvm_s390_io_adapter_req {
__u32 id;
__u8 type;
__u8 mask;
__u16 pad0;
__u64 addr;
};
id specifies the adapter and type the operation. The supported operations
are:
@@ -103,8 +111,9 @@ struct kvm_s390_io_adapter_req {
perform a gmap translation for the guest address provided in addr,
pin a userspace page for the translated address and add it to the
list of mappings
Note: A new mapping will be created unconditionally; therefore,
the calling code should avoid making duplicate mappings.
.. note:: A new mapping will be created unconditionally; therefore,
the calling code should avoid making duplicate mappings.
KVM_S390_IO_ADAPTER_UNMAP
release a userspace page for the translated address specified in addr
@@ -112,16 +121,17 @@ struct kvm_s390_io_adapter_req {
KVM_DEV_FLIC_AISM
modify the adapter-interruption-suppression mode for a given isc if the
AIS capability is enabled. Takes a kvm_s390_ais_req describing:
AIS capability is enabled. Takes a kvm_s390_ais_req describing::
struct kvm_s390_ais_req {
__u8 isc;
__u16 mode;
};
struct kvm_s390_ais_req {
__u8 isc;
__u16 mode;
};
isc contains the target I/O interruption subclass, mode the target
adapter-interruption-suppression mode. The following modes are
currently supported:
- KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
is always allowed;
- KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
@@ -139,12 +149,12 @@ struct kvm_s390_ais_req {
KVM_DEV_FLIC_AISM_ALL
Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
a kvm_s390_ais_all describing:
a kvm_s390_ais_all describing::
struct kvm_s390_ais_all {
__u8 simm; /* Single-Interruption-Mode mask */
__u8 nimm; /* No-Interruption-Mode mask */
};
struct kvm_s390_ais_all {
__u8 simm; /* Single-Interruption-Mode mask */
__u8 nimm; /* No-Interruption-Mode mask */
};
simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
@@ -159,5 +169,5 @@ ENXIO, as specified in the API documentation). It is not possible to conclude
that a FLIC operation is unavailable based on the error code resulting from a
usage attempt.
Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero
schid is specified.
.. note:: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a
zero schid is specified.


@@ -0,0 +1,114 @@
.. SPDX-License-Identifier: GPL-2.0
======================
Generic vcpu interface
======================
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
The groups and attributes per virtual cpu, if any, are architecture specific.
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
==================================
:Architectures: ARM64
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
---------------------------------------
:Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
pointer to an int
Returns:
======= ========================================================
-EBUSY The PMU overflow interrupt is already set
-ENXIO The overflow interrupt not set when attempting to get it
-ENODEV PMUv3 not supported
-EINVAL Invalid PMU overflow interrupt number supplied or
trying to set the IRQ number without using an in-kernel
irqchip.
======= ========================================================
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
type must be same for each vcpu. As a PPI, the interrupt number is the same for
all vcpus, while as an SPI it must be a separate number per vcpu.
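A hedged sketch of setting this attribute from userspace (vcpu_fd and the PPI
number are assumed; error handling elided)::

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    int irq = 23;                           /* example PPI, assumed */
    struct kvm_device_attr attr = {
            .group = KVM_ARM_VCPU_PMU_V3_CTRL,
            .attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
            .addr  = (__u64)&irq,
    };

    ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);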
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
---------------------------------------
:Parameters: no additional parameter in kvm_device_attr.addr
Returns:
======= ======================================================
-ENODEV PMUv3 not supported or GIC not initialized
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
configured as required prior to calling this attribute
-EBUSY PMUv3 already initialized
======= ======================================================
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
virtual GIC implementation, this must be done after initializing the in-kernel
irqchip.
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
=================================
:Architectures: ARM, ARM64
2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
-----------------------------------------------------------------------------
:Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
pointer to an int
Returns:
======= =================================
-EINVAL Invalid timer interrupt number
-EBUSY One or more VCPUs has already run
======= =================================
A value describing the architected timer interrupt number when connected to an
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
attribute overrides the default values (see below).
============================= ==========================================
KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
============================= ==========================================
Setting the same PPI for different timers will prevent the VCPUs from running.
Setting the interrupt number on a VCPU configures all VCPUs created at that
time to use the number provided for a given timer, overwriting any previously
configured values on other VCPUs. Userspace should configure the interrupt
numbers on at least one VCPU after creating all VCPUs and before running any
VCPUs.
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
==================================
:Architectures: ARM64
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
--------------------------------------
:Parameters: 64-bit base address
Returns:
======= ======================================
-ENXIO Stolen time not implemented
-EEXIST Base address already set for this VCPU
-EINVAL Base address not 64 byte aligned
======= ======================================
Specifies the base address of the stolen time structure for this VCPU. The
base address must be 64 byte aligned and exist within a valid guest memory
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
including the layout of the stolen time structure.


@@ -1,76 +0,0 @@
Generic vcpu interface
====================================
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
The groups and attributes per virtual cpu, if any, are architecture specific.
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
Architectures: ARM64
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
pointer to an int
Returns: -EBUSY: The PMU overflow interrupt is already set
-ENXIO: The overflow interrupt not set when attempting to get it
-ENODEV: PMUv3 not supported
-EINVAL: Invalid PMU overflow interrupt number supplied or
trying to set the IRQ number without using an in-kernel
irqchip.
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
type must be same for each vcpu. As a PPI, the interrupt number is the same for
all vcpus, while as an SPI it must be a separate number per vcpu.
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
Parameters: no additional parameter in kvm_device_attr.addr
Returns: -ENODEV: PMUv3 not supported or GIC not initialized
-ENXIO: PMUv3 not properly configured or in-kernel irqchip not
configured as required prior to calling this attribute
-EBUSY: PMUv3 already initialized
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
virtual GIC implementation, this must be done after initializing the in-kernel
irqchip.
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
Architectures: ARM,ARM64
2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER
2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER
Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
pointer to an int
Returns: -EINVAL: Invalid timer interrupt number
-EBUSY: One or more VCPUs has already run
A value describing the architected timer interrupt number when connected to an
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
attribute overrides the default values (see below).
KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27)
KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30)
Setting the same PPI for different timers will prevent the VCPUs from running.
Setting the interrupt number on a VCPU configures all VCPUs created at that
time to use the number provided for a given timer, overwriting any previously
configured values on other VCPUs. Userspace should configure the interrupt
numbers on at least one VCPU after creating all VCPUs and before running any
VCPUs.
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
Architectures: ARM64
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
Parameters: 64-bit base address
Returns: -ENXIO: Stolen time not implemented
-EEXIST: Base address already set for this VCPU
-EINVAL: Base address not 64 byte aligned
Specifies the base address of the stolen time structure for this VCPU. The
base address must be 64 byte aligned and exist within a valid guest memory
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
including the layout of the stolen time structure.


@@ -1,8 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0
===================
VFIO virtual device
===================
Device types supported:
KVM_DEV_TYPE_VFIO
- KVM_DEV_TYPE_VFIO
Only one VFIO instance may be created per VM. The created device
tracks VFIO groups in use by the VM and features of those groups
@@ -23,14 +27,15 @@ KVM_DEV_VFIO_GROUP attributes:
for the VFIO group.
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
allocated by sPAPR KVM.
kvm_device_attr.addr points to a struct:
kvm_device_attr.addr points to a struct::
struct kvm_vfio_spapr_tce {
__s32 groupfd;
__s32 tablefd;
};
struct kvm_vfio_spapr_tce {
__s32 groupfd;
__s32 tablefd;
};
where
@groupfd is a file descriptor for a VFIO group;
@tablefd is a file descriptor for a TCE table allocated via
KVM_CREATE_SPAPR_TCE.
where:
- @groupfd is a file descriptor for a VFIO group;
- @tablefd is a file descriptor for a TCE table allocated via
KVM_CREATE_SPAPR_TCE.


@@ -1,5 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
====================
Generic vm interface
====================================
====================
The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
@@ -10,30 +13,38 @@ The groups and attributes per virtual machine, if any, are architecture
specific.
1. GROUP: KVM_S390_VM_MEM_CTRL
Architectures: s390
==============================
:Architectures: s390
1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
Parameters: none
Returns: -EBUSY if a vcpu is already defined, otherwise 0
-------------------------------------------
:Parameters: none
:Returns: -EBUSY if a vcpu is already defined, otherwise 0
Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
Parameters: none
Returns: -EINVAL if CMMA was not enabled
0 otherwise
----------------------------------------
:Parameters: none
:Returns: -EINVAL if CMMA was not enabled;
0 otherwise
Clear the CMMA status for all guest pages, so any pages the guest marked
as unused are again used and may not be reclaimed by the host.
1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
Parameters: in attr->addr the address for the new limit of guest memory
Returns: -EFAULT if the given address is not accessible
-EINVAL if the virtual machine is of type UCONTROL
-E2BIG if the given guest memory is to big for that machine
-EBUSY if a vcpu is already defined
-ENOMEM if not enough memory is available for a new shadow guest mapping
0 otherwise
-----------------------------------------
:Parameters: in attr->addr the address for the new limit of guest memory
:Returns: -EFAULT if the given address is not accessible;
-EINVAL if the virtual machine is of type UCONTROL;
-E2BIG if the given guest memory is too big for that machine;
-EBUSY if a vcpu is already defined;
-ENOMEM if not enough memory is available for a new shadow guest mapping;
0 otherwise.
Allows userspace to query the actual limit and set a new limit for
the maximum guest memory size. The limit will be rounded up to
@@ -42,78 +53,92 @@ the number of page table levels. In the case that there is no limit we will set
the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
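A hedged sketch of querying the limit through this attribute (vm_fd is an
assumed VM file descriptor; error handling elided)::

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    __u64 limit = 0;
    struct kvm_device_attr attr = {
            .group = KVM_S390_VM_MEM_CTRL,
            .attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
            .addr  = (__u64)&limit,
    };

    ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);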
2. GROUP: KVM_S390_VM_CPU_MODEL
Architectures: s390
===============================
:Architectures: s390
2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
---------------------------------------------
Allows user space to retrieve machine and kvm specific cpu related information:
Allows user space to retrieve machine and kvm specific cpu related information::
struct kvm_s390_vm_cpu_machine {
struct kvm_s390_vm_cpu_machine {
__u64 cpuid; # CPUID of host
__u32 ibc; # IBC level range offered by host
__u8 pad[4];
__u64 fac_mask[256]; # set of cpu facilities enabled by KVM
__u64 fac_list[256]; # set of cpu facilities offered by host
}
}
Parameters: address of buffer to store the machine related cpu data
of type struct kvm_s390_vm_cpu_machine*
Returns: -EFAULT if the given address is not accessible from kernel space
-ENOMEM if not enough memory is available to process the ioctl
0 in case of success
:Parameters: address of buffer to store the machine related cpu data
of type struct kvm_s390_vm_cpu_machine*
:Returns: -EFAULT if the given address is not accessible from kernel space;
-ENOMEM if not enough memory is available to process the ioctl;
0 in case of success.
2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
------------------------------------------------
Allows user space to retrieve or request to change cpu related information for a vcpu:
Allows user space to retrieve or request to change cpu related information for a vcpu::
struct kvm_s390_vm_cpu_processor {
struct kvm_s390_vm_cpu_processor {
__u64 cpuid; # CPUID currently (to be) used by this vcpu
__u16 ibc; # IBC level currently (to be) used by this vcpu
__u8 pad[6];
__u64 fac_list[256]; # set of cpu facilities currently (to be) used
# by this vcpu
}
# by this vcpu
}
KVM does not enforce or limit the cpu model data in any form. Take the information
retrieved by means of KVM_S390_VM_CPU_MACHINE as a hint for reasonable configuration
setups. Instruction interceptions triggered by additionally set facility bits that
are not handled by KVM need to be implemented in the VM driver code.
Parameters: address of buffer to store/set the processor related cpu
data of type struct kvm_s390_vm_cpu_processor*.
Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case)
-EFAULT if the given address is not accessible from kernel space
-ENOMEM if not enough memory is available to process the ioctl
0 in case of success
:Parameters: address of buffer to store/set the processor related cpu
data of type struct kvm_s390_vm_cpu_processor*.
:Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case);
-EFAULT if the given address is not accessible from kernel space;
-ENOMEM if not enough memory is available to process the ioctl;
0 in case of success.
.. _KVM_S390_VM_CPU_MACHINE_FEAT:
2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o)
--------------------------------------------------
Allows user space to retrieve available cpu features. A feature is available if
provided by the hardware and supported by kvm. In theory, cpu features could
even be completely emulated by kvm.
struct kvm_s390_vm_cpu_feat {
__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
};
::
Parameters: address of a buffer to load the feature list from.
Returns: -EFAULT if the given address is not accessible from kernel space.
0 in case of success.
struct kvm_s390_vm_cpu_feat {
__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
};
:Parameters: address of a buffer to load the feature list from.
:Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success.
2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w)
----------------------------------------------------
Allows user space to retrieve or change enabled cpu features for all VCPUs of a
VM. Features that are not available cannot be enabled.
See 2.3. for a description of the parameter struct.
See :ref:`KVM_S390_VM_CPU_MACHINE_FEAT` for
a description of the parameter struct.
Parameters: address of a buffer to store/load the feature list from.
Returns: -EFAULT if the given address is not accessible from kernel space.
-EINVAL if a cpu feature that is not available is to be enabled.
-EBUSY if at least one VCPU has already been defined.
:Parameters: address of a buffer to store/load the feature list from.
:Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL if a cpu feature that is not available is to be enabled;
-EBUSY if at least one VCPU has already been defined;
0 in case of success.
.. _KVM_S390_VM_CPU_MACHINE_SUBFUNC:
2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o)
-----------------------------------------------------
Allows user space to retrieve available cpu subfunctions without any filtering
done by a set IBC. These subfunctions are indicated to the guest VCPU via
@@ -126,7 +151,9 @@ contained in the returned struct. If the affected instruction
indicates subfunctions via a "test bit" mechanism, the subfunction codes are
contained in the returned struct in MSB 0 bit numbering.
struct kvm_s390_vm_cpu_subfunc {
::
struct kvm_s390_vm_cpu_subfunc {
u8 plo[32]; # always valid (ESA/390 feature)
u8 ptff[16]; # valid with TOD-clock steering
u8 kmac[16]; # valid with Message-Security-Assist
@@ -143,13 +170,14 @@ struct kvm_s390_vm_cpu_subfunc {
u8 kma[16]; # valid with Message-Security-Assist-Extension 8
u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
u8 reserved[1792]; # reserved for future instructions
};
};
Parameters: address of a buffer to load the subfunction blocks from.
Returns: -EFAULT if the given address is not accessible from kernel space.
:Parameters: address of a buffer to load the subfunction blocks from.
:Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success.
2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w)
-------------------------------------------------------
Allows user space to retrieve or change cpu subfunctions to be indicated for
all VCPUs of a VM. This attribute will only be available if kernel and
@@ -164,107 +192,125 @@ As long as no data has been written, a read will fail. The IBC will be used
to determine available subfunctions in this case, this will guarantee backward
compatibility.
See 2.5. for a description of the parameter struct.
See :ref:`KVM_S390_VM_CPU_MACHINE_SUBFUNC` for a
description of the parameter struct.
Parameters: address of a buffer to store/load the subfunction blocks from.
Returns: -EFAULT if the given address is not accessible from kernel space.
-EINVAL when reading, if there was no write yet.
-EBUSY if at least one VCPU has already been defined.
:Parameters: address of a buffer to store/load the subfunction blocks from.
:Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL when reading, if there was no write yet;
-EBUSY if at least one VCPU has already been defined;
0 in case of success.
3. GROUP: KVM_S390_VM_TOD
Architectures: s390
=========================
:Architectures: s390
3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
------------------------------------
Allows user space to set/get the TOD clock extension (u8) (superseded by
KVM_S390_VM_TOD_EXT).
Parameters: address of a buffer in user space to store the data (u8) to
Returns: -EFAULT if the given address is not accessible from kernel space
:Parameters: address of a buffer in user space to store the data (u8) to
:Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL if setting the TOD clock extension to != 0 is not supported
3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
-----------------------------------
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
the POP (u64).
Parameters: address of a buffer in user space to store the data (u64) to
Returns: -EFAULT if the given address is not accessible from kernel space
:Parameters: address of a buffer in user space to store the data (u64) to
:Returns: -EFAULT if the given address is not accessible from kernel space
3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
-----------------------------------
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
also allows user space to get/set it. If the guest CPU model does not support
it, it is stored as 0 and not allowed to be set to a value != 0.
Parameters: address of a buffer in user space to store the data
(kvm_s390_vm_tod_clock) to
Returns: -EFAULT if the given address is not accessible from kernel space
:Parameters: address of a buffer in user space to store the data
(kvm_s390_vm_tod_clock) to
:Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL if setting the TOD clock extension to != 0 is not supported
4. GROUP: KVM_S390_VM_CRYPTO
Architectures: s390
============================
:Architectures: s390
4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
------------------------------------------------------
Allows user space to enable aes key wrapping, including generating a new
wrapping key.
Parameters: none
Returns: 0
:Parameters: none
:Returns: 0
4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
------------------------------------------------------
Allows user space to enable dea key wrapping, including generating a new
wrapping key.
Parameters: none
Returns: 0
:Parameters: none
:Returns: 0
4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
-------------------------------------------------------
Allows user space to disable aes key wrapping, clearing the wrapping key.
Parameters: none
Returns: 0
:Parameters: none
:Returns: 0
4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
-------------------------------------------------------
Allows user space to disable dea key wrapping, clearing the wrapping key.
Parameters: none
Returns: 0
:Parameters: none
:Returns: 0
5. GROUP: KVM_S390_VM_MIGRATION
Architectures: s390
===============================
:Architectures: s390
5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
------------------------------------------------
Allows userspace to stop migration mode, needed for PGSTE migration.
Setting this attribute when migration mode is not active will have no
effect.
Parameters: none
Returns: 0
:Parameters: none
:Returns: 0
5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
-------------------------------------------------
Allows userspace to start migration mode, needed for PGSTE migration.
Setting this attribute when migration mode is already active will have
no effect.
Parameters: none
Returns: -ENOMEM if there is not enough free memory to start migration mode
-EINVAL if the state of the VM is invalid (e.g. no memory defined)
:Parameters: none
:Returns: -ENOMEM if there is not enough free memory to start migration mode;
-EINVAL if the state of the VM is invalid (e.g. no memory defined);
0 in case of success.
5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
--------------------------------------------------
Allows userspace to query the status of migration mode.
Parameters: address of a buffer in user space to store the data (u64) to;
the data itself is either 0 if migration mode is disabled or 1
if it is enabled
Returns: -EFAULT if the given address is not accessible from kernel space
:Parameters: address of a buffer in user space to store the data (u64) to;
the data itself is either 0 if migration mode is disabled or 1
if it is enabled
:Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success.
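A minimal user-space sketch of starting migration mode (vm_fd assumed as
above; error handling abbreviated)::

  struct kvm_device_attr attr = {
          .group = KVM_S390_VM_MIGRATION,
          .attr  = KVM_S390_VM_MIGRATION_START,
  };

  if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
          err(1, "start migration mode");  /* -ENOMEM / -EINVAL, see 5.2 */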
View file
@ -1,20 +1,31 @@
.. SPDX-License-Identifier: GPL-2.0
=========================
XICS interrupt controller
=========================
Device type supported: KVM_DEV_TYPE_XICS
Groups:
1. KVM_DEV_XICS_GRP_SOURCES
Attributes: One per interrupt source, indexed by the source number.
Attributes:
One per interrupt source, indexed by the source number.
2. KVM_DEV_XICS_GRP_CTRL
Attributes:
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
Attributes:
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
The kvm_device_attr.addr points to a __u32 value which is the number of
interrupt server numbers (ie, highest possible vcpu id plus one).
Errors:
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
-EFAULT: Invalid user pointer for attr->addr.
-EBUSY: A vcpu is already connected to the device.
======= ==========================================
-EINVAL Value greater than KVM_MAX_VCPU_ID.
-EFAULT Invalid user pointer for attr->addr.
-EBUSY A vcpu is already connected to the device.
======= ==========================================
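As a hedged sketch (not from the kernel docs), creating a XICS device and
setting this attribute could look like::

  struct kvm_create_device cd = { .type = KVM_DEV_TYPE_XICS };
  __u32 nr_servers = 16;          /* highest possible vcpu id plus one */
  struct kvm_device_attr attr = {
          .group = KVM_DEV_XICS_GRP_CTRL,
          .attr  = KVM_DEV_XICS_NR_SERVERS,
          .addr  = (__u64)(unsigned long)&nr_servers,
  };

  ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);     /* cd.fd is the device fd */
  ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr); /* may fail as listed above */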
This device emulates the XICS (eXternal Interrupt Controller
Specification) defined in PAPR. The XICS has a set of interrupt
@ -53,24 +64,29 @@ the interrupt source number. The 64 bit state word has the following
bitfields, starting from the least-significant end of the word:
* Destination (server number), 32 bits
This specifies where the interrupt should be sent, and is the
interrupt server number specified for the destination vcpu.
* Priority, 8 bits
This is the priority specified for this interrupt source, where 0 is
the highest priority and 255 is the lowest. An interrupt with a
priority of 255 will never be delivered.
* Level sensitive flag, 1 bit
This bit is 1 for a level-sensitive interrupt source, or 0 for
edge-sensitive (or MSI).
* Masked flag, 1 bit
This bit is set to 1 if the interrupt is masked (cannot be delivered
regardless of its priority), for example by the ibm,int-off RTAS
call, or 0 if it is not masked.
* Pending flag, 1 bit
This bit is 1 if the source has a pending interrupt, otherwise 0.
Only one XICS instance may be created per VM.
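The state-word layout above can be packed as in this illustrative helper
(the helper and its field order follow the list above; it is not a kernel
API)::

  static inline __u64 xics_source_state(__u32 server, __u8 priority,
                                        int level, int masked, int pending)
  {
          return (__u64)server                 /* bits  0..31: destination */
               | ((__u64)priority << 32)       /* bits 32..39: priority    */
               | ((__u64)!!level   << 40)      /* bit  40: level-sensitive */
               | ((__u64)!!masked  << 41)      /* bit  41: masked          */
               | ((__u64)!!pending << 42);     /* bit  42: pending         */
  }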
View file
@ -1,8 +1,11 @@
.. SPDX-License-Identifier: GPL-2.0
===========================================================
POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
==========================================================
===========================================================
Device types supported:
KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
- KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
This device acts as a VM interrupt controller. It provides the KVM
interface to configure the interrupt sources of a VM in the underlying
@ -64,72 +67,100 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
* Groups:
1. KVM_DEV_XIVE_GRP_CTRL
Provides global controls on the device
1. KVM_DEV_XIVE_GRP_CTRL
Provides global controls on the device
Attributes:
1.1 KVM_DEV_XIVE_RESET (write only)
Resets the interrupt controller configuration for sources and event
queues. To be used by kexec and kdump.
Errors: none
1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
Sync all the sources and queues and mark the EQ pages dirty. This is
to make sure that a consistent memory state is captured when
migrating the VM.
Errors: none
1.3 KVM_DEV_XIVE_NR_SERVERS (write only)
The kvm_device_attr.addr points to a __u32 value which is the number of
interrupt server numbers (ie, highest possible vcpu id plus one).
Errors:
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
-EFAULT: Invalid user pointer for attr->addr.
-EBUSY: A vCPU is already connected to the device.
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
Initializes a new source in the XIVE device and mask it.
Errors:
======= ==========================================
-EINVAL Value greater than KVM_MAX_VCPU_ID.
-EFAULT Invalid user pointer for attr->addr.
-EBUSY A vCPU is already connected to the device.
======= ==========================================
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
Initializes a new source in the XIVE device and masks it.
Attributes:
Interrupt source number (64-bit)
The kvm_device_attr.addr points to a __u64 value:
bits: | 63 .... 2 | 1 | 0
values: | unused | level | type
The kvm_device_attr.addr points to a __u64 value::
  bits:   |   63   ....   2   |   1   |   0
  values: |       unused      | level | type
- type: 0:MSI 1:LSI
- level: assertion level in case of an LSI.
Errors:
-E2BIG: Interrupt source number is out of range
-ENOMEM: Could not create a new source block
-EFAULT: Invalid user pointer for attr->addr.
-ENXIO: Could not allocate underlying HW interrupt
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
Configures source targeting
Errors:
======= ==========================================
-E2BIG Interrupt source number is out of range
-ENOMEM Could not create a new source block
-EFAULT Invalid user pointer for attr->addr.
-ENXIO Could not allocate underlying HW interrupt
======= ==========================================
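For example, initializing source number 23 as an asserted LSI might look
like this sketch (xive_fd is an assumed device fd obtained from
KVM_CREATE_DEVICE)::

  __u64 state = 1 /* type: LSI */ | (1ULL << 1) /* level: asserted */;
  struct kvm_device_attr attr = {
          .group = KVM_DEV_XIVE_GRP_SOURCE,
          .attr  = 23,            /* interrupt source number */
          .addr  = (__u64)(unsigned long)&state,
  };

  ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);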
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
Configures source targeting
Attributes:
Interrupt source number (64-bit)
The kvm_device_attr.addr points to a __u64 value:
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
values: | eisn | mask | server | priority
The kvm_device_attr.addr points to a __u64 value::
  bits:   | 63 .... 33 |  32  | 31 .. 3 |  2 .. 0
  values: |    eisn    | mask |  server | priority
- priority: 0-7 interrupt priority level
- server: CPU number chosen to handle the interrupt
- mask: mask flag (unused)
- eisn: Effective Interrupt Source Number
Errors:
-ENOENT: Unknown source number
-EINVAL: Not initialized source number
-EINVAL: Invalid priority
-EINVAL: Invalid CPU number.
-EFAULT: Invalid user pointer for attr->addr.
-ENXIO: CPU event queues not configured or configuration of the
underlying HW interrupt failed
-EBUSY: No CPU available to serve interrupt
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
Configures an event queue of a CPU
Errors:
======= =======================================================
-ENOENT Unknown source number
-EINVAL Not initialized source number
-EINVAL Invalid priority
-EINVAL Invalid CPU number.
-EFAULT Invalid user pointer for attr->addr.
-ENXIO CPU event queues not configured or configuration of the
underlying HW interrupt failed
-EBUSY No CPU available to serve interrupt
======= =======================================================
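The targeting word described above can be assembled as in this illustrative
helper (names are mine; bit 32, the unused mask flag, is left clear)::

  static inline __u64 xive_source_config(__u32 eisn, __u32 server, __u8 prio)
  {
          return ((__u64)eisn   << 33)    /* bits 63..33: EISN     */
               | ((__u64)server << 3)     /* bits 31..3:  server   */
               | (prio & 0x7);            /* bits  2..0:  priority */
  }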
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
Configures an event queue of a CPU
Attributes:
EQ descriptor identifier (64-bit)
The EQ descriptor identifier is a tuple (server, priority) :
bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
values: | unused | server | priority
The kvm_device_attr.addr points to :
The EQ descriptor identifier is a tuple (server, priority)::
  bits:   | 63 .... 32 | 31 .. 3 |  2 .. 0
  values: |   unused   |  server | priority
The kvm_device_attr.addr points to::
struct kvm_ppc_xive_eq {
__u32 flags;
__u32 qshift;
@ -138,8 +169,9 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
__u32 qindex;
__u8 pad[40];
};
- flags: queue flags
KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
forces notification without using the coalescing mechanism
provided by the XIVE END ESBs.
- qshift: queue size (power of 2)
@ -147,22 +179,31 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
- qtoggle: current queue toggle bit
- qindex: current queue index
- pad: reserved for future use
Errors:
-ENOENT: Invalid CPU number
-EINVAL: Invalid priority
-EINVAL: Invalid flags
-EINVAL: Invalid queue size
-EINVAL: Invalid queue address
-EFAULT: Invalid user pointer for attr->addr.
-EIO: Configuration of the underlying HW failed
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
Synchronize the source to flush event notifications
Errors:
======= =========================================
-ENOENT Invalid CPU number
-EINVAL Invalid priority
-EINVAL Invalid flags
-EINVAL Invalid queue size
-EINVAL Invalid queue address
-EFAULT Invalid user pointer for attr->addr.
-EIO Configuration of the underlying HW failed
======= =========================================
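A minimal sketch of reading back the EQ configuration of (server 4,
priority 6), using the identifier layout above (xive_fd assumed as before)::

  struct kvm_ppc_xive_eq eq;
  struct kvm_device_attr attr = {
          .group = KVM_DEV_XIVE_GRP_EQ_CONFIG,
          .attr  = ((__u64)4 << 3) | 6,   /* server 4, priority 6 */
          .addr  = (__u64)(unsigned long)&eq,
  };

  ioctl(xive_fd, KVM_GET_DEVICE_ATTR, &attr);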
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
Synchronize the source to flush event notifications
Attributes:
Interrupt source number (64-bit)
Errors:
-ENOENT: Unknown source number
-EINVAL: Not initialized source number
======= =============================
-ENOENT Unknown source number
-EINVAL Not initialized source number
======= =============================
* VCPU state
@ -175,11 +216,12 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
as it synthesizes the priorities of the pending interrupts. We
capture a bit more to report debug information.
KVM_REG_PPC_VP_STATE (2 * 64bits)
bits: | 63 .... 32 | 31 .... 0 |
values: | TIMA word0 | TIMA word1 |
bits: | 127 .......... 64 |
values: | unused |
KVM_REG_PPC_VP_STATE (2 * 64bits)::
  bits:   |  63  ....  32  |  31  ....  0  |
  values: |   TIMA word0   |  TIMA word1   |
  bits:   |  127       ..........       64 |
  values: |             unused             |
* Migration:
@ -196,7 +238,7 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
3. Capture the state of the source targeting, the EQs configuration
and the state of thread interrupt context registers.
Restore is similar :
Restore is similar:
1. Restore the EQ configuration. As targeting depends on it.
2. Restore targeting
View file
@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
===========================
The KVM halt polling system
===========================
@ -68,7 +71,8 @@ steady state polling interval but will only really do a good job for wakeups
which come at an approximately constant rate, otherwise there will be constant
adjustment of the polling interval.
[0] total block time: the time between when the halt polling function is
[0] total block time:
the time between when the halt polling function is
invoked and a wakeup source received (irrespective of
whether the scheduler is invoked within that function).
@ -81,31 +85,32 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
powerpc kvm-hv case.
Module Parameter | Description | Default Value
--------------------------------------------------------------------------------
halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT
| interval which defines |
| the ceiling value of the |
| polling interval for | (per arch value)
| each vcpu. |
--------------------------------------------------------------------------------
halt_poll_ns_grow | The value by which the | 2
| halt polling interval is |
| multiplied in the |
| grow_halt_poll_ns() |
| function. |
--------------------------------------------------------------------------------
halt_poll_ns_grow_start | The initial value to grow | 10000
| to from zero in the |
| grow_halt_poll_ns() |
| function. |
--------------------------------------------------------------------------------
halt_poll_ns_shrink | The value by which the | 0
| halt polling interval is |
| divided in the |
| shrink_halt_poll_ns() |
| function. |
--------------------------------------------------------------------------------
+-----------------------+---------------------------+-------------------------+
|Module Parameter | Description | Default Value |
+-----------------------+---------------------------+-------------------------+
|halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT|
| | interval which defines | |
| | the ceiling value of the | |
| | polling interval for | (per arch value) |
| | each vcpu. | |
+-----------------------+---------------------------+-------------------------+
|halt_poll_ns_grow | The value by which the | 2 |
| | halt polling interval is | |
| | multiplied in the | |
| | grow_halt_poll_ns() | |
| | function. | |
+-----------------------+---------------------------+-------------------------+
|halt_poll_ns_grow_start| The initial value to grow | 10000 |
| | to from zero in the | |
| | grow_halt_poll_ns() | |
| | function. | |
+-----------------------+---------------------------+-------------------------+
|halt_poll_ns_shrink | The value by which the | 0 |
| | halt polling interval is | |
| | divided in the | |
| | shrink_halt_poll_ns() | |
| | function. | |
+-----------------------+---------------------------+-------------------------+
These module parameters can be set from the debugfs files in:
@ -117,20 +122,19 @@ Note: that these module parameters are system wide values and are not able to
Further Notes
=============
- Care should be taken when setting the halt_poll_ns module parameter as a
large value has the potential to drive the cpu usage to 100% on a machine which
would be almost entirely idle otherwise. This is because even if a guest has
wakeups during which very little work is done and which are quite far apart, if
the period is shorter than the global max polling interval (halt_poll_ns) then
the host will always poll for the entire block time and thus cpu utilisation
will go to 100%.
- Care should be taken when setting the halt_poll_ns module parameter as a large value
has the potential to drive the cpu usage to 100% on a machine which would be almost
entirely idle otherwise. This is because even if a guest has wakeups during which very
little work is done and which are quite far apart, if the period is shorter than the
global max polling interval (halt_poll_ns) then the host will always poll for the
entire block time and thus cpu utilisation will go to 100%.
- Halt polling essentially presents a trade off between power usage and latency
and the module parameters should be used to tune the affinity for this. Idle
cpu time is essentially converted to host kernel time with the aim of decreasing
latency when entering the guest.
- Halt polling essentially presents a trade off between power usage and latency and
the module parameters should be used to tune the affinity for this. Idle cpu time is
essentially converted to host kernel time with the aim of decreasing latency when
entering the guest.
- Halt polling will only be conducted by the host when no other tasks are
runnable on that cpu, otherwise the polling will cease immediately and
schedule will be invoked to allow that other task to run. Thus this doesn't
allow a guest to denial of service the cpu.
- Halt polling will only be conducted by the host when no other tasks are runnable on
that cpu, otherwise the polling will cease immediately and schedule will be invoked to
allow that other task to run. Thus this doesn't allow a guest to denial of service the
cpu.
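For reference, the grow/shrink steps these notes and parameters refer to
amount to roughly the following (a simplified sketch of the logic in
virt/kvm/kvm_main.c, not a verbatim copy)::

  static unsigned int grow_halt_poll_ns(unsigned int val)
  {
          if (!halt_poll_ns_grow)
                  return 0;
          val *= halt_poll_ns_grow;
          if (val < halt_poll_ns_grow_start)
                  val = halt_poll_ns_grow_start;
          return min(val, (unsigned int)halt_poll_ns);  /* global ceiling */
  }

  static unsigned int shrink_halt_poll_ns(unsigned int val)
  {
          /* a shrink divisor of 0 means "reset polling to 0" */
          return halt_poll_ns_shrink ? val / halt_poll_ns_shrink : 0;
  }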
View file
@ -1,5 +1,9 @@
Linux KVM Hypercall:
.. SPDX-License-Identifier: GPL-2.0
===================
Linux KVM Hypercall
===================
X86:
KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
instruction. The hypervisor can replace it with instructions that are
@ -20,7 +24,7 @@ S390:
For further information on the S390 diagnose call as supported by KVM,
refer to Documentation/virt/kvm/s390-diag.txt.
PowerPC:
PowerPC:
It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
Return value is placed in R3.
@ -34,7 +38,8 @@ MIPS:
the return value is placed in $2 (v0).
KVM Hypercalls Documentation
===========================
============================
The template for each hypercall is:
1. Hypercall name.
2. Architecture(s)
@ -43,56 +48,64 @@ The template for each hypercall is:
1. KVM_HC_VAPIC_POLL_IRQ
------------------------
Architecture: x86
Status: active
Purpose: Trigger guest exit so that the host can check for pending
interrupts on reentry.
:Architecture: x86
:Status: active
:Purpose: Trigger guest exit so that the host can check for pending
interrupts on reentry.
2. KVM_HC_MMU_OP
------------------------
Architecture: x86
Status: deprecated.
Purpose: Support MMU operations such as writing to PTE,
flushing TLB, release PT.
----------------
:Architecture: x86
:Status: deprecated.
:Purpose: Support MMU operations such as writing to PTE,
flushing TLB, release PT.
3. KVM_HC_FEATURES
------------------------
Architecture: PPC
Status: active
Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
used to enumerate which hypercalls are available. On PPC, either device tree
based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration
mechanism (which is this hypercall) can be used.
------------------
:Architecture: PPC
:Status: active
:Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
is used to enumerate which hypercalls are available. On PPC, either
device tree based lookup (which is also what ePAPR dictates) or a
KVM-specific enumeration mechanism (which is this hypercall) can
be used.
4. KVM_HC_PPC_MAP_MAGIC_PAGE
------------------------
Architecture: PPC
Status: active
Purpose: To enable communication between the hypervisor and guest there is a
shared page that contains parts of supervisor visible register state.
The guest can map this shared page to access its supervisor register through
memory using this hypercall.
----------------------------
:Architecture: PPC
:Status: active
:Purpose: To enable communication between the hypervisor and guest there is a
shared page that contains parts of supervisor visible register state.
The guest can map this shared page to access its supervisor register
through memory using this hypercall.
5. KVM_HC_KICK_CPU
------------------------
Architecture: x86
Status: active
Purpose: Hypercall used to wakeup a vcpu from HLT state
Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
kernel mode for an event to occur (ex: a spinlock to become available) can
execute HLT instruction once it has busy-waited for more than a threshold
time-interval. Execution of HLT instruction would cause the hypervisor to put
the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
is used in the hypercall for future use.
------------------
:Architecture: x86
:Status: active
:Purpose: Hypercall used to wake up a vcpu from HLT state
:Usage example:
A vcpu of a paravirtualized guest that is busywaiting in guest
kernel mode for an event to occur (e.g. a spinlock to become available) can
execute the HLT instruction once it has busy-waited for more than a threshold
time interval. Execution of the HLT instruction causes the hypervisor to put
the vcpu to sleep until an appropriate event occurs. Another vcpu of the
same guest can wake up the sleeping vcpu by issuing the KVM_HC_KICK_CPU
hypercall, specifying the APIC ID (a1) of the vcpu to be woken up. An
additional argument (a0) is reserved for future use.
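A hedged guest-side sketch, following the x86 calling convention described
at the top of this document (hypercall number in rax, arguments in rbx and
rcx; vmcall shown, vmmcall on AMD)::

  static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
                                    unsigned long p2)
  {
          long ret;

          asm volatile("vmcall"
                       : "=a"(ret)
                       : "a"(nr), "b"(p1), "c"(p2)
                       : "memory");
          return ret;
  }

  /* a0 = flags (reserved, 0 for now), a1 = APIC ID of the vcpu to wake */
  kvm_hypercall2(KVM_HC_KICK_CPU, 0, apic_id);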
6. KVM_HC_CLOCK_PAIRING
------------------------
Architecture: x86
Status: active
Purpose: Hypercall used to synchronize host and guest clocks.
-----------------------
:Architecture: x86
:Status: active
:Purpose: Hypercall used to synchronize host and guest clocks.
Usage:
a0: guest physical address where host copies
@ -101,6 +114,8 @@ a0: guest physical address where host copies
a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
is supported (corresponding to the host's CLOCK_REALTIME clock).
::
struct kvm_clock_pairing {
__s64 sec;
__s64 nsec;
@ -123,15 +138,16 @@ Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
6. KVM_HC_SEND_IPI
------------------------
Architecture: x86
Status: active
Purpose: Send IPIs to multiple vCPUs.
------------------
a0: lower part of the bitmap of destination APIC IDs
a1: higher part of the bitmap of destination APIC IDs
a2: the lowest APIC ID in bitmap
a3: APIC ICR
:Architecture: x86
:Status: active
:Purpose: Send IPIs to multiple vCPUs.
- a0: lower part of the bitmap of destination APIC IDs
- a1: higher part of the bitmap of destination APIC IDs
- a2: the lowest APIC ID in bitmap
- a3: APIC ICR
The hypercall lets a guest send multicast IPIs, with at most 128
destinations per hypercall in 64-bit mode and 64 vCPUs per
@ -143,12 +159,13 @@ corresponds to the APIC ID a2+1, and so on.
Returns the number of CPUs to which the IPIs were delivered successfully.
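An illustrative construction of the bitmap arguments, waking APIC IDs 8 and
70 (kvm_hypercall4() is assumed to be a four-argument variant of the wrapper
sketched under KVM_HC_KICK_CPU)::

  unsigned long lo = 0, hi = 0, min_id = 8;   /* a2: lowest APIC ID */
  unsigned long icr = 0xfd;                   /* illustrative ICR value */
  int ids[] = { 8, 70 }, i;

  for (i = 0; i < 2; i++) {
          unsigned int off = ids[i] - min_id;

          if (off < 64)
                  lo |= 1UL << off;
          else
                  hi |= 1UL << (off - 64);
  }
  kvm_hypercall4(KVM_HC_SEND_IPI, lo, hi, min_id, icr);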
7. KVM_HC_SCHED_YIELD
------------------------
Architecture: x86
Status: active
Purpose: Hypercall used to yield if the IPI target vCPU is preempted
---------------------
:Architecture: x86
:Status: active
:Purpose: Hypercall used to yield if the IPI target vCPU is preempted
a0: destination APIC ID
Usage example: When sending a call-function IPI-many to vCPUs, yield if
any of the IPI target vCPUs was preempted.
:Usage example: When sending a call-function IPI-many to vCPUs, yield if
any of the IPI target vCPUs was preempted.
View file
@ -7,6 +7,22 @@ KVM
.. toctree::
:maxdepth: 2
api
amd-memory-encryption
cpuid
halt-polling
hypercalls
locking
mmu
msr
nested-vmx
ppc-pv
s390-diag
timekeeping
vcpu-requests
review-checklist
arm/index
devices/index
View file
@ -0,0 +1,243 @@
.. SPDX-License-Identifier: GPL-2.0
=================
KVM Lock Overview
=================
1. Acquisition Orders
---------------------
The acquisition orders for mutexes are as follows:
- kvm->lock is taken outside vcpu->mutex
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
them together is quite rare.
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
Everything else is a leaf: no other lock is taken inside the critical
sections.
2. Exception
------------
Fast page fault:
Fast page fault is the fast path which fixes the guest page fault out of
the mmu-lock on x86. Currently, the page fault can be fast in one of the
following two cases:
1. Access Tracking: The SPTE is not present, but it is marked for access
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
restore the saved R/X bits. This is described in more detail later below.
2. Write-Protection: The SPTE is present and the fault is
caused by write-protect. That means we just need to change the W bit of
the spte.
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
SPTE_MMU_WRITEABLE bit on the spte:
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
the gfn is writable on guest mmu and it is not write-protected by shadow
page write-protection.
On fast page fault path, we will use cmpxchg to atomically set the spte W
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
restore the saved R/X bits if the VMX_EPT_TRACK_ACCESS mask is set, or both.
This is safe because any change to these bits can be detected by cmpxchg.
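In code form, the lockless update is essentially the following (an
illustrative fragment, not the exact kernel source)::

  u64 old_spte = READ_ONCE(*sptep);
  u64 new_spte = old_spte | PT_WRITABLE_MASK;  /* or restored R/X bits */

  if (cmpxchg64(sptep, old_spte, new_spte) != old_spte)
          return false;  /* raced with another update: retry or
                            fall back to taking mmu_lock */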
But we need to carefully check these cases:
1) The mapping from gfn to pfn
The mapping from gfn to pfn may be changed since we can only ensure the pfn
is not changed during cmpxchg. This is an ABA problem; for example, the
following case can happen:
+------------------------------------------------------------------------+
| At the beginning:: |
| |
| gpte = gfn1 |
| gfn1 is mapped to pfn1 on host |
| spte is the shadow page table entry corresponding with gpte and |
| spte = pfn1 |
+------------------------------------------------------------------------+
| On fast page fault path: |
+------------------------------------+-----------------------------------+
| CPU 0: | CPU 1: |
+------------------------------------+-----------------------------------+
| :: | |
| | |
| old_spte = *spte; | |
+------------------------------------+-----------------------------------+
| | pfn1 is swapped out:: |
| | |
| | spte = 0; |
| | |
| | pfn1 is re-alloced for gfn2. |
| | |
| | gpte is changed to point to |
| | gfn2 by the guest:: |
| | |
| | spte = pfn1; |
+------------------------------------+-----------------------------------+
| :: |
| |
| if (cmpxchg(spte, old_spte, old_spte+W) |
| mark_page_dirty(vcpu->kvm, gfn1) |
| OOPS!!! |
+------------------------------------------------------------------------+
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
For direct sp, we can easily avoid it since the spte of direct sp is fixed
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
to pin gfn to pfn, because after gfn_to_pfn_atomic():
- We have held the refcount of pfn that means the pfn can not be freed and
be reused for another gfn.
- The pfn is writable that means it can not be shared between different gfns
by KSM.
Then, we can ensure the dirty bitmap is correctly set for a gfn.
Currently, to simplify things, we disable fast page fault for
indirect shadow pages.
2) Dirty bit tracking
In the original code, the spte can be fast-updated (non-atomically) if the
spte is read-only and the Accessed bit has already been set since the
Accessed bit and Dirty bit can not be lost.
But this is no longer true after fast page fault, since the spte can be marked
writable between reading and updating it, as in the following case:
+------------------------------------------------------------------------+
| At the beginning:: |
| |
| spte.W = 0 |
| spte.Accessed = 1 |
+------------------------------------+-----------------------------------+
| CPU 0: | CPU 1: |
+------------------------------------+-----------------------------------+
| In mmu_spte_clear_track_bits():: | |
| | |
| old_spte = *spte; | |
| | |
| | |
| /* 'if' condition is satisfied. */| |
| if (old_spte.Accessed == 1 && | |
| old_spte.W == 0) | |
| spte = 0ull; | |
+------------------------------------+-----------------------------------+
| | on fast page fault path:: |
| | |
| | spte.W = 1 |
| | |
| | memory write on the spte:: |
| | |
| | spte.Dirty = 1 |
+------------------------------------+-----------------------------------+
| :: | |
| | |
| else | |
| old_spte = xchg(spte, 0ull) | |
| if (old_spte.Accessed == 1) | |
| kvm_set_pfn_accessed(spte.pfn);| |
| if (old_spte.Dirty == 1) | |
| kvm_set_pfn_dirty(spte.pfn); | |
| OOPS!!! | |
+------------------------------------+-----------------------------------+
The Dirty bit is lost in this case.
In order to avoid this kind of issue, we always treat the spte as "volatile"
if it can be updated out of mmu-lock (see spte_has_volatile_bits()); this
means the spte is always updated atomically in this case.
3) flush tlbs due to spte updated
If the spte is updated from writable to readonly, we should flush all TLBs,
otherwise rmap_write_protect will find a read-only spte, even though the
writable spte might be cached on a CPU's TLB.
As mentioned before, the spte can be updated to writable out of mmu-lock on
the fast page fault path. In order to easily audit the path, we check in
mmu_spte_update() whether TLBs need to be flushed for this reason, since it
is a common function for updating the spte (present -> present).
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
update it atomically, so the race caused by fast page fault can be avoided;
see the comments in spte_has_volatile_bits() and mmu_spte_update().
Lockless Access Tracking:
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
bits. In this case, when the KVM MMU notifier is called to track accesses to a
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
by clearing the RWX bits in the PTE and storing the original R & X bits in
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
PTE (using the ignored bit 62). When the VM tries to access the page later on,
a fault is generated and the fast page fault mechanism described above is used
to atomically restore the PTE to a Present state. The W bit is not saved when
the PTE is marked for access tracking and during restoration to the Present
state, the W bit is set depending on whether or not it was a write access. If
it wasn't, then the W bit will remain clear until a write access happens, at
which time it will be set using the Dirty tracking mechanism described above.
3. Reference
------------
:Name: kvm_lock
:Type: mutex
:Arch: any
:Protects: - vm_list
:Name: kvm_count_lock
:Type: raw_spinlock_t
:Arch: any
:Protects: - hardware virtualization enable/disable
:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
migration.
:Name: kvm_arch::tsc_write_lock
:Type: raw_spinlock
:Arch: x86
:Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
- tsc offset in vmcb
:Comment: 'raw' because updating the tsc offsets must not be preempted.
:Name: kvm->mmu_lock
:Type: spinlock_t
:Arch: any
:Protects: - shadow page/shadow tlb entry
:Comment: it is a spinlock since it is used in mmu notifier.
:Name: kvm->srcu
:Type: srcu lock
:Arch: any
:Protects: - kvm->memslots
- kvm->buses
:Comment: The srcu read lock must be held while accessing memslots (e.g.
when using gfn_to_* functions) and while accessing in-kernel
MMIO/PIO address->device structure mapping (kvm->buses).
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
if it is needed by multiple functions.
:Name: blocked_vcpu_on_cpu_lock
:Type: spinlock_t
:Arch: x86
:Protects: blocked_vcpu_on_cpu
:Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
When VT-d posted-interrupts are supported and the VM has assigned
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
protected by blocked_vcpu_on_cpu_lock. When the VT-d hardware issues
a wakeup notification event (because an external interrupt from an
assigned device arrived), we find the vCPU on that list and wake
it up.
View file
@ -1,215 +0,0 @@
KVM Lock Overview
=================
1. Acquisition Orders
---------------------
The acquisition orders for mutexes are as follows:
- kvm->lock is taken outside vcpu->mutex
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
them together is quite rare.
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
Everything else is a leaf: no other lock is taken inside the critical
sections.
2: Exception
------------
Fast page fault:
Fast page fault is the fast path which fixes the guest page fault out of
the mmu-lock on x86. Currently, the page fault can be fast in one of the
following two cases:
1. Access Tracking: The SPTE is not present, but it is marked for access
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
restore the saved R/X bits. This is described in more detail later below.
2. Write-Protection: The SPTE is present and the fault is
caused by write-protect. That means we just need to change the W bit of the
spte.
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
SPTE_MMU_WRITEABLE bit on the spte:
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
the gfn is writable on guest mmu and it is not write-protected by shadow
page write-protection.
On fast page fault path, we will use cmpxchg to atomically set the spte W
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
is safe because whenever changing these bits can be detected by cmpxchg.
But we need carefully check these cases:
1): The mapping from gfn to pfn
The mapping from gfn to pfn may be changed since we can only ensure the pfn
is not changed during cmpxchg. This is a ABA problem, for example, below case
will happen:
At the beginning:
gpte = gfn1
gfn1 is mapped to pfn1 on host
spte is the shadow page table entry corresponding with gpte and
spte = pfn1
VCPU 0 VCPU0
on fast page fault path:
old_spte = *spte;
pfn1 is swapped out:
spte = 0;
pfn1 is re-alloced for gfn2.
gpte is changed to point to
gfn2 by the guest:
spte = pfn1;
if (cmpxchg(spte, old_spte, old_spte+W)
mark_page_dirty(vcpu->kvm, gfn1)
OOPS!!!
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
For direct sp, we can easily avoid it since the spte of direct sp is fixed
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
to pin gfn to pfn, because after gfn_to_pfn_atomic():
- We have held the refcount of pfn that means the pfn can not be freed and
be reused for another gfn.
- The pfn is writable that means it can not be shared between different gfns
by KSM.
Then, we can ensure the dirty bitmaps is correctly set for a gfn.
Currently, to simplify the whole things, we disable fast page fault for
indirect shadow page.
2): Dirty bit tracking
In the origin code, the spte can be fast updated (non-atomically) if the
spte is read-only and the Accessed bit has already been set since the
Accessed bit and Dirty bit can not be lost.
But it is not true after fast page fault since the spte can be marked
writable between reading spte and updating spte. Like below case:
At the beginning:
spte.W = 0
spte.Accessed = 1
VCPU 0 VCPU0
In mmu_spte_clear_track_bits():
old_spte = *spte;
/* 'if' condition is satisfied. */
if (old_spte.Accessed == 1 &&
old_spte.W == 0)
spte = 0ull;
on fast page fault path:
spte.W = 1
memory write on the spte:
spte.Dirty = 1
else
old_spte = xchg(spte, 0ull)
if (old_spte.Accessed == 1)
kvm_set_pfn_accessed(spte.pfn);
if (old_spte.Dirty == 1)
kvm_set_pfn_dirty(spte.pfn);
OOPS!!!
The Dirty bit is lost in this case.
In order to avoid this kind of issue, we always treat the spte as "volatile"
if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
the spte is always atomically updated in this case.
3): flush tlbs due to spte updated
If the spte is updated from writable to readonly, we should flush all TLBs,
otherwise rmap_write_protect will find a read-only spte, even though the
writable spte might be cached on a CPU's TLB.
As mentioned before, the spte can be updated to writable out of mmu-lock on
fast page fault path, in order to easily audit the path, we see if TLBs need
be flushed caused by this reason in mmu_spte_update() since this is a common
function to update spte (present -> present).
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
atomically update the spte, the race caused by fast page fault can be avoided,
See the comments in spte_has_volatile_bits() and mmu_spte_update().
Lockless Access Tracking:
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
bits. In this case, when the KVM MMU notifier is called to track accesses to a
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
by clearing the RWX bits in the PTE and storing the original R & X bits in
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
PTE (using the ignored bit 62). When the VM tries to access the page later on,
a fault is generated and the fast page fault mechanism described above is used
to atomically restore the PTE to a Present state. The W bit is not saved when
the PTE is marked for access tracking and during restoration to the Present
state, the W bit is set depending on whether or not it was a write access. If
it wasn't, then the W bit will remain clear until a write access happens, at
which time it will be set using the Dirty tracking mechanism described above.
3. Reference
------------
Name: kvm_lock
Type: mutex
Arch: any
Protects: - vm_list
Name: kvm_count_lock
Type: raw_spinlock_t
Arch: any
Protects: - hardware virtualization enable/disable
Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
migration.
Name: kvm_arch::tsc_write_lock
Type: raw_spinlock
Arch: x86
Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
- tsc offset in vmcb
Comment: 'raw' because updating the tsc offsets must not be preempted.
Name: kvm->mmu_lock
Type: spinlock_t
Arch: any
Protects: -shadow page/shadow tlb entry
Comment: it is a spinlock since it is used in mmu notifier.
Name: kvm->srcu
Type: srcu lock
Arch: any
Protects: - kvm->memslots
- kvm->buses
Comment: The srcu read lock must be held while accessing memslots (e.g.
when using gfn_to_* functions) and while accessing in-kernel
MMIO/PIO address->device structure mapping (kvm->buses).
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
if it is needed by multiple functions.
Name: blocked_vcpu_on_cpu_lock
Type: spinlock_t
Arch: x86
Protects: blocked_vcpu_on_cpu
Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
When VT-d posted-interrupts is supported and the VM has assigned
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
wakeup notification event since external interrupts from the
assigned devices happens, we will find the vCPU on the list to
wakeup.
View file
@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
======================
The x86 kvm shadow mmu
======================
@ -7,27 +10,37 @@ physical addresses to host physical addresses.
The mmu code attempts to satisfy the following requirements:
- correctness: the guest should not be able to determine that it is running
- correctness:
the guest should not be able to determine that it is running
on an emulated mmu except for timing (we attempt to comply
with the specification, not emulate the characteristics of
a particular implementation such as tlb size)
- security: the guest must not be able to touch host memory not assigned
- security:
the guest must not be able to touch host memory not assigned
to it
- performance: minimize the performance penalty imposed by the mmu
- scaling: need to scale to large memory and large vcpu guests
- hardware: support the full range of x86 virtualization hardware
- integration: Linux memory management code must be in control of guest memory
- performance:
minimize the performance penalty imposed by the mmu
- scaling:
need to scale to large memory and large vcpu guests
- hardware:
support the full range of x86 virtualization hardware
- integration:
Linux memory management code must be in control of guest memory
so that swapping, page migration, page merging, transparent
hugepages, and similar features work without change
- dirty tracking: report writes to guest memory to enable live migration
- dirty tracking:
report writes to guest memory to enable live migration
and framebuffer-based displays
- footprint: keep the amount of pinned kernel memory low (most memory
- footprint:
keep the amount of pinned kernel memory low (most memory
should be shrinkable)
- reliability: avoid multipage or GFP_ATOMIC allocations
- reliability:
avoid multipage or GFP_ATOMIC allocations
Acronyms
========
==== ====================================================================
pfn  host page frame number
hpa  host physical address
hva  host virtual address
@ -41,6 +54,7 @@ pte page table entry (used also to refer generically to paging structure
gpte guest pte (referring to gfns)
spte shadow pte (referring to pfns)
tdp  two dimensional paging (vendor neutral term for NPT and EPT)
==== ====================================================================
Virtual and real hardware supported
===================================
@ -90,11 +104,13 @@ Events
The mmu is driven by events, some from the guest, some from the host.
Guest generated events:
- writes to control registers (especially cr3)
- invlpg/invlpga instruction execution
- access to missing or protected translations
Host generated events:
- changes in the gpa->hpa translation (either through gpa->hva changes or
through hva->hpa changes)
- memory pressure (the shrinker)
@ -117,16 +133,19 @@ Leaf ptes point at guest pages.
The following table shows translations encoded by leaf ptes, with higher-level
translations in parentheses:
Non-nested guests:
Non-nested guests::
nonpaging: gpa->hpa
paging: gva->gpa->hpa
paging, tdp: (gva->)gpa->hpa
Nested guests:
Nested guests::
non-tdp: ngva->gpa->hpa (*)
tdp: (ngva->)ngpa->gpa->hpa
(*) the guest hypervisor will encode the ngva->gpa translation into its page
tables if npt is not present
(*) the guest hypervisor will encode the ngva->gpa translation into its page
tables if npt is not present
Shadow pages contain the following information:
role.level:
@ -291,28 +310,41 @@ Handling a page fault is performed as follows:
- if the RSV bit of the error code is set, the page fault is caused by guest
accessing MMIO and cached MMIO information is available.
- walk shadow page table
- check for valid generation number in the spte (see "Fast invalidation of
MMIO sptes" below)
- cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and
vcpu->arch.mmio_gfn, and call the emulator
- If both P bit and R/W bit of error code are set, this could possibly
be handled as a "fast page fault" (fixed without taking the MMU lock). See
the description in Documentation/virt/kvm/locking.txt.
- if needed, walk the guest page tables to determine the guest translation
(gva->gpa or ngpa->gpa)
- if permissions are insufficient, reflect the fault back to the guest
- determine the host page
- if this is an mmio request, there is no host page; cache the info to
vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn
- walk the shadow page table to find the spte for the translation,
instantiating missing intermediate page tables as necessary
- If this is an mmio request, cache the mmio info to the spte and set some
reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
- try to unsynchronize the page
- if successful, we can let the guest continue and modify the gpte
- emulate the instruction
- if failed, unshadow the page and let the guest continue
- update any translations that were modified by the instruction
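The first two decision points of the walk above correspond roughly to the
following checks (an illustrative sketch; the PFERR_* masks are the
architectural #PF error-code bits, the helper names are approximations)::

  #define PFERR_PRESENT_MASK (1U << 0)
  #define PFERR_WRITE_MASK   (1U << 1)
  #define PFERR_RSVD_MASK    (1U << 3)

  if (error_code & PFERR_RSVD_MASK)
          return handle_mmio(vcpu, addr);     /* cached MMIO info */

  if ((error_code & (PFERR_PRESENT_MASK | PFERR_WRITE_MASK)) ==
      (PFERR_PRESENT_MASK | PFERR_WRITE_MASK) &&
      fast_page_fault(vcpu, addr, error_code))
          return RET_PF_FIXED;                /* fixed without mmu_lock */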
invlpg handling:
@ -324,10 +356,12 @@ invlpg handling:
Guest control register updates:
- mov to cr3
- look up new shadow roots
- synchronize newly reachable shadow pages
- mov to cr0/cr4/efer
- set up mmu context for new paging mode
- look up new shadow roots
- synchronize newly reachable shadow pages
@ -358,6 +392,7 @@ on fault type:
(user write faults generate a #PF)
In the first case there are two additional complications:
- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
the kernel may now execute it. We handle this by also setting spte.nx.
If we get a user fetch or read fault, we'll change spte.u=1 and
@ -446,4 +481,3 @@ Further reading
- NPT presentation from KVM Forum 2008
http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf
View file
@ -1,6 +1,10 @@
KVM-specific MSRs.
Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
=====================================================
.. SPDX-License-Identifier: GPL-2.0
=================
KVM-specific MSRs
=================
:Author: Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
KVM makes use of some custom MSRs to service some requests.
@ -9,34 +13,39 @@ Custom MSRs have a range reserved for them, that goes from
but they are deprecated and their use is discouraged.
Custom MSR list
--------
---------------
The current supported Custom MSR list is:
MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
MSR_KVM_WALL_CLOCK_NEW:
0x4b564d00
data: 4-byte alignment physical address of a memory area which must be
data:
4-byte aligned physical address of a memory area which must be
in guest RAM. This memory is expected to hold a copy of the following
structure:
structure::
struct pvclock_wall_clock {
struct pvclock_wall_clock {
u32 version;
u32 sec;
u32 nsec;
} __attribute__((__packed__));
} __attribute__((__packed__));
whose data will be filled in by the hypervisor. The hypervisor is only
guaranteed to update this data at the moment of MSR write.
Users that want to reliably query this information more than once have
to write more than once to this MSR. Fields have the following meanings:
version: guest has to check version before and after grabbing
version:
guest has to check version before and after grabbing
time information and check that they are both equal and even.
An odd version indicates an in-progress update.
sec: number of seconds for wallclock at time of boot.
sec:
number of seconds for wallclock at time of boot.
nsec: number of nanoseconds for wallclock at time of boot.
nsec:
number of nanoseconds for wallclock at time of boot.
In order to get the current wallclock time, the system_time from
MSR_KVM_SYSTEM_TIME_NEW needs to be added.
@ -47,13 +56,15 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
leaf prior to usage.
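The even/odd version protocol amounts to a seqcount-style read loop on the
guest side; a minimal sketch (rmb() is the Linux read barrier)::

  static void read_wallclock(const struct pvclock_wall_clock *wc,
                             u32 *sec, u32 *nsec)
  {
          u32 v;

          do {
                  v = wc->version;
                  rmb();                  /* order version vs. fields */
                  *sec  = wc->sec;
                  *nsec = wc->nsec;
                  rmb();
          } while ((v & 1) || v != wc->version);  /* odd: update running */
  }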
MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
MSR_KVM_SYSTEM_TIME_NEW:
0x4b564d01
data: 4-byte aligned physical address of a memory area which must be in
data:
4-byte aligned physical address of a memory area which must be in
guest RAM, plus an enable bit in bit 0. This memory is expected to hold
a copy of the following structure:
a copy of the following structure::
struct pvclock_vcpu_time_info {
struct pvclock_vcpu_time_info {
u32 version;
u32 pad0;
u64 tsc_timestamp;
@ -62,7 +73,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
s8 tsc_shift;
u8 flags;
u8 pad[2];
} __attribute__((__packed__)); /* 32 bytes */
} __attribute__((__packed__)); /* 32 bytes */
whose data will be filled in by the hypervisor periodically. Only one
write, or registration, is needed for each VCPU. The interval between
@ -72,23 +83,28 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
Fields have the following meanings:
version: guest has to check version before and after grabbing
version:
guest has to check version before and after grabbing
time information and check that they are both equal and even.
An odd version indicates an in-progress update.
tsc_timestamp: the tsc value at the current VCPU at the time
tsc_timestamp:
the tsc value at the current VCPU at the time
of the update of this structure. Guests can subtract this value
from current tsc to derive a notion of elapsed time since the
structure update.
system_time: a host notion of monotonic time, including sleep
system_time:
a host notion of monotonic time, including sleep
time at the time this structure was last updated. Unit is
nanoseconds.
tsc_to_system_mul: multiplier to be used when converting
tsc_to_system_mul:
multiplier to be used when converting
tsc-related quantity to nanoseconds
tsc_shift: shift to be used when converting tsc-related
tsc_shift:
shift to be used when converting tsc-related
quantity to nanoseconds. This shift will ensure that
multiplication with tsc_to_system_mul does not overflow.
A positive value denotes a left shift, a negative value
@ -96,7 +112,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
The conversion from tsc to nanoseconds involves an additional
right shift by 32 bits. With this information, guests can
derive per-CPU time by doing:
derive per-CPU time by doing::
time = (current_tsc - tsc_timestamp)
if (tsc_shift >= 0)
@ -106,29 +122,34 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
time = (time * tsc_to_system_mul) >> 32
time = time + system_time
flags: bits in this field indicate extended capabilities
flags:
bits in this field indicate extended capabilities
coordinated between the guest and the hypervisor. Availability
of specific flags has to be checked in 0x40000001 cpuid leaf.
Current flags are:
flag bit | cpuid bit | meaning
-------------------------------------------------------------
| | time measures taken across
0 | 24 | multiple cpus are guaranteed to
| | be monotonic
-------------------------------------------------------------
| | guest vcpu has been paused by
1 | N/A | the host
| | See 4.70 in api.txt
-------------------------------------------------------------
+-----------+--------------+----------------------------------+
| flag bit | cpuid bit | meaning |
+-----------+--------------+----------------------------------+
| | | time measures taken across |
| 0 | 24 | multiple cpus are guaranteed to |
| | | be monotonic |
+-----------+--------------+----------------------------------+
| | | guest vcpu has been paused by |
| 1 | N/A | the host |
| | | See 4.70 in api.txt |
+-----------+--------------+----------------------------------+
Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
leaf prior to usage.
MSR_KVM_WALL_CLOCK: 0x11
MSR_KVM_WALL_CLOCK:
0x11
data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
data and functioning:
same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
This MSR falls outside the reserved KVM range and may be removed in the
future. Its usage is deprecated.
@ -136,9 +157,11 @@ MSR_KVM_WALL_CLOCK: 0x11
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
leaf prior to usage.
MSR_KVM_SYSTEM_TIME: 0x12
MSR_KVM_SYSTEM_TIME:
0x12
data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
data and functioning:
same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
This MSR falls outside the reserved KVM range and may be removed in the
future. Its usage is deprecated.
@ -146,7 +169,7 @@ MSR_KVM_SYSTEM_TIME: 0x12
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
leaf prior to usage.
The suggested algorithm for detecting kvmclock presence is then:
The suggested algorithm for detecting kvmclock presence is then::
if (!kvm_para_available()) /* refer to cpuid.txt */
return NON_PRESENT;
@ -163,8 +186,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
} else
return NON_PRESENT;
MSR_KVM_ASYNC_PF_EN: 0x4b564d02
data: Bits 63-6 hold 64-byte aligned physical address of a
MSR_KVM_ASYNC_PF_EN:
0x4b564d02
data:
Bits 63-6 hold 64-byte aligned physical address of a
64 byte memory area which must be in guest RAM and must be
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
when asynchronous page faults are enabled on the vcpu 0 when
@ -200,20 +226,22 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
Currently type 2 APF will always be delivered on the same vcpu as
type 1 was, but guest should not rely on that.
MSR_KVM_STEAL_TIME: 0x4b564d03
MSR_KVM_STEAL_TIME:
0x4b564d03
data: 64-byte alignment physical address of a memory area which must be
data:
64-byte aligned physical address of a memory area which must be
in guest RAM, plus an enable bit in bit 0. This memory is expected to
hold a copy of the following structure:
hold a copy of the following structure::
struct kvm_steal_time {
struct kvm_steal_time {
__u64 steal;
__u32 version;
__u32 flags;
__u8 preempted;
__u8 u8_pad[3];
__u32 pad[11];
}
}
whose data will be filled in by the hypervisor periodically. Only one
write, or registration, is needed for each VCPU. The interval between
@ -224,25 +252,32 @@ MSR_KVM_STEAL_TIME: 0x4b564d03
Fields have the following meanings:
version: a sequence counter. In other words, guest has to check
version:
a sequence counter. In other words, guest has to check
this field before and after grabbing time information and make
sure they are both equal and even. An odd version indicates an
in-progress update.
flags: At this point, always zero. May be used to indicate
flags:
At this point, always zero. May be used to indicate
changes in this structure in the future.
steal: the amount of time in which this vCPU did not run, in
steal:
the amount of time in which this vCPU did not run, in
nanoseconds. Time during which the vcpu is idle will not be
reported as steal time.
preempted: indicate the vCPU who owns this struct is running or
preempted:
indicates whether the vCPU that owns this struct is running or
not. Non-zero values mean the vCPU has been preempted. Zero
means the vCPU is not preempted. NOTE: it is always zero if the
hypervisor doesn't support this field.
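Registering the area from a guest amounts to a single MSR write per vCPU;
an illustrative sketch using Linux-style helpers (wrmsrl(), __pa())::

  static DEFINE_PER_CPU(struct kvm_steal_time, steal_time)
          __attribute__((aligned(64)));

  /* bit 0 enables the record; bits 63-6 carry the physical address */
  wrmsrl(MSR_KVM_STEAL_TIME,
         (u64)__pa(this_cpu_ptr(&steal_time)) | 1);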
MSR_KVM_EOI_EN: 0x4b564d04
data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
MSR_KVM_EOI_EN:
0x4b564d04
data:
Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
when disabled. Bit 1 is reserved and must be zero. When PV end of
interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
physical address of a 4 byte memory area which must be in guest RAM and
@ -274,11 +309,13 @@ MSR_KVM_EOI_EN: 0x4b564d04
clear it using a single CPU instruction, such as test and clear, or
compare and exchange.
MSR_KVM_POLL_CONTROL: 0x4b564d05
MSR_KVM_POLL_CONTROL:
0x4b564d05
Control host-side polling.
data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
data:
Bit 0 enables (1) or disables (0) host-side HLT polling logic.
KVM guests can request the host not to poll on HLT, for example if
they are performing polling themselves.
View file
@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
==========
Nested VMX
==========
@ -41,9 +44,9 @@ No modifications are required to user space (qemu). However, qemu's default
emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
explicitly enabled, by giving qemu one of the following options:
-cpu host (emulated CPU has all features of the real CPU)
- -cpu host (emulated CPU has all features of the real CPU)
-cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
- -cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
ABIs
@ -75,6 +78,8 @@ of this structure changes, this can break live migration across KVM versions.
VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
struct shadow_vmcs is ever changed.
::
typedef u64 natural_width;
struct __packed vmcs12 {
/* According to the Intel spec, a VMCS region must start with
@ -220,21 +225,21 @@ Authors
-------
These patches were written by:
Abel Gordon, abelg <at> il.ibm.com
Nadav Har'El, nyh <at> il.ibm.com
Orit Wasserman, oritw <at> il.ibm.com
Ben-Ami Yassor, benami <at> il.ibm.com
Muli Ben-Yehuda, muli <at> il.ibm.com
- Abel Gordon, abelg <at> il.ibm.com
- Nadav Har'El, nyh <at> il.ibm.com
- Orit Wasserman, oritw <at> il.ibm.com
- Ben-Ami Yassor, benami <at> il.ibm.com
- Muli Ben-Yehuda, muli <at> il.ibm.com
With contributions by:
Anthony Liguori, aliguori <at> us.ibm.com
Mike Day, mdday <at> us.ibm.com
Michael Factor, factor <at> il.ibm.com
Zvi Dubitzky, dubi <at> il.ibm.com
- Anthony Liguori, aliguori <at> us.ibm.com
- Mike Day, mdday <at> us.ibm.com
- Michael Factor, factor <at> il.ibm.com
- Zvi Dubitzky, dubi <at> il.ibm.com
And valuable reviews by:
Avi Kivity, avi <at> redhat.com
Gleb Natapov, gleb <at> redhat.com
Marcelo Tosatti, mtosatti <at> redhat.com
Kevin Tian, kevin.tian <at> intel.com
and others.
- Avi Kivity, avi <at> redhat.com
- Gleb Natapov, gleb <at> redhat.com
- Marcelo Tosatti, mtosatti <at> redhat.com
- Kevin Tian, kevin.tian <at> intel.com
- and others.
View file
@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
=================================
The PPC KVM paravirtual interface
=================================
@ -34,8 +37,9 @@ up the hypercall. To call a hypercall, just call these instructions.
The parameters are as follows:
======== ================ ================
Register IN               OUT
======== ================ ================
r0       -                volatile
r3       1st parameter    Return code
r4       2nd parameter    1st output value
@ -47,6 +51,7 @@ The parameters are as follows:
r10      8th parameter    7th output value
r11      hypercall number 8th output value
r12      -                volatile
======== ================ ================
Hypercall definitions are shared in generic code, so the same hypercall numbers
apply for x86 and powerpc alike with the exception that each KVM hypercall
@ -54,11 +59,13 @@ also needs to be ORed with the KVM vendor code which is (42 << 16).
Return codes can be as follows:
==== =========================
Code Meaning
==== =========================
0    Success
12   Hypercall not implemented
<0   Error
==== =========================
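Putting the two tables together, a guest-side wrapper might look like this
heavily hedged sketch; the real hypercall instruction sequence is provided
via the device tree as described earlier, so the "sc" below is only a
stand-in::

  static long kvm_hypercall1(unsigned long nr, unsigned long param)
  {
          register unsigned long r3  asm("r3")  = param;
          register unsigned long r11 asm("r11") = nr | (42 << 16);

          asm volatile("sc"  /* stand-in for the patched sequence */
                       : "+r"(r3), "+r"(r11)
                       :
                       : "r0", "r4", "r5", "r6", "r7", "r8", "r9",
                         "r10", "r12", "memory");
          return r3;        /* 0, 12 or <0 as per the table above */
  }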
The magic page
==============
@ -72,7 +79,7 @@ desired location. The first parameter indicates the effective address when the
MMU is enabled. The second parameter indicates the address in real mode, if
applicable to the target. For now, we always map the page to -4096. This way we
can access it using absolute load and store functions. The following
instruction reads the first field of the magic page:
instruction reads the first field of the magic page::
ld rX, -4096(0)
@ -93,8 +100,10 @@ a bitmap of available features inside the magic page.
The following enhancements to the magic page are currently available:
============================ =======================================
KVM_MAGIC_FEAT_SR            Maps SR registers r/w in the magic page
KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs
============================ =======================================
For enhanced features in the magic page, please check for the existence of the
feature before using them!
@ -121,8 +130,8 @@ when entering the guest or don't have any impact on the hypervisor's behavior.
The following bits are safe to be set inside the guest:
MSR_EE
MSR_RI
- MSR_EE
- MSR_RI
If any other bit changes in the MSR, please still use mtmsr(d).
@ -138,9 +147,9 @@ guest. Implementing any of those mappings is optional, as the instruction traps
also act on the shared page. So calling privileged instructions still works as
before.
======================= ================================
From                    To
======================= ================================
mfmsr rX                ld rX, magic_page->msr
mfsprg rX, 0            ld rX, magic_page->sprg0
mfsprg rX, 1            ld rX, magic_page->sprg1
@ -173,7 +182,7 @@ mtsrin rX, rY b <special mtsrin section>
[BookE only]
wrteei [0|1]            b <special wrteei section>
======================= ================================
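As a hedged sketch of what the patched forms above amount to in C (the
struct layout is illustrative only; the real shared-page layout lives in the
kernel headers)::

  /* The magic page is mapped at -4096, so privileged reads such as
   * mfmsr become plain loads from that page. */
  struct magic_page_sketch {
          unsigned long msr;
          unsigned long sprg0;
          unsigned long sprg1;
          /* ... */
  };

  static inline unsigned long guest_read_msr(void)
  {
          const struct magic_page_sketch *mp = (void *)-4096L;

          return mp->msr;
  }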
Some instructions require more logic to determine what's going on than a load
or store instruction can deliver. To enable patching of those, we keep some
@ -191,6 +200,7 @@ for example.
Hypercall ABIs in KVM on PowerPC
=================================
1) KVM hypercalls (ePAPR)
These are ePAPR compliant hypercall implementation (mentioned above). Even


@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
================================
Review checklist for kvm patches
================================


@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
=============================
The s390 DIAGNOSE call on KVM
=============================
@ -16,12 +19,12 @@ DIAGNOSE calls by the guest cause a mandatory intercept. This implies
all supported DIAGNOSE calls need to be handled by either KVM or its
userspace.
All DIAGNOSE calls supported by KVM use the RS-a format:
All DIAGNOSE calls supported by KVM use the RS-a format::
--------------------------------------
| '83' | R1 | R3 | B2 | D2 |
--------------------------------------
0 8 12 16 20 31
--------------------------------------
| '83' | R1 | R3 | B2 | D2 |
--------------------------------------
0 8 12 16 20 31
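A minimal decoding sketch of the format above (bit 0 is the most significant
bit in s390 numbering; the struct is illustrative, not a kernel type)::

  #include <stdint.h>

  struct diag_rs_a {
          unsigned int opcode;    /* bits 0-7, always 0x83 */
          unsigned int r1;        /* bits 8-11 */
          unsigned int r3;        /* bits 12-15 */
          unsigned int b2;        /* bits 16-19 */
          unsigned int d2;        /* bits 20-31 */
  };

  static struct diag_rs_a diag_decode(uint32_t insn)
  {
          return (struct diag_rs_a){
                  .opcode = insn >> 24,
                  .r1     = (insn >> 20) & 0xf,
                  .r3     = (insn >> 16) & 0xf,
                  .b2     = (insn >> 12) & 0xf,
                  .d2     = insn & 0xfff,
          };
  }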
The second-operand address (obtained by the base/displacement calculation)
is not used to address data. Instead, bits 48-63 of this address specify

View file

@ -1,17 +1,21 @@
.. SPDX-License-Identifier: GPL-2.0
Timekeeping Virtualization for X86-Based Architectures
======================================================
Timekeeping Virtualization for X86-Based Architectures
======================================================
Zachary Amsden <zamsden@redhat.com>
Copyright (c) 2010, Red Hat. All rights reserved.
:Author: Zachary Amsden <zamsden@redhat.com>
:Copyright: (c) 2010, Red Hat. All rights reserved.
1) Overview
2) Timing Devices
3) TSC Hardware
4) Virtualization Problems
.. Contents
=========================================================================
1) Overview
2) Timing Devices
3) TSC Hardware
4) Virtualization Problems
1) Overview
1. Overview
===========
One of the most complicated parts of the X86 platform, and specifically,
the virtualization of this platform is the plethora of timing devices available
@ -27,15 +31,15 @@ The purpose of this document is to collect data and information relevant to
timekeeping which may be difficult to find elsewhere, specifically,
information relevant to KVM and hardware-based virtualization.
=========================================================================
2) Timing Devices
2. Timing Devices
=================
First we discuss the basic hardware devices available. TSC and the related
KVM clock are special enough to warrant a full exposition and are described in
the following section.
2.1) i8254 - PIT
2.1. i8254 - PIT
----------------
One of the first timer devices available is the programmable interrupt timer,
or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
@ -50,13 +54,13 @@ The PIT uses I/O ports 0x40 - 0x43. Access to the 16-bit counters is done
using single or multiple byte access to the I/O ports. There are 6 modes
available, but not all modes are available to all timers, as only timer 2
has a connected gate input, required for modes 1 and 5. The gate line is
controlled by port 61h, bit 0, as illustrated in the following diagram.
controlled by port 61h, bit 0, as illustrated in the following diagram::
-------------- ----------------
| | | |
| 1.1932 MHz |---------->| CLOCK OUT | ---------> IRQ 0
| Clock | | | |
-------------- | +->| GATE TIMER 0 |
-------------- ----------------
| | | |
| 1.1932 MHz|---------->| CLOCK OUT | ---------> IRQ 0
| Clock | | | |
-------------- | +->| GATE TIMER 0 |
| ----------------
|
| ----------------
@ -70,29 +74,33 @@ controlled by port 61h, bit 0, as illustrated in the following diagram.
| | |
|------>| CLOCK OUT | ---------> Port 61h, bit 5
| | |
Port 61h, bit 0 ---------->| GATE TIMER 2 | \_.---- ____
Port 61h, bit 0 -------->| GATE TIMER 2 | \_.---- ____
---------------- _| )--|LPF|---Speaker
/ *---- \___/
Port 61h, bit 1 -----------------------------------/
Port 61h, bit 1 ---------------------------------/
The timer modes are now described.
Mode 0: Single Timeout. This is a one-shot software timeout that counts down
Mode 0: Single Timeout.
This is a one-shot software timeout that counts down
when the gate is high (always true for timers 0 and 1). When the count
reaches zero, the output goes high.
Mode 1: Triggered One-shot. The output is initially set high. When the gate
Mode 1: Triggered One-shot.
The output is initially set high. When the gate
line is set high, a countdown is initiated (which does not stop if the gate is
lowered), during which the output is set low. When the count reaches zero,
the output goes high.
Mode 2: Rate Generator. The output is initially set high. When the countdown
Mode 2: Rate Generator.
The output is initially set high. When the countdown
reaches 1, the output goes low for one count and then returns high. The value
is reloaded and the countdown automatically resumes. If the gate line goes
low, the count is halted. If the output is low when the gate is lowered, the
output automatically goes high (this only affects timer 2).
Mode 3: Square Wave. This generates a high / low square wave. The count
Mode 3: Square Wave.
This generates a high / low square wave. The count
determines the length of the pulse, which alternates between high and low
when zero is reached. The count only proceeds when gate is high and is
automatically reloaded on reaching zero. The count is decremented twice at
@ -103,12 +111,14 @@ Mode 3: Square Wave. This generates a high / low square wave. The count
values are not observed when reading. This is the intended mode for timer 2,
which generates sine-like tones by low-pass filtering the square wave output.
Mode 4: Software Strobe. After programming this mode and loading the counter,
Mode 4: Software Strobe.
After programming this mode and loading the counter,
the output remains high until the counter reaches zero. Then the output
goes low for 1 clock cycle and returns high. The counter is not reloaded.
Counting only occurs when gate is high.
Mode 5: Hardware Strobe. After programming and loading the counter, the
Mode 5: Hardware Strobe.
After programming and loading the counter, the
output remains high. When the gate is raised, a countdown is initiated
(which does not stop if the gate is lowered). When the counter reaches zero,
the output goes low for 1 clock cycle and then returns high. The counter is
@ -118,49 +128,49 @@ In addition to normal binary counting, the PIT supports BCD counting. The
command port, 0x43 is used to set the counter and mode for each of the three
timers.
PIT commands, issued to port 0x43, using the following bit encoding:
PIT commands, issued to port 0x43, using the following bit encoding::
Bit 7-4: Command (See table below)
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
Bit 0 : Binary (0) / BCD (1)
Bit 7-4: Command (See table below)
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
Bit 0 : Binary (0) / BCD (1)
Command table:
Command table::
0000 - Latch Timer 0 count for port 0x40
0000 - Latch Timer 0 count for port 0x40
sample and hold the count to be read in port 0x40;
additional commands ignored until counter is read;
mode bits ignored.
0001 - Set Timer 0 LSB mode for port 0x40
0001 - Set Timer 0 LSB mode for port 0x40
set timer to read LSB only and force MSB to zero;
mode bits set timer mode
0010 - Set Timer 0 MSB mode for port 0x40
0010 - Set Timer 0 MSB mode for port 0x40
set timer to read MSB only and force LSB to zero;
mode bits set timer mode
0011 - Set Timer 0 16-bit mode for port 0x40
0011 - Set Timer 0 16-bit mode for port 0x40
set timer to read / write LSB first, then MSB;
mode bits set timer mode
0100 - Latch Timer 1 count for port 0x41 - as described above
0101 - Set Timer 1 LSB mode for port 0x41 - as described above
0110 - Set Timer 1 MSB mode for port 0x41 - as described above
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
0100 - Latch Timer 1 count for port 0x41 - as described above
0101 - Set Timer 1 LSB mode for port 0x41 - as described above
0110 - Set Timer 1 MSB mode for port 0x41 - as described above
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
1000 - Latch Timer 2 count for port 0x42 - as described above
1001 - Set Timer 2 LSB mode for port 0x42 - as described above
1010 - Set Timer 2 MSB mode for port 0x42 - as described above
1011 - Set Timer 2 16-bit mode for port 0x42 as described above
1000 - Latch Timer 2 count for port 0x42 - as described above
1001 - Set Timer 2 LSB mode for port 0x42 - as described above
1010 - Set Timer 2 MSB mode for port 0x42 - as described above
1011 - Set Timer 2 16-bit mode for port 0x42 as described above
1101 - General counter latch
1101 - General counter latch
Latch combination of counters into corresponding ports
Bit 3 = Counter 2
Bit 2 = Counter 1
Bit 1 = Counter 0
Bit 0 = Unused
1110 - Latch timer status
1110 - Latch timer status
Latch combination of counter mode into corresponding ports
Bit 3 = Counter 2
Bit 2 = Counter 1
@ -177,7 +187,8 @@ Command table:
Bit 3-1 = Mode
Bit 0 = Binary (0) / BCD mode (1)
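Putting the command encoding above to work, here is a hedged userspace sketch
(requires ioperm(); the helper name is illustrative) that programs timer 0 as
a mode-2 rate generator::

  #include <sys/io.h>

  static void pit_set_rate(unsigned int hz)
  {
          /* 1.193182 MHz base clock; divisor must fit in 16 bits */
          unsigned int divisor = 1193182 / hz;

          /* 0x34 = command 0011 (timer 0, 16-bit LSB/MSB), mode 010, binary */
          outb(0x34, 0x43);
          outb(divisor & 0xff, 0x40);             /* LSB */
          outb((divisor >> 8) & 0xff, 0x40);      /* MSB */
  }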
2.2) RTC
2.2. RTC
--------
The second device which was available in the original PC was the MC146818 real
time clock. The original device is now obsolete, and usually emulated by the
@ -201,21 +212,21 @@ in progress, as indicated in the status register.
The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
programmed to a 32kHz divider if the RTC is to count seconds.
This is the RAM map originally used for the RTC/CMOS:
This is the RAM map originally used for the RTC/CMOS::
Location Size Description
------------------------------------------
00h byte Current second (BCD)
01h byte Seconds alarm (BCD)
02h byte Current minute (BCD)
03h byte Minutes alarm (BCD)
04h byte Current hour (BCD)
05h byte Hours alarm (BCD)
06h byte Current day of week (BCD)
07h byte Current day of month (BCD)
08h byte Current month (BCD)
09h byte Current year (BCD)
0Ah byte Register A
Location Size Description
------------------------------------------
00h byte Current second (BCD)
01h byte Seconds alarm (BCD)
02h byte Current minute (BCD)
03h byte Minutes alarm (BCD)
04h byte Current hour (BCD)
05h byte Hours alarm (BCD)
06h byte Current day of week (BCD)
07h byte Current day of month (BCD)
08h byte Current month (BCD)
09h byte Current year (BCD)
0Ah byte Register A
bit 7 = Update in progress
bit 6-4 = Divider for clock
000 = 4.194 MHz
@ -234,7 +245,7 @@ Location Size Description
1101 = 125 mS
1110 = 250 mS
1111 = 500 mS
0Bh byte Register B
0Bh byte Register B
bit 7 = Run (0) / Halt (1)
bit 6 = Periodic interrupt enable
bit 5 = Alarm interrupt enable
@ -243,19 +254,20 @@ Location Size Description
bit 2 = BCD calendar (0) / Binary (1)
bit 1 = 12-hour mode (0) / 24-hour mode (1)
bit 0 = 0 (DST off) / 1 (DST enabled)
0Ch byte Register C (read only)
0Ch byte Register C (read only)
bit 7 = interrupt request flag (IRQF)
bit 6 = periodic interrupt flag (PF)
bit 5 = alarm interrupt flag (AF)
bit 4 = update interrupt flag (UF)
bit 3-0 = reserved
0Dh byte Register D (read only)
0Dh byte Register D (read only)
bit 7 = RTC has power
bit 6-0 = reserved
32h byte Current century BCD (*)
32h byte Current century BCD (*)
(*) location vendor specific and now determined from ACPI global tables
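A hedged sketch of reading the current second using the map above (0x70/0x71
are the conventional CMOS index/data ports; run with port access granted)::

  #include <sys/io.h>

  static unsigned int rtc_read(unsigned char reg)
  {
          outb(reg, 0x70);
          return inb(0x71);
  }

  static unsigned int rtc_current_second(void)
  {
          unsigned int bcd;

          /* Register A (0Ah), bit 7: update in progress - wait it out. */
          while (rtc_read(0x0a) & 0x80)
                  ;
          bcd = rtc_read(0x00);   /* location 00h: current second, BCD */

          return (bcd >> 4) * 10 + (bcd & 0x0f);
  }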
2.3) APIC
2.3. APIC
---------
On Pentium and later processors, an on-board timer is available to each CPU
as part of the Advanced Programmable Interrupt Controller. The APIC is
@ -276,7 +288,8 @@ timer is programmed through the LVT (local vector timer) register, is capable
of one-shot or periodic operation, and is based on the bus clock divided down
by the programmable divider register.
2.4) HPET
2.4. HPET
---------
HPET is quite complex, and was originally intended to replace the PIT / RTC
support of the X86 PC. It remains to be seen whether that will be the case, as
@ -297,7 +310,8 @@ indicated through ACPI tables by the BIOS.
Detailed specification of the HPET is beyond the current scope of this
document, as it is also very well documented elsewhere.
2.5) Offboard Timers
2.5. Offboard Timers
--------------------
Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
timing chips built into the cards which may have registers which are accessible
@ -307,9 +321,8 @@ general frowned upon as not playing by the agreed rules of the game. Such a
timer device would require additional support to be virtualized properly and is
not considered important at this time as no known operating system does this.
=========================================================================
3) TSC Hardware
3. TSC Hardware
===============
The TSC or time stamp counter is relatively simple in theory; it counts
instruction cycles issued by the processor, which can be used as a measure of
@ -340,7 +353,8 @@ allows the guest visible TSC to be offset by a constant. Newer implementations
promise to allow the TSC to additionally be scaled, but this hardware is not
yet widely available.
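For reference, reading the counter itself is a single instruction; a minimal
x86-64 sketch::

  #include <stdint.h>

  static inline uint64_t rdtsc(void)
  {
          uint32_t lo, hi;

          /* EDX:EAX receive the 64-bit counter; under VMX/SVM the value
           * already includes any hypervisor-programmed offset. */
          asm volatile("rdtsc" : "=a" (lo), "=d" (hi));

          return ((uint64_t)hi << 32) | lo;
  }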
3.1) TSC synchronization
3.1. TSC synchronization
------------------------
The TSC is a CPU-local clock in most implementations. This means, on SMP
platforms, the TSCs of different CPUs may start at different times depending
@ -357,7 +371,8 @@ practice, getting a perfectly synchronized TSC will not be possible unless all
values are read from the same clock, which generally only is possible on single
socket systems or those with special hardware support.
3.2) TSC and CPU hotplug
3.2. TSC and CPU hotplug
------------------------
As touched on already, CPUs which arrive later than the boot time of the system
may not have a TSC value that is synchronized with the rest of the system.
@ -367,7 +382,8 @@ a guarantee. This can have the effect of bringing a system from a state where
TSC is synchronized back to a state where TSC synchronization flaws, however
small, may be exposed to the OS and any virtualization environment.
3.3) TSC and multi-socket / NUMA
3.3. TSC and multi-socket / NUMA
--------------------------------
Multi-socket systems, especially large multi-socket systems are likely to have
individual clocksources rather than a single, universally distributed clock.
@ -385,7 +401,8 @@ standards for telecommunications and computer equipment.
It is recommended not to trust the TSCs to remain synchronized on NUMA or
multiple socket systems for these reasons.
3.4) TSC and C-states
3.4. TSC and C-states
---------------------
C-states, or idling states of the processor, especially C1E and deeper sleep
states may be problematic for TSC as well. The TSC may stop advancing in such
@ -396,7 +413,8 @@ based on CPU and chipset identifications.
The TSC in such a case may be corrected by catching it up to a known external
clocksource.
3.5) TSC frequency change / P-states
3.5. TSC frequency change / P-states
------------------------------------
To make things slightly more interesting, some CPUs may change frequency. They
may or may not run the TSC at the same rate, and because the frequency change
@ -416,14 +434,16 @@ other processors. In such cases, the TSC on halted CPUs could advance faster
than that of non-halted processors. AMD Turion processors are known to have
this problem.
3.6) TSC and STPCLK / T-states
3.6. TSC and STPCLK / T-states
------------------------------
External signals given to the processor may also have the effect of stopping
the TSC. This is typically done for thermal emergency power control to prevent
an overheating condition, and typically, there is no way to detect that this
condition has happened.
3.7) TSC virtualization - VMX
3.7. TSC virtualization - VMX
-----------------------------
VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
instructions, which is enough for full virtualization of TSC in any manner. In
@ -431,14 +451,16 @@ addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
field specified in the VMCS. Special instructions must be used to read and
write the VMCS field.
3.8) TSC virtualization - SVM
3.8. TSC virtualization - SVM
-----------------------------
SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
instructions, which is enough for full virtualization of TSC in any manner. In
addition, SVM allows passing through the host TSC plus an additional offset
field specified in the SVM control block.
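Both schemes implement the same arithmetic: the guest observes the host
counter plus a signed, hypervisor-controlled offset. As a sketch::

  #include <stdint.h>

  /* TSC_OFFSET in the VMCS (VMX) or the offset field in the SVM control
   * block; the offset is signed, so the guest TSC can appear to start
   * at zero or anywhere else. */
  static inline uint64_t guest_visible_tsc(uint64_t host_tsc, int64_t offset)
  {
          return host_tsc + (uint64_t)offset;
  }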
3.9) TSC feature bits in Linux
3.9. TSC feature bits in Linux
------------------------------
In summary, there is no way to guarantee the TSC remains in perfect
synchronization unless it is explicitly guaranteed by the architecture. Even
@ -448,13 +470,16 @@ despite being locally consistent.
The following feature bits are used by Linux to signal various TSC attributes,
but they can only be taken to be meaningful for UP or single node systems.
X86_FEATURE_TSC : The TSC is available in hardware
X86_FEATURE_RDTSCP : The RDTSCP instruction is available
X86_FEATURE_CONSTANT_TSC : The TSC rate is unchanged with P-states
X86_FEATURE_NONSTOP_TSC : The TSC does not stop in C-states
X86_FEATURE_TSC_RELIABLE : TSC sync checks are skipped (VMware)
========================= =======================================
X86_FEATURE_TSC           The TSC is available in hardware
X86_FEATURE_RDTSCP        The RDTSCP instruction is available
X86_FEATURE_CONSTANT_TSC  The TSC rate is unchanged with P-states
X86_FEATURE_NONSTOP_TSC   The TSC does not stop in C-states
X86_FEATURE_TSC_RELIABLE  TSC sync checks are skipped (VMware)
========================= =======================================
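As a hedged sketch of how such flags are typically consulted in kernel code
(the policy shown is illustrative only)::

  #include <asm/cpufeature.h>

  /* Trust the TSC as a clocksource only if its rate survives P-state
   * changes and it keeps counting in C-states; the UP/single-node
   * caveat from the text above still applies. */
  static bool tsc_looks_stable(void)
  {
          return boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
                 boot_cpu_has(X86_FEATURE_NONSTOP_TSC);
  }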
4) Virtualization Problems
4. Virtualization Problems
==========================
Timekeeping is especially problematic for virtualization because a number of
challenges arise. The most obvious problem is that time is now shared between
@ -473,7 +498,8 @@ BIOS, but not in such an extreme fashion. However, the fact that SMM mode may
cause similar problems to virtualization makes it a good justification for
solving many of these problems on bare metal.
4.1) Interrupt clocking
4.1. Interrupt clocking
-----------------------
One of the most immediate problems that occurs with legacy operating systems
is that the system timekeeping routines are often designed to keep track of
thus requires interrupt slewing to keep proper time. It does, however, use a
low enough rate (ed: is it 18.2 Hz?) that it has not yet been a problem in
practice.
4.2) TSC sampling and serialization
4.2. TSC sampling and serialization
-----------------------------------
As the highest precision time source available, the cycle counter of the CPU
has aroused much interest from developers. As explained above, this timer has
@ -524,7 +551,8 @@ it may be necessary for an implementation to guard against "backwards" reads of
the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
system.
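One common guard, sketched here under the assumption of a single reader (real
code needs a lock or atomics for the serialization reasons given above), is
to clamp each reading to the last value handed out::

  #include <stdint.h>

  static uint64_t monotonic_tsc_read(uint64_t raw)
  {
          static uint64_t last;

          if (raw < last)
                  raw = last;     /* never report a backwards step */
          else
                  last = raw;

          return raw;
  }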
4.3) Timespec aliasing
4.3. Timespec aliasing
----------------------
Additionally, this lack of serialization from the TSC poses another challenge
when using results of the TSC when measured against another time source. As
@ -548,7 +576,8 @@ This aliasing requires care in the computation and recalibration of kvmclock
and any other values derived from TSC computation (such as TSC virtualization
itself).
4.4) Migration
4.4. Migration
--------------
Migration of a virtual machine raises problems for timekeeping in two ways.
First, the migration itself may take time, during which interrupts cannot be
@ -566,7 +595,8 @@ always be caught up to the original rate. KVM clock avoids these problems by
simply storing multipliers and offsets against the TSC for the guest to convert
back into nanosecond resolution values.
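A hedged sketch of that conversion, with field names following the pvclock
ABI (treat them as illustrative here)::

  #include <stdint.h>

  static uint64_t kvmclock_ns(uint64_t tsc, uint64_t tsc_timestamp,
                              uint64_t system_time,
                              uint32_t tsc_to_system_mul, int8_t tsc_shift)
  {
          uint64_t delta = tsc - tsc_timestamp;

          delta = tsc_shift >= 0 ? delta << tsc_shift : delta >> -tsc_shift;

          /* tsc_to_system_mul is a 32.32 fixed-point multiplier */
          return system_time +
                 (uint64_t)(((__uint128_t)delta * tsc_to_system_mul) >> 32);
  }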
4.5) Scheduling
4.5. Scheduling
---------------
Since scheduling may be based on precise timing and firing of interrupts, the
scheduling algorithms of an operating system may be adversely affected by
@ -579,7 +609,8 @@ In an attempt to work around this, several implementations have provided a
paravirtualized scheduler clock, which reveals the true amount of CPU time for
which a virtual machine has been running.
4.6) Watchdogs
4.6. Watchdogs
--------------
Watchdog timers, such as the lockup detector in Linux, may fire accidentally when
running under hardware virtualization due to timer interrupts being delayed or
@ -587,7 +618,8 @@ misinterpretation of the passage of real time. Usually, these warnings are
spurious and can be ignored, but in some circumstances it may be necessary to
disable such detection.
4.7) Delays and precision timing
4.7. Delays and precision timing
--------------------------------
Precise timing and delays may not be possible in a virtualized system. This
can happen if the system is controlling physical hardware, or issues delays to
@ -600,7 +632,8 @@ The second issue may cause performance problems, but this is unlikely to be a
significant issue. In many cases these delays may be eliminated through
configuration or paravirtualization.
4.8) Covert channels and leaks
4.8. Covert channels and leaks
------------------------------
In addition to the above problems, time information will inevitably leak to the
guest about the host in anything but a perfect implementation of virtualized


@ -2796,11 +2796,11 @@ F: drivers/block/aoe/
ATHEROS 71XX/9XXX GPIO DRIVER
M: Alban Bedel <albeu@free.fr>
S: Maintained
W: https://github.com/AlbanBedel/linux
T: git git://github.com/AlbanBedel/linux
S: Maintained
F: drivers/gpio/gpio-ath79.c
F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt
F: drivers/gpio/gpio-ath79.c
ATHEROS 71XX/9XXX USB PHY DRIVER
M: Alban Bedel <albeu@free.fr>
@ -3422,8 +3422,8 @@ BROADCOM BRCMSTB GPIO DRIVER
M: Gregory Fong <gregory.0xf0@gmail.com>
L: bcm-kernel-feedback-list@broadcom.com
S: Supported
F: drivers/gpio/gpio-brcmstb.c
F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
F: drivers/gpio/gpio-brcmstb.c
BROADCOM BRCMSTB I2C DRIVER
M: Kamal Dasu <kdasu.kdev@gmail.com>
@ -3481,8 +3481,8 @@ BROADCOM KONA GPIO DRIVER
M: Ray Jui <rjui@broadcom.com>
L: bcm-kernel-feedback-list@broadcom.com
S: Supported
F: drivers/gpio/gpio-bcm-kona.c
F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
F: drivers/gpio/gpio-bcm-kona.c
BROADCOM NETXTREME-E ROCE DRIVER
M: Selvin Xavier <selvin.xavier@broadcom.com>
@ -3597,8 +3597,8 @@ F: sound/pci/bt87x.c
BT8XXGPIO DRIVER
M: Michael Buesch <m@bues.ch>
W: http://bu3sch.de/btgpio.php
S: Maintained
W: http://bu3sch.de/btgpio.php
F: drivers/gpio/gpio-bt8xx.c
BTRFS FILE SYSTEM
@ -7143,18 +7143,18 @@ GPIO SUBSYSTEM
M: Linus Walleij <linus.walleij@linaro.org>
M: Bartosz Golaszewski <bgolaszewski@baylibre.com>
L: linux-gpio@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
F: Documentation/ABI/obsolete/sysfs-gpio
F: Documentation/ABI/testing/gpio-cdev
F: Documentation/admin-guide/gpio/
F: Documentation/devicetree/bindings/gpio/
F: Documentation/driver-api/gpio/
F: Documentation/admin-guide/gpio/
F: Documentation/ABI/testing/gpio-cdev
F: Documentation/ABI/obsolete/sysfs-gpio
F: drivers/gpio/
F: include/asm-generic/gpio.h
F: include/linux/gpio/
F: include/linux/gpio.h
F: include/linux/of_gpio.h
F: include/asm-generic/gpio.h
F: include/uapi/linux/gpio.h
F: tools/gpio/
@ -8055,8 +8055,8 @@ F: drivers/scsi/ips.*
ICH LPC AND GPIO DRIVER
M: Peter Tyser <ptyser@xes-inc.com>
S: Maintained
F: drivers/mfd/lpc_ich.c
F: drivers/gpio/gpio-ich.c
F: drivers/mfd/lpc_ich.c
ICY I2C DRIVER
M: Max Staudt <max@enpas.org>
@ -16075,8 +16075,8 @@ F: Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt
SYNOPSYS CREG GPIO DRIVER
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
S: Maintained
F: drivers/gpio/gpio-creg-snps.c
F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt
F: drivers/gpio/gpio-creg-snps.c
SYNOPSYS DESIGNWARE 8250 UART DRIVER
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
@ -16087,8 +16087,8 @@ SYNOPSYS DESIGNWARE APB GPIO DRIVER
M: Hoan Tran <hoan@os.amperecomputing.com>
L: linux-gpio@vger.kernel.org
S: Maintained
F: drivers/gpio/gpio-dwapb.c
F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
F: drivers/gpio/gpio-dwapb.c
SYNOPSYS DESIGNWARE AXI DMAC DRIVER
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
@ -18414,8 +18414,8 @@ M: Nandor Han <nandor.han@ge.com>
M: Semi Malinen <semi.malinen@ge.com>
L: linux-gpio@vger.kernel.org
S: Maintained
F: drivers/gpio/gpio-xra1403.c
F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt
F: drivers/gpio/gpio-xra1403.c
XTENSA XTFPGA PLATFORM SUPPORT
M: Max Filippov <jcmvbkbc@gmail.com>


@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 6
SUBLEVEL = 0
EXTRAVERSION = -rc1
EXTRAVERSION = -rc2
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*


@ -78,13 +78,10 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
{
unsigned long replaced;
if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
old = __opcode_to_mem_thumb32(old);
new = __opcode_to_mem_thumb32(new);
} else {
else
old = __opcode_to_mem_arm(old);
new = __opcode_to_mem_arm(new);
}
if (validate) {
if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE))


@ -16,10 +16,10 @@ struct patch {
unsigned int insn;
};
#ifdef CONFIG_MMU
static DEFINE_RAW_SPINLOCK(patch_lock);
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
__acquires(&patch_lock)
{
unsigned int uintaddr = (uintptr_t) addr;
bool module = !core_kernel_text(uintaddr);
@ -34,8 +34,6 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
if (flags)
raw_spin_lock_irqsave(&patch_lock, *flags);
else
__acquire(&patch_lock);
set_fixmap(fixmap, page_to_phys(page));
@ -43,15 +41,19 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
}
static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
__releases(&patch_lock)
{
clear_fixmap(fixmap);
if (flags)
raw_spin_unlock_irqrestore(&patch_lock, *flags);
else
__release(&patch_lock);
}
#else
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
{
return addr;
}
static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { }
#endif
void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
{
@ -64,8 +66,6 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
if (remap)
waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
else
__acquire(&patch_lock);
if (thumb2 && __opcode_is_thumb16(insn)) {
*(u16 *)waddr = __opcode_to_mem_thumb16(insn);
@ -102,8 +102,7 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
if (waddr != addr) {
flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
patch_unmap(FIX_TEXT_POKE0, &flags);
} else
__release(&patch_lock);
}
flush_icache_range((uintptr_t)(addr),
(uintptr_t)(addr) + size);


@ -33,7 +33,6 @@ static inline u32 disr_to_esr(u64 disr)
asmlinkage void enter_from_user_mode(void);
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
void do_undefinstr(struct pt_regs *regs);
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
@ -47,7 +46,4 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
void do_cp15instr(unsigned int esr, struct pt_regs *regs);
void do_el0_svc(struct pt_regs *regs);
void do_el0_svc_compat(struct pt_regs *regs);
void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
#endif /* __ASM_EXCEPTION_H */


@ -18,6 +18,10 @@
* See:
* https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net
*/
#define vcpu_is_preempted(cpu) false
#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
{
return false;
}
#endif /* __ASM_SPINLOCK_H */


@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/types.h>
#include <asm/archrandom.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/kernel-pgtable.h>


@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next)
if (unlikely(next->flags & PF_KTHREAD))
return;
/*
* If all CPUs implement the SSBS extension, then we just need to
* context-switch the PSTATE field.
*/
if (cpu_have_feature(cpu_feature(SSBS)))
return;
/* If the mitigation is enabled, then we leave SSBS clear. */
if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
test_tsk_thread_flag(next, TIF_SSBD))
@ -608,8 +615,6 @@ long get_tagged_addr_ctrl(void)
* only prevents the tagged address ABI enabling via prctl() and does not
* disable it for tasks that already opted in to the relaxed ABI.
*/
static int zero;
static int one = 1;
static struct ctl_table tagged_addr_sysctl_table[] = {
{
@ -618,8 +623,8 @@ static struct ctl_table tagged_addr_sysctl_table[] = {
.data = &tagged_addr_disabled,
.maxlen = sizeof(int),
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{ }
};


@ -23,7 +23,7 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
#include <linux/clk-provider.h>
#include <linux/of_clk.h>
#include <linux/acpi.h>
#include <clocksource/arm_arch_timer.h>


@ -15,7 +15,8 @@ void uv_query_info(void)
if (!test_facility(158))
return;
if (uv_call(0, (uint64_t)&uvcb))
/* rc==0x100 means that there is additional data we do not process */
if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
return;
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&


@ -155,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk)
static inline unsigned long long get_tod_clock(void)
{
unsigned char clk[STORE_CLOCK_EXT_SIZE];
char clk[STORE_CLOCK_EXT_SIZE];
get_tod_clock_ext(clk);
return *((unsigned long long *)&clk[1]);


@ -250,6 +250,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,


@ -4765,6 +4765,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_TREMONT_D:
case INTEL_FAM6_ATOM_TREMONT:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));


@ -40,17 +40,18 @@
* Model specific counters:
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
* Available model: SLM,AMT,GLM,CNL
* Available model: SLM,AMT,GLM,CNL,TNT
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
* CNL,KBL,CML
* CNL,KBL,CML,TNT
* Scope: Core
* MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
* TNT
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
@ -60,17 +61,18 @@
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
* KBL,CML,ICL,TGL
* KBL,CML,ICL,TGL,TNT
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
* GLM,CNL,KBL,CML,ICL,TGL
* GLM,CNL,KBL,CML,ICL,TGL,TNT
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
* TNT
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
@ -87,7 +89,8 @@
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
* TNT
* Scope: Package (physical package)
*
*/
@ -640,8 +643,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates),


@ -1714,6 +1714,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
old = ((s64)(prev_raw_count << shift) >> shift);
local64_add(new - old + count * period, &event->count);
local64_set(&hwc->period_left, -new);
perf_event_update_userpage(event);
return 0;


@ -75,8 +75,9 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_GOLDMONT_D:
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_TREMONT_D:
case INTEL_FAM6_ATOM_TREMONT:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:


@ -781,9 +781,19 @@ struct kvm_vcpu_arch {
u64 msr_kvm_poll_control;
/*
* Indicate whether the access faults on its page table in guest
* which is set when fix page fault and used to detect unhandeable
* instruction.
* Indicates the guest is trying to write a gfn that contains one or
* more of the PTEs used to translate the write itself, i.e. the access
* is changing its own translation in the guest page tables. KVM exits
* to userspace if emulation of the faulting instruction fails and this
* flag is set, as KVM cannot make forward progress.
*
* If emulation fails for a write to guest page tables, KVM unprotects
* (zaps) the shadow page for the target gfn and resumes the guest to
* retry the non-emulatable instruction (on hardware). Unprotecting the
* gfn doesn't allow forward progress for a self-changing access because
* doing so also zaps the translation for the gfn, i.e. retrying the
* instruction will hit a !PRESENT fault, which results in a new shadow
* page and sends KVM back to square one.
*/
bool write_fault_to_shadow_pgtable;


@ -1080,9 +1080,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
result = 1;
/* assumes that there are only KVM_APIC_INIT/SIPI */
apic->pending_events = (1UL << KVM_APIC_INIT);
/* make sure pending_events is visible before sending
* the request */
smp_wmb();
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}


@ -102,6 +102,19 @@ static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
kvm_get_active_pcid(vcpu));
}
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
bool prefault);
static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
u32 err, bool prefault)
{
#ifdef CONFIG_RETPOLINE
if (likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault))
return kvm_tdp_page_fault(vcpu, cr2_or_gpa, err, prefault);
#endif
return vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, err, prefault);
}
/*
* Currently, we have two sorts of write-protection, a) the first one
* write-protects guest page to sync the guest modification, b) another one is


@ -4219,8 +4219,8 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
}
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
bool prefault)
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
bool prefault)
{
int max_level;
@ -4925,7 +4925,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
return;
context->mmu_role.as_u64 = new_role.as_u64;
context->page_fault = tdp_page_fault;
context->page_fault = kvm_tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
@ -5436,9 +5436,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
}
if (r == RET_PF_INVALID) {
r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa,
lower_32_bits(error_code),
false);
r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
lower_32_bits(error_code), false);
WARN_ON(r == RET_PF_INVALID);
}


@ -33,7 +33,7 @@
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
#define PT_HAVE_ACCESSED_DIRTY(mmu) true
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#define CMPXCHG cmpxchg
#else
#define CMPXCHG cmpxchg64


@ -2175,7 +2175,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 dummy;
u32 eax = 1;
vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
svm->virt_spec_ctrl = 0;
@ -2266,6 +2265,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
init_vmcb(svm);
svm_init_osvw(vcpu);
vcpu->arch.microcode_version = 0x01000065;
return 0;


@ -544,7 +544,8 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
{
int msr;
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
@ -1981,7 +1982,7 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
}
/*
* Clean fields data can't de used on VMLAUNCH and when we switch
* Clean fields data can't be used on VMLAUNCH and when we switch
* between different L2 guests as KVM keeps a single VMCS12 per L1.
*/
if (from_launch || evmcs_gpa_changed)
@ -3575,6 +3576,33 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
}
/*
* Returns true if a debug trap is pending delivery.
*
* In KVM, debug traps bear an exception payload. As such, the class of a #DB
* exception may be inferred from the presence of an exception payload.
*/
static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending &&
vcpu->arch.exception.nr == DB_VECTOR &&
vcpu->arch.exception.payload;
}
/*
* Certain VM-exits set the 'pending debug exceptions' field to indicate a
* recognized #DB (data or single-step) that has yet to be delivered. Since KVM
* represents these debug traps with a payload that is said to be compatible
* with the 'pending debug exceptions' field, write the payload to the VMCS
* field if a VM-exit is delivered before the debug trap.
*/
static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
{
if (vmx_pending_dbg_trap(vcpu))
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
vcpu->arch.exception.payload);
}
static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@ -3587,6 +3615,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_update_pending_dbg(vcpu);
clear_bit(KVM_APIC_INIT, &apic->pending_events);
nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
return 0;


@ -2947,6 +2947,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
static int get_ept_level(struct kvm_vcpu *vcpu)
{
/* Nested EPT currently only supports 4-level walks. */
if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
return 4;
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
return 5;
return 4;
@ -4238,7 +4241,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->msr_ia32_umwait_control = 0;
vcpu->arch.microcode_version = 0x100000000ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
vmx->hv_deadline_tsc = -1;
kvm_set_cr8(vcpu, 0);
@ -6763,6 +6765,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
vcpu->arch.microcode_version = 0x100000000ULL;
vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
/*


@ -438,6 +438,14 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
* for #DB exceptions under VMX.
*/
vcpu->arch.dr6 ^= payload & DR6_RTM;
/*
* The #DB payload is defined as compatible with the 'pending
* debug exceptions' field under VMX, not DR6. While bit 12 is
* defined in the 'pending debug exceptions' field (enabled
* breakpoint), it is reserved and must be zero in DR6.
*/
vcpu->arch.dr6 &= ~BIT(12);
break;
case PF_VECTOR:
vcpu->arch.cr2 = payload;
@ -490,19 +498,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.error_code = error_code;
vcpu->arch.exception.has_payload = has_payload;
vcpu->arch.exception.payload = payload;
/*
* In guest mode, payload delivery should be deferred,
* so that the L1 hypervisor can intercept #PF before
* CR2 is modified (or intercept #DB before DR6 is
* modified under nVMX). However, for ABI
* compatibility with KVM_GET_VCPU_EVENTS and
* KVM_SET_VCPU_EVENTS, we can't delay payload
* delivery unless userspace has enabled this
* functionality via the per-VM capability,
* KVM_CAP_EXCEPTION_PAYLOAD.
*/
if (!vcpu->kvm->arch.exception_payload_enabled ||
!is_guest_mode(vcpu))
if (!is_guest_mode(vcpu))
kvm_deliver_exception_payload(vcpu);
return;
}
@ -2448,7 +2444,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
WARN_ON(vcpu->hv_clock.system_time < 0);
WARN_ON((s64)vcpu->hv_clock.system_time < 0);
/* If the host uses TSC clocksource, then it is stable */
pvclock_flags = 0;
@ -3795,6 +3791,21 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
{
process_nmi(vcpu);
/*
* In guest mode, payload delivery should be deferred,
* so that the L1 hypervisor can intercept #PF before
* CR2 is modified (or intercept #DB before DR6 is
* modified under nVMX). Unless the per-VM capability,
* KVM_CAP_EXCEPTION_PAYLOAD, is set, we may not defer the delivery of
* an exception payload and handle after a KVM_GET_VCPU_EVENTS. Since we
* opportunistically defer the exception payload, deliver it if the
* capability hasn't been requested before processing a
* KVM_GET_VCPU_EVENTS.
*/
if (!vcpu->kvm->arch.exception_payload_enabled &&
vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
kvm_deliver_exception_payload(vcpu);
/*
* The API doesn't provide the instruction length for software
* exceptions, so don't report them. As long as the guest RIP
@ -8942,7 +8953,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
kvm_rip_write(vcpu, ctxt->eip);
kvm_set_rflags(vcpu, ctxt->eflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
@ -10182,7 +10192,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
return;
vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true);
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
}
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)


@ -136,8 +136,6 @@ config CRYPTO_USER
Userspace configuration for cryptographic instantiations such as
cbc(aes).
if CRYPTO_MANAGER2
config CRYPTO_MANAGER_DISABLE_TESTS
bool "Disable run-time self tests"
default y
@ -155,8 +153,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS
This is intended for developer use only, as these tests take much
longer to run than the normal self tests.
endif # if CRYPTO_MANAGER2
config CRYPTO_GF128MUL
tristate

View file

@ -4436,6 +4436,15 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(tf_cbc_tv_template)
},
}, {
#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
.alg = "cbc-paes-s390",
.fips_allowed = 1,
.test = alg_test_skcipher,
.suite = {
.cipher = __VECS(aes_cbc_tv_template)
}
}, {
#endif
.alg = "cbcmac(aes)",
.fips_allowed = 1,
.test = alg_test_hash,
@ -4587,6 +4596,15 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(tf_ctr_tv_template)
}
}, {
#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
.alg = "ctr-paes-s390",
.fips_allowed = 1,
.test = alg_test_skcipher,
.suite = {
.cipher = __VECS(aes_ctr_tv_template)
}
}, {
#endif
.alg = "cts(cbc(aes))",
.test = alg_test_skcipher,
.fips_allowed = 1,
@ -4879,6 +4897,15 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(xtea_tv_template)
}
}, {
#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
.alg = "ecb-paes-s390",
.fips_allowed = 1,
.test = alg_test_skcipher,
.suite = {
.cipher = __VECS(aes_tv_template)
}
}, {
#endif
.alg = "ecdh",
.test = alg_test_kpp,
.fips_allowed = 1,
@ -5465,6 +5492,15 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(tf_xts_tv_template)
}
}, {
#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
.alg = "xts-paes-s390",
.fips_allowed = 1,
.test = alg_test_skcipher,
.suite = {
.cipher = __VECS(aes_xts_tv_template)
}
}, {
#endif
.alg = "xts4096(paes)",
.test = alg_test_null,
.fips_allowed = 1,


@ -101,6 +101,8 @@ acpi_status acpi_hw_enable_all_runtime_gpes(void);
acpi_status acpi_hw_enable_all_wakeup_gpes(void);
u8 acpi_hw_check_all_gpes(void);
acpi_status
acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
struct acpi_gpe_block_info *gpe_block,


@ -795,6 +795,38 @@ acpi_status acpi_enable_all_wakeup_gpes(void)
ACPI_EXPORT_SYMBOL(acpi_enable_all_wakeup_gpes)
/******************************************************************************
*
* FUNCTION: acpi_any_gpe_status_set
*
* PARAMETERS: None
*
* RETURN: Whether or not the status bit is set for any GPE
*
* DESCRIPTION: Check the status bits of all enabled GPEs and return TRUE if any
* of them is set or FALSE otherwise.
*
******************************************************************************/
u32 acpi_any_gpe_status_set(void)
{
acpi_status status;
u8 ret;
ACPI_FUNCTION_TRACE(acpi_any_gpe_status_set);
status = acpi_ut_acquire_mutex(ACPI_MTX_EVENTS);
if (ACPI_FAILURE(status)) {
return (FALSE);
}
ret = acpi_hw_check_all_gpes();
(void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);
return (ret);
}
ACPI_EXPORT_SYMBOL(acpi_any_gpe_status_set)
/*******************************************************************************
*
* FUNCTION: acpi_install_gpe_block


@ -444,6 +444,53 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
return (AE_OK);
}
/******************************************************************************
*
* FUNCTION: acpi_hw_get_gpe_block_status
*
* PARAMETERS: gpe_xrupt_info - GPE Interrupt info
* gpe_block - Gpe Block info
*
* RETURN: Success
*
* DESCRIPTION: Produce a combined GPE status bits mask for the given block.
*
******************************************************************************/
static acpi_status
acpi_hw_get_gpe_block_status(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
struct acpi_gpe_block_info *gpe_block,
void *ret_ptr)
{
struct acpi_gpe_register_info *gpe_register_info;
u64 in_enable, in_status;
acpi_status status;
u8 *ret = ret_ptr;
u32 i;
/* Examine each GPE Register within the block */
for (i = 0; i < gpe_block->register_count; i++) {
gpe_register_info = &gpe_block->register_info[i];
status = acpi_hw_read(&in_enable,
&gpe_register_info->enable_address);
if (ACPI_FAILURE(status)) {
continue;
}
status = acpi_hw_read(&in_status,
&gpe_register_info->status_address);
if (ACPI_FAILURE(status)) {
continue;
}
*ret |= in_enable & in_status;
}
return (AE_OK);
}
/******************************************************************************
*
* FUNCTION: acpi_hw_disable_all_gpes
@ -510,4 +557,28 @@ acpi_status acpi_hw_enable_all_wakeup_gpes(void)
return_ACPI_STATUS(status);
}
/******************************************************************************
*
* FUNCTION: acpi_hw_check_all_gpes
*
* PARAMETERS: None
*
* RETURN: Combined status of all GPEs
*
* DESCRIPTION: Check all enabled GPEs in all GPE blocks and return TRUE if the
status bit is set for at least one of them or FALSE otherwise.
*
******************************************************************************/
u8 acpi_hw_check_all_gpes(void)
{
u8 ret = 0;
ACPI_FUNCTION_TRACE(acpi_hw_check_all_gpes);
(void)acpi_ev_walk_gpe_list(acpi_hw_get_gpe_block_status, &ret);
return (ret != 0);
}
#endif /* !ACPI_REDUCED_HARDWARE */


@ -179,6 +179,7 @@ EXPORT_SYMBOL(first_ec);
static struct acpi_ec *boot_ec;
static bool boot_ec_is_ecdt = false;
static struct workqueue_struct *ec_wq;
static struct workqueue_struct *ec_query_wq;
static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */
@ -469,7 +470,7 @@ static void acpi_ec_submit_query(struct acpi_ec *ec)
ec_dbg_evt("Command(%s) submitted/blocked",
acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
ec->nr_pending_queries++;
schedule_work(&ec->work);
queue_work(ec_wq, &ec->work);
}
}
@ -535,7 +536,7 @@ static void acpi_ec_enable_event(struct acpi_ec *ec)
#ifdef CONFIG_PM_SLEEP
static void __acpi_ec_flush_work(void)
{
flush_scheduled_work(); /* flush ec->work */
drain_workqueue(ec_wq); /* flush ec->work */
flush_workqueue(ec_query_wq); /* flush queries */
}
@ -556,8 +557,8 @@ static void acpi_ec_disable_event(struct acpi_ec *ec)
void acpi_ec_flush_work(void)
{
/* Without ec_query_wq there is nothing to flush. */
if (!ec_query_wq)
/* Without ec_wq there is nothing to flush. */
if (!ec_wq)
return;
__acpi_ec_flush_work();
@ -2107,25 +2108,33 @@ static struct acpi_driver acpi_ec_driver = {
.drv.pm = &acpi_ec_pm,
};
static inline int acpi_ec_query_init(void)
static void acpi_ec_destroy_workqueues(void)
{
if (!ec_query_wq) {
ec_query_wq = alloc_workqueue("kec_query", 0,
ec_max_queries);
if (!ec_query_wq)
return -ENODEV;
if (ec_wq) {
destroy_workqueue(ec_wq);
ec_wq = NULL;
}
return 0;
}
static inline void acpi_ec_query_exit(void)
{
if (ec_query_wq) {
destroy_workqueue(ec_query_wq);
ec_query_wq = NULL;
}
}
static int acpi_ec_init_workqueues(void)
{
if (!ec_wq)
ec_wq = alloc_ordered_workqueue("kec", 0);
if (!ec_query_wq)
ec_query_wq = alloc_workqueue("kec_query", 0, ec_max_queries);
if (!ec_wq || !ec_query_wq) {
acpi_ec_destroy_workqueues();
return -ENODEV;
}
return 0;
}
static const struct dmi_system_id acpi_ec_no_wakeup[] = {
{
.ident = "Thinkpad X1 Carbon 6th",
@ -2156,8 +2165,7 @@ int __init acpi_ec_init(void)
int result;
int ecdt_fail, dsdt_fail;
/* register workqueue for _Qxx evaluations */
result = acpi_ec_query_init();
result = acpi_ec_init_workqueues();
if (result)
return result;
@ -2188,6 +2196,6 @@ static void __exit acpi_ec_exit(void)
{
acpi_bus_unregister_driver(&acpi_ec_driver);
acpi_ec_query_exit();
acpi_ec_destroy_workqueues();
}
#endif /* 0 */


@ -990,21 +990,34 @@ static void acpi_s2idle_sync(void)
acpi_os_wait_events_complete(); /* synchronize Notify handling */
}
static void acpi_s2idle_wake(void)
static bool acpi_s2idle_wake(void)
{
/*
* If IRQD_WAKEUP_ARMED is set for the SCI at this point, the SCI has
* not triggered while suspended, so bail out.
*/
if (!acpi_sci_irq_valid() ||
irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
return;
if (!acpi_sci_irq_valid())
return pm_wakeup_pending();
while (pm_wakeup_pending()) {
/*
* If IRQD_WAKEUP_ARMED is set for the SCI at this point, the
* SCI has not triggered while suspended, so bail out (the
* wakeup is pending anyway and the SCI is not the source of
* it).
*/
if (irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
return true;
/*
* If there are no EC events to process and at least one of the
* other enabled GPEs is active, the wakeup is regarded as a
* genuine one.
*
* Note that the checks below must be carried out in this order
* to avoid returning prematurely if the EC GPE status bit changes
* from unset to set between the two checks while the status bits
* of all of the other GPEs remain unset.
*/
if (acpi_any_gpe_status_set() && !acpi_ec_dispatch_gpe())
return true;
/*
* If there are EC events to process, the wakeup may be a spurious one
* coming from the EC.
*/
if (acpi_ec_dispatch_gpe()) {
/*
* Cancel the wakeup and process all pending events in case
* there are any wakeup ones in there.
@ -1017,8 +1030,19 @@ static void acpi_s2idle_wake(void)
acpi_s2idle_sync();
/*
* The SCI is in the "suspended" state now and it cannot produce
* new wakeup events till the rearming below, so if any of them
* are pending here, they must be resulting from the processing
* of EC events above or coming from somewhere else.
*/
if (pm_wakeup_pending())
return true;
rearm_wake_irq(acpi_sci_irq);
}
return false;
}
static void acpi_s2idle_restore_early(void)


@ -19,7 +19,7 @@
#include <linux/spinlock.h>
#include <linux/wait.h>
#define MAX_MSG_LEN 128
#define MAX_MSG_LEN 240
#define IPMB_REQUEST_LEN_MIN 7
#define NETFN_RSP_BIT_MASK 0x4
#define REQUEST_QUEUE_MAX_LEN 256
@ -63,6 +63,7 @@ struct ipmb_dev {
spinlock_t lock;
wait_queue_head_t wait_queue;
struct mutex file_mutex;
bool is_i2c_protocol;
};
static inline struct ipmb_dev *to_ipmb_dev(struct file *file)
@ -112,6 +113,25 @@ static ssize_t ipmb_read(struct file *file, char __user *buf, size_t count,
return ret < 0 ? ret : count;
}
static int ipmb_i2c_write(struct i2c_client *client, u8 *msg, u8 addr)
{
struct i2c_msg i2c_msg;
/*
* subtract 1 byte (rq_sa) from the length of the msg passed to
* raw i2c_transfer
*/
i2c_msg.len = msg[IPMB_MSG_LEN_IDX] - 1;
/* Assign message to buffer except first 2 bytes (length and address) */
i2c_msg.buf = msg + 2;
i2c_msg.addr = addr;
i2c_msg.flags = client->flags & I2C_CLIENT_PEC;
return i2c_transfer(client->adapter, &i2c_msg, 1);
}
static ssize_t ipmb_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@ -133,6 +153,12 @@ static ssize_t ipmb_write(struct file *file, const char __user *buf,
rq_sa = GET_7BIT_ADDR(msg[RQ_SA_8BIT_IDX]);
netf_rq_lun = msg[NETFN_LUN_IDX];
/* Check i2c block transfer vs smbus */
if (ipmb_dev->is_i2c_protocol) {
ret = ipmb_i2c_write(ipmb_dev->client, msg, rq_sa);
return (ret == 1) ? count : ret;
}
/*
* subtract rq_sa and netf_rq_lun from the length of the msg passed to
* i2c_smbus_xfer
@ -253,7 +279,7 @@ static int ipmb_slave_cb(struct i2c_client *client,
break;
case I2C_SLAVE_WRITE_RECEIVED:
if (ipmb_dev->msg_idx >= sizeof(struct ipmb_msg))
if (ipmb_dev->msg_idx >= sizeof(struct ipmb_msg) - 1)
break;
buf[++ipmb_dev->msg_idx] = *val;
@ -302,6 +328,9 @@ static int ipmb_probe(struct i2c_client *client,
if (ret)
return ret;
ipmb_dev->is_i2c_protocol
= device_property_read_bool(&client->dev, "i2c-protocol");
ipmb_dev->client = client;
i2c_set_clientdata(client, ipmb_dev);
ret = i2c_slave_register(client, ipmb_slave_cb);


@ -775,10 +775,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
msg = ssif_info->curr_msg;
if (msg) {
if (data) {
if (len > IPMI_MAX_MSG_LENGTH)
len = IPMI_MAX_MSG_LENGTH;
memcpy(msg->rsp, data, len);
} else {
len = 0;
}
msg->rsp_size = len;
if (msg->rsp_size > IPMI_MAX_MSG_LENGTH)
msg->rsp_size = IPMI_MAX_MSG_LENGTH;
memcpy(msg->rsp, data, msg->rsp_size);
ssif_info->curr_msg = NULL;
}


@ -105,6 +105,8 @@ bool have_governor_per_policy(void)
}
EXPORT_SYMBOL_GPL(have_governor_per_policy);
static struct kobject *cpufreq_global_kobject;
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
if (have_governor_per_policy())
@ -2745,9 +2747,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);
static int __init cpufreq_core_init(void)
{
if (cpufreq_disabled())


@ -61,7 +61,7 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
{
if (!blk_queue_dax(bdev->bd_queue))
return NULL;
return fs_dax_get_by_host(bdev->bd_disk->disk_name);
return dax_get_by_host(bdev->bd_disk->disk_name);
}
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif


@ -505,16 +505,10 @@ void edac_mc_free(struct mem_ctl_info *mci)
{
edac_dbg(1, "\n");
/* If we're not yet registered with sysfs free only what was allocated
* in edac_mc_alloc().
*/
if (!device_is_registered(&mci->dev)) {
_edac_mc_free(mci);
return;
}
if (device_is_registered(&mci->dev))
edac_unregister_sysfs(mci);
/* the mci instance is freed here, when the sysfs object is dropped */
edac_unregister_sysfs(mci);
_edac_mc_free(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

View file

@ -276,10 +276,7 @@ static const struct attribute_group *csrow_attr_groups[] = {
static void csrow_attr_release(struct device *dev)
{
struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(csrow);
/* release device with _edac_mc_free() */
}
static const struct device_type csrow_attr_type = {
@ -447,8 +444,7 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
csrow = mci->csrows[i];
if (!nr_pages_per_csrow(csrow))
continue;
device_del(&mci->csrows[i]->dev);
device_unregister(&mci->csrows[i]->dev);
}
return err;
@ -608,10 +604,7 @@ static const struct attribute_group *dimm_attr_groups[] = {
static void dimm_attr_release(struct device *dev)
{
struct dimm_info *dimm = container_of(dev, struct dimm_info, dev);
edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(dimm);
/* release device with _edac_mc_free() */
}
static const struct device_type dimm_attr_type = {
@ -893,10 +886,7 @@ static const struct attribute_group *mci_attr_groups[] = {
static void mci_attr_release(struct device *dev)
{
struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(mci);
/* release device with _edac_mc_free() */
}
static const struct device_type mci_attr_type = {

View file

@ -10,16 +10,6 @@
#define GPIO_OUT_REG(off) (BD71828_REG_GPIO_CTRL1 + (off))
#define HALL_GPIO_OFFSET 3
/*
* These defines can be removed when
* "gpio: Add definition for GPIO direction"
* (9208b1e77d6e8e9776f34f46ef4079ecac9c3c25 in GPIO tree) gets merged,
*/
#ifndef GPIO_LINE_DIRECTION_IN
#define GPIO_LINE_DIRECTION_IN 1
#define GPIO_LINE_DIRECTION_OUT 0
#endif
struct bd71828_gpio {
struct rohm_regmap_dev chip;
struct gpio_chip gpio;

View file

@ -35,7 +35,7 @@ struct sifive_gpio {
void __iomem *base;
struct gpio_chip gc;
struct regmap *regs;
u32 irq_state;
unsigned long irq_state;
unsigned int trigger[SIFIVE_GPIO_MAX];
unsigned int irq_parent[SIFIVE_GPIO_MAX];
};
@ -94,7 +94,7 @@ static void sifive_gpio_irq_enable(struct irq_data *d)
spin_unlock_irqrestore(&gc->bgpio_lock, flags);
/* Enable interrupts */
assign_bit(offset, (unsigned long *)&chip->irq_state, 1);
assign_bit(offset, &chip->irq_state, 1);
sifive_gpio_set_ie(chip, offset);
}
@ -104,7 +104,7 @@ static void sifive_gpio_irq_disable(struct irq_data *d)
struct sifive_gpio *chip = gpiochip_get_data(gc);
int offset = irqd_to_hwirq(d) % SIFIVE_GPIO_MAX;
assign_bit(offset, (unsigned long *)&chip->irq_state, 0);
assign_bit(offset, &chip->irq_state, 0);
sifive_gpio_set_ie(chip, offset);
irq_chip_disable_parent(d);
}

View file

@ -147,9 +147,10 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
for (i = 0; i < gc->ngpio; i++) {
if (*mask == 0)
break;
/* Once finished with an index write it out to the register */
if (index != xgpio_index(chip, i)) {
xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
xgpio_regoffset(chip, i),
index * XGPIO_CHANNEL_OFFSET,
chip->gpio_state[index]);
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
index = xgpio_index(chip, i);
@ -165,7 +166,7 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
}
xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
xgpio_regoffset(chip, i), chip->gpio_state[index]);
index * XGPIO_CHANNEL_OFFSET, chip->gpio_state[index]);
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
}

View file

@ -3035,13 +3035,33 @@ EXPORT_SYMBOL_GPL(gpiochip_free_own_desc);
* rely on gpio_request() having been called beforehand.
*/
static int gpio_set_config(struct gpio_chip *gc, unsigned int offset,
enum pin_config_param mode)
static int gpio_do_set_config(struct gpio_chip *gc, unsigned int offset,
unsigned long config)
{
if (!gc->set_config)
return -ENOTSUPP;
return gc->set_config(gc, offset, mode);
return gc->set_config(gc, offset, config);
}
static int gpio_set_config(struct gpio_chip *gc, unsigned int offset,
enum pin_config_param mode)
{
unsigned long config;
unsigned arg;
switch (mode) {
case PIN_CONFIG_BIAS_PULL_DOWN:
case PIN_CONFIG_BIAS_PULL_UP:
arg = 1;
break;
default:
arg = 0;
}
config = PIN_CONF_PACKED(mode, arg);
return gpio_do_set_config(gc, offset, config);
}
static int gpio_set_bias(struct gpio_chip *chip, struct gpio_desc *desc)
@ -3277,7 +3297,7 @@ int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
chip = desc->gdev->chip;
config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce);
return gpio_set_config(chip, gpio_chip_hwgpio(desc), config);
return gpio_do_set_config(chip, gpio_chip_hwgpio(desc), config);
}
EXPORT_SYMBOL_GPL(gpiod_set_debounce);
@ -3311,7 +3331,7 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
packed = pinconf_to_config_packed(PIN_CONFIG_PERSIST_STATE,
!transitory);
gpio = gpio_chip_hwgpio(desc);
rc = gpio_set_config(chip, gpio, packed);
rc = gpio_do_set_config(chip, gpio, packed);
if (rc == -ENOTSUPP) {
dev_dbg(&desc->gdev->dev, "Persistence not supported for GPIO %d\n",
gpio);

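The split exists because pinconf configs are packed words: gpio_set_config() now builds the packed value from a bare mode, while callers such as gpiod_set_debounce() and gpiod_set_transitory() already hold a packed value and must use gpio_do_set_config() to avoid packing it twice. A sketch of the packing (macro reproduced in spirit from pinconf-generic.h; the enum values here are illustrative)::

    #include <stdio.h>

    /* Low 8 bits carry the parameter, the upper bits its argument. */
    #define PIN_CONF_PACKED(p, a) \
            (((unsigned long)(a) << 8) | ((unsigned long)(p) & 0xffUL))

    enum { PIN_CONFIG_BIAS_PULL_UP = 2, PIN_CONFIG_INPUT_DEBOUNCE = 7 };

    int main(void)
    {
            /* gpio_set_config() derives arg=1 for pull biases itself */
            unsigned long pull = PIN_CONF_PACKED(PIN_CONFIG_BIAS_PULL_UP, 1);
            /* gpiod_set_debounce() already has a packed word like this;
             * packing it again would overwrite the parameter byte */
            unsigned long deb = PIN_CONF_PACKED(PIN_CONFIG_INPUT_DEBOUNCE, 1000);

            printf("pull=%#lx debounce=%#lx\n", pull, deb);
            return 0;
    }
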
View file

@ -52,7 +52,7 @@ static int amdgpu_perf_event_init(struct perf_event *event)
return -ENOENT;
/* update the hw_perf_event struct with config data */
hwc->conf = event->attr.config;
hwc->config = event->attr.config;
return 0;
}
@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
if (!(flags & PERF_EF_RELOAD))
pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 1);
pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0);
pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 0);
break;
default:
break;
@ -101,7 +101,7 @@ static void amdgpu_perf_read(struct perf_event *event)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf,
pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->config,
&count);
break;
default:
@ -126,7 +126,7 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0);
pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 0);
break;
default:
break;
@ -156,7 +156,8 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
retval = pe->adev->df.funcs->pmc_start(pe->adev,
hwc->config, 1);
break;
default:
return 0;
@ -184,7 +185,7 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1);
pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 1);
break;
default:
break;

View file

@ -179,6 +179,7 @@ struct amdgpu_vcn_inst {
struct amdgpu_irq_src irq;
struct amdgpu_vcn_reg external;
struct amdgpu_bo *dpg_sram_bo;
struct dpg_pause_state pause_state;
void *dpg_sram_cpu_addr;
uint64_t dpg_sram_gpu_addr;
uint32_t *dpg_sram_curr_addr;
@ -190,8 +191,6 @@ struct amdgpu_vcn {
const struct firmware *fw; /* VCN firmware */
unsigned num_enc_rings;
enum amd_powergating_state cur_state;
struct dpg_pause_state pause_state;
bool indirect_sram;
uint8_t num_vcn_inst;

View file

@ -4374,9 +4374,17 @@ static int gfx_v9_0_ecc_late_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
r = gfx_v9_0_do_edc_gds_workarounds(adev);
if (r)
return r;
/*
* Temp workaround to fix the issue that CP firmware fails to
* update read pointer when CPDMA is writing clearing operation
* to GDS in suspend/resume sequence on several cards. So just
* limit this operation in cold boot sequence.
*/
if (!adev->in_suspend) {
r = gfx_v9_0_do_edc_gds_workarounds(adev);
if (r)
return r;
}
/* requires IBs so do in late init after IB pool is initialized */
r = gfx_v9_0_do_edc_gpr_workarounds(adev);

View file

@ -1207,9 +1207,10 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
struct amdgpu_ring *ring;
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
adev->vcn.inst[inst_idx].pause_state.fw_based,
adev->vcn.inst[inst_idx].pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
@ -1258,13 +1259,14 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.fw_based = new_state->fw_based;
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
if (adev->vcn.inst[inst_idx].pause_state.jpeg != new_state->jpeg) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
adev->vcn.inst[inst_idx].pause_state.fw_based,
adev->vcn.inst[inst_idx].pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
@ -1318,7 +1320,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.jpeg = new_state->jpeg;
adev->vcn.inst[inst_idx].pause_state.jpeg = new_state->jpeg;
}
return 0;

View file

@ -1137,9 +1137,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
int ret_code;
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d -> %d",
adev->vcn.pause_state.fw_based, new_state->fw_based);
adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
@ -1185,7 +1185,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.fw_based = new_state->fw_based;
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
return 0;

View file

@ -1367,9 +1367,9 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
int ret_code;
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d -> %d",
adev->vcn.pause_state.fw_based, new_state->fw_based);
adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
@ -1407,14 +1407,14 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
} else {
/* unpause dpg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.fw_based = new_state->fw_based;
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
return 0;

View file

@ -8408,7 +8408,6 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
/* Calculate number of static frames before generating interrupt to
* enter PSR.
*/
unsigned int frame_time_microsec = 1000000 / vsync_rate_hz;
// Init fail safe of 2 frames static
unsigned int num_frames_static = 2;
@ -8423,8 +8422,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
* Calculate number of frames such that at least 30 ms of time has
* passed.
*/
if (vsync_rate_hz != 0)
if (vsync_rate_hz != 0) {
unsigned int frame_time_microsec = 1000000 / vsync_rate_hz;
num_frames_static = (30000 / frame_time_microsec) + 1;
}
params.triggers.cursor_update = true;
params.triggers.overlay_update = true;

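Hoisting the division into the non-zero branch avoids a divide-by-zero when the sink reports no vsync rate, while keeping the two-frame failsafe. A worked sketch of the fixed calculation::

    #include <stdio.h>

    /* Only divide once vsync_rate_hz is known to be non-zero. */
    static unsigned int psr_static_frames(unsigned int vsync_rate_hz)
    {
            unsigned int num_frames_static = 2;     /* failsafe */

            if (vsync_rate_hz != 0) {
                    unsigned int frame_time_microsec = 1000000 / vsync_rate_hz;

                    num_frames_static = (30000 / frame_time_microsec) + 1;
            }
            return num_frames_static;
    }

    int main(void)
    {
            printf("60 Hz  -> %u frames\n", psr_static_frames(60));   /* 2 */
            printf("144 Hz -> %u frames\n", psr_static_frames(144));  /* 5 */
            printf("0 Hz   -> %u frames\n", psr_static_frames(0));    /* 2 */
            return 0;
    }
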
View file

@ -711,10 +711,6 @@ static void enable_disp_power_gating_dmcub(
power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING;
power_gating.power_gating.pwr = *pwr;
/* ATOM_ENABLE is old API in DMUB */
if (power_gating.power_gating.pwr.enable == ATOM_ENABLE)
power_gating.power_gating.pwr.enable = ATOM_INIT;
dc_dmub_srv_cmd_queue(dmcub, &power_gating.header);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);

View file

@ -87,6 +87,12 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN20)
###############################################################################
CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o
# prevent build errors regarding soft-float vs hard-float FP ABI tags
# this code is currently unused on ppc64, as it applies to Renoir APUs only
ifdef CONFIG_PPC64
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
endif
AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)

View file

@ -117,7 +117,7 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
prev_dppclk_khz = clk_mgr->base.ctx->dc->current_state->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
if (safe_to_lower || prev_dppclk_khz < dppclk_khz) {
if ((prev_dppclk_khz > dppclk_khz && safe_to_lower) || prev_dppclk_khz < dppclk_khz) {
clk_mgr->dccg->funcs->update_dpp_dto(
clk_mgr->dccg, dpp_inst, dppclk_khz);
}

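The tightened condition programs a DTO only when the clock really changes: a raise is always applied, a lower only when safe_to_lower is set; the old test also fired when the value was unchanged. Restated as a predicate::

    #include <stdbool.h>
    #include <stdio.h>

    /* Old gate: safe_to_lower || prev < next   (fires when unchanged, too)
     * New gate: real raise, or real lower when permitted. */
    static bool should_update_dpp_dto(int prev_khz, int next_khz,
                                      bool safe_to_lower)
    {
            return (prev_khz > next_khz && safe_to_lower) ||
                   prev_khz < next_khz;
    }

    int main(void)
    {
            printf("raise:     %d\n", should_update_dpp_dto(500, 600, false)); /* 1 */
            printf("unchanged: %d\n", should_update_dpp_dto(600, 600, true));  /* 0 */
            printf("lower:     %d\n", should_update_dpp_dto(600, 500, true));  /* 1 */
            return 0;
    }
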
View file

@ -151,6 +151,12 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
rn_vbios_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz);
}
// workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
if (new_clocks->dppclk_khz < 100000)
new_clocks->dppclk_khz = 100000;
}
if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
dpp_clock_lowered = true;
@ -412,19 +418,19 @@ void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_smu_wm_ra
ranges->reader_wm_sets[num_valid_sets].wm_inst = bw_params->wm_table.entries[i].wm_inst;
ranges->reader_wm_sets[num_valid_sets].wm_type = bw_params->wm_table.entries[i].wm_type;
/* We will not select WM based on dcfclk, so leave it as unconstrained */
ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
/* fclk will be used to select WM */
/* We will not select WM based on fclk, so leave it as unconstrained */
ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
/* dcfclk will be used to select WM */
if (ranges->reader_wm_sets[num_valid_sets].wm_type == WM_TYPE_PSTATE_CHG) {
if (i == 0)
ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = 0;
ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = 0;
else {
/* add 1 to make it non-overlapping with next lvl */
ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = bw_params->clk_table.entries[i - 1].fclk_mhz + 1;
ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1;
}
ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
} else {
/* unconstrained for memory retraining */

View file

@ -400,7 +400,7 @@ static bool acquire(
{
enum gpio_result result;
if (!is_engine_available(engine))
if ((engine == NULL) || !is_engine_available(engine))
return false;
result = dal_ddc_open(ddc, GPIO_MODE_HARDWARE,

View file

@ -572,7 +572,6 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
dpp->funcs->dpp_dppclk_control(dpp, false, false);
hubp->power_gated = true;
dc->optimized_required = false; /* We're powering off, no need to optimize */
hws->funcs.plane_atomic_power_down(dc,
pipe_ctx->plane_res.dpp,

View file

@ -60,6 +60,7 @@
#include "dcn20/dcn20_dccg.h"
#include "dcn21_hubbub.h"
#include "dcn10/dcn10_resource.h"
#include "dce110/dce110_resource.h"
#include "dcn20/dcn20_dwb.h"
#include "dcn20/dcn20_mmhubbub.h"
@ -856,6 +857,7 @@ static const struct dc_debug_options debug_defaults_diags = {
enum dcn20_clk_src_array_id {
DCN20_CLK_SRC_PLL0,
DCN20_CLK_SRC_PLL1,
DCN20_CLK_SRC_PLL2,
DCN20_CLK_SRC_TOTAL_DCN21
};
@ -1718,6 +1720,10 @@ static bool dcn21_resource_construct(
dcn21_clock_source_create(ctx, ctx->dc_bios,
CLOCK_SOURCE_COMBO_PHY_PLL1,
&clk_src_regs[1], false);
pool->base.clock_sources[DCN20_CLK_SRC_PLL2] =
dcn21_clock_source_create(ctx, ctx->dc_bios,
CLOCK_SOURCE_COMBO_PHY_PLL2,
&clk_src_regs[2], false);
pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21;

View file

@ -39,21 +39,39 @@
#define SMU_11_0_PP_OVERDRIVE_VERSION 0x0800
#define SMU_11_0_PP_POWERSAVINGCLOCK_VERSION 0x0100
enum SMU_11_0_ODFEATURE_CAP {
SMU_11_0_ODCAP_GFXCLK_LIMITS = 0,
SMU_11_0_ODCAP_GFXCLK_CURVE,
SMU_11_0_ODCAP_UCLK_MAX,
SMU_11_0_ODCAP_POWER_LIMIT,
SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT,
SMU_11_0_ODCAP_FAN_SPEED_MIN,
SMU_11_0_ODCAP_TEMPERATURE_FAN,
SMU_11_0_ODCAP_TEMPERATURE_SYSTEM,
SMU_11_0_ODCAP_MEMORY_TIMING_TUNE,
SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL,
SMU_11_0_ODCAP_AUTO_UV_ENGINE,
SMU_11_0_ODCAP_AUTO_OC_ENGINE,
SMU_11_0_ODCAP_AUTO_OC_MEMORY,
SMU_11_0_ODCAP_FAN_CURVE,
SMU_11_0_ODCAP_COUNT,
};
enum SMU_11_0_ODFEATURE_ID {
SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << 0, //GFXCLK Limit feature
SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << 1, //GFXCLK Curve feature
SMU_11_0_ODFEATURE_UCLK_MAX = 1 << 2, //UCLK Limit feature
SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << 3, //Power Limit feature
SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << 4, //Fan Acoustic RPM feature
SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << 5, //Minimum Fan Speed feature
SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << 6, //Fan Target Temperature Limit feature
SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << 7, //Operating Temperature Limit feature
SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << 8, //AC Timing Tuning feature
SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << 9, //Zero RPM feature
SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << 10, //Auto Under Volt GFXCLK feature
SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << 11, //Auto Over Clock GFXCLK feature
SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << 12, //Auto Over Clock MCLK feature
SMU_11_0_ODFEATURE_FAN_CURVE = 1 << 13, //VICTOR TODO
SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << SMU_11_0_ODCAP_GFXCLK_LIMITS, //GFXCLK Limit feature
SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << SMU_11_0_ODCAP_GFXCLK_CURVE, //GFXCLK Curve feature
SMU_11_0_ODFEATURE_UCLK_MAX = 1 << SMU_11_0_ODCAP_UCLK_MAX, //UCLK Limit feature
SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << SMU_11_0_ODCAP_POWER_LIMIT, //Power Limit feature
SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT, //Fan Acoustic RPM feature
SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << SMU_11_0_ODCAP_FAN_SPEED_MIN, //Minimum Fan Speed feature
SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << SMU_11_0_ODCAP_TEMPERATURE_FAN, //Fan Target Temperature Limit feature
SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << SMU_11_0_ODCAP_TEMPERATURE_SYSTEM, //Operating Temperature Limit feature
SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << SMU_11_0_ODCAP_MEMORY_TIMING_TUNE, //AC Timing Tuning feature
SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL, //Zero RPM feature
SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_UV_ENGINE, //Auto Under Volt GFXCLK feature
SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_OC_ENGINE, //Auto Over Clock GFXCLK feature
SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << SMU_11_0_ODCAP_AUTO_OC_MEMORY, //Auto Over Clock MCLK feature
SMU_11_0_ODFEATURE_FAN_CURVE = 1 << SMU_11_0_ODCAP_FAN_CURVE, //Fan Curve feature
SMU_11_0_ODFEATURE_COUNT = 14,
};
#define SMU_11_0_MAX_ODFEATURE 32 //Maximum Number of OD Features

View file

@ -736,9 +736,9 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu
return dpm_desc->SnapToDiscrete == 0 ? true : false;
}
static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature)
static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_CAP cap)
{
return od_table->cap[feature];
return od_table->cap[cap];
}
static void navi10_od_setting_get_range(struct smu_11_0_overdrive_table *od_table,
@ -846,7 +846,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
case SMU_OD_SCLK:
if (!smu->od_enabled || !od_table || !od_settings)
break;
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS))
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS))
break;
size += sprintf(buf + size, "OD_SCLK:\n");
size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax);
@ -854,7 +854,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
case SMU_OD_MCLK:
if (!smu->od_enabled || !od_table || !od_settings)
break;
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX))
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX))
break;
size += sprintf(buf + size, "OD_MCLK:\n");
size += sprintf(buf + size, "1: %uMHz\n", od_table->UclkFmax);
@ -862,7 +862,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
case SMU_OD_VDDC_CURVE:
if (!smu->od_enabled || !od_table || !od_settings)
break;
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE))
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE))
break;
size += sprintf(buf + size, "OD_VDDC_CURVE:\n");
for (i = 0; i < 3; i++) {
@ -887,7 +887,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
break;
size = sprintf(buf, "%s:\n", "OD_RANGE");
if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) {
if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) {
navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMIN,
&min_value, NULL);
navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMAX,
@ -896,14 +896,14 @@ static int navi10_print_clk_levels(struct smu_context *smu,
min_value, max_value);
}
if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) {
if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) {
navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX,
&min_value, &max_value);
size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
min_value, max_value);
}
if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) {
if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) {
navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1,
&min_value, &max_value);
size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n",
@ -2056,7 +2056,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL
switch (type) {
case PP_OD_EDIT_SCLK_VDDC_TABLE:
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) {
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) {
pr_warn("GFXCLK_LIMITS not supported!\n");
return -ENOTSUPP;
}
@ -2102,7 +2102,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL
}
break;
case PP_OD_EDIT_MCLK_VDDC_TABLE:
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) {
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) {
pr_warn("UCLK_MAX not supported!\n");
return -ENOTSUPP;
}
@ -2143,7 +2143,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL
}
break;
case PP_OD_EDIT_VDDC_CURVE:
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) {
if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) {
pr_warn("GFXCLK_CURVE not supported!\n");
return -ENOTSUPP;
}

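The rename pins down the units of the lookup: od_table->cap[] is indexed by bit position (the new SMU_11_0_ODCAP_* values), while SMU_11_0_ODFEATURE_* are the corresponding masks; the old code indexed the array with a mask, e.g. slot 4 instead of slot 2 for UCLK_MAX. A sketch of that off-by-a-power-of-two bug (table contents illustrative)::

    #include <stdio.h>
    #include <stdint.h>

    enum odcap { ODCAP_GFXCLK_LIMITS, ODCAP_GFXCLK_CURVE, ODCAP_UCLK_MAX };
    enum odfeature { ODFEATURE_UCLK_MAX = 1 << ODCAP_UCLK_MAX };    /* == 4 */

    int main(void)
    {
            uint32_t cap[32] = { 0 };

            cap[ODCAP_UCLK_MAX] = 1;        /* capability lives at index 2 */

            /* old lookup: indexes with the mask and reads slot 4 */
            printf("cap[ODFEATURE_UCLK_MAX] = %u\n", cap[ODFEATURE_UCLK_MAX]);
            /* fixed lookup: indexes with the bit position, slot 2 */
            printf("cap[ODCAP_UCLK_MAX]     = %u\n", cap[ODCAP_UCLK_MAX]);
            return 0;
    }
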
View file

@ -3838,7 +3838,8 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr,
else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY)
guid = msg->u.resource_stat.guid;
mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid);
if (guid)
mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid);
} else {
mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad);
}

View file

@ -3211,7 +3211,7 @@ static u8 *drm_find_cea_extension(const struct edid *edid)
return cea;
}
static const struct drm_display_mode *cea_mode_for_vic(u8 vic)
static __always_inline const struct drm_display_mode *cea_mode_for_vic(u8 vic)
{
BUILD_BUG_ON(1 + ARRAY_SIZE(edid_cea_modes_1) - 1 != 127);
BUILD_BUG_ON(193 + ARRAY_SIZE(edid_cea_modes_193) - 1 != 219);

View file

@ -357,14 +357,16 @@ parse_generic_dtd(struct drm_i915_private *dev_priv,
panel_fixed_mode->hdisplay + dtd->hfront_porch;
panel_fixed_mode->hsync_end =
panel_fixed_mode->hsync_start + dtd->hsync;
panel_fixed_mode->htotal = panel_fixed_mode->hsync_end;
panel_fixed_mode->htotal =
panel_fixed_mode->hdisplay + dtd->hblank;
panel_fixed_mode->vdisplay = dtd->vactive;
panel_fixed_mode->vsync_start =
panel_fixed_mode->vdisplay + dtd->vfront_porch;
panel_fixed_mode->vsync_end =
panel_fixed_mode->vsync_start + dtd->vsync;
panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end;
panel_fixed_mode->vtotal =
panel_fixed_mode->vdisplay + dtd->vblank;
panel_fixed_mode->clock = dtd->pixel_clock;
panel_fixed_mode->width_mm = dtd->width_mm;

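The blanking fix follows from the standard timing identity: hblank covers front porch + sync + back porch, so htotal = hdisplay + hblank, whereas the old htotal = hsync_end silently dropped the back porch (and likewise vtotal dropped the vertical back porch). Worked numbers for an illustrative panel::

    #include <stdio.h>

    int main(void)
    {
            int hdisplay = 1920, hfront = 48, hsync = 32, hback = 80;
            int hblank = hfront + hsync + hback;            /* 160 */

            int hsync_start = hdisplay + hfront;            /* 1968 */
            int hsync_end   = hsync_start + hsync;          /* 2000 */
            int htotal      = hdisplay + hblank;            /* 2080 */

            printf("hsync_end=%d htotal=%d (old code lost the %d-pixel back porch)\n",
                   hsync_end, htotal, htotal - hsync_end);
            return 0;
    }
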
View file

@ -12366,6 +12366,7 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state)
/* Copy parameters to slave plane */
linked_state->ctl = plane_state->ctl | PLANE_CTL_YUV420_Y_PLANE;
linked_state->color_ctl = plane_state->color_ctl;
linked_state->view = plane_state->view;
memcpy(linked_state->color_plane, plane_state->color_plane,
sizeof(linked_state->color_plane));
@ -14476,37 +14477,23 @@ static int intel_atomic_check_crtcs(struct intel_atomic_state *state)
return 0;
}
static bool intel_cpu_transcoder_needs_modeset(struct intel_atomic_state *state,
enum transcoder transcoder)
static bool intel_cpu_transcoders_need_modeset(struct intel_atomic_state *state,
u8 transcoders)
{
struct intel_crtc_state *new_crtc_state;
const struct intel_crtc_state *new_crtc_state;
struct intel_crtc *crtc;
int i;
for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
if (new_crtc_state->cpu_transcoder == transcoder)
return needs_modeset(new_crtc_state);
for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
if (new_crtc_state->hw.enable &&
transcoders & BIT(new_crtc_state->cpu_transcoder) &&
needs_modeset(new_crtc_state))
return true;
}
return false;
}
static void
intel_modeset_synced_crtcs(struct intel_atomic_state *state,
u8 transcoders)
{
struct intel_crtc_state *new_crtc_state;
struct intel_crtc *crtc;
int i;
for_each_new_intel_crtc_in_state(state, crtc,
new_crtc_state, i) {
if (transcoders & BIT(new_crtc_state->cpu_transcoder)) {
new_crtc_state->uapi.mode_changed = true;
new_crtc_state->update_pipe = false;
}
}
}
static int
intel_modeset_all_tiles(struct intel_atomic_state *state, int tile_grp_id)
{
@ -14662,15 +14649,20 @@ static int intel_atomic_check(struct drm_device *dev,
if (intel_dp_mst_is_slave_trans(new_crtc_state)) {
enum transcoder master = new_crtc_state->mst_master_transcoder;
if (intel_cpu_transcoder_needs_modeset(state, master)) {
if (intel_cpu_transcoders_need_modeset(state, BIT(master))) {
new_crtc_state->uapi.mode_changed = true;
new_crtc_state->update_pipe = false;
}
} else if (is_trans_port_sync_mode(new_crtc_state)) {
}
if (is_trans_port_sync_mode(new_crtc_state)) {
u8 trans = new_crtc_state->sync_mode_slaves_mask |
BIT(new_crtc_state->master_transcoder);
intel_modeset_synced_crtcs(state, trans);
if (intel_cpu_transcoders_need_modeset(state, trans)) {
new_crtc_state->uapi.mode_changed = true;
new_crtc_state->update_pipe = false;
}
}
}

View file

@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
return data;
}
#ifdef CONFIG_ACPI
static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
{
struct i2c_adapter_lookup *lookup = data;
@ -393,8 +394,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
acpi_handle adapter_handle;
acpi_status status;
if (intel_dsi->i2c_bus_num >= 0 ||
!i2c_acpi_get_i2c_resource(ares, &sb))
if (!i2c_acpi_get_i2c_resource(ares, &sb))
return 1;
if (lookup->slave_addr != sb->slave_address)
@ -413,14 +413,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
return 1;
}
static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
const u16 slave_addr)
{
struct drm_device *drm_dev = intel_dsi->base.base.dev;
struct device *dev = &drm_dev->pdev->dev;
struct acpi_device *acpi_dev;
struct list_head resource_list;
struct i2c_adapter_lookup lookup;
acpi_dev = ACPI_COMPANION(dev);
if (acpi_dev) {
memset(&lookup, 0, sizeof(lookup));
lookup.slave_addr = slave_addr;
lookup.intel_dsi = intel_dsi;
lookup.dev_handle = acpi_device_handle(acpi_dev);
INIT_LIST_HEAD(&resource_list);
acpi_dev_get_resources(acpi_dev, &resource_list,
i2c_adapter_lookup,
&lookup);
acpi_dev_free_resource_list(&resource_list);
}
}
#else
static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
const u16 slave_addr)
{
}
#endif
static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
{
struct drm_device *drm_dev = intel_dsi->base.base.dev;
struct device *dev = &drm_dev->pdev->dev;
struct i2c_adapter *adapter;
struct acpi_device *acpi_dev;
struct list_head resource_list;
struct i2c_adapter_lookup lookup;
struct i2c_msg msg;
int ret;
u8 vbt_i2c_bus_num = *(data + 2);
@ -431,20 +458,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
if (intel_dsi->i2c_bus_num < 0) {
intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
acpi_dev = ACPI_COMPANION(dev);
if (acpi_dev) {
memset(&lookup, 0, sizeof(lookup));
lookup.slave_addr = slave_addr;
lookup.intel_dsi = intel_dsi;
lookup.dev_handle = acpi_device_handle(acpi_dev);
INIT_LIST_HEAD(&resource_list);
acpi_dev_get_resources(acpi_dev, &resource_list,
i2c_adapter_lookup,
&lookup);
acpi_dev_free_resource_list(&resource_list);
}
i2c_acpi_find_adapter(intel_dsi, slave_addr);
}
adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);

View file

@ -1981,9 +1981,20 @@ static int __eb_parse(struct dma_fence_work *work)
pw->trampoline);
}
static void __eb_parse_release(struct dma_fence_work *work)
{
struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
if (pw->trampoline)
i915_active_release(&pw->trampoline->active);
i915_active_release(&pw->shadow->active);
i915_active_release(&pw->batch->active);
}
static const struct dma_fence_work_ops eb_parse_ops = {
.name = "eb_parse",
.work = __eb_parse,
.release = __eb_parse_release,
};
static int eb_parse_pipeline(struct i915_execbuffer *eb,
@ -1997,6 +2008,20 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
if (!pw)
return -ENOMEM;
err = i915_active_acquire(&eb->batch->active);
if (err)
goto err_free;
err = i915_active_acquire(&shadow->active);
if (err)
goto err_batch;
if (trampoline) {
err = i915_active_acquire(&trampoline->active);
if (err)
goto err_shadow;
}
dma_fence_work_init(&pw->base, &eb_parse_ops);
pw->engine = eb->engine;
@ -2006,7 +2031,9 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
pw->shadow = shadow;
pw->trampoline = trampoline;
dma_resv_lock(pw->batch->resv, NULL);
err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
if (err)
goto err_trampoline;
err = dma_resv_reserve_shared(pw->batch->resv, 1);
if (err)
@ -2034,6 +2061,14 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
err_batch_unlock:
dma_resv_unlock(pw->batch->resv);
err_trampoline:
if (trampoline)
i915_active_release(&trampoline->active);
err_shadow:
i915_active_release(&shadow->active);
err_batch:
i915_active_release(&eb->batch->active);
err_free:
kfree(pw);
return err;
}

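The new acquire calls and labels follow the usual kernel unwind idiom: take resources in order, and have each failure point jump to a label that releases, in reverse, exactly what was already taken. A generic sketch with stand-in resources (not the i915 types)::

    #include <stdio.h>

    static int acquire(const char *what) { printf("acquire %s\n", what); return 0; }
    static void release(const char *what) { printf("release %s\n", what); }

    static int parse_setup(int want_trampoline)
    {
            int err;

            err = acquire("batch");
            if (err)
                    goto err_out;
            err = acquire("shadow");
            if (err)
                    goto err_batch;
            if (want_trampoline) {
                    err = acquire("trampoline");
                    if (err)
                            goto err_shadow;
            }
            return 0;       /* ownership passes to the work's release hook */

    err_shadow:
            release("shadow");
    err_batch:
            release("batch");
    err_out:
            return err;
    }

    int main(void)
    {
            return parse_setup(1);
    }
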
View file

@ -455,10 +455,11 @@ static void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
{
struct i915_mmap_offset *mmo;
struct i915_mmap_offset *mmo, *mn;
spin_lock(&obj->mmo.lock);
list_for_each_entry(mmo, &obj->mmo.offsets, offset) {
rbtree_postorder_for_each_entry_safe(mmo, mn,
&obj->mmo.offsets, offset) {
/*
* vma_node_unmap for GTT mmaps handled already in
* __i915_gem_object_release_mmap_gtt
@ -487,6 +488,67 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
i915_gem_object_release_mmap_offset(obj);
}
static struct i915_mmap_offset *
lookup_mmo(struct drm_i915_gem_object *obj,
enum i915_mmap_type mmap_type)
{
struct rb_node *rb;
spin_lock(&obj->mmo.lock);
rb = obj->mmo.offsets.rb_node;
while (rb) {
struct i915_mmap_offset *mmo =
rb_entry(rb, typeof(*mmo), offset);
if (mmo->mmap_type == mmap_type) {
spin_unlock(&obj->mmo.lock);
return mmo;
}
if (mmo->mmap_type < mmap_type)
rb = rb->rb_right;
else
rb = rb->rb_left;
}
spin_unlock(&obj->mmo.lock);
return NULL;
}
static struct i915_mmap_offset *
insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo)
{
struct rb_node *rb, **p;
spin_lock(&obj->mmo.lock);
rb = NULL;
p = &obj->mmo.offsets.rb_node;
while (*p) {
struct i915_mmap_offset *pos;
rb = *p;
pos = rb_entry(rb, typeof(*pos), offset);
if (pos->mmap_type == mmo->mmap_type) {
spin_unlock(&obj->mmo.lock);
drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
&mmo->vma_node);
kfree(mmo);
return pos;
}
if (pos->mmap_type < mmo->mmap_type)
p = &rb->rb_right;
else
p = &rb->rb_left;
}
rb_link_node(&mmo->offset, rb, p);
rb_insert_color(&mmo->offset, &obj->mmo.offsets);
spin_unlock(&obj->mmo.lock);
return mmo;
}
static struct i915_mmap_offset *
mmap_offset_attach(struct drm_i915_gem_object *obj,
enum i915_mmap_type mmap_type,
@ -496,20 +558,22 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
struct i915_mmap_offset *mmo;
int err;
mmo = lookup_mmo(obj, mmap_type);
if (mmo)
goto out;
mmo = kmalloc(sizeof(*mmo), GFP_KERNEL);
if (!mmo)
return ERR_PTR(-ENOMEM);
mmo->obj = obj;
mmo->dev = obj->base.dev;
mmo->file = file;
mmo->mmap_type = mmap_type;
drm_vma_node_reset(&mmo->vma_node);
err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node,
obj->base.size / PAGE_SIZE);
err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
&mmo->vma_node, obj->base.size / PAGE_SIZE);
if (likely(!err))
goto out;
goto insert;
/* Attempt to reap some mmap space from dead objects */
err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT);
@ -517,19 +581,17 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
goto err;
i915_gem_drain_freed_objects(i915);
err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node,
obj->base.size / PAGE_SIZE);
err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
&mmo->vma_node, obj->base.size / PAGE_SIZE);
if (err)
goto err;
insert:
mmo = insert_mmo(obj, mmo);
GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo);
out:
if (file)
drm_vma_node_allow(&mmo->vma_node, file);
spin_lock(&obj->mmo.lock);
list_add(&mmo->offset, &obj->mmo.offsets);
spin_unlock(&obj->mmo.lock);
return mmo;
err:
@ -745,60 +807,43 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
struct drm_vma_offset_node *node;
struct drm_file *priv = filp->private_data;
struct drm_device *dev = priv->minor->dev;
struct drm_i915_gem_object *obj = NULL;
struct i915_mmap_offset *mmo = NULL;
struct drm_gem_object *obj = NULL;
struct file *anon;
if (drm_dev_is_unplugged(dev))
return -ENODEV;
rcu_read_lock();
drm_vma_offset_lock_lookup(dev->vma_offset_manager);
node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
vma->vm_pgoff,
vma_pages(vma));
if (likely(node)) {
mmo = container_of(node, struct i915_mmap_offset,
vma_node);
/*
* In our dependency chain, the drm_vma_offset_node
* depends on the validity of the mmo, which depends on
* the gem object. However the only reference we have
* at this point is the mmo (as the parent of the node).
* Try to check if the gem object was at least cleared.
*/
if (!mmo || !mmo->obj) {
drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
return -EINVAL;
}
if (node && drm_vma_node_is_allowed(node, priv)) {
/*
* Skip 0-refcnted objects as it is in the process of being
* destroyed and will be invalid when the vma manager lock
* is released.
*/
obj = &mmo->obj->base;
if (!kref_get_unless_zero(&obj->refcount))
obj = NULL;
mmo = container_of(node, struct i915_mmap_offset, vma_node);
obj = i915_gem_object_get_rcu(mmo->obj);
}
drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
rcu_read_unlock();
if (!obj)
return -EINVAL;
return node ? -EACCES : -EINVAL;
if (!drm_vma_node_is_allowed(node, priv)) {
drm_gem_object_put_unlocked(obj);
return -EACCES;
}
if (i915_gem_object_is_readonly(to_intel_bo(obj))) {
if (i915_gem_object_is_readonly(obj)) {
if (vma->vm_flags & VM_WRITE) {
drm_gem_object_put_unlocked(obj);
i915_gem_object_put(obj);
return -EINVAL;
}
vma->vm_flags &= ~VM_MAYWRITE;
}
anon = mmap_singleton(to_i915(obj->dev));
anon = mmap_singleton(to_i915(dev));
if (IS_ERR(anon)) {
drm_gem_object_put_unlocked(obj);
i915_gem_object_put(obj);
return PTR_ERR(anon);
}

View file

@ -63,7 +63,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->lut_list);
spin_lock_init(&obj->mmo.lock);
INIT_LIST_HEAD(&obj->mmo.offsets);
obj->mmo.offsets = RB_ROOT;
init_rcu_head(&obj->rcu);
@ -100,8 +100,8 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem);
struct drm_i915_file_private *fpriv = file->driver_priv;
struct i915_mmap_offset *mmo, *mn;
struct i915_lut_handle *lut, *ln;
struct i915_mmap_offset *mmo;
LIST_HEAD(close);
i915_gem_object_lock(obj);
@ -117,14 +117,8 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
i915_gem_object_unlock(obj);
spin_lock(&obj->mmo.lock);
list_for_each_entry(mmo, &obj->mmo.offsets, offset) {
if (mmo->file != file)
continue;
spin_unlock(&obj->mmo.lock);
rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset)
drm_vma_node_revoke(&mmo->vma_node, file);
spin_lock(&obj->mmo.lock);
}
spin_unlock(&obj->mmo.lock);
list_for_each_entry_safe(lut, ln, &close, obj_link) {
@ -203,12 +197,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
i915_gem_object_release_mmap(obj);
list_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) {
rbtree_postorder_for_each_entry_safe(mmo, mn,
&obj->mmo.offsets,
offset) {
drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
&mmo->vma_node);
kfree(mmo);
}
INIT_LIST_HEAD(&obj->mmo.offsets);
obj->mmo.offsets = RB_ROOT;
GEM_BUG_ON(atomic_read(&obj->bind_count));
GEM_BUG_ON(obj->userfault_count);

View file

@ -69,6 +69,15 @@ i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
return idr_find(&file->object_idr, handle);
}
static inline struct drm_i915_gem_object *
i915_gem_object_get_rcu(struct drm_i915_gem_object *obj)
{
if (obj && !kref_get_unless_zero(&obj->base.refcount))
obj = NULL;
return obj;
}
static inline struct drm_i915_gem_object *
i915_gem_object_lookup(struct drm_file *file, u32 handle)
{
@ -76,8 +85,7 @@ i915_gem_object_lookup(struct drm_file *file, u32 handle)
rcu_read_lock();
obj = i915_gem_object_lookup_rcu(file, handle);
if (obj && !kref_get_unless_zero(&obj->base.refcount))
obj = NULL;
obj = i915_gem_object_get_rcu(obj);
rcu_read_unlock();
return obj;

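The new i915_gem_object_get_rcu() captures the RCU lookup idiom: an object found under rcu_read_lock() may already be losing its last reference, so the lookup may only take a reference if the count is still non-zero, which is what kref_get_unless_zero() guarantees. A userspace sketch of that primitive with C11 atomics (not the kernel implementation)::

    #include <stdatomic.h>
    #include <stdio.h>

    /* Take a reference only if the object is not already dying. */
    static int get_unless_zero(atomic_int *refcount)
    {
            int v = atomic_load(refcount);

            while (v != 0) {
                    if (atomic_compare_exchange_weak(refcount, &v, v + 1))
                            return 1;       /* reference taken */
            }
            return 0;       /* dying: caller must treat as not found */
    }

    int main(void)
    {
            atomic_int ref = 1;

            printf("live object:  %d\n", get_unless_zero(&ref));   /* 1 */
            atomic_store(&ref, 0);
            printf("dying object: %d\n", get_unless_zero(&ref));   /* 0 */
            return 0;
    }
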
Some files were not shown because too many files have changed in this diff.