Merge drm/drm-next into drm-intel-next

Backmerge to sync up with drm-intel-gt-next and drm-misc-next. Signed-off-by: Jani Nikula <jani.nikula@intel.com>
2024-09-27 04:47:05 +00:00 · 2023-10-04 18:06:27 +03:00 · 2023-10-04 18:06:27 +03:00 · 7824a88b42
commit 7824a88b42
parent 25591b66d0 caacbdc28f
658 changed files with 10469 additions and 6024 deletions
--- a/Documentation/accel/qaic/qaic.rst
+++ b/Documentation/accel/qaic/qaic.rst
@ -123,6 +123,16 @@ DRM_IOCTL_QAIC_PART_DEV
  AIC100 device and can be used for limiting a process to some subset of
  resources.

+DRM_IOCTL_QAIC_DETACH_SLICE_BO
+  This IOCTL allows userspace to remove the slicing information from a BO that
+  was originally provided by a call to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. This
+  is the inverse of DRM_IOCTL_QAIC_ATTACH_SLICE_BO. The BO must be idle for
+  DRM_IOCTL_QAIC_DETACH_SLICE_BO to be called. After a successful detach slice
+  operation the BO may have new slicing information attached with a new call
+  to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. After detach slice, the BO cannot be
+  executed until after a new attach slice operation. Combining attach slice
+  and detach slice calls allows userspace to use a BO with multiple workloads.
+
 Userspace Client Isolation
 ==========================

--- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
@ -17,6 +17,7 @@ properties:
      - analogix,anx7808
      - analogix,anx7812
      - analogix,anx7814
+      - analogix,anx7816
      - analogix,anx7818

  reg:
--- a/Documentation/devicetree/bindings/display/panel/jdi,lpm102a188a.yaml
+++ b/Documentation/devicetree/bindings/display/panel/jdi,lpm102a188a.yaml
@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/jdi,lpm102a188a.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: JDI LPM102A188A 2560x1800 10.2" DSI Panel
+
+maintainers:
+  - Diogo Ivo <diogo.ivo@tecnico.ulisboa.pt>
+
+description: |
+  This panel requires a dual-channel DSI host to operate. It supports two modes:
+  - left-right: each channel drives the left or right half of the screen
+  - even-odd: each channel drives the even or odd lines of the screen
+
+  Each of the DSI channels controls a separate DSI peripheral. The peripheral
+  driven by the first link (DSI-LINK1) is considered the primary peripheral
+  and controls the device. The 'link2' property contains a phandle to the
+  peripheral driven by the second link (DSI-LINK2).
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+  compatible:
+    const: jdi,lpm102a188a
+
+  reg: true
+  enable-gpios: true
+  reset-gpios: true
+  power-supply: true
+  backlight: true
+
+  ddi-supply:
+    description: The regulator that provides IOVCC (1.8V).
+
+  link2:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: |
+      phandle to the DSI peripheral on the secondary link. Note that the
+      presence of this property marks the containing node as DSI-LINK1.
+
+required:
+  - compatible
+  - reg
+
+if:
+  required:
+    - link2
+then:
+  required:
+    - power-supply
+    - ddi-supply
+    - enable-gpios
+    - reset-gpios
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/gpio/tegra-gpio.h>
+
+    dsia: dsi@54300000 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        reg = <0x0 0x54300000 0x0 0x00040000>;
+
+        link2: panel@0 {
+            compatible = "jdi,lpm102a188a";
+            reg = <0>;
+        };
+    };
+
+    dsib: dsi@54400000 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        reg = <0x0 0x54400000 0x0 0x00040000>;
+        nvidia,ganged-mode = <&dsia>;
+
+        link1: panel@0 {
+            compatible = "jdi,lpm102a188a";
+            reg = <0>;
+            power-supply = <&pplcd_vdd>;
+            ddi-supply = <&pp1800_lcdio>;
+            enable-gpios = <&gpio TEGRA_GPIO(V, 1) GPIO_ACTIVE_HIGH>;
+            reset-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>;
+            link2 = <&link2>;
+            backlight = <&backlight>;
+        };
+    };
+
+...
--- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
@ -238,6 +238,8 @@ properties:
      - logictechno,lttd800480070-l6wh-rt
        # Mitsubishi "AA070MC01 7.0" WVGA TFT LCD panel
      - mitsubishi,aa070mc01-ca1
+        # Mitsubishi AA084XE01 8.4" XGA TFT LCD panel
+      - mitsubishi,aa084xe01
        # Multi-Inno Technology Co.,Ltd MI0700S4T-6 7" 800x480 TFT Resistive Touch Module
      - multi-inno,mi0700s4t-6
        # Multi-Inno Technology Co.,Ltd MI0800FT-9 8" 800x600 TFT Resistive Touch Module
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml
@ -18,6 +18,7 @@ properties:
          - rockchip,rk3288-mipi-dsi
          - rockchip,rk3399-mipi-dsi
          - rockchip,rk3568-mipi-dsi
+          - rockchip,rv1126-mipi-dsi
      - const: snps,dw-mipi-dsi

  interrupts:
@ -77,6 +78,7 @@ allOf:
            enum:
              - rockchip,px30-mipi-dsi
              - rockchip,rk3568-mipi-dsi
+              - rockchip,rv1126-mipi-dsi

    then:
      properties:
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml
@ -31,6 +31,7 @@ properties:
      - rockchip,rk3368-vop
      - rockchip,rk3399-vop-big
      - rockchip,rk3399-vop-lit
+      - rockchip,rv1126-vop

  reg:
    minItems: 1
--- a/Documentation/driver-api/dma-buf.rst
+++ b/Documentation/driver-api/dma-buf.rst
@ -5,14 +5,30 @@ The dma-buf subsystem provides the framework for sharing buffers for
 hardware (DMA) access across multiple device drivers and subsystems, and
 for synchronizing asynchronous hardware access.

-This is used, for example, by drm "prime" multi-GPU support, but is of
-course not limited to GPU use cases.
+As an example, it is used extensively by the DRM subsystem to exchange
+buffers between processes, contexts, library APIs within the same
+process, and also to exchange buffers with other subsystems such as
+V4L2.
+
+This document describes the way in which kernel subsystems can use and
+interact with the three main primitives offered by dma-buf:
+
+ - dma-buf, representing a sg_table and exposed to userspace as a file
+   descriptor to allow passing between processes, subsystems, devices,
+   etc;
+ - dma-fence, providing a mechanism to signal when an asynchronous
+   hardware operation has completed; and
+ - dma-resv, which manages a set of dma-fences for a particular dma-buf
+   allowing implicit (kernel-ordered) synchronization of work to
+   preserve the illusion of coherent access
+
+
+Userspace API principles and use
+--------------------------------
+
+For more details on how to design your subsystem's API for dma-buf use, please
+see Documentation/userspace-api/dma-buf-alloc-exchange.rst.

-The three main components of this are: (1) dma-buf, representing a
-sg_table and exposed to userspace as a file descriptor to allow passing
-between devices, (2) fence, which provides a mechanism to signal when
-one device has finished access, and (3) reservation, which manages the
-shared or exclusive fence(s) associated with the buffer.

 Shared DMA Buffers
 ------------------
--- a/Documentation/filesystems/btrfs.rst
+++ b/Documentation/filesystems/btrfs.rst
@ -37,7 +37,6 @@ For more information please refer to the documentation site or wiki

  https://btrfs.readthedocs.io

-  https://btrfs.wiki.kernel.org

 that maintains information about administration tasks, frequently asked
 questions, use cases, mount options, comprehensible changelogs, features,
--- a/Documentation/gpu/drm-uapi.rst
+++ b/Documentation/gpu/drm-uapi.rst
@ -486,3 +486,10 @@ and the CRTC index is its position in this array.

 .. kernel-doc:: include/uapi/drm/drm_mode.h
   :internal:
+
+
+dma-buf interoperability
+========================
+
+Please see Documentation/userspace-api/dma-buf-alloc-exchange.rst for
+information on how dma-buf is integrated and exposed within DRM.
--- a/Documentation/gpu/rfc/xe.rst
+++ b/Documentation/gpu/rfc/xe.rst
@ -67,14 +67,8 @@ platforms.

 When the time comes for Xe, the protection will be lifted on Xe and kept in i915.

-Xe driver will be protected with both STAGING Kconfig and force_probe. Changes in
-the uAPI are expected while the driver is behind these protections. STAGING will
-be removed when the driver uAPI gets to a mature state where we can guarantee the
-‘no regression’ rule. Then force_probe will be lifted only for future platforms
-that will be productized with Xe driver, but not with i915.
-
-Xe – Pre-Merge Goals
-====================
+Xe – Pre-Merge Goals - Work-in-Progress
+=======================================

 Drm_scheduler
 -------------
@ -94,41 +88,6 @@ depend on any other patch touching drm_scheduler itself that was not yet merged
 through drm-misc. This, by itself, already includes the reach of an agreement for
 uniform 1 to 1 relationship implementation / usage across drivers.

-GPU VA
------
-Two main goals of Xe are meeting together here:
-
-1) Have an uAPI that aligns with modern UMD needs.
-
-2) Early upstream engagement.
-
-RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping
-track of GPU virtual address mappings. This is still not merged upstream, but
-this aligns very well with our goals and with our VM_BIND. The engagement with
-upstream and the port of Xe towards GPUVA is already ongoing.
-
-As a key measurable result, Xe needs to be aligned with the GPU VA and working in
-our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA
-related patch should be independent and present on dri-devel or acked by
-maintainers to go along with the first Xe pull request towards drm-next.
-
-DRM_VM_BIND
-----------
-Nouveau, and Xe are all implementing ‘VM_BIND’ and new ‘Exec’ uAPIs in order to
-fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the
-development of a common new drm_infrastructure. However, the Xe team needs to
-engage with the community to explore the options of a common API.
-
-As a key measurable result, the DRM_VM_BIND needs to be documented in this file
-below, or this entire block deleted if the consensus is for independent drivers
-vm_bind ioctls.
-
-Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
-Xe merged, it is mandatory to enforce the overall locking scheme for all major
-structs and list (so vm and vma). So, a consensus is needed, and possibly some
-common helpers. If helpers are needed, they should be also documented in this
-document.
-
 ASYNC VM_BIND
 -------------
 Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
@ -212,6 +171,14 @@ This item ties into the GPUVA, VM_BIND, and even long-running compute support.
 As a key measurable result, we need to have a community consensus documented in
 this document and the Xe driver prepared for the changes, if necessary.

+Xe – uAPI high level overview
+=============================
+
+...Warning: To be done in follow-up patches after/when/where the main consensus on the various items is individually reached.
+
+Xe – Pre-Merge Goals - Completed
+================================
+
 Dev_coredump
 ------------

@ -229,7 +196,37 @@ infrastructure with overall possible improvements, like multiple file support
 for better organization of the dumps, snapshot support, dmesg extra print,
 and whatever may make sense and help the overall infrastructure.

-Xe – uAPI high level overview
-=============================
+DRM_VM_BIND
+-----------
+Nouveau, and Xe are all implementing ‘VM_BIND’ and new ‘Exec’ uAPIs in order to
+fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the
+development of a common new drm_infrastructure. However, the Xe team needs to
+engage with the community to explore the options of a common API.

-...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
+As a key measurable result, the DRM_VM_BIND needs to be documented in this file
+below, or this entire block deleted if the consensus is for independent drivers
+vm_bind ioctls.
+
+Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
+Xe merged, it is mandatory to enforce the overall locking scheme for all major
+structs and lists (so vm and vma). So, a consensus is needed, and possibly some
+common helpers. If helpers are needed, they should also be documented in this
+document.
+
+GPU VA
+------
+Two main goals of Xe are meeting together here:
+
+1) Have a uAPI that aligns with modern UMD needs.
+
+2) Early upstream engagement.
+
+RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping
+track of GPU virtual address mappings. This is still not merged upstream, but
+this aligns very well with our goals and with our VM_BIND. The engagement with
+upstream and the port of Xe towards GPUVA is already ongoing.
+
+As a key measurable result, Xe needs to be aligned with the GPU VA and working in
+our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA
+related patch should be independent and present on dri-devel or acked by
+maintainers to go along with the first Xe pull request towards drm-next.
--- a/Documentation/process/embargoed-hardware-issues.rst
+++ b/Documentation/process/embargoed-hardware-issues.rst
@ -251,6 +251,7 @@ an involved disclosed party. The current ambassadors list:
  IBM Z		Christian Borntraeger <borntraeger@de.ibm.com>
  Intel		Tony Luck <tony.luck@intel.com>
  Qualcomm	Trilok Soni <tsoni@codeaurora.org>
+  RISC-V	Palmer Dabbelt <palmer@dabbelt.com>
  Samsung	Javier González <javier.gonz@samsung.com>

  Microsoft	James Morris <jamorris@linux.microsoft.com>
--- a/Documentation/userspace-api/dma-buf-alloc-exchange.rst
+++ b/Documentation/userspace-api/dma-buf-alloc-exchange.rst
@ -0,0 +1,389 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright 2021-2023 Collabora Ltd.
+
+========================
+Exchanging pixel buffers
+========================
+
+As originally designed, the Linux graphics subsystem had extremely limited
+support for sharing pixel-buffer allocations between processes, devices, and
+subsystems. Modern systems require extensive integration between all three
+classes; this document details how applications and kernel subsystems should
+approach this sharing for two-dimensional image data.
+
+It is written with reference to the DRM subsystem for GPU and display devices,
+V4L2 for media devices, and also to Vulkan, EGL and Wayland, for userspace
+support; however, any other subsystems should also follow this design and advice.
+
+
+Glossary of terms
+=================
+
+.. glossary::
+
+    image:
+      Conceptually a two-dimensional array of pixels. The pixels may be stored
+      in one or more memory buffers. Has width and height in pixels, pixel
+      format and modifier (implicit or explicit).
+
+    row:
+      A span along a single y-axis value, e.g. from co-ordinates (0,100) to
+      (200,100).
+
+    scanline:
+      Synonym for row.
+
+    column:
+      A span along a single x-axis value, e.g. from co-ordinates (100,0) to
+      (100,100).
+
+    memory buffer:
+      A piece of memory for storing (parts of) pixel data. Has stride and size
+      in bytes and at least one handle in some API. May contain one or more
+      planes.
+
+    plane:
+      A two-dimensional array of some or all of an image's color and alpha
+      channel values.
+
+    pixel:
+      A picture element. Has a single color value which is defined by one or
+      more color channel values, e.g. R, G and B, or Y, Cb and Cr. May also
+      have an alpha value as an additional channel.
+
+    pixel data:
+      Bytes or bits that represent some or all of the color/alpha channel values
+      of a pixel or an image. The data for one pixel may be spread over several
+      planes or memory buffers depending on format and modifier.
+
+    color value:
+      A tuple of numbers, representing a color. Each element in the tuple is a
+      color channel value.
+
+    color channel:
+      One of the dimensions in a color model. For example, RGB model has
+      channels R, G, and B. Alpha channel is sometimes counted as a color
+      channel as well.
+
+    pixel format:
+      A description of how pixel data represents the pixel's color and alpha
+      values.
+
+    modifier:
+      A description of how pixel data is laid out in memory buffers.
+
+    alpha:
+      A value that denotes the color coverage in a pixel. Sometimes used for
+      translucency instead.
+
+    stride:
+      A value that denotes the relationship between pixel-location co-ordinates
+      and byte-offset values. Typically used as the byte offset between two
+      pixels at the start of vertically-consecutive tiling blocks. For linear
+      layouts, the byte offset between two vertically-adjacent pixels. For
+      non-linear formats the stride must be computed in a consistent way, which
+      usually is done as-if the layout was linear.
+
+    pitch:
+      Synonym for stride.
+
+
+Formats and modifiers
+=====================
+
+Each buffer must have an underlying format. This format describes the color
+values provided for each pixel. Although each subsystem has its own format
+descriptions (e.g. V4L2 and fbdev), the ``DRM_FORMAT_*`` tokens should be reused
+wherever possible, as they are the standard descriptions used for interchange.
+These tokens are described in the ``drm_fourcc.h`` file, which is a part of
+DRM's uAPI.
+
+Each ``DRM_FORMAT_*`` token describes the translation between a pixel
+co-ordinate in an image, and the color values for that pixel contained within
+its memory buffers. The number and type of color channels are described:
+whether they are RGB or YUV, integer or floating-point, the size of each channel
+and their locations within the pixel memory, and the relationship between color
+planes.
+
+For example, ``DRM_FORMAT_ARGB8888`` describes a format in which each pixel has
+a single 32-bit value in memory. Alpha, red, green, and blue color channels are
+available at 8-bit precision per channel, ordered respectively from most to
+least significant bits in little-endian storage. ``DRM_FORMAT_*`` is not
+affected by either CPU or device endianness; the byte pattern in memory is
+always as described in the format definition, which is usually little-endian.
+
+As a more complex example, ``DRM_FORMAT_NV12`` describes a format in which luma
+and chroma YUV samples are stored in separate planes, where the chroma plane is
+stored at half the resolution in both dimensions (i.e. one U/V chroma
+sample is stored for each 2x2 pixel grouping).
+
+Format modifiers describe a translation mechanism between these per-pixel memory
+samples, and the actual memory storage for the buffer. The most straightforward
+modifier is ``DRM_FORMAT_MOD_LINEAR``, describing a scheme in which each plane
+is laid out row-sequentially, from the top-left to the bottom-right corner.
+This is considered the baseline interchange format, and most convenient for CPU
+access.
+
+Modern hardware employs much more sophisticated access mechanisms, typically
+making use of tiled access and possibly also compression. For example, the
+``DRM_FORMAT_MOD_VIVANTE_TILED`` modifier describes memory storage where pixels
+are stored in 4x4 blocks arranged in row-major ordering, i.e. the first tile in
+a plane stores pixels (0,0) to (3,3) inclusive, and the second tile in a plane
+stores pixels (4,0) to (7,3) inclusive.
+
+Some modifiers may modify the number of planes required for an image; for
+example, the ``I915_FORMAT_MOD_Y_TILED_CCS`` modifier adds a second plane to RGB
+formats in which it stores data about the status of every tile, notably
+including whether the tile is fully populated with pixel data, or can be
+expanded from a single solid color.
+
+These extended layouts are highly vendor-specific, and even specific to
+particular generations or configurations of devices per-vendor. For this reason,
+support of modifiers must be explicitly enumerated and negotiated by all users
+in order to ensure a compatible and optimal pipeline, as discussed below.
+
+
+Dimensions and size
+===================
+
+Each pixel buffer must be accompanied by logical pixel dimensions. This refers
+to the number of unique samples which can be extracted from, or stored to, the
+underlying memory storage. For example, even though a 1920x1080
+``DRM_FORMAT_NV12`` buffer has a luma plane containing 1920x1080 samples for the Y
+component, and 960x540 samples for the U and V components, the overall buffer is
+still described as having dimensions of 1920x1080.
+
+The in-memory storage of a buffer is not guaranteed to begin immediately at the
+base address of the underlying memory, nor is it guaranteed that the memory
+storage is tightly clipped to either dimension.
+
+Each plane must therefore be described with an ``offset`` in bytes, which will be
+added to the base address of the memory storage before performing any per-pixel
+calculations. This may be used to combine multiple planes into a single memory
+buffer; for example, ``DRM_FORMAT_NV12`` may be stored in a single memory buffer
+where the luma plane's storage begins immediately at the start of the buffer
+with an offset of 0, and the chroma plane's storage follows within the same buffer
+beginning from the byte offset for that plane.
+
+Each plane must also have a ``stride`` in bytes, expressing the offset in memory
+between two contiguous rows. For example, a ``DRM_FORMAT_MOD_LINEAR`` buffer
+with dimensions of 1000x1000 may have been allocated as if it were 1024x1000, in
+order to allow for aligned access patterns. In this case, the buffer will still
+be described with a width of 1000, however the stride will be ``1024 * bpp``,
+indicating that there are 24 pixels at the positive extreme of the x axis whose
+values are not significant.
+
+Buffers may also be padded further in the y dimension, simply by allocating a
+larger area than would ordinarily be required. For example, many media decoders
+are not able to natively output buffers of height 1080, but instead require an
+effective height of 1088 pixels. In this case, the buffer continues to be
+described as having a height of 1080, with the memory allocation for each buffer
+being increased to account for the extra padding.
+
+
+Enumeration
+===========
+
+Every user of pixel buffers must be able to enumerate a set of supported formats
+and modifiers, described together. Within KMS, this is achieved with the
+``IN_FORMATS`` property on each DRM plane, listing the supported DRM formats, and
+the modifiers supported for each format. In userspace, this is supported through
+the `EGL_EXT_image_dma_buf_import_modifiers`_ extension entrypoints for EGL, the
+`VK_EXT_image_drm_format_modifier`_ extension for Vulkan, and the
+`zwp_linux_dmabuf_v1`_ extension for Wayland.
+
+Each of these interfaces allows users to query a set of supported
+format+modifier combinations.
+
+
+Negotiation
+===========
+
+It is the responsibility of userspace to negotiate an acceptable format+modifier
+combination for its usage. This is performed through a simple intersection of
+lists. For example, if a user wants to use Vulkan to render an image to be
+displayed on a KMS plane, it must:
+
+ - query KMS for the ``IN_FORMATS`` property for the given plane
+ - query Vulkan for the supported formats for its physical device, making sure
+   to pass the ``VkImageUsageFlagBits`` and ``VkImageCreateFlagBits``
+   corresponding to the intended rendering use
+ - intersect these formats to determine the most appropriate one
+ - for this format, intersect the lists of supported modifiers for both KMS and
+   Vulkan, to obtain a final list of acceptable modifiers for that format
+
+This intersection must be performed for all usages. For example, if the user
+also wishes to encode the image to a video stream, it must query the media API
+it intends to use for encoding for the set of modifiers it supports, and
+additionally intersect against this list.
+
+If the intersection of all lists is an empty list, it is not possible to share
+buffers in this way, and an alternate strategy must be considered (e.g. using
+CPU access routines to copy data between the different uses, with the
+corresponding performance cost).
+
+The resulting modifier list is unsorted; the order is not significant.
+
+
+Allocation
+==========
+
+Once userspace has determined an appropriate format, and corresponding list of
+acceptable modifiers, it must allocate the buffer. As there is no universal
+buffer-allocation interface available at either kernel or userspace level, the
+client makes an arbitrary choice of allocation interface such as Vulkan, GBM, or
+a media API.
+
+Each allocation request must take, at a minimum: the pixel format, a list of
+acceptable modifiers, and the buffer's width and height. Each API may extend
+this set of properties in different ways, such as allowing allocation in more
+than two dimensions, intended usage patterns, etc.
+
+The component which allocates the buffer will make an arbitrary choice of what
+it considers the 'best' modifier within the acceptable list for the requested
+allocation, any padding required, and further properties of the underlying
+memory buffers such as whether they are stored in system or device-specific
+memory, whether or not they are physically contiguous, and their cache mode.
+These properties of the memory buffer are not visible to userspace, however the
+``dma-heaps`` API is an effort to address this.
+
+After allocation, the client must query the allocator to determine the actual
+modifier selected for the buffer, as well as the per-plane offset and stride.
+Allocators are not permitted to vary the format in use, to select a modifier not
+provided within the acceptable list, nor to vary the pixel dimensions other than
+the padding expressed through offset, stride, and size.
+
+Communicating additional constraints, such as alignment of stride or offset,
+placement within a particular memory area, etc, is out of scope of dma-buf,
+and is not solved by format and modifier tokens.
+
+
+Import
+======
+
+To use a buffer within a different context, device, or subsystem, the user
+passes these parameters (format, modifier, width, height, and per-plane offset
+and stride) to an importing API.
+
+Each memory buffer is referred to by a buffer handle, which may be unique or
+duplicated within an image. For example, a ``DRM_FORMAT_NV12`` buffer may have
+the luma and chroma buffers combined into a single memory buffer by use of the
+per-plane offset parameters, or they may be completely separate allocations in
+memory. For this reason, each import and allocation API must provide a separate
+handle for each plane.
+
+Each kernel subsystem has its own types and interfaces for buffer management.
+DRM uses GEM buffer objects (BOs), V4L2 has its own references, etc. These types
+are not portable between contexts, processes, devices, or subsystems.
+
+To address this, ``dma-buf`` handles are used as the universal interchange for
+buffers. Subsystem-specific operations are used to export native buffer handles
+to a ``dma-buf`` file descriptor, and to import those file descriptors into a
+native buffer handle. dma-buf file descriptors can be transferred between
+contexts, processes, devices, and subsystems.
+
+For example, a Wayland media player may use V4L2 to decode a video frame into a
+``DRM_FORMAT_NV12`` buffer. This will result in two memory planes (luma and
+chroma) being dequeued by the user from V4L2. These planes are then exported to
+one dma-buf file descriptor per plane; these descriptors are then sent along
+with the metadata (format, modifier, width, height, per-plane offset and stride)
+to the Wayland server. The Wayland server will then import these file
+descriptors as an EGLImage for use through EGL/OpenGL (ES), a VkImage for use
+through Vulkan, or a KMS framebuffer object; each of these import operations
+will take the same metadata and convert the dma-buf file descriptors into their
+native buffer handles.
+
+Having a non-empty intersection of supported modifiers does not guarantee that
+import will succeed into all consumers; they may have constraints beyond those
+implied by modifiers which must be satisfied.
+
+
+Implicit modifiers
+==================
+
+The concept of modifiers post-dates all of the subsystems mentioned above. As
+such, it has been retrofitted into all of these APIs, and in order to ensure
+backwards compatibility, support is needed for drivers and userspace which do
+not (yet) support modifiers.
+
+As an example, GBM is used to allocate buffers to be shared between EGL for
+rendering and KMS for display. It has two entrypoints for allocating buffers:
+``gbm_bo_create`` which only takes the format, width, height, and a usage token,
+and ``gbm_bo_create_with_modifiers`` which extends this with a list of modifiers.
+
+In the latter case, the allocation is as discussed above, being provided with a
+list of acceptable modifiers that the implementation can choose from (or fail if
+it is not possible to allocate within those constraints). In the former case
+where modifiers are not provided, the GBM implementation must make its own
+choice as to what is likely to be the 'best' layout. Such a choice is entirely
+implementation-specific: some will internally use tiled layouts which are not
+CPU-accessible if the implementation decides that is a good idea through
+whatever heuristic. It is the implementation's responsibility to ensure that
+this choice is appropriate.
+
+To support this case where the layout is not known because there is no awareness
+of modifiers, a special ``DRM_FORMAT_MOD_INVALID`` token has been defined. This
+pseudo-modifier declares that the layout is not known, and that the driver
+should use its own logic to determine what the underlying layout may be.
+
+.. note::
+
+  ``DRM_FORMAT_MOD_INVALID`` is a non-zero value. The modifier value zero is
+  ``DRM_FORMAT_MOD_LINEAR``, which is an explicit guarantee that the image
+  has the linear layout. Care and attention should be taken to ensure that
+  zero as a default value is not mixed up with either no modifier or the linear
+  modifier. Also note that in some APIs the invalid modifier value is specified
+  with an out-of-band flag, like in ``DRM_IOCTL_MODE_ADDFB2``.
+
+There are four cases where this token may be used:
+  - during enumeration, an interface may return ``DRM_FORMAT_MOD_INVALID``, either
+    as the sole member of a modifier list to declare that explicit modifiers are
+    not supported, or as part of a larger list to declare that implicit modifiers
+    may be used
+  - during allocation, a user may supply ``DRM_FORMAT_MOD_INVALID``, either as the
+    sole member of a modifier list (equivalent to not supplying a modifier list
+    at all) to declare that explicit modifiers are not supported and must not be
+    used, or as part of a larger list to declare that an allocation using implicit
+    modifiers is acceptable
+  - in a post-allocation query, an implementation may return
+    ``DRM_FORMAT_MOD_INVALID`` as the modifier of the allocated buffer to declare
+    that the underlying layout is implementation-defined and that an explicit
+    modifier description is not available; per the above rules, this may only be
+    returned when the user has included ``DRM_FORMAT_MOD_INVALID`` as part of the
+    list of acceptable modifiers, or not provided a list
+  - when importing a buffer, the user may supply ``DRM_FORMAT_MOD_INVALID`` as the
+    buffer modifier (or not supply a modifier) to indicate that the modifier is
+    unknown for whatever reason; this is only acceptable when the buffer has
+    not been allocated with an explicit modifier
+
+It follows from this that for any single buffer, the complete chain of operations
+formed by the producer and all the consumers must be either fully implicit or fully
+explicit. For example, if a user wishes to allocate a buffer for use between
+GPU, display, and media, but the media API does not support modifiers, then the
+user **must not** allocate the buffer with explicit modifiers and attempt to
+import the buffer into the media API with no modifier, but either perform the
+allocation using implicit modifiers, or allocate the buffer for media use
+separately and copy between the two buffers.
+
+As one exception to the above, allocations may be 'upgraded' from implicit
+to explicit modifiers. For example, if the buffer is allocated with
+``gbm_bo_create`` (taking no modifiers), the user may then query the modifier with
+``gbm_bo_get_modifier`` and then use this modifier as an explicit modifier token
+if a valid modifier is returned.
+
+When allocating buffers for exchange between different users and modifiers are
+not available, implementations are strongly encouraged to use
+``DRM_FORMAT_MOD_LINEAR`` for their allocation, as this is the universal baseline
+for exchange. However, it is not guaranteed that this will result in the correct
+interpretation of buffer content, as implicit modifier operation may still be
+subject to driver-specific heuristics.
+
+Any new users - userspace programs and protocols, kernel subsystems, etc -
+wishing to exchange buffers must offer interoperability through dma-buf file
+descriptors for memory planes, DRM format tokens to describe the format, DRM
+format modifiers to describe the layout in memory, at least width and height for
+dimensions, and at least offset and stride for each memory plane.
+
+.. _zwp_linux_dmabuf_v1: https://gitlab.freedesktop.org/wayland/wayland-protocols/-/blob/main/unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml
+.. _VK_EXT_image_drm_format_modifier: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_image_drm_format_modifier.html
+.. _EGL_EXT_image_dma_buf_import_modifiers: https://registry.khronos.org/EGL/extensions/EXT/EGL_EXT_image_dma_buf_import_modifiers.txt
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@ -22,6 +22,7 @@ place where this information is gathered.
   unshare
   spec_ctrl
   accelerators/ocxl
+   dma-buf-alloc-exchange
   ebpf/index
   ELF
   ioctl/index
--- a/36
+++ b/36
@ -1626,10 +1626,9 @@ F:	drivers/gpu/drm/arm/display/include/
 F:	drivers/gpu/drm/arm/display/komeda/

 ARM MALI PANFROST DRM DRIVER
+M:	Boris Brezillon <boris.brezillon@collabora.com>
 M:	Rob Herring <robh@kernel.org>
-M:	Tomeu Vizoso <tomeu.vizoso@collabora.com>
 R:	Steven Price <steven.price@arm.com>
-R:	Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@ -1855,7 +1854,7 @@ F:	Documentation/devicetree/bindings/phy/amlogic*
 F:	arch/arm/boot/dts/amlogic/
 F:	arch/arm/mach-meson/
 F:	arch/arm64/boot/dts/amlogic/
-F:	drivers/genpd/amlogic/
+F:	drivers/pmdomain/amlogic/
 F:	drivers/mmc/host/meson*
 F:	drivers/phy/amlogic/
 F:	drivers/pinctrl/meson/
@ -1918,7 +1917,7 @@ F:	drivers/bluetooth/hci_bcm4377.c
 F:	drivers/clk/clk-apple-nco.c
 F:	drivers/cpufreq/apple-soc-cpufreq.c
 F:	drivers/dma/apple-admac.c
-F:	drivers/genpd/apple/
+F:	drivers/pmdomain/apple/
 F:	drivers/i2c/busses/i2c-pasemi-core.c
 F:	drivers/i2c/busses/i2c-pasemi-platform.c
 F:	drivers/iommu/apple-dart.c
@ -2435,7 +2434,7 @@ F:	arch/arm/mach-ux500/
 F:	drivers/clk/clk-nomadik.c
 F:	drivers/clocksource/clksrc-dbx500-prcmu.c
 F:	drivers/dma/ste_dma40*
-F:	drivers/genpd/st/ste-ux500-pm-domain.c
+F:	drivers/pmdomain/st/ste-ux500-pm-domain.c
 F:	drivers/hwspinlock/u8500_hsem.c
 F:	drivers/i2c/busses/i2c-nomadik.c
 F:	drivers/iio/adc/ab8500-gpadc.c
@ -2598,7 +2597,7 @@ F:	arch/arm/include/debug/renesas-scif.S
 F:	arch/arm/mach-shmobile/
 F:	arch/arm64/boot/dts/renesas/
 F:	arch/riscv/boot/dts/renesas/
-F:	drivers/genpd/renesas/
+F:	drivers/pmdomain/renesas/
 F:	drivers/soc/renesas/
 F:	include/linux/soc/renesas/
 K:	\brenesas,
@ -4026,7 +4025,7 @@ F:	arch/mips/kernel/*bmips*
 F:	drivers/irqchip/irq-bcm63*
 F:	drivers/irqchip/irq-bcm7*
 F:	drivers/irqchip/irq-brcmstb*
-F:	drivers/genpd/bcm/bcm63xx-power.c
+F:	drivers/pmdomain/bcm/bcm63xx-power.c
 F:	include/linux/bcm963xx_nvram.h
 F:	include/linux/bcm963xx_tag.h

@ -4248,7 +4247,7 @@ R:	Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
 L:	linux-pm@vger.kernel.org
 S:	Maintained
 T:	git https://github.com/broadcom/stblinux.git
-F:	drivers/genpd/bcm/bcm-pmb.c
+F:	drivers/pmdomain/bcm/bcm-pmb.c
 F:	include/dt-bindings/soc/bcm-pmb.h

 BROADCOM SPECIFIC AMBA DRIVER (BCMA)
@ -4378,7 +4377,6 @@ M:	David Sterba <dsterba@suse.com>
 L:	linux-btrfs@vger.kernel.org
 S:	Maintained
 W:	https://btrfs.readthedocs.io
-W:	https://btrfs.wiki.kernel.org/
 Q:	https://patchwork.kernel.org/project/linux-btrfs/list/
 C:	irc://irc.libera.chat/btrfs
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git
@ -6133,6 +6131,7 @@ L:	linaro-mm-sig@lists.linaro.org (moderated for non-subscribers)
 S:	Maintained
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/driver-api/dma-buf.rst
+F:	Documentation/userspace-api/dma-buf-alloc-exchange.rst
 F:	drivers/dma-buf/
 F:	include/linux/*fence.h
 F:	include/linux/dma-buf.h
@ -6909,7 +6908,9 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/display/bridge/
 F:	drivers/gpu/drm/bridge/
 F:	drivers/gpu/drm/drm_bridge.c
+F:	drivers/gpu/drm/drm_bridge_connector.c
 F:	include/drm/drm_bridge.h
+F:	include/drm/drm_bridge_connector.h

 DRM DRIVERS FOR EXYNOS
 M:	Inki Dae <inki.dae@samsung.com>
@ -6933,10 +6934,12 @@ F:	Documentation/devicetree/bindings/display/fsl,dcu.txt
 F:	Documentation/devicetree/bindings/display/fsl,tcon.txt
 F:	drivers/gpu/drm/fsl-dcu/

-DRM DRIVERS FOR FREESCALE IMX
+DRM DRIVERS FOR FREESCALE IMX 5/6
 M:	Philipp Zabel <p.zabel@pengutronix.de>
 L:	dri-devel@lists.freedesktop.org
 S:	Maintained
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+T:	git git://git.pengutronix.de/git/pza/linux
 F:	Documentation/devicetree/bindings/display/imx/
 F:	drivers/gpu/drm/imx/ipuv3/
 F:	drivers/gpu/ipu-v3/
@ -6955,7 +6958,7 @@ DRM DRIVERS FOR GMA500 (Poulsbo, Moorestown and derivative chipsets)
 M:	Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Maintained
-T:	git git://github.com/patjak/drm-gma500
+T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/gma500/

 DRM DRIVERS FOR HISILICON
@ -7139,6 +7142,7 @@ F:	include/drm/gpu_scheduler.h

 DRM PANEL DRIVERS
 M:	Neil Armstrong <neil.armstrong@linaro.org>
+R:	Jessica Zhang <quic_jesszhan@quicinc.com>
 R:	Sam Ravnborg <sam@ravnborg.org>
 L:	dri-devel@lists.freedesktop.org
 S:	Maintained
@ -8729,7 +8733,7 @@ M:	Ulf Hansson <ulf.hansson@linaro.org>
 L:	linux-pm@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm.git
-F:	drivers/genpd/
+F:	drivers/pmdomain/

 GENERIC RESISTIVE TOUCHSCREEN ADC DRIVER
 M:	Eugen Hristev <eugen.hristev@microchip.com>
@ -17680,7 +17684,7 @@ L:	linux-pm@vger.kernel.org
 L:	linux-arm-msm@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml
-F:	drivers/genpd/qcom/cpr.c
+F:	drivers/pmdomain/qcom/cpr.c

 QUALCOMM CPUFREQ DRIVER MSM8996/APQ8096
 M:	Ilia Lin <ilia.lin@kernel.org>
@ -20514,7 +20518,7 @@ STARFIVE JH71XX PMU CONTROLLER DRIVER
 M:	Walker Chen <walker.chen@starfivetech.com>
 S:	Supported
 F:	Documentation/devicetree/bindings/power/starfive*
-F:	drivers/genpd/starfive/jh71xx-pmu.c
+F:	drivers/pmdomain/starfive/jh71xx-pmu.c
 F:	include/dt-bindings/power/starfive,jh7110-pmu.h

 STARFIVE SOC DRIVERS
@ -21339,7 +21343,7 @@ F:	drivers/irqchip/irq-ti-sci-inta.c
 F:	drivers/irqchip/irq-ti-sci-intr.c
 F:	drivers/reset/reset-ti-sci.c
 F:	drivers/soc/ti/ti_sci_inta_msi.c
-F:	drivers/genpd/ti/ti_sci_pm_domains.c
+F:	drivers/pmdomain/ti/ti_sci_pm_domains.c
 F:	include/dt-bindings/soc/ti,sci_pm_domain.h
 F:	include/linux/soc/ti/ti_sci_inta_msi.h
 F:	include/linux/soc/ti/ti_sci_protocol.h
@ -21581,7 +21585,7 @@ L:	linux-kernel@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git
-F:	drivers/genpd/ti/omap_prm.c
+F:	drivers/pmdomain/ti/omap_prm.c
 F:	drivers/soc/ti/*

 TI LM49xxx FAMILY ASoC CODEC DRIVERS
--- a/2
+++ b/2
@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 6
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Hurr durr I'ma ninja sloth

 # *DOCUMENTATION*
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@ -37,6 +37,7 @@ extern int split_tlb;
 extern int dcache_stride;
 extern int icache_stride;
 extern struct pdc_cache_info cache_info;
+extern struct pdc_btlb_info btlb_info;
 void parisc_setup_cache_timing(void);

 #define pdtlb(sr, addr)	asm volatile("pdtlb 0(%%sr%0,%1)" \
--- a/arch/parisc/include/asm/mckinley.h
+++ b/arch/parisc/include/asm/mckinley.h
@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_PARISC_MCKINLEY_H
-#define ASM_PARISC_MCKINLEY_H
-
-/* declared in arch/parisc/kernel/setup.c */
-extern struct proc_dir_entry * proc_mckinley_root;
-
-#endif /*ASM_PARISC_MCKINLEY_H*/
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@ -44,10 +44,11 @@ int pdc_model_capabilities(unsigned long *capabilities);
 int pdc_model_platform_info(char *orig_prod_num, char *current_prod_num, char *serial_no);
 int pdc_cache_info(struct pdc_cache_info *cache);
 int pdc_spaceid_bits(unsigned long *space_bits);
-#ifndef CONFIG_PA20
 int pdc_btlb_info(struct pdc_btlb_info *btlb);
+int pdc_btlb_insert(unsigned long long vpage, unsigned long physpage, unsigned long len,
+                    unsigned long entry_info, unsigned long slot);
+int pdc_btlb_purge_all(void);
 int pdc_mem_map_hpa(struct pdc_memory_map *r_addr, struct pdc_module_path *mod_path);
-#endif /* !CONFIG_PA20 */
 int pdc_pim_toc11(struct pdc_toc_pim_11 *ret);
 int pdc_pim_toc20(struct pdc_toc_pim_20 *ret);
 int pdc_lan_station_id(char *lan_addr, unsigned long net_hpa);
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@ -310,6 +310,7 @@ extern void do_syscall_trace_exit(struct pt_regs *);
 struct seq_file;
 extern void early_trap_init(void);
 extern void collect_boot_cpu_data(void);
+extern void btlb_init_per_cpu(void);
 extern int show_cpuinfo (struct seq_file *m, void *v);

 /* driver code in driver/parisc */
--- a/arch/parisc/include/asm/ropes.h
+++ b/arch/parisc/include/asm/ropes.h
@ -29,7 +29,7 @@
 struct ioc {
 	void __iomem	*ioc_hpa;	/* I/O MMU base address */
 	char		*res_map;	/* resource map, bit == pdir entry */
-	u64		*pdir_base;	/* physical base address */
+	__le64		*pdir_base;	/* physical base address */
 	unsigned long	ibase;		/* pdir IOV Space base - shared w/lba_pci */
 	unsigned long	imask;		/* pdir IOV Space mask - shared w/lba_pci */
 #ifdef ZX1_SUPPORT
@ -86,6 +86,9 @@ struct sba_device {
 	struct ioc		ioc[MAX_IOC];
 };

+/* list of SBA's in system, see drivers/parisc/sba_iommu.c */
+extern struct sba_device *sba_list;
+
 #define ASTRO_RUNWAY_PORT	0x582
 #define IKE_MERCED_PORT		0x803
 #define REO_MERCED_PORT		0x804
@ -110,7 +113,7 @@ static inline int IS_PLUTO(struct parisc_device *d) {

 #define SBA_PDIR_VALID_BIT	0x8000000000000000ULL

-#define SBA_AGPGART_COOKIE	0x0000badbadc0ffeeULL
+#define SBA_AGPGART_COOKIE	(__force __le64) 0x0000badbadc0ffeeULL

 #define SBA_FUNC_ID	0x0000	/* function id */
 #define SBA_FCLASS	0x0008	/* function class, bist, header, rev... */
--- a/arch/parisc/include/asm/shmparam.h
+++ b/arch/parisc/include/asm/shmparam.h
@ -2,6 +2,21 @@
 #ifndef _ASMPARISC_SHMPARAM_H
 #define _ASMPARISC_SHMPARAM_H

+/*
+ * PA-RISC uses virtually indexed & physically tagged (VIPT) caches
+ * which have strict requirements when the same physical address is
+ * accessed through different virtual mappings. Read the section
+ * "Address Aliasing" in the arch docs for more detail:
+ * PA-RISC 1.1 (page 3-6):
+ * https://parisc.wiki.kernel.org/images-parisc/6/68/Pa11_acd.pdf
+ * PA-RISC 2.0 (page F-5):
+ * https://parisc.wiki.kernel.org/images-parisc/7/73/Parisc2.0.pdf
+ *
+ * For Linux we allow kernel and userspace to map pages on page size
+ * granularity (SHMLBA) but have to ensure that, if two pages are
+ * mapped to the same physical address, the virtual and physical
+ * addresses modulo SHM_COLOUR are identical.
+ */
 #define SHMLBA	   PAGE_SIZE	/* attach addr a multiple of this */
 #define SHM_COLOUR 0x00400000	/* shared mappings colouring */

--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@ -275,6 +275,8 @@ int main(void)
 	 * and kernel data on physical huge pages */
 #ifdef CONFIG_HUGETLB_PAGE
 	DEFINE(HUGEPAGE_SIZE, 1UL << REAL_HPAGE_SHIFT);
+#elif !defined(CONFIG_64BIT)
+	DEFINE(HUGEPAGE_SIZE, 4*1024*1024);
 #else
 	DEFINE(HUGEPAGE_SIZE, PAGE_SIZE);
 #endif
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@ -58,7 +58,7 @@ int pa_serialize_tlb_flushes __ro_after_init;

 struct pdc_cache_info cache_info __ro_after_init;
 #ifndef CONFIG_PA20
-static struct pdc_btlb_info btlb_info __ro_after_init;
+struct pdc_btlb_info btlb_info __ro_after_init;
 #endif

 DEFINE_STATIC_KEY_TRUE(parisc_has_cache);
@ -264,12 +264,6 @@ parisc_cache_init(void)
 	icache_stride = CAFL_STRIDE(cache_info.ic_conf);
 #undef CAFL_STRIDE

-#ifndef CONFIG_PA20
-	if (pdc_btlb_info(&btlb_info) < 0) {
-		memset(&btlb_info, 0, sizeof btlb_info);
-	}
-#endif
-
 	if ((boot_cpu_data.pdc.capabilities & PDC_MODEL_NVA_MASK) ==
 						PDC_MODEL_NVA_UNSUPPORTED) {
 		printk(KERN_WARNING "parisc_cache_init: Only equivalent aliasing supported!\n");
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@ -925,9 +925,9 @@ static __init void qemu_header(void)
 	pr_info("#define PARISC_MODEL \"%s\"\n\n",
 			boot_cpu_data.pdc.sys_model_name);

+	#define p ((unsigned long *)&boot_cpu_data.pdc.model)
 	pr_info("#define PARISC_PDC_MODEL 0x%lx, 0x%lx, 0x%lx, "
 		"0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx\n\n",
-	#define p ((unsigned long *)&boot_cpu_data.pdc.model)
 		p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8]);
 	#undef p

--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@ -687,7 +687,6 @@ int pdc_spaceid_bits(unsigned long *space_bits)
 	return retval;
 }

-#ifndef CONFIG_PA20
 /**
 * pdc_btlb_info - Return block TLB information.
 * @btlb: The return buffer.
@ -696,18 +695,51 @@ int pdc_spaceid_bits(unsigned long *space_bits)
 */
 int pdc_btlb_info(struct pdc_btlb_info *btlb) 
 {
-        int retval;
+	int retval;
 	unsigned long flags;

-        spin_lock_irqsave(&pdc_lock, flags);
-        retval = mem_pdc_call(PDC_BLOCK_TLB, PDC_BTLB_INFO, __pa(pdc_result), 0);
-        memcpy(btlb, pdc_result, sizeof(*btlb));
-        spin_unlock_irqrestore(&pdc_lock, flags);
+	if (IS_ENABLED(CONFIG_PA20))
+		return PDC_BAD_PROC;

-        if(retval < 0) {
-                btlb->max_size = 0;
-        }
-        return retval;
+	spin_lock_irqsave(&pdc_lock, flags);
+	retval = mem_pdc_call(PDC_BLOCK_TLB, PDC_BTLB_INFO, __pa(pdc_result), 0);
+	memcpy(btlb, pdc_result, sizeof(*btlb));
+	spin_unlock_irqrestore(&pdc_lock, flags);
+
+	if(retval < 0) {
+		btlb->max_size = 0;
+	}
+	return retval;
+}
+
+int pdc_btlb_insert(unsigned long long vpage, unsigned long physpage, unsigned long len,
+		    unsigned long entry_info, unsigned long slot)
+{
+	int retval;
+	unsigned long flags;
+
+	if (IS_ENABLED(CONFIG_PA20))
+		return PDC_BAD_PROC;
+
+	spin_lock_irqsave(&pdc_lock, flags);
+	retval = mem_pdc_call(PDC_BLOCK_TLB, PDC_BTLB_INSERT, (unsigned long) (vpage >> 32),
+			      (unsigned long) vpage, physpage, len, entry_info, slot);
+	spin_unlock_irqrestore(&pdc_lock, flags);
+	return retval;
+}
+
+int pdc_btlb_purge_all(void)
+{
+	int retval;
+	unsigned long flags;
+
+	if (IS_ENABLED(CONFIG_PA20))
+		return PDC_BAD_PROC;
+
+	spin_lock_irqsave(&pdc_lock, flags);
+	retval = mem_pdc_call(PDC_BLOCK_TLB, PDC_BTLB_PURGE_ALL);
+	spin_unlock_irqrestore(&pdc_lock, flags);
+	return retval;
 }

 /**
@ -728,6 +760,9 @@ int pdc_mem_map_hpa(struct pdc_memory_map *address,
        int retval;
 	unsigned long flags;

+	if (IS_ENABLED(CONFIG_PA20))
+		return PDC_BAD_PROC;
+
        spin_lock_irqsave(&pdc_lock, flags);
        memcpy(pdc_result2, mod_path, sizeof(*mod_path));
        retval = mem_pdc_call(PDC_MEM_MAP, PDC_MEM_MAP_HPA, __pa(pdc_result),
@ -737,7 +772,6 @@ int pdc_mem_map_hpa(struct pdc_memory_map *address,

        return retval;
 }
-#endif	/* !CONFIG_PA20 */

 /**
 * pdc_lan_station_id - Get the LAN address.
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@ -180,10 +180,10 @@ $pgt_fill_loop:
 	std		%dp,0x18(%r10)
 #endif

-#ifdef CONFIG_64BIT
-	/* Get PDCE_PROC for monarch CPU. */
 #define MEM_PDC_LO 0x388
 #define MEM_PDC_HI 0x35C
+#ifdef CONFIG_64BIT
+	/* Get PDCE_PROC for monarch CPU. */
 	ldw             MEM_PDC_LO(%r0),%r3
 	ldw             MEM_PDC_HI(%r0),%r10
 	depd            %r10, 31, 32, %r3        /* move to upper word */
@ -269,7 +269,17 @@ stext_pdc_ret:
 	tovirt_r1	%r6
 	mtctl		%r6,%cr30		/* restore task thread info */
 #endif
-	
+
+#ifndef CONFIG_64BIT
+	/* clear all BTLBs */
+	ldi		PDC_BLOCK_TLB,%arg0
+	load32          PA(stext_pdc_btlb_ret), %rp
+	ldw             MEM_PDC_LO(%r0),%r3
+	bv              (%r3)
+	ldi		PDC_BTLB_PURGE_ALL,%arg1
+stext_pdc_btlb_ret:
+#endif
+
 	/* PARANOID: clear user scratch/user space SR's */
 	mtsp	%r0,%sr0
 	mtsp	%r0,%sr1
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@ -365,7 +365,7 @@ union irq_stack_union {
 	volatile unsigned int lock[1];
 };

-DEFINE_PER_CPU(union irq_stack_union, irq_stack_union) = {
+static DEFINE_PER_CPU(union irq_stack_union, irq_stack_union) = {
 		.slock = { 1,1,1,1 },
 	};
 #endif
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@ -368,6 +368,8 @@ int init_per_cpu(int cpunum)
 	/* FUTURE: Enable Performance Monitor : ccr bit 0x20 */
 	init_percpu_prof(cpunum);

+	btlb_init_per_cpu();
+
 	return ret;
 }

--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@ -154,6 +154,7 @@ SECTIONS
 	}

 	/* End of data section */
+	. = ALIGN(PAGE_SIZE);
 	_edata = .;

 	/* BSS */
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@ -32,6 +32,7 @@
 #include <asm/sections.h>
 #include <asm/msgbuf.h>
 #include <asm/sparsemem.h>
+#include <asm/asm-offsets.h>

 extern int  data_start;
 extern void parisc_kernel_start(void);	/* Kernel entry point in head.S */
@ -720,6 +721,77 @@ void __init paging_init(void)
 	parisc_bootmem_free();
 }

+static void alloc_btlb(unsigned long start, unsigned long end, int *slot,
+			unsigned long entry_info)
+{
+	const int slot_max = btlb_info.fixed_range_info.num_comb;
+	int min_num_pages = btlb_info.min_size;
+	unsigned long size;
+
+	/* map at minimum 4 pages */
+	if (min_num_pages < 4)
+		min_num_pages = 4;
+
+	size = HUGEPAGE_SIZE;
+	while (start < end && *slot < slot_max && size >= PAGE_SIZE) {
+		/* starting address must have same alignment as size! */
+		/* if correctly aligned and fits in double size, increase */
+		if (((start & (2 * size - 1)) == 0) &&
+		    (end - start) >= (2 * size)) {
+			size <<= 1;
+			continue;
+		}
+		/* if current size alignment is too big, try smaller size */
+		if ((start & (size - 1)) != 0) {
+			size >>= 1;
+			continue;
+		}
+		if ((end - start) >= size) {
+			if ((size >> PAGE_SHIFT) >= min_num_pages)
+				pdc_btlb_insert(start >> PAGE_SHIFT, __pa(start) >> PAGE_SHIFT,
+					size >> PAGE_SHIFT, entry_info, *slot);
+			(*slot)++;
+			start += size;
+			continue;
+		}
+		size /= 2;
+		continue;
+	}
+}
+
+void btlb_init_per_cpu(void)
+{
+	unsigned long s, t, e;
+	int slot;
+
+	/* BTLBs are not available on 64-bit CPUs */
+	if (IS_ENABLED(CONFIG_PA20))
+		return;
+	else if (pdc_btlb_info(&btlb_info) < 0) {
+		memset(&btlb_info, 0, sizeof btlb_info);
+	}
+
+	/* insert BTLBs for code and data segments */
+	s = (uintptr_t) dereference_function_descriptor(&_stext);
+	e = (uintptr_t) dereference_function_descriptor(&_etext);
+	t = (uintptr_t) dereference_function_descriptor(&_sdata);
+	BUG_ON(t != e);
+
+	/* code segments */
+	slot = 0;
+	alloc_btlb(s, e, &slot, 0x13800000);
+
+	/* sanity check */
+	t = (uintptr_t) dereference_function_descriptor(&_edata);
+	e = (uintptr_t) dereference_function_descriptor(&__bss_start);
+	BUG_ON(t != e);
+
+	/* data segments */
+	s = (uintptr_t) dereference_function_descriptor(&_sdata);
+	e = (uintptr_t) dereference_function_descriptor(&__bss_stop);
+	alloc_btlb(s, e, &slot, 0x11800000);
+}
+
 #ifdef CONFIG_PA20

 /*
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@ -105,7 +105,7 @@ asm volatile(ALTERNATIVE(						\
 * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
 *   0000001    01001      rs1       000      00000  0001011
 * dcache.cva rs1 (clean, virtual address)
- *   0000001    00100      rs1       000      00000  0001011
+ *   0000001    00101      rs1       000      00000  0001011
 *
 * dcache.cipa rs1 (clean then invalidate, physical address)
 * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
@ -118,7 +118,7 @@ asm volatile(ALTERNATIVE(						\
 *   0000000    11001     00000      000      00000  0001011
 */
 #define THEAD_inval_A0	".long 0x0265000b"
-#define THEAD_clean_A0	".long 0x0245000b"
+#define THEAD_clean_A0	".long 0x0255000b"
 #define THEAD_flush_A0	".long 0x0275000b"
 #define THEAD_SYNC_S	".long 0x0190000b"

--- a/arch/riscv/kernel/elf_kexec.c
+++ b/arch/riscv/kernel/elf_kexec.c
@ -98,7 +98,13 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
 	kbuf.image = image;
 	kbuf.buf_min = lowest_paddr;
 	kbuf.buf_max = ULONG_MAX;
-	kbuf.buf_align = PAGE_SIZE;
+
+	/*
+	 * Current riscv boot protocol requires 2MB alignment for
+	 * RV64 and 4MB alignment for RV32
+	 *
+	 */
+	kbuf.buf_align = PMD_SIZE;
 	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
 	kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
 	kbuf.top_down = false;
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@ -1945,6 +1945,7 @@ config EFI
 	select UCS2_STRING
 	select EFI_RUNTIME_WRAPPERS
 	select ARCH_USE_MEMREMAP_PROT
+	select EFI_RUNTIME_MAP if KEXEC_CORE
 	help
 	  This enables the kernel to use EFI runtime services that are
 	  available (such as the EFI variable services).
@ -2020,7 +2021,6 @@ config EFI_MAX_FAKE_MEM
 config EFI_RUNTIME_MAP
 	bool "Export EFI runtime maps to sysfs" if EXPERT
 	depends on EFI
-	default KEXEC_CORE
 	help
 	  Export EFI runtime memory regions to /sys/firmware/efi/runtime-map.
 	  That memory map is required by the 2nd kernel to set up EFI virtual
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@ -59,6 +59,14 @@ static void *alloc_pgt_page(void *context)
 		return NULL;
 	}

+	/* Consumed more tables than expected? */
+	if (pages->pgt_buf_offset == BOOT_PGT_SIZE_WARN) {
+		debug_putstr("pgt_buf running low in " __FILE__ "\n");
+		debug_putstr("Need to raise BOOT_PGT_SIZE?\n");
+		debug_putaddr(pages->pgt_buf_offset);
+		debug_putaddr(pages->pgt_buf_size);
+	}
+
 	entry = pages->pgt_buf + pages->pgt_buf_offset;
 	pages->pgt_buf_offset += PAGE_SIZE;

--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@ -40,23 +40,40 @@
 #ifdef CONFIG_X86_64
 # define BOOT_STACK_SIZE	0x4000

-# define BOOT_INIT_PGT_SIZE	(6*4096)
-# ifdef CONFIG_RANDOMIZE_BASE
 /*
- * Assuming all cross the 512GB boundary:
- * 1 page for level4
- * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel
- * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP).
- * Total is 19 pages.
+ * Used by decompressor's startup_32() to allocate page tables for identity
+ * mapping of the 4G of RAM in 4-level paging mode:
+ * - 1 level4 table;
+ * - 1 level3 table;
+ * - 4 level2 tables that map everything with 2M pages;
+ *
+ * The additional level5 table needed for 5-level paging is allocated from
+ * trampoline_32bit memory.
 */
-#  ifdef CONFIG_X86_VERBOSE_BOOTUP
-#   define BOOT_PGT_SIZE	(19*4096)
-#  else /* !CONFIG_X86_VERBOSE_BOOTUP */
-#   define BOOT_PGT_SIZE	(17*4096)
-#  endif
-# else /* !CONFIG_RANDOMIZE_BASE */
-#  define BOOT_PGT_SIZE		BOOT_INIT_PGT_SIZE
-# endif
+# define BOOT_INIT_PGT_SIZE	(6*4096)
+
+/*
+ * Total number of page tables kernel_add_identity_map() can allocate,
+ * including page tables consumed by startup_32().
+ *
+ * Worst-case scenario:
+ *  - 5-level paging needs 1 level5 table;
+ *  - KASLR needs to map kernel, boot_params, cmdline and randomized kernel,
+ *    assuming all of them cross 256T boundary:
+ *    + 4*2 level4 tables;
+ *    + 4*2 level3 tables;
+ *    + 4*2 level2 tables;
+ *  - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM):
+ *    + 1 level4 table;
+ *    + 1 level3 table;
+ *    + 1 level2 table;
+ * Total: 28 tables
+ *
+ * Add 4 spare tables in case the decompressor touches anything beyond what is
+ * accounted for above. Warn if that happens.
+ */
+# define BOOT_PGT_SIZE_WARN	(28*4096)
+# define BOOT_PGT_SIZE		(32*4096)

 #else /* !CONFIG_X86_64 */
 # define BOOT_STACK_SIZE	0x1000
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@ -91,19 +91,6 @@ static inline void efi_fpu_end(void)

 #ifdef CONFIG_X86_32
 #define EFI_X86_KERNEL_ALLOC_LIMIT		(SZ_512M - 1)
-
-#define arch_efi_call_virt_setup()					\
-({									\
-	efi_fpu_begin();						\
-	firmware_restrict_branch_speculation_start();			\
-})
-
-#define arch_efi_call_virt_teardown()					\
-({									\
-	firmware_restrict_branch_speculation_end();			\
-	efi_fpu_end();							\
-})
-
 #else /* !CONFIG_X86_32 */
 #define EFI_X86_KERNEL_ALLOC_LIMIT		EFI_ALLOC_LIMIT

@ -116,14 +103,6 @@ extern bool efi_disable_ibt_for_runtime;
 	__efi_call(__VA_ARGS__);					\
 })

-#define arch_efi_call_virt_setup()					\
-({									\
-	efi_sync_low_kernel_mappings();					\
-	efi_fpu_begin();						\
-	firmware_restrict_branch_speculation_start();			\
-	efi_enter_mm();							\
-})
-
 #undef arch_efi_call_virt
 #define arch_efi_call_virt(p, f, args...) ({				\
 	u64 ret, ibt = ibt_save(efi_disable_ibt_for_runtime);		\
@ -132,13 +111,6 @@ extern bool efi_disable_ibt_for_runtime;
 	ret;								\
 })

-#define arch_efi_call_virt_teardown()					\
-({									\
-	efi_leave_mm();							\
-	firmware_restrict_branch_speculation_end();			\
-	efi_fpu_end();							\
-})
-
 #ifdef CONFIG_KASAN
 /*
 * CONFIG_KASAN may redefine memset to __memset.  __memset function is present
@ -168,8 +140,8 @@ extern void efi_delete_dummy_variable(void);
 extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr);
 extern void efi_free_boot_services(void);

-void efi_enter_mm(void);
-void efi_leave_mm(void);
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);

 /* kexec external ABI */
 struct efi_setup_data {
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@ -8,6 +8,14 @@
 #undef notrace
 #define notrace __attribute__((no_instrument_function))

+#ifdef CONFIG_64BIT
+/*
+ * The generic version tends to create spurious ENDBR instructions under
+ * certain conditions.
+ */
+#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; })
+#endif
+
 #ifdef CONFIG_X86_32
 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
 #endif /* CONFIG_X86_32 */
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@ -1533,7 +1533,7 @@ static void __init build_socket_tables(void)
 {
 	struct uv_gam_range_entry *gre = uv_gre_table;
 	int nums, numn, nump;
-	int cpu, i, lnid;
+	int i, lnid, apicid;
 	int minsock = _min_socket;
 	int maxsock = _max_socket;
 	int minpnode = _min_pnode;
@ -1584,15 +1584,14 @@ static void __init build_socket_tables(void)

 	/* Set socket -> node values: */
 	lnid = NUMA_NO_NODE;
-	for_each_possible_cpu(cpu) {
-		int nid = cpu_to_node(cpu);
-		int apicid, sockid;
+	for (apicid = 0; apicid < ARRAY_SIZE(__apicid_to_node); apicid++) {
+		int nid = __apicid_to_node[apicid];
+		int sockid;

-		if (lnid == nid)
+		if ((nid == NUMA_NO_NODE) || (lnid == nid))
 			continue;
 		lnid = nid;

-		apicid = per_cpu(x86_cpu_to_apicid, cpu);
 		sockid = apicid >> uv_cpuid.socketid_shift;

 		if (_socket_to_node[sockid - minsock] == SOCK_EMPTY)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@ -579,7 +579,6 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 }


-#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC)
 static inline int x86_sched_itmt_flags(void)
 {
 	return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
@ -603,7 +602,14 @@ static int x86_cluster_flags(void)
 	return cpu_cluster_flags() | x86_sched_itmt_flags();
 }
 #endif
-#endif
+
+static int x86_die_flags(void)
+{
+	if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+	       return x86_sched_itmt_flags();
+
+	return 0;
+}

 /*
 * Set if a package/die has multiple NUMA nodes inside.
@ -640,7 +646,7 @@ static void __init build_sched_topology(void)
 	 */
 	if (!x86_has_numa_in_package) {
 		x86_topology[i++] = (struct sched_domain_topology_level){
-			cpu_cpu_mask, SD_INIT_NAME(DIE)
+			cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(DIE)
 		};
 	}

--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@ -56,7 +56,6 @@ SYM_FUNC_END(__put_user_1)
 EXPORT_SYMBOL(__put_user_1)

 SYM_FUNC_START(__put_user_nocheck_1)
-	ENDBR
 	ASM_STAC
 2:	movb %al,(%_ASM_CX)
 	xor %ecx,%ecx
@ -76,7 +75,6 @@ SYM_FUNC_END(__put_user_2)
 EXPORT_SYMBOL(__put_user_2)

 SYM_FUNC_START(__put_user_nocheck_2)
-	ENDBR
 	ASM_STAC
 4:	movw %ax,(%_ASM_CX)
 	xor %ecx,%ecx
@ -96,7 +94,6 @@ SYM_FUNC_END(__put_user_4)
 EXPORT_SYMBOL(__put_user_4)

 SYM_FUNC_START(__put_user_nocheck_4)
-	ENDBR
 	ASM_STAC
 6:	movl %eax,(%_ASM_CX)
 	xor %ecx,%ecx
@ -119,7 +116,6 @@ SYM_FUNC_END(__put_user_8)
 EXPORT_SYMBOL(__put_user_8)

 SYM_FUNC_START(__put_user_nocheck_8)
-	ENDBR
 	ASM_STAC
 9:	mov %_ASM_AX,(%_ASM_CX)
 #ifdef CONFIG_X86_32
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@ -140,3 +140,15 @@ void __init efi_runtime_update_mappings(void)
 		}
 	}
 }
+
+void arch_efi_call_virt_setup(void)
+{
+	efi_fpu_begin();
+	firmware_restrict_branch_speculation_start();
+}
+
+void arch_efi_call_virt_teardown(void)
+{
+	firmware_restrict_branch_speculation_end();
+	efi_fpu_end();
+}
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@ -474,19 +474,34 @@ void __init efi_dump_pagetable(void)
 * can not change under us.
 * It should be ensured that there are no concurrent calls to this function.
 */
-void efi_enter_mm(void)
+static void efi_enter_mm(void)
 {
 	efi_prev_mm = current->active_mm;
 	current->active_mm = &efi_mm;
 	switch_mm(efi_prev_mm, &efi_mm, NULL);
 }

-void efi_leave_mm(void)
+static void efi_leave_mm(void)
 {
 	current->active_mm = efi_prev_mm;
 	switch_mm(&efi_mm, efi_prev_mm, NULL);
 }

+void arch_efi_call_virt_setup(void)
+{
+	efi_sync_low_kernel_mappings();
+	efi_fpu_begin();
+	firmware_restrict_branch_speculation_start();
+	efi_enter_mm();
+}
+
+void arch_efi_call_virt_teardown(void)
+{
+	efi_leave_mm();
+	firmware_restrict_branch_speculation_end();
+	efi_fpu_end();
+}
+
 static DEFINE_SPINLOCK(efi_runtime_lock);

 /*
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@ -19,6 +19,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
 # optimization flags.
 KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))

+# When LTO is enabled, llvm emits many text sections, which is not supported
+# by kexec. Remove -flto=* flags.
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS))
+
 # When linking purgatory.ro with -r unresolved symbols are not checked,
 # also link a purgatory.chk binary without -r to check for unresolved symbols.
 PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@ -4405,11 +4405,8 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
 	struct blk_mq_tags **new_tags;
 	int i;

-	if (set->nr_hw_queues >= new_nr_hw_queues) {
-		for (i = new_nr_hw_queues; i < set->nr_hw_queues; i++)
-			__blk_mq_free_map_and_rqs(set, i);
+	if (set->nr_hw_queues >= new_nr_hw_queues)
 		goto done;
-	}

 	new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
 				GFP_KERNEL, set->numa_node);
@ -4719,7 +4716,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 {
 	struct request_queue *q;
 	LIST_HEAD(head);
-	int prev_nr_hw_queues;
+	int prev_nr_hw_queues = set->nr_hw_queues;
+	int i;

 	lockdep_assert_held(&set->tag_list_lock);

@ -4746,7 +4744,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_sysfs_unregister_hctxs(q);
 	}

-	prev_nr_hw_queues = set->nr_hw_queues;
 	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
 		goto reregister;

@ -4781,6 +4778,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,

 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_unfreeze_queue(q);
+
+	/* Free the excess tags when nr_hw_queues shrinks. */
+	for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
+		__blk_mq_free_map_and_rqs(set, i);
 }

 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
--- a/drivers/Makefile
+++ b/drivers/Makefile
@ -46,7 +46,7 @@ obj-$(CONFIG_DMADEVICES)	+= dma/

 # SOC specific infrastructure drivers.
 obj-y				+= soc/
-obj-$(CONFIG_PM_GENERIC_DOMAINS)	+= genpd/
+obj-$(CONFIG_PM_GENERIC_DOMAINS)	+= pmdomain/

 obj-y				+= virtio/
 obj-$(CONFIG_VDPA)		+= vdpa/
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@ -79,29 +79,30 @@ static const struct drm_info_list accel_debugfs_list[] = {
 #define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list)

 /**
- * accel_debugfs_init() - Initialize debugfs for accel minor
- * @minor: Pointer to the drm_minor instance.
- * @minor_id: The minor's id
+ * accel_debugfs_init() - Initialize debugfs for device
+ * @dev: Pointer to the device instance.
 *
- * This function initializes the drm minor's debugfs members and creates
- * a root directory for the minor in debugfs. It also creates common files
- * for accelerators and calls the driver's debugfs init callback.
+ * This function creates a root directory for the device in debugfs.
 */
-void accel_debugfs_init(struct drm_minor *minor, int minor_id)
+void accel_debugfs_init(struct drm_device *dev)
 {
-	struct drm_device *dev = minor->dev;
-	char name[64];
+	drm_debugfs_dev_init(dev, accel_debugfs_root);
+}

-	INIT_LIST_HEAD(&minor->debugfs_list);
-	mutex_init(&minor->debugfs_lock);
-	sprintf(name, "%d", minor_id);
-	minor->debugfs_root = debugfs_create_dir(name, accel_debugfs_root);
+/**
+ * accel_debugfs_register() - Register debugfs for device
+ * @dev: Pointer to the device instance.
+ *
+ * Creates common files for accelerators.
+ */
+void accel_debugfs_register(struct drm_device *dev)
+{
+	struct drm_minor *minor = dev->accel;
+
+	minor->debugfs_root = dev->debugfs_root;

 	drm_debugfs_create_files(accel_debugfs_list, ACCEL_DEBUGFS_ENTRIES,
-				 minor->debugfs_root, minor);
-
-	if (dev->driver->debugfs_init)
-		dev->driver->debugfs_init(minor);
+				 dev->debugfs_root, minor);
 }

 /**
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@ -518,78 +518,52 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);

 	ret = ivpu_pci_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret);
+	if (ret)
 		goto err_xa_destroy;
-	}

 	ret = ivpu_irq_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret);
+	if (ret)
 		goto err_xa_destroy;
-	}

 	/* Init basic HW info based on buttress registers which are accessible before power up */
 	ret = ivpu_hw_info_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret);
+	if (ret)
 		goto err_xa_destroy;
-	}

 	/* Power up early so the rest of init code can access VPU registers */
 	ret = ivpu_hw_power_up(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
+	if (ret)
 		goto err_xa_destroy;
-	}

 	ret = ivpu_mmu_global_context_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret);
+	if (ret)
 		goto err_power_down;
-	}

 	ret = ivpu_mmu_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret);
+	if (ret)
+		goto err_mmu_gctx_fini;
+
+	ret = ivpu_mmu_reserved_context_init(vdev);
+	if (ret)
 		goto err_mmu_gctx_fini;
-	}

 	ret = ivpu_fw_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret);
-		goto err_mmu_gctx_fini;
-	}
+	if (ret)
+		goto err_mmu_rctx_fini;

 	ret = ivpu_ipc_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret);
+	if (ret)
 		goto err_fw_fini;
-	}

-	ret = ivpu_pm_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize PM: %d\n", ret);
-		goto err_ipc_fini;
-	}
+	ivpu_pm_init(vdev);

 	ret = ivpu_job_done_thread_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret);
+	if (ret)
 		goto err_ipc_fini;
-	}
-
-	ret = ivpu_fw_load(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to load firmware: %d\n", ret);
-		goto err_job_done_thread_fini;
-	}

 	ret = ivpu_boot(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to boot: %d\n", ret);
+	if (ret)
 		goto err_job_done_thread_fini;
-	}

 	ivpu_pm_enable(vdev);

@ -601,6 +575,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	ivpu_ipc_fini(vdev);
 err_fw_fini:
 	ivpu_fw_fini(vdev);
+err_mmu_rctx_fini:
+	ivpu_mmu_reserved_context_fini(vdev);
 err_mmu_gctx_fini:
 	ivpu_mmu_global_context_fini(vdev);
 err_power_down:
@ -624,6 +600,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)

 	ivpu_ipc_fini(vdev);
 	ivpu_fw_fini(vdev);
+	ivpu_mmu_reserved_context_fini(vdev);
 	ivpu_mmu_global_context_fini(vdev);

 	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
@ -651,10 +628,8 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_set_drvdata(pdev, vdev);

 	ret = ivpu_dev_init(vdev);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret);
+	if (ret)
 		return ret;
-	}

 	ret = drm_dev_register(&vdev->drm, 0);
 	if (ret) {
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@ -28,12 +28,13 @@
 #define IVPU_HW_37XX	37
 #define IVPU_HW_40XX	40

-#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
-/* SSID 1 is used by the VPU to represent invalid context */
-#define IVPU_USER_CONTEXT_MIN_SSID   2
-#define IVPU_USER_CONTEXT_MAX_SSID   (IVPU_USER_CONTEXT_MIN_SSID + 63)
+#define IVPU_GLOBAL_CONTEXT_MMU_SSID   0
+/* SSID 1 is used by the VPU to represent reserved context */
+#define IVPU_RESERVED_CONTEXT_MMU_SSID 1
+#define IVPU_USER_CONTEXT_MIN_SSID     2
+#define IVPU_USER_CONTEXT_MAX_SSID     (IVPU_USER_CONTEXT_MIN_SSID + 63)

-#define IVPU_NUM_ENGINES	     2
+#define IVPU_NUM_ENGINES 2

 #define IVPU_PLATFORM_SILICON 0
 #define IVPU_PLATFORM_SIMICS  2
@ -75,6 +76,11 @@

 #define IVPU_WA(wa_name) (vdev->wa.wa_name)

+#define IVPU_PRINT_WA(wa_name) do {					\
+	if (IVPU_WA(wa_name))						\
+		ivpu_dbg(vdev, MISC, "Using WA: " #wa_name "\n");	\
+} while (0)
+
 struct ivpu_wa_table {
 	bool punit_disabled;
 	bool clear_runtime_mem;
@ -104,6 +110,7 @@ struct ivpu_device {
 	struct ivpu_pm_info *pm;

 	struct ivpu_mmu_context gctx;
+	struct ivpu_mmu_context rctx;
 	struct xarray context_xa;
 	struct xa_limit context_xa_limit;

@ -117,6 +124,7 @@ struct ivpu_device {
 		int jsm;
 		int tdr;
 		int reschedule_suspend;
+		int autosuspend;
 	} timeout;
 };

--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@ -301,6 +301,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
 	if (ret)
 		goto err_fw_release;

+	ivpu_fw_load(vdev);
+
 	return 0;

 err_fw_release:
@ -314,7 +316,7 @@ void ivpu_fw_fini(struct ivpu_device *vdev)
 	ivpu_fw_release(vdev);
 }

-int ivpu_fw_load(struct ivpu_device *vdev)
+void ivpu_fw_load(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
 	u64 image_end_offset = fw->image_load_offset + fw->image_size;
@ -331,8 +333,6 @@ int ivpu_fw_load(struct ivpu_device *vdev)
 	}

 	wmb(); /* Flush WC buffers after writing fw->mem */
-
-	return 0;
 }

 static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@ -31,7 +31,7 @@ struct ivpu_fw_info {

 int ivpu_fw_init(struct ivpu_device *vdev);
 void ivpu_fw_fini(struct ivpu_device *vdev);
-int ivpu_fw_load(struct ivpu_device *vdev);
+void ivpu_fw_load(struct ivpu_device *vdev);
 void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp);

 static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@ -104,6 +104,11 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)

 	if (ivpu_device_id(vdev) == PCI_DEVICE_ID_MTL && ivpu_revision(vdev) < 4)
 		vdev->wa.interrupt_clear_with_0 = true;
+
+	IVPU_PRINT_WA(punit_disabled);
+	IVPU_PRINT_WA(clear_runtime_mem);
+	IVPU_PRINT_WA(d3hot_after_power_off);
+	IVPU_PRINT_WA(interrupt_clear_with_0);
 }

 static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
@ -113,11 +118,13 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
 		vdev->timeout.jsm = 50000;
 		vdev->timeout.tdr = 2000000;
 		vdev->timeout.reschedule_suspend = 1000;
+		vdev->timeout.autosuspend = -1;
 	} else {
 		vdev->timeout.boot = 1000;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 2000;
 		vdev->timeout.reschedule_suspend = 10;
+		vdev->timeout.autosuspend = 10;
 	}
 }

@ -345,10 +352,10 @@ static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)

 static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val)
 {
-	u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN);
+	u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);

-	if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
-	    !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
+	if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
+	    !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
 		return -EIO;

 	return 0;
@ -356,10 +363,10 @@ static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val)

 static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
 {
-	u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QACCEPTN);
+	u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN);

-	if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
-	    !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
+	if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
+	    !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
 		return -EIO;

 	return 0;
@ -367,10 +374,10 @@ static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_va

 static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
 {
-	u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QDENY);
+	u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY);

-	if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
-	    !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
+	if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
+	    !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
 		return -EIO;

 	return 0;
@ -423,15 +430,15 @@ static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable
 	int ret;
 	u32 val;

-	val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN);
+	val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);
 	if (enable) {
-		val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val);
-		val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+		val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
+		val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
 	} else {
-		val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val);
-		val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+		val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
+		val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
 	}
-	REGV_WR32(MTL_VPU_TOP_NOC_QREQN, val);
+	REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val);

 	ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
 	if (ret) {
@ -563,17 +570,17 @@ static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev)
 {
 	u32 val;

-	val = REGV_RD32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC);
-	val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val);
+	val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC);
+	val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val);

-	val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val);
-	REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+	val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val);
+	REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);

-	val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
-	REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+	val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+	REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);

-	val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
-	REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+	val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+	REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);

 	val = vdev->fw->entry_point >> 9;
 	REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val);
@ -777,17 +784,17 @@ static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev)
 	u32 val;

 	/* Enable writing and set non-zero WDT value */
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);

 	/* Enable writing and disable watchdog timer */
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_WDOG_EN, 0);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0);

 	/* Now clear the timeout interrupt */
-	val = REGV_RD32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG);
-	val = REG_CLR_FLD(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, val);
+	val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG);
+	val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
 }

 static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
@ -834,10 +841,10 @@ static u32 ivpu_hw_37xx_reg_telemetry_enable_get(struct ivpu_device *vdev)

 static void ivpu_hw_37xx_reg_db_set(struct ivpu_device *vdev, u32 db_id)
 {
-	u32 reg_stride = MTL_VPU_CPU_SS_DOORBELL_1 - MTL_VPU_CPU_SS_DOORBELL_0;
-	u32 val = REG_FLD(MTL_VPU_CPU_SS_DOORBELL_0, SET);
+	u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0;
+	u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET);

-	REGV_WR32I(MTL_VPU_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
+	REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
 }

 static u32 ivpu_hw_37xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev)
@ -854,7 +861,7 @@ static u32 ivpu_hw_37xx_reg_ipc_rx_count_get(struct ivpu_device *vdev)

 static void ivpu_hw_37xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
 {
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_IPC_FIFO, vpu_addr);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr);
 }

 static void ivpu_hw_37xx_irq_clear(struct ivpu_device *vdev)
--- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
@ -3,70 +3,70 @@
 * Copyright (C) 2020-2023 Intel Corporation
 */

-#ifndef __IVPU_HW_MTL_REG_H__
-#define __IVPU_HW_MTL_REG_H__
+#ifndef __IVPU_HW_37XX_REG_H__
+#define __IVPU_HW_37XX_REG_H__

 #include <linux/bits.h>

-#define VPU_37XX_BUTTRESS_INTERRUPT_TYPE					0x00000000u
+#define VPU_37XX_BUTTRESS_INTERRUPT_TYPE				0x00000000u

-#define VPU_37XX_BUTTRESS_INTERRUPT_STAT					0x00000004u
-#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK			BIT_MASK(0)
+#define VPU_37XX_BUTTRESS_INTERRUPT_STAT				0x00000004u
+#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK		BIT_MASK(0)
 #define VPU_37XX_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK			BIT_MASK(1)
 #define VPU_37XX_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK			BIT_MASK(2)

-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0					0x00000008u
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK			GENMASK(15, 0)
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK			GENMASK(31, 16)
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0				0x00000008u
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK		GENMASK(15, 0)
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK		GENMASK(31, 16)

-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1					0x0000000cu
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK			GENMASK(15, 0)
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK				GENMASK(31, 16)
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1				0x0000000cu
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK		GENMASK(15, 0)
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK			GENMASK(31, 16)

-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2					0x00000010u
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2				0x00000010u
 #define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK			GENMASK(15, 0)

-#define VPU_37XX_BUTTRESS_WP_REQ_CMD						0x00000014u
+#define VPU_37XX_BUTTRESS_WP_REQ_CMD					0x00000014u
 #define VPU_37XX_BUTTRESS_WP_REQ_CMD_SEND_MASK				BIT_MASK(0)

 #define VPU_37XX_BUTTRESS_WP_DOWNLOAD					0x00000018u
 #define VPU_37XX_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK			GENMASK(15, 0)

 #define VPU_37XX_BUTTRESS_CURRENT_PLL					0x0000001cu
-#define VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK				GENMASK(15, 0)
+#define VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK			GENMASK(15, 0)

-#define VPU_37XX_BUTTRESS_PLL_ENABLE						0x00000020u
+#define VPU_37XX_BUTTRESS_PLL_ENABLE					0x00000020u

-#define VPU_37XX_BUTTRESS_FMIN_FUSE						0x00000024u
-#define VPU_37XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK				GENMASK(7, 0)
-#define VPU_37XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK				GENMASK(15, 8)
+#define VPU_37XX_BUTTRESS_FMIN_FUSE					0x00000024u
+#define VPU_37XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK			GENMASK(7, 0)
+#define VPU_37XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK			GENMASK(15, 8)

-#define VPU_37XX_BUTTRESS_FMAX_FUSE						0x00000028u
-#define VPU_37XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK				GENMASK(7, 0)
+#define VPU_37XX_BUTTRESS_FMAX_FUSE					0x00000028u
+#define VPU_37XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK			GENMASK(7, 0)

-#define VPU_37XX_BUTTRESS_TILE_FUSE						0x0000002cu
+#define VPU_37XX_BUTTRESS_TILE_FUSE					0x0000002cu
 #define VPU_37XX_BUTTRESS_TILE_FUSE_VALID_MASK				BIT_MASK(0)
-#define VPU_37XX_BUTTRESS_TILE_FUSE_SKU_MASK					GENMASK(3, 2)
+#define VPU_37XX_BUTTRESS_TILE_FUSE_SKU_MASK				GENMASK(3, 2)

-#define VPU_37XX_BUTTRESS_LOCAL_INT_MASK					0x00000030u
-#define VPU_37XX_BUTTRESS_GLOBAL_INT_MASK					0x00000034u
+#define VPU_37XX_BUTTRESS_LOCAL_INT_MASK				0x00000030u
+#define VPU_37XX_BUTTRESS_GLOBAL_INT_MASK				0x00000034u

-#define VPU_37XX_BUTTRESS_PLL_STATUS						0x00000040u
+#define VPU_37XX_BUTTRESS_PLL_STATUS					0x00000040u
 #define VPU_37XX_BUTTRESS_PLL_STATUS_LOCK_MASK				BIT_MASK(1)

-#define VPU_37XX_BUTTRESS_VPU_STATUS						0x00000044u
+#define VPU_37XX_BUTTRESS_VPU_STATUS					0x00000044u
 #define VPU_37XX_BUTTRESS_VPU_STATUS_READY_MASK				BIT_MASK(0)
 #define VPU_37XX_BUTTRESS_VPU_STATUS_IDLE_MASK				BIT_MASK(1)

-#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL					0x00000060u
-#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK			BIT_MASK(0)
-#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK				BIT_MASK(2)
+#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL				0x00000060u
+#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK		BIT_MASK(0)
+#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK			BIT_MASK(2)

 #define VPU_37XX_BUTTRESS_VPU_IP_RESET					0x00000050u
-#define VPU_37XX_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK				BIT_MASK(0)
+#define VPU_37XX_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK			BIT_MASK(0)

 #define VPU_37XX_BUTTRESS_VPU_TELEMETRY_OFFSET				0x00000080u
-#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE					0x00000084u
+#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE				0x00000084u
 #define VPU_37XX_BUTTRESS_VPU_TELEMETRY_ENABLE				0x00000088u

 #define VPU_37XX_BUTTRESS_ATS_ERR_LOG_0					0x000000a0u
@ -74,9 +74,9 @@
 #define VPU_37XX_BUTTRESS_ATS_ERR_CLEAR					0x000000a8u

 #define VPU_37XX_BUTTRESS_UFI_ERR_LOG					0x000000b0u
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK				GENMASK(11, 0)
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK				GENMASK(19, 12)
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK				GENMASK(24, 20)
+#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK			GENMASK(11, 0)
+#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK			GENMASK(19, 12)
+#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK			GENMASK(24, 20)

 #define VPU_37XX_BUTTRESS_UFI_ERR_CLEAR					0x000000b4u

@ -113,17 +113,17 @@
 #define VPU_37XX_HOST_SS_NOC_QDENY					0x0000015cu
 #define VPU_37XX_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK			BIT_MASK(0)

-#define MTL_VPU_TOP_NOC_QREQN						0x00000160u
-#define MTL_VPU_TOP_NOC_QREQN_CPU_CTRL_MASK				BIT_MASK(0)
-#define MTL_VPU_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
+#define VPU_37XX_TOP_NOC_QREQN						0x00000160u
+#define VPU_37XX_TOP_NOC_QREQN_CPU_CTRL_MASK				BIT_MASK(0)
+#define VPU_37XX_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)

-#define MTL_VPU_TOP_NOC_QACCEPTN					0x00000164u
-#define MTL_VPU_TOP_NOC_QACCEPTN_CPU_CTRL_MASK				BIT_MASK(0)
-#define MTL_VPU_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
+#define VPU_37XX_TOP_NOC_QACCEPTN					0x00000164u
+#define VPU_37XX_TOP_NOC_QACCEPTN_CPU_CTRL_MASK				BIT_MASK(0)
+#define VPU_37XX_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)

-#define MTL_VPU_TOP_NOC_QDENY						0x00000168u
-#define MTL_VPU_TOP_NOC_QDENY_CPU_CTRL_MASK				BIT_MASK(0)
-#define MTL_VPU_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
+#define VPU_37XX_TOP_NOC_QDENY						0x00000168u
+#define VPU_37XX_TOP_NOC_QDENY_CPU_CTRL_MASK				BIT_MASK(0)
+#define VPU_37XX_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK			BIT_MASK(1)

 #define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN					0x00000170u
 #define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK			BIT_MASK(0)
@ -140,9 +140,9 @@
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK			BIT_MASK(2)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK			BIT_MASK(3)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK		BIT_MASK(4)
-#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK			BIT_MASK(5)
-#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK			BIT_MASK(6)
-#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK			BIT_MASK(7)
+#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK		BIT_MASK(5)
+#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK		BIT_MASK(6)
+#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK		BIT_MASK(7)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK		BIT_MASK(8)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK	BIT_MASK(30)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK	BIT_MASK(31)
@ -164,14 +164,14 @@
 #define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK		GENMASK(23, 16)
 #define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_RSVD0_MASK			GENMASK(31, 24)

-#define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0					0x00030020u
+#define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0				0x00030020u
 #define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0_MSS_CPU_MASK			BIT_MASK(3)

 #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0				0x00030024u
-#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK			BIT_MASK(3)
+#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK		BIT_MASK(3)

 #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0			0x00030028u
-#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK		BIT_MASK(3)
+#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK	BIT_MASK(3)

 #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0				0x0003002cu
 #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0_MSS_CPU_MASK		BIT_MASK(3)
@ -187,47 +187,14 @@
 #define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO_IOSF_RS_ID_MASK		GENMASK(2, 1)
 #define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO_IMAGE_LOCATION_MASK		GENMASK(31, 3)

-#define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR				0x00082020u
+#define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR			0x00082020u
 #define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK	GENMASK(15, 0)
 #define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK		GENMASK(31, 16)

-#define VPU_37XX_HOST_MMU_IDR0						0x00200000u
-#define VPU_37XX_HOST_MMU_IDR1						0x00200004u
-#define VPU_37XX_HOST_MMU_IDR3						0x0020000cu
-#define VPU_37XX_HOST_MMU_IDR5						0x00200014u
-#define VPU_37XX_HOST_MMU_CR0						0x00200020u
-#define VPU_37XX_HOST_MMU_CR0ACK						0x00200024u
-#define VPU_37XX_HOST_MMU_CR1						0x00200028u
-#define VPU_37XX_HOST_MMU_CR2						0x0020002cu
-#define VPU_37XX_HOST_MMU_IRQ_CTRL					0x00200050u
-#define VPU_37XX_HOST_MMU_IRQ_CTRLACK					0x00200054u
-
-#define VPU_37XX_HOST_MMU_GERROR						0x00200060u
-#define VPU_37XX_HOST_MMU_GERROR_CMDQ_MASK				BIT_MASK(0)
-#define VPU_37XX_HOST_MMU_GERROR_EVTQ_ABT_MASK				BIT_MASK(2)
-#define VPU_37XX_HOST_MMU_GERROR_PRIQ_ABT_MASK				BIT_MASK(3)
-#define VPU_37XX_HOST_MMU_GERROR_MSI_CMDQ_ABT_MASK			BIT_MASK(4)
-#define VPU_37XX_HOST_MMU_GERROR_MSI_EVTQ_ABT_MASK			BIT_MASK(5)
-#define VPU_37XX_HOST_MMU_GERROR_MSI_PRIQ_ABT_MASK			BIT_MASK(6)
-#define VPU_37XX_HOST_MMU_GERROR_MSI_ABT_MASK				BIT_MASK(7)
-
-#define VPU_37XX_HOST_MMU_GERRORN					0x00200064u
-
-#define VPU_37XX_HOST_MMU_STRTAB_BASE					0x00200080u
-#define VPU_37XX_HOST_MMU_STRTAB_BASE_CFG				0x00200088u
-#define VPU_37XX_HOST_MMU_CMDQ_BASE					0x00200090u
-#define VPU_37XX_HOST_MMU_CMDQ_PROD					0x00200098u
-#define VPU_37XX_HOST_MMU_CMDQ_CONS					0x0020009cu
-#define VPU_37XX_HOST_MMU_EVTQ_BASE					0x002000a0u
-#define VPU_37XX_HOST_MMU_EVTQ_PROD					0x002000a8u
-#define VPU_37XX_HOST_MMU_EVTQ_CONS					0x002000acu
-#define VPU_37XX_HOST_MMU_EVTQ_PROD_SEC					(0x002000a8u + SZ_64K)
-#define VPU_37XX_HOST_MMU_EVTQ_CONS_SEC					(0x002000acu + SZ_64K)
-
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES				0x00360000u
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK	BIT_MASK(0)
-#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK		BIT_MASK(1)
-#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK		BIT_MASK(2)
+#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK	BIT_MASK(1)
+#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK	BIT_MASK(2)
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_NOSNOOP_OVERRIDE_EN_MASK	BIT_MASK(3)
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AW_NOSNOOP_OVERRIDE_MASK	BIT_MASK(4)
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AR_NOSNOOP_OVERRIDE_MASK	BIT_MASK(5)
@ -246,36 +213,36 @@
 #define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK		BIT_MASK(8)
 #define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK		BIT_MASK(9)

-#define MTL_VPU_CPU_SS_DSU_LEON_RT_BASE					0x04000000u
-#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_CTRL				0x04000000u
-#define MTL_VPU_CPU_SS_DSU_LEON_RT_PC_REG				0x04400010u
-#define MTL_VPU_CPU_SS_DSU_LEON_RT_NPC_REG				0x04400014u
-#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG				0x04400020u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_BASE				0x04000000u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_DSU_CTRL				0x04000000u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_PC_REG				0x04400010u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_NPC_REG				0x04400014u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG			0x04400020u

-#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET				0x06010004u
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK			BIT_MASK(1)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_CLK_SET				0x06010004u
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK			BIT_MASK(1)

-#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR				0x06010018u
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK			BIT_MASK(1)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_RST_CLR				0x06010018u
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK			BIT_MASK(1)

-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC				0x06010040u
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK		BIT_MASK(0)
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK		BIT_MASK(1)
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK		BIT_MASK(2)
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK		BIT_MASK(3)
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK		GENMASK(31, 4)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC				0x06010040u
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK	BIT_MASK(0)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK	BIT_MASK(1)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK	BIT_MASK(2)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK	BIT_MASK(3)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK		GENMASK(31, 4)

-#define MTL_VPU_CPU_SS_TIM_WATCHDOG					0x0602009cu
-#define MTL_VPU_CPU_SS_TIM_WDOG_EN					0x060200a4u
-#define MTL_VPU_CPU_SS_TIM_SAFE						0x060200a8u
-#define MTL_VPU_CPU_SS_TIM_IPC_FIFO					0x060200f0u
+#define VPU_37XX_CPU_SS_TIM_WATCHDOG					0x0602009cu
+#define VPU_37XX_CPU_SS_TIM_WDOG_EN					0x060200a4u
+#define VPU_37XX_CPU_SS_TIM_SAFE					0x060200a8u
+#define VPU_37XX_CPU_SS_TIM_IPC_FIFO					0x060200f0u

-#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG					0x06021008u
-#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK		BIT_MASK(9)
+#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG					0x06021008u
+#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK		BIT_MASK(9)

-#define MTL_VPU_CPU_SS_DOORBELL_0					0x06300000u
-#define MTL_VPU_CPU_SS_DOORBELL_0_SET_MASK				BIT_MASK(0)
+#define VPU_37XX_CPU_SS_DOORBELL_0					0x06300000u
+#define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK				BIT_MASK(0)

-#define MTL_VPU_CPU_SS_DOORBELL_1					0x06301000u
+#define VPU_37XX_CPU_SS_DOORBELL_1					0x06301000u

-#endif /* __IVPU_HW_MTL_REG_H__ */
+#endif /* __IVPU_HW_37XX_REG_H__ */
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@ -126,6 +126,10 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)

 	if (ivpu_hw_gen(vdev) == IVPU_HW_40XX)
 		vdev->wa.disable_clock_relinquish = true;
+
+	IVPU_PRINT_WA(punit_disabled);
+	IVPU_PRINT_WA(clear_runtime_mem);
+	IVPU_PRINT_WA(disable_clock_relinquish);
 }

 static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
@ -135,16 +139,19 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
 		vdev->timeout.jsm = 50000;
 		vdev->timeout.tdr = 2000000;
 		vdev->timeout.reschedule_suspend = 1000;
+		vdev->timeout.autosuspend = -1;
 	} else if (ivpu_is_simics(vdev)) {
 		vdev->timeout.boot = 50;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 10000;
 		vdev->timeout.reschedule_suspend = 10;
+		vdev->timeout.autosuspend = -1;
 	} else {
 		vdev->timeout.boot = 1000;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 2000;
 		vdev->timeout.reschedule_suspend = 10;
+		vdev->timeout.autosuspend = 10;
 	}
 }

--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@ -426,15 +426,20 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 int ivpu_ipc_init(struct ivpu_device *vdev)
 {
 	struct ivpu_ipc_info *ipc = vdev->ipc;
-	int ret = -ENOMEM;
+	int ret;

 	ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
-	if (!ipc->mem_tx)
-		return ret;
+	if (!ipc->mem_tx) {
+		ivpu_err(vdev, "Failed to allocate mem_tx\n");
+		return -ENOMEM;
+	}

 	ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
-	if (!ipc->mem_rx)
+	if (!ipc->mem_rx) {
+		ivpu_err(vdev, "Failed to allocate mem_rx\n");
+		ret = -ENOMEM;
 		goto err_free_tx;
+	}

 	ipc->mm_tx = devm_gen_pool_create(vdev->drm.dev, __ffs(IVPU_IPC_ALIGNMENT),
 					  -1, "TX_IPC_JSM");
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@ -7,12 +7,45 @@
 #include <linux/highmem.h>

 #include "ivpu_drv.h"
-#include "ivpu_hw_37xx_reg.h"
 #include "ivpu_hw_reg_io.h"
 #include "ivpu_mmu.h"
 #include "ivpu_mmu_context.h"
 #include "ivpu_pm.h"

+#define IVPU_MMU_REG_IDR0		      0x00200000u
+#define IVPU_MMU_REG_IDR1		      0x00200004u
+#define IVPU_MMU_REG_IDR3		      0x0020000cu
+#define IVPU_MMU_REG_IDR5		      0x00200014u
+#define IVPU_MMU_REG_CR0		      0x00200020u
+#define IVPU_MMU_REG_CR0ACK		      0x00200024u
+#define IVPU_MMU_REG_CR1		      0x00200028u
+#define IVPU_MMU_REG_CR2		      0x0020002cu
+#define IVPU_MMU_REG_IRQ_CTRL		      0x00200050u
+#define IVPU_MMU_REG_IRQ_CTRLACK	      0x00200054u
+
+#define IVPU_MMU_REG_GERROR		      0x00200060u
+#define IVPU_MMU_REG_GERROR_CMDQ_MASK	      BIT_MASK(0)
+#define IVPU_MMU_REG_GERROR_EVTQ_ABT_MASK     BIT_MASK(2)
+#define IVPU_MMU_REG_GERROR_PRIQ_ABT_MASK     BIT_MASK(3)
+#define IVPU_MMU_REG_GERROR_MSI_CMDQ_ABT_MASK BIT_MASK(4)
+#define IVPU_MMU_REG_GERROR_MSI_EVTQ_ABT_MASK BIT_MASK(5)
+#define IVPU_MMU_REG_GERROR_MSI_PRIQ_ABT_MASK BIT_MASK(6)
+#define IVPU_MMU_REG_GERROR_MSI_ABT_MASK      BIT_MASK(7)
+
+#define IVPU_MMU_REG_GERRORN		      0x00200064u
+
+#define IVPU_MMU_REG_STRTAB_BASE	      0x00200080u
+#define IVPU_MMU_REG_STRTAB_BASE_CFG	      0x00200088u
+#define IVPU_MMU_REG_CMDQ_BASE		      0x00200090u
+#define IVPU_MMU_REG_CMDQ_PROD		      0x00200098u
+#define IVPU_MMU_REG_CMDQ_CONS		      0x0020009cu
+#define IVPU_MMU_REG_EVTQ_BASE		      0x002000a0u
+#define IVPU_MMU_REG_EVTQ_PROD		      0x002000a8u
+#define IVPU_MMU_REG_EVTQ_CONS		      0x002000acu
+#define IVPU_MMU_REG_EVTQ_PROD_SEC	      (0x002000a8u + SZ_64K)
+#define IVPU_MMU_REG_EVTQ_CONS_SEC	      (0x002000acu + SZ_64K)
+#define IVPU_MMU_REG_CMDQ_CONS_ERR_MASK	      GENMASK(30, 24)
+
 #define IVPU_MMU_IDR0_REF		0x080f3e0f
 #define IVPU_MMU_IDR0_REF_SIMICS	0x080f3e1f
 #define IVPU_MMU_IDR1_REF		0x0e739d18
@ -186,13 +219,13 @@
 #define IVPU_MMU_REG_TIMEOUT_US		(10 * USEC_PER_MSEC)
 #define IVPU_MMU_QUEUE_TIMEOUT_US	(100 * USEC_PER_MSEC)

-#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(VPU_37XX_HOST_MMU_GERROR, CMDQ)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, EVTQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, PRIQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_CMDQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_EVTQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_PRIQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_ABT)))
+#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(IVPU_MMU_REG_GERROR, CMDQ)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, EVTQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, PRIQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_CMDQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_EVTQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_ABT)))

 static char *ivpu_mmu_event_to_str(u32 cmd)
 {
@ -250,15 +283,15 @@ static void ivpu_mmu_config_check(struct ivpu_device *vdev)
 	else
 		val_ref = IVPU_MMU_IDR0_REF;

-	val = REGV_RD32(VPU_37XX_HOST_MMU_IDR0);
+	val = REGV_RD32(IVPU_MMU_REG_IDR0);
 	if (val != val_ref)
 		ivpu_dbg(vdev, MMU, "IDR0 0x%x != IDR0_REF 0x%x\n", val, val_ref);

-	val = REGV_RD32(VPU_37XX_HOST_MMU_IDR1);
+	val = REGV_RD32(IVPU_MMU_REG_IDR1);
 	if (val != IVPU_MMU_IDR1_REF)
 		ivpu_dbg(vdev, MMU, "IDR1 0x%x != IDR1_REF 0x%x\n", val, IVPU_MMU_IDR1_REF);

-	val = REGV_RD32(VPU_37XX_HOST_MMU_IDR3);
+	val = REGV_RD32(IVPU_MMU_REG_IDR3);
 	if (val != IVPU_MMU_IDR3_REF)
 		ivpu_dbg(vdev, MMU, "IDR3 0x%x != IDR3_REF 0x%x\n", val, IVPU_MMU_IDR3_REF);

@ -269,7 +302,7 @@ static void ivpu_mmu_config_check(struct ivpu_device *vdev)
 	else
 		val_ref = IVPU_MMU_IDR5_REF;

-	val = REGV_RD32(VPU_37XX_HOST_MMU_IDR5);
+	val = REGV_RD32(IVPU_MMU_REG_IDR5);
 	if (val != val_ref)
 		ivpu_dbg(vdev, MMU, "IDR5 0x%x != IDR5_REF 0x%x\n", val, val_ref);
 }
@ -396,18 +429,18 @@ static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev)
 	u32 irq_ctrl = IVPU_MMU_IRQ_EVTQ_EN | IVPU_MMU_IRQ_GERROR_EN;
 	int ret;

-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_IRQ_CTRL, 0);
+	ret = ivpu_mmu_reg_write(vdev, IVPU_MMU_REG_IRQ_CTRL, 0);
 	if (ret)
 		return ret;

-	return ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_IRQ_CTRL, irq_ctrl);
+	return ivpu_mmu_reg_write(vdev, IVPU_MMU_REG_IRQ_CTRL, irq_ctrl);
 }

 static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev)
 {
 	struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq;

-	return REGV_POLL(VPU_37XX_HOST_MMU_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons),
+	return REGV_POLL(IVPU_MMU_REG_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons),
 			 IVPU_MMU_QUEUE_TIMEOUT_US);
 }

@ -447,7 +480,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
 		return ret;

 	clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE);
-	REGV_WR32(VPU_37XX_HOST_MMU_CMDQ_PROD, q->prod);
+	REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod);

 	ret = ivpu_mmu_cmdq_wait_for_cons(vdev);
 	if (ret)
@ -495,7 +528,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
 	mmu->evtq.prod = 0;
 	mmu->evtq.cons = 0;

-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, 0);
+	ret = ivpu_mmu_reg_write(vdev, IVPU_MMU_REG_CR0, 0);
 	if (ret)
 		return ret;

@ -505,17 +538,17 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
 	      FIELD_PREP(IVPU_MMU_CR1_QUEUE_SH, IVPU_MMU_SH_ISH) |
 	      FIELD_PREP(IVPU_MMU_CR1_QUEUE_OC, IVPU_MMU_CACHE_WB) |
 	      FIELD_PREP(IVPU_MMU_CR1_QUEUE_IC, IVPU_MMU_CACHE_WB);
-	REGV_WR32(VPU_37XX_HOST_MMU_CR1, val);
+	REGV_WR32(IVPU_MMU_REG_CR1, val);

-	REGV_WR64(VPU_37XX_HOST_MMU_STRTAB_BASE, mmu->strtab.dma_q);
-	REGV_WR32(VPU_37XX_HOST_MMU_STRTAB_BASE_CFG, mmu->strtab.base_cfg);
+	REGV_WR64(IVPU_MMU_REG_STRTAB_BASE, mmu->strtab.dma_q);
+	REGV_WR32(IVPU_MMU_REG_STRTAB_BASE_CFG, mmu->strtab.base_cfg);

-	REGV_WR64(VPU_37XX_HOST_MMU_CMDQ_BASE, mmu->cmdq.dma_q);
-	REGV_WR32(VPU_37XX_HOST_MMU_CMDQ_PROD, 0);
-	REGV_WR32(VPU_37XX_HOST_MMU_CMDQ_CONS, 0);
+	REGV_WR64(IVPU_MMU_REG_CMDQ_BASE, mmu->cmdq.dma_q);
+	REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, 0);
+	REGV_WR32(IVPU_MMU_REG_CMDQ_CONS, 0);

 	val = IVPU_MMU_CR0_CMDQEN;
-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val);
+	ret = ivpu_mmu_reg_write(vdev, IVPU_MMU_REG_CR0, val);
 	if (ret)
 		return ret;

@ -531,17 +564,17 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
 	if (ret)
 		return ret;

-	REGV_WR64(VPU_37XX_HOST_MMU_EVTQ_BASE, mmu->evtq.dma_q);
-	REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_PROD_SEC, 0);
-	REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_CONS_SEC, 0);
+	REGV_WR64(IVPU_MMU_REG_EVTQ_BASE, mmu->evtq.dma_q);
+	REGV_WR32(IVPU_MMU_REG_EVTQ_PROD_SEC, 0);
+	REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, 0);

 	val |= IVPU_MMU_CR0_EVTQEN;
-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val);
+	ret = ivpu_mmu_reg_write(vdev, IVPU_MMU_REG_CR0, val);
 	if (ret)
 		return ret;

 	val |= IVPU_MMU_CR0_ATSCHK;
-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val);
+	ret = ivpu_mmu_reg_write(vdev, IVPU_MMU_REG_CR0, val);
 	if (ret)
 		return ret;

@ -550,7 +583,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
 		return ret;

 	val |= IVPU_MMU_CR0_SMMUEN;
-	return ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val);
+	return ivpu_mmu_reg_write(vdev, IVPU_MMU_REG_CR0, val);
 }

 static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid)
@ -801,14 +834,14 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
 	u32 idx = IVPU_MMU_Q_IDX(evtq->cons);
 	u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE);

-	evtq->prod = REGV_RD32(VPU_37XX_HOST_MMU_EVTQ_PROD_SEC);
+	evtq->prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
 	if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT))
 		return NULL;

 	clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE);

 	evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK;
-	REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_CONS_SEC, evtq->cons);
+	REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, evtq->cons);

 	return evt;
 }
@ -841,35 +874,35 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev)

 	ivpu_dbg(vdev, IRQ, "MMU error\n");

-	gerror_val = REGV_RD32(VPU_37XX_HOST_MMU_GERROR);
-	gerrorn_val = REGV_RD32(VPU_37XX_HOST_MMU_GERRORN);
+	gerror_val = REGV_RD32(IVPU_MMU_REG_GERROR);
+	gerrorn_val = REGV_RD32(IVPU_MMU_REG_GERRORN);

 	active = gerror_val ^ gerrorn_val;
 	if (!(active & IVPU_MMU_GERROR_ERR_MASK))
 		return;

-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_ABT, active))
 		ivpu_warn_ratelimited(vdev, "MMU MSI ABT write aborted\n");

-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_PRIQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT, active))
 		ivpu_warn_ratelimited(vdev, "MMU PRIQ MSI ABT write aborted\n");

-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_EVTQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_EVTQ_ABT, active))
 		ivpu_warn_ratelimited(vdev, "MMU EVTQ MSI ABT write aborted\n");

-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_CMDQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_CMDQ_ABT, active))
 		ivpu_warn_ratelimited(vdev, "MMU CMDQ MSI ABT write aborted\n");

-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, PRIQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, PRIQ_ABT, active))
 		ivpu_err_ratelimited(vdev, "MMU PRIQ write aborted\n");

-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, EVTQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, EVTQ_ABT, active))
 		ivpu_err_ratelimited(vdev, "MMU EVTQ write aborted\n");

-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, CMDQ, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, CMDQ, active))
 		ivpu_err_ratelimited(vdev, "MMU CMDQ write aborted\n");

-	REGV_WR32(VPU_37XX_HOST_MMU_GERRORN, gerror_val);
+	REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val);
 }

 int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@ -427,8 +427,10 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
 	INIT_LIST_HEAD(&ctx->bo_list);

 	ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
-	if (ret)
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret);
 		return ret;
+	}

 	if (!context_id) {
 		start = vdev->hw->ranges.global.start;
@ -467,6 +469,16 @@ void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
 	return ivpu_mmu_context_fini(vdev, &vdev->gctx);
 }

+/*
+ * Set up the device's reserved MMU context (vdev->rctx) by running the
+ * common user-context initialisation with IVPU_RESERVED_CONTEXT_MMU_SSID
+ * as the context id.
+ *
+ * Returns whatever ivpu_mmu_user_context_init() returns.
+ */
+int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev)
+{
+	return ivpu_mmu_user_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID);
+}
+
+/*
+ * Tear down the device's reserved MMU context (vdev->rctx) through the
+ * common user-context teardown helper.
+ *
+ * The helper is invoked as a plain call: this function returns void, and
+ * ISO C does not allow "return <expression>;" in a void function.
+ */
+void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
+{
+	ivpu_mmu_user_context_fini(vdev, &vdev->rctx);
+}
+
 void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
 {
 	struct ivpu_file_priv *file_priv;
@ -488,13 +500,13 @@ int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context

 	ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
 	if (ret) {
-		ivpu_err(vdev, "Failed to initialize context: %d\n", ret);
+		ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, ret);
 		return ret;
 	}

 	ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
 	if (ret) {
-		ivpu_err(vdev, "Failed to set page table: %d\n", ret);
+		ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, ret);
 		goto err_context_fini;
 	}

--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@ -32,6 +32,8 @@ struct ivpu_mmu_context {

 int ivpu_mmu_global_context_init(struct ivpu_device *vdev);
 void ivpu_mmu_global_context_fini(struct ivpu_device *vdev);
+int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev);
+void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev);

 int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id);
 void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@ -282,10 +282,11 @@ void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
 	pm_runtime_put_autosuspend(vdev->drm.dev);
 }

-int ivpu_pm_init(struct ivpu_device *vdev)
+void ivpu_pm_init(struct ivpu_device *vdev)
 {
 	struct device *dev = vdev->drm.dev;
 	struct ivpu_pm_info *pm = vdev->pm;
+	int delay;

 	pm->vdev = vdev;
 	pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
@ -293,16 +294,15 @@ int ivpu_pm_init(struct ivpu_device *vdev)
 	atomic_set(&pm->in_reset, 0);
 	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);

-	pm_runtime_use_autosuspend(dev);
-
 	if (ivpu_disable_recovery)
-		pm_runtime_set_autosuspend_delay(dev, -1);
-	else if (ivpu_is_silicon(vdev))
-		pm_runtime_set_autosuspend_delay(dev, 100);
+		delay = -1;
 	else
-		pm_runtime_set_autosuspend_delay(dev, 60000);
+		delay = vdev->timeout.autosuspend;

-	return 0;
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, delay);
+
+	ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
 }

 void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
--- a/drivers/accel/ivpu/ivpu_pm.h
+++ b/drivers/accel/ivpu/ivpu_pm.h
@ -19,7 +19,7 @@ struct ivpu_pm_info {
 	u32 suspend_reschedule_counter;
 };

-int ivpu_pm_init(struct ivpu_device *vdev);
+void ivpu_pm_init(struct ivpu_device *vdev);
 void ivpu_pm_enable(struct ivpu_device *vdev);
 void ivpu_pm_disable(struct ivpu_device *vdev);
 void ivpu_pm_cancel_recovery(struct ivpu_device *vdev);
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@ -27,6 +27,9 @@
 #define QAIC_DBC_OFF(i)		((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE)

 #define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base)
+#define to_qaic_drm_device(dev) container_of(dev, struct qaic_drm_device, drm)
+#define to_drm(qddev) (&(qddev)->drm)
+#define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */

 extern bool datapath_polling;

@ -137,6 +140,8 @@ struct qaic_device {
 };

 struct qaic_drm_device {
+	/* The drm device struct of this drm device */
+	struct drm_device	drm;
 	/* Pointer to the root device struct driven by this driver */
 	struct qaic_device	*qdev;
 	/*
@ -146,8 +151,6 @@ struct qaic_drm_device {
 	 * device is the actual physical device
 	 */
 	s32			partition_id;
-	/* Pointer to the drm device struct of this drm device */
-	struct drm_device	*ddev;
 	/* Head in list of users who have opened this drm device */
 	struct list_head	users;
 	/* Synchronizes access to users list */
@ -158,8 +161,6 @@ struct qaic_bo {
 	struct drm_gem_object	base;
 	/* Scatter/gather table for allocate/imported BO */
 	struct sg_table		*sgt;
-	/* BO size requested by user. GEM object might be bigger in size. */
-	u64			size;
 	/* Head in list of slices of this BO */
 	struct list_head	slices;
 	/* Total nents, for all slices of this BO */
@ -221,7 +222,8 @@ struct qaic_bo {
 		 */
 		u32		queue_level_before;
 	} perf_stats;
-
+	/* Synchronizes BO operations */
+	struct mutex		lock;
 };

 struct bo_slice {
@ -277,6 +279,7 @@ int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *f
 int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
 int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
 int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
 void irq_polling_work(struct work_struct *work);

 #endif /* _QAIC_H_ */
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@ -154,6 +154,7 @@ static void free_slice(struct kref *kref)
 {
 	struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count);

+	slice->bo->total_slice_nents -= slice->nents;
 	list_del(&slice->slice);
 	drm_gem_object_put(&slice->bo->base);
 	sg_free_table(slice->sgt);
@ -579,7 +580,7 @@ static void qaic_gem_print_info(struct drm_printer *p, unsigned int indent,
 {
 	struct qaic_bo *bo = to_qaic_bo(obj);

-	drm_printf_indent(p, indent, "user requested size=%llu\n", bo->size);
+	drm_printf_indent(p, indent, "BO DMA direction %d\n", bo->dir);
 }

 static const struct vm_operations_struct drm_vm_ops = {
@ -623,6 +624,7 @@ static void qaic_free_object(struct drm_gem_object *obj)
 		qaic_free_sgt(bo->sgt);
 	}

+	mutex_destroy(&bo->lock);
 	drm_gem_object_release(obj);
 	kfree(bo);
 }
@ -634,6 +636,19 @@ static const struct drm_gem_object_funcs qaic_gem_funcs = {
 	.vm_ops = &drm_vm_ops,
 };

+/*
+ * Bring the bookkeeping state of @bo to its unsliced, idle form.
+ *
+ * @reinit == false (used by qaic_alloc_init_bo()): initialise bo->lock and
+ * the xfer_done completion for the first time.
+ * @reinit == true (used by detach_slice_bo()): clear bo->sliced and re-arm
+ * xfer_done with reinit_completion(); bo->lock is not touched.
+ *
+ * On both paths xfer_done is then completed with complete_all() and the
+ * slices list head is reset to an empty list.
+ */
+static void qaic_init_bo(struct qaic_bo *bo, bool reinit)
+{
+	if (reinit) {
+		bo->sliced = false;
+		reinit_completion(&bo->xfer_done);
+	} else {
+		mutex_init(&bo->lock);
+		init_completion(&bo->xfer_done);
+	}
+	/*
+	 * NOTE(review): presumably xfer_done is left completed so that waiting
+	 * on a BO with no transfer outstanding does not block - confirm.
+	 */
+	complete_all(&bo->xfer_done);
+	INIT_LIST_HEAD(&bo->slices);
+}
+
 static struct qaic_bo *qaic_alloc_init_bo(void)
 {
 	struct qaic_bo *bo;
@ -642,9 +657,7 @@ static struct qaic_bo *qaic_alloc_init_bo(void)
 	if (!bo)
 		return ERR_PTR(-ENOMEM);

-	INIT_LIST_HEAD(&bo->slices);
-	init_completion(&bo->xfer_done);
-	complete_all(&bo->xfer_done);
+	qaic_init_bo(bo, false);

 	return bo;
 }
@ -695,8 +708,6 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 	if (ret)
 		goto free_bo;

-	bo->size = args->size;
-
 	ret = drm_gem_handle_create(file_priv, obj, &args->handle);
 	if (ret)
 		goto free_sgt;
@ -828,7 +839,6 @@ static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_h
 	}

 	bo->sgt = sgt;
-	bo->size = hdr->size;

 	return 0;
 }
@ -838,7 +848,7 @@ static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo,
 {
 	int ret;

-	if (bo->size != hdr->size)
+	if (bo->base.size < hdr->size)
 		return -EINVAL;

 	ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0);
@ -857,9 +867,9 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
 		ret = qaic_prepare_import_bo(bo, hdr);
 	else
 		ret = qaic_prepare_export_bo(qdev, bo, hdr);
-
-	if (ret == 0)
-		bo->dir = hdr->dir;
+	bo->dir = hdr->dir;
+	bo->dbc = &qdev->dbc[hdr->dbc_id];
+	bo->nr_slice = hdr->count;

 	return ret;
 }
@ -868,7 +878,6 @@ static void qaic_unprepare_import_bo(struct qaic_bo *bo)
 {
 	dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir);
 	bo->sgt = NULL;
-	bo->size = 0;
 }

 static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo)
@ -884,6 +893,8 @@ static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
 		qaic_unprepare_export_bo(qdev, bo);

 	bo->dir = 0;
+	bo->dbc = NULL;
+	bo->nr_slice = 0;
 }

 static void qaic_free_slices_bo(struct qaic_bo *bo)
@ -892,6 +903,9 @@ static void qaic_free_slices_bo(struct qaic_bo *bo)

 	list_for_each_entry_safe(slice, temp, &bo->slices, slice)
 		kref_put(&slice->ref_count, free_slice);
+	if (WARN_ON_ONCE(bo->total_slice_nents != 0))
+		bo->total_slice_nents = 0;
+	bo->nr_slice = 0;
 }

 static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo,
@ -908,15 +922,11 @@ static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo,
 		}
 	}

-	if (bo->total_slice_nents > qdev->dbc[hdr->dbc_id].nelem) {
+	if (bo->total_slice_nents > bo->dbc->nelem) {
 		qaic_free_slices_bo(bo);
 		return -ENOSPC;
 	}

-	bo->sliced = true;
-	bo->nr_slice = hdr->count;
-	list_add_tail(&bo->bo_list, &qdev->dbc[hdr->dbc_id].bo_lists);
-
 	return 0;
 }

@ -994,10 +1004,13 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
 	}

 	bo = to_qaic_bo(obj);
+	ret = mutex_lock_interruptible(&bo->lock);
+	if (ret)
+		goto put_bo;

 	if (bo->sliced) {
 		ret = -EINVAL;
-		goto put_bo;
+		goto unlock_bo;
 	}

 	dbc = &qdev->dbc[args->hdr.dbc_id];
@ -1018,9 +1031,10 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
 	if (args->hdr.dir == DMA_TO_DEVICE)
 		dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir);

-	bo->dbc = dbc;
+	bo->sliced = true;
+	list_add_tail(&bo->bo_list, &bo->dbc->bo_lists);
 	srcu_read_unlock(&dbc->ch_lock, rcu_id);
-	drm_gem_object_put(obj);
+	mutex_unlock(&bo->lock);
 	kfree(slice_ent);
 	srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
 	srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
@ -1031,6 +1045,8 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
 	qaic_unprepare_bo(qdev, bo);
 unlock_ch_srcu:
 	srcu_read_unlock(&dbc->ch_lock, rcu_id);
+unlock_bo:
+	mutex_unlock(&bo->lock);
 put_bo:
 	drm_gem_object_put(obj);
 free_slice_ent:
@ -1185,15 +1201,18 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
 		}

 		bo = to_qaic_bo(obj);
+		ret = mutex_lock_interruptible(&bo->lock);
+		if (ret)
+			goto failed_to_send_bo;

 		if (!bo->sliced) {
 			ret = -EINVAL;
-			goto failed_to_send_bo;
+			goto unlock_bo;
 		}

-		if (is_partial && pexec[i].resize > bo->size) {
+		if (is_partial && pexec[i].resize > bo->base.size) {
 			ret = -EINVAL;
-			goto failed_to_send_bo;
+			goto unlock_bo;
 		}

 		spin_lock_irqsave(&dbc->xfer_lock, flags);
@ -1202,7 +1221,7 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
 		if (queued) {
 			spin_unlock_irqrestore(&dbc->xfer_lock, flags);
 			ret = -EINVAL;
-			goto failed_to_send_bo;
+			goto unlock_bo;
 		}

 		bo->req_id = dbc->next_req_id++;
@ -1233,17 +1252,20 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
 			if (ret) {
 				bo->queued = false;
 				spin_unlock_irqrestore(&dbc->xfer_lock, flags);
-				goto failed_to_send_bo;
+				goto unlock_bo;
 			}
 		}
 		reinit_completion(&bo->xfer_done);
 		list_add_tail(&bo->xfer_list, &dbc->xfer_list);
 		spin_unlock_irqrestore(&dbc->xfer_lock, flags);
 		dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir);
+		mutex_unlock(&bo->lock);
 	}

 	return 0;

+unlock_bo:
+	mutex_unlock(&bo->lock);
 failed_to_send_bo:
 	if (likely(obj))
 		drm_gem_object_put(obj);
@ -1799,6 +1821,91 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
 	return ret;
 }

+/*
+ * Strip the slicing information from @bo and return it to the unsliced
+ * state. Both callers visible in this file (the detach-slice ioctl and
+ * release_dbc()) hold bo->lock around the call.
+ *
+ * Steps, in order: release the slices, undo the prepare step, re-initialise
+ * the BO state (which clears bo->sliced), unlink the BO from the bo_list it
+ * is on, and finally drop one GEM reference.
+ * NOTE(review): the dropped reference looks like the one the attach-slice
+ * ioctl keeps on its success path - confirm. Because of this put, a caller
+ * must own a reference of its own if it touches @bo afterwards.
+ */
+static void detach_slice_bo(struct qaic_device *qdev, struct qaic_bo *bo)
+{
+	qaic_free_slices_bo(bo);
+	qaic_unprepare_bo(qdev, bo);
+	qaic_init_bo(bo, true);
+	list_del(&bo->bo_list);
+	drm_gem_object_put(&bo->base);
+}
+
+/*
+ * Handler registered for the QAIC_DETACH_SLICE_BO ioctl: removes the slicing
+ * information from the BO named by args->handle.
+ *
+ * @dev: DRM device the ioctl was issued on (not used directly here)
+ * @data: ioctl payload, a struct qaic_detach_slice
+ * @file_priv: DRM file of the caller; driver_priv holds the qaic_user
+ *
+ * Locks are taken in the order usr->qddev_lock (SRCU), qdev->dev_lock (SRCU),
+ * bo->lock (mutex), dbc->ch_lock (SRCU) and released in reverse through the
+ * label ladder at the end of the function.
+ *
+ * Return: 0 on success;
+ *	   -EINVAL if args->pad is non-zero, the BO is not sliced, or the BO's
+ *	   DBC is not owned by the calling user;
+ *	   -ENODEV if the user has no qddev or the device is in reset;
+ *	   -ENOENT if the handle does not resolve to a GEM object;
+ *	   -EBUSY if the BO is currently queued;
+ *	   or the error returned by mutex_lock_interruptible().
+ */
+int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	struct qaic_detach_slice *args = data;
+	int rcu_id, usr_rcu_id, qdev_rcu_id;
+	struct dma_bridge_chan *dbc;
+	struct drm_gem_object *obj;
+	struct qaic_device *qdev;
+	struct qaic_user *usr;
+	unsigned long flags;
+	struct qaic_bo *bo;
+	int ret;
+
+	/* Rejected before any lock or reference is taken, hence the direct return */
+	if (args->pad != 0)
+		return -EINVAL;
+
+	usr = file_priv->driver_priv;
+	usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+	if (!usr->qddev) {
+		ret = -ENODEV;
+		goto unlock_usr_srcu;
+	}
+
+	qdev = usr->qddev->qdev;
+	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+	if (qdev->in_reset) {
+		ret = -ENODEV;
+		goto unlock_dev_srcu;
+	}
+
+	/* Takes a reference on the object; it is dropped at put_bo below */
+	obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!obj) {
+		ret = -ENOENT;
+		goto unlock_dev_srcu;
+	}
+
+	bo = to_qaic_bo(obj);
+	ret = mutex_lock_interruptible(&bo->lock);
+	if (ret)
+		goto put_bo;
+
+	/* Only a BO that currently has slicing attached can be detached */
+	if (!bo->sliced) {
+		ret = -EINVAL;
+		goto unlock_bo;
+	}
+
+	dbc = bo->dbc;
+	rcu_id = srcu_read_lock(&dbc->ch_lock);
+	if (dbc->usr != usr) {
+		ret = -EINVAL;
+		goto unlock_ch_srcu;
+	}
+
+	/* Check if BO is committed to H/W for DMA */
+	spin_lock_irqsave(&dbc->xfer_lock, flags);
+	if (bo->queued) {
+		spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+		ret = -EBUSY;
+		goto unlock_ch_srcu;
+	}
+	spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+
+	/*
+	 * bo->lock is still held and send_bo_list_to_device() takes bo->lock
+	 * before it queues a BO, so the BO cannot become queued between the
+	 * check above and the detach. ret is still 0 from the successful
+	 * mutex_lock_interruptible(), so falling through the unwind labels
+	 * below reports success.
+	 */
+	detach_slice_bo(qdev, bo);
+
+unlock_ch_srcu:
+	srcu_read_unlock(&dbc->ch_lock, rcu_id);
+unlock_bo:
+	mutex_unlock(&bo->lock);
+put_bo:
+	drm_gem_object_put(obj);
+unlock_dev_srcu:
+	srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+unlock_usr_srcu:
+	srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+	return ret;
+}
+
 static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
 {
 	unsigned long flags;
@ -1810,6 +1917,12 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db
 		bo->queued = false;
 		list_del(&bo->xfer_list);
 		spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+		bo->nr_slice_xfer_done = 0;
+		bo->req_id = 0;
+		bo->perf_stats.req_received_ts = 0;
+		bo->perf_stats.req_submit_ts = 0;
+		bo->perf_stats.req_processed_ts = 0;
+		bo->perf_stats.queue_level_before = 0;
 		dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
 		complete_all(&bo->xfer_done);
 		drm_gem_object_put(&bo->base);
@ -1857,7 +1970,6 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)

 void release_dbc(struct qaic_device *qdev, u32 dbc_id)
 {
-	struct bo_slice *slice, *slice_temp;
 	struct qaic_bo *bo, *bo_temp;
 	struct dma_bridge_chan *dbc;

@ -1875,24 +1987,11 @@ void release_dbc(struct qaic_device *qdev, u32 dbc_id)
 	dbc->usr = NULL;

 	list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) {
-		list_for_each_entry_safe(slice, slice_temp, &bo->slices, slice)
-			kref_put(&slice->ref_count, free_slice);
-		bo->sliced = false;
-		INIT_LIST_HEAD(&bo->slices);
-		bo->total_slice_nents = 0;
-		bo->dir = 0;
-		bo->dbc = NULL;
-		bo->nr_slice = 0;
-		bo->nr_slice_xfer_done = 0;
-		bo->queued = false;
-		bo->req_id = 0;
-		init_completion(&bo->xfer_done);
-		complete_all(&bo->xfer_done);
-		list_del(&bo->bo_list);
-		bo->perf_stats.req_received_ts = 0;
-		bo->perf_stats.req_submit_ts = 0;
-		bo->perf_stats.req_processed_ts = 0;
-		bo->perf_stats.queue_level_before = 0;
+		drm_gem_object_get(&bo->base);
+		mutex_lock(&bo->lock);
+		detach_slice_bo(qdev, bo);
+		mutex_unlock(&bo->lock);
+		drm_gem_object_put(&bo->base);
 	}

 	dbc->in_use = false;
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@ -22,6 +22,7 @@
 #include <drm/drm_file.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
 #include <uapi/drm/qaic_accel.h>

 #include "mhi_controller.h"
@ -55,7 +56,7 @@ static void free_usr(struct kref *kref)

 static int qaic_open(struct drm_device *dev, struct drm_file *file)
 {
-	struct qaic_drm_device *qddev = dev->dev_private;
+	struct qaic_drm_device *qddev = to_qaic_drm_device(dev);
 	struct qaic_device *qdev = qddev->qdev;
 	struct qaic_user *usr;
 	int rcu_id;
@ -150,6 +151,7 @@ static const struct drm_ioctl_desc qaic_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(QAIC_PARTIAL_EXECUTE_BO, qaic_partial_execute_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(QAIC_WAIT_BO, qaic_wait_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(QAIC_PERF_STATS_BO, qaic_perf_stats_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(QAIC_DETACH_SLICE_BO, qaic_detach_slice_bo_ioctl, 0),
 };

 static const struct drm_driver qaic_accel_driver = {
@ -170,64 +172,39 @@ static const struct drm_driver qaic_accel_driver = {

 static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
 {
-	struct qaic_drm_device *qddev;
-	struct drm_device *ddev;
-	struct device *pdev;
+	struct qaic_drm_device *qddev = qdev->qddev;
+	struct drm_device *drm = to_drm(qddev);
 	int ret;

 	/* Hold off implementing partitions until the uapi is determined */
 	if (partition_id != QAIC_NO_PARTITION)
 		return -EINVAL;

-	pdev = &qdev->pdev->dev;
-
-	qddev = kzalloc(sizeof(*qddev), GFP_KERNEL);
-	if (!qddev)
-		return -ENOMEM;
-
-	ddev = drm_dev_alloc(&qaic_accel_driver, pdev);
-	if (IS_ERR(ddev)) {
-		ret = PTR_ERR(ddev);
-		goto ddev_fail;
-	}
-
-	ddev->dev_private = qddev;
-	qddev->ddev = ddev;
-
-	qddev->qdev = qdev;
 	qddev->partition_id = partition_id;
-	INIT_LIST_HEAD(&qddev->users);
-	mutex_init(&qddev->users_mutex);

-	qdev->qddev = qddev;
+	/*
+	 * drm_dev_unregister() sets the driver data to NULL and
+	 * drm_dev_register() does not update the driver data. During a SOC
+	 * reset drm dev is unregistered and registered again leaving the
+	 * driver data to NULL.
+	 */
+	dev_set_drvdata(to_accel_kdev(qddev), drm->accel);
+	ret = drm_dev_register(drm, 0);
+	if (ret)
+		pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret);

-	ret = drm_dev_register(ddev, 0);
-	if (ret) {
-		pci_dbg(qdev->pdev, "%s: drm_dev_register failed %d\n", __func__, ret);
-		goto drm_reg_fail;
-	}
-
-	return 0;
-
-drm_reg_fail:
-	mutex_destroy(&qddev->users_mutex);
-	qdev->qddev = NULL;
-	drm_dev_put(ddev);
-ddev_fail:
-	kfree(qddev);
 	return ret;
 }

 static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
 {
-	struct qaic_drm_device *qddev;
+	struct qaic_drm_device *qddev = qdev->qddev;
+	struct drm_device *drm = to_drm(qddev);
 	struct qaic_user *usr;

-	qddev = qdev->qddev;
-	qdev->qddev = NULL;
-	if (!qddev)
-		return;
-
+	drm_dev_get(drm);
+	drm_dev_unregister(drm);
+	qddev->partition_id = 0;
 	/*
 	 * Existing users get unresolvable errors till they close FDs.
 	 * Need to sync carefully with users calling close(). The
@ -254,13 +231,7 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
 		mutex_lock(&qddev->users_mutex);
 	}
 	mutex_unlock(&qddev->users_mutex);
-
-	if (qddev->ddev) {
-		drm_dev_unregister(qddev->ddev);
-		drm_dev_put(qddev->ddev);
-	}
-
-	kfree(qddev);
+	drm_dev_put(drm);
 }

 static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
@ -344,8 +315,20 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset)
 		qdev->in_reset = false;
 }

+/*
+ * Release the resources hanging off @qdev: the SRCU state of every DBC's
+ * ch_lock and of dev_lock, the PCI driver-data pointer (reset to NULL) and
+ * the cntl_wq workqueue.
+ *
+ * Defined ahead of create_qdev() because create_qdev() calls it when
+ * devm_drm_dev_alloc() fails.
+ */
+static void cleanup_qdev(struct qaic_device *qdev)
+{
+	int i;
+
+	for (i = 0; i < qdev->num_dbc; ++i)
+		cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
+	cleanup_srcu_struct(&qdev->dev_lock);
+	pci_set_drvdata(qdev->pdev, NULL);
+	destroy_workqueue(qdev->cntl_wq);
+}
+
 static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct qaic_drm_device *qddev;
 	struct qaic_device *qdev;
 	int i;

@ -381,20 +364,20 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
 		INIT_LIST_HEAD(&qdev->dbc[i].bo_lists);
 	}

+	qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm);
+	if (IS_ERR(qddev)) {
+		cleanup_qdev(qdev);
+		return NULL;
+	}
+
+	drmm_mutex_init(to_drm(qddev), &qddev->users_mutex);
+	INIT_LIST_HEAD(&qddev->users);
+	qddev->qdev = qdev;
+	qdev->qddev = qddev;
+
 	return qdev;
 }

-static void cleanup_qdev(struct qaic_device *qdev)
-{
-	int i;
-
-	for (i = 0; i < qdev->num_dbc; ++i)
-		cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
-	cleanup_srcu_struct(&qdev->dev_lock);
-	pci_set_drvdata(qdev->pdev, NULL);
-	destroy_workqueue(qdev->cntl_wq);
-}
-
 static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
 {
 	int bars;
@ -591,22 +574,22 @@ static int __init qaic_init(void)
 {
 	int ret;

-	ret = mhi_driver_register(&qaic_mhi_driver);
-	if (ret) {
-		pr_debug("qaic: mhi_driver_register failed %d\n", ret);
-		return ret;
-	}
-
 	ret = pci_register_driver(&qaic_pci_driver);
 	if (ret) {
 		pr_debug("qaic: pci_register_driver failed %d\n", ret);
-		goto free_mhi;
+		return ret;
+	}
+
+	ret = mhi_driver_register(&qaic_mhi_driver);
+	if (ret) {
+		pr_debug("qaic: mhi_driver_register failed %d\n", ret);
+		goto free_pci;
 	}

 	return 0;

-free_mhi:
-	mhi_driver_unregister(&qaic_mhi_driver);
+free_pci:
+	pci_unregister_driver(&qaic_pci_driver);
 	return ret;
 }

@ -628,8 +611,8 @@ static void __exit qaic_exit(void)
 	 * reinitializing the link_up state after the cleanup is done.
 	 */
 	link_up = true;
-	pci_unregister_driver(&qaic_pci_driver);
 	mhi_driver_unregister(&qaic_mhi_driver);
+	pci_unregister_driver(&qaic_pci_driver);
 }

 module_init(qaic_init);
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@ -492,7 +492,7 @@ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp)
 }

 static int thermal_get_trend(struct thermal_zone_device *thermal,
-			     struct thermal_trip *trip,
+			     const struct thermal_trip *trip,
 			     enum thermal_trend *trend)
 {
 	struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@ -1883,6 +1883,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	else
 		dev_info(&pdev->dev, "SSS flag set, parallel bus scan disabled\n");

+	if (!(hpriv->cap & HOST_CAP_PART))
+		host->flags |= ATA_HOST_NO_PART;
+
+	if (!(hpriv->cap & HOST_CAP_SSC))
+		host->flags |= ATA_HOST_NO_SSC;
+
+	if (!(hpriv->cap2 & HOST_CAP2_SDS))
+		host->flags |= ATA_HOST_NO_DEVSLP;
+
 	if (pi.flags & ATA_FLAG_EM)
 		ahci_reset_em(host);

--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@ -1256,6 +1256,26 @@ static ssize_t ahci_activity_show(struct ata_device *dev, char *buf)
 	return sprintf(buf, "%d\n", emp->blink_policy);
 }

+/*
+ * Clear stale error and interrupt state for @ap: SError, the port interrupt
+ * status, and the port's bit in the host-level interrupt status.
+ *
+ * Called from ahci_port_init() and from ahci_do_hardreset() right before
+ * sata_link_hardreset().
+ */
+static void ahci_port_clear_pending_irq(struct ata_port *ap)
+{
+	struct ahci_host_priv *hpriv = ap->host->private_data;
+	void __iomem *port_mmio = ahci_port_base(ap);
+	u32 tmp;
+
+	/* clear SError */
+	tmp = readl(port_mmio + PORT_SCR_ERR);
+	dev_dbg(ap->host->dev, "PORT_SCR_ERR 0x%x\n", tmp);
+	writel(tmp, port_mmio + PORT_SCR_ERR);
+
+	/* clear port IRQ */
+	tmp = readl(port_mmio + PORT_IRQ_STAT);
+	dev_dbg(ap->host->dev, "PORT_IRQ_STAT 0x%x\n", tmp);
+	if (tmp)
+		writel(tmp, port_mmio + PORT_IRQ_STAT);
+
+	/*
+	 * Acknowledge this port in the host interrupt status register.
+	 * NOTE(review): the registers above are cleared by writing back the
+	 * bits that were read, so these look like write-1-to-clear registers -
+	 * confirm against the AHCI specification.
+	 */
+	writel(1 << ap->port_no, hpriv->mmio + HOST_IRQ_STAT);
+}
+
 static void ahci_port_init(struct device *dev, struct ata_port *ap,
 			   int port_no, void __iomem *mmio,
 			   void __iomem *port_mmio)
@ -1270,18 +1290,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap,
 	if (rc)
 		dev_warn(dev, "%s (%d)\n", emsg, rc);

-	/* clear SError */
-	tmp = readl(port_mmio + PORT_SCR_ERR);
-	dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp);
-	writel(tmp, port_mmio + PORT_SCR_ERR);
-
-	/* clear port IRQ */
-	tmp = readl(port_mmio + PORT_IRQ_STAT);
-	dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp);
-	if (tmp)
-		writel(tmp, port_mmio + PORT_IRQ_STAT);
-
-	writel(1 << port_no, mmio + HOST_IRQ_STAT);
+	ahci_port_clear_pending_irq(ap);

 	/* mark esata ports */
 	tmp = readl(port_mmio + PORT_CMD);
@ -1603,6 +1612,8 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
 	tf.status = ATA_BUSY;
 	ata_tf_to_fis(&tf, 0, 0, d2h_fis);

+	ahci_port_clear_pending_irq(ap);
+
 	rc = sata_link_hardreset(link, timing, deadline, online,
 				 ahci_check_ready);

--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@ -4783,11 +4783,8 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
 	 * been aborted by the device due to a limit timeout using the policy
 	 * 0xD. For these commands, invoke EH to get the command sense data.
 	 */
-	if (qc->result_tf.status & ATA_SENSE &&
-	    ((ata_is_ncq(qc->tf.protocol) &&
-	      dev->flags & ATA_DFLAG_CDL_ENABLED) ||
-	     (!ata_is_ncq(qc->tf.protocol) &&
-	      ata_id_sense_reporting_enabled(dev->id)))) {
+	if (qc->flags & ATA_QCFLAG_HAS_CDL &&
+	    qc->result_tf.status & ATA_SENSE) {
 		/*
 		 * Tell SCSI EH to not overwrite scmd->result even if this
 		 * command is finished with result SAM_STAT_GOOD.
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@ -2796,23 +2796,13 @@ int ata_eh_reset(struct ata_link *link, int classify,
 		}
 	}

-	/*
-	 * Some controllers can't be frozen very well and may set spurious
-	 * error conditions during reset.  Clear accumulated error
-	 * information and re-thaw the port if frozen.  As reset is the
-	 * final recovery action and we cross check link onlineness against
-	 * device classification later, no hotplug event is lost by this.
-	 */
+	/* clear cached SError */
 	spin_lock_irqsave(link->ap->lock, flags);
-	memset(&link->eh_info, 0, sizeof(link->eh_info));
+	link->eh_info.serror = 0;
 	if (slave)
-		memset(&slave->eh_info, 0, sizeof(link->eh_info));
-	ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+		slave->eh_info.serror = 0;
 	spin_unlock_irqrestore(link->ap->lock, flags);

-	if (ata_port_is_frozen(ap))
-		ata_eh_thaw_port(ap);
-
 	/*
 	 * Make sure onlineness and classification result correspond.
 	 * Hotplug could have happened during reset and some
--- a/drivers/ata/libata-sata.c
+++ b/drivers/ata/libata-sata.c
@ -396,10 +396,23 @@ int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 	case ATA_LPM_MED_POWER_WITH_DIPM:
 	case ATA_LPM_MIN_POWER_WITH_PARTIAL:
 	case ATA_LPM_MIN_POWER:
-		if (ata_link_nr_enabled(link) > 0)
-			/* no restrictions on LPM transitions */
+		if (ata_link_nr_enabled(link) > 0) {
+			/* assume no restrictions on LPM transitions */
 			scontrol &= ~(0x7 << 8);
-		else {
+
+			/*
+			 * If the controller does not support partial, slumber,
+			 * or devsleep, then disallow these transitions.
+			 */
+			if (link->ap->host->flags & ATA_HOST_NO_PART)
+				scontrol |= (0x1 << 8);
+
+			if (link->ap->host->flags & ATA_HOST_NO_SSC)
+				scontrol |= (0x2 << 8);
+
+			if (link->ap->host->flags & ATA_HOST_NO_DEVSLP)
+				scontrol |= (0x4 << 8);
+		} else {
 			/* empty port, power off */
 			scontrol &= ~0xf;
 			scontrol |= (0x1 << 2);
--- a/drivers/ata/pata_parport/comm.c
+++ b/drivers/ata/pata_parport/comm.c
@ -37,7 +37,7 @@ static int comm_read_regr(struct pi_adapter *pi, int cont, int regr)
 {
 	int l, h, r;

-        r = regr + cont_map[cont];
+	r = regr + cont_map[cont];

 	switch (pi->mode) {
 	case 0:
@ -90,7 +90,6 @@ static void comm_connect(struct pi_adapter *pi)
 }

 static void comm_disconnect(struct pi_adapter *pi)
-
 {
 	w2(0); w2(0); w2(0); w2(4);
 	w0(pi->saved_r0);
@ -172,12 +171,12 @@ static void comm_write_block(struct pi_adapter *pi, char *buf, int count)
 			w4l(swab16(((u16 *)buf)[2 * k]) |
 			    swab16(((u16 *)buf)[2 * k + 1]) << 16);
 		break;
-        }
+	}
 }

 static void comm_log_adapter(struct pi_adapter *pi)
-
-{       char *mode_string[5] = { "4-bit", "8-bit", "EPP-8", "EPP-16", "EPP-32" };
+{
+	char *mode_string[5] = { "4-bit", "8-bit", "EPP-8", "EPP-16", "EPP-32" };

 	dev_info(&pi->dev,
 		 "DataStor Commuter at 0x%x, mode %d (%s), delay %d\n",
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@ -1255,8 +1255,8 @@ static void mv_dump_mem(struct device *dev, void __iomem *start, unsigned bytes)

 	for (b = 0; b < bytes; ) {
 		for (w = 0, o = 0; b < bytes && w < 4; w++) {
-			o += snprintf(linebuf + o, sizeof(linebuf) - o,
-				      "%08x ", readl(start + b));
+			o += scnprintf(linebuf + o, sizeof(linebuf) - o,
+				       "%08x ", readl(start + b));
 			b += sizeof(u32);
 		}
 		dev_dbg(dev, "%s: %p: %s\n",
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@ -3537,6 +3537,8 @@ int device_add(struct device *dev)
 	/* subsystems can specify simple device enumeration */
 	else if (dev->bus && dev->bus->dev_name)
 		error = dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id);
+	else
+		error = -EINVAL;
 	if (error)
 		goto name_error;

--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@ -394,8 +394,6 @@ find_quicksilver(struct device *dev, void *data)
 static int __init
 parisc_agp_init(void)
 {
-	extern struct sba_device *sba_list;
-
 	int err = -1;
 	struct parisc_device *sba = NULL, *lba = NULL;
 	struct lba_device *lbadev = NULL;
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@ -33,7 +33,7 @@ const struct class tpm_class = {
 	.shutdown_pre = tpm_class_shutdown,
 };
 const struct class tpmrm_class = {
-	.name = "tmprm",
+	.name = "tpmrm",
 };
 dev_t tpm_devt;

--- a/drivers/comedi/Kconfig
+++ b/drivers/comedi/Kconfig
@ -67,7 +67,6 @@ config COMEDI_TEST

 config COMEDI_PARPORT
 	tristate "Parallel port support"
-	depends on HAS_IOPORT
 	help
 	  Enable support for the standard parallel port.
 	  A cheap and easy way to get a few more digital I/O lines. Steal
@ -80,7 +79,6 @@ config COMEDI_PARPORT
 config COMEDI_SSV_DNP
 	tristate "SSV Embedded Systems DIL/Net-PC support"
 	depends on X86_32 || COMPILE_TEST
-	depends on HAS_IOPORT
 	help
 	  Enable support for SSV Embedded Systems DIL/Net-PC

@ -91,7 +89,6 @@ endif # COMEDI_MISC_DRIVERS

 menuconfig COMEDI_ISA_DRIVERS
 	bool "Comedi ISA and PC/104 drivers"
-	depends on ISA
 	help
 	  Enable comedi ISA and PC/104 drivers to be built

@ -103,8 +100,7 @@ if COMEDI_ISA_DRIVERS

 config COMEDI_PCL711
 	tristate "Advantech PCL-711/711b and ADlink ACL-8112 ISA card support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for Advantech PCL-711 and 711b, ADlink ACL-8112

@ -165,9 +161,8 @@ config COMEDI_PCL730

 config COMEDI_PCL812
 	tristate "Advantech PCL-812/813 and ADlink ACL-8112/8113/8113/8216"
-	depends on HAS_IOPORT
 	select COMEDI_ISADMA if ISA_DMA_API
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for Advantech PCL-812/PG, PCL-813/B, ADLink
 	  ACL-8112DG/HG/PG, ACL-8113, ACL-8216, ICP DAS A-821PGH/PGL/PGL-NDA,
@ -178,9 +173,8 @@ config COMEDI_PCL812

 config COMEDI_PCL816
 	tristate "Advantech PCL-814 and PCL-816 ISA card support"
-	depends on HAS_IOPORT
 	select COMEDI_ISADMA if ISA_DMA_API
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for Advantech PCL-814 and PCL-816 ISA cards

@ -189,9 +183,8 @@ config COMEDI_PCL816

 config COMEDI_PCL818
 	tristate "Advantech PCL-718 and PCL-818 ISA card support"
-	depends on HAS_IOPORT
 	select COMEDI_ISADMA if ISA_DMA_API
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for Advantech PCL-818 ISA cards
 	  PCL-818L, PCL-818H, PCL-818HD, PCL-818HG, PCL-818 and PCL-718
@ -210,7 +203,7 @@ config COMEDI_PCM3724

 config COMEDI_AMPLC_DIO200_ISA
 	tristate "Amplicon PC212E/PC214E/PC215E/PC218E/PC272E"
-	depends on COMEDI_AMPLC_DIO200
+	select COMEDI_AMPLC_DIO200
 	help
 	  Enable support for Amplicon PC212E, PC214E, PC215E, PC218E and
 	  PC272E ISA DIO boards
@ -262,8 +255,7 @@ config COMEDI_DAC02

 config COMEDI_DAS16M1
 	tristate "MeasurementComputing CIO-DAS16/M1DAS-16 ISA card support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	select COMEDI_8255
 	help
 	  Enable support for Measurement Computing CIO-DAS16/M1 ISA cards.
@ -273,7 +265,7 @@ config COMEDI_DAS16M1

 config COMEDI_DAS08_ISA
 	tristate "DAS-08 compatible ISA and PC/104 card support"
-	depends on COMEDI_DAS08
+	select COMEDI_DAS08
 	help
 	  Enable support for Keithley Metrabyte/ComputerBoards DAS08
 	  and compatible ISA and PC/104 cards:
@ -286,9 +278,8 @@ config COMEDI_DAS08_ISA

 config COMEDI_DAS16
 	tristate "DAS-16 compatible ISA and PC/104 card support"
-	depends on HAS_IOPORT
 	select COMEDI_ISADMA if ISA_DMA_API
-	depends on COMEDI_8254
+	select COMEDI_8254
 	select COMEDI_8255
 	help
 	  Enable support for Keithley Metrabyte/ComputerBoards DAS16
@ -305,8 +296,7 @@ config COMEDI_DAS16

 config COMEDI_DAS800
 	tristate "DAS800 and compatible ISA card support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for Keithley Metrabyte DAS800 and compatible ISA cards
 	  Keithley Metrabyte DAS-800, DAS-801, DAS-802
@ -318,9 +308,8 @@ config COMEDI_DAS800

 config COMEDI_DAS1800
 	tristate "DAS1800 and compatible ISA card support"
-	depends on HAS_IOPORT
 	select COMEDI_ISADMA if ISA_DMA_API
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for DAS1800 and compatible ISA cards
 	  Keithley Metrabyte DAS-1701ST, DAS-1701ST-DA, DAS-1701/AO,
@ -334,8 +323,7 @@ config COMEDI_DAS1800

 config COMEDI_DAS6402
 	tristate "DAS6402 and compatible ISA card support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for DAS6402 and compatible ISA cards
 	  Computerboards, Keithley Metrabyte DAS6402 and compatibles
@ -414,8 +402,7 @@ config COMEDI_FL512

 config COMEDI_AIO_AIO12_8
 	tristate "I/O Products PC/104 AIO12-8 Analog I/O Board support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	select COMEDI_8255
 	help
 	  Enable support for I/O Products PC/104 AIO12-8 Analog I/O Board
@ -469,9 +456,8 @@ config COMEDI_ADQ12B

 config COMEDI_NI_AT_A2150
 	tristate "NI AT-A2150 ISA card support"
-	depends on HAS_IOPORT
 	select COMEDI_ISADMA if ISA_DMA_API
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for National Instruments AT-A2150 cards

@ -480,8 +466,7 @@ config COMEDI_NI_AT_A2150

 config COMEDI_NI_AT_AO
 	tristate "NI AT-AO-6/10 EISA card support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for National Instruments AT-AO-6/10 cards

@ -512,7 +497,7 @@ config COMEDI_NI_ATMIO16D

 config COMEDI_NI_LABPC_ISA
 	tristate "NI Lab-PC and compatibles ISA support"
-	depends on COMEDI_NI_LABPC
+	select COMEDI_NI_LABPC
 	help
 	  Enable support for National Instruments Lab-PC and compatibles
 	  Lab-PC-1200, Lab-PC-1200AI, Lab-PC+.
@ -576,7 +561,7 @@ endif # COMEDI_ISA_DRIVERS

 menuconfig COMEDI_PCI_DRIVERS
 	tristate "Comedi PCI drivers"
-	depends on PCI && HAS_IOPORT
+	depends on PCI
 	help
 	  Enable support for comedi PCI drivers.

@ -725,8 +710,7 @@ config COMEDI_ADL_PCI8164

 config COMEDI_ADL_PCI9111
 	tristate "ADLink PCI-9111HR support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for ADlink PCI9111 cards

@ -736,7 +720,7 @@ config COMEDI_ADL_PCI9111
 config COMEDI_ADL_PCI9118
 	tristate "ADLink PCI-9118DG, PCI-9118HG, PCI-9118HR support"
 	depends on HAS_DMA
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for ADlink PCI-9118DG, PCI-9118HG, PCI-9118HR cards

@ -745,8 +729,7 @@ config COMEDI_ADL_PCI9118

 config COMEDI_ADV_PCI1710
 	tristate "Advantech PCI-171x and PCI-1731 support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for Advantech PCI-1710, PCI-1710HG, PCI-1711,
 	  PCI-1713 and PCI-1731
@ -790,8 +773,7 @@ config COMEDI_ADV_PCI1760

 config COMEDI_ADV_PCI_DIO
 	tristate "Advantech PCI DIO card support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	select COMEDI_8255
 	help
 	  Enable support for Advantech PCI DIO cards
@ -804,7 +786,7 @@ config COMEDI_ADV_PCI_DIO

 config COMEDI_AMPLC_DIO200_PCI
 	tristate "Amplicon PCI215/PCI272/PCIe215/PCIe236/PCIe296 DIO support"
-	depends on COMEDI_AMPLC_DIO200
+	select COMEDI_AMPLC_DIO200
 	help
 	  Enable support for Amplicon PCI215, PCI272, PCIe215, PCIe236
 	  and PCIe296 DIO boards.
@ -832,8 +814,7 @@ config COMEDI_AMPLC_PC263_PCI

 config COMEDI_AMPLC_PCI224
 	tristate "Amplicon PCI224 and PCI234 support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for Amplicon PCI224 and PCI234 AO boards

@ -842,8 +823,7 @@ config COMEDI_AMPLC_PCI224

 config COMEDI_AMPLC_PCI230
 	tristate "Amplicon PCI230 and PCI260 support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	select COMEDI_8255
 	help
 	  Enable support for Amplicon PCI230 and PCI260 Multifunction I/O
@ -862,7 +842,7 @@ config COMEDI_CONTEC_PCI_DIO

 config COMEDI_DAS08_PCI
 	tristate "DAS-08 PCI support"
-	depends on COMEDI_DAS08
+	select COMEDI_DAS08
 	help
 	  Enable support for PCI DAS-08 cards.

@ -949,8 +929,7 @@ config COMEDI_CB_PCIDAS64

 config COMEDI_CB_PCIDAS
 	tristate "MeasurementComputing PCI-DAS support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	select COMEDI_8255
 	help
 	  Enable support for ComputerBoards/MeasurementComputing PCI-DAS with
@ -974,8 +953,7 @@ config COMEDI_CB_PCIDDA

 config COMEDI_CB_PCIMDAS
 	tristate "MeasurementComputing PCIM-DAS1602/16, PCIe-DAS1602/16 support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	select COMEDI_8255
 	help
 	  Enable support for ComputerBoards/MeasurementComputing PCI Migration
@ -995,8 +973,7 @@ config COMEDI_CB_PCIMDDA

 config COMEDI_ME4000
 	tristate "Meilhaus ME-4000 support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for Meilhaus PCI data acquisition cards
 	  ME-4650, ME-4670i, ME-4680, ME-4680i and ME-4680is
@ -1054,7 +1031,7 @@ config COMEDI_NI_670X

 config COMEDI_NI_LABPC_PCI
 	tristate "NI Lab-PC PCI-1200 support"
-	depends on COMEDI_NI_LABPC
+	select COMEDI_NI_LABPC
 	help
 	  Enable support for National Instruments Lab-PC PCI-1200.

@ -1076,7 +1053,6 @@ config COMEDI_NI_PCIDIO
 config COMEDI_NI_PCIMIO
 	tristate "NI PCI-MIO-E series and M series support"
 	depends on HAS_DMA
-	depends on HAS_IOPORT
 	select COMEDI_NI_TIOCMD
 	select COMEDI_8255
 	help
@ -1098,8 +1074,7 @@ config COMEDI_NI_PCIMIO

 config COMEDI_RTD520
 	tristate "Real Time Devices PCI4520/DM7520 support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for Real Time Devices PCI4520/DM7520

@ -1139,8 +1114,7 @@ if COMEDI_PCMCIA_DRIVERS

 config COMEDI_CB_DAS16_CS
 	tristate "CB DAS16 series PCMCIA support"
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	help
 	  Enable support for the ComputerBoards/MeasurementComputing PCMCIA
 	  cards DAS16/16, PCM-DAS16D/12 and PCM-DAS16s/16
@ -1150,7 +1124,7 @@ config COMEDI_CB_DAS16_CS

 config COMEDI_DAS08_CS
 	tristate "CB DAS08 PCMCIA support"
-	depends on COMEDI_DAS08
+	select COMEDI_DAS08
 	help
 	  Enable support for the ComputerBoards/MeasurementComputing DAS-08
 	  PCMCIA card
@ -1160,7 +1134,6 @@ config COMEDI_DAS08_CS

 config COMEDI_NI_DAQ_700_CS
 	tristate "NI DAQCard-700 PCMCIA support"
-	depends on HAS_IOPORT
 	help
 	  Enable support for the National Instruments PCMCIA DAQCard-700 DIO

@ -1169,7 +1142,6 @@ config COMEDI_NI_DAQ_700_CS

 config COMEDI_NI_DAQ_DIO24_CS
 	tristate "NI DAQ-Card DIO-24 PCMCIA support"
-	depends on HAS_IOPORT
 	select COMEDI_8255
 	help
 	  Enable support for the National Instruments PCMCIA DAQ-Card DIO-24
@ -1179,7 +1151,7 @@ config COMEDI_NI_DAQ_DIO24_CS

 config COMEDI_NI_LABPC_CS
 	tristate "NI DAQCard-1200 PCMCIA support"
-	depends on COMEDI_NI_LABPC
+	select COMEDI_NI_LABPC
 	help
 	  Enable support for the National Instruments PCMCIA DAQCard-1200

@ -1188,7 +1160,6 @@ config COMEDI_NI_LABPC_CS

 config COMEDI_NI_MIO_CS
 	tristate "NI DAQCard E series PCMCIA support"
-	depends on HAS_IOPORT
 	select COMEDI_NI_TIO
 	select COMEDI_8255
 	help
@ -1201,7 +1172,6 @@ config COMEDI_NI_MIO_CS

 config COMEDI_QUATECH_DAQP_CS
 	tristate "Quatech DAQP PCMCIA data capture card support"
-	depends on HAS_IOPORT
 	help
 	  Enable support for the Quatech DAQP PCMCIA data capture cards
 	  DAQP-208 and DAQP-308
@ -1278,14 +1248,12 @@ endif # COMEDI_USB_DRIVERS

 config COMEDI_8254
 	tristate
-	depends on HAS_IOPORT

 config COMEDI_8255
 	tristate

 config COMEDI_8255_SA
 	tristate "Standalone 8255 support"
-	depends on HAS_IOPORT
 	select COMEDI_8255
 	help
 	  Enable support for 8255 digital I/O as a standalone driver.
@ -1317,7 +1285,7 @@ config COMEDI_KCOMEDILIB
 	  called kcomedilib.

 config COMEDI_AMPLC_DIO200
-	depends on COMEDI_8254
+	select COMEDI_8254
 	tristate

 config COMEDI_AMPLC_PC236
@ -1326,7 +1294,7 @@ config COMEDI_AMPLC_PC236

 config COMEDI_DAS08
 	tristate
-	depends on COMEDI_8254
+	select COMEDI_8254
 	select COMEDI_8255

 config COMEDI_ISADMA
@ -1334,8 +1302,7 @@ config COMEDI_ISADMA

 config COMEDI_NI_LABPC
 	tristate
-	depends on HAS_IOPORT
-	depends on COMEDI_8254
+	select COMEDI_8254
 	select COMEDI_8255

 config COMEDI_NI_LABPC_ISADMA
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@ -1211,7 +1211,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		 * without actually having a link.
 		 */
 create:
-		device = kzalloc(sizeof(*device), GFP_KERNEL);
+		device = kzalloc(sizeof(*device), GFP_ATOMIC);
 		if (device == NULL)
 			break;

--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@ -101,7 +101,7 @@ static struct fw_node *fw_node_create(u32 sid, int port_count, int color)
 {
 	struct fw_node *node;

-	node = kzalloc(struct_size(node, ports, port_count), GFP_KERNEL);
+	node = kzalloc(struct_size(node, ports, port_count), GFP_ATOMIC);
 	if (node == NULL)
 		return NULL;

--- a/drivers/firmware/efi/libstub/unaccepted_memory.c
+++ b/drivers/firmware/efi/libstub/unaccepted_memory.c
@ -62,7 +62,7 @@ efi_status_t allocate_unaccepted_bitmap(__u32 nr_desc,
 	bitmap_size = DIV_ROUND_UP(unaccepted_end - unaccepted_start,
 				   EFI_UNACCEPTED_UNIT_SIZE * BITS_PER_BYTE);

-	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
+	status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY,
 			     sizeof(*unaccepted_table) + bitmap_size,
 			     (void **)&unaccepted_table);
 	if (status != EFI_SUCCESS) {
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@ -216,6 +216,13 @@ config DRM_EXEC
 	help
 	  Execution context for command submissions

+config DRM_GPUVM
+	tristate
+	depends on DRM
+	help
+	  GPU-VM representation providing helpers to manage a GPU's virtual
+	  address space
+
 config DRM_BUDDY
 	tristate
 	depends on DRM
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@ -45,7 +45,6 @@ drm-y := \
 	drm_vblank.o \
 	drm_vblank_work.o \
 	drm_vma_manager.o \
-	drm_gpuva_mgr.o \
 	drm_writeback.o
 drm-$(CONFIG_DRM_LEGACY) += \
 	drm_agpsupport.o \
@ -81,6 +80,7 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
 #
 #
 obj-$(CONFIG_DRM_EXEC) += drm_exec.o
+obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o

 obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@ -1293,7 +1293,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 int amdgpu_device_pci_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
-bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
 bool amdgpu_device_pcie_dynamic_switching_supported(void);
 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
 bool amdgpu_device_aspm_support_quirk(void);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@ -478,7 +478,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *c
 	cu_info->cu_active_number = acu_info.number;
 	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
 	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
-	       sizeof(acu_info.bitmap));
+	       sizeof(cu_info->cu_bitmap));
 	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
 	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
 	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@ -980,8 +980,7 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
 						uint32_t wait_times,
 						uint32_t grace_period,
 						uint32_t *reg_offset,
-						uint32_t *reg_data,
-						uint32_t inst)
+						uint32_t *reg_data)
 {
 	*reg_data = wait_times;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@ -55,5 +55,4 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
 					       uint32_t wait_times,
 					       uint32_t grace_period,
 					       uint32_t *reg_offset,
-					       uint32_t *reg_data,
-					       uint32_t inst);
+					       uint32_t *reg_data);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@ -1103,8 +1103,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
 		uint32_t wait_times,
 		uint32_t grace_period,
 		uint32_t *reg_offset,
-		uint32_t *reg_data,
-		uint32_t inst)
+		uint32_t *reg_data)
 {
 	*reg_data = wait_times;

@ -1120,8 +1119,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
 			SCH_WAVE,
 			grace_period);

-	*reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
-			mmCP_IQ_WAIT_TIME2);
+	*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
 }

 void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@ -100,5 +100,4 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
 					       uint32_t wait_times,
 					       uint32_t grace_period,
 					       uint32_t *reg_offset,
-					       uint32_t *reg_data,
-					       uint32_t inst);
+					       uint32_t *reg_data);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@ -1244,32 +1244,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
 	return true;
 }

-/*
- * On APUs with >= 64GB white flickering has been observed w/ SG enabled.
- * Disable S/G on such systems until we have a proper fix.
- * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
- * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
- */
-bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
-{
-	switch (amdgpu_sg_display) {
-	case -1:
-		break;
-	case 0:
-		return false;
-	case 1:
-		return true;
-	default:
-		return false;
-	}
-	if ((totalram_pages() << (PAGE_SHIFT - 10)) +
-	    (adev->gmc.real_vram_size / 1024) >= 64000000) {
-		DRM_WARN("Disabling S/G due to >=64GB RAM\n");
-		return false;
-	}
-	return true;
-}
-
 /*
 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
 * speed switching. Until we have confirmation from Intel that a specific host
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@ -962,6 +962,7 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
 	list_for_each_entry(file, &dev->filelist, lhead) {
 		struct task_struct *task;
 		struct drm_gem_object *gobj;
+		struct pid *pid;
 		int id;

 		/*
@ -971,8 +972,9 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
 		 * Therefore, we need to protect this ->comm access using RCU.
 		 */
 		rcu_read_lock();
-		task = pid_task(file->pid, PIDTYPE_TGID);
-		seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+		pid = rcu_dereference(file->pid);
+		task = pid_task(pid, PIDTYPE_TGID);
+		seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
 			   task ? task->comm : "<unknown>");
 		rcu_read_unlock();

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@ -43,6 +43,7 @@
 #define AMDGPU_GFX_LBPW_DISABLED_MODE		0x00000008L

 #define AMDGPU_MAX_GC_INSTANCES		8
+#define KGD_MAX_QUEUES			128

 #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
 #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@ -257,7 +258,7 @@ struct amdgpu_cu_info {
 	uint32_t number;
 	uint32_t ao_cu_mask;
 	uint32_t ao_cu_bitmap[4][4];
-	uint32_t bitmap[4][4];
+	uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
 };

 struct amdgpu_gfx_ras {
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@ -839,7 +839,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
 		       sizeof(adev->gfx.cu_info.ao_cu_bitmap));
 		memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
-		       sizeof(adev->gfx.cu_info.bitmap));
+		       sizeof(dev_info->cu_bitmap));
 		dev_info->vram_type = adev->gmc.vram_type;
 		dev_info->vram_bit_width = adev->gmc.vram_width;
 		dev_info->vce_harvest_config = adev->vce.harvest_config;
@ -940,12 +940,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			struct atom_context *atom_context;

 			atom_context = adev->mode_info.atom_context;
-			memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name));
-			memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn));
-			vbios_info.version = atom_context->version;
-			memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
-						sizeof(atom_context->vbios_ver_str));
-			memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date));
+			if (atom_context) {
+				memcpy(vbios_info.name, atom_context->name,
+				       sizeof(atom_context->name));
+				memcpy(vbios_info.vbios_pn, atom_context->vbios_pn,
+				       sizeof(atom_context->vbios_pn));
+				vbios_info.version = atom_context->version;
+				memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
+				       sizeof(atom_context->vbios_ver_str));
+				memcpy(vbios_info.date, atom_context->date,
+				       sizeof(atom_context->date));
+			}

 			return copy_to_user(out, &vbios_info,
 						min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@ -1052,7 +1052,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 	info->ce_count = obj->err_data.ce_count;

 	if (err_data.ce_count) {
-		if (adev->smuio.funcs &&
+		if (!adev->aid_mask &&
+		    adev->smuio.funcs &&
 		    adev->smuio.funcs->get_socket_id &&
 		    adev->smuio.funcs->get_die_id) {
 			dev_info(adev->dev, "socket: %d, die: %d "
@ -1072,7 +1073,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		}
 	}
 	if (err_data.ue_count) {
-		if (adev->smuio.funcs &&
+		if (!adev->aid_mask &&
+		    adev->smuio.funcs &&
 		    adev->smuio.funcs->get_socket_id &&
 		    adev->smuio.funcs->get_die_id) {
 			dev_info(adev->dev, "socket: %d, die: %d "
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@ -81,7 +81,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     unsigned int size)
 {
 	struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
-						   GFP_KERNEL, true, 0);
+						   GFP_KERNEL, false, 0);

 	if (IS_ERR(sa)) {
 		*sa_bo = NULL;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@ -424,9 +424,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			       const struct ttm_place *place,
 			       struct ttm_resource **res)
 {
-	u64 vis_usage = 0, max_bytes, cur_size, min_block_size;
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	u64 vis_usage = 0, max_bytes, min_block_size;
 	struct amdgpu_vram_mgr_resource *vres;
 	u64 size, remaining_size, lpfn, fpfn;
 	struct drm_buddy *mm = &mgr->mm;
@ -474,6 +474,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
 		vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;

+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+		vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
 	if (fpfn || lpfn != mgr->mm.size)
 		/* Allocate blocks in desired range */
 		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@ -496,25 +499,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 				!(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
 			min_block_size = (u64)pages_per_block << PAGE_SHIFT;

-		cur_size = size;
-
-		if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
-			/*
-			 * Except for actual range allocation, modify the size and
-			 * min_block_size conforming to continuous flag enablement
-			 */
-			if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-				size = roundup_pow_of_two(size);
-				min_block_size = size;
-			/*
-			 * Modify the size value if size is not
-			 * aligned with min_block_size
-			 */
-			} else if (!IS_ALIGNED(size, min_block_size)) {
-				size = round_up(size, min_block_size);
-			}
-		}
-
 		r = drm_buddy_alloc_blocks(mm, fpfn,
 					   lpfn,
 					   size,
@ -531,40 +515,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	}
 	mutex_unlock(&mgr->lock);

-	if (cur_size != size) {
-		struct drm_buddy_block *block;
-		struct list_head *trim_list;
-		u64 original_size;
-		LIST_HEAD(temp);
-
-		trim_list = &vres->blocks;
-		original_size = (u64)vres->base.size;
-
-		/*
-		 * If size value is rounded up to min_block_size, trim the last
-		 * block to the required size
-		 */
-		if (!list_is_singular(&vres->blocks)) {
-			block = list_last_entry(&vres->blocks, typeof(*block), link);
-			list_move_tail(&block->link, &temp);
-			trim_list = &temp;
-			/*
-			 * Compute the original_size value by subtracting the
-			 * last block size with (aligned size - original size)
-			 */
-			original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size);
-		}
-
-		mutex_lock(&mgr->lock);
-		drm_buddy_block_trim(mm,
-				     original_size,
-				     trim_list);
-		mutex_unlock(&mgr->lock);
-
-		if (!list_empty(&temp))
-			list_splice_tail(trim_list, &vres->blocks);
-	}
-
 	vres->base.start = 0;
 	list_for_each_entry(block, &vres->blocks, link) {
 		unsigned long start;
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@ -9449,7 +9449,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
 				gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
 					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
-			cu_info->bitmap[i][j] = bitmap;
+			cu_info->bitmap[0][i][j] = bitmap;

 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
 				if (bitmap & mask) {
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@ -6368,7 +6368,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
 			 */
-			cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
+			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;

 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
 				if (bitmap & mask)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
 				gfx_v6_0_set_user_cu_inactive_bitmap(
 					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v6_0_get_cu_enabled(adev);
-			cu_info->bitmap[i][j] = bitmap;
+			cu_info->bitmap[0][i][j] = bitmap;

 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
 				if (bitmap & mask) {
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@ -5119,7 +5119,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 				gfx_v7_0_set_user_cu_inactive_bitmap(
 					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
-			cu_info->bitmap[i][j] = bitmap;
+			cu_info->bitmap[0][i][j] = bitmap;

 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
 				if (bitmap & mask) {
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 				gfx_v8_0_set_user_cu_inactive_bitmap(
 					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
-			cu_info->bitmap[i][j] = bitmap;
+			cu_info->bitmap[0][i][j] = bitmap;

 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
 				if (bitmap & mask) {
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@ -1499,7 +1499,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);

 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
-				if (cu_info->bitmap[i][j] & mask) {
+				if (cu_info->bitmap[0][i][j] & mask) {
 					if (counter == pg_always_on_cu_num)
 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
 					if (counter < always_on_cu_num)
@ -7233,7 +7233,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 			 *    SE6,SH0 --> bitmap[2][1]
 			 *    SE7,SH0 --> bitmap[3][1]
 			 */
-			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
+			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;

 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
 				if (bitmap & mask) {
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@ -4259,7 +4259,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
 }

 static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
-						 u32 bitmap)
+						 u32 bitmap, int xcc_id)
 {
 	u32 data;

@ -4269,15 +4269,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

-	WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
 }

-static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
+static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id)
 {
 	u32 data, mask;

-	data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG);
-	data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG);
+	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
+	data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);

 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
@ -4290,7 +4290,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
 static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
 				 struct amdgpu_cu_info *cu_info)
 {
-	int i, j, k, counter, active_cu_number = 0;
+	int i, j, k, counter, xcc_id, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	unsigned disable_masks[4 * 4];

@ -4309,46 +4309,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
 				    adev->gfx.config.max_sh_per_se);

 	mutex_lock(&adev->grbm_idx_mutex);
-	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
-		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			mask = 1;
-			ao_bitmap = 0;
-			counter = 0;
-			gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0);
-			gfx_v9_4_3_set_user_cu_inactive_bitmap(
-				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
-			bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev);
+	for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+				mask = 1;
+				ao_bitmap = 0;
+				counter = 0;
+				gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
+				gfx_v9_4_3_set_user_cu_inactive_bitmap(
+					adev,
+					disable_masks[i * adev->gfx.config.max_sh_per_se + j],
+					xcc_id);
+				bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id);

-			/*
-			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
-			 * 4x4 size array, and it's usually suitable for Vega
-			 * ASICs which has 4*2 SE/SH layout.
-			 * But for Arcturus, SE/SH layout is changed to 8*1.
-			 * To mostly reduce the impact, we make it compatible
-			 * with current bitmap array as below:
-			 *    SE4,SH0 --> bitmap[0][1]
-			 *    SE5,SH0 --> bitmap[1][1]
-			 *    SE6,SH0 --> bitmap[2][1]
-			 *    SE7,SH0 --> bitmap[3][1]
-			 */
-			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
+				cu_info->bitmap[xcc_id][i][j] = bitmap;

-			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
-				if (bitmap & mask) {
-					if (counter < adev->gfx.config.max_cu_per_sh)
-						ao_bitmap |= mask;
-					counter++;
+				for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+					if (bitmap & mask) {
+						if (counter < adev->gfx.config.max_cu_per_sh)
+							ao_bitmap |= mask;
+						counter++;
+					}
+					mask <<= 1;
 				}
-				mask <<= 1;
+				active_cu_number += counter;
+				if (i < 2 && j < 2)
+					ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+				cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
 			}
-			active_cu_number += counter;
-			if (i < 2 && j < 2)
-				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
-			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
 		}
+		gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+					    xcc_id);
 	}
-	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
-				    0);
 	mutex_unlock(&adev->grbm_idx_mutex);

 	cu_info->number = active_cu_number;
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@ -345,6 +345,9 @@ static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
 		data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
 		WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
 	}
+	if (amdgpu_sriov_vf(adev))
+		adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+			regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }

 static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
--- a/Show more
+++ b/Show more