From dd08ebf6c3525a7ea2186e636df064ea47281987 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 30 Mar 2023 17:31:57 -0400
Subject: [PATCH] drm/xe: Introduce a new DRM driver for Intel GPUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Xe is a new driver for Intel GPUs that supports both integrated and
discrete platforms starting with Tiger Lake (first Intel Xe Architecture).

The code is at a stage where it is already functional and has experimental
support for multiple platforms starting from Tiger Lake, with initial
support implemented in Mesa (for Iris and Anv, our OpenGL and Vulkan
drivers), as well as in NEO (for OpenCL and Level0).

The new Xe driver leverages a lot from i915.

As for display, the intent is to share the display code with the i915
driver so that there is maximum reuse there. But it is not added
in this patch.

This initial work is a collaboration of many people and unfortunately
the big squashed patch won't fully honor the proper credits. But let's
get some quick git stats so we can at least try to preserve some of the
credits:

Co-developed-by: Matthew Brost <matthew.brost@intel.com>
Co-developed-by: Matthew Auld <matthew.auld@intel.com>
Co-developed-by: Matt Roper <matthew.d.roper@intel.com>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Co-developed-by: Francois Dugast <francois.dugast@intel.com>
Co-developed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Co-developed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Co-developed-by: Philippe Lecluse <philippe.lecluse@intel.com>
Co-developed-by: Nirmoy Das <nirmoy.das@intel.com>
Co-developed-by: Jani Nikula <jani.nikula@intel.com>
Co-developed-by: José Roberto de Souza <jose.souza@intel.com>
Co-developed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Co-developed-by: Dave Airlie <airlied@redhat.com>
Co-developed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Co-developed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Co-developed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 Documentation/gpu/drivers.rst                 |    1 +
 Documentation/gpu/xe/index.rst                |   23 +
 Documentation/gpu/xe/xe_cs.rst                |    8 +
 Documentation/gpu/xe/xe_firmware.rst          |   34 +
 Documentation/gpu/xe/xe_gt_mcr.rst            |   13 +
 Documentation/gpu/xe/xe_map.rst               |    8 +
 Documentation/gpu/xe/xe_migrate.rst           |    8 +
 Documentation/gpu/xe/xe_mm.rst                |   14 +
 Documentation/gpu/xe/xe_pcode.rst             |   14 +
 Documentation/gpu/xe/xe_pm.rst                |   14 +
 Documentation/gpu/xe/xe_rtp.rst               |   20 +
 Documentation/gpu/xe/xe_wa.rst                |   14 +
 drivers/gpu/drm/Kconfig                       |    2 +
 drivers/gpu/drm/Makefile                      |    1 +
 drivers/gpu/drm/xe/.gitignore                 |    2 +
 drivers/gpu/drm/xe/Kconfig                    |   63 +
 drivers/gpu/drm/xe/Kconfig.debug              |   96 +
 drivers/gpu/drm/xe/Makefile                   |  121 +
 drivers/gpu/drm/xe/abi/guc_actions_abi.h      |  219 ++
 drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h |  249 ++
 .../drm/xe/abi/guc_communication_ctb_abi.h    |  189 +
 .../drm/xe/abi/guc_communication_mmio_abi.h   |   49 +
 drivers/gpu/drm/xe/abi/guc_errors_abi.h       |   37 +
 drivers/gpu/drm/xe/abi/guc_klvs_abi.h         |  322 ++
 drivers/gpu/drm/xe/abi/guc_messages_abi.h     |  234 ++
 drivers/gpu/drm/xe/tests/Makefile             |    4 +
 drivers/gpu/drm/xe/tests/xe_bo.c              |  303 ++
 drivers/gpu/drm/xe/tests/xe_bo_test.c         |   25 +
 drivers/gpu/drm/xe/tests/xe_dma_buf.c         |  259 ++
 drivers/gpu/drm/xe/tests/xe_dma_buf_test.c    |   23 +
 drivers/gpu/drm/xe/tests/xe_migrate.c         |  378 ++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c    |   23 +
 drivers/gpu/drm/xe/tests/xe_test.h            |   66 +
 drivers/gpu/drm/xe/xe_bb.c                    |   97 +
 drivers/gpu/drm/xe/xe_bb.h                    |   27 +
 drivers/gpu/drm/xe/xe_bb_types.h              |   20 +
 drivers/gpu/drm/xe/xe_bo.c                    | 1698 ++++++++
 drivers/gpu/drm/xe/xe_bo.h                    |  290 ++
 drivers/gpu/drm/xe/xe_bo_doc.h                |  179 +
 drivers/gpu/drm/xe/xe_bo_evict.c              |  225 ++
 drivers/gpu/drm/xe/xe_bo_evict.h              |   15 +
 drivers/gpu/drm/xe/xe_bo_types.h              |   73 +
 drivers/gpu/drm/xe/xe_debugfs.c               |  129 +
 drivers/gpu/drm/xe/xe_debugfs.h               |   13 +
 drivers/gpu/drm/xe/xe_device.c                |  359 ++
 drivers/gpu/drm/xe/xe_device.h                |  126 +
 drivers/gpu/drm/xe/xe_device_types.h          |  214 ++
 drivers/gpu/drm/xe/xe_dma_buf.c               |  307 ++
 drivers/gpu/drm/xe/xe_dma_buf.h               |   15 +
 drivers/gpu/drm/xe/xe_drv.h                   |   24 +
 drivers/gpu/drm/xe/xe_engine.c                |  734 ++++
 drivers/gpu/drm/xe/xe_engine.h                |   54 +
 drivers/gpu/drm/xe/xe_engine_types.h          |  208 +
 drivers/gpu/drm/xe/xe_exec.c                  |  390 ++
 drivers/gpu/drm/xe/xe_exec.h                  |   14 +
 drivers/gpu/drm/xe/xe_execlist.c              |  489 +++
 drivers/gpu/drm/xe/xe_execlist.h              |   21 +
 drivers/gpu/drm/xe/xe_execlist_types.h        |   49 +
 drivers/gpu/drm/xe/xe_force_wake.c            |  203 +
 drivers/gpu/drm/xe/xe_force_wake.h            |   40 +
 drivers/gpu/drm/xe/xe_force_wake_types.h      |   84 +
 drivers/gpu/drm/xe/xe_ggtt.c                  |  304 ++
 drivers/gpu/drm/xe/xe_ggtt.h                  |   28 +
 drivers/gpu/drm/xe/xe_ggtt_types.h            |   28 +
 drivers/gpu/drm/xe/xe_gpu_scheduler.c         |  101 +
 drivers/gpu/drm/xe/xe_gpu_scheduler.h         |   73 +
 drivers/gpu/drm/xe/xe_gpu_scheduler_types.h   |   57 +
 drivers/gpu/drm/xe/xe_gt.c                    |  830 ++++
 drivers/gpu/drm/xe/xe_gt.h                    |   64 +
 drivers/gpu/drm/xe/xe_gt_clock.c              |   83 +
 drivers/gpu/drm/xe/xe_gt_clock.h              |   13 +
 drivers/gpu/drm/xe/xe_gt_debugfs.c            |  160 +
 drivers/gpu/drm/xe/xe_gt_debugfs.h            |   13 +
 drivers/gpu/drm/xe/xe_gt_mcr.c                |  552 +++
 drivers/gpu/drm/xe/xe_gt_mcr.h                |   26 +
 drivers/gpu/drm/xe/xe_gt_pagefault.c          |  750 ++++
 drivers/gpu/drm/xe/xe_gt_pagefault.h          |   22 +
 drivers/gpu/drm/xe/xe_gt_sysfs.c              |   55 +
 drivers/gpu/drm/xe/xe_gt_sysfs.h              |   19 +
 drivers/gpu/drm/xe/xe_gt_sysfs_types.h        |   26 +
 drivers/gpu/drm/xe/xe_gt_topology.c           |  144 +
 drivers/gpu/drm/xe/xe_gt_topology.h           |   20 +
 drivers/gpu/drm/xe/xe_gt_types.h              |  320 ++
 drivers/gpu/drm/xe/xe_guc.c                   |  875 +++++
 drivers/gpu/drm/xe/xe_guc.h                   |   57 +
 drivers/gpu/drm/xe/xe_guc_ads.c               |  676 ++++
 drivers/gpu/drm/xe/xe_guc_ads.h               |   17 +
 drivers/gpu/drm/xe/xe_guc_ads_types.h         |   25 +
 drivers/gpu/drm/xe/xe_guc_ct.c                | 1196 ++++++
 drivers/gpu/drm/xe/xe_guc_ct.h                |   62 +
 drivers/gpu/drm/xe/xe_guc_ct_types.h          |   87 +
 drivers/gpu/drm/xe/xe_guc_debugfs.c           |  105 +
 drivers/gpu/drm/xe/xe_guc_debugfs.h           |   14 +
 drivers/gpu/drm/xe/xe_guc_engine_types.h      |   52 +
 drivers/gpu/drm/xe/xe_guc_fwif.h              |  392 ++
 drivers/gpu/drm/xe/xe_guc_hwconfig.c          |  125 +
 drivers/gpu/drm/xe/xe_guc_hwconfig.h          |   17 +
 drivers/gpu/drm/xe/xe_guc_log.c               |  109 +
 drivers/gpu/drm/xe/xe_guc_log.h               |   48 +
 drivers/gpu/drm/xe/xe_guc_log_types.h         |   23 +
 drivers/gpu/drm/xe/xe_guc_pc.c                |  843 ++++
 drivers/gpu/drm/xe/xe_guc_pc.h                |   15 +
 drivers/gpu/drm/xe/xe_guc_pc_types.h          |   34 +
 drivers/gpu/drm/xe/xe_guc_reg.h               |  147 +
 drivers/gpu/drm/xe/xe_guc_submit.c            | 1695 ++++++++
 drivers/gpu/drm/xe/xe_guc_submit.h            |   30 +
 drivers/gpu/drm/xe/xe_guc_types.h             |   71 +
 drivers/gpu/drm/xe/xe_huc.c                   |  131 +
 drivers/gpu/drm/xe/xe_huc.h                   |   19 +
 drivers/gpu/drm/xe/xe_huc_debugfs.c           |   71 +
 drivers/gpu/drm/xe/xe_huc_debugfs.h           |   14 +
 drivers/gpu/drm/xe/xe_huc_types.h             |   19 +
 drivers/gpu/drm/xe/xe_hw_engine.c             |  658 ++++
 drivers/gpu/drm/xe/xe_hw_engine.h             |   27 +
 drivers/gpu/drm/xe/xe_hw_engine_types.h       |  107 +
 drivers/gpu/drm/xe/xe_hw_fence.c              |  230 ++
 drivers/gpu/drm/xe/xe_hw_fence.h              |   27 +
 drivers/gpu/drm/xe/xe_hw_fence_types.h        |   72 +
 drivers/gpu/drm/xe/xe_irq.c                   |  565 +++
 drivers/gpu/drm/xe/xe_irq.h                   |   18 +
 drivers/gpu/drm/xe/xe_lrc.c                   |  841 ++++
 drivers/gpu/drm/xe/xe_lrc.h                   |   50 +
 drivers/gpu/drm/xe/xe_lrc_types.h             |   47 +
 drivers/gpu/drm/xe/xe_macros.h                |   20 +
 drivers/gpu/drm/xe/xe_map.h                   |   93 +
 drivers/gpu/drm/xe/xe_migrate.c               | 1168 ++++++
 drivers/gpu/drm/xe/xe_migrate.h               |   88 +
 drivers/gpu/drm/xe/xe_migrate_doc.h           |   88 +
 drivers/gpu/drm/xe/xe_mmio.c                  |  466 +++
 drivers/gpu/drm/xe/xe_mmio.h                  |  110 +
 drivers/gpu/drm/xe/xe_mocs.c                  |  557 +++
 drivers/gpu/drm/xe/xe_mocs.h                  |   29 +
 drivers/gpu/drm/xe/xe_module.c                |   76 +
 drivers/gpu/drm/xe/xe_module.h                |   13 +
 drivers/gpu/drm/xe/xe_pci.c                   |  651 ++++
 drivers/gpu/drm/xe/xe_pci.h                   |   21 +
 drivers/gpu/drm/xe/xe_pcode.c                 |  296 ++
 drivers/gpu/drm/xe/xe_pcode.h                 |   25 +
 drivers/gpu/drm/xe/xe_pcode_api.h             |   40 +
 drivers/gpu/drm/xe/xe_platform_types.h        |   32 +
 drivers/gpu/drm/xe/xe_pm.c                    |  207 +
 drivers/gpu/drm/xe/xe_pm.h                    |   24 +
 drivers/gpu/drm/xe/xe_preempt_fence.c         |  157 +
 drivers/gpu/drm/xe/xe_preempt_fence.h         |   61 +
 drivers/gpu/drm/xe/xe_preempt_fence_types.h   |   33 +
 drivers/gpu/drm/xe/xe_pt.c                    | 1542 ++++++++
 drivers/gpu/drm/xe/xe_pt.h                    |   54 +
 drivers/gpu/drm/xe/xe_pt_types.h              |   57 +
 drivers/gpu/drm/xe/xe_pt_walk.c               |  160 +
 drivers/gpu/drm/xe/xe_pt_walk.h               |  161 +
 drivers/gpu/drm/xe/xe_query.c                 |  387 ++
 drivers/gpu/drm/xe/xe_query.h                 |   14 +
 drivers/gpu/drm/xe/xe_reg_sr.c                |  248 ++
 drivers/gpu/drm/xe/xe_reg_sr.h                |   28 +
 drivers/gpu/drm/xe/xe_reg_sr_types.h          |   44 +
 drivers/gpu/drm/xe/xe_reg_whitelist.c         |   73 +
 drivers/gpu/drm/xe/xe_reg_whitelist.h         |   13 +
 drivers/gpu/drm/xe/xe_res_cursor.h            |  226 ++
 drivers/gpu/drm/xe/xe_ring_ops.c              |  373 ++
 drivers/gpu/drm/xe/xe_ring_ops.h              |   17 +
 drivers/gpu/drm/xe/xe_ring_ops_types.h        |   22 +
 drivers/gpu/drm/xe/xe_rtp.c                   |  144 +
 drivers/gpu/drm/xe/xe_rtp.h                   |  340 ++
 drivers/gpu/drm/xe/xe_rtp_types.h             |  105 +
 drivers/gpu/drm/xe/xe_sa.c                    |   96 +
 drivers/gpu/drm/xe/xe_sa.h                    |   42 +
 drivers/gpu/drm/xe/xe_sa_types.h              |   19 +
 drivers/gpu/drm/xe/xe_sched_job.c             |  246 ++
 drivers/gpu/drm/xe/xe_sched_job.h             |   76 +
 drivers/gpu/drm/xe/xe_sched_job_types.h       |   46 +
 drivers/gpu/drm/xe/xe_step.c                  |  189 +
 drivers/gpu/drm/xe/xe_step.h                  |   18 +
 drivers/gpu/drm/xe/xe_step_types.h            |   51 +
 drivers/gpu/drm/xe/xe_sync.c                  |  276 ++
 drivers/gpu/drm/xe/xe_sync.h                  |   27 +
 drivers/gpu/drm/xe/xe_sync_types.h            |   27 +
 drivers/gpu/drm/xe/xe_trace.c                 |    9 +
 drivers/gpu/drm/xe/xe_trace.h                 |  513 +++
 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c           |  130 +
 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h           |   16 +
 drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h     |   18 +
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c          |  403 ++
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h          |   41 +
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h    |   44 +
 drivers/gpu/drm/xe/xe_tuning.c                |   39 +
 drivers/gpu/drm/xe/xe_tuning.h                |   13 +
 drivers/gpu/drm/xe/xe_uc.c                    |  226 ++
 drivers/gpu/drm/xe/xe_uc.h                    |   21 +
 drivers/gpu/drm/xe/xe_uc_debugfs.c            |   26 +
 drivers/gpu/drm/xe/xe_uc_debugfs.h            |   14 +
 drivers/gpu/drm/xe/xe_uc_fw.c                 |  406 ++
 drivers/gpu/drm/xe/xe_uc_fw.h                 |  180 +
 drivers/gpu/drm/xe/xe_uc_fw_abi.h             |   81 +
 drivers/gpu/drm/xe/xe_uc_fw_types.h           |  112 +
 drivers/gpu/drm/xe/xe_uc_types.h              |   25 +
 drivers/gpu/drm/xe/xe_vm.c                    | 3407 +++++++++++++++++
 drivers/gpu/drm/xe/xe_vm.h                    |  141 +
 drivers/gpu/drm/xe/xe_vm_doc.h                |  555 +++
 drivers/gpu/drm/xe/xe_vm_madvise.c            |  347 ++
 drivers/gpu/drm/xe/xe_vm_madvise.h            |   15 +
 drivers/gpu/drm/xe/xe_vm_types.h              |  337 ++
 drivers/gpu/drm/xe/xe_wa.c                    |  326 ++
 drivers/gpu/drm/xe/xe_wa.h                    |   18 +
 drivers/gpu/drm/xe/xe_wait_user_fence.c       |  202 +
 drivers/gpu/drm/xe/xe_wait_user_fence.h       |   15 +
 drivers/gpu/drm/xe/xe_wopcm.c                 |  263 ++
 drivers/gpu/drm/xe/xe_wopcm.h                 |   16 +
 drivers/gpu/drm/xe/xe_wopcm_types.h           |   26 +
 include/drm/xe_pciids.h                       |  195 +
 include/uapi/drm/xe_drm.h                     |  787 ++++
 210 files changed, 40575 insertions(+)
 create mode 100644 Documentation/gpu/xe/index.rst
 create mode 100644 Documentation/gpu/xe/xe_cs.rst
 create mode 100644 Documentation/gpu/xe/xe_firmware.rst
 create mode 100644 Documentation/gpu/xe/xe_gt_mcr.rst
 create mode 100644 Documentation/gpu/xe/xe_map.rst
 create mode 100644 Documentation/gpu/xe/xe_migrate.rst
 create mode 100644 Documentation/gpu/xe/xe_mm.rst
 create mode 100644 Documentation/gpu/xe/xe_pcode.rst
 create mode 100644 Documentation/gpu/xe/xe_pm.rst
 create mode 100644 Documentation/gpu/xe/xe_rtp.rst
 create mode 100644 Documentation/gpu/xe/xe_wa.rst
 create mode 100644 drivers/gpu/drm/xe/.gitignore
 create mode 100644 drivers/gpu/drm/xe/Kconfig
 create mode 100644 drivers/gpu/drm/xe/Kconfig.debug
 create mode 100644 drivers/gpu/drm/xe/Makefile
 create mode 100644 drivers/gpu/drm/xe/abi/guc_actions_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_errors_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_klvs_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_messages_abi.h
 create mode 100644 drivers/gpu/drm/xe/tests/Makefile
 create mode 100644 drivers/gpu/drm/xe/tests/xe_bo.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_bo_test.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate_test.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_test.h
 create mode 100644 drivers/gpu/drm/xe/xe_bb.c
 create mode 100644 drivers/gpu/drm/xe/xe_bb.h
 create mode 100644 drivers/gpu/drm/xe/xe_bb_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_bo.c
 create mode 100644 drivers/gpu/drm/xe/xe_bo.h
 create mode 100644 drivers/gpu/drm/xe/xe_bo_doc.h
 create mode 100644 drivers/gpu/drm/xe/xe_bo_evict.c
 create mode 100644 drivers/gpu/drm/xe/xe_bo_evict.h
 create mode 100644 drivers/gpu/drm/xe/xe_bo_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_device.c
 create mode 100644 drivers/gpu/drm/xe/xe_device.h
 create mode 100644 drivers/gpu/drm/xe/xe_device_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_dma_buf.c
 create mode 100644 drivers/gpu/drm/xe/xe_dma_buf.h
 create mode 100644 drivers/gpu/drm/xe/xe_drv.h
 create mode 100644 drivers/gpu/drm/xe/xe_engine.c
 create mode 100644 drivers/gpu/drm/xe/xe_engine.h
 create mode 100644 drivers/gpu/drm/xe/xe_engine_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_exec.c
 create mode 100644 drivers/gpu/drm/xe/xe_exec.h
 create mode 100644 drivers/gpu/drm/xe/xe_execlist.c
 create mode 100644 drivers/gpu/drm/xe/xe_execlist.h
 create mode 100644 drivers/gpu/drm/xe/xe_execlist_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_force_wake.c
 create mode 100644 drivers/gpu/drm/xe/xe_force_wake.h
 create mode 100644 drivers/gpu/drm/xe/xe_force_wake_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_ggtt.c
 create mode 100644 drivers/gpu/drm/xe/xe_ggtt.h
 create mode 100644 drivers/gpu/drm/xe/xe_ggtt_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler.c
 create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler.h
 create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_clock.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_clock.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_mcr.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_mcr.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_pagefault.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_pagefault.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_topology.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_topology.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ads.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ads.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ads_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ct.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ct.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ct_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_engine_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_fwif.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_hwconfig.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_hwconfig.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_log.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_log.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_log_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_pc.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_pc.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_pc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_reg.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_submit.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_submit.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_huc.c
 create mode 100644 drivers/gpu/drm/xe/xe_huc.h
 create mode 100644 drivers/gpu/drm/xe/xe_huc_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_huc_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_huc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_hw_engine.c
 create mode 100644 drivers/gpu/drm/xe/xe_hw_engine.h
 create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_hw_fence.c
 create mode 100644 drivers/gpu/drm/xe/xe_hw_fence.h
 create mode 100644 drivers/gpu/drm/xe/xe_hw_fence_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_irq.c
 create mode 100644 drivers/gpu/drm/xe/xe_irq.h
 create mode 100644 drivers/gpu/drm/xe/xe_lrc.c
 create mode 100644 drivers/gpu/drm/xe/xe_lrc.h
 create mode 100644 drivers/gpu/drm/xe/xe_lrc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_macros.h
 create mode 100644 drivers/gpu/drm/xe/xe_map.h
 create mode 100644 drivers/gpu/drm/xe/xe_migrate.c
 create mode 100644 drivers/gpu/drm/xe/xe_migrate.h
 create mode 100644 drivers/gpu/drm/xe/xe_migrate_doc.h
 create mode 100644 drivers/gpu/drm/xe/xe_mmio.c
 create mode 100644 drivers/gpu/drm/xe/xe_mmio.h
 create mode 100644 drivers/gpu/drm/xe/xe_mocs.c
 create mode 100644 drivers/gpu/drm/xe/xe_mocs.h
 create mode 100644 drivers/gpu/drm/xe/xe_module.c
 create mode 100644 drivers/gpu/drm/xe/xe_module.h
 create mode 100644 drivers/gpu/drm/xe/xe_pci.c
 create mode 100644 drivers/gpu/drm/xe/xe_pci.h
 create mode 100644 drivers/gpu/drm/xe/xe_pcode.c
 create mode 100644 drivers/gpu/drm/xe/xe_pcode.h
 create mode 100644 drivers/gpu/drm/xe/xe_pcode_api.h
 create mode 100644 drivers/gpu/drm/xe/xe_platform_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_pm.c
 create mode 100644 drivers/gpu/drm/xe/xe_pm.h
 create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence.c
 create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence.h
 create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_pt.c
 create mode 100644 drivers/gpu/drm/xe/xe_pt.h
 create mode 100644 drivers/gpu/drm/xe/xe_pt_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_pt_walk.c
 create mode 100644 drivers/gpu/drm/xe/xe_pt_walk.h
 create mode 100644 drivers/gpu/drm/xe/xe_query.c
 create mode 100644 drivers/gpu/drm/xe/xe_query.h
 create mode 100644 drivers/gpu/drm/xe/xe_reg_sr.c
 create mode 100644 drivers/gpu/drm/xe/xe_reg_sr.h
 create mode 100644 drivers/gpu/drm/xe/xe_reg_sr_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_reg_whitelist.c
 create mode 100644 drivers/gpu/drm/xe/xe_reg_whitelist.h
 create mode 100644 drivers/gpu/drm/xe/xe_res_cursor.h
 create mode 100644 drivers/gpu/drm/xe/xe_ring_ops.c
 create mode 100644 drivers/gpu/drm/xe/xe_ring_ops.h
 create mode 100644 drivers/gpu/drm/xe/xe_ring_ops_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_rtp.c
 create mode 100644 drivers/gpu/drm/xe/xe_rtp.h
 create mode 100644 drivers/gpu/drm/xe/xe_rtp_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_sa.c
 create mode 100644 drivers/gpu/drm/xe/xe_sa.h
 create mode 100644 drivers/gpu/drm/xe/xe_sa_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_sched_job.c
 create mode 100644 drivers/gpu/drm/xe/xe_sched_job.h
 create mode 100644 drivers/gpu/drm/xe/xe_sched_job_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_step.c
 create mode 100644 drivers/gpu/drm/xe/xe_step.h
 create mode 100644 drivers/gpu/drm/xe/xe_step_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_sync.c
 create mode 100644 drivers/gpu/drm/xe/xe_sync.h
 create mode 100644 drivers/gpu/drm/xe/xe_sync_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_trace.c
 create mode 100644 drivers/gpu/drm/xe/xe_trace.h
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_tuning.c
 create mode 100644 drivers/gpu/drm/xe/xe_tuning.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc.c
 create mode 100644 drivers/gpu/drm/xe/xe_uc.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_uc_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_fw.c
 create mode 100644 drivers/gpu/drm/xe/xe_uc_fw.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_fw_abi.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_fw_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_vm.c
 create mode 100644 drivers/gpu/drm/xe/xe_vm.h
 create mode 100644 drivers/gpu/drm/xe/xe_vm_doc.h
 create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.c
 create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.h
 create mode 100644 drivers/gpu/drm/xe/xe_vm_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_wa.c
 create mode 100644 drivers/gpu/drm/xe/xe_wa.h
 create mode 100644 drivers/gpu/drm/xe/xe_wait_user_fence.c
 create mode 100644 drivers/gpu/drm/xe/xe_wait_user_fence.h
 create mode 100644 drivers/gpu/drm/xe/xe_wopcm.c
 create mode 100644 drivers/gpu/drm/xe/xe_wopcm.h
 create mode 100644 drivers/gpu/drm/xe/xe_wopcm_types.h
 create mode 100644 include/drm/xe_pciids.h
 create mode 100644 include/uapi/drm/xe_drm.h

diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst
index cc6535f5f28c..b899cbc5c2b4 100644
--- a/Documentation/gpu/drivers.rst
+++ b/Documentation/gpu/drivers.rst
@@ -18,6 +18,7 @@ GPU Driver Documentation
    vkms
    bridge/dw-hdmi
    xen-front
+   xe/index
    afbc
    komeda-kms
    panfrost
diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
new file mode 100644
index 000000000000..2fddf9ed251e
--- /dev/null
+++ b/Documentation/gpu/xe/index.rst
@@ -0,0 +1,23 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=======================
+drm/xe Intel GFX Driver
+=======================
+
+The drm/xe driver supports some future GFX cards with rendering, display,
+compute and media. Support for currently available platforms like TGL, ADL,
+DG2, etc. is provided to prototype the driver.
+
+.. toctree::
+   :titlesonly:
+
+   xe_mm
+   xe_map
+   xe_migrate
+   xe_cs
+   xe_pm
+   xe_pcode
+   xe_gt_mcr
+   xe_wa
+   xe_rtp
+   xe_firmware
diff --git a/Documentation/gpu/xe/xe_cs.rst b/Documentation/gpu/xe/xe_cs.rst
new file mode 100644
index 000000000000..e379aed4f5a8
--- /dev/null
+++ b/Documentation/gpu/xe/xe_cs.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==================
+Command submission
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_exec.c
+   :doc: Execbuf (User GPU command submission)
diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst
new file mode 100644
index 000000000000..c01246ae99f5
--- /dev/null
+++ b/Documentation/gpu/xe/xe_firmware.rst
@@ -0,0 +1,34 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========
+Firmware
+========
+
+Firmware Layout
+===============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
+   :doc: Firmware Layout
+
+Write Once Protected Content Memory (WOPCM) Layout
+==================================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wopcm.c
+   :doc: Write Once Protected Content Memory (WOPCM) Layout
+
+GuC CTB Blob
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_ct.c
+   :doc: GuC CTB Blob
+
+GuC Power Conservation (PC)
+===========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c
+   :doc: GuC Power Conservation (PC)
+
+Internal API
+============
+
+TODO
diff --git a/Documentation/gpu/xe/xe_gt_mcr.rst b/Documentation/gpu/xe/xe_gt_mcr.rst
new file mode 100644
index 000000000000..848c07bc36d0
--- /dev/null
+++ b/Documentation/gpu/xe/xe_gt_mcr.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==============================================
+GT Multicast/Replicated (MCR) Register Support
+==============================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_mcr.c
+   :doc: GT Multicast/Replicated (MCR) Register Support
+
+Internal API
+============
+
+TODO
diff --git a/Documentation/gpu/xe/xe_map.rst b/Documentation/gpu/xe/xe_map.rst
new file mode 100644
index 000000000000..a098cfd2df04
--- /dev/null
+++ b/Documentation/gpu/xe/xe_map.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========
+Map Layer
+=========
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_map.h
+   :doc: Map layer
diff --git a/Documentation/gpu/xe/xe_migrate.rst b/Documentation/gpu/xe/xe_migrate.rst
new file mode 100644
index 000000000000..f92faec0ac94
--- /dev/null
+++ b/Documentation/gpu/xe/xe_migrate.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=============
+Migrate Layer
+=============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_migrate_doc.h
+   :doc: Migrate Layer
diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst
new file mode 100644
index 000000000000..6c8fd8b4a466
--- /dev/null
+++ b/Documentation/gpu/xe/xe_mm.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=================
+Memory Management
+=================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h
+   :doc: Buffer Objects (BO)
+
+Pagetable building
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pt.c
+   :doc: Pagetable building
diff --git a/Documentation/gpu/xe/xe_pcode.rst b/Documentation/gpu/xe/xe_pcode.rst
new file mode 100644
index 000000000000..d2e22cc45061
--- /dev/null
+++ b/Documentation/gpu/xe/xe_pcode.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=====
+Pcode
+=====
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+   :doc: PCODE
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_pm.rst b/Documentation/gpu/xe/xe_pm.rst
new file mode 100644
index 000000000000..6781cdfb24f6
--- /dev/null
+++ b/Documentation/gpu/xe/xe_pm.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========================
+Runtime Power Management
+========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+   :doc: Xe Power Management
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_rtp.rst b/Documentation/gpu/xe/xe_rtp.rst
new file mode 100644
index 000000000000..7fdf4b6c1a04
--- /dev/null
+++ b/Documentation/gpu/xe/xe_rtp.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========================
+Register Table Processing
+=========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+   :doc: Register Table Processing
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp_types.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_wa.rst b/Documentation/gpu/xe/xe_wa.rst
new file mode 100644
index 000000000000..f8811cc6adcc
--- /dev/null
+++ b/Documentation/gpu/xe/xe_wa.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+====================
+Hardware workarounds
+====================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+   :doc: Hardware workarounds
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+   :internal:
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 31cfe2c2a2af..2520db0b776e 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -276,6 +276,8 @@ source "drivers/gpu/drm/nouveau/Kconfig"
 
 source "drivers/gpu/drm/i915/Kconfig"
 
+source "drivers/gpu/drm/xe/Kconfig"
+
 source "drivers/gpu/drm/kmb/Kconfig"
 
 config DRM_VGEM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 8ac6f4b9546e..104b42df2e95 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -134,6 +134,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
 obj-$(CONFIG_DRM_I915)	+= i915/
+obj-$(CONFIG_DRM_XE)	+= xe/
 obj-$(CONFIG_DRM_KMB_DISPLAY)  += kmb/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
 obj-$(CONFIG_DRM_V3D)  += v3d/
diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore
new file mode 100644
index 000000000000..81972dce1aff
--- /dev/null
+++ b/drivers/gpu/drm/xe/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*.hdrtest
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
new file mode 100644
index 000000000000..62f54e6d62d9
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE
+	tristate "Intel Xe Graphics"
+	depends on DRM && PCI && MMU
+	select INTERVAL_TREE
+	# we need shmfs for the swappable backing store, and in particular
+	# the shmem_readpage() which depends upon tmpfs
+	select SHMEM
+	select TMPFS
+	select DRM_BUDDY
+	select DRM_KMS_HELPER
+	select DRM_PANEL
+	select DRM_SUBALLOC_HELPER
+	select RELAY
+	select IRQ_WORK
+	select SYNC_FILE
+	select IOSF_MBI
+	select CRC32
+	select SND_HDA_I915 if SND_HDA_CORE
+	select CEC_CORE if CEC_NOTIFIER
+	select VMAP_PFN
+	select DRM_TTM
+	select DRM_TTM_HELPER
+	select DRM_SCHED
+	select MMU_NOTIFIER
+	help
+	  Experimental driver for Intel Xe series GPUs
+
+	  If "M" is selected, the module will be called xe.
+
+config DRM_XE_FORCE_PROBE
+	string "Force probe xe for selected Intel hardware IDs"
+	depends on DRM_XE
+	help
+	  This is the default value for the xe.force_probe module
+	  parameter. Using the module parameter overrides this option.
+
+	  Force probe the xe for Intel graphics devices that are
+	  recognized but not properly supported by this kernel version. It is
+	  recommended to upgrade to a kernel version with proper support as soon
+	  as it is available.
+
+	  It can also be used to block the probe of recognized and fully
+	  supported devices.
+
+	  Use "" to disable force probe. If in doubt, use this.
+
+	  Use "<pci-id>[,<pci-id>,...]" to force probe the xe for listed
+	  devices. For example, "4500" or "4500,4571".
+
+	  Use "*" to force probe the driver for all known devices.
+
+	  Use "!" right before the ID to block the probe of the device. For
+	  example, "4500,!4571" forces the probe of 4500 and blocks the probe of
+	  4571.
+
+	  Use "!*" to block the probe of the driver for all known devices.
+
+menu "drm/Xe Debugging"
+depends on DRM_XE
+depends on EXPERT
+source "drivers/gpu/drm/xe/Kconfig.debug"
+endmenu
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
new file mode 100644
index 000000000000..b61fd43a76fe
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE_WERROR
+	bool "Force GCC to throw an error instead of a warning when compiling"
+	# As this may inadvertently break the build, only allow the user
+	# to shoot oneself in the foot iff they aim really hard
+	depends on EXPERT
+	# We use the dependency on !COMPILE_TEST to not be enabled in
+	# allmodconfig or allyesconfig configurations
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Add -Werror to the build flags for (and only for) xe.ko.
+	  Do not enable this unless you are writing code for the xe.ko module.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG
+	bool "Enable additional driver debugging"
+	depends on DRM_XE
+	depends on EXPERT
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Choose this option to turn on extra driver debugging that may affect
+	  performance but will catch some internal issues.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_VM
+	bool "Enable extra VM debugging info"
+	default n
+	help
+	  Enable extra VM debugging info
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_MEM
+	bool "Enable passing SYS/LMEM addresses to user space"
+	default n
+	help
+	  Pass object location through uapi. Intended for extended
+	  testing and development only.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_SIMPLE_ERROR_CAPTURE
+	bool "Enable simple error capture to dmesg on job timeout"
+	default n
+	help
+	  Choose this option when debugging an unexpected job timeout
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_KUNIT_TEST
+	tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS
+	depends on DRM_XE && KUNIT
+	default KUNIT_ALL_TESTS
+	select DRM_EXPORT_FOR_TESTS if m
+	help
+	  Choose this option to allow the driver to perform selftests under
+	  the kunit framework
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_LARGE_GUC_BUFFER
+	bool "Enable larger guc log buffer"
+	default n
+	help
+	  Choose this option when debugging guc issues.
+	  Buffer should be large enough for complex issues.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_USERPTR_INVAL_INJECT
+	bool "Inject userptr invalidation -EINVAL errors"
+	default n
+	help
+	  Choose this option when debugging error paths that
+	  are hit during checks for userptr invalidations.
+
+	  Recommended for driver developers only.
+	  If in doubt, say "N".
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
new file mode 100644
index 000000000000..228a87f2fe7b
--- /dev/null
+++ b/drivers/gpu/drm/xe/Makefile
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+# Add a set of useful warning flags and enable -Werror for CI to prevent
+# trivial mistakes from creeping in. We have to do this piecemeal as we reject
+# any patch that isn't warning clean, so turning on -Wall -Wextra (or W=1) we
+# need to filter out dubious warnings.  Still, it is in our interest
+# to keep running locally with W=1 C=1 until we are completely clean.
+#
+# Note the danger in using -Wall -Wextra is that when CI updates gcc we
+# will most likely get a sudden build breakage... Hopefully we will fix
+# new warnings before CI updates!
+subdir-ccflags-y := -Wall -Wextra
+# making these call cc-disable-warning breaks when trying to build xe.mod.o
+# by calling make M=drivers/gpu/drm/xe. This doesn't happen in upstream tree,
+# so it was somehow fixed by the changes in the build system. Move it back to
+# $(call cc-disable-warning, ...) after rebase.
+subdir-ccflags-y += -Wno-unused-parameter
+subdir-ccflags-y += -Wno-type-limits
+#subdir-ccflags-y += $(call cc-disable-warning, unused-parameter)
+#subdir-ccflags-y += $(call cc-disable-warning, type-limits)
+subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers)
+subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
+# clang warnings
+subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
+subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)
+subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
+subdir-ccflags-y += $(call cc-disable-warning, frame-address)
+subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
+
+# Fine grained warnings disable
+CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init)
+
+subdir-ccflags-y += -I$(srctree)/$(src)
+
+# Please keep these build lists sorted!
+
+# core driver code
+
+xe-y += xe_bb.o \
+	xe_bo.o \
+	xe_bo_evict.o \
+	xe_debugfs.o \
+	xe_device.o \
+	xe_dma_buf.o \
+	xe_engine.o \
+	xe_exec.o \
+	xe_execlist.o \
+	xe_force_wake.o \
+	xe_ggtt.o \
+	xe_gpu_scheduler.o \
+	xe_gt.o \
+	xe_gt_clock.o \
+	xe_gt_debugfs.o \
+	xe_gt_mcr.o \
+	xe_gt_pagefault.o \
+	xe_gt_sysfs.o \
+	xe_gt_topology.o \
+	xe_guc.o \
+	xe_guc_ads.o \
+	xe_guc_ct.o \
+	xe_guc_debugfs.o \
+	xe_guc_hwconfig.o \
+	xe_guc_log.o \
+	xe_guc_pc.o \
+	xe_guc_submit.o \
+	xe_hw_engine.o \
+	xe_hw_fence.o \
+	xe_huc.o \
+	xe_huc_debugfs.o \
+	xe_irq.o \
+	xe_lrc.o \
+	xe_migrate.o \
+	xe_mmio.o \
+	xe_mocs.o \
+	xe_module.o \
+	xe_pci.o \
+	xe_pcode.o \
+	xe_pm.o \
+	xe_preempt_fence.o \
+	xe_pt.o \
+	xe_pt_walk.o \
+	xe_query.o \
+	xe_reg_sr.o \
+	xe_reg_whitelist.o \
+	xe_rtp.o \
+	xe_ring_ops.o \
+	xe_sa.o \
+	xe_sched_job.o \
+	xe_step.o \
+	xe_sync.o \
+	xe_trace.o \
+	xe_ttm_gtt_mgr.o \
+	xe_ttm_vram_mgr.o \
+	xe_tuning.o \
+	xe_uc.o \
+	xe_uc_debugfs.o \
+	xe_uc_fw.o \
+	xe_vm.o \
+	xe_vm_madvise.o \
+	xe_wait_user_fence.o \
+	xe_wa.o \
+	xe_wopcm.o
+
+# XXX: Needed for i915 register definitions. Will be removed after xe-regs.
+subdir-ccflags-y += -I$(srctree)/drivers/gpu/drm/i915/
+
+obj-$(CONFIG_DRM_XE) += xe.o
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
+
+# header test
+always-$(CONFIG_DRM_XE_WERROR) += \
+	$(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h'))
+
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+      cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+	$(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
new file mode 100644
index 000000000000..3062e0e0d467
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ACTIONS_ABI_H
+#define _ABI_GUC_ACTIONS_ABI_H
+
+/**
+ * DOC: HOST2GUC_SELF_CFG
+ *
+ * This message is used by Host KMD to set up the `GuC Self Config KLVs`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508            |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_           |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **KLV_LEN** - KLV length                                     |
+ *  |   |       |                                                              |
+ *  |   |       |   - 32 bit KLV = 1                                           |
+ *  |   |       |   - 64 bit KLV = 2                                           |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **VALUE32** - Bits 31-0 of the KLV value                     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 3 |  31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2)  |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_SELF_CFG			0x0508
+
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY		(0xffffu << 16) /* bits 31:16; unsigned so the shift cannot overflow int */
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN		(0xffffu << 0) /* bits 15:0; unsigned to match KLV_KEY */
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32		GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64		GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
+ * DOC: HOST2GUC_CONTROL_CTB
+ *
+ * This H2G action allows Vf Host to enable or disable H2G and G2H `CT Buffer`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509         |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **CONTROL** - control `CTB based communication`_             |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_CTB_CONTROL_DISABLE` = 0                           |
+ *  |   |       |   - _`GUC_CTB_CONTROL_ENABLE` = 1                            |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = MBZ                                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_CONTROL_CTB			0x4509
+
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL	GUC_HXG_REQUEST_MSG_n_DATAn
+#define   GUC_CTB_CONTROL_DISABLE			0u
+#define   GUC_CTB_CONTROL_ENABLE			1u
+
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/* legacy definitions */
+
+enum xe_guc_action { /* numeric action ids; HOST2GUC_SELF_CFG (0x0508) and HOST2GUC_CONTROL_CTB (0x4509) are #defines above, not members */
+	XE_GUC_ACTION_DEFAULT = 0x0,
+	XE_GUC_ACTION_REQUEST_PREEMPTION = 0x2,
+	XE_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3,
+	XE_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
+	XE_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
+	XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+	XE_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
+	XE_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
+	XE_GUC_ACTION_ENTER_S_STATE = 0x501,
+	XE_GUC_ACTION_EXIT_S_STATE = 0x502,
+	XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+	XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+	XE_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005,
+	XE_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+	XE_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
+	XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
+	XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
+	XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
+	XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
+	XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+	XE_GUC_ACTION_GET_HWCONFIG = 0x4100,
+	XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
+	XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+	XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+	XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
+	XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
+	XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+	XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
+	XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
+	XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
+	XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY = 0x6004,
+	XE_GUC_ACTION_TLB_INVALIDATION = 0x7000,
+	XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
+	XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002,
+	XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
+	XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
+	XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
+	XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+	XE_GUC_ACTION_LIMIT /* implicit value: previous enumerator + 1 = 0x8006 */
+};
+
+enum xe_guc_rc_options {
+	XE_GUCRC_HOST_CONTROL, /* implicit value 0 */
+	XE_GUCRC_FIRMWARE_CONTROL, /* implicit value 1 */
+};
+
+enum xe_guc_preempt_options {
+	XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4,
+	XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8,
+};
+
+enum xe_guc_report_status {
+	XE_GUC_REPORT_STATUS_UNKNOWN = 0x0,
+	XE_GUC_REPORT_STATUS_ACKED = 0x1,
+	XE_GUC_REPORT_STATUS_ERROR = 0x2,
+	XE_GUC_REPORT_STATUS_COMPLETE = 0x4,
+};
+
+enum xe_guc_sleep_state_status {
+	XE_GUC_SLEEP_STATE_SUCCESS = 0x1,
+	XE_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2,
+	XE_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3
+#define XE_GUC_SLEEP_STATE_INVALID_MASK 0x80000000 /* preprocessor macro (bit 31), not an enumerator of this enum */
+};
+
+#define GUC_LOG_CONTROL_LOGGING_ENABLED	(1 << 0)
+#define GUC_LOG_CONTROL_VERBOSITY_SHIFT	4
+#define GUC_LOG_CONTROL_VERBOSITY_MASK	(0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
+#define GUC_LOG_CONTROL_DEFAULT_LOGGING	(1 << 8)
+
+#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
+#define XE_GUC_TLB_INVAL_MODE_SHIFT 8
+/* Flush PPC or SMRO caches along with TLB invalidation request (bit 31, unsigned to avoid shifting into the sign bit) */
+#define XE_GUC_TLB_INVAL_FLUSH_CACHE (1u << 31)
+
+enum xe_guc_tlb_invalidation_type {
+	XE_GUC_TLB_INVAL_FULL = 0x0,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE = 0x1,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX = 0x2,
+	XE_GUC_TLB_INVAL_GUC = 0x3,
+};
+
+/*
+ * 0: Heavy mode of Invalidation:
+ * The pipeline of the engine(s) for which the invalidation is targeted to is
+ * blocked, and all the in-flight transactions are guaranteed to be Globally
+ * Observed before completing the TLB invalidation
+ * 1: Lite mode of Invalidation:
+ * TLBs of the targeted engine(s) are immediately invalidated.
+ * In-flight transactions are NOT guaranteed to be Globally Observed before
+ * completing TLB invalidation.
+ * Light Invalidation Mode is to be used only when
+ * it can be guaranteed (by SW) that the address translations remain invariant
+ * for the in-flight transactions across the TLB invalidation. In other words,
+ * this mode can be used when the TLB invalidation is intended to clear out the
+ * stale cached translations that are no longer in use. Light Invalidation Mode
+ * is much faster than the Heavy Invalidation Mode, as it does not wait for the
+ * in-flight transactions to be GOd.
+ */
+enum xe_guc_tlb_inval_mode {
+	XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0,
+	XE_GUC_TLB_INVAL_MODE_LITE = 0x1,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
new file mode 100644
index 000000000000..811add10c30d
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_SLPC_ABI_H_
+#define _GUC_ACTIONS_SLPC_ABI_H_
+
+#include <linux/types.h>
+
+/**
+ * DOC: SLPC SHARED DATA STRUCTURE
+ *
+ *  +----+------+--------------------------------------------------------------+
+ *  | CL | Bytes| Description                                                  |
+ *  +====+======+==============================================================+
+ *  | 1  | 0-3  | SHARED DATA SIZE                                             |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 4-7  | GLOBAL STATE                                                 |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 8-11 | DISPLAY DATA ADDRESS                                         |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 12:63| PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  | 2  | 0:63 | PADDING(PLATFORM INFO)                                       |
+ *  +----+------+--------------------------------------------------------------+
+ *  | 3  | 0-3  | TASK STATE DATA                                              |
+ *  +    +------+--------------------------------------------------------------+
+ *  |    | 4:63 | PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS                               |
+ *  +----+------+--------------------------------------------------------------+
+ *  |    |      | PADDING + EXTRA RESERVED PAGE                                |
+ *  +----+------+--------------------------------------------------------------+
+ */
+
+/*
+ * SLPC exposes certain parameters for global configuration by the host.
+ * These are referred to as override parameters, because in most cases
+ * the host will not need to modify the default values used by SLPC.
+ * SLPC remembers the default values which allows the host to easily restore
+ * them by simply unsetting the override. The host can set or unset override
+ * parameters during SLPC (re-)initialization using the SLPC Reset event.
+ * The host can also set or unset override parameters on the fly using the
+ * Parameter Set and Parameter Unset events
+ */
+
+#define SLPC_MAX_OVERRIDE_PARAMETERS		256
+#define SLPC_OVERRIDE_BITFIELD_SIZE \
+		(SLPC_MAX_OVERRIDE_PARAMETERS / 32)
+
+#define SLPC_PAGE_SIZE_BYTES			4096
+#define SLPC_CACHELINE_SIZE_BYTES		64
+#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE	SLPC_PAGE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_MAX		(2 * SLPC_PAGE_SIZE_BYTES)
+
+/*
+ * Cacheline size aligned (Total size needed for
+ * SLPC_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes)
+ */
+#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES	(((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \
+						+ ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \
+		+ (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \
+					SLPC_CACHELINE_SIZE_BYTES)
+
+#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER	(SLPC_SHARED_DATA_SIZE_BYTE_MAX - \
+					(SLPC_SHARED_DATA_SIZE_BYTE_HEADER \
+					+ SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \
+					+ SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \
+					+ SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \
+					+ SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE))
+
+enum slpc_task_enable {
+	SLPC_PARAM_TASK_DEFAULT = 0,
+	SLPC_PARAM_TASK_ENABLED,
+	SLPC_PARAM_TASK_DISABLED,
+	SLPC_PARAM_TASK_UNKNOWN
+};
+
+enum slpc_global_state {
+	SLPC_GLOBAL_STATE_NOT_RUNNING = 0,
+	SLPC_GLOBAL_STATE_INITIALIZING = 1,
+	SLPC_GLOBAL_STATE_RESETTING = 2,
+	SLPC_GLOBAL_STATE_RUNNING = 3,
+	SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4,
+	SLPC_GLOBAL_STATE_ERROR = 5
+};
+
+enum slpc_param_id { /* explicitly numbered parameter ids, contiguous from 0 to 28 */
+	SLPC_PARAM_TASK_ENABLE_GTPERF = 0,
+	SLPC_PARAM_TASK_DISABLE_GTPERF = 1,
+	SLPC_PARAM_TASK_ENABLE_BALANCER = 2,
+	SLPC_PARAM_TASK_DISABLE_BALANCER = 3,
+	SLPC_PARAM_TASK_ENABLE_DCC = 4,
+	SLPC_PARAM_TASK_DISABLE_DCC = 5,
+	SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6,
+	SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7,
+	SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8,
+	SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9,
+	SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10,
+	SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11,
+	SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12,
+	SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13,
+	SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14,
+	SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15,
+	SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16,
+	SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17,
+	SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18,
+	SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19,
+	SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20,
+	SLPC_PARAM_PWRGATE_RC_MODE = 21,
+	SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22,
+	SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23,
+	SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24,
+	SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25,
+	SLPC_PARAM_STRATEGIES = 26,
+	SLPC_PARAM_POWER_PROFILE = 27,
+	SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28,
+	SLPC_MAX_PARAM = 32, /* ids 29-31 are not assigned in this list */
+};
+
+enum slpc_media_ratio_mode {
+	SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
+};
+
+enum slpc_gucrc_mode {
+	SLPC_GUCRC_MODE_HW = 0,
+	SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
+	SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
+	SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
+
+	SLPC_GUCRC_MODE_MAX,
+};
+
+enum slpc_event_id {
+	SLPC_EVENT_RESET = 0,
+	SLPC_EVENT_SHUTDOWN = 1,
+	SLPC_EVENT_PLATFORM_INFO_CHANGE = 2,
+	SLPC_EVENT_DISPLAY_MODE_CHANGE = 3,
+	SLPC_EVENT_FLIP_COMPLETE = 4,
+	SLPC_EVENT_QUERY_TASK_STATE = 5,
+	SLPC_EVENT_PARAMETER_SET = 6,
+	SLPC_EVENT_PARAMETER_UNSET = 7,
+};
+
+struct slpc_task_state_data { /* two u32 words (8 bytes, __packed): status followed by freq */
+	union {
+		u32 task_status_padding;
+		struct {
+			u32 status; /* presumably tested with the SLPC_* REG_BIT() flags below - confirm */
+#define SLPC_GTPERF_TASK_ENABLED	REG_BIT(0) /* NOTE(review): REG_BIT()/REG_GENMASK() are not defined in this header; users must provide them */
+#define SLPC_DCC_TASK_ENABLED		REG_BIT(11)
+#define SLPC_IN_DCC			REG_BIT(12)
+#define SLPC_BALANCER_ENABLED		REG_BIT(15)
+#define SLPC_IBC_TASK_ENABLED		REG_BIT(16)
+#define SLPC_BALANCER_IA_LMT_ENABLED	REG_BIT(17)
+#define SLPC_BALANCER_IA_LMT_ACTIVE	REG_BIT(18)
+		};
+	};
+	union {
+		u32 freq_padding;
+		struct {
+#define SLPC_MAX_UNSLICE_FREQ_MASK	REG_GENMASK(7, 0)
+#define SLPC_MIN_UNSLICE_FREQ_MASK	REG_GENMASK(15, 8)
+#define SLPC_MAX_SLICE_FREQ_MASK	REG_GENMASK(23, 16)
+#define SLPC_MIN_SLICE_FREQ_MASK	REG_GENMASK(31, 24)
+			u32 freq; /* presumably split into the four 8-bit SLPC_*_FREQ_MASK fields above - confirm */
+		};
+	};
+} __packed;
+
+struct slpc_shared_data_header {
+	/* Total size in bytes of this shared buffer. */
+	u32 size;
+	u32 global_state;
+	u32 display_data_addr;
+} __packed;
+
+struct slpc_override_params {
+	u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE];
+	u32 values[SLPC_MAX_OVERRIDE_PARAMETERS];
+} __packed;
+
+struct slpc_shared_data { /* sections sized by the SLPC_SHARED_DATA_SIZE_BYTE_* macros; total is SLPC_SHARED_DATA_SIZE_BYTE_MAX (8192) bytes */
+	struct slpc_shared_data_header header;
+	u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER -
+				sizeof(struct slpc_shared_data_header)]; /* pads the header section to 64 bytes */
+
+	u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO]; /* 64 bytes with no named fields */
+
+	struct slpc_task_state_data task_state_data;
+	u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE -
+				sizeof(struct slpc_task_state_data)]; /* pads the task state section to 64 bytes */
+
+	struct slpc_override_params override_params;
+	u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES -
+				sizeof(struct slpc_override_params)]; /* pads the override section to 1088 bytes */
+
+	u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER]; /* 2816 bytes: whatever remains before the final 4096-byte page */
+
+	/* PAGE 2 (4096 bytes), mode based parameter will be removed soon */
+	u8 reserved_mode_definition[4096];
+} __packed;
+
+/**
+ * DOC: SLPC H2G MESSAGE FORMAT
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST` = 0x3003     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:8 | **EVENT_ID**                                                 |
+ *  +   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | **EVENT_ARGC** - number of data arguments                    |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **EVENT_DATA1**                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|  31:0 | ...                                                          |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |2+n|  31:0 | **EVENT_DATAn**                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST		0x3003
+
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \
+				(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS		9
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \
+		(HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN + \
+			HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) /* minimum message plus the maximum number of event data dwords */
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID		(0xff << 8) /* NOTE(review): mask covers bits 15:8 while the DOC table lists EVENT_ID as 31:8 - confirm */
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC	(0xff << 0)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N	GUC_HXG_REQUEST_MSG_n_DATAn
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
new file mode 100644
index 000000000000..41244055cc0c
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H
+#define _ABI_GUC_COMMUNICATION_CTB_ABI_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+#include "guc_messages_abi.h"
+
+/**
+ * DOC: CT Buffer
+ *
+ * Circular buffer used to send `CTB Message`_
+ */
+
+/**
+ * DOC: CTB Descriptor
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |  31:0 | **HEAD** - offset (in dwords) to the last dword that was     |
+ *  |   |       | read from the `CT Buffer`_.                                  |
+ *  |   |       | It can only be updated by the receiver.                      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **TAIL** - offset (in dwords) to the last dword that was     |
+ *  |   |       | written to the `CT Buffer`_.                                 |
+ *  |   |       | It can only be updated by the sender.                        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **STATUS** - status of the CTB                               |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation)        |
+ *  |   |       |   - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large)     |
+ *  |   |       |   - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message)      |
+ *  |   |       |   - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified)      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|       | RESERVED = MBZ                                               |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 15|  31:0 | RESERVED = MBZ                                               |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+struct guc_ct_buffer_desc {
+	u32 head;	/* dword offset of the last dword read; updated by receiver */
+	u32 tail;	/* dword offset of the last dword written; updated by sender */
+	u32 status;	/* one of the GUC_CTB_STATUS_* values below */
+#define GUC_CTB_STATUS_NO_ERROR				0	/* normal operation */
+#define GUC_CTB_STATUS_OVERFLOW				(1 << 0)	/* head/tail too large */
+#define GUC_CTB_STATUS_UNDERFLOW			(1 << 1)	/* truncated message */
+#define GUC_CTB_STATUS_MISMATCH				(1 << 2)	/* head/tail modified */
+	u32 reserved[13];	/* MBZ; brings the descriptor to 16 dwords */
+} __packed;
+static_assert(sizeof(struct guc_ct_buffer_desc) == 64);	/* 16 dwords */
+
+/**
+ * DOC: CTB Message
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | **FENCE** - message identifier                               |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 15:12 | **FORMAT** - format of the CTB message                       |
+ *  |   |       |  - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  11:8 | **RESERVED**                                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | **NUM_DWORDS** - length of the CTB message (w/o header)      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | optional (depends on FORMAT)                                 |
+ *  +---+-------+                                                              |
+ *  |...|       |                                                              |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HDR_LEN				1u
+#define GUC_CTB_MSG_MIN_LEN			GUC_CTB_HDR_LEN
+#define GUC_CTB_MSG_MAX_LEN			256u
+#define GUC_CTB_MSG_0_FENCE			(0xffffu << 16)	/* unsigned: reaches bit 31 */
+#define GUC_CTB_MSG_0_FORMAT			(0xfu << 12)
+#define   GUC_CTB_FORMAT_HXG			0u
+#define GUC_CTB_MSG_0_RESERVED			(0xfu << 8)
+#define GUC_CTB_MSG_0_NUM_DWORDS		(0xffu << 0)
+
+/**
+ * DOC: CTB HXG Message
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | FENCE                                                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  11:8 | RESERVED = MBZ                                               |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | [Embedded `HXG Message`_]                                    |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HXG_MSG_MIN_LEN		(GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN)
+#define GUC_CTB_HXG_MSG_MAX_LEN		GUC_CTB_MSG_MAX_LEN	/* same cap as any CTB message */
+
+/**
+ * DOC: CTB based communication
+ *
+ * The CTB (command transport buffer) communication between Host and GuC
+ * is based on u32 data stream written to the shared buffer. One buffer can
+ * be used to transmit data only in one direction (one-directional channel).
+ *
+ * Current status of each buffer is stored in the buffer descriptor.
+ * Buffer descriptor holds tail and head fields that represent active data
+ * stream. The tail field is updated by the data producer (sender), and head
+ * field is updated by the data consumer (receiver)::
+ *
+ *      +------------+
+ *      | DESCRIPTOR |          +=================+============+========+
+ *      +============+          |                 | MESSAGE(s) |        |
+ *      | address    |--------->+=================+============+========+
+ *      +------------+
+ *      | head       |          ^-----head--------^
+ *      +------------+
+ *      | tail       |          ^---------tail-----------------^
+ *      +------------+
+ *      | size       |          ^---------------size--------------------^
+ *      +------------+
+ *
+ * Each message in data stream starts with the single u32 treated as a header,
+ * followed by optional set of u32 data that makes message specific payload::
+ *
+ *      +------------+---------+---------+---------+
+ *      |         MESSAGE                          |
+ *      +------------+---------+---------+---------+
+ *      |   msg[0]   |   [1]   |   ...   |  [n-1]  |
+ *      +------------+---------+---------+---------+
+ *      |   MESSAGE  |       MESSAGE PAYLOAD       |
+ *      +   HEADER   +---------+---------+---------+
+ *      |            |    0    |   ...   |    n    |
+ *      +======+=====+=========+=========+=========+
+ *      | 31:16| code|         |         |         |
+ *      +------+-----+         |         |         |
+ *      |  15:5|flags|         |         |         |
+ *      +------+-----+         |         |         |
+ *      |   4:0|  len|         |         |         |
+ *      +------+-----+---------+---------+---------+
+ *
+ *                   ^-------------len-------------^
+ *
+ * The message header consists of:
+ *
+ * - **len**, indicates length of the message payload (in u32)
+ * - **code**, indicates message code
+ * - **flags**, holds various bits to control message handling
+ */
+
+/*
+ * Definition of the command transport message header (DW0)
+ *
+ * bit[4..0]	message len (in dwords)
+ * bit[7..5]	reserved
+ * bit[8]	response (G2H only)
+ * bit[8]	write fence to desc (H2G only)
+ * bit[9]	write status to H2G buff (H2G only)
+ * bit[10]	send status back via G2H (H2G only)
+ * bit[15..11]	reserved
+ * bit[31..16]	action code
+ */
+#define GUC_CT_MSG_LEN_SHIFT			0
+#define GUC_CT_MSG_LEN_MASK			0x1F	/* unshifted; field is bit[4..0] */
+#define GUC_CT_MSG_IS_RESPONSE			(1 << 8)	/* G2H only */
+#define GUC_CT_MSG_WRITE_FENCE_TO_DESC		(1 << 8)	/* H2G only */
+#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF		(1 << 9)	/* H2G only */
+#define GUC_CT_MSG_SEND_STATUS			(1 << 10)	/* H2G only */
+#define GUC_CT_MSG_ACTION_SHIFT			16
+#define GUC_CT_MSG_ACTION_MASK			0xFFFF	/* unshifted; field is bit[31..16] */
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
new file mode 100644
index 000000000000..ef538e34f894
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+
+/**
+ * DOC: GuC MMIO based communication
+ *
+ * The MMIO based communication between Host and GuC relies on special
+ * hardware registers whose format can be defined by the software
+ * (so called scratch registers).
+ *
+ * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H)
+ * messages, whose maximum length depends on the number of available scratch
+ * registers, is directly written into those scratch registers.
+ *
+ * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8,
+ * but no H2G command takes more than 4 parameters and the GuC firmware
+ * itself uses a 4-element array to store the H2G message.
+ *
+ * For Gen11+, there are 4 additional registers 0x190240-0x19024C, which
+ * are, regardless of their lower count, preferred over legacy ones.
+ *
+ * The MMIO based communication is mainly used during driver initialization
+ * phase to setup the `CTB based communication`_ that will be used afterwards.
+ */
+
+#define GUC_MAX_MMIO_MSG_LEN		4	/* GuC keeps H2G in a 4-element array */
+
+/**
+ * DOC: MMIO HXG Message
+ *
+ * Format of the MMIO messages follows definitions of `HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | [Embedded `HXG Message`_]                                    |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
new file mode 100644
index 000000000000..ec83551bf9c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ERRORS_ABI_H
+#define _ABI_GUC_ERRORS_ABI_H
+
+enum xe_guc_response_status {
+	XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+	XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
+};
+
+enum xe_guc_load_status {
+	XE_GUC_LOAD_STATUS_DEFAULT                          = 0x00,
+	XE_GUC_LOAD_STATUS_START                            = 0x01,
+	XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH       = 0x02,
+	XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH       = 0x03,
+	XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE      = 0x04,
+	XE_GUC_LOAD_STATUS_GDT_DONE                         = 0x10,
+	XE_GUC_LOAD_STATUS_IDT_DONE                         = 0x20,
+	XE_GUC_LOAD_STATUS_LAPIC_DONE                       = 0x30,
+	XE_GUC_LOAD_STATUS_GUCINT_DONE                      = 0x40,
+	XE_GUC_LOAD_STATUS_DPC_READY                        = 0x50,
+	XE_GUC_LOAD_STATUS_DPC_ERROR                        = 0x60,
+	XE_GUC_LOAD_STATUS_EXCEPTION                        = 0x70,
+	XE_GUC_LOAD_STATUS_INIT_DATA_INVALID                = 0x71,
+	XE_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED        = 0x72,
+	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, /* implicit: 0x73 (== MPU_DATA_INVALID) */
+	XE_GUC_LOAD_STATUS_MPU_DATA_INVALID                 = 0x73,
+	XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID   = 0x74,
+	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, /* implicit: 0x75 */
+
+	XE_GUC_LOAD_STATUS_READY                            = 0xF0,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
new file mode 100644
index 000000000000..47094b9b044c
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_KLVS_ABI_H
+#define _ABI_GUC_KLVS_ABI_H
+
+#include <linux/types.h>
+
+/**
+ * DOC: GuC KLV
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | **KEY** - KLV key identifier                                 |
+ *  |   |       |   - `GuC Self Config KLVs`_                                  |
+ *  |   |       |   - `GuC VGT Policy KLVs`_                                   |
+ *  |   |       |   - `GuC VF Configuration KLVs`_                             |
+ *  |   |       |                                                              |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **LEN** - length of VALUE (in 32bit dwords)                  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VALUE** - actual value of the KLV (format depends on KEY)  |
+ *  +---+-------+                                                              |
+ *  |...|       |                                                              |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_KLV_LEN_MIN				1u
+#define GUC_KLV_0_KEY				(0xffffu << 16)	/* unsigned: reaches bit 31 */
+#define GUC_KLV_0_LEN				(0xffffu << 0)
+#define GUC_KLV_n_VALUE				(0xffffffffu << 0)
+
+/**
+ * DOC: GuC Self Config KLVs
+ *
+ * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR` : 0x0900
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      status vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR` : 0x0901
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      source vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
+ *      Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903
+ *      Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904
+ *      Refers to size of H2G `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905
+ *      Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906
+ *      Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907
+ *      Refers to size of G2H `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ */
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY		0x0900
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY		0x0901
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY		0x0902
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY	0x0903
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY		0x0904
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN		1u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY		0x0905
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY	0x0906
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY		0x0907
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN		1u
+
+/*
+ * Per context scheduling policy update keys.
+ */
+enum  {
+	GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM			= 0x2001,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT			= 0x2002,
+	GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY			= 0x2003,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY	= 0x2004,
+	GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY			= 0x2005,
+
+	GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,	/* number of KLV IDs listed above */
+};
+
+/**
+ * DOC: GuC VGT Policy KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
+ *
+ * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
+ *      This config sets whether strict scheduling is enabled whereby any VF
+ *      that doesn’t have work to submit is still allocated a fixed execution
+ *      time-slice to ensure active VFs execution is always consistent even
+ *      during other VF reprovisioning / rebooting events. Changing this KLV
+ *      impacts all VFs and takes effect on the next VF-Switch event.
+ *
+ *      :0: don't schedule idle (default)
+ *      :1: schedule if idle
+ *
+ * _`GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD` : 0x8002
+ *      This config sets the sample period for tracking adverse event counters.
+ *      A sample period is the period in milliseconds during which events are
+ *      counted. This is applicable for all the VFs.
+ *
+ *      :0: adverse events are not counted (default)
+ *      :n: sample period in milliseconds
+ *
+ * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00
+ *      This enum is to reset utilized HW engine after VF Switch (i.e. to clean
+ *      up Stale HW register left behind by previous VF)
+ *
+ *      :0: don't reset (default)
+ *      :1: reset
+ */
+
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY		0x8001
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN		1u
+
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY	0x8002
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN	1u
+
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY	0x8D00
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN	1u
+
+/**
+ * DOC: GuC VF Configuration KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VF_CFG.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_START` : 0x0001
+ *      A 4K aligned start GTT address/offset assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_SIZE` : 0x0002
+ *      A 4K aligned size of GGTT assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_LMEM_SIZE` : 0x0003
+ *      A 2M aligned size of local memory assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_CONTEXTS` : 0x0004
+ *      Refers to the number of contexts allocated to this VF.
+ *
+ *      :0: no contexts (default)
+ *      :1-65535: number of contexts (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_TILE_MASK` : 0x0005
+ *      For multi-tiled products, this field contains the bitwise-OR of tiles
+ *      assigned to the VF. Bit-0-set means VF has access to Tile-0,
+ *      Bit-31-set means VF has access to Tile-31, and etc.
+ *      At least one tile will always be allocated.
+ *      If all bits are zero, VF KMD should treat this as a fatal error.
+ *      For single-tile products, this KLV config is ignored.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_DOORBELLS` : 0x0006
+ *      Refers to the number of doorbells allocated to this VF.
+ *
+ *      :0: no doorbells (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_EXEC_QUANTUM` : 0x8A01
+ *      This config sets the VFs-execution-quantum in milliseconds.
+ *      GUC will attempt to obey the maximum values as much as HW is capable
+ *      of and this will never be perfectly-exact (accumulated nano-second
+ *      granularity) since the GPUs clock time runs off a different crystal
+ *      from the CPUs clock. Changing this KLV on a VF that is currently
+ *      running a context won't take effect until a new context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      infinitely long compute or shader kernel. In such a scenario, the
+ *      PF would need to trigger a VM PAUSE and then change the KLV to force
+ *      it to take effect. Such cases might typically happen on a 1PF+1VF
+ *      Virtualization config enabled for heavier workloads like AI/ML.
+ *
+ *      :0: infinite exec quantum (default)
+ *
+ * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02
+ *      This config sets the VF-preemption-timeout in microseconds.
+ *      GUC will attempt to obey the minimum and maximum values as much as
+ *      HW is capable and this will never be perfectly-exact (accumulated
+ *      nano-second granularity) since the GPUs clock time runs off a
+ *      different crystal from the CPUs clock. Changing this KLV on a VF
+ *      that is currently running a context won't take effect until a new
+ *      context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      infinitely long compute or shader kernel.
+ *      In this case, the PF would need to trigger a VM PAUSE and then change
+ *      the KLV to force it to take effect. Such cases might typically happen
+ *      on a 1PF+1VF Virtualization config enabled for heavier workloads like
+ *      AI/ML.
+ *
+ *      :0: no preemption timeout (default)
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03
+ *      This config sets threshold for CAT errors caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET` : 0x8A04
+ *      This config sets threshold for engine reset caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT` : 0x8A05
+ *      This config sets threshold for page fault errors caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM` : 0x8A06
+ *      This config sets threshold for H2G interrupts triggered by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM` : 0x8A07
+ *      This config sets threshold for GT interrupts triggered by the VF's
+ *      workloads.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM` : 0x8A08
+ *      This config sets threshold for doorbell's ring triggered by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID` : 0x8A0A
+ *      Refers to the start index of doorbell assigned to this VF.
+ *
+ *      :0: (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID` : 0x8A0B
+ *      Refers to the start index in context array allocated to this VF’s use.
+ *
+ *      :0: (default)
+ *      :1-65535: number of contexts (Gen12)
+ */
+
+#define GUC_KLV_VF_CFG_GGTT_START_KEY		0x0001
+#define GUC_KLV_VF_CFG_GGTT_START_LEN		2u	/* 64-bit value */
+
+#define GUC_KLV_VF_CFG_GGTT_SIZE_KEY		0x0002
+#define GUC_KLV_VF_CFG_GGTT_SIZE_LEN		2u	/* 64-bit value */
+
+#define GUC_KLV_VF_CFG_LMEM_SIZE_KEY		0x0003
+#define GUC_KLV_VF_CFG_LMEM_SIZE_LEN		2u	/* 64-bit value */
+
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY		0x0004
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN		1u
+
+#define GUC_KLV_VF_CFG_TILE_MASK_KEY		0x0005
+#define GUC_KLV_VF_CFG_TILE_MASK_LEN		1u
+
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY	0x0006
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN	1u
+
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY		0x8a01
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN		1u
+
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY	0x8a02
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN	1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY		0x8a03
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_KEY	0x8a04
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_LEN	1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_KEY		0x8a05
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_KEY		0x8a06
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_KEY		0x8a07
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_KEY	0x8a08
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_LEN	1u
+
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY	0x8a0a
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_LEN	1u
+
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY	0x8a0b
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN	1u
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
new file mode 100644
index 000000000000..3d199016cf88
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_MESSAGES_ABI_H
+#define _ABI_GUC_MESSAGES_ABI_H
+
+/**
+ * DOC: HXG Message
+ *
+ * All messages exchanged with GuC are defined using 32 bit dwords.
+ * First dword is treated as a message header. Remaining dwords are optional.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  |   |       |                                                              |
+ *  | 0 |    31 | **ORIGIN** - originator of the message                       |
+ *  |   |       |   - _`GUC_HXG_ORIGIN_HOST` = 0                               |
+ *  |   |       |   - _`GUC_HXG_ORIGIN_GUC` = 1                                |
+ *  |   |       |                                                              |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | **TYPE** - message type                                      |
+ *  |   |       |   - _`GUC_HXG_TYPE_REQUEST` = 0                              |
+ *  |   |       |   - _`GUC_HXG_TYPE_EVENT` = 1                                |
+ *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3                     |
+ *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5                    |
+ *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6                     |
+ *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7                     |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **AUX** - auxiliary data (depends on TYPE)                   |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **PAYLOAD** - optional payload (depends on TYPE)             |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_MSG_MIN_LEN			1u
+#define GUC_HXG_MSG_0_ORIGIN			(0x1 << 31)
+#define   GUC_HXG_ORIGIN_HOST			0u
+#define   GUC_HXG_ORIGIN_GUC			1u
+#define GUC_HXG_MSG_0_TYPE			(0x7 << 28)
+#define   GUC_HXG_TYPE_REQUEST			0u
+#define   GUC_HXG_TYPE_EVENT			1u
+#define   GUC_HXG_TYPE_NO_RESPONSE_BUSY		3u
+#define   GUC_HXG_TYPE_NO_RESPONSE_RETRY	5u
+#define   GUC_HXG_TYPE_RESPONSE_FAILURE		6u
+#define   GUC_HXG_TYPE_RESPONSE_SUCCESS		7u
+#define GUC_HXG_MSG_0_AUX			(0xfffffff << 0)
+#define GUC_HXG_MSG_n_PAYLOAD			(0xffffffff << 0)
+
+/**
+ * DOC: HXG Request
+ *
+ * The `HXG Request`_ message should be used to initiate synchronous activity
+ * for which confirmation or return data is expected.
+ *
+ * The recipient of this message shall use `HXG Response`_, `HXG Failure`_
+ * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_
+ * message as an intermediate reply.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **DATA0** - request data (depends on ACTION)                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ACTION** - requested action code                           |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - optional data (depends on ACTION)                |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_REQUEST_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_REQUEST_MSG_0_DATA0		(0xfff << 16)
+#define GUC_HXG_REQUEST_MSG_0_ACTION		(0xffff << 0)
+#define GUC_HXG_REQUEST_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Event
+ *
+ * The `HXG Event`_ message should be used to initiate asynchronous activity
+ * that involves neither immediate confirmation nor data.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **DATA0** - event data (depends on ACTION)                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ACTION** - event action code                               |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - optional event data (depends on ACTION)          |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_EVENT_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_EVENT_MSG_0_DATA0		(0xfff << 16)
+#define GUC_HXG_EVENT_MSG_0_ACTION		(0xffff << 0)
+#define GUC_HXG_EVENT_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Busy
+ *
+ * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_
+ * message if the recipient expects that its processing will take longer than the
+ * default timeout.
+ *
+ * The @COUNTER field may be used as a progress indicator.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **COUNTER** - progress indicator                             |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_BUSY_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_BUSY_MSG_0_COUNTER		GUC_HXG_MSG_0_AUX
+
+/**
+ * DOC: HXG Retry
+ *
+ * The `HXG Retry`_ message should be used by recipient to indicate that the
+ * `HXG Request`_ message was dropped and it should be resent again.
+ *
+ * The @REASON field may be used to provide additional information.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **REASON** - reason for retry                                |
+ *  |   |       |  - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0                   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RETRY_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RETRY_MSG_0_REASON		GUC_HXG_MSG_0_AUX
+#define   GUC_HXG_RETRY_REASON_UNSPECIFIED	0u
+
+/**
+ * DOC: HXG Failure
+ *
+ * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_
+ * message that could not be processed due to an error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **HINT** - additional error hint                             |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ERROR** - error/result code                                |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_FAILURE_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_FAILURE_MSG_0_HINT		(0xfff << 16)
+#define GUC_HXG_FAILURE_MSG_0_ERROR		(0xffff << 0)
+
+/**
+ * DOC: HXG Response
+ *
+ * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_
+ * message that was successfully processed without an error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_)     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - data (depends on ACTION from `HXG Request`_)     |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RESPONSE_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RESPONSE_MSG_0_DATA0		GUC_HXG_MSG_0_AUX
+#define GUC_HXG_RESPONSE_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/* deprecated */
+#define INTEL_GUC_MSG_TYPE_SHIFT	28
+#define INTEL_GUC_MSG_TYPE_MASK		(0xF << INTEL_GUC_MSG_TYPE_SHIFT)
+#define INTEL_GUC_MSG_DATA_SHIFT	16
+#define INTEL_GUC_MSG_DATA_MASK		(0xFFF << INTEL_GUC_MSG_DATA_SHIFT)
+#define INTEL_GUC_MSG_CODE_SHIFT	0
+#define INTEL_GUC_MSG_CODE_MASK		(0xFFFF << INTEL_GUC_MSG_CODE_SHIFT)
+
+enum intel_guc_msg_type {
+	INTEL_GUC_MSG_TYPE_REQUEST = 0x0,
+	INTEL_GUC_MSG_TYPE_RESPONSE = 0xF,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
new file mode 100644
index 000000000000..47056b6459e3
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_bo_test.o xe_dma_buf_test.o \
+	xe_migrate_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
new file mode 100644
index 000000000000..87ac21cc8ca9
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_bo_evict.h"
+#include "xe_pci.h"
+
+/*
+ * Run one CCS migration round on @bo: validate it into VRAM, optionally clear
+ * it together with its CCS data, evict it to system memory and wait for the
+ * migration blits. The first and the last qword (capped to one page) of the
+ * evicted CCS data must then equal @get_val; both are overwritten with
+ * @assign_val for the next round. Returns 0 or a negative errno. A readout
+ * mismatch is reported through KUNIT_FAIL() and also returned as -EINVAL.
+ */
+static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
+			    bool clear, u64 get_val, u64 assign_val,
+			    struct kunit *test)
+{
+	struct dma_fence *clear_fence;
+	struct ttm_tt *tt;
+	pgoff_t ccs_pgidx;
+	long remaining;
+	u64 *ccs;
+	u32 last;
+	int err;
+
+	/* Move bo to VRAM if not already there. */
+	err = xe_bo_validate(bo, NULL, false);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to validate bo.\n");
+		return err;
+	}
+
+	/* Optionally clear bo *and* CCS data in VRAM. */
+	if (clear) {
+		clear_fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0);
+		if (IS_ERR(clear_fence)) {
+			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
+			return PTR_ERR(clear_fence);
+		}
+		dma_fence_put(clear_fence);
+	}
+
+	/* Evict to system. CCS data should be copied. */
+	err = xe_bo_evict(bo, true);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to evict bo.\n");
+		return err;
+	}
+
+	/* Sync all migration blits */
+	remaining = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+					  true, 5 * HZ);
+	if (remaining <= 0) {
+		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
+		return -ETIME;
+	}
+
+	/*
+	 * Bo with CCS data is now in system memory. Verify backing store
+	 * and data integrity. Then assign for the next testing round while
+	 * we still have a CPU map.
+	 */
+	tt = bo->ttm.ttm;
+	if (!tt || !ttm_tt_is_populated(tt)) {
+		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
+		return -EINVAL;
+	}
+
+	ccs_pgidx = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
+	if (ccs_pgidx >= tt->num_pages) {
+		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
+		return -EINVAL;
+	}
+
+	ccs = kmap_local_page(tt->pages[ccs_pgidx]);
+
+	/* Check first CCS value */
+	if (ccs[0] != get_val) {
+		KUNIT_FAIL(test, "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+			   (unsigned long long)get_val, (unsigned long long)ccs[0]);
+		err = -EINVAL;
+	}
+
+	/* Check last CCS value, or at least last value in page. */
+	last = xe_device_ccs_bytes(gt->xe, bo->size);
+	last = min_t(u32, last, PAGE_SIZE) / sizeof(u64) - 1;
+	if (ccs[last] != get_val) {
+		KUNIT_FAIL(test, "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+			   (unsigned long long)get_val, (unsigned long long)ccs[last]);
+		err = -EINVAL;
+	}
+
+	ccs[0] = assign_val;
+	ccs[last] = assign_val;
+	kunmap_local(ccs);
+
+	return err;
+}
+
+/*
+ * Create a locked 1 MiB bo with the VRAM placement flag of @gt and run three
+ * CCS migration rounds on it: CCS data must read back as zero after creation,
+ * must survive an eviction once written, and must read back as zero again
+ * after an explicit clear. Stops at the first failing round.
+ */
+static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt,
+			    struct kunit *test)
+{
+	const u64 pattern = 0xdeadbeefdeadbeefULL;
+	struct xe_bo *bo;
+	u32 placement;
+
+	/* TODO: Sanity check */
+	placement = XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id;
+	kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id, gt->info.vram_id);
+
+	bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device, placement);
+	if (IS_ERR(bo)) {
+		KUNIT_FAIL(test, "Failed to create bo.\n");
+		return;
+	}
+
+	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
+	if (ccs_test_migrate(gt, bo, false, 0ULL, pattern, test))
+		goto out_unlock;
+
+	kunit_info(test, "Verifying that CCS data survives migration.\n");
+	if (ccs_test_migrate(gt, bo, false, pattern, pattern, test))
+		goto out_unlock;
+
+	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
+	ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test);
+
+out_unlock:
+	xe_bo_unlock_no_vm(bo);
+	xe_bo_put(bo);
+}
+
+/* Per-device callback: run the CCS test on each gt of a flat-CCS device. */
+static int ccs_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_gt *gt;
+	int gt_id;
+
+	if (xe_device_has_flat_ccs(xe)) {
+		for_each_gt(gt, xe, gt_id)
+			ccs_test_run_gt(xe, gt, test);
+		return 0;
+	}
+
+	kunit_info(test, "Skipping non-flat-ccs device.\n");
+	return 0;
+}
+
+void xe_ccs_migrate_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(ccs_test_run_device);
+}
+EXPORT_SYMBOL(xe_ccs_migrate_kunit);
+
+/*
+ * Two passes of: create a bo against the migrate vm and an external bo (no vm)
+ * that gets pinned, evict everything, restore kernel and user bos and check
+ * where both bos ended up. The second pass also revalidates both bos.
+ */
+static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test)
+{
+	unsigned int flags = XE_BO_CREATE_USER_BIT | XE_BO_CREATE_VRAM_IF_DGFX(gt);
+	struct xe_vm *vm = xe_migrate_get_vm(xe->gt[0].migrate);
+	struct xe_bo *bo, *ext_bo;
+	struct ww_acquire_ctx ww;
+	int err, pass;
+
+	kunit_info(test, "Testing device %s gt id %u vram id %u\n",
+		   dev_name(xe->drm.dev), gt->info.id, gt->info.vram_id);
+
+	for (pass = 0; pass < 2; ++pass) {
+		xe_vm_lock(vm, &ww, 0, false);
+		bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device, flags);
+		xe_vm_unlock(vm, &ww);
+		if (IS_ERR(bo)) {
+			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
+			break;
+		}
+
+		ext_bo = xe_bo_create(xe, NULL, NULL, 0x10000, ttm_bo_type_device, flags);
+		if (IS_ERR(ext_bo)) {
+			KUNIT_FAIL(test, "external bo create err=%pe\n", ext_bo);
+			goto put_bo;
+		}
+
+		xe_bo_lock(ext_bo, &ww, 0, false);
+		err = xe_bo_pin_external(ext_bo);
+		xe_bo_unlock(ext_bo, &ww);
+		if (err) {
+			KUNIT_FAIL(test, "external bo pin err=%pe\n", ERR_PTR(err));
+			goto put_ext;
+		}
+
+		err = xe_bo_evict_all(xe);
+		if (err) {
+			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
+			goto unpin_ext;
+		}
+
+		err = xe_bo_restore_kernel(xe);
+		if (err) {
+			KUNIT_FAIL(test, "restore kernel err=%pe\n", ERR_PTR(err));
+			goto unpin_ext;
+		}
+
+		err = xe_bo_restore_user(xe);
+		if (err) {
+			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
+			goto unpin_ext;
+		}
+
+		/* The pinned external bo must be in VRAM after the restore... */
+		if (!xe_bo_is_vram(ext_bo)) {
+			KUNIT_FAIL(test, "external bo is not vram\n");
+			err = -EPROTO;
+			goto unpin_ext;
+		}
+
+		/* ...while the other bo must not be in VRAM at this point. */
+		if (xe_bo_is_vram(bo)) {
+			KUNIT_FAIL(test, "bo is vram\n");
+			err = -EPROTO;
+			goto unpin_ext;
+		}
+
+		if (pass) {
+			down_read(&vm->lock);
+			xe_vm_lock(vm, &ww, 0, false);
+			err = xe_bo_validate(bo, bo->vm, false);
+			xe_vm_unlock(vm, &ww);
+			up_read(&vm->lock);
+			if (err) {
+				KUNIT_FAIL(test, "bo valid err=%pe\n", ERR_PTR(err));
+				goto unpin_ext;
+			}
+			xe_bo_lock(ext_bo, &ww, 0, false);
+			err = xe_bo_validate(ext_bo, NULL, false);
+			xe_bo_unlock(ext_bo, &ww);
+			if (err) {
+				KUNIT_FAIL(test, "external bo valid err=%pe\n", ERR_PTR(err));
+				goto unpin_ext;
+			}
+		}
+
+		xe_bo_lock(ext_bo, &ww, 0, false);
+		xe_bo_unpin_external(ext_bo);
+		xe_bo_unlock(ext_bo, &ww);
+
+		xe_bo_put(ext_bo);
+		xe_bo_put(bo);
+		continue;
+
+unpin_ext:
+		xe_bo_lock(ext_bo, &ww, 0, false);
+		xe_bo_unpin_external(ext_bo);
+		xe_bo_unlock(ext_bo, &ww);
+put_ext:
+		xe_bo_put(ext_bo);
+put_bo:
+		xe_bo_put(bo);
+		break;
+	}
+
+	xe_vm_put(vm);
+
+	return 0;
+}
+
+/* Per-device callback: run the eviction test on each gt of a discrete device. */
+static int evict_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_gt *gt;
+	int gt_id;
+
+	if (IS_DGFX(xe)) {
+		for_each_gt(gt, xe, gt_id)
+			evict_test_run_gt(xe, gt, test);
+		return 0;
+	}
+
+	kunit_info(test, "Skipping non-discrete device %s.\n",
+		   dev_name(xe->drm.dev));
+	return 0;
+}
+
+void xe_bo_evict_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(evict_test_run_device);
+}
+EXPORT_SYMBOL(xe_bo_evict_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
new file mode 100644
index 000000000000..c8fa29b0b3b2
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+void xe_ccs_migrate_kunit(struct kunit *test);
+void xe_bo_evict_kunit(struct kunit *test);
+
+static struct kunit_case xe_bo_tests[] = {
+	KUNIT_CASE(xe_ccs_migrate_kunit),
+	KUNIT_CASE(xe_bo_evict_kunit),
+	{}
+};
+
+static struct kunit_suite xe_bo_test_suite = {
+	.name = "xe_bo",
+	.test_cases = xe_bo_tests,
+};
+
+kunit_test_suite(xe_bo_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
new file mode 100644
index 000000000000..615d22e3f731
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_pci.h"
+
+static bool p2p_enabled(struct dma_buf_test_params *params)
+{
+	return IS_ENABLED(CONFIG_PCI_P2PDMA) && params->attach_ops &&
+		params->attach_ops->allow_peer2peer;
+}
+
+static bool is_dynamic(struct dma_buf_test_params *params)
+{
+	return IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY) && params->attach_ops &&
+		params->attach_ops->move_notify;
+}
+
+/*
+ * Check the placement of @exported, then evict it and revalidate via @imported.
+ */
+static void check_residency(struct kunit *test, struct xe_bo *exported,
+			    struct xe_bo *imported, struct dma_buf *dmabuf)
+{
+	struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+	bool cross_dev = params->force_different_devices;
+	u32 mem_type = XE_PL_VRAM0;
+	int err;
+
+	xe_bo_assert_held(exported);
+	xe_bo_assert_held(imported);
+
+	if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+		/* No VRAM allowed */
+		mem_type = XE_PL_TT;
+	else if (cross_dev && !p2p_enabled(params))
+		/* No P2P */
+		mem_type = XE_PL_TT;
+	else if (cross_dev && !is_dynamic(params) &&
+		 (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+		/* Pin migrated to TT */
+		mem_type = XE_PL_TT;
+
+	if (!xe_bo_is_mem_type(exported, mem_type)) {
+		KUNIT_FAIL(test, "Exported bo was not in expected memory type.\n");
+		return;
+	}
+
+	if (xe_bo_is_pinned(exported))
+		return;
+
+	/*
+	 * Evict exporter. Note that the gem object dma_buf member isn't
+	 * set from xe_gem_prime_export(), and it's needed for the move_notify()
+	 * functionality, so hack that up here. Evicting the exported bo will
+	 * also evict the imported bo through the move_notify() functionality if
+	 * importer is on a different device. If they're on the same device,
+	 * the exporter and the importer should be the same bo.
+	 */
+	swap(exported->ttm.base.dma_buf, dmabuf);
+	err = xe_bo_evict(exported, true);
+	swap(exported->ttm.base.dma_buf, dmabuf);
+	if (err) {
+		if (err != -EINTR && err != -ERESTARTSYS)
+			KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", err);
+		return;
+	}
+
+	/* Verify that also importer has been evicted to SYSTEM */
+	if (!xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) {
+		KUNIT_FAIL(test, "Importer wasn't properly evicted.\n");
+		return;
+	}
+
+	/* Re-validate the importer. This should move also exporter in. */
+	err = xe_bo_validate(imported, NULL, false);
+	if (err) {
+		if (err != -EINTR && err != -ERESTARTSYS)
+			KUNIT_FAIL(test, "Validating importer failed with err=%d.\n", err);
+		return;
+	}
+
+	/*
+	 * If on different devices, the exporter is kept in system if
+	 * possible, saving a migration step as the transfer is likely
+	 * just as fast from system memory.
+	 */
+	if (cross_dev && (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
+	else
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+
+	if (cross_dev)
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT));
+	else
+		KUNIT_EXPECT_TRUE(test, exported == imported);
+}
+
+/*
+ * Export a bo created with the current test parameters as a dma-buf, import
+ * that dma-buf on @xe again and check that the import result and the
+ * resulting bo residency match what the parameters allow.
+ */
+static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+	bool must_fail = params->force_different_devices && !p2p_enabled(params) &&
+		!(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT);
+	struct drm_gem_object *import;
+	struct dma_buf *dmabuf;
+	struct xe_bo *bo;
+
+	/* No VRAM on this device? */
+	if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) &&
+	    (params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+		return;
+
+	kunit_info(test, "running %s\n", __func__);
+	bo = xe_bo_create(xe, NULL, NULL, PAGE_SIZE, ttm_bo_type_device,
+			  XE_BO_CREATE_USER_BIT | params->mem_mask);
+	if (IS_ERR(bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo));
+		return;
+	}
+
+	dmabuf = xe_gem_prime_export(&bo->ttm.base, 0);
+	if (IS_ERR(dmabuf)) {
+		KUNIT_FAIL(test, "xe_gem_prime_export() failed with err=%ld\n",
+			   PTR_ERR(dmabuf));
+		goto out;
+	}
+
+	import = xe_gem_prime_import(&xe->drm, dmabuf);
+	if (!IS_ERR(import)) {
+		struct xe_bo *import_bo = gem_to_xe_bo(import);
+
+		if (must_fail) {
+			/* Import succeeded although p2p support is lacking. */
+			KUNIT_FAIL(test,
+				   "xe_gem_prime_import() succeeded when it shouldn't have\n");
+		} else {
+			int err;
+
+			/* Is everything where we expect it to be? */
+			xe_bo_lock_no_vm(import_bo, NULL);
+			err = xe_bo_validate(import_bo, NULL, false);
+			if (err && err != -EINTR && err != -ERESTARTSYS)
+				KUNIT_FAIL(test,
+					   "xe_bo_validate() failed with err=%d\n", err);
+
+			check_residency(test, bo, import_bo, dmabuf);
+			xe_bo_unlock_no_vm(import_bo);
+		}
+		drm_gem_object_put(import);
+	} else if (PTR_ERR(import) != -EOPNOTSUPP) {
+		/* Unexpected error code. */
+		KUNIT_FAIL(test,
+			   "xe_gem_prime_import failed with the wrong err=%ld\n",
+			   PTR_ERR(import));
+	} else if (!must_fail) {
+		/* Shouldn't fail if we can reuse same bo, use p2p or use system */
+		KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n",
+			   PTR_ERR(import));
+	}
+	dma_buf_put(dmabuf);
+out:
+	drm_gem_object_put(&bo->ttm.base);
+}
+
+static const struct dma_buf_attach_ops nop2p_attach_ops = {
+	.allow_peer2peer = false,
+	.move_notify = xe_dma_buf_move_notify
+};
+
+/*
+ * We test the implementation with bos of different residency and with
+ * importers with different capabilities; some lacking p2p support and some
+ * lacking dynamic capabilities (attach_ops == NULL). We also fake
+ * different devices avoiding the import shortcut that just reuses the same
+ * gem object.
+ */
+static const struct dma_buf_test_params test_params[] = {
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .force_different_devices = true},
+
+	{}
+};
+
+static int dma_buf_run_device(struct xe_device *xe)
+{
+	const struct dma_buf_test_params *params;
+	struct kunit *test = xe_cur_kunit();
+	void *saved_priv = test->priv;
+
+	for (params = test_params; params->mem_mask; ++params) {
+		struct dma_buf_test_params p = *params;
+
+		p.base.id = XE_TEST_LIVE_DMA_BUF;
+		test->priv = &p;
+		xe_test_dmabuf_import_same_driver(xe);
+	}
+	test->priv = saved_priv; /* 'p' is gone; don't leave a pointer to it */
+	return 0; /* a non-zero return would halt iteration over driver devices */
+}
+
+void xe_dma_buf_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(dma_buf_run_device);
+}
+EXPORT_SYMBOL(xe_dma_buf_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
new file mode 100644
index 000000000000..7bb292da1193
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+void xe_dma_buf_kunit(struct kunit *test);
+
+static struct kunit_case xe_dma_buf_tests[] = {
+	KUNIT_CASE(xe_dma_buf_kunit),
+	{}
+};
+
+static struct kunit_suite xe_dma_buf_test_suite = {
+	.name = "xe_dma_buf",
+	.test_cases = xe_dma_buf_tests,
+};
+
+kunit_test_suite(xe_dma_buf_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
new file mode 100644
index 000000000000..0f3b819f0a34
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020-2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_pci.h"
+
+static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence,
+				const char *str, struct kunit *test)
+{
+	long remaining;
+
+	/* A NULL fence fails silently; an ERR_PTR fence is reported */
+	if (!fence)
+		return true;
+	if (IS_ERR(fence)) {
+		KUNIT_FAIL(test, "Failed to create fence for %s: %li\n", str,
+			   PTR_ERR(fence));
+		return true;
+	}
+
+	/* Anything but a positive result from the 5 * HZ wait is a failure */
+	remaining = dma_fence_wait_timeout(fence, false, 5 * HZ);
+	if (remaining > 0)
+		return false;
+	KUNIT_FAIL(test, "Fence timed out for %s: %li\n", str, remaining);
+	return true;
+}
+
+static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
+			  struct xe_bb *bb, u32 second_idx, const char *str,
+			  struct kunit *test)
+{
+	struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb,
+							      m->batch_base_ofs,
+							      second_idx);
+	struct dma_fence *fence;
+	int err;
+
+	if (IS_ERR(job)) {
+		KUNIT_FAIL(test, "Failed to create job for %s: %li\n", str, PTR_ERR(job));
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	/* Drop our fence reference whether or not the wait succeeded */
+	err = sanity_fence_failed(xe, fence, str, test) ? -ETIMEDOUT : 0;
+	dma_fence_put(fence);
+	if (!err)
+		kunit_info(test, "%s: Job completed\n", str);
+	return err;
+}
+
+static void
+sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
+		   struct xe_gt *gt, struct iosys_map *map, void *dst,
+		   u32 qword_ofs, u32 num_qwords,
+		   const struct xe_vm_pgtable_update *update)
+{
+	u64 *out = dst;	/* @dst is filled as an array of qwords */
+	int n;
+
+	for (n = 0; n < num_qwords; n++)
+		out[n] = (qword_ofs + n - update->ofs) * 0x1111111111111111ULL;
+}
+
+static const struct xe_migrate_pt_update_ops sanity_ops = {
+	.populate = sanity_populate_cb, /* the only callback this test sets */
+};
+
+#define check(_retval, _expected, str, _test)				\
+	do { u64 _got = (_retval), _want = (_expected); /* evaluate once */ \
+		if (_got != _want) {					\
+			KUNIT_FAIL(_test, "Sanity check failed: " str	\
+				   " expected %llx, got %llx\n", _want, _got); \
+		} } while (0)
+
+/* Clear a scratch sysmem bo, then copy sysmem -> @bo and @bo -> sysmem, checking both ends */
+static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
+		      struct kunit *test)
+{
+	struct xe_device *xe = gt_to_xe(m->gt);
+	u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL;
+	bool big = bo->size >= SZ_2M;
+	struct dma_fence *fence;
+	const char *str = big ? "Copying big bo" : "Copying small bo";
+	int err;
+
+	struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL,
+						   bo->size,
+						   ttm_bo_type_kernel,
+						   XE_BO_CREATE_SYSTEM_BIT);
+	if (IS_ERR(sysmem)) {
+		KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
+			   str, PTR_ERR(sysmem));
+		return;
+	}
+
+	err = xe_bo_validate(sysmem, NULL, false);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to validate system bo for %s: %d\n", str, err);
+		goto out_unlock;
+	}
+
+	err = xe_bo_vmap(sysmem);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to vmap system bo for %s: %d\n", str, err);
+		goto out_unlock;
+	}
+
+	xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
+	fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0);
+	if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" :
+				 "Clearing sysmem small bo", test)) {
+		retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
+		check(retval, expected, "sysmem first offset should be cleared",
+		      test);
+		retval = xe_map_rd(xe, &sysmem->vmap, sysmem->size - 8, u64);
+		check(retval, expected, "sysmem last offset should be cleared",
+		      test);
+	}
+	if (!IS_ERR(fence))	/* an ERR_PTR must never reach dma_fence_put() */
+		dma_fence_put(fence);
+
+	/* Try to copy 0xc0 from sysmem to lmem with 2MB or 64KiB/4KiB pages */
+	xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size);
+	xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
+
+	fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource, bo->ttm.resource);
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" :
+				 "Copying small bo sysmem -> vram", test)) {
+		retval = xe_map_rd(xe, &bo->vmap, 0, u64);
+		check(retval, expected,
+		      "sysmem -> vram bo first offset should be copied", test);
+		retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64);
+		check(retval, expected,
+		      "sysmem -> vram bo last offset should be copied", test);
+	}
+	if (!IS_ERR(fence))
+		dma_fence_put(fence);
+
+	/* And other way around.. slightly hacky.. */
+	xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
+	xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
+
+	fence = xe_migrate_copy(m, sysmem, bo->ttm.resource, sysmem->ttm.resource);
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> sysmem" :
+				 "Copying small bo vram -> sysmem", test)) {
+		retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
+		check(retval, expected,
+		      "vram -> sysmem bo first offset should be copied", test);
+		retval = xe_map_rd(xe, &sysmem->vmap, bo->size - 8, u64);
+		check(retval, expected,
+		      "vram -> sysmem bo last offset should be copied", test);
+	}
+	if (!IS_ERR(fence))
+		dma_fence_put(fence);
+
+	xe_bo_vunmap(sysmem);
+out_unlock:
+	xe_bo_unlock_no_vm(sysmem);
+	xe_bo_put(sysmem);
+}
+
+static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
+			   struct kunit *test)
+{
+	struct xe_device *xe = gt_to_xe(m->gt);
+	struct dma_fence *fence;
+	u64 retval, expected;
+	int i;
+
+	struct xe_vm_pgtable_update update = {
+		.ofs = 1,
+		.qwords = 0x10,
+		.pt_bo = pt,
+	};
+	struct xe_migrate_pt_update pt_update = { .ops = &sanity_ops };
+
+	/* Test xe_migrate_update_pgtables() updates the pagetable as expected */
+	expected = 0xf0f0f0f0f0f0f0f0ULL;
+	xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
+
+	fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1,
+					   NULL, 0, &pt_update);
+	if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
+		goto out_put;
+
+	retval = xe_map_rd(xe, &pt->vmap, 0, u64);
+	check(retval, expected, "PTE[0] must stay untouched", test);
+
+	for (i = 0; i < update.qwords; i++) {
+		retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64);
+		check(retval, i * 0x1111111111111111ULL, "PTE update", test);
+	}
+
+	retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords), u64);
+	check(retval, expected, "PTE[0x11] must stay untouched", test);
+
+out_put:
+	if (!IS_ERR(fence))	/* also drops the ref of a timed-out fence */
+		dma_fence_put(fence);
+}
+
+static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
+{
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny;
+	struct xe_res_cursor src_it;
+	struct dma_fence *fence;
+	u64 retval, expected;
+	struct xe_bb *bb;
+	bool failed;
+	int err;
+	u8 id = gt->info.id;
+
+	err = xe_bo_vmap(bo);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to vmap our pagetables: %d\n", err);
+		return;
+	}
+
+	big = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, SZ_4M,
+				   ttm_bo_type_kernel,
+				   XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				   XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(big)) {
+		KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
+		goto vunmap;
+	}
+
+	pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, GEN8_PAGE_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(pt)) {
+		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt));
+		goto free_big;
+	}
+
+	tiny = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, 2 * SZ_4K,
+				    ttm_bo_type_kernel,
+				    XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				    XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(tiny)) {
+		KUNIT_FAIL(test, "Failed to allocate tiny bo: %li\n", PTR_ERR(tiny));
+		goto free_pt;
+	}
+
+	bb = xe_bb_new(m->gt, 32, xe->info.supports_usm);
+	if (IS_ERR(bb)) {
+		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n", PTR_ERR(bb));
+		goto free_tiny;
+	}
+
+	kunit_info(test, "Starting tests, top level PT addr: %llx, special pagetable base addr: %llx\n",
+		   xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE),
+		   xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE));
+
+	/* First part of the test, are we updating our pagetable bo with a new entry? */
+	xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef);
+	expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
+	if (m->eng->vm->flags & XE_VM_FLAGS_64K)
+		expected |= GEN12_PTE_PS64;
+	xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
+	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt),
+		 &src_it, GEN8_PAGE_SIZE, pt);
+	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
+
+	retval = xe_map_rd(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1),
+			   u64);
+	check(retval, expected, "PTE entry write", test);
+
+	/* Now try to write data to our newly mapped 'pagetable', see if it succeeds */
+	bb->len = 0;
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+	xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
+	expected = 0x12345678U;
+
+	emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
+		   expected, IS_DGFX(xe));
+	run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", test);
+
+	retval = xe_map_rd(xe, &pt->vmap, 0, u32);
+	check(retval, expected, "Write to PT after adding PTE", test);
+
+	/* Sanity checks passed, try the full ones! */
+
+	/* Clear a small bo */
+	kunit_info(test, "Clearing small buffer object\n");
+	xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
+	expected = 0x224488ff;
+	fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected);
+	failed = sanity_fence_failed(xe, fence, "Clearing small bo", test);
+	/* Drop our fence reference even if the wait failed; an ERR_PTR has none */
+	if (!IS_ERR(fence))
+		dma_fence_put(fence);
+	if (failed)
+		goto out;
+
+	retval = xe_map_rd(xe, &tiny->vmap, 0, u32);
+	check(retval, expected, "Command clear small first value", test);
+	retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32);
+	check(retval, expected, "Command clear small last value", test);
+
+	if (IS_DGFX(xe)) {
+		kunit_info(test, "Copying small buffer object to system\n");
+		test_copy(m, tiny, test);
+	}
+
+	/* Clear a big bo with a fixed value */
+	kunit_info(test, "Clearing big buffer object\n");
+	xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
+	expected = 0x11223344U;
+	fence = xe_migrate_clear(m, big, big->ttm.resource, expected);
+	failed = sanity_fence_failed(xe, fence, "Clearing big bo", test);
+	if (!IS_ERR(fence))
+		dma_fence_put(fence);
+	if (failed)
+		goto out;
+
+	retval = xe_map_rd(xe, &big->vmap, 0, u32);
+	check(retval, expected, "Command clear big first value", test);
+	retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32);
+	check(retval, expected, "Command clear big last value", test);
+
+	if (IS_DGFX(xe)) {
+		kunit_info(test, "Copying big buffer object to system\n");
+		test_copy(m, big, test);
+	}
+
+	test_pt_update(m, pt, test);
+
+out:
+	xe_bb_free(bb, NULL);
+free_tiny:
+	xe_bo_unpin(tiny);
+	xe_bo_put(tiny);
+free_pt:
+	xe_bo_unpin(pt);
+	xe_bo_put(pt);
+free_big:
+	xe_bo_unpin(big);
+	xe_bo_put(big);
+vunmap:
+	xe_bo_vunmap(m->pt_bo);
+}
+
+static int migrate_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_gt *gt;
+	int id;
+
+	for_each_gt(gt, xe, id) {
+		struct xe_vm *vm = gt->migrate->eng->vm;
+		struct ww_acquire_ctx ww;
+
+		kunit_info(test, "Testing gt id %d.\n", id);
+		xe_vm_lock(vm, &ww, 0, true);	/* held across the whole test */
+		xe_migrate_sanity_test(gt->migrate, test);
+		xe_vm_unlock(vm, &ww);
+	}
+
+	return 0;
+}
+
+void xe_migrate_sanity_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(migrate_test_run_device); /* callback re-reads the kunit itself */
+}
+EXPORT_SYMBOL(xe_migrate_sanity_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
new file mode 100644
index 000000000000..ad779e2bd071
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+void xe_migrate_sanity_kunit(struct kunit *test); /* exported where it is defined */
+
+static struct kunit_case xe_migrate_tests[] = {
+	KUNIT_CASE(xe_migrate_sanity_kunit),
+	{} /* empty entry terminates the case list */
+};
+
+static struct kunit_suite xe_migrate_test_suite = {
+	.name = "xe_migrate",
+	.test_cases = xe_migrate_tests,
+};
+
+kunit_test_suite(xe_migrate_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h
new file mode 100644
index 000000000000..1ec502b5acf3
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_test.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_TEST_H__
+#define __XE_TEST_H__
+
+#include <linux/types.h>
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include <linux/sched.h>
+#include <kunit/test.h>
+
+/*
+ * Each test that provides a kunit private test structure must add a test id
+ * here and point kunit->priv at its embedded struct xe_test_priv.
+ */
+enum xe_test_priv_id {
+	XE_TEST_LIVE_DMA_BUF, /* id the live dma-buf test stores in its priv */
+};
+
+/**
+ * struct xe_test_priv - Base class for test private info
+ * @id: enum xe_test_priv_id to identify the subclass.
+ */
+struct xe_test_priv {
+	enum xe_test_priv_id id;
+};
+
+#define XE_TEST_DECLARE(x) x
+#define XE_TEST_ONLY(x) unlikely(x)
+#define XE_TEST_EXPORT /* empty: the symbol keeps external linkage */
+#define xe_cur_kunit() (current->kunit_test)
+
+/**
+ * xe_cur_kunit_priv - Obtain the struct xe_test_priv that
+ * current->kunit_test->priv points to, if it is of the expected subclass.
+ * @id: Id of the expected subclass.
+ *
+ * Return: NULL if the process is not a kunit test, if the test has not set
+ * a priv pointer, or if priv does not point to an object of the expected
+ * subclass. A pointer to the embedded struct xe_test_priv otherwise.
+ */
+static inline struct xe_test_priv *
+xe_cur_kunit_priv(enum xe_test_priv_id id)
+{
+	struct xe_test_priv *priv;
+
+	if (!xe_cur_kunit())
+		return NULL;
+
+	priv = xe_cur_kunit()->priv;
+	return priv && priv->id == id ? priv : NULL;
+}
+
+#else /* if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) */
+
+#define XE_TEST_DECLARE(x) /* expands to nothing without the kunit config */
+#define XE_TEST_ONLY(x) 0 /* constant false: @x is never evaluated */
+#define XE_TEST_EXPORT static
+#define xe_cur_kunit() NULL
+#define xe_cur_kunit_priv(_id) NULL
+
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
new file mode 100644
index 000000000000..8b9209571fd0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bb.h"
+#include "xe_sa.h"
+#include "xe_device.h"
+#include "xe_engine_types.h"
+#include "xe_hw_fence.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+#include "gt/intel_gpu_commands.h"
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
+{
+	struct xe_bb *batch = kmalloc(sizeof(*batch), GFP_KERNEL);
+	int ret;
+
+	if (!batch)
+		return ERR_PTR(-ENOMEM);
+
+	/* Room for @dwords dwords plus one extra dword (4 bytes) */
+	if (usm)
+		batch->bo = xe_sa_bo_new(&gt->usm.bb_pool, 4 * dwords + 4);
+	else
+		batch->bo = xe_sa_bo_new(&gt->kernel_bb_pool, 4 * dwords + 4);
+	if (IS_ERR(batch->bo)) {
+		ret = PTR_ERR(batch->bo);
+		kfree(batch);
+		return ERR_PTR(ret);
+	}
+
+	batch->cs = xe_sa_bo_cpu_addr(batch->bo);
+	batch->len = 0;
+	return batch;
+}
+
+static struct xe_sched_job *
+__xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr)
+{
+	u32 size = drm_suballoc_size(bb->bo);
+
+	/* The terminating dword appended below must still fit in the bo */
+	XE_BUG_ON((bb->len + 1) * 4 > size);
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+
+	xe_sa_bo_flush_write(bb->bo);
+
+	return xe_sched_job_create(kernel_eng, addr);
+}
+
+struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
+					 struct xe_bb *bb, u64 batch_base_ofs)
+{
+	u64 batch_addr;
+
+	XE_BUG_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+	batch_addr = batch_base_ofs + drm_suballoc_soffset(bb->bo);
+	return __xe_bb_create_job(wa_eng, bb, &batch_addr);
+}
+
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
+						struct xe_bb *bb,
+						u64 batch_base_ofs,
+						u32 second_idx)
+{
+	/*
+	 * addr[0] is the start of the batch, addr[1] the dword at
+	 * @second_idx within the same batch.
+	 */
+	u64 first = batch_base_ofs + drm_suballoc_soffset(bb->bo);
+	u64 addr[2] = { first, first + 4 * second_idx };
+
+	BUG_ON(second_idx > bb->len);
+	BUG_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+	return __xe_bb_create_job(kernel_eng, bb, addr);
+}
+
+struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
+				      struct xe_bb *bb)
+{
+	u64 gpu_addr = xe_sa_bo_gpu_addr(bb->bo);
+
+	BUG_ON(kernel_eng->vm && (kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+	return __xe_bb_create_job(kernel_eng, bb, &gpu_addr);
+}
+
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
+{
+	/* Freeing a NULL batch buffer is a no-op */
+	if (bb) {
+		xe_sa_bo_free(bb->bo, fence);
+		kfree(bb);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
new file mode 100644
index 000000000000..0cc9260c9634
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_H_
+#define _XE_BB_H_
+
+#include "xe_bb_types.h"
+
+struct dma_fence;
+
+struct xe_gt;
+struct xe_engine;
+struct xe_sched_job;
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm);
+struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
+				      struct xe_bb *bb);
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
+						struct xe_bb *bb, u64 batch_base_ofs,
+						u32 second_idx);
+struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
+					 struct xe_bb *bb, u64 batch_base_ofs);
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb_types.h b/drivers/gpu/drm/xe/xe_bb_types.h
new file mode 100644
index 000000000000..b7d30308cf90
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_TYPES_H_
+#define _XE_BB_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_suballoc;
+
+struct xe_bb {
+	struct drm_suballoc *bo; /* suballocation holding the commands */
+
+	u32 *cs; /* CPU address of @bo, written as an array of dwords */
+	u32 len; /* in dwords */
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
new file mode 100644
index 000000000000..ef2c9196c113
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -0,0 +1,1698 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+
+#include "xe_bo.h"
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_dma_buf.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_preempt_fence.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static const struct ttm_place sys_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.mem_type = XE_PL_SYSTEM,
+	.flags = 0,
+};
+
+static struct ttm_placement sys_placement = {	/* copied out by xe_evict_flags() */
+	.num_placement = 1,
+	.placement = &sys_placement_flags,
+	.num_busy_placement = 1,	/* busy list is the same single entry */
+	.busy_placement = &sys_placement_flags,
+};
+
+bool mem_type_is_vram(u32 mem_type)
+{
+	return mem_type >= XE_PL_VRAM0; /* XE_PL_VRAM0 and every higher type */
+}
+
+static bool resource_is_vram(struct ttm_resource *res)
+{
+	return mem_type_is_vram(res->mem_type); /* @res is dereferenced unchecked */
+}
+
+bool xe_bo_is_vram(struct xe_bo *bo)
+{
+	return resource_is_vram(bo->ttm.resource); /* judged by the current resource */
+}
+
+static bool xe_bo_is_user(struct xe_bo *bo)
+{
+	return bo->flags & XE_BO_CREATE_USER_BIT; /* flag bit converted to bool */
+}
+
+static struct xe_gt *
+mem_type_to_gt(struct xe_device *xe, u32 mem_type)
+{
+	XE_BUG_ON(!mem_type_is_vram(mem_type)); /* only VRAM types map to a GT */
+
+	return xe_device_get_gt(xe, mem_type - XE_PL_VRAM0); /* GT index = offset from VRAM0 */
+}
+
+static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
+			   u32 bo_flags, u32 *c)
+{
+	if (!(bo_flags & XE_BO_CREATE_SYSTEM_BIT))
+		return;
+
+	/* A "system" request is expressed as an XE_PL_TT placement */
+	places[*c] = (struct ttm_place) { .mem_type = XE_PL_TT };
+	*c += 1;
+
+	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+		bo->props.preferred_mem_type = XE_PL_TT;
+}
+
+/*
+ * Append @mem_type to @places at index *@c when @vram_bit is set in
+ * @bo_flags, and advance *@c. Does nothing when the bit is not set.
+ * Shared by try_add_vram0() and try_add_vram1(), which differ only in the
+ * memory type and the flag bit they pass in.
+ */
+static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
+			 struct ttm_place *places, u32 bo_flags,
+			 u32 mem_type, u32 vram_bit, u32 *c)
+{
+	struct xe_gt *gt;
+
+	if (!(bo_flags & vram_bit))
+		return;
+
+	gt = mem_type_to_gt(xe, mem_type);
+	XE_BUG_ON(!gt->mem.vram.size);
+
+	places[*c] = (struct ttm_place) {
+		.mem_type = mem_type,
+		/*
+		 * For eviction / restore on suspend / resume objects
+		 * pinned in VRAM must be contiguous
+		 */
+		.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
+				     XE_BO_CREATE_GGTT_BIT) ?
+			TTM_PL_FLAG_CONTIGUOUS : 0,
+	};
+	*c += 1;
+
+	/* Only the first placement added records the preferred memory type */
+	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+		bo->props.preferred_mem_type = mem_type;
+}
+
+/* Offer XE_PL_VRAM0 as a placement if XE_BO_CREATE_VRAM0_BIT is requested */
+static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo,
+			  struct ttm_place *places, u32 bo_flags, u32 *c)
+{
+	try_add_vram(xe, bo, places, bo_flags, XE_PL_VRAM0,
+		     XE_BO_CREATE_VRAM0_BIT, c);
+}
+
+/* Offer XE_PL_VRAM1 as a placement if XE_BO_CREATE_VRAM1_BIT is requested */
+static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo,
+			  struct ttm_place *places, u32 bo_flags, u32 *c)
+{
+	try_add_vram(xe, bo, places, bo_flags, XE_PL_VRAM1,
+		     XE_BO_CREATE_VRAM1_BIT, c);
+}
+
+static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+				       u32 bo_flags)
+{
+	struct ttm_place *places = bo->placements;
+	bool sysmem_first, gt1_first;
+	u32 count = 0;
+
+	sysmem_first = bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM;
+	gt1_first = bo->props.preferred_gt == XE_GT1;
+	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
+
+	/*
+	 * The order of placements should indicate preferred location: system
+	 * memory leads only if it is the preferred class, otherwise it comes
+	 * last; VRAM1 precedes VRAM0 only if GT1 is the preferred GT.
+	 */
+	if (sysmem_first)
+		try_add_system(bo, places, bo_flags, &count);
+	if (gt1_first) {
+		try_add_vram1(xe, bo, places, bo_flags, &count);
+		try_add_vram0(xe, bo, places, bo_flags, &count);
+	} else {
+		try_add_vram0(xe, bo, places, bo_flags, &count);
+		try_add_vram1(xe, bo, places, bo_flags, &count);
+	}
+	if (!sysmem_first)
+		try_add_system(bo, places, bo_flags, &count);
+
+	/* No requested placement at all is an invalid flag combination */
+	if (!count)
+		return -EINVAL;
+
+	bo->placement = (struct ttm_placement) {
+		.num_placement = count,
+		.placement = places,
+		.num_busy_placement = count,
+		.busy_placement = places,
+	};
+
+	return 0;
+}
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+			      u32 bo_flags)
+{
+	xe_bo_assert_held(bo); /* the only addition over the __ variant */
+	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
+}
+
+/*
+ * Fill in @placement for evicting @tbo. Everything is currently sent to
+ * system memory, except non-xe sg bos, which get an empty placement.
+ */
+static void xe_evict_flags(struct ttm_buffer_object *tbo,
+			   struct ttm_placement *placement)
+{
+	if (!xe_bo_is_xe_bo(tbo)) {
+		/* Don't handle scatter gather BOs */
+		if (tbo->type == ttm_bo_type_sg) {
+			placement->num_placement = 0;
+			placement->num_busy_placement = 0;
+			return;
+		}
+
+		*placement = sys_placement;
+		return;
+	}
+
+	/*
+	 * For xe, sg bos that are evicted to system just triggers a
+	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
+	 */
+	switch (tbo->resource->mem_type) {
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+	case XE_PL_TT:
+	default:
+		/* for now kick out to system */
+		*placement = sys_placement;
+		break;
+	}
+}
+
+struct xe_ttm_tt {
+	struct ttm_tt ttm;	/* embedded base object; container_of() recovers us */
+	struct device *dev;	/* device the pages are DMA-mapped for */
+	struct sg_table sgt;	/* storage for the table built from our own pages */
+	struct sg_table *sg;	/* table in use, or NULL: &sgt or a dma-buf mapping */
+};
+
+static int xe_tt_map_sg(struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+	struct sg_table *table = &xe_tt->sgt;
+	unsigned long npages = tt->num_pages;
+	int err;
+
+	XE_BUG_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
+
+	if (xe_tt->sg)
+		return 0;
+
+	err = sg_alloc_table_from_pages(table, tt->pages, npages, 0,
+					(u64)npages << PAGE_SHIFT, GFP_KERNEL);
+	if (err)
+		return err;
+
+	xe_tt->sg = table;
+	err = dma_map_sgtable(xe_tt->dev, table, DMA_BIDIRECTIONAL,
+			      DMA_ATTR_SKIP_CPU_SYNC);
+	if (!err)
+		return 0;
+
+	/* Mapping failed: release the table and mark it unmapped again */
+	sg_free_table(table);
+	xe_tt->sg = NULL;
+	return err;
+}
+
+struct sg_table *xe_bo_get_sg(struct xe_bo *bo)
+{
+	struct xe_ttm_tt *xe_tt;
+
+	xe_tt = container_of(bo->ttm.ttm, struct xe_ttm_tt, ttm);
+	return xe_tt->sg;	/* NULL while no table is mapped */
+}
+
+static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
+				       u32 page_flags)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_device *xe = xe_bo_device(bo);
+	enum ttm_caching caching;
+	unsigned long extra_pages;
+	struct xe_ttm_tt *xe_tt;
+
+	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
+	if (!xe_tt)
+		return NULL;
+
+	xe_tt->dev = xe->drm.dev;
+
+	/* TODO: Select caching mode */
+	caching = bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached;
+	/* Extra pages, sized from xe_device_ccs_bytes() for this bo's size */
+	extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->ttm.base.size),
+				   PAGE_SIZE);
+	if (ttm_tt_init(&xe_tt->ttm, &bo->ttm, page_flags, caching, extra_pages)) {
+		kfree(xe_tt);
+		return NULL;
+	}
+
+	return &xe_tt->ttm;
+}
+
+static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
+			      struct ttm_operation_ctx *ctx)
+{
+	int ret;
+
+	/*
+	 * dma-bufs are not populated with pages, and the dma-
+	 * addresses are set up when moved to XE_PL_TT.
+	 */
+	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+		return 0;
+
+	ret = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
+	if (ret)
+		return ret;
+
+	/* A follow-up may move this to xe_bo_move(), when the BO moves to XE_PL_TT */
+	ret = xe_tt_map_sg(tt);
+	if (!ret)
+		return 0;
+	ttm_pool_free(&ttm_dev->pool, tt);	/* undo the allocation above */
+	return ret;
+}
+
+static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+		return;	/* xe_ttm_tt_populate() set nothing up for these */
+
+	if (xe_tt->sg) {
+		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
+				  DMA_BIDIRECTIONAL, 0);
+		sg_free_table(xe_tt->sg);
+		xe_tt->sg = NULL;
+	}
+
+	ttm_pool_free(&ttm_dev->pool, tt);
+}
+
+static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+	ttm_tt_fini(tt);
+	kfree(container_of(tt, struct xe_ttm_tt, ttm)); /* free the whole wrapper */
+}
+
+static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
+				 struct ttm_resource *mem)
+{
+	struct xe_device *xe = ttm_to_xe_device(bdev);
+	struct xe_gt *gt;
+
+	switch (mem->mem_type) {
+	case XE_PL_SYSTEM:
+	case XE_PL_TT:
+		return 0;
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Only the two VRAM types reach this point */
+	gt = mem_type_to_gt(xe, mem->mem_type);
+	mem->bus.offset = mem->start << PAGE_SHIFT;
+
+	if (gt->mem.vram.mapping && mem->placement & TTM_PL_FLAG_CONTIGUOUS)
+		mem->bus.addr = (u8 *)gt->mem.vram.mapping + mem->bus.offset;
+
+	mem->bus.offset += gt->mem.vram.io_start;
+	mem->bus.is_iomem = true;
+
+#if  !defined(CONFIG_X86)
+	mem->bus.caching = ttm_write_combined;
+#endif
+	return 0;
+}
+
+static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
+				const struct ttm_operation_ctx *ctx)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	struct xe_vma *vma;
+	int ret = 0;
+
+	dma_resv_assert_held(bo->ttm.base.resv);
+
+	/* Outside fault mode, enable sw signaling on the bo's BOOKKEEP fences */
+	if (!xe_device_in_fault_mode(xe) && !list_empty(&bo->vmas)) {
+		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
+				    DMA_RESV_USAGE_BOOKKEEP);
+		dma_resv_for_each_fence_unlocked(&cursor, fence)
+			dma_fence_enable_sw_signaling(fence);
+		dma_resv_iter_end(&cursor);
+	}
+
+	list_for_each_entry(vma, &bo->vmas, bo_link) {
+		struct xe_vm *vm = vma->vm;
+
+		trace_xe_vma_evict(vma);
+
+		if (xe_vm_in_fault_mode(vm)) {
+			/* Wait for pending binds / unbinds. */
+			long timeout;
+
+			if (ctx->no_wait_gpu &&
+			    !dma_resv_test_signaled(bo->ttm.base.resv,
+						    DMA_RESV_USAGE_BOOKKEEP))
+				return -EBUSY;
+
+			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+							DMA_RESV_USAGE_BOOKKEEP,
+							ctx->interruptible,
+							MAX_SCHEDULE_TIMEOUT);
+			if (timeout > 0) {
+				ret = xe_vm_invalidate_vma(vma);
+				XE_WARN_ON(ret);
+			} else if (!timeout) {
+				ret = -ETIME;
+			} else {
+				ret = timeout;
+			}
+
+		} else {
+			bool vm_resv_locked = false;
+
+			/*
+			 * We need to put the vma on the vm's rebind_list,
+			 * but need the vm resv to do so. If we can't verify
+			 * that we indeed have it locked, put the vma on the
+			 * vm's notifier.rebind_list instead and scoop later.
+			 */
+			if (dma_resv_trylock(&vm->resv))
+				vm_resv_locked = true;
+			else if (ctx->resv != &vm->resv) {
+				spin_lock(&vm->notifier.list_lock);
+				list_move_tail(&vma->notifier.rebind_link,
+					       &vm->notifier.rebind_list);
+				spin_unlock(&vm->notifier.list_lock);
+				continue;
+			}
+
+			xe_vm_assert_held(vm);
+			if (list_empty(&vma->rebind_link) && vma->gt_present)
+				list_add_tail(&vma->rebind_link, &vm->rebind_list);
+
+			if (vm_resv_locked)
+				dma_resv_unlock(&vm->resv);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
+ * Note that unmapping the attachment is deferred to the next
+ * map_attachment time, or to bo destroy (after idling) whichever comes first.
+ * This is to avoid syncing before unmap_attachment(), assuming that the
+ * caller relies on idling the reservation object before moving the
+ * backing store out. Should that assumption not hold, then we will be able
+ * to unconditionally call unmap_attachment() when moving out to system.
+ */
+static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
+			     struct ttm_resource *old_res,
+			     struct ttm_resource *new_res)
+{
+	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
+	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
+					       ttm);
+
+	XE_BUG_ON(!attach);
+	XE_BUG_ON(!ttm_bo->ttm);
+
+	/* A move to system keeps the current mapping; unmapping is deferred */
+	if (new_res->mem_type != XE_PL_SYSTEM) {
+		struct sg_table *sg;
+
+		/* Drop the mapping left over from the previous move, if any */
+		if (ttm_bo->sg) {
+			dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
+			ttm_bo->sg = NULL;
+		}
+
+		sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+		if (IS_ERR(sg))
+			return PTR_ERR(sg);
+
+		ttm_bo->sg = sg;
+		xe_tt->sg = sg;
+	}
+
+	ttm_bo_move_null(ttm_bo, new_res);
+	return 0;
+}
+
+/**
+ * xe_bo_move_notify - Notify subsystems of a pending move
+ * @bo: The buffer object
+ * @ctx: The struct ttm_operation_ctx controlling locking and waits.
+ *
+ * This function notifies subsystems of an upcoming buffer move.
+ * Upon receiving such a notification, subsystems should schedule
+ * halting access to the underlying pages and optionally add a fence
+ * to the buffer object's dma_resv object, that signals when access is
+ * stopped. The caller will wait on all dma_resv fences before
+ * starting the move.
+ *
+ * A subsystem may commence access to the object after obtaining
+ * bindings to the new backing memory under the object lock.
+ *
+ * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
+ * negative error code on error.
+ */
+static int xe_bo_move_notify(struct xe_bo *bo,
+			     const struct ttm_operation_ctx *ctx)
+{
+	struct ttm_buffer_object *tbo = &bo->ttm;
+	int err;
+
+	/*
+	 * If this starts to call into many components, consider
+	 * using a notification chain here.
+	 */
+
+	/* A pinned bo is rejected with -EINVAL before anything is touched */
+	if (xe_bo_is_pinned(bo))
+		return -EINVAL;
+
+	xe_bo_vunmap(bo);
+	err = xe_bo_trigger_rebind(ttm_to_xe_device(tbo->bdev), bo, ctx);
+	if (err)
+		return err;
+
+	/* Don't call move_notify() for imported dma-bufs. */
+	if (tbo->base.dma_buf && !tbo->base.import_attach)
+		dma_buf_move_notify(tbo->base.dma_buf);
+
+	return 0;
+}
+
+/*
+ * TTM .move callback: move @ttm_bo's backing store to @new_mem. Returns 0 on
+ * success, -EMULTIHOP with the bounce placement in @hop, or a negative errno.
+ */
+static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
+		      struct ttm_operation_ctx *ctx,
+		      struct ttm_resource *new_mem, struct ttm_place *hop)
+{
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct ttm_resource *old_mem = ttm_bo->resource;
+	struct ttm_tt *ttm = ttm_bo->ttm;
+	struct xe_gt *gt = NULL;
+	struct dma_fence *fence;
+	bool move_lacks_source, needs_clear;
+	int ret = 0;
+
+	if (!old_mem) { /* no current resource, so there is nothing to copy */
+		if (new_mem->mem_type != TTM_PL_SYSTEM) {
+			hop->mem_type = TTM_PL_SYSTEM;
+			hop->flags = TTM_PL_FLAG_TEMPORARY;
+			ret = -EMULTIHOP;
+			goto out;
+		}
+
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	if (ttm_bo->type == ttm_bo_type_sg) { /* sg bos use the dma-buf path */
+		ret = xe_bo_move_notify(bo, ctx);
+		if (!ret)
+			ret = xe_bo_move_dmabuf(ttm_bo, old_mem, new_mem);
+		goto out;
+	}
+
+	move_lacks_source = !resource_is_vram(old_mem) &&
+		(!ttm || !ttm_tt_is_populated(ttm)); /* no old content to copy */
+
+	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
+		(!ttm && ttm_bo->type == ttm_bo_type_device);
+
+	if ((move_lacks_source && !needs_clear) ||
+	    (old_mem->mem_type == XE_PL_SYSTEM &&
+	     new_mem->mem_type == XE_PL_TT)) {
+		ttm_bo_move_null(ttm_bo, new_mem); /* no copy or clear needed */
+		goto out;
+	}
+
+	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
+		ret = xe_bo_move_notify(bo, ctx);
+		if (ret)
+			goto out;
+	}
+
+	if (old_mem->mem_type == XE_PL_TT &&
+	    new_mem->mem_type == XE_PL_SYSTEM) {
+		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
+						     DMA_RESV_USAGE_BOOKKEEP,
+						     true,
+						     MAX_SCHEDULE_TIMEOUT);
+		if (timeout < 0) {
+			ret = timeout;
+			goto out;
+		}
+		ttm_bo_move_null(ttm_bo, new_mem); /* all fences have signaled */
+		goto out;
+	}
+
+	if (!move_lacks_source &&
+	    ((old_mem->mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
+	     (resource_is_vram(old_mem) &&
+	      new_mem->mem_type == XE_PL_SYSTEM))) {
+		hop->fpfn = 0;
+		hop->lpfn = 0;
+		hop->mem_type = XE_PL_TT; /* bounce SYSTEM <-> VRAM via TT */
+		hop->flags = TTM_PL_FLAG_TEMPORARY;
+		ret = -EMULTIHOP;
+		goto out;
+	}
+
+	if (bo->gt)
+		gt = bo->gt;
+	else if (resource_is_vram(new_mem))
+		gt = mem_type_to_gt(xe, new_mem->mem_type);
+	else if (resource_is_vram(old_mem))
+		gt = mem_type_to_gt(xe, old_mem->mem_type);
+
+	XE_BUG_ON(!gt);
+	XE_BUG_ON(!gt->migrate);
+
+	trace_xe_bo_move(bo);
+	xe_device_mem_access_get(xe); /* balanced by a put on every path below */
+
+	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
+		/*
+		 * Kernel memory that is pinned should only be moved on suspend
+		 * / resume, some of the pinned memory is required for the
+		 * device to resume / use the GPU to move other evicted memory
+		 * (user memory) around. This likely could be optimized a bit
+		 * further where we find the minimum set of pinned memory
+		 * required for resume but for simplicity doing a memcpy for all
+		 * pinned memory.
+		 */
+		ret = xe_bo_vmap(bo);
+		if (!ret) {
+			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
+
+			/* Create a new VMAP once kernel BO back in VRAM */
+			if (!ret && resource_is_vram(new_mem)) {
+				void *new_addr = gt->mem.vram.mapping +
+					(new_mem->start << PAGE_SHIFT);
+
+				XE_BUG_ON(new_mem->start !=
+					  bo->placements->fpfn);
+
+				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
+			}
+		}
+	} else {
+		if (move_lacks_source)
+			fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0);
+		else
+			fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem);
+		if (IS_ERR(fence)) {
+			ret = PTR_ERR(fence);
+			xe_device_mem_access_put(xe);
+			goto out;
+		}
+		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
+						new_mem);
+		dma_fence_put(fence);
+	}
+
+	xe_device_mem_access_put(xe);
+
+out:
+	return ret;
+}
+
+static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+				       unsigned long page_offset)
+{
+	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
+	struct xe_gt *gt = mem_type_to_gt(xe, bo->resource->mem_type);
+	struct xe_res_cursor cursor; /* set to @page_offset, in bytes, below */
+
+	xe_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
+	return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT; /* -> PFN */
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo);
+
+/*
+ * TODO: Move this function to TTM so we don't rely on how TTM does its
+ * locking, thereby abusing TTM internals.
+ */
+static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
+{
+	bool locked;
+
+	XE_WARN_ON(kref_read(&ttm_bo->kref)); /* expect a zero TTM refcount */
+
+	/*
+	 * We can typically only race with TTM trylocking under the
+	 * lru_lock, which will immediately be unlocked again since
+	 * the ttm_bo refcount is zero at this point. So trylocking *should*
+	 * always succeed here, as long as we hold the lru lock.
+	 */
+	spin_lock(&ttm_bo->bdev->lru_lock);
+	locked = dma_resv_trylock(ttm_bo->base.resv);
+	spin_unlock(&ttm_bo->bdev->lru_lock);
+	XE_WARN_ON(!locked);
+
+	return locked; /* true only if the resv lock is now held */
+}
+
+static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	struct dma_fence *replacement = NULL;
+	struct xe_bo *bo;
+
+	if (!xe_bo_is_xe_bo(ttm_bo))
+		return;
+
+	bo = ttm_to_xe_bo(ttm_bo);
+	XE_WARN_ON(bo->created && kref_read(&ttm_bo->base.refcount));
+
+	/*
+	 * Corner case where TTM fails to allocate memory and this BO's resv
+	 * still points to the VM's resv
+	 */
+	if (ttm_bo->base.resv != &ttm_bo->base._resv)
+		return;
+
+	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
+		return;
+
+	/*
+	 * Scrub the preempt fences if any. The unbind fence is already
+	 * attached to the resv.
+	 * TODO: Don't do this for external bos once we scrub them after
+	 * unbind.
+	 */
+	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
+				DMA_RESV_USAGE_BOOKKEEP, fence) {
+		if (xe_fence_is_xe_preempt(fence) &&
+		    !dma_fence_is_signaled(fence)) {
+			if (!replacement) /* one stub is shared by all slots */
+				replacement = dma_fence_get_stub();
+
+			dma_resv_replace_fences(ttm_bo->base.resv,
+						fence->context,
+						replacement,
+						DMA_RESV_USAGE_BOOKKEEP);
+		}
+	}
+	dma_fence_put(replacement);
+
+	dma_resv_unlock(ttm_bo->base.resv); /* taken by the trylock above */
+}
+
+static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
+{
+	if (!xe_bo_is_xe_bo(ttm_bo))
+		return;
+
+	/*
+	 * Object is idle and about to be destroyed. Release the
+	 * dma-buf attachment.
+	 */
+	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
+		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
+						       struct xe_ttm_tt, ttm);
+
+		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
+					 DMA_BIDIRECTIONAL);
+		ttm_bo->sg = NULL; /* both pointers referred to the same table */
+		xe_tt->sg = NULL;
+	}
+}
+
+struct ttm_device_funcs xe_ttm_funcs = { /* TTM device hooks used for xe bos */
+	.ttm_tt_create = xe_ttm_tt_create,
+	.ttm_tt_populate = xe_ttm_tt_populate,
+	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
+	.ttm_tt_destroy = xe_ttm_tt_destroy,
+	.evict_flags = xe_evict_flags,
+	.move = xe_bo_move,
+	.io_mem_reserve = xe_ttm_io_mem_reserve,
+	.io_mem_pfn = xe_ttm_io_mem_pfn,
+	.release_notify = xe_ttm_bo_release_notify,
+	.eviction_valuable = ttm_bo_eviction_valuable, /* stock TTM helper */
+	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+};
+
+static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+
+	if (bo->ttm.base.import_attach)
+		drm_prime_gem_destroy(&bo->ttm.base, NULL);
+	drm_gem_object_release(&bo->ttm.base);
+
+	WARN_ON(!list_empty(&bo->vmas)); /* no vma may still reference the bo */
+
+	if (bo->ggtt_node.size)
+		xe_ggtt_remove_bo(bo->gt->mem.ggtt, bo);
+
+	if (bo->vm && xe_bo_is_user(bo)) /* pairs with xe_bo_create_locked() */
+		xe_vm_put(bo->vm);
+
+	kfree(bo);
+}
+
+static void xe_gem_object_free(struct drm_gem_object *obj)
+{
+	/* Our BO reference counting scheme works as follows:
+	 *
+	 * The gem object kref is typically used throughout the driver,
+	 * and the gem object holds a ttm_buffer_object refcount, so
+	 * that when the last gem object reference is put, which is when
+	 * we end up in this function, we put also that ttm_buffer_object
+	 * refcount. Anything using gem interfaces is then no longer
+	 * allowed to access the object in a way that requires a gem
+	 * refcount, including locking the object.
+	 *
+	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
+	 * refcount directly if needed.
+	 */
+	__xe_bo_vunmap(gem_to_xe_bo(obj)); /* variant without the lock assert */
+	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
+}
+
+static bool should_migrate_to_system(struct xe_bo *bo)
+{
+	/* True only for a device in fault mode and a bo marked cpu_atomic. */
+	return xe_device_in_fault_mode(xe_bo_device(bo)) &&
+		bo->props.cpu_atomic;
+}
+
+static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+{
+	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
+	struct drm_device *ddev = tbo->base.dev;
+	vm_fault_t ret;
+	int idx, r = 0;
+
+	ret = ttm_bo_vm_reserve(tbo, vmf);
+	if (ret)
+		return ret;
+
+	if (drm_dev_enter(ddev, &idx)) {
+		struct xe_bo *bo = ttm_to_xe_bo(tbo);
+
+		trace_xe_bo_cpu_fault(bo);
+
+		if (should_migrate_to_system(bo)) {
+			r = xe_bo_migrate(bo, XE_PL_TT);
+			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
+				ret = VM_FAULT_NOPAGE; /* transient: fault again */
+			else if (r)
+				ret = VM_FAULT_SIGBUS;
+		}
+		if (!ret) /* skipped when the migration above failed */
+			ret = ttm_bo_vm_fault_reserved(vmf,
+						       vmf->vma->vm_page_prot,
+						       TTM_BO_VM_NUM_PREFAULT);
+
+		drm_dev_exit(idx);
+	} else {
+		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+	}
+	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+		return ret; /* NOTE(review): resv presumably dropped by TTM here */
+
+	dma_resv_unlock(tbo->base.resv);
+	return ret;
+}
+
+static const struct vm_operations_struct xe_gem_vm_ops = {
+	.fault = xe_gem_fault, /* the only xe-specific handler in this table */
+	.open = ttm_bo_vm_open,
+	.close = ttm_bo_vm_close,
+	.access = ttm_bo_vm_access
+};
+
+static const struct drm_gem_object_funcs xe_gem_object_funcs = {
+	.free = xe_gem_object_free, /* drops the bo's TTM reference */
+	.mmap = drm_gem_ttm_mmap,
+	.export = xe_gem_prime_export,
+	.vm_ops = &xe_gem_vm_ops,
+};
+
+/**
+ * xe_bo_alloc - Allocate storage for a struct xe_bo
+ *
+ * Provides the storage that is later handed to __xe_bo_create_locked() as
+ * its @bo argument, for callers that need a pointer to the bo before it is
+ * created. The storage is zero-filled. If __xe_bo_create_locked() ends up
+ * never being called on the storage, it has to be released with
+ * xe_bo_free().
+ *
+ * Return: A pointer to a zero-initialized struct xe_bo on success,
+ * ERR_PTR(-ENOMEM) on error.
+ */
+struct xe_bo *xe_bo_alloc(void)
+{
+	struct xe_bo *storage;
+
+	storage = kzalloc(sizeof(*storage), GFP_KERNEL);
+	if (storage)
+		return storage;
+
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * xe_bo_free - Free storage allocated using xe_bo_alloc()
+ * @bo: The buffer object storage.
+ *
+ * Only for storage never passed to __xe_bo_create_locked(); see xe_bo_alloc().
+ */
+void xe_bo_free(struct xe_bo *bo)
+{
+	kfree(bo);
+}
+
+/* Create a bo and return it reserved; every failure frees the bo storage. */
+struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				    struct xe_gt *gt, struct dma_resv *resv,
+				    size_t size, enum ttm_bo_type type,
+				    u32 flags)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+		.allow_res_evict = resv != NULL, /* only when a resv is given */
+		.resv = resv,
+	};
+	struct ttm_placement *placement;
+	uint32_t alignment;
+	int err;
+
+	/* Only kernel objects should set GT */
+	XE_BUG_ON(gt && type != ttm_bo_type_kernel);
+
+	if (!bo) {
+		bo = xe_bo_alloc();
+		if (IS_ERR(bo))
+			return bo;
+	}
+
+	if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT) &&
+	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
+	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
+		size = ALIGN(size, SZ_64K);
+		flags |= XE_BO_INTERNAL_64K;
+		alignment = SZ_64K >> PAGE_SHIFT;
+	} else {
+		alignment = SZ_4K >> PAGE_SHIFT;
+	}
+
+	bo->gt = gt;
+	bo->size = size;
+	bo->flags = flags;
+	bo->ttm.base.funcs = &xe_gem_object_funcs;
+	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
+	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
+	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
+	bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL;
+	INIT_LIST_HEAD(&bo->vmas);
+	INIT_LIST_HEAD(&bo->pinned_link);
+
+	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
+
+	err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
+	if (WARN_ON(err)) {
+		/* TTM never saw the bo: release the GEM state and storage */
+		xe_ttm_bo_destroy(&bo->ttm);
+		return ERR_PTR(err);
+	}
+
+	/* Defer populating type_sg bos */
+	placement = (type == ttm_bo_type_sg ||
+		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
+		&bo->placement;
+	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
+				   placement, alignment,
+				   &ctx, NULL, resv, xe_ttm_bo_destroy);
+	if (err)
+		return ERR_PTR(err);
+
+	bo->created = true;
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+	return bo;
+}
+
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
+				  struct xe_vm *vm, size_t size,
+				  enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo;
+	int err;
+
+	if (vm)
+		xe_vm_assert_held(vm);
+	bo = __xe_bo_create_locked(xe, NULL, gt, vm ? &vm->resv : NULL, size,
+				   type, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	if (vm && xe_bo_is_user(bo))
+		xe_vm_get(vm); /* put again in xe_ttm_bo_destroy() */
+	bo->vm = vm;
+
+	if (flags & XE_BO_CREATE_GGTT_BIT) {
+		XE_BUG_ON(!gt);
+
+		err = xe_ggtt_insert_bo(gt->mem.ggtt, bo);
+		if (err)
+			goto err_unlock_put_bo;
+	}
+
+	return bo;
+
+err_unlock_put_bo:
+	xe_bo_unlock_vm_held(bo); /* the bo came back locked from creation */
+	xe_bo_put(bo);
+	return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
+			   struct xe_vm *vm, size_t size,
+			   enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *new_bo = xe_bo_create_locked(xe, gt, vm, size, type, flags);
+
+	if (IS_ERR(new_bo))
+		return new_bo;
+	xe_bo_unlock_vm_held(new_bo); /* unlike the _locked variant */
+	return new_bo;
+}
+
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
+				   struct xe_vm *vm, size_t size,
+				   enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags);
+	int err;
+
+	if (IS_ERR(bo))
+		return bo;
+
+	err = xe_bo_pin(bo);
+	if (err)
+		goto err_put;
+
+	err = xe_bo_vmap(bo);
+	if (err)
+		goto err_unpin;
+
+	xe_bo_unlock_vm_held(bo); /* pinned and mapped; returned unlocked */
+
+	return bo;
+
+err_unpin: /* unwind in reverse order of the steps above */
+	xe_bo_unpin(bo);
+err_put:
+	xe_bo_unlock_vm_held(bo);
+	xe_bo_put(bo);
+	return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
+				     const void *data, size_t size,
+				     enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_pin_map(xe, gt, NULL,
+						ALIGN(size, PAGE_SIZE),
+						type, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); /* only @size bytes */
+
+	return bo; /* still pinned and mapped by xe_bo_create_pin_map() */
+}
+
+/*
+ * XXX: This is in the VM bind data path, likely should calculate this once and
+ * store, with a recalculation if the BO is moved.
+ */
+static uint64_t vram_region_io_offset(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type);
+
+	return gt->mem.vram.io_start - xe->mem.vram.io_start; /* GT vs device */
+}
+
+/**
+ * xe_bo_pin_external - pin an external BO
+ * @bo: buffer object to be pinned
+ *
+ * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_pin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_bo_pin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	XE_BUG_ON(bo->vm);
+	XE_BUG_ON(!xe_bo_is_user(bo));
+
+	if (!xe_bo_is_pinned(bo)) { /* first pin: validate and track the bo */
+		err = xe_bo_validate(bo, NULL, false);
+		if (err)
+			return err;
+
+		if (xe_bo_is_vram(bo)) {
+			spin_lock(&xe->pinned.lock);
+			list_add_tail(&bo->pinned_link,
+				      &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+		}
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return 0;
+}
+
+int xe_bo_pin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	/* We currently don't expect user BO to be pinned */
+	XE_BUG_ON(xe_bo_is_user(bo));
+
+	/* Pinned object must be in GGTT or have pinned flag */
+	XE_BUG_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT |
+				 XE_BO_CREATE_GGTT_BIT)));
+
+	/*
+	 * No reason we can't support pinning imported dma-bufs we just don't
+	 * expect to pin an imported dma-buf.
+	 */
+	XE_BUG_ON(bo->ttm.base.import_attach);
+
+	/* We only expect at most 1 pin */
+	XE_BUG_ON(xe_bo_is_pinned(bo));
+
+	err = xe_bo_validate(bo, NULL, false);
+	if (err)
+		return err;
+
+	/*
+	 * For pinned objects on DGFX, we expect these objects to be in
+	 * contiguous VRAM memory. Required eviction / restore during suspend /
+	 * resume (force restore to same physical address).
+	 */
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) {
+		struct ttm_place *place = &(bo->placements[0]);
+		bool lmem;
+
+		XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
+		XE_BUG_ON(!mem_type_is_vram(place->mem_type));
+
+		/* Restrict the placement to the pages the bo occupies now. */
+		place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) -
+			       vram_region_io_offset(bo)) >> PAGE_SHIFT;
+		place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
+
+		spin_lock(&xe->pinned.lock);
+		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+	return 0;
+}
+
+/**
+ * xe_bo_unpin_external - unpin an external BO
+ * @bo: buffer object to be unpinned
+ *
+ * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_unpin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * The BO must currently be pinned; this function does not return a value.
+ */
+void xe_bo_unpin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	XE_BUG_ON(bo->vm);
+	XE_BUG_ON(!xe_bo_is_pinned(bo));
+	XE_BUG_ON(!xe_bo_is_user(bo));
+
+	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
+		spin_lock(&xe->pinned.lock); /* last pin: stop tracking the bo */
+		list_del_init(&bo->pinned_link);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+}
+
+void xe_bo_unpin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	XE_BUG_ON(bo->ttm.base.import_attach);
+	XE_BUG_ON(!xe_bo_is_pinned(bo));
+
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) { /* same test as in xe_bo_pin() */
+		XE_BUG_ON(list_empty(&bo->pinned_link));
+
+		spin_lock(&xe->pinned.lock);
+		list_del_init(&bo->pinned_link);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+}
+
+/**
+ * xe_bo_validate() - Make sure the bo is in an allowed placement
+ * @bo: The bo,
+ * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
+ *      NULL. Used together with @allow_res_evict.
+ * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
+ *                   reservation object.
+ *
+ * Make sure the bo is in allowed placement, migrating it if necessary. If
+ * needed, other bos will be evicted. If bos selected for eviction shares
+ * the @vm's reservation object, they can be evicted iff @allow_res_evict is
+ * set to true, otherwise they will be bypassed.
+ *
+ * Return: 0 on success, negative error code on failure. May return
+ * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
+ */
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+
+	if (vm) { /* @allow_res_evict is ignored when @vm is NULL */
+		lockdep_assert_held(&vm->lock);
+		xe_vm_assert_held(vm);
+
+		ctx.allow_res_evict = allow_res_evict;
+		ctx.resv = &vm->resv;
+	}
+
+	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+}
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
+{
+	/*
+	 * An xe bo is one whose TTM destructor is xe_ttm_bo_destroy().
+	 */
+	return bo->destroy == &xe_ttm_bo_destroy;
+}
+
+/* Address of @bo at byte @offset; *@is_lmem reports whether it is in VRAM. */
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
+		      size_t page_size, bool *is_lmem)
+{
+	struct xe_res_cursor cur;
+	u64 page;
+
+	/* The bo lock is only asserted for bos that are not pinned. */
+	if (!READ_ONCE(bo->ttm.pin_count))
+		xe_bo_assert_held(bo);
+
+	XE_BUG_ON(page_size > PAGE_SIZE);
+	page = offset >> PAGE_SHIFT;
+	offset &= (PAGE_SIZE - 1);
+
+	*is_lmem = xe_bo_is_vram(bo);
+
+	if (*is_lmem) {
+		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
+			     page_size, &cur);
+		return cur.start + offset + vram_region_io_offset(bo);
+	}
+
+	XE_BUG_ON(!bo->ttm.ttm);
+
+	xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT,
+			page_size, &cur);
+	return xe_res_dma(&cur) + offset;
+}
+
+int xe_bo_vmap(struct xe_bo *bo)
+{
+	void *virtual;
+	bool is_iomem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (!iosys_map_is_null(&bo->vmap)) /* already mapped: nothing to do */
+		return 0;
+
+	/*
+	 * We use this more or less deprecated interface for now since
+	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
+	 * single page bos, which is done here.
+	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
+	 * to use struct iosys_map.
+	 */
+	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
+	if (ret)
+		return ret;
+
+	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+	if (is_iomem) /* record whether the mapping is IO or system memory */
+		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
+	else
+		iosys_map_set_vaddr(&bo->vmap, virtual);
+
+	return 0;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo)
+{
+	if (iosys_map_is_null(&bo->vmap))
+		return;
+	iosys_map_clear(&bo->vmap);
+	ttm_bo_kunmap(&bo->kmap);
+}
+
+void xe_bo_vunmap(struct xe_bo *bo)
+{
+	xe_bo_assert_held(bo); /* __xe_bo_vunmap() is the variant without this */
+	__xe_bo_vunmap(bo);
+}
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_gem_create *args = data;
+	struct ww_acquire_ctx ww;
+	struct xe_vm *vm = NULL;
+	struct xe_bo *bo;
+	unsigned int bo_flags = XE_BO_CREATE_USER_BIT;
+	u32 handle;
+	int err;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags &
+			 ~(XE_GEM_CREATE_FLAG_DEFER_BACKING |
+			   XE_GEM_CREATE_FLAG_SCANOUT |
+			   xe->info.mem_region_mask)))
+		return -EINVAL;
+
+	/* at least one memory type must be specified */
+	if (XE_IOCTL_ERR(xe, !(args->flags & xe->info.mem_region_mask)))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->handle)) /* output-only field: must be 0 */
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->size & ~PAGE_MASK))
+		return -EINVAL;
+
+	if (args->vm_id) {
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_ERR(xe, !vm))
+			return -ENOENT;
+		err = xe_vm_lock(vm, &ww, 0, true);
+		if (err) {
+			xe_vm_put(vm);
+			return err;
+		}
+	}
+
+	if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING)
+		bo_flags |= XE_BO_DEFER_BACKING;
+	if (args->flags & XE_GEM_CREATE_FLAG_SCANOUT)
+		bo_flags |= XE_BO_SCANOUT_BIT;
+
+	bo_flags |= (args->flags & xe->info.mem_region_mask) <<
+		    (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); /* placement bits only */
+	bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
+			  bo_flags);
+	if (vm) {
+		xe_vm_unlock(vm, &ww);
+		xe_vm_put(vm);
+	}
+
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
+	xe_bo_put(bo); /* on success the handle keeps the bo referenced */
+	if (err)
+		return err;
+
+	args->handle = handle;
+
+	return 0;
+}
+
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_gem_mmap_offset *args = data;
+	struct drm_gem_object *gem_obj;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags)) /* no flags are accepted */
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file, args->handle);
+	if (XE_IOCTL_ERR(xe, !gem_obj))
+		return -ENOENT;
+
+	/* The mmap offset was set up at BO allocation time. */
+	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+	xe_bo_put(gem_to_xe_bo(gem_obj)); /* drop the lookup reference */
+	return 0;
+}
+
+int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
+	       int num_resv, bool intr)
+{
+	struct ttm_validate_buffer tv_bo;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+
+	XE_BUG_ON(!ww); /* a ww acquire context is mandatory */
+
+	tv_bo.num_shared = num_resv;
+	tv_bo.bo = &bo->ttm;
+	list_add_tail(&tv_bo.head, &objs); /* reservation list with one entry */
+
+	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
+}
+
+void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww)
+{
+	dma_resv_unlock(bo->ttm.base.resv);
+	ww_acquire_fini(ww); /* presumably the context used for xe_bo_lock() */
+}
+
+/**
+ * xe_bo_can_migrate - Whether a buffer object likely can be migrated
+ * @bo: The buffer object to migrate
+ * @mem_type: The TTM memory type intended to migrate to
+ *
+ * Check whether the buffer object supports migration to the
+ * given memory type. Note that pinning may affect the ability to migrate as
+ * returned by this function.
+ *
+ * This function is primarily intended as a helper for checking the
+ * possibility to migrate buffer objects and can be called without
+ * the object lock held.
+ *
+ * Return: true if migration is possible, false otherwise.
+ */
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
+{
+	unsigned int cur_place;
+
+	if (bo->ttm.type == ttm_bo_type_kernel) /* placements are not checked */
+		return true;
+
+	if (bo->ttm.type == ttm_bo_type_sg) /* sg bos are never migrated here */
+		return false;
+
+	for (cur_place = 0; cur_place < bo->placement.num_placement;
+	     cur_place++) {
+		if (bo->placements[cur_place].mem_type == mem_type)
+			return true;
+	}
+
+	return false;
+}
+
+static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
+{
+	/* Everything except the memory type is left zeroed. */
+	*place = (struct ttm_place){ .mem_type = mem_type };
+}
+
+/**
+ * xe_bo_migrate - Migrate an object to the desired region id
+ * @bo: The buffer object to migrate.
+ * @mem_type: The TTM region type to migrate to.
+ *
+ * Attempt to migrate the buffer object to the desired memory region. The
+ * buffer object may not be pinned, and must be locked.
+ * On successful completion, the object memory type will be updated,
+ * but an async migration task may not have completed yet, and to
+ * accomplish that, the object's kernel fences must be signaled with
+ * the object lock held.
+ *
+ * Return: 0 on success, -EBUSY if pinned, -EINVAL if xe_bo_can_migrate() fails.
+ * Other negative error code on failure, e.g. -EINTR or -ERESTARTSYS on signal.
+ */
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+	struct ttm_placement placement;
+	struct ttm_place requested;
+
+	xe_bo_assert_held(bo);
+
+	if (bo->ttm.resource->mem_type == mem_type) /* already there */
+		return 0;
+
+	if (xe_bo_is_pinned(bo))
+		return -EBUSY;
+
+	if (!xe_bo_can_migrate(bo, mem_type))
+		return -EINVAL;
+
+	xe_place_from_ttm_type(mem_type, &requested);
+	placement.num_placement = 1; /* @mem_type is the only allowed place */
+	placement.num_busy_placement = 1;
+	placement.placement = &requested;
+	placement.busy_placement = &requested;
+
+	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+}
+
+/**
+ * xe_bo_evict - Evict an object to evict placement
+ * @bo: The buffer object to migrate.
+ * @force_alloc: Set force_alloc in ttm_operation_ctx
+ *
+ * On successful completion, the object memory will be moved to evict
+ * placement. This function blocks until the object has been fully moved.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false,
+		.force_alloc = force_alloc,
+	};
+	struct ttm_placement placement;
+	int ret;
+
+	xe_evict_flags(&bo->ttm, &placement);
+	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
+	if (ret)
+		return ret;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT); /* result unchecked */
+
+	return 0;
+}
+
+/**
+ * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
+ * placed in system memory.
+ * @bo: The xe_bo
+ *
+ * If a bo has an allowable placement in XE_PL_TT memory, it can't use
+ * flat CCS compression, because the GPU then has no way to access the
+ * CCS metadata using relevant commands. For the opposite case, we need to
+ * allocate storage for the CCS metadata when the BO is not resident in
+ * VRAM memory.
+ *
+ * Return: true if extra pages need to be allocated, false otherwise.
+ */
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
+{
+	return bo->ttm.type == ttm_bo_type_device && /* device-type bos only */
+		!(bo->flags & XE_BO_CREATE_SYSTEM_BIT) && /* no system flag */
+		(bo->flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT));
+}
+
+/**
+ * __xe_bo_release_dummy() - Dummy kref release function
+ * @kref: The embedded struct kref.
+ *
+ * Dummy release function for xe_bo_put_deferred(); intentionally empty.
+ */
+void __xe_bo_release_dummy(struct kref *kref)
+{
+}
+
+/**
+ * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
+ * @deferred: The lockless list used for the call to xe_bo_put_deferred().
+ *
+ * Puts all bos whose put was deferred by xe_bo_put_deferred().
+ * The @deferred list can be either an onstack local list or a global
+ * shared list used by a workqueue.
+ */
+void xe_bo_put_commit(struct llist_head *deferred)
+{
+	struct llist_node *freed;
+	struct xe_bo *bo, *next;
+
+	if (!deferred) /* a NULL list is accepted and ignored */
+		return;
+
+	freed = llist_del_all(deferred);
+	if (!freed)
+		return;
+
+	llist_for_each_entry_safe(bo, next, freed, freed) /* bo may be freed */
+		drm_gem_object_free(&bo->ttm.base.refcount);
+}
+
+/**
+ * xe_bo_dumb_create - Create a dumb bo as backing for a fb
+ * @file_priv: The drm file the new handle is created for
+ * @dev: The drm device
+ * @args: Dumb-buffer parameters; pitch, size and handle are filled in
+ *
+ * See dumb_create() hook in include/drm/drm_drv.h
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_bo_dumb_create(struct drm_file *file_priv,
+		      struct drm_device *dev,
+		      struct drm_mode_create_dumb *args)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_bo *bo;
+	uint32_t handle;
+	int cpp = DIV_ROUND_UP(args->bpp, 8); /* bytes per pixel, rounded up */
+	int err;
+	u32 page_size = max_t(u32, PAGE_SIZE,
+		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
+
+	args->pitch = ALIGN(args->width * cpp, 64);
+	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
+			   page_size);
+
+	bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
+			  XE_BO_CREATE_VRAM_IF_DGFX(to_gt(xe)) |
+			  XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_put(&bo->ttm.base);
+	if (!err)
+		args->handle = handle;
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_bo.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
new file mode 100644
index 000000000000..1a49c0a3c4c6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_BO_H_
+#define _XE_BO_H_
+
+#include "xe_bo_types.h"
+#include "xe_macros.h"
+#include "xe_vm_types.h"
+
+#define XE_DEFAULT_GTT_SIZE_MB          3072ULL /* 3GB by default */
+
+#define XE_BO_CREATE_USER_BIT		BIT(1)
+#define XE_BO_CREATE_SYSTEM_BIT		BIT(2)
+#define XE_BO_CREATE_VRAM0_BIT		BIT(3)
+#define XE_BO_CREATE_VRAM1_BIT		BIT(4)
+#define XE_BO_CREATE_VRAM_IF_DGFX(gt) /* VRAM of @gt on DGFX, else sysmem */ \
+	(IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << (gt)->info.vram_id : \
+	 XE_BO_CREATE_SYSTEM_BIT)
+#define XE_BO_CREATE_GGTT_BIT		BIT(5)
+#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
+#define XE_BO_CREATE_PINNED_BIT		BIT(7)
+#define XE_BO_DEFER_BACKING		BIT(8)
+#define XE_BO_SCANOUT_BIT		BIT(9)
+/* these are triggered internally only */
+#define XE_BO_INTERNAL_TEST		BIT(30)
+#define XE_BO_INTERNAL_64K		BIT(31)
+
+#define PPAT_UNCACHED                   GENMASK_ULL(4, 3)
+#define PPAT_CACHED_PDE                 0
+#define PPAT_CACHED                     BIT_ULL(7)
+#define PPAT_DISPLAY_ELLC               BIT_ULL(4)
+
+#define GEN8_PTE_SHIFT			12
+#define GEN8_PAGE_SIZE			(1 << GEN8_PTE_SHIFT)
+#define GEN8_PTE_MASK			(GEN8_PAGE_SIZE - 1)
+#define GEN8_PDE_SHIFT			(GEN8_PTE_SHIFT - 3)
+#define GEN8_PDES			(1 << GEN8_PDE_SHIFT)
+#define GEN8_PDE_MASK			(GEN8_PDES - 1)
+
+#define GEN8_64K_PTE_SHIFT		16
+#define GEN8_64K_PAGE_SIZE		(1 << GEN8_64K_PTE_SHIFT)
+#define GEN8_64K_PTE_MASK		(GEN8_64K_PAGE_SIZE - 1)
+#define GEN8_64K_PDE_MASK		(GEN8_PDE_MASK >> 4)
+
+#define GEN8_PDE_PS_2M			BIT_ULL(7)
+#define GEN8_PDPE_PS_1G			BIT_ULL(7)
+#define GEN8_PDE_IPS_64K		BIT_ULL(11)
+
+#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
+#define GEN12_USM_PPGTT_PTE_AE		BIT_ULL(10)
+#define GEN12_PPGTT_PTE_LM		BIT_ULL(11)
+#define GEN12_PDE_64K			BIT_ULL(6)
+#define GEN12_PTE_PS64                  BIT_ULL(8)
+
+#define GEN8_PAGE_PRESENT		BIT_ULL(0)
+#define GEN8_PAGE_RW			BIT_ULL(1)
+
+#define PTE_READ_ONLY			BIT(0)
+
+#define XE_PL_SYSTEM		TTM_PL_SYSTEM
+#define XE_PL_TT		TTM_PL_TT
+#define XE_PL_VRAM0		TTM_PL_VRAM
+#define XE_PL_VRAM1		(XE_PL_VRAM0 + 1)
+
+#define XE_BO_PROPS_INVALID	(-1)
+
+struct sg_table;
+
+struct xe_bo *xe_bo_alloc(void);
+void xe_bo_free(struct xe_bo *bo);
+
+struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				    struct xe_gt *gt, struct dma_resv *resv,
+				    size_t size, enum ttm_bo_type type,
+				    u32 flags);
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
+				  struct xe_vm *vm, size_t size,
+				  enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
+			   struct xe_vm *vm, size_t size,
+			   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
+				   struct xe_vm *vm, size_t size,
+				   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
+				     const void *data, size_t size,
+				     enum ttm_bo_type type, u32 flags);
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+			      u32 bo_flags);
+
+static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
+{
+	return container_of(bo, struct xe_bo, ttm);
+}
+
+static inline struct xe_bo *gem_to_xe_bo(const struct drm_gem_object *obj)
+{
+	return container_of(obj, struct xe_bo, ttm.base);
+}
+
+#define xe_bo_device(bo) ttm_to_xe_device((bo)->ttm.bdev)
+
+static inline struct xe_bo *xe_bo_get(struct xe_bo *bo)
+{
+	if (!bo)
+		return NULL;
+	drm_gem_object_get(&bo->ttm.base);
+	return bo;
+}
+
+static inline void xe_bo_put(struct xe_bo *bo)
+{
+	if (bo)
+		drm_gem_object_put(&bo->ttm.base);
+}
+
+static inline void xe_bo_assert_held(struct xe_bo *bo)
+{
+	if (bo)
+		dma_resv_assert_held((bo)->ttm.base.resv);
+}
+
+int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
+	       int num_resv, bool intr);
+
+void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww);
+
+static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
+{
+	if (!bo)
+		return;
+	XE_BUG_ON(bo->vm && bo->ttm.base.resv != &bo->vm->resv);
+	if (!bo->vm)
+		dma_resv_unlock(bo->ttm.base.resv);
+	else
+		xe_vm_assert_held(bo->vm);
+}
+
+static inline void xe_bo_lock_no_vm(struct xe_bo *bo,
+				    struct ww_acquire_ctx *ctx)
+{
+	if (!bo)
+		return;
+	XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
+			     bo->ttm.base.resv != &bo->ttm.base._resv));
+	dma_resv_lock(bo->ttm.base.resv, ctx);
+}
+
+static inline void xe_bo_unlock_no_vm(struct xe_bo *bo)
+{
+	if (!bo)
+		return;
+	XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
+			     bo->ttm.base.resv != &bo->ttm.base._resv));
+	dma_resv_unlock(bo->ttm.base.resv);
+}
+
+int xe_bo_pin_external(struct xe_bo *bo);
+int xe_bo_pin(struct xe_bo *bo);
+void xe_bo_unpin_external(struct xe_bo *bo);
+void xe_bo_unpin(struct xe_bo *bo);
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict);
+
+static inline bool xe_bo_is_pinned(struct xe_bo *bo)
+{
+	return bo->ttm.pin_count != 0;
+}
+
+static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
+{
+	if (unlikely(!bo))
+		return;
+
+	xe_bo_lock_no_vm(bo, NULL);
+	xe_bo_unpin(bo);
+	xe_bo_unlock_no_vm(bo);
+	xe_bo_put(bo);
+}
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo);
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
+		      size_t page_size, bool *is_lmem);
+
+static inline dma_addr_t
+xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
+{
+	bool lmem_unused; /* out-parameter of xe_bo_addr(); value is discarded */
+
+	return xe_bo_addr(bo, 0, page_size, &lmem_unused);
+}
+
+static inline u32
+xe_bo_ggtt_addr(struct xe_bo *bo)
+{
+	XE_BUG_ON(bo->ggtt_node.size > bo->size);
+	XE_BUG_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
+	return bo->ggtt_node.start;
+}
+
+int xe_bo_vmap(struct xe_bo *bo);
+void xe_bo_vunmap(struct xe_bo *bo);
+
+bool mem_type_is_vram(u32 mem_type);
+bool xe_bo_is_vram(struct xe_bo *bo);
+
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
+
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int xe_bo_dumb_create(struct drm_file *file_priv,
+		      struct drm_device *dev,
+		      struct drm_mode_create_dumb *args);
+
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo);
+
+static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
+{
+	return PAGE_ALIGN(bo->ttm.base.size);
+}
+
+void __xe_bo_release_dummy(struct kref *kref);
+
+/**
+ * xe_bo_put_deferred() - Put a buffer object with delayed final freeing
+ * @bo: The bo to put.
+ * @deferred: List to which to add the buffer object if we cannot put, or
+ * NULL if the function is to put unconditionally.
+ *
+ * Since the final freeing of an object includes both sleeping and (!)
+ * memory allocation in the dma_resv individualization, it's not ok
+ * to put an object from atomic context nor from within a held lock
+ * tainted by reclaim. In such situations we want to defer the final
+ * freeing until we've exited the restricting context, or in the worst
+ * case to a workqueue.
+ * This function either puts the object if possible without the refcount
+ * reaching zero, or adds it to the @deferred list if that was not possible.
+ * The caller needs to follow up with a call to xe_bo_put_commit() to actually
+ * put the bo iff this function returns true; always following up is also safe.
+ * Unlike xe_bo_put(), @bo must not be NULL when @deferred is non-NULL.
+ * TODO: It's TTM that is the villain here. Perhaps TTM should add an
+ * interface like this.
+ *
+ * Return: true if @bo was the first object put on the @deferred list,
+ * false otherwise.
+ */
+static inline bool
+xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred)
+{
+	if (!deferred) {
+		xe_bo_put(bo);
+		return false;
+	}
+
+	if (!kref_put(&bo->ttm.base.refcount, __xe_bo_release_dummy))
+		return false;
+
+	return llist_add(&bo->freed, deferred);
+}
+
+void xe_bo_put_commit(struct llist_head *deferred);
+
+struct sg_table *xe_bo_get_sg(struct xe_bo *bo);
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+/**
+ * xe_bo_is_mem_type - Whether the bo currently resides in the given
+ * TTM memory type
+ * @bo: The bo to check.
+ * @mem_type: The TTM memory type.
+ *
+ * Return: true iff the bo resides in @mem_type, false otherwise.
+ */
+static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type)
+{
+	xe_bo_assert_held(bo);
+	return bo->ttm.resource->mem_type == mem_type;
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
new file mode 100644
index 000000000000..f57d440cc95a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_DOC_H_
+#define _XE_BO_DOC_H_
+
+/**
+ * DOC: Buffer Objects (BO)
+ *
+ * BO management
+ * =============
+ *
+ * TTM manages (placement, eviction, etc...) all BOs in XE.
+ *
+ * BO creation
+ * ===========
+ *
+ * Create a chunk of memory which can be used by the GPU. Placement rules
+ * (sysmem or vram region) passed in upon creation. TTM handles placement of BO
+ * and can trigger eviction of other BOs to make space for the new BO.
+ *
+ * Kernel BOs
+ * ----------
+ *
+ * A kernel BO is created as part of driver load (e.g. uC firmware images, GuC
+ * ADS, etc...) or as part of a user operation which requires a kernel BO
+ * (e.g. engine state, memory for page tables, etc...). These BOs are typically
+ * mapped in the GGTT (all kernel BOs aside from memory for page tables are in
+ * the GGTT), are pinned (can't move or be evicted at runtime), have a vmap
+ * (XE can access the memory via the xe_map layer) and have contiguous physical
+ * memory.
+ *
+ * More details of why kernel BOs are pinned and contiguous below.
+ *
+ * User BOs
+ * --------
+ *
+ * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is
+ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
+ * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
+ * user BOs are evictable and user BOs are never pinned by XE. The allocation of
+ * the backing store can be deferred from creation time until first use which is
+ * either mmap, bind, or pagefault.
+ *
+ * Private BOs
+ * ~~~~~~~~~~~
+ *
+ * A private BO is a user BO created with a valid VM argument passed into the
+ * create IOCTL. If a BO is private it cannot be exported via prime FD and
+ * mappings can only be created for the BO within the VM it is tied to. Lastly,
+ * the BO dma-resv slots / lock point to the VM's dma-resv slots / lock (all
+ * private BOs to a VM share common dma-resv slots / lock).
+ *
+ * External BOs
+ * ~~~~~~~~~~~~
+ *
+ * An external BO is a user BO created with a NULL VM argument passed into the
+ * create IOCTL. An external BO can be shared with different UMDs / devices via
+ * prime FD and the BO can be mapped into multiple VMs. An external BO has its
+ * own unique dma-resv slots / lock. An external BO will be in an array of all
+ * VMs which have a mapping of the BO. This allows VMs to look up and lock all
+ * external BOs mapped in the VM as needed.
+ *
+ * BO placement
+ * ~~~~~~~~~~~~
+ *
+ * When a user BO is created, a mask of valid placements is passed indicating
+ * which memory regions are considered valid.
+ *
+ * The memory region information is available via query uAPI (TODO: add link).
+ *
+ * BO validation
+ * =============
+ *
+ * BO validation (ttm_bo_validate) refers to ensuring a BO has a valid
+ * placement. If a BO was swapped to temporary storage, a validation call will
+ * trigger a move back to a valid (location where GPU can access BO) placement.
+ * Validation of a BO may evict other BOs to make room for the BO being
+ * validated.
+ *
+ * BO eviction / moving
+ * ====================
+ *
+ * All eviction (or in other words, moving a BO from one memory location to
+ * another) is routed through TTM with a callback into XE.
+ *
+ * Runtime eviction
+ * ----------------
+ *
+ * Runtime eviction is eviction during normal operation, when TTM decides it
+ * needs to move a BO. Typically this is because TTM needs to make room for
+ * another BO and the evicted BO is the first unlocked BO on the LRU list.
+ *
+ * An example of this is a new BO which can only be placed in VRAM but there is
+ * no space in VRAM. There could be multiple BOs which have sysmem and VRAM
+ * placement rules which currently reside in VRAM; TTM will trigger a move of
+ * one (or multiple) of these BO(s) until there is room in VRAM to place the new
+ * BO. The evicted BO(s) are valid but still need new bindings before the BO
+ * is used again (exec or compute mode rebind worker).
+ *
+ * Another example would be, TTM can't find a BO to evict which has another
+ * valid placement. In this case TTM will evict one (or multiple) unlocked BO(s)
+ * to a temporary unreachable (invalid) placement. The evicted BO(s) are invalid
+ * and before next use need to be moved to a valid placement and rebound.
+ *
+ * In both cases, moves of these BOs are scheduled behind the fences in the BO's
+ * dma-resv slots.
+ *
+ * WW locking tries to ensure that if 2 VMs each use 51% of the memory, forward
+ * progress is made on both VMs.
+ *
+ * Runtime eviction uses a per-GT migration engine (TODO: link to migration
+ * engine doc) to do a GPU memcpy from one location to another.
+ *
+ * Rebinds after runtime eviction
+ * ------------------------------
+ *
+ * When BOs are moved, every mapping (VMA) of the BO needs to be rebound before
+ * the BO is used again. Every VMA is added to an evicted list of its VM when
+ * the BO is moved. This is safe because of the VM locking structure (TODO: link
+ * to VM locking doc). On the next use of a VM (exec or compute mode rebind
+ * worker) the evicted VMA list is checked and rebinds are triggered. In the
+ * case of faulting VM, the rebind is done in the page fault handler.
+ *
+ * Suspend / resume eviction of VRAM
+ * ---------------------------------
+ *
+ * During device suspend / resume VRAM may lose power which means the contents
+ * of VRAM's memory is blown away. Thus BOs present in VRAM at the time of
+ * suspend must be moved to sysmem in order for their contents to be saved.
+ *
+ * A simple TTM call (ttm_resource_manager_evict_all) can move all non-pinned
+ * (user) BOs to sysmem. External BOs that are pinned need to be manually
+ * evicted with a simple loop + xe_bo_evict call. It gets a little trickier
+ * with kernel BOs.
+ *
+ * Some kernel BOs are used by the GT migration engine to do moves, thus we
+ * can't move all of the BOs via the GT migration engine. For simplicity, use
+ * a TTM memcpy (CPU) to move any kernel (pinned) BO on either suspend or resume.
+ *
+ * Some kernel BOs need to be restored to the exact same physical location. TTM
+ * makes this rather easy but the caveat is the memory must be contiguous. Again
+ * for simplicity, we enforce that all kernel (pinned) BOs are contiguous and
+ * restored to the same physical location.
+ *
+ * Pinned external BOs in VRAM are restored on resume via the GPU.
+ *
+ * Rebinds after suspend / resume
+ * ------------------------------
+ *
+ * Most kernel BOs have GGTT mappings which must be restored during the resume
+ * process. All user BOs are rebound after validation on their next use.
+ *
+ * Future work
+ * ===========
+ *
+ * Trim the list of BOs which is saved / restored via TTM memcpy on suspend /
+ * resume. All we really need to save / restore via TTM memcpy is the memory
+ * required for the GuC to load and the memory for the GT migrate engine to
+ * operate.
+ *
+ * Do not require kernel BOs to be contiguous in physical memory / restored to
+ * the same physical address on resume. In all likelihood the only memory that
+ * needs to be restored to the same physical address is memory used for page
+ * tables. All of that memory is allocated 1 page at a time so the contiguous
+ * requirement isn't needed. Some work on the vmap code would need to be done if
+ * kernel BOs are not contiguous too.
+ *
+ * Make some kernel BOs evictable rather than pinned. An example of this would
+ * be engine state; in all likelihood, if the dma-resv slots of these BOs were
+ * properly used rather than pinning, we could safely evict + rebind them.
+ *
+ * Some kernel BOs do not need to be restored on resume (e.g. GuC ADS as that is
+ * repopulated on resume), add flag to mark such objects as no save / restore.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
new file mode 100644
index 000000000000..7046dc203138
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_bo_evict.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+
+/**
+ * xe_bo_evict_all - evict all BOs from VRAM
+ * @xe: xe device
+ *
+ * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next
+ * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU.
+ * All eviction magic done via TTM calls.
+ *
+ * Evict == move VRAM BOs to temporary (typically system) memory. Call this
+ * before the device goes into a suspend state where the VRAM loses power.
+ *
+ * Return: 0 on success (always 0 on non-DGFX devices), otherwise the negative
+ * error code of the first eviction that failed.
+ */
+int xe_bo_evict_all(struct xe_device *xe)
+{
+	struct ttm_device *bdev = &xe->ttm;
+	struct ww_acquire_ctx ww;
+	struct xe_bo *bo;
+	struct xe_gt *gt;
+	struct list_head still_in_list;
+	u32 mem_type;
+	u8 id;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/* User memory: let TTM evict every VRAM manager that exists */
+	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+		struct ttm_resource_manager *man =
+			ttm_manager_type(bdev, mem_type);
+
+		if (man) {
+			ret = ttm_resource_manager_evict_all(bdev, man);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Pinned user memory in VRAM; pinned.lock is dropped around each evict */
+	INIT_LIST_HEAD(&still_in_list);
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.external_vram,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, &ww, 0, false);
+		ret = xe_bo_evict(bo, true);
+		xe_bo_unlock(bo, &ww);
+		xe_bo_put(bo);
+		if (ret) {
+			spin_lock(&xe->pinned.lock);
+			list_splice_tail(&still_in_list,
+					 &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+			return ret;
+		}
+
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+	spin_unlock(&xe->pinned.lock);
+
+	/*
+	 * Wait for all user BO evictions to complete, as those evictions
+	 * depend on the memory moved below.
+	 */
+	for_each_gt(gt, xe, id)
+		xe_gt_migrate_wait(gt);
+
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, &ww, 0, false);
+		ret = xe_bo_evict(bo, true);
+		xe_bo_unlock(bo, &ww);
+		xe_bo_put(bo);
+		if (ret)
+			return ret;
+
+		spin_lock(&xe->pinned.lock);
+	}
+	spin_unlock(&xe->pinned.lock);
+
+	return 0;
+}
+
+/**
+ * xe_bo_restore_kernel - restore kernel BOs to VRAM
+ * @xe: xe device
+ *
+ * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All
+ * moves done via TTM calls. This function should be called early, before
+ * trying to init the GT, on device resume.
+ *
+ * Return: 0 on success (always 0 on non-DGFX devices), negative error code if
+ * validating a BO fails.
+ */
+int xe_bo_restore_kernel(struct xe_device *xe)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_bo *bo;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.evicted,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, &ww, 0, false);
+		ret = xe_bo_validate(bo, NULL, false);
+		xe_bo_unlock(bo, &ww);
+		if (ret) {
+			xe_bo_put(bo);
+			return ret;
+		}
+
+		if (bo->flags & XE_BO_CREATE_GGTT_BIT)
+			xe_ggtt_map_bo(bo->gt->mem.ggtt, bo);
+
+		/*
+		 * We expect validate to trigger a move to VRAM and our move
+		 * code should set up the iosys map.
+		 */
+		XE_BUG_ON(iosys_map_is_null(&bo->vmap));
+		XE_BUG_ON(!xe_bo_is_vram(bo));
+
+		xe_bo_put(bo);
+
+		spin_lock(&xe->pinned.lock);
+	}
+	spin_unlock(&xe->pinned.lock);
+
+	return 0;
+}
+
+/**
+ * xe_bo_restore_user - restore pinned user BOs to VRAM
+ * @xe: xe device
+ *
+ * Validate every pinned user BO so it moves from temporary (typically system)
+ * memory back to VRAM, then call xe_gt_migrate_wait() on every GT. All moves
+ * done via TTM calls. Call this late, after GT init, on device resume.
+ *
+ * Return: 0 on success, negative error code if validating a BO fails.
+ */
+int xe_bo_restore_user(struct xe_device *xe)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_bo *bo;
+	struct xe_gt *gt;
+	struct list_head still_in_list;
+	u8 id;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/* Pinned user memory in VRAM should be validated on resume */
+	INIT_LIST_HEAD(&still_in_list);
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.external_vram,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		xe_bo_get(bo);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, &ww, 0, false);
+		ret = xe_bo_validate(bo, NULL, false);
+		xe_bo_unlock(bo, &ww);
+		xe_bo_put(bo);
+		if (ret) {
+			spin_lock(&xe->pinned.lock);
+			list_splice_tail(&still_in_list,
+					 &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+			return ret;
+		}
+
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+	spin_unlock(&xe->pinned.lock);
+
+	/* Wait for validate to complete */
+	for_each_gt(gt, xe, id)
+		xe_gt_migrate_wait(gt);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h
new file mode 100644
index 000000000000..746894798852
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_EVICT_H_
+#define _XE_BO_EVICT_H_
+
+struct xe_device;
+
+int xe_bo_evict_all(struct xe_device *xe);
+int xe_bo_restore_kernel(struct xe_device *xe);
+int xe_bo_restore_user(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
new file mode 100644
index 000000000000..06de3330211d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_TYPES_H_
+#define _XE_BO_TYPES_H_
+
+#include <linux/iosys-map.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_placement.h>
+
+struct xe_device;
+struct xe_vm;
+
+#define XE_BO_MAX_PLACEMENTS	3
+
+/** struct xe_bo - XE buffer object */
+struct xe_bo {
+	/** @ttm: TTM base buffer object */
+	struct ttm_buffer_object ttm;
+	/** @size: Size of this buffer object */
+	size_t size;
+	/** @flags: flags for this buffer object */
+	u32 flags;
+	/** @vm: VM this BO is attached to, for extobj this will be NULL */
+	struct xe_vm *vm;
+	/** @gt: GT this BO is attached to (kernel BO only) */
+	struct xe_gt *gt;
+	/** @vmas: List of VMAs for this BO */
+	struct list_head vmas;
+	/** @placements: valid placements for this BO */
+	struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
+	/** @placement: current placement for this BO */
+	struct ttm_placement placement;
+	/** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
+	struct drm_mm_node ggtt_node;
+	/** @vmap: iosys map of this buffer */
+	struct iosys_map vmap;
+	/** @kmap: TTM bo kmap object for internal use only. Keep off. */
+	struct ttm_bo_kmap_obj kmap;
+	/** @pinned_link: link to present / evicted list of pinned BO */
+	struct list_head pinned_link;
+	/** @props: BO user controlled properties */
+	struct {
+		/** @props.preferred_mem_class: preferred memory class for this BO */
+		s16 preferred_mem_class;
+		/** @props.preferred_gt: preferred GT for this BO */
+		s16 preferred_gt;
+		/** @props.preferred_mem_type: preferred memory type */
+		s32 preferred_mem_type;
+		/**
+		 * @props.cpu_atomic: the CPU expects to do atomic operations
+		 * to this BO
+		 */
+		bool cpu_atomic;
+		/**
+		 * @props.device_atomic: the device expects to do atomic
+		 * operations to this BO
+		 */
+		bool device_atomic;
+	} props;
+	/** @freed: List node for delayed put. */
+	struct llist_node freed;
+	/** @created: Whether the bo has passed initial creation */
+	bool created;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
new file mode 100644
index 000000000000..84db7b3f501e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/string_helpers.h>
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_debugfs.h"
+#include "xe_gt_debugfs.h"
+#include "xe_step.h"
+
+#ifdef CONFIG_DRM_XE_DEBUG
+#include "xe_bo_evict.h"
+#include "xe_migrate.h"
+#include "xe_vm.h"
+#endif
+
+static struct xe_device *node_to_xe(struct drm_info_node *node)
+{
+	return to_xe_device(node->minor->dev);
+}
+
+static int info(struct seq_file *m, void *data)
+{
+	struct xe_device *xe = node_to_xe(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_gt *gt;
+	u8 id;
+
+	drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100);
+	drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100);
+	drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n",
+		   xe_step_name(xe->info.step.graphics),
+		   xe_step_name(xe->info.step.media),
+		   xe_step_name(xe->info.step.display),
+		   xe_step_name(xe->info.step.basedie));
+	drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx));
+	drm_printf(&p, "platform %d\n", xe->info.platform);
+	drm_printf(&p, "subplatform %d\n",
+		   xe->info.subplatform > XE_SUBPLATFORM_NONE ? xe->info.subplatform : 0);
+	drm_printf(&p, "devid 0x%x\n", xe->info.devid);
+	drm_printf(&p, "revid %d\n", xe->info.revid);
+	drm_printf(&p, "tile_count %d\n", xe->info.tile_count);
+	drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level);
+	drm_printf(&p, "enable_guc %s\n", str_yes_no(xe->info.enable_guc));
+	drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm));
+	drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
+	for_each_gt(gt, xe, id) {
+		drm_printf(&p, "gt%d force wake %d\n", id,
+			   xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT));
+		drm_printf(&p, "gt%d engine_mask 0x%llx\n", id,
+			   gt->info.engine_mask);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"info", info, 0},
+};
+
+static int forcewake_open(struct inode *inode, struct file *file)
+{
+	struct xe_device *xe = inode->i_private;
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	return 0;
+}
+
+static int forcewake_release(struct inode *inode, struct file *file)
+{
+	struct xe_device *xe = inode->i_private;
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	return 0;
+}
+
+static const struct file_operations forcewake_all_fops = {
+	.owner = THIS_MODULE,
+	.open = forcewake_open,
+	.release = forcewake_release,
+};
+
+/* Register device-level debugfs files, TTM manager files and per-GT entries. */
+void xe_debugfs_register(struct xe_device *xe)
+{
+	struct ttm_device *bdev = &xe->ttm;
+	struct drm_minor *minor = xe->drm.primary;
+	struct dentry *root = minor->debugfs_root;
+	struct ttm_resource_manager *man;
+	struct xe_gt *gt;
+	u32 mem_type;
+	u8 id;
+
+	drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list),
+				 root, minor);
+
+	debugfs_create_file("forcewake_all", 0400, root, xe,
+			    &forcewake_all_fops);
+
+	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+		man = ttm_manager_type(bdev, mem_type);
+		if (man) {
+			char name[16];
+
+			snprintf(name, sizeof(name), "vram%u_mm",
+				 mem_type - XE_PL_VRAM0);
+			ttm_resource_manager_create_debugfs(man, root, name);
+		}
+	}
+
+	man = ttm_manager_type(bdev, XE_PL_TT);
+	ttm_resource_manager_create_debugfs(man, root, "gtt_mm");
+
+	for_each_gt(gt, xe, id)
+		xe_gt_debugfs_register(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h
new file mode 100644
index 000000000000..715b8e2e0bd9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DEBUGFS_H_
+#define _XE_DEBUGFS_H_
+
+struct xe_device;
+
+void xe_debugfs_register(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
new file mode 100644
index 000000000000..93dea2b9c464
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_aperture.h>
+#include <drm/drm_ioctl.h>
+#include <drm/xe_drm.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_atomic_helper.h>
+
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_dma_buf.h"
+#include "xe_drv.h"
+#include "xe_engine.h"
+#include "xe_exec.h"
+#include "xe_gt.h"
+#include "xe_irq.h"
+#include "xe_module.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_vm.h"
+#include "xe_vm_madvise.h"
+#include "xe_wait_user_fence.h"
+
+/*
+ * DRM .open callback: allocates the per-file xe state, sets up the VM and
+ * engine handle tables together with their locks, and links the state to
+ * the DRM file through file->driver_priv.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct xe_file *xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+
+	if (!xef)
+		return -ENOMEM;
+
+	/* VM handle table and the lock guarding it */
+	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+	mutex_init(&xef->vm.lock);
+
+	/* Engine handle table and the lock guarding it */
+	xa_init_flags(&xef->engine.xa, XA_FLAGS_ALLOC1);
+	mutex_init(&xef->engine.lock);
+
+	xef->drm = file;
+	file->driver_priv = xef;
+
+	return 0;
+}
+
+static void device_kill_persitent_engines(struct xe_device *xe,
+					  struct xe_file *xef);
+
+/*
+ * DRM .postclose callback: tears down everything xe_file_open() and later
+ * ioctls attached to the file.
+ *
+ * Every engine still in the file's table is killed and its reference
+ * dropped, persistent engines owned by the file are killed, every VM still
+ * in the table is closed and put, and finally the tables, locks and the
+ * xe_file itself are released.
+ */
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = file->driver_priv;
+	struct xe_vm *vm;
+	struct xe_engine *e;
+	unsigned long idx;
+
+	mutex_lock(&xef->engine.lock);
+	xa_for_each(&xef->engine.xa, idx, e) {
+		xe_engine_kill(e);
+		xe_engine_put(e);
+	}
+	mutex_unlock(&xef->engine.lock);
+	/* Free the xarray's internal nodes; the entries were put above */
+	xa_destroy(&xef->engine.xa);
+	mutex_destroy(&xef->engine.lock);
+	device_kill_persitent_engines(xe, xef);
+
+	mutex_lock(&xef->vm.lock);
+	xa_for_each(&xef->vm.xa, idx, vm)
+		xe_vm_close_and_put(vm);
+	mutex_unlock(&xef->vm.lock);
+	/* Same for the VM table, otherwise its nodes leak with the file */
+	xa_destroy(&xef->vm.xa);
+	mutex_destroy(&xef->vm.lock);
+
+	kfree(xef);
+}
+
+/*
+ * Driver-private ioctl table. Every entry is flagged DRM_RENDER_ALLOW,
+ * i.e. none of them is restricted to the primary node.
+ */
+static const struct drm_ioctl_desc xe_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_ENGINE_SET_PROPERTY, xe_engine_set_property_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
+};
+
+/*
+ * File operations of the DRM device nodes; everything is delegated to the
+ * generic DRM/GEM helpers.
+ *
+ * NOTE(review): .compat_ioctl is not set - the only trace of it is the
+ * commented-out i915 handler below. Confirm whether a compat handler is
+ * needed before removing that line.
+ */
+static const struct file_operations xe_driver_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release_noglobal,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = drm_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+//	.compat_ioctl = i915_ioc32_compat_ioctl,
+	.llseek = noop_llseek,
+};
+
+/*
+ * drm_driver .release callback: clears the PCI driver data so that
+ * pci_get_drvdata() no longer returns the xe device being released.
+ */
+static void xe_driver_release(struct drm_device *dev)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
+}
+
+/*
+ * DRM driver description: a GEM + render-node driver with (timeline)
+ * syncobj support. Per-file state is managed by xe_file_open() and
+ * xe_file_close(); the ioctl table is xe_ioctls.
+ */
+static struct drm_driver driver = {
+	/* Don't use MTRRs here; the Xserver or userspace app should
+	 * deal with them for Intel hardware.
+	 */
+	.driver_features =
+	    DRIVER_GEM |
+	    DRIVER_RENDER | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
+	.open = xe_file_open,
+	.postclose = xe_file_close,
+
+	.gem_prime_import = xe_gem_prime_import,
+
+	.dumb_create = xe_bo_dumb_create,
+	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+	.release = &xe_driver_release,
+
+	.ioctls = xe_ioctls,
+	.num_ioctls = ARRAY_SIZE(xe_ioctls),
+	.fops = &xe_driver_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
+
+/*
+ * DRM-managed release action registered by xe_device_create(): destroys
+ * the ordered workqueue, the persistent-engines lock and the TTM device.
+ * @dummy is the (NULL) action argument and is not used.
+ */
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	destroy_workqueue(xe->ordered_wq);
+	mutex_destroy(&xe->persitent_engines.lock);
+	ttm_device_fini(&xe->ttm);
+}
+
+/**
+ * xe_device_create - allocate and initialise the software state of a device
+ * @pdev: PCI device being probed
+ * @ent: matching PCI ID table entry (not used by this function)
+ *
+ * Removes conflicting framebuffer drivers, allocates the device-managed
+ * &struct xe_device, initialises TTM plus the locks, lists and workqueue
+ * embedded in the device, and registers xe_device_destroy() as a
+ * DRM-managed release action.
+ *
+ * The DRM device comes from devm_drm_dev_alloc(), so its initial reference
+ * is dropped by devres when the PCI device is unbound or probing fails.
+ * The error paths therefore must not call drm_dev_put() themselves; doing
+ * so would drop that reference twice.
+ *
+ * Return: the new device, or an ERR_PTR() on failure.
+ */
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+				   const struct pci_device_id *ent)
+{
+	struct xe_device *xe;
+	int err;
+
+	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+	if (err)
+		return ERR_PTR(err);
+
+	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+	if (IS_ERR(xe))
+		return xe;
+
+	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+			      xe->drm.anon_inode->i_mapping,
+			      xe->drm.vma_offset_manager, false, false);
+	if (WARN_ON(err))
+		return ERR_PTR(err);
+
+	xe->info.devid = pdev->device;
+	xe->info.revid = pdev->revision;
+	xe->info.enable_guc = enable_guc;
+
+	spin_lock_init(&xe->irq.lock);
+
+	init_waitqueue_head(&xe->ufence_wq);
+
+	mutex_init(&xe->usm.lock);
+	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1);
+
+	mutex_init(&xe->persitent_engines.lock);
+	INIT_LIST_HEAD(&xe->persitent_engines.list);
+
+	spin_lock_init(&xe->pinned.lock);
+	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+	INIT_LIST_HEAD(&xe->pinned.external_vram);
+	INIT_LIST_HEAD(&xe->pinned.evicted);
+
+	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+	if (!xe->ordered_wq) {
+		/* xe_device_destroy() is not registered yet: undo by hand */
+		err = -ENOMEM;
+		goto err_ttm_fini;
+	}
+
+	mutex_init(&xe->sb_lock);
+	xe->enabled_irq_mask = ~0;
+
+	/* On failure the _or_reset variant already ran xe_device_destroy() */
+	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+	if (err)
+		return ERR_PTR(err);
+
+	mutex_init(&xe->mem_access.lock);
+	return xe;
+
+err_ttm_fini:
+	mutex_destroy(&xe->persitent_engines.lock);
+	ttm_device_fini(&xe->ttm);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_device_probe - bring up the hardware-facing parts of an xe device
+ * @xe: device returned by xe_device_create()
+ *
+ * Runs the init stages in order: per-GT allocation, MMIO setup, per-GT
+ * pcode probe, IRQ installation, the early / noalloc / full per-GT init
+ * stages (with VRAM probing after the early stage), DRM device
+ * registration and debugfs registration.
+ *
+ * Failures before xe_irq_install() return directly; failures after it shut
+ * the interrupts down again before returning.
+ *
+ * NOTE(review): no other unwinding is visible here (nothing undoes the
+ * per-GT init stages, for example); presumably that is left to managed
+ * release actions - confirm.
+ *
+ * Return: 0 on success, the error of the failing stage otherwise.
+ */
+int xe_device_probe(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	int err;
+	u8 id;
+
+	xe->info.mem_region_mask = 1;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_alloc(xe, gt);
+		if (err)
+			return err;
+	}
+
+	err = xe_mmio_init(xe);
+	if (err)
+		return err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_pcode_probe(gt);
+		if (err)
+			return err;
+	}
+
+	err = xe_irq_install(xe);
+	if (err)
+		return err;
+
+	/* From here on every failure has to shut the interrupts down again */
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init_early(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	err = xe_mmio_probe_vram(xe);
+	if (err)
+		goto err_irq_shutdown;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init_noalloc(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	err = drm_dev_register(&xe->drm, 0);
+	if (err)
+		goto err_irq_shutdown;
+
+	xe_debugfs_register(xe);
+
+	return 0;
+
+err_irq_shutdown:
+	xe_irq_shutdown(xe);
+	return err;
+}
+
+/**
+ * xe_device_remove - counterpart of xe_device_probe()
+ * @xe: xe device
+ *
+ * Currently only shuts down interrupt handling.
+ */
+void xe_device_remove(struct xe_device *xe)
+{
+	xe_irq_shutdown(xe);
+}
+
+/**
+ * xe_device_shutdown - shutdown hook for an xe device
+ * @xe: xe device
+ *
+ * Intentionally empty for now.
+ */
+void xe_device_shutdown(struct xe_device *xe)
+{
+}
+
+/**
+ * xe_device_add_persitent_engines - track an engine on the device-wide list
+ * @xe: xe device
+ * @e: engine to append to xe->persitent_engines.list
+ *
+ * The list is modified under xe->persitent_engines.lock.
+ */
+void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e)
+{
+	mutex_lock(&xe->persitent_engines.lock);
+	list_add_tail(&e->persitent.link, &xe->persitent_engines.list);
+	mutex_unlock(&xe->persitent_engines.lock);
+}
+
+/**
+ * xe_device_remove_persitent_engines - stop tracking a persistent engine
+ * @xe: xe device
+ * @e: engine to unlink
+ *
+ * Unlinks @e from the device-wide list if it is currently linked; an
+ * engine whose link is empty is left untouched. Runs under
+ * xe->persitent_engines.lock.
+ */
+void xe_device_remove_persitent_engines(struct xe_device *xe,
+					struct xe_engine *e)
+{
+	struct list_head *link = &e->persitent.link;
+
+	mutex_lock(&xe->persitent_engines.lock);
+	if (!list_empty(link))
+		list_del(link);
+	mutex_unlock(&xe->persitent_engines.lock);
+}
+
+/*
+ * Kill every persistent engine that belongs to @xef and take it off the
+ * device-wide list. Engines owned by other files are skipped. The list
+ * walk runs under xe->persitent_engines.lock.
+ */
+static void device_kill_persitent_engines(struct xe_device *xe,
+					  struct xe_file *xef)
+{
+	struct xe_engine *engine, *tmp;
+
+	mutex_lock(&xe->persitent_engines.lock);
+	list_for_each_entry_safe(engine, tmp, &xe->persitent_engines.list,
+				 persitent.link) {
+		if (engine->persitent.xef != xef)
+			continue;
+
+		xe_engine_kill(engine);
+		list_del_init(&engine->persitent.link);
+	}
+	mutex_unlock(&xe->persitent_engines.lock);
+}
+
+#define SOFTWARE_FLAGS_SPR33         _MMIO(0x4F084)
+
+/**
+ * xe_device_wmb - device-wide write barrier
+ * @xe: xe device
+ *
+ * Issues wmb() and, on discrete devices only, follows it with a write of 0
+ * to SOFTWARE_FLAGS_SPR33 through GT 0.
+ */
+void xe_device_wmb(struct xe_device *xe)
+{
+	struct xe_gt *gt0 = xe_device_get_gt(xe, 0);
+
+	wmb();
+	if (!IS_DGFX(xe))
+		return;
+
+	xe_mmio_write32(gt0, SOFTWARE_FLAGS_SPR33.reg, 0);
+}
+
+/**
+ * xe_device_ccs_bytes - CCS bytes needed for a buffer of a given size
+ * @xe: xe device
+ * @size: buffer size in bytes
+ *
+ * Return: @size divided by NUM_BYTES_PER_CCS_BYTE, rounded up, when the
+ * device uses flat CCS; 0 otherwise.
+ */
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+	if (!xe_device_has_flat_ccs(xe))
+		return 0;
+
+	return DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE);
+}
+
+/**
+ * xe_device_mem_access_get - take a memory-access reference on the device
+ * @xe: xe device
+ *
+ * Resumes the device first if it was runtime suspended. The first
+ * reference (0 -> 1) additionally tries to take a runtime-PM reference and
+ * records in mem_access.hold_rpm whether it got one, so that the last
+ * xe_device_mem_access_put() knows whether to drop it again.
+ *
+ * NOTE(review): the final XE_WARN_ON() reads mem_access.ref after the lock
+ * has been released - confirm that this unlocked read is intended.
+ */
+void xe_device_mem_access_get(struct xe_device *xe)
+{
+	bool resumed = xe_pm_runtime_resume_if_suspended(xe);
+
+	mutex_lock(&xe->mem_access.lock);
+	if (xe->mem_access.ref++ == 0)
+		xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe);
+	mutex_unlock(&xe->mem_access.lock);
+
+	/* The usage counter increased if device was immediately resumed */
+	if (resumed)
+		xe_pm_runtime_put(xe);
+
+	XE_WARN_ON(xe->mem_access.ref == U32_MAX);
+}
+
+/**
+ * xe_device_mem_access_put - drop a memory-access reference on the device
+ * @xe: xe device
+ *
+ * Dropping the last reference also releases the runtime-PM reference if
+ * one was taken when the first reference was acquired
+ * (mem_access.hold_rpm).
+ */
+void xe_device_mem_access_put(struct xe_device *xe)
+{
+	mutex_lock(&xe->mem_access.lock);
+	/*
+	 * The counter is unsigned, so a "< 0" test after the decrement can
+	 * never fire. Catch an unbalanced put before the counter wraps, and
+	 * do it while the lock is still held.
+	 */
+	XE_WARN_ON(!xe->mem_access.ref);
+	if (--xe->mem_access.ref == 0 && xe->mem_access.hold_rpm)
+		xe_pm_runtime_put(xe);
+	mutex_unlock(&xe->mem_access.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
new file mode 100644
index 000000000000..88d55671b068
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_H_
+#define _XE_DEVICE_H_
+
+struct xe_engine;
+struct xe_file;
+
+#include <drm/drm_util.h>
+
+#include "xe_device_types.h"
+#include "xe_macros.h"
+#include "xe_force_wake.h"
+
+#include "gt/intel_gpu_commands.h"
+
+/* Convert an embedded &struct drm_device back to its containing xe device. */
+static inline struct xe_device *to_xe_device(const struct drm_device *dev)
+{
+	return container_of(dev, struct xe_device, drm);
+}
+
+/* Return whatever is stored as the PCI driver data of @pdev as an xe device. */
+static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev)
+{
+	return pci_get_drvdata(pdev);
+}
+
+/* Convert an embedded &struct ttm_device back to its containing xe device. */
+static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm)
+{
+	return container_of(ttm, struct xe_device, ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+				   const struct pci_device_id *ent);
+int xe_device_probe(struct xe_device *xe);
+void xe_device_remove(struct xe_device *xe);
+void xe_device_shutdown(struct xe_device *xe);
+
+void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e);
+void xe_device_remove_persitent_engines(struct xe_device *xe,
+					struct xe_engine *e);
+
+void xe_device_wmb(struct xe_device *xe);
+
+/* Return the per-file xe state stored in @file->driver_priv. */
+static inline struct xe_file *to_xe_file(const struct drm_file *file)
+{
+	return file->driver_priv;
+}
+
+/**
+ * xe_device_get_gt - look up a GT of the device by index
+ * @xe: xe device
+ * @gt_id: index into xe->gt[], must be below XE_MAX_GT
+ *
+ * Asserts that the index is in range, that the GT stored there carries the
+ * same id and that the GT has been given a type.
+ *
+ * Return: pointer to the GT.
+ */
+static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
+{
+	struct xe_gt *gt;
+
+	/* xe->gt[] has XE_MAX_GT entries, so XE_MAX_GT itself is out of range */
+	XE_BUG_ON(gt_id >= XE_MAX_GT);
+	gt = xe->gt + gt_id;
+	XE_BUG_ON(gt->info.id != gt_id);
+	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+
+	return gt;
+}
+
+/*
+ * Return the first GT of the device (xe->gt[0]).
+ *
+ * FIXME: Placeholder until multi-gt lands. Once that lands, kill this function.
+ */
+static inline struct xe_gt *to_gt(struct xe_device *xe)
+{
+	return xe->gt;
+}
+
+/* Report the device's info.enable_guc flag. */
+static inline bool xe_device_guc_submission_enabled(struct xe_device *xe)
+{
+	return xe->info.enable_guc;
+}
+
+/* Clear the device's info.enable_guc flag. */
+static inline void xe_device_guc_submission_disable(struct xe_device *xe)
+{
+	xe->info.enable_guc = false;
+}
+
+/*
+ * Iterate over the GTs of a device: @id__ runs from 0 up to (excluding)
+ * info.tile_count and @gt__ is set to the matching GT on every iteration.
+ * The increment is applied to the parenthesised argument so that the macro
+ * stays well-formed for any lvalue expression passed as @id__.
+ */
+#define for_each_gt(gt__, xe__, id__) \
+	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \
+		for_each_if ((gt__) = xe_device_get_gt((xe__), (id__)))
+
+/* Return the force-wake state embedded in the GT's MMIO info. */
+static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
+{
+	return &gt->mmio.fw;
+}
+
+void xe_device_mem_access_get(struct xe_device *xe);
+void xe_device_mem_access_put(struct xe_device *xe);
+
+/*
+ * Warn if no memory-access reference is currently held on the device.
+ * The counter is read without taking mem_access.lock.
+ */
+static inline void xe_device_assert_mem_access(struct xe_device *xe)
+{
+	XE_WARN_ON(!xe->mem_access.ref);
+}
+
+/*
+ * Return true if at least one memory-access reference is held. The
+ * counter is sampled under mem_access.lock.
+ */
+static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
+{
+	bool ongoing;
+
+	mutex_lock(&xe->mem_access.lock);
+	ongoing = xe->mem_access.ref != 0;
+	mutex_unlock(&xe->mem_access.lock);
+
+	return ongoing;
+}
+
+/* True if the device's count of VMs in fault mode is non-zero. */
+static inline bool xe_device_in_fault_mode(struct xe_device *xe)
+{
+	return xe->usm.num_vm_in_fault_mode != 0;
+}
+
+/* True if the device's count of VMs in non-fault mode is non-zero. */
+static inline bool xe_device_in_non_fault_mode(struct xe_device *xe)
+{
+	return xe->usm.num_vm_in_non_fault_mode != 0;
+}
+
+/* Report the device's info.has_flat_ccs flag. */
+static inline bool xe_device_has_flat_ccs(struct xe_device *xe)
+{
+	return xe->info.has_flat_ccs;
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
new file mode 100644
index 000000000000..d62ee85bfcbe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_TYPES_H_
+#define _XE_DEVICE_TYPES_H_
+
+#include <linux/pci.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
+
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+#include "xe_step_types.h"
+
+#define XE_BO_INVALID_OFFSET	LONG_MAX
+
+/*
+ * IP versions are stored multiplied by 100 in the device info; the plain
+ * *_VER() accessors return the integer part only.
+ */
+#define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100)
+#define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100)
+#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
+#define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
+#define IS_DGFX(xe) ((xe)->info.is_dgfx)
+
+#define XE_VRAM_FLAGS_NEED64K		BIT(0)
+
+/* GT indices; XE_MAX_GT is the number of slots in xe_device.gt[] */
+#define XE_GT0		0
+#define XE_GT1		1
+#define XE_MAX_GT	(XE_GT1 + 1)
+
+#define XE_MAX_ASID	(BIT(20))
+
+/*
+ * Stepping checks against the graphics stepping of the device. The range
+ * is half-open: min_step is included, max_step is excluded.
+ */
+#define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step)	\
+	((_xe)->info.platform == (_platform) &&			\
+	 (_xe)->info.step.graphics >= (min_step) &&		\
+	 (_xe)->info.step.graphics < (max_step))
+#define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step)	\
+	((_xe)->info.platform == (_platform) &&				\
+	 (_xe)->info.subplatform == (sub) &&				\
+	 (_xe)->info.step.graphics >= (min_step) &&			\
+	 (_xe)->info.step.graphics < (max_step))
+
+/**
+ * struct xe_device - Top level struct of XE device
+ */
+struct xe_device {
+	/** @drm: drm device */
+	struct drm_device drm;
+
+	/** @info: device info */
+	struct intel_device_info {
+		/** @graphics_verx100: graphics IP version */
+		u32 graphics_verx100;
+		/** @media_verx100: media IP version */
+		u32 media_verx100;
+		/** @mem_region_mask: mask of valid memory regions */
+		u32 mem_region_mask;
+		/** @is_dgfx: is discrete device */
+		bool is_dgfx;
+		/** @platform: XE platform enum */
+		enum xe_platform platform;
+		/** @subplatform: XE subplatform enum */
+		enum xe_subplatform subplatform;
+		/** @devid: device ID */
+		u16 devid;
+		/** @revid: device revision */
+		u8 revid;
+		/** @step: stepping information for each IP */
+		struct xe_step_info step;
+		/** @dma_mask_size: DMA address bits */
+		u8 dma_mask_size;
+		/** @vram_flags: Vram flags */
+		u8 vram_flags;
+		/** @tile_count: Number of tiles */
+		u8 tile_count;
+		/** @vm_max_level: Max VM level */
+		u8 vm_max_level;
+		/** @media_ver: Media version */
+		u8 media_ver;
+		/** @supports_usm: Supports unified shared memory */
+		bool supports_usm;
+		/** @enable_guc: GuC submission enabled */
+		bool enable_guc;
+		/** @has_flat_ccs: Whether flat CCS metadata is used */
+		bool has_flat_ccs;
+		/** @has_4tile: Whether tile-4 tiling is supported */
+		bool has_4tile;
+	} info;
+
+	/** @irq: device interrupt state */
+	struct {
+		/** @lock: lock for processing irq's on this device */
+		spinlock_t lock;
+
+		/** @enabled: interrupts enabled on this device */
+		bool enabled;
+	} irq;
+
+	/** @ttm: ttm device */
+	struct ttm_device ttm;
+
+	/** @mmio: mmio info for device */
+	struct {
+		/** @size: size of MMIO space for device */
+		size_t size;
+		/** @regs: pointer to MMIO space for device */
+		void *regs;
+	} mmio;
+
+	/** @mem: memory info for device */
+	struct {
+		/** @vram: VRAM info for device */
+		struct {
+			/** @io_start: start address of VRAM */
+			resource_size_t io_start;
+			/** @size: size of VRAM */
+			resource_size_t size;
+			/**
+			 * @mapping: pointer to VRAM mappable space; the
+			 * pointee (not the pointer) lives in I/O memory
+			 */
+			void __iomem *mapping;
+		} vram;
+	} mem;
+
+	/** @usm: unified memory state */
+	struct {
+		/** @asid_to_vm: convert an ASID to VM */
+		struct xarray asid_to_vm;
+		/** @next_asid: next ASID, used to cyclical alloc asids */
+		u32 next_asid;
+		/** @num_vm_in_fault_mode: number of VM in fault mode */
+		u32 num_vm_in_fault_mode;
+		/** @num_vm_in_non_fault_mode: number of VM in non-fault mode */
+		u32 num_vm_in_non_fault_mode;
+		/** @lock: protects UM state */
+		struct mutex lock;
+	} usm;
+
+	/** @persitent_engines: engines that are closed but still running */
+	struct {
+		/** @lock: protects persitent engines */
+		struct mutex lock;
+		/** @list: list of persitent engines */
+		struct list_head list;
+	} persitent_engines;
+
+	/** @pinned: pinned BO state */
+	struct {
+		/** @lock: protects pinned BO list state */
+		spinlock_t lock;
+		/** @kernel_bo_present: pinned kernel BO that are present */
+		struct list_head kernel_bo_present;
+		/** @evicted: pinned BO that have been evicted */
+		struct list_head evicted;
+		/** @external_vram: pinned external BO in vram */
+		struct list_head external_vram;
+	} pinned;
+
+	/** @ufence_wq: user fence wait queue */
+	wait_queue_head_t ufence_wq;
+
+	/** @ordered_wq: used to serialize compute mode resume */
+	struct workqueue_struct *ordered_wq;
+
+	/** @gt: graphics tile */
+	struct xe_gt gt[XE_MAX_GT];
+
+	/**
+	 * @mem_access: keep track of memory access in the device, possibly
+	 * triggering additional actions when they occur.
+	 */
+	struct {
+		/** @lock: protect the ref count */
+		struct mutex lock;
+		/** @ref: ref count of memory accesses */
+		u32 ref;
+		/** @hold_rpm: need to put rpm ref back at the end */
+		bool hold_rpm;
+	} mem_access;
+
+	/** @d3cold_allowed: Indicates if d3cold is a valid device state */
+	bool d3cold_allowed;
+
+	/** @sb_lock: lock used for pcode */
+	struct mutex sb_lock;
+
+	/** @enabled_irq_mask: set to ~0 when the device is created */
+	u32 enabled_irq_mask;
+};
+
+/**
+ * struct xe_file - file handle for XE driver
+ */
+struct xe_file {
+	/** @drm: base DRM file */
+	struct drm_file *drm;
+
+	/** @vm: VM state for file */
+	struct {
+		/** @xa: xarray to store VMs */
+		struct xarray xa;
+		/** @lock: protects file VM state */
+		struct mutex lock;
+	} vm;
+
+	/** @engine: Submission engine state for file */
+	struct {
+		/** @xa: xarray to store engines */
+		struct xarray xa;
+		/** @lock: protects file engine state */
+		struct mutex lock;
+	} engine;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
new file mode 100644
index 000000000000..d09ff25bd940
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_prime.h>
+
+#include <drm/ttm/ttm_tt.h>
+
+#include <kunit/test.h>
+#include <linux/pci-p2pdma.h>
+
+#include "tests/xe_test.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_dma_buf.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_vm.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+/*
+ * dma-buf .attach callback.
+ *
+ * Peer-to-peer is switched off for the attachment when
+ * pci_p2pdma_distance() between the exporting PCI device and the importer
+ * is negative. An attachment without peer-to-peer is refused with
+ * -EOPNOTSUPP if the BO cannot be migrated to XE_PL_TT.
+ *
+ * On success a device memory-access reference is taken; it is dropped
+ * again in xe_dma_buf_detach().
+ */
+static int xe_dma_buf_attach(struct dma_buf *dmabuf,
+			     struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+
+	if (attach->peer2peer &&
+	    pci_p2pdma_distance(to_pci_dev(obj->dev->dev), attach->dev, false) < 0)
+		attach->peer2peer = false;
+
+	if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT))
+		return -EOPNOTSUPP;
+
+	xe_device_mem_access_get(to_xe_device(obj->dev));
+	return 0;
+}
+
+/*
+ * dma-buf .detach callback: drops the device memory-access reference taken
+ * by xe_dma_buf_attach().
+ */
+static void xe_dma_buf_detach(struct dma_buf *dmabuf,
+			      struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+
+	xe_device_mem_access_put(to_xe_device(obj->dev));
+}
+
+/*
+ * dma-buf .pin callback: tries to move the BO to XE_PL_TT and then pins
+ * it as an external BO. The migration is best effort (its result is
+ * discarded on purpose) and the callback always returns 0.
+ *
+ * NOTE(review): the result of xe_bo_pin_external() is not looked at
+ * either; if it can fail, that failure is silently lost - confirm.
+ */
+static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	/*
+	 * Migrate to TT first to increase the chance of non-p2p clients
+	 * can attach.
+	 */
+	(void)xe_bo_migrate(bo, XE_PL_TT);
+	xe_bo_pin_external(bo);
+
+	return 0;
+}
+
+/* dma-buf .unpin callback: undoes the external pin of xe_dma_buf_pin(). */
+static void xe_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	xe_bo_unpin_external(bo);
+}
+
+/*
+ * dma-buf .map_dma_buf callback: build a DMA-mapped scatter/gather table
+ * for the BO behind @attach.
+ *
+ * A BO that is not pinned is first moved (or validated in place) when the
+ * importer cannot do peer-to-peer or the BO currently sits in
+ * XE_PL_SYSTEM. The table is then built from the backing pages for
+ * XE_PL_TT, or by the VRAM manager for the VRAM placements.
+ *
+ * Returns the table or an ERR_PTR(): -EOPNOTSUPP when a non-p2p importer
+ * meets a BO that cannot go to TT, -EBUSY when dma_map_sgtable() fails,
+ * -EINVAL for any other placement, or the error of the failing helper.
+ */
+static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
+				       enum dma_data_direction dir)
+{
+	struct dma_buf *dma_buf = attach->dmabuf;
+	struct drm_gem_object *obj = dma_buf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	struct sg_table *sgt;
+	int r = 0;
+
+	if (!attach->peer2peer && !xe_bo_can_migrate(bo, XE_PL_TT))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (!xe_bo_is_pinned(bo)) {
+		if (!attach->peer2peer ||
+		    bo->ttm.resource->mem_type == XE_PL_SYSTEM) {
+			if (xe_bo_can_migrate(bo, XE_PL_TT))
+				r = xe_bo_migrate(bo, XE_PL_TT);
+			else
+				r = xe_bo_validate(bo, NULL, false);
+		}
+		if (r)
+			return ERR_PTR(r);
+	}
+
+	switch (bo->ttm.resource->mem_type) {
+	case XE_PL_TT:
+		sgt = drm_prime_pages_to_sg(obj->dev,
+					    bo->ttm.ttm->pages,
+					    bo->ttm.ttm->num_pages);
+		if (IS_ERR(sgt))
+			return sgt;
+
+		if (dma_map_sgtable(attach->dev, sgt, dir,
+				    DMA_ATTR_SKIP_CPU_SYNC))
+			goto error_free;
+		break;
+
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		r = xe_ttm_vram_mgr_alloc_sgt(xe_bo_device(bo),
+					      bo->ttm.resource, 0,
+					      bo->ttm.base.size, attach->dev,
+					      dir, &sgt);
+		if (r)
+			return ERR_PTR(r);
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	return sgt;
+
+error_free:
+	/* Only reached from the TT case: free the not-yet-mapped table */
+	sg_free_table(sgt);
+	kfree(sgt);
+	return ERR_PTR(-EBUSY);
+}
+
+/*
+ * dma-buf .unmap_dma_buf callback: release a table handed out by
+ * xe_dma_buf_map(). Tables of VRAM BOs go back to the VRAM manager; all
+ * others are DMA-unmapped and freed here.
+ */
+static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
+			     struct sg_table *sgt,
+			     enum dma_data_direction dir)
+{
+	struct xe_bo *bo = gem_to_xe_bo(attach->dmabuf->priv);
+
+	if (xe_bo_is_vram(bo)) {
+		xe_ttm_vram_mgr_free_sgt(attach->dev, dir, sgt);
+		return;
+	}
+
+	dma_unmap_sgtable(attach->dev, sgt, dir, 0);
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
+/*
+ * dma-buf .begin_cpu_access callback. For accesses that read from the
+ * buffer (DMA_BIDIRECTIONAL or DMA_FROM_DEVICE) the BO is locked and a
+ * best-effort migration to XE_PL_TT is attempted; the migration result is
+ * deliberately ignored. Always returns 0.
+ */
+static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+				       enum dma_data_direction direction)
+{
+	struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv);
+
+	if (direction != DMA_BIDIRECTIONAL && direction != DMA_FROM_DEVICE)
+		return 0;
+
+	xe_bo_lock_no_vm(bo, NULL);
+	(void)xe_bo_migrate(bo, XE_PL_TT);
+	xe_bo_unlock_no_vm(bo);
+
+	return 0;
+}
+
+/*
+ * dma-buf operations installed on buffers exported by
+ * xe_gem_prime_export(). xe_gem_prime_import() compares against this
+ * table to recognise buffers that were exported by xe.
+ */
+const struct dma_buf_ops xe_dmabuf_ops = {
+	.attach = xe_dma_buf_attach,
+	.detach = xe_dma_buf_detach,
+	.pin = xe_dma_buf_pin,
+	.unpin = xe_dma_buf_unpin,
+	.map_dma_buf = xe_dma_buf_map,
+	.unmap_dma_buf = xe_dma_buf_unmap,
+	.release = drm_gem_dmabuf_release,
+	.begin_cpu_access = xe_dma_buf_begin_cpu_access,
+	.mmap = drm_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
+
+/**
+ * xe_gem_prime_export - export a GEM object as a dma-buf
+ * @obj: GEM object to export
+ * @flags: flags forwarded to drm_gem_prime_export()
+ *
+ * BOs that have a VM set (bo->vm) cannot be exported. On success the
+ * generic dma-buf ops are replaced by xe_dmabuf_ops.
+ *
+ * Return: the dma-buf, ERR_PTR(-EPERM) for a BO with a VM, or the error
+ * from drm_gem_prime_export().
+ */
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
+{
+	struct dma_buf *dmabuf;
+
+	if (gem_to_xe_bo(obj)->vm)
+		return ERR_PTR(-EPERM);
+
+	dmabuf = drm_gem_prime_export(obj, flags);
+	if (IS_ERR(dmabuf))
+		return dmabuf;
+
+	dmabuf->ops = &xe_dmabuf_ops;
+
+	return dmabuf;
+}
+
+/*
+ * Turn the pre-allocated @storage into an sg-type BO that shares the
+ * reservation object of @dma_buf. The BO is created with the reservation
+ * lock held; the lock is released again before returning.
+ *
+ * Returns the GEM object embedded in the new BO, or an ERR_PTR().
+ */
+static struct drm_gem_object *
+xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
+		    struct dma_buf *dma_buf)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct dma_resv *resv = dma_buf->resv;
+	struct xe_bo *bo;
+
+	dma_resv_lock(resv, NULL);
+	bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size,
+				   ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+	dma_resv_unlock(resv);
+
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+
+	return &bo->ttm.base;
+}
+
+/*
+ * Importer-side .move_notify callback: evicts the importing BO stored in
+ * attach->importer_priv. A failing eviction only triggers XE_WARN_ON().
+ */
+static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->importer_priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	XE_WARN_ON(xe_bo_evict(bo, false));
+}
+
+/*
+ * Attach ops used when xe imports a dma-buf: peer-to-peer is allowed and
+ * exporter moves are handled by xe_dma_buf_move_notify().
+ */
+static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
+	.allow_peer2peer = true,
+	.move_notify = xe_dma_buf_move_notify
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+
+/*
+ * Parameters a KUnit test can use to steer xe_gem_prime_import():
+ * @attach_ops replaces the default importer attach ops and
+ * @force_different_devices disables the same-device shortcut.
+ * NOTE(review): @mem_mask is not used by the code in this file - see the
+ * test sources for its meaning.
+ */
+struct dma_buf_test_params {
+	struct xe_test_priv base;
+	const struct dma_buf_attach_ops *attach_ops;
+	bool force_different_devices;
+	u32 mem_mask;
+};
+
+#define to_dma_buf_test_params(_priv) \
+	container_of(_priv, struct dma_buf_test_params, base)
+#endif
+
+/**
+ * xe_gem_prime_import - import a dma-buf as a GEM object
+ * @dev: importing DRM device
+ * @dma_buf: buffer to import
+ *
+ * A buffer that was exported by xe from the same device is not really
+ * imported; the underlying GEM object is returned with an extra reference
+ * instead. Everything else gets a freshly allocated BO that is attached
+ * dynamically to @dma_buf and holds a reference on it.
+ *
+ * Return: the GEM object or an ERR_PTR() on failure.
+ *
+ * NOTE(review): when xe_dma_buf_init_obj() fails the attachment created
+ * just before is not detached on this path - confirm that it is released
+ * elsewhere.
+ */
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+					   struct dma_buf *dma_buf)
+{
+	XE_TEST_DECLARE(struct dma_buf_test_params *test =
+			to_dma_buf_test_params
+			(xe_cur_kunit_priv(XE_TEST_LIVE_DMA_BUF));)
+	const struct dma_buf_attach_ops *attach_ops;
+	struct dma_buf_attachment *attach;
+	struct drm_gem_object *obj;
+	struct xe_bo *bo;
+
+	if (dma_buf->ops == &xe_dmabuf_ops) {
+		obj = dma_buf->priv;
+		if (obj->dev == dev &&
+		    !XE_TEST_ONLY(test && test->force_different_devices)) {
+			/*
+			 * Importing dmabuf exported from our own gem increases
+			 * refcount on gem itself instead of f_count of dmabuf.
+			 */
+			drm_gem_object_get(obj);
+			return obj;
+		}
+	}
+
+	/*
+	 * Don't publish the bo until we have a valid attachment, and a
+	 * valid attachment needs the bo address. So pre-create a bo before
+	 * creating the attachment and publish.
+	 */
+	bo = xe_bo_alloc();
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+
+	attach_ops = &xe_dma_buf_attach_ops;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	/* A running KUnit test may substitute its own attach ops */
+	if (test)
+		attach_ops = test->attach_ops;
+#endif
+
+	attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base);
+	if (IS_ERR(attach)) {
+		obj = ERR_CAST(attach);
+		goto out_err;
+	}
+
+	/* Errors here will take care of freeing the bo. */
+	obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
+	if (IS_ERR(obj))
+		return obj;
+
+
+	get_dma_buf(dma_buf);
+	obj->import_attach = attach;
+	return obj;
+
+out_err:
+	xe_bo_free(bo);
+
+	return obj;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_dma_buf.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.h b/drivers/gpu/drm/xe/xe_dma_buf.h
new file mode 100644
index 000000000000..861dd28a862c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DMA_BUF_H_
+#define _XE_DMA_BUF_H_
+
+#include <drm/drm_gem.h>
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags);
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+					   struct dma_buf *dma_buf);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
new file mode 100644
index 000000000000..0377e5e4e35f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DRV_H_
+#define _XE_DRV_H_
+
+#include <drm/drm_drv.h>
+
+/* Identity strings and version numbers reported through struct drm_driver */
+#define DRIVER_NAME		"xe"
+#define DRIVER_DESC		"Intel Xe Graphics"
+#define DRIVER_DATE		"20201103"
+/* Unix time (seconds); falls on the same day as DRIVER_DATE in UTC */
+#define DRIVER_TIMESTAMP	1604406085
+
+/* Interface history:
+ *
+ * 1.1: Original.
+ */
+#define DRIVER_MAJOR		1
+#define DRIVER_MINOR		1
+#define DRIVER_PATCHLEVEL	0
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
new file mode 100644
index 000000000000..63219bd98be7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_engine.h"
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+#include <linux/nospec.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+/*
+ * Allocate an engine together with @width trailing LRCs, fill in its software
+ * state and hand it to the submission backend's init hook.
+ *
+ * @xe: device the engine belongs to
+ * @vm: address space for the engine, may be NULL; when set the engine takes
+ *      its own reference on it
+ * @logical_mask: mask of logical instances the engine may run on
+ * @width: number of LRCs to allocate and initialise
+ * @hwe: hardware engine supplying the GT, class, fence IRQ and ring ops
+ * @flags: ENGINE_FLAG_* bits stored in the engine
+ *
+ * Returns the new engine, or an ERR_PTR() on failure. On failure every LRC
+ * that was initialised is finished again and the VM reference is released.
+ */
+static struct xe_engine *__xe_engine_create(struct xe_device *xe,
+					    struct xe_vm *vm,
+					    u32 logical_mask,
+					    u16 width, struct xe_hw_engine *hwe,
+					    u32 flags)
+{
+	struct xe_engine *e;
+	struct xe_gt *gt = hwe->gt;
+	int err;
+	int i;
+
+	e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&e->refcount);
+	e->flags = flags;
+	e->hwe = hwe;
+	e->gt = gt;
+	if (vm)
+		e->vm = xe_vm_get(vm);
+	e->class = hwe->class;
+	e->width = width;
+	e->logical_mask = logical_mask;
+	e->fence_irq = &gt->fence_irq[hwe->class];
+	e->ring_ops = gt->ring_ops[hwe->class];
+	e->ops = gt->engine_ops;
+	INIT_LIST_HEAD(&e->persitent.link);
+	INIT_LIST_HEAD(&e->compute.link);
+	INIT_LIST_HEAD(&e->multi_gt_link);
+
+	/* FIXME: Wire up to configurable default value */
+	e->sched_props.timeslice_us = 1 * 1000;
+	e->sched_props.preempt_timeout_us = 640 * 1000;
+
+	if (xe_engine_is_parallel(e)) {
+		e->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
+		e->parallel.composite_fence_seqno = 1;
+	}
+	if (e->flags & ENGINE_FLAG_VM) {
+		e->bind.fence_ctx = dma_fence_context_alloc(1);
+		e->bind.fence_seqno = 1;
+	}
+
+	for (i = 0; i < width; ++i) {
+		err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K);
+		if (err)
+			goto err_lrc;
+	}
+
+	err = e->ops->init(e);
+	if (err)
+		goto err_lrc;
+
+	return e;
+
+err_lrc:
+	/* i is the number of LRCs that were successfully initialised. */
+	for (i = i - 1; i >= 0; --i)
+		xe_lrc_finish(e->lrc + i);
+	/*
+	 * Release the reference taken with xe_vm_get() above, otherwise the
+	 * VM is leaked whenever engine creation fails.
+	 */
+	if (e->vm)
+		xe_vm_put(e->vm);
+	kfree(e);
+	return ERR_PTR(err);
+}
+
+/*
+ * Create an engine, holding the VM lock around the creation when a VM is
+ * supplied. Returns the engine or an ERR_PTR(); a failure to take the VM lock
+ * is returned as an ERR_PTR() as well.
+ */
+struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
+				   u32 logical_mask, u16 width,
+				   struct xe_hw_engine *hwe, u32 flags)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_engine *engine;
+	int ret;
+
+	/* Nothing to lock when the engine has no address space. */
+	if (!vm)
+		return __xe_engine_create(xe, vm, logical_mask, width, hwe,
+					  flags);
+
+	ret = xe_vm_lock(vm, &ww, 0, true);
+	if (ret)
+		return ERR_PTR(ret);
+
+	engine = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags);
+	xe_vm_unlock(vm, &ww);
+
+	return engine;
+}
+
+/*
+ * Create a width-1 engine that may run on every non-reserved hardware engine
+ * of @class on @gt. The first matching hardware engine is passed on as the
+ * engine's hwe. Returns ERR_PTR(-ENODEV) when no hardware engine matches.
+ */
+struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
+					 struct xe_vm *vm,
+					 enum xe_engine_class class, u32 flags)
+{
+	struct xe_hw_engine *first = NULL;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 mask = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (xe_hw_engine_is_reserved(hwe) || hwe->class != class)
+			continue;
+
+		mask |= BIT(hwe->logical_instance);
+		if (!first)
+			first = hwe;
+	}
+
+	if (!mask)
+		return ERR_PTR(-ENODEV);
+
+	return xe_engine_create(xe, vm, mask, 1, first, flags);
+}
+
+/*
+ * kref release callback for an engine (see xe_engine_put()). An engine that
+ * is not a bind-engine child drops one reference on every engine linked on
+ * its multi-GT list, then the backend's fini hook is invoked.
+ */
+void xe_engine_destroy(struct kref *ref)
+{
+	struct xe_engine *e = container_of(ref, struct xe_engine, refcount);
+	bool is_child = e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD;
+
+	if (!is_child) {
+		struct xe_engine *child, *tmp;
+
+		list_for_each_entry_safe(child, tmp, &e->multi_gt_list,
+					 multi_gt_link)
+			xe_engine_put(child);
+	}
+
+	e->ops->fini(e);
+}
+
+/*
+ * Release everything __xe_engine_create() set up: finish all LRCs, drop the
+ * VM reference (if any) and free the engine itself.
+ */
+void xe_engine_fini(struct xe_engine *e)
+{
+	struct xe_lrc *lrc;
+
+	for (lrc = e->lrc; lrc < e->lrc + e->width; ++lrc)
+		xe_lrc_finish(lrc);
+
+	if (e->vm)
+		xe_vm_put(e->vm);
+
+	kfree(e);
+}
+
+/*
+ * Look up the engine registered under @id for @xef.
+ *
+ * Returns the engine with an extra reference held (release it with
+ * xe_engine_put()), or NULL when no engine is stored under @id.
+ */
+struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_engine *e;
+
+	mutex_lock(&xef->engine.lock);
+	e = xa_load(&xef->engine.xa, id);
+	/*
+	 * Take the reference while still holding the lock: once it is dropped
+	 * xe_engine_destroy_ioctl() may erase the entry and put the last
+	 * reference, leaving a dangling pointer here.
+	 */
+	if (e)
+		xe_engine_get(e);
+	mutex_unlock(&xef->engine.lock);
+
+	return e;
+}
+
+/*
+ * Property handler: set the engine priority.
+ *
+ * Values above XE_ENGINE_PRIORITY_HIGH are rejected with -EINVAL, and
+ * XE_ENGINE_PRIORITY_HIGH itself requires CAP_SYS_NICE (-EPERM otherwise).
+ * Anything else is forwarded to the backend's set_priority hook. @create is
+ * not used by this handler.
+ */
+static int engine_set_priority(struct xe_device *xe, struct xe_engine *e,
+			       u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH &&
+			 !capable(CAP_SYS_NICE)))
+		return -EPERM;
+
+	return e->ops->set_priority(e, value);
+}
+
+/*
+ * Property handler: set the engine timeslice. Requires CAP_SYS_NICE (-EPERM
+ * otherwise); the value is forwarded to the backend's set_timeslice hook.
+ * @xe and @create are not used by this handler.
+ */
+static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e,
+				u64 value, bool create)
+{
+	return capable(CAP_SYS_NICE) ? e->ops->set_timeslice(e, value) :
+				       -EPERM;
+}
+
+/*
+ * Property handler: set the engine preemption timeout. Requires CAP_SYS_NICE
+ * (-EPERM otherwise); the value is forwarded to the backend's
+ * set_preempt_timeout hook. @xe and @create are not used by this handler.
+ */
+static int engine_set_preemption_timeout(struct xe_device *xe,
+					 struct xe_engine *e, u64 value,
+					 bool create)
+{
+	return capable(CAP_SYS_NICE) ?
+		e->ops->set_preempt_timeout(e, value) : -EPERM;
+}
+
+/*
+ * Property handler: put an engine into compute mode.
+ *
+ * Only accepted at creation time, and only for engines that are neither
+ * already in compute mode nor VM (bind) engines. A non-zero @value further
+ * requires a width-1 engine on a VM that is in compute mode and not in fault
+ * mode; the engine is then registered with the VM, flagged as compute mode
+ * and loses its persistent flag. A zero @value only runs the initial checks.
+ */
+static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e,
+				   u64 value, bool create)
+{
+	struct xe_vm *vm;
+	int err;
+
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM))
+		return -EINVAL;
+
+	/* Nothing more to do when compute mode is not being requested. */
+	if (!value)
+		return 0;
+
+	vm = e->vm;
+
+	if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm)))
+		return -EOPNOTSUPP;
+
+	if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm)))
+		return -EOPNOTSUPP;
+
+	if (XE_IOCTL_ERR(xe, e->width != 1))
+		return -EINVAL;
+
+	e->compute.context = dma_fence_context_alloc(1);
+	spin_lock_init(&e->compute.lock);
+
+	err = xe_vm_add_compute_engine(vm, e);
+	if (XE_IOCTL_ERR(xe, err))
+		return err;
+
+	/* Compute-mode engines are never persistent. */
+	e->flags |= ENGINE_FLAG_COMPUTE_MODE;
+	e->flags &= ~ENGINE_FLAG_PERSISTENT;
+
+	return 0;
+}
+
+/*
+ * Property handler: set or clear ENGINE_FLAG_PERSISTENT according to @value.
+ * Only accepted at creation time and never for compute-mode engines
+ * (-EINVAL in both cases).
+ */
+static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e,
+				  u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+		return -EINVAL;
+
+	if (!value)
+		e->flags &= ~ENGINE_FLAG_PERSISTENT;
+	else
+		e->flags |= ENGINE_FLAG_PERSISTENT;
+
+	return 0;
+}
+
+/*
+ * Property handler: set the engine job timeout. Only accepted at creation
+ * time (-EINVAL otherwise) and requires CAP_SYS_NICE (-EPERM otherwise); the
+ * value is forwarded to the backend's set_job_timeout hook.
+ */
+static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e,
+				  u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	return capable(CAP_SYS_NICE) ? e->ops->set_job_timeout(e, value) :
+				       -EPERM;
+}
+
+/*
+ * Property handler: store @value as the engine's USM access counter trigger.
+ * Only accepted at creation time and only on devices with USM support
+ * (-EINVAL otherwise). The u64 @value is stored into a u32 field.
+ */
+static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e,
+				  u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+		return -EINVAL;
+
+	e->usm.acc_trigger = value;
+
+	return 0;
+}
+
+/*
+ * Property handler: store @value as the engine's USM access counter notify
+ * setting. Only accepted at creation time and only on devices with USM
+ * support (-EINVAL otherwise). The u64 @value is stored into a u32 field.
+ */
+static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e,
+				 u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+		return -EINVAL;
+
+	e->usm.acc_notify = value;
+
+	return 0;
+}
+
+/*
+ * Property handler: store @value as the engine's USM access counter
+ * granularity. Only accepted at creation time and only on devices with USM
+ * support (-EINVAL otherwise). The u64 @value is stored into a u32 field.
+ */
+static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e,
+				      u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+		return -EINVAL;
+
+	e->usm.acc_granularity = value;
+
+	return 0;
+}
+
+/*
+ * Signature of a property handler. @create is true when the handler runs as
+ * part of the engine create ioctl and false when it runs from the
+ * set-property ioctl.
+ */
+typedef int (*xe_engine_set_property_fn)(struct xe_device *xe,
+					 struct xe_engine *e,
+					 u64 value, bool create);
+
+/*
+ * Property handlers indexed by XE_ENGINE_PROPERTY_*. Callers bounds-check the
+ * user supplied property against ARRAY_SIZE() of this table before indexing.
+ */
+static const xe_engine_set_property_fn engine_set_property_funcs[] = {
+	[XE_ENGINE_PROPERTY_PRIORITY] = engine_set_priority,
+	[XE_ENGINE_PROPERTY_TIMESLICE] = engine_set_timeslice,
+	[XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout,
+	[XE_ENGINE_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode,
+	[XE_ENGINE_PROPERTY_PERSISTENCE] = engine_set_persistence,
+	[XE_ENGINE_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout,
+	[XE_ENGINE_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger,
+	[XE_ENGINE_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify,
+	[XE_ENGINE_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity,
+};
+
+/*
+ * Extension handler for XE_ENGINE_EXTENSION_SET_PROPERTY.
+ *
+ * Copies a struct drm_xe_ext_engine_set_property from the user address in
+ * @extension and dispatches to the matching property handler.
+ *
+ * Returns -EFAULT when the copy fails, -EINVAL for an unknown property,
+ * otherwise whatever the property handler returns.
+ */
+static int engine_user_ext_set_property(struct xe_device *xe,
+					struct xe_engine *e,
+					u64 extension,
+					bool create)
+{
+	u64 __user *address = u64_to_user_ptr(extension);
+	struct drm_xe_ext_engine_set_property ext;
+	int err;
+	u32 idx;
+
+	/*
+	 * @extension is an arbitrary user supplied value that nothing has run
+	 * access_ok() on, so the checking copy_from_user() must be used.
+	 */
+	err = copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_ERR(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_ERR(xe, ext.property >=
+			 ARRAY_SIZE(engine_set_property_funcs)))
+		return -EINVAL;
+
+	idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs));
+	return engine_set_property_funcs[idx](xe, e, ext.value,  create);
+}
+
+/*
+ * Signature of a user extension handler. @extension is the user address of
+ * the extension structure being processed; @create is true when called from
+ * the engine create ioctl.
+ */
+typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe,
+					   struct xe_engine *e,
+					   u64 extension,
+					   bool create);
+
+/*
+ * Extension handlers indexed by XE_ENGINE_EXTENSION_*. Declared with the
+ * extension handler type rather than the property handler type.
+ */
+static const xe_engine_user_extension_fn engine_user_extension_funcs[] = {
+	[XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS	16
+/*
+ * Walk a user supplied chain of extensions and apply each one to @e.
+ *
+ * @extensions: user address of the first struct xe_user_extension
+ * @ext_number: index of the first extension, used to bound the chain length
+ * @create: true when called from the engine create ioctl
+ *
+ * Returns 0 on success, -E2BIG when the chain reaches MAX_USER_EXTENSIONS
+ * entries, -EFAULT when an extension cannot be copied from user space,
+ * -EINVAL for an unknown extension name, or the error of the failing handler.
+ */
+static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e,
+				  u64 extensions, int ext_number, bool create)
+{
+	struct xe_user_extension ext;
+	int err;
+	u32 idx;
+
+	/* Iterate instead of recursing to keep kernel stack usage constant. */
+	for (;;) {
+		u64 __user *address = u64_to_user_ptr(extensions);
+
+		if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
+			return -E2BIG;
+
+		/*
+		 * The address comes straight from user space and nothing has
+		 * run access_ok() on it, so use the checking copy_from_user().
+		 */
+		err = copy_from_user(&ext, address, sizeof(ext));
+		if (XE_IOCTL_ERR(xe, err))
+			return -EFAULT;
+
+		if (XE_IOCTL_ERR(xe, ext.name >=
+				 ARRAY_SIZE(engine_user_extension_funcs)))
+			return -EINVAL;
+
+		idx = array_index_nospec(ext.name,
+					 ARRAY_SIZE(engine_user_extension_funcs));
+		err = engine_user_extension_funcs[idx](xe, e, extensions,
+						       create);
+		if (XE_IOCTL_ERR(xe, err))
+			return err;
+
+		if (!ext.next_extension)
+			return 0;
+
+		extensions = ext.next_extension;
+		++ext_number;
+	}
+}
+
+/*
+ * Translation from the uAPI DRM_XE_ENGINE_CLASS_* values to the driver's
+ * XE_ENGINE_CLASS_* values. DRM_XE_ENGINE_CLASS_VM_BIND has no entry here.
+ */
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+/*
+ * Resolve a user supplied class/instance/GT triple to a hardware engine.
+ *
+ * Returns NULL when the engine class or GT id is out of range, otherwise the
+ * result of xe_gt_hw_engine() for the translated class and the given
+ * instance.
+ */
+static struct xe_hw_engine *
+find_hw_engine(struct xe_device *xe,
+	       struct drm_xe_engine_class_instance eci)
+{
+	u32 idx;
+
+	/*
+	 * Valid indices are 0 .. ARRAY_SIZE - 1. A class equal to ARRAY_SIZE
+	 * must be rejected as well, otherwise array_index_nospec() clamps it
+	 * to 0 and the request is silently treated as entry 0 of the table.
+	 */
+	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
+		return NULL;
+
+	if (eci.gt_id >= xe->info.tile_count)
+		return NULL;
+
+	idx = array_index_nospec(eci.engine_class,
+				 ARRAY_SIZE(user_to_xe_engine_class));
+
+	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
+			       user_to_xe_engine_class[idx],
+			       eci.engine_instance, true);
+}
+
+/*
+ * Validate the placement of a VM bind engine and compute its logical mask.
+ *
+ * A bind engine must have width 1, a single placement and engine instance 0.
+ * eci[0].engine_class is overwritten with DRM_XE_ENGINE_CLASS_COPY, and the
+ * returned mask covers every non-reserved copy engine of @gt. Returns 0 when
+ * validation fails or no such engine exists.
+ */
+static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+				    struct drm_xe_engine_class_instance *eci,
+				    u16 width, u16 num_placements)
+{
+	const enum xe_engine_class copy_class =
+		user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY];
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 mask = 0;
+
+	if (XE_IOCTL_ERR(xe, width != 1))
+		return 0;
+	if (XE_IOCTL_ERR(xe, num_placements != 1))
+		return 0;
+	if (XE_IOCTL_ERR(xe, eci[0].engine_instance != 0))
+		return 0;
+
+	eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (xe_hw_engine_is_reserved(hwe) || hwe->class != copy_class)
+			continue;
+
+		mask |= BIT(hwe->logical_instance);
+	}
+
+	return mask;
+}
+
+/*
+ * Validate a user supplied placement array and compute the logical mask.
+ *
+ * @eci holds @width * @num_placements entries; entry j * @width + i is the
+ * i-th engine of the j-th placement. Every entry must resolve to a
+ * non-reserved hardware engine, and all entries must share one GT id and one
+ * engine class. For @width > 1 the instance mask at each position must equal
+ * the previous position's mask shifted left by one.
+ *
+ * Returns the mask of instances at position 0 (all instances when @width is
+ * 1), or 0 when validation fails. @gt is not used by this function.
+ */
+static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+				      struct drm_xe_engine_class_instance *eci,
+				      u16 width, u16 num_placements)
+{
+	int len = width * num_placements;
+	int i, j, n;
+	u16 class;
+	u16 gt_id;
+	u32 return_mask = 0, prev_mask;
+
+	/* More than one instance is only accepted with GuC submission. */
+	if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) &&
+			 len > 1))
+		return 0;
+
+	for (i = 0; i < width; ++i) {
+		u32 current_mask = 0;
+
+		for (j = 0; j < num_placements; ++j) {
+			struct xe_hw_engine *hwe;
+
+			n = j * width + i;
+
+			hwe = find_hw_engine(xe, eci[n]);
+			if (XE_IOCTL_ERR(xe, !hwe))
+				return 0;
+
+			if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe)))
+				return 0;
+
+			/*
+			 * class and gt_id are first assigned below when
+			 * n == 0; the "n &&" short-circuit keeps them from
+			 * being read before that.
+			 */
+			if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) ||
+			    XE_IOCTL_ERR(xe, n && eci[n].engine_class != class))
+				return 0;
+
+			class = eci[n].engine_class;
+			gt_id = eci[n].gt_id;
+
+			/* Only position 0 contributes to the returned mask. */
+			if (width == 1 || !i)
+				return_mask |= BIT(eci[n].engine_instance);
+			current_mask |= BIT(eci[n].engine_instance);
+		}
+
+		/* Parallel submissions must be logically contiguous */
+		if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1))
+			return 0;
+
+		/* prev_mask is only read for i > 0, after this assignment. */
+		prev_mask = current_mask;
+	}
+
+	return return_mask;
+}
+
+/*
+ * Engine create ioctl.
+ *
+ * Copies the placement array from user space, creates either a VM bind
+ * engine (one engine per non-media GT, linked to the first one) or a regular
+ * engine on the VM named by args->vm_id, applies any user extensions and
+ * publishes the engine in the file's xarray. The new id is returned in
+ * args->engine_id.
+ *
+ * A runtime PM reference is taken before engine creation. It is dropped on
+ * every error path and kept on success; xe_engine_destroy_ioctl() drops one
+ * reference when an engine is destroyed.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int xe_engine_create_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_engine_create *args = data;
+	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
+	struct drm_xe_engine_class_instance __user *user_eci =
+		u64_to_user_ptr(args->instances);
+	struct xe_hw_engine *hwe;
+	struct xe_vm *vm, *migrate_vm;
+	struct xe_gt *gt;
+	struct xe_engine *e = NULL;
+	u32 logical_mask;
+	u32 id;
+	int len;
+	int err;
+
+	if (XE_IOCTL_ERR(xe, args->flags))
+		return -EINVAL;
+
+	/*
+	 * Bound each factor before multiplying so the product below cannot
+	 * overflow and slip past the size check on the on-stack eci array.
+	 */
+	if (XE_IOCTL_ERR(xe, !args->width || !args->num_placements ||
+			 args->width > XE_HW_ENGINE_MAX_INSTANCE ||
+			 args->num_placements > XE_HW_ENGINE_MAX_INSTANCE))
+		return -EINVAL;
+
+	len = args->width * args->num_placements;
+	if (XE_IOCTL_ERR(xe, len > XE_HW_ENGINE_MAX_INSTANCE))
+		return -EINVAL;
+
+	/*
+	 * args->instances is an arbitrary user supplied address that nothing
+	 * has run access_ok() on, so use the checking copy_from_user().
+	 */
+	err = copy_from_user(eci, user_eci,
+			     sizeof(struct drm_xe_engine_class_instance) *
+			     len);
+	if (XE_IOCTL_ERR(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count))
+	       return -EINVAL;
+
+	xe_pm_runtime_get(xe);
+
+	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
+		for_each_gt(gt, xe, id) {
+			struct xe_engine *new;
+
+			if (xe_gt_is_media_type(gt))
+				continue;
+
+			eci[0].gt_id = gt->info.id;
+			logical_mask = bind_engine_logical_mask(xe, gt, eci,
+								args->width,
+								args->num_placements);
+			if (XE_IOCTL_ERR(xe, !logical_mask)) {
+				err = -EINVAL;
+				goto put_bind;
+			}
+
+			hwe = find_hw_engine(xe, eci[0]);
+			if (XE_IOCTL_ERR(xe, !hwe)) {
+				err = -EINVAL;
+				goto put_bind;
+			}
+
+			migrate_vm = xe_migrate_get_vm(gt->migrate);
+			new = xe_engine_create(xe, migrate_vm, logical_mask,
+					       args->width, hwe,
+					       ENGINE_FLAG_PERSISTENT |
+					       ENGINE_FLAG_VM |
+					       (id ?
+					       ENGINE_FLAG_BIND_ENGINE_CHILD :
+					       0));
+			xe_vm_put(migrate_vm);
+			if (IS_ERR(new)) {
+				err = PTR_ERR(new);
+				goto put_bind;
+			}
+			/*
+			 * The engine of GT 0 is the parent; engines of the
+			 * other GTs are linked onto its multi-GT list.
+			 */
+			if (id == 0)
+				e = new;
+			else
+				list_add_tail(&new->multi_gt_list,
+					      &e->multi_gt_link);
+		}
+	} else {
+		gt = xe_device_get_gt(xe, eci[0].gt_id);
+		logical_mask = calc_validate_logical_mask(xe, gt, eci,
+							  args->width,
+							  args->num_placements);
+		if (XE_IOCTL_ERR(xe, !logical_mask)) {
+			err = -EINVAL;
+			goto put_rpm;
+		}
+
+		hwe = find_hw_engine(xe, eci[0]);
+		if (XE_IOCTL_ERR(xe, !hwe)) {
+			err = -EINVAL;
+			goto put_rpm;
+		}
+
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_ERR(xe, !vm)) {
+			err = -ENOENT;
+			goto put_rpm;
+		}
+
+		e = xe_engine_create(xe, vm, logical_mask,
+				     args->width, hwe, ENGINE_FLAG_PERSISTENT);
+		xe_vm_put(vm);
+		if (IS_ERR(e)) {
+			err = PTR_ERR(e);
+			goto put_rpm;
+		}
+	}
+
+	if (args->extensions) {
+		err = engine_user_extensions(xe, e, args->extensions, 0, true);
+		if (XE_IOCTL_ERR(xe, err))
+			goto put_engine;
+	}
+
+	/* An engine must be in compute mode exactly when its VM is. */
+	if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) !=
+			 !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) {
+		err = -ENOTSUPP;
+		goto put_engine;
+	}
+
+	e->persitent.xef = xef;
+
+	mutex_lock(&xef->engine.lock);
+	err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->engine.lock);
+	if (err)
+		goto put_engine;
+
+	args->engine_id = id;
+
+	return 0;
+
+put_bind:
+	/*
+	 * A failure while setting up a later GT's bind engine must still
+	 * release the parent engine (and, through it, the children already
+	 * linked); there is nothing to release if no parent exists yet.
+	 */
+	if (!e)
+		goto put_rpm;
+put_engine:
+	xe_engine_kill(e);
+	xe_engine_put(e);
+put_rpm:
+	xe_pm_runtime_put(xe);
+	return err;
+}
+
+/*
+ * Undo the compute-mode registration of an engine: unlink it from its VM's
+ * engine list, decrement the VM's engine count and release the preemption
+ * fence, if any, after enabling software signaling on it.
+ */
+static void engine_kill_compute(struct xe_engine *e)
+{
+	/*
+	 * ENGINE_FLAG_COMPUTE_MODE is only set once the engine has been added
+	 * to the VM by engine_set_compute_mode(). Without it there is nothing
+	 * to unlink, and decrementing num_engines would corrupt the VM's
+	 * count (e.g. when engine creation on a compute VM fails before the
+	 * engine was registered).
+	 */
+	if (!(e->flags & ENGINE_FLAG_COMPUTE_MODE))
+		return;
+
+	if (!xe_vm_in_compute_mode(e->vm))
+		return;
+
+	down_write(&e->vm->lock);
+	list_del(&e->compute.link);
+	--e->vm->preempt.num_engines;
+	if (e->compute.pfence) {
+		dma_fence_enable_sw_signaling(e->compute.pfence);
+		dma_fence_put(e->compute.pfence);
+		e->compute.pfence = NULL;
+	}
+	up_write(&e->vm->lock);
+}
+
+/*
+ * Kill an engine: invoke the backend's kill hook and drop any compute-mode
+ * registration, first for every engine linked on @e's multi-GT list and then
+ * for @e itself.
+ */
+void xe_engine_kill(struct xe_engine *e)
+{
+	struct xe_engine *engine, *next;
+
+	/*
+	 * The list head must be @e's, not the loop cursor's: multi_gt_list
+	 * and multi_gt_link share a union, so a head expressed through the
+	 * cursor always compares equal to the cursor's link and the loop body
+	 * would never run. Bind-engine children use that storage as a link
+	 * into their parent's list, so they have no list of their own to
+	 * walk (same rule as xe_engine_destroy()).
+	 */
+	if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) {
+		list_for_each_entry_safe(engine, next, &e->multi_gt_list,
+					 multi_gt_link) {
+			e->ops->kill(engine);
+			engine_kill_compute(engine);
+		}
+	}
+
+	e->ops->kill(e);
+	engine_kill_compute(e);
+}
+
+/*
+ * Engine destroy ioctl.
+ *
+ * Removes the engine stored under args->engine_id from the file's xarray.
+ * Persistent engines are handed to the device's persistent engine tracking,
+ * all others are killed. Finally the xarray's reference on the engine and
+ * one runtime PM reference are dropped.
+ *
+ * Returns 0 on success, -EINVAL when args->pad is set, or -ENOENT when no
+ * engine is stored under the given id.
+ */
+int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file)
+{
+	struct drm_xe_engine_destroy *args = data;
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct xe_engine *e;
+
+	if (XE_IOCTL_ERR(xe, args->pad))
+		return -EINVAL;
+
+	mutex_lock(&xef->engine.lock);
+	e = xa_erase(&xef->engine.xa, args->engine_id);
+	mutex_unlock(&xef->engine.lock);
+	if (XE_IOCTL_ERR(xe, !e))
+		return -ENOENT;
+
+	if (e->flags & ENGINE_FLAG_PERSISTENT)
+		xe_device_add_persitent_engines(xe, e);
+	else
+		xe_engine_kill(e);
+
+	trace_xe_engine_close(e);
+	xe_engine_put(e);
+	xe_pm_runtime_put(xe);
+
+	return 0;
+}
+
+/*
+ * Engine set-property ioctl.
+ *
+ * Looks up the engine named by args->engine_id, applies the property in
+ * args->property / args->value with create == false and, when that succeeds
+ * and args->extensions is set, processes the extension chain as well.
+ *
+ * Returns 0 on success, -ENOENT for an unknown engine, -EINVAL for an
+ * unknown property, or the error of the failing handler.
+ */
+int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file)
+{
+	struct drm_xe_engine_set_property *args = data;
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct xe_engine *e;
+	int ret;
+	u32 idx;
+
+	e = xe_engine_lookup(xef, args->engine_id);
+	if (XE_IOCTL_ERR(xe, !e))
+		return -ENOENT;
+
+	if (XE_IOCTL_ERR(xe, args->property >=
+			 ARRAY_SIZE(engine_set_property_funcs))) {
+		ret = -EINVAL;
+	} else {
+		idx = array_index_nospec(args->property,
+					 ARRAY_SIZE(engine_set_property_funcs));
+		ret = engine_set_property_funcs[idx](xe, e, args->value,
+						     false);
+		/* Extensions are only processed once the property was set. */
+		if (!XE_IOCTL_ERR(xe, ret) && args->extensions)
+			ret = engine_user_extensions(xe, e, args->extensions,
+						     0, false);
+	}
+
+	/* Drop the reference taken by xe_engine_lookup(). */
+	xe_engine_put(e);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h
new file mode 100644
index 000000000000..4d1b609fea7e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_engine.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_H_
+#define _XE_ENGINE_H_
+
+#include "xe_engine_types.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+struct xe_file;
+
+struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
+				   u32 logical_mask, u16 width,
+				   struct xe_hw_engine *hw_engine, u32 flags);
+struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
+					 struct xe_vm *vm,
+					 enum xe_engine_class class, u32 flags);
+
+void xe_engine_fini(struct xe_engine *e);
+void xe_engine_destroy(struct kref *ref);
+
+struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id);
+
+/* Take an additional reference on @engine and return it for chaining. */
+static inline struct xe_engine *xe_engine_get(struct xe_engine *engine)
+{
+	kref_get(&engine->refcount);
+	return engine;
+}
+
+/*
+ * Drop a reference on @engine; xe_engine_destroy() runs once the last
+ * reference is gone.
+ */
+static inline void xe_engine_put(struct xe_engine *engine)
+{
+	kref_put(&engine->refcount, xe_engine_destroy);
+}
+
+/* An engine is parallel when its width is greater than one. */
+static inline bool xe_engine_is_parallel(struct xe_engine *engine)
+{
+	return engine->width > 1;
+}
+
+void xe_engine_kill(struct xe_engine *e);
+
+int xe_engine_create_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
new file mode 100644
index 000000000000..3dfa1c14e181
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_engine_types.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_TYPES_H_
+#define _XE_ENGINE_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_lrc_types.h"
+
+struct xe_execlist_engine;
+struct xe_gt;
+struct xe_guc_engine;
+struct xe_hw_engine;
+struct xe_vm;
+
+/*
+ * Engine priority levels, lowest first. XE_ENGINE_PRIORITY_COUNT is the
+ * number of levels from LOW to KERNEL and is not itself a priority.
+ */
+enum xe_engine_priority {
+	XE_ENGINE_PRIORITY_UNSET = -2, /* For execlist usage only */
+	XE_ENGINE_PRIORITY_LOW = 0,
+	XE_ENGINE_PRIORITY_NORMAL,
+	XE_ENGINE_PRIORITY_HIGH,
+	XE_ENGINE_PRIORITY_KERNEL,
+
+	XE_ENGINE_PRIORITY_COUNT
+};
+
+/**
+ * struct xe_engine - Submission engine
+ *
+ * Contains all state necessary for submissions. Can either be a user object or
+ * a kernel object.
+ */
+struct xe_engine {
+	/** @gt: graphics tile this engine can submit to */
+	struct xe_gt *gt;
+	/**
+	 * @hwe: A hardware engine of the same class. May (physical engine) or
+	 * may not (virtual engine) be where jobs actually end up running.
+	 * Should never really be used for submissions.
+	 */
+	struct xe_hw_engine *hwe;
+	/** @refcount: ref count of this engine */
+	struct kref refcount;
+	/** @vm: VM (address space) for this engine */
+	struct xe_vm *vm;
+	/** @class: class of this engine */
+	enum xe_engine_class class;
+	/** @priority: priority of this engine */
+	enum xe_engine_priority priority;
+	/**
+	 * @logical_mask: logical mask of where job submitted to engine can run
+	 */
+	u32 logical_mask;
+	/** @name: name of this engine */
+	char name[MAX_FENCE_NAME_LEN];
+	/** @width: width (number BB submitted per exec) of this engine */
+	u16 width;
+	/** @fence_irq: fence IRQ used to signal job completion */
+	struct xe_hw_fence_irq *fence_irq;
+
+#define ENGINE_FLAG_BANNED		BIT(0)
+#define ENGINE_FLAG_KERNEL		BIT(1)
+#define ENGINE_FLAG_PERSISTENT		BIT(2)
+#define ENGINE_FLAG_COMPUTE_MODE	BIT(3)
+#define ENGINE_FLAG_VM			BIT(4)
+#define ENGINE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+#define ENGINE_FLAG_WA			BIT(6)
+
+	/**
+	 * @flags: flags for this engine, should be statically set up aside
+	 * from the ban bit
+	 */
+	unsigned long flags;
+
+	union {
+		/** @multi_gt_list: list head for VM bind engines if multi-GT */
+		struct list_head multi_gt_list;
+		/** @multi_gt_link: link for VM bind engines if multi-GT */
+		struct list_head multi_gt_link;
+	};
+
+	union {
+		/** @execlist: execlist backend specific state for engine */
+		struct xe_execlist_engine *execlist;
+		/** @guc: GuC backend specific state for engine */
+		struct xe_guc_engine *guc;
+	};
+
+	/**
+	 * @persitent: persistent engine state
+	 */
+	struct {
+		/** @xef: file which this engine belongs to */
+		struct xe_file *xef;
+		/** @link: link in list of persistent engines */
+		struct list_head link;
+	} persitent;
+
+	union {
+		/**
+		 * @parallel: parallel submission state
+		 */
+		struct {
+			/** @composite_fence_ctx: context composite fence */
+			u64 composite_fence_ctx;
+			/** @composite_fence_seqno: seqno for composite fence */
+			u32 composite_fence_seqno;
+		} parallel;
+		/**
+		 * @bind: bind submission state
+		 */
+		struct {
+			/** @fence_ctx: context bind fence */
+			u64 fence_ctx;
+			/** @fence_seqno: seqno for bind fence */
+			u32 fence_seqno;
+		} bind;
+	};
+
+	/** @sched_props: scheduling properties */
+	struct {
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+	} sched_props;
+
+	/** @compute: compute engine state */
+	struct {
+		/** @pfence: preemption fence */
+		struct dma_fence *pfence;
+		/** @context: preemption fence context */
+		u64 context;
+		/** @seqno: preemption fence seqno */
+		u32 seqno;
+		/** @link: link into VM's list of engines */
+		struct list_head link;
+		/** @lock: preemption fences lock */
+		spinlock_t lock;
+	} compute;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @acc_trigger: access counter trigger */
+		u32 acc_trigger;
+		/** @acc_notify: access counter notify */
+		u32 acc_notify;
+		/** @acc_granularity: access counter granularity */
+		u32 acc_granularity;
+	} usm;
+
+	/** @ops: submission backend engine operations */
+	const struct xe_engine_ops *ops;
+
+	/** @ring_ops: ring operations for this engine */
+	const struct xe_ring_ops *ring_ops;
+	/** @entity: DRM sched entity for this engine (1 to 1 relationship) */
+	struct drm_sched_entity *entity;
+	/**
+	 * @lrc: logical ring contexts for this engine, @width entries
+	 * allocated together with the engine (flexible array member, must
+	 * stay last)
+	 */
+	struct xe_lrc lrc[];
+};
+
+/**
+ * struct xe_engine_ops - Submission backend engine operations
+ */
+struct xe_engine_ops {
+	/** @init: Initialize engine for submission backend */
+	int (*init)(struct xe_engine *e);
+	/** @kill: Kill inflight submissions for backend */
+	void (*kill)(struct xe_engine *e);
+	/** @fini: Fini engine for submission backend */
+	void (*fini)(struct xe_engine *e);
+	/** @set_priority: Set priority for engine */
+	int (*set_priority)(struct xe_engine *e,
+			    enum xe_engine_priority priority);
+	/** @set_timeslice: Set timeslice for engine, in micro-seconds */
+	int (*set_timeslice)(struct xe_engine *e, u32 timeslice_us);
+	/**
+	 * @set_preempt_timeout: Set preemption timeout for engine, in
+	 * micro-seconds
+	 */
+	int (*set_preempt_timeout)(struct xe_engine *e, u32 preempt_timeout_us);
+	/** @set_job_timeout: Set job timeout for engine, in milli-seconds */
+	int (*set_job_timeout)(struct xe_engine *e, u32 job_timeout_ms);
+	/**
+	 * @suspend: Suspend engine from executing, allowed to be called
+	 * multiple times in a row before resume with the caveat that
+	 * suspend_wait returns before calling suspend again.
+	 */
+	int (*suspend)(struct xe_engine *e);
+	/**
+	 * @suspend_wait: Wait for an engine to suspend executing, should be
+	 * called after suspend.
+	 */
+	void (*suspend_wait)(struct xe_engine *e);
+	/**
+	 * @resume: Resume engine execution, engine must be in a suspended
+	 * state and dma fence returned from most recent suspend call must be
+	 * signalled when this function is called.
+	 */
+	void (*resume)(struct xe_engine *e);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
new file mode 100644
index 000000000000..00f298acc436
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_exec.h"
+#include "xe_macros.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_vm.h"
+
+/**
+ * DOC: Execbuf (User GPU command submission)
+ *
+ * Execs have historically been rather complicated in DRM drivers (at least in
+ * the i915) because of a few things:
+ *
+ * - Passing in a BO list which is read / written to, creating implicit syncs
+ * - Binding at exec time
+ * - Flow controlling the ring at exec time
+ *
+ * In XE we avoid all of this complication by not allowing a BO list to be
+ * passed into an exec, using the dma-buf implicit sync uAPI, having binds as
+ * separate operations, and using the DRM scheduler to flow control the ring.
+ * Let's deep dive on each of these.
+ *
+ * We can get away from a BO list by forcing the user to use in / out fences on
+ * every exec rather than the kernel tracking dependencies of BO (e.g. if the
+ * user knows an exec writes to a BO and reads from the BO in the next exec, it
+ * is the user's responsibility to pass in / out fence between the two execs).
+ *
+ * Implicit dependencies for external BOs are handled by using the dma-buf
+ * implicit dependency uAPI (TODO: add link). To make this work each exec must
+ * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
+ * BO mapped in the VM.
+ *
+ * We do not allow a user to trigger a bind at exec time rather we have a VM
+ * bind IOCTL which uses the same in / out fence interface as exec. In that
+ * sense, a VM bind is basically the same operation as an exec from the user
+ * perspective. e.g. If an exec depends on a VM bind use the in / out fence
+ * interface (struct drm_xe_sync) to synchronize like syncing between two
+ * dependent execs.
+ *
+ * Although a user cannot trigger a bind, we still have to rebind userptrs in
+ * the VM that have been invalidated since the last exec, likewise we also have
+ * to rebind BOs that have been evicted by the kernel. We schedule these rebinds
+ * behind any pending kernel operations on any external BOs in VM or any BOs
+ * private to the VM. This is accomplished by the rebinds waiting on BOs'
+ * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs'
+ * slots (inflight execs are in DMA_RESV_USAGE_BOOKKEEP for private BOs and
+ * in DMA_RESV_USAGE_WRITE for external BOs).
+ *
+ * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute
+ * mode VMs we use preempt fences and a rebind worker (TODO: add link).
+ *
+ * There is no need to flow control the ring in the exec as we write the ring at
+ * submission time and set the DRM scheduler max job limit to SIZE_OF_RING /
+ * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
+ * ring is available.
+ *
+ * All of this results in a rather simple exec implementation.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ *	Parse input arguments
+ *	Wait for any async VM bind passed as in-fences to start
+ *	<----------------------------------------------------------------------|
+ *	Lock global VM lock in read mode                                       |
+ *	Pin userptrs (also finds userptr invalidated since last exec)          |
+ *	Lock exec (VM dma-resv lock, external BOs dma-resv locks)              |
+ *	Validate BOs that have been evicted                                    |
+ *	Create job                                                             |
+ *	Rebind invalidated userptrs + evicted BOs (non-compute-mode)           |
+ *	Add rebind fence dependency to job                                     |
+ *	Add job VM dma-resv bookkeeping slot (non-compute mode)                |
+ *	Add job to external BOs dma-resv write slots (non-compute mode)        |
+ *	Check if any userptrs invalidated since pin ------ Drop locks ---------|
+ *	Install in / out fences for job
+ *	Submit job
+ *	Unlock all
+ */
+
+static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
+			 struct ttm_validate_buffer tv_onstack[],
+			 struct ttm_validate_buffer **tv,
+			 struct list_head *objs)
+{
+	struct xe_vm *vm = e->vm;
+	struct xe_vma *vma;
+	int err;
+
+	/* When xe_vm_no_dma_fences(), nothing is locked and *tv stays NULL */
+	*tv = NULL;
+	if (xe_vm_no_dma_fences(e->vm))
+		return 0;
+
+	err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
+	if (err)
+		return err;
+
+	/*
+	 * Validate BOs that have been evicted (i.e. make sure the
+	 * BOs have valid placements possibly moving an evicted BO back
+	 * to a location where the GPU can access it).
+	 */
+	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
+		if (xe_vma_is_userptr(vma))
+			continue;
+
+		err = xe_bo_validate(vma->bo, vm, false);
+		if (err) {
+			xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
+			*tv = NULL;
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void xe_exec_end(struct xe_engine *e,
+			struct ttm_validate_buffer *tv_onstack,
+			struct ttm_validate_buffer *tv,
+			struct ww_acquire_ctx *ww,
+			struct list_head *objs)
+{
+	if (!xe_vm_no_dma_fences(e->vm))
+		xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs);
+}
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec *args = data;
+	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
+	u64 __user *addresses_user = u64_to_user_ptr(args->address);
+	struct xe_engine *engine;
+	struct xe_sync_entry *syncs = NULL;
+	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
+	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+	struct ttm_validate_buffer *tv = NULL;
+	u32 i, num_syncs = 0;
+	struct xe_sched_job *job;
+	struct dma_fence *rebind_fence;
+	struct xe_vm *vm;
+	struct ww_acquire_ctx ww;
+	struct list_head objs;
+	bool write_locked;
+	int err = 0;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	engine = xe_engine_lookup(xef, args->engine_id);
+	if (XE_IOCTL_ERR(xe, !engine))
+		return -ENOENT;
+
+	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM) ||
+	    XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer)) {
+		err = -EINVAL;
+		goto err_engine;
+	}
+
+	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) {
+		err = -ECANCELED;
+		goto err_engine;
+	}
+
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto err_engine;
+		}
+	}
+
+	vm = engine->vm;
+
+	for (i = 0; i < args->num_syncs; i++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
+					  &syncs_user[i], true,
+					  xe_vm_no_dma_fences(vm));
+		if (err)
+			goto err_syncs;
+	}
+
+	if (xe_engine_is_parallel(engine)) {
+		err = copy_from_user(addresses, addresses_user, sizeof(u64) *
+				     engine->width);
+		if (err) {
+			err = -EFAULT;
+			goto err_syncs;
+		}
+	}
+
+	/*
+	 * We can't install a job into the VM dma-resv shared slot before an
+	 * async VM bind passed in as a fence without the risk of deadlocking as
+	 * the bind can trigger an eviction which in turn depends on anything in
+	 * the VM dma-resv shared slots. Not an ideal solution, but we wait for
+	 * all dependent async VM binds to start (install correct fences into
+	 * dma-resv slots) before moving forward.
+	 */
+	if (!xe_vm_no_dma_fences(vm) &&
+	    vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
+		for (i = 0; i < args->num_syncs; i++) {
+			struct dma_fence *fence = syncs[i].fence;
+			if (fence) {
+				err = xe_vm_async_fence_wait_start(fence);
+				if (err)
+					goto err_syncs;
+			}
+		}
+	}
+
+retry:
+	if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
+		err = down_write_killable(&vm->lock);
+		write_locked = true;
+	} else {
+		/* Nothing to repin under the write lock, read mode suffices */
+		err = down_read_interruptible(&vm->lock);
+		write_locked = false;
+	}
+	if (err)
+		goto err_syncs;
+
+	/* We don't allow execs while the VM is in error state */
+	if (vm->async_ops.error) {
+		err = vm->async_ops.error;
+		goto err_unlock_list;
+	}
+
+	/*
+	 * Extreme corner where we exit a VM error state with a munmap style VM
+	 * unbind inflight which requires a rebind. In this case the rebind
+	 * needs to install some fences into the dma-resv slots. The worker to
+	 * do this is queued, so let that worker make progress by dropping
+	 * vm->lock, flushing the worker and retrying the exec.
+	 */
+	if (vm->async_ops.munmap_rebind_inflight) {
+		if (write_locked)
+			up_write(&vm->lock);
+		else
+			up_read(&vm->lock);
+		flush_work(&vm->async_ops.work);
+		goto retry;
+	}
+
+	if (write_locked) {
+		err = xe_vm_userptr_pin(vm);
+		downgrade_write(&vm->lock);
+		write_locked = false;
+		if (err)
+			goto err_unlock_list;
+	}
+
+	err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs);
+	if (err)
+		goto err_unlock_list;
+
+	if (xe_vm_is_closed(engine->vm)) {
+		drm_warn(&xe->drm, "Trying to schedule after vm is closed\n");
+		err = -EIO;
+		goto err_engine_end;
+	}
+
+	job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ?
+				  addresses : &args->address);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err_engine_end;
+	}
+
+	/*
+	 * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
+	 * VM mode only.
+	 */
+	rebind_fence = xe_vm_rebind(vm, false);
+	if (IS_ERR(rebind_fence)) {
+		err = PTR_ERR(rebind_fence);
+		goto err_put_job;
+	}
+
+	/*
+	 * We store the rebind_fence in the VM so subsequent execs don't get
+	 * scheduled before the rebinds of userptrs / evicted BOs is complete.
+	 */
+	if (rebind_fence) {
+		dma_fence_put(vm->rebind_fence);
+		vm->rebind_fence = rebind_fence;
+	}
+	if (vm->rebind_fence) {
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			     &vm->rebind_fence->flags)) {
+			dma_fence_put(vm->rebind_fence);
+			vm->rebind_fence = NULL;
+		} else {
+			dma_fence_get(vm->rebind_fence);
+			err = drm_sched_job_add_dependency(&job->drm,
+							   vm->rebind_fence);
+			if (err)
+				goto err_put_job;
+		}
+	}
+
+	/* Wait behind munmap style rebinds */
+	if (!xe_vm_no_dma_fences(vm)) {
+		err = drm_sched_job_add_resv_dependencies(&job->drm,
+							  &vm->resv,
+							  DMA_RESV_USAGE_KERNEL);
+		if (err)
+			goto err_put_job;
+	}
+
+	for (i = 0; i < num_syncs && !err; i++)
+		err = xe_sync_entry_add_deps(&syncs[i], job);
+	if (err)
+		goto err_put_job;
+
+	if (!xe_vm_no_dma_fences(vm)) {
+		err = down_read_interruptible(&vm->userptr.notifier_lock);
+		if (err)
+			goto err_put_job;
+
+		err = __xe_vm_userptr_needs_repin(vm);
+		if (err)
+			goto err_repin;
+	}
+
+	/*
+	 * Point of no return, if we error after this point just set an error on
+	 * the job and let the DRM scheduler / backend clean up the job.
+	 */
+	xe_sched_job_arm(job);
+	if (!xe_vm_no_dma_fences(vm)) {
+		/* Block userptr invalidations / BO eviction */
+		dma_resv_add_fence(&vm->resv,
+				   &job->drm.s_fence->finished,
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		/*
+		 * Make implicit sync work across drivers, assuming all external
+		 * BOs are written as we don't pass in a read / write list.
+		 */
+		xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished,
+					DMA_RESV_USAGE_WRITE);
+	}
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], job,
+				     &job->drm.s_fence->finished);
+
+	xe_sched_job_push(job);
+
+err_repin:
+	if (!xe_vm_no_dma_fences(vm))
+		up_read(&vm->userptr.notifier_lock);
+err_put_job:
+	if (err)
+		xe_sched_job_put(job);
+err_engine_end:
+	xe_exec_end(engine, tv_onstack, tv, &ww, &objs);
+err_unlock_list:
+	if (write_locked)
+		up_write(&vm->lock);
+	else
+		up_read(&vm->lock);
+	if (err == -EAGAIN)
+		goto retry;
+err_syncs:
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_cleanup(&syncs[i]);
+	kfree(syncs);
+err_engine:
+	xe_engine_put(engine);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_exec.h b/drivers/gpu/drm/xe/xe_exec.h
new file mode 100644
index 000000000000..e4932494cea3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_H_
+#define _XE_EXEC_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
new file mode 100644
index 000000000000..47587571123a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_execlist.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_hw_fence.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_regs.h"
+
+#define XE_EXECLIST_HANG_LIMIT 1
+
+#define GEN11_SW_CTX_ID_SHIFT 37
+#define GEN11_SW_CTX_ID_WIDTH 11
+#define XEHP_SW_CTX_ID_SHIFT  39
+#define XEHP_SW_CTX_ID_WIDTH  16
+
+#define GEN11_SW_CTX_ID \
+	GENMASK_ULL(GEN11_SW_CTX_ID_WIDTH + GEN11_SW_CTX_ID_SHIFT - 1, \
+		    GEN11_SW_CTX_ID_SHIFT)
+
+#define XEHP_SW_CTX_ID \
+	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
+		    XEHP_SW_CTX_ID_SHIFT)
+
+/* Load @lrc, tagged with SW context ID @ctx_id, into @hwe's submit queue */
+static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
+			u32 ctx_id)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	u64 lrc_desc;
+
+	pr_debug("__start_lrc(%s, 0x%p, %u)\n", hwe->name, lrc, ctx_id);
+
+	lrc_desc = xe_lrc_descriptor(lrc);
+
+	if (GRAPHICS_VERx100(xe) >= 1250) {
+		XE_BUG_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
+		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
+	} else {
+		XE_BUG_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id));
+		lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id);
+	}
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+		xe_mmio_write32(gt, GEN12_RCU_MODE.reg,
+				_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
+
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+	lrc->ring.old_tail = lrc->ring.tail;
+
+	/*
+	 * Make sure the context image is complete before we submit it to HW.
+	 *
+	 * Ostensibly, writes (including the WCB) should be flushed prior to
+	 * an uncached write such as our mmio register access, but empirical
+	 * evidence (esp. on Braswell) suggests that the WC write into memory
+	 * may not be visible to the HW prior to the completion of the UC
+	 * register write and that we may begin execution from the context
+	 * before its image is complete leading to invalid PD chasing.
+	 */
+	wmb();
+
+	xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg,
+			xe_bo_ggtt_addr(hwe->hwsp));
+	xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg);
+	xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg,
+			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 0,
+			lower_32_bits(lrc_desc));
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 4,
+			upper_32_bits(lrc_desc));
+	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg,
+			EL_CTRL_LOAD);
+}
+
+static void __xe_execlist_port_start(struct xe_execlist_port *port,
+				     struct xe_execlist_engine *exl)
+{
+	struct xe_device *xe = gt_to_xe(port->hwe->gt);
+	/* Largest SW context ID the descriptor field of this platform holds */
+	int max_ctx = GRAPHICS_VERx100(xe) >= 1250 ?
+		FIELD_MAX(XEHP_SW_CTX_ID) : FIELD_MAX(GEN11_SW_CTX_ID);
+
+	xe_execlist_port_assert_held(port);
+
+	/* Take a fresh ID unless the already running engine is resubmitted */
+	if (port->running_exl != exl || !exl->has_run) {
+		port->last_ctx_id++;
+
+		/* 0 is reserved for the kernel context */
+		if (port->last_ctx_id > max_ctx)
+			port->last_ctx_id = 1;
+	}
+
+	__start_lrc(port->hwe, exl->engine->lrc, port->last_ctx_id);
+	port->running_exl = exl;
+	exl->has_run = true;
+}
+
+static void __xe_execlist_port_idle(struct xe_execlist_port *port)
+{
+	u32 noop[2] = { MI_NOOP, MI_NOOP };
+
+	xe_execlist_port_assert_held(port);
+
+	if (!port->running_exl)
+		return;
+
+	pr_debug("__xe_execlist_port_idle(%d:%d)\n", port->hwe->class,
+		 port->hwe->instance);
+	/* Park the port on the kernel context (ID 0) with two NOOPs queued */
+	xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop));
+	__start_lrc(port->hwe, &port->hwe->kernel_lrc, 0);
+	port->running_exl = NULL;
+}
+
+static bool xe_execlist_is_idle(struct xe_execlist_engine *exl)
+{
+	struct xe_lrc *lrc = exl->engine->lrc;
+
+	return lrc->ring.tail == lrc->ring.old_tail;
+}
+
+static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
+{
+	struct xe_execlist_engine *exl;
+	int prio;
+
+	xe_execlist_port_assert_held(port);
+
+	for (prio = ARRAY_SIZE(port->active) - 1; prio >= 0; prio--) {
+		struct list_head *head = &port->active[prio];
+
+		while (!list_empty(head)) {
+			exl = list_first_entry(head, typeof(*exl), active_link);
+			if (xe_execlist_is_idle(exl)) {
+				list_del(&exl->active_link);
+				exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+				continue;
+			}
+
+			/* Round-robin within a priority level */
+			list_move_tail(&exl->active_link, head);
+			__xe_execlist_port_start(port, exl);
+			return;
+		}
+	}
+
+	__xe_execlist_port_idle(port);
+}
+
+static u64 read_execlist_status(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	u32 hi, lo;
+
+	lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base).reg);
+	hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base).reg);
+	/* Debug only: this runs from the IRQ handler and the 1s poll timer */
+	pr_debug("EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class,
+		 hwe->instance, hi, lo);
+
+	return lo | (u64)hi << 32;
+}
+
+static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
+{
+	u64 status;
+
+	xe_execlist_port_assert_held(port);
+
+	status = read_execlist_status(port->hwe);
+	if (status & BIT(7))
+		return;
+
+	__xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
+					 u16 intr_vec)
+{
+	struct xe_execlist_port *port = hwe->exl_port;
+
+	spin_lock(&port->lock);
+	xe_execlist_port_irq_handler_locked(port);
+	spin_unlock(&port->lock);
+}
+
+static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
+					 enum xe_engine_priority priority)
+{
+	xe_execlist_port_assert_held(port);
+
+	if (port->running_exl && port->running_exl->active_priority >= priority)
+		return;
+
+	__xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_make_active(struct xe_execlist_engine *exl)
+{
+	struct xe_execlist_port *port = exl->port;
+	enum xe_engine_priority priority = exl->active_priority;
+
+	XE_BUG_ON(priority == XE_ENGINE_PRIORITY_UNSET);
+	XE_BUG_ON(priority < 0);
+	XE_BUG_ON(priority >= ARRAY_SIZE(exl->port->active));
+
+	spin_lock_irq(&port->lock);
+
+	if (exl->active_priority != priority &&
+	    exl->active_priority != XE_ENGINE_PRIORITY_UNSET) {
+		/* Priority changed, move it. NOTE(review): dead, priority == active_priority */
+		list_del(&exl->active_link);
+		exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+	}
+
+	if (exl->active_priority == XE_ENGINE_PRIORITY_UNSET) {
+		exl->active_priority = priority;
+		list_add_tail(&exl->active_link, &port->active[priority]);
+	}
+
+	xe_execlist_port_wake_locked(exl->port, priority);
+
+	spin_unlock_irq(&port->lock);
+}
+
+static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
+{
+	struct xe_execlist_port *port =
+		container_of(timer, struct xe_execlist_port, irq_fail);
+
+	spin_lock_irq(&port->lock);
+	xe_execlist_port_irq_handler_locked(port);
+	spin_unlock_irq(&port->lock);
+
+	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+	add_timer(&port->irq_fail);
+}
+
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+						 struct xe_hw_engine *hwe)
+{
+	struct drm_device *drm = &xe->drm;
+	struct xe_execlist_port *port;
+	int i;
+
+	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return ERR_PTR(-ENOMEM);
+
+	port->hwe = hwe;
+
+	spin_lock_init(&port->lock);
+	for (i = 0; i < ARRAY_SIZE(port->active); i++)
+		INIT_LIST_HEAD(&port->active[i]);
+
+	port->last_ctx_id = 1;
+	port->running_exl = NULL;
+
+	hwe->irq_handler = xe_execlist_port_irq_handler;
+
+	/* TODO: Fix the interrupt code so it doesn't race like mad */
+	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
+	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+	add_timer(&port->irq_fail);
+
+	return port;
+}
+
+void xe_execlist_port_destroy(struct xe_execlist_port *port)
+{
+	/* The timer re-arms itself; wait for a running handler to finish */
+	del_timer_sync(&port->irq_fail);
+	/* Prevent an interrupt while we're destroying */
+	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+	port->hwe->irq_handler = NULL;
+	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+}
+
+static struct dma_fence *
+execlist_run_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_engine *e = job->engine;
+
+	/* Emit the job, then make its engine active on the execlist port */
+	e->ring_ops->emit_job(job);
+	xe_execlist_make_active(e->execlist);
+
+	return dma_fence_get(job->fence);
+}
+
+static void execlist_job_free(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+
+	xe_sched_job_put(job);
+}
+
+static const struct drm_sched_backend_ops drm_sched_ops = {
+	.run_job = execlist_run_job,
+	.free_job = execlist_job_free,
+};
+
+static int execlist_engine_init(struct xe_engine *e)
+{
+	struct drm_gpu_scheduler *sched;
+	struct xe_execlist_engine *exl;
+	int err;
+
+	XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
+
+	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
+	if (!exl)
+		return -ENOMEM;
+
+	exl->engine = e;
+
+	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
+			     e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
+			     NULL, NULL, e->hwe->name,
+			     gt_to_xe(e->gt)->drm.dev);
+	if (err)
+		goto err_free;
+
+	sched = &exl->sched;
+	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
+	if (err)
+		goto err_sched;
+
+	exl->port = e->hwe->exl_port;
+	exl->has_run = false;
+	exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+	e->execlist = exl;
+	e->entity = &exl->entity;
+
+	switch (e->class) {
+	case XE_ENGINE_CLASS_RENDER:
+		sprintf(e->name, "rcs%d", ffs(e->logical_mask) - 1);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		sprintf(e->name, "vcs%d", ffs(e->logical_mask) - 1);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		sprintf(e->name, "vecs%d", ffs(e->logical_mask) - 1);
+		break;
+	case XE_ENGINE_CLASS_COPY:
+		sprintf(e->name, "bcs%d", ffs(e->logical_mask) - 1);
+		break;
+	case XE_ENGINE_CLASS_COMPUTE:
+		sprintf(e->name, "ccs%d", ffs(e->logical_mask) - 1);
+		break;
+	default:
+		XE_WARN_ON(e->class);
+	}
+
+	return 0;
+
+err_sched:
+	drm_sched_fini(&exl->sched);
+err_free:
+	kfree(exl);
+	return err;
+}
+
+static void execlist_engine_fini_async(struct work_struct *w)
+{
+	struct xe_execlist_engine *ee =
+		container_of(w, struct xe_execlist_engine, fini_async);
+	struct xe_engine *e = ee->engine;
+	struct xe_execlist_engine *exl = e->execlist;
+	unsigned long flags;
+
+	XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
+
+	spin_lock_irqsave(&exl->port->lock, flags);
+	if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET))
+		list_del(&exl->active_link);
+	spin_unlock_irqrestore(&exl->port->lock, flags);
+
+	if (e->flags & ENGINE_FLAG_PERSISTENT)
+		xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
+	drm_sched_entity_fini(&exl->entity);
+	drm_sched_fini(&exl->sched);
+	kfree(exl);
+
+	xe_engine_fini(e);
+}
+
+static void execlist_engine_kill(struct xe_engine *e)
+{
+	/* NIY */
+}
+
+static void execlist_engine_fini(struct xe_engine *e)
+{
+	INIT_WORK(&e->execlist->fini_async, execlist_engine_fini_async);
+	queue_work(system_unbound_wq, &e->execlist->fini_async);
+}
+
+static int execlist_engine_set_priority(struct xe_engine *e,
+					enum xe_engine_priority priority)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_engine_set_preempt_timeout(struct xe_engine *e,
+					       u32 preempt_timeout_us)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_engine_set_job_timeout(struct xe_engine *e,
+					   u32 job_timeout_ms)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_engine_suspend(struct xe_engine *e)
+{
+	/* NIY */
+	return 0;
+}
+
+static void execlist_engine_suspend_wait(struct xe_engine *e)
+
+{
+	/* NIY */
+}
+
+static void execlist_engine_resume(struct xe_engine *e)
+{
+	xe_mocs_init_engine(e);
+}
+
+static const struct xe_engine_ops execlist_engine_ops = {
+	.init = execlist_engine_init,
+	.kill = execlist_engine_kill,
+	.fini = execlist_engine_fini,
+	.set_priority = execlist_engine_set_priority,
+	.set_timeslice = execlist_engine_set_timeslice,
+	.set_preempt_timeout = execlist_engine_set_preempt_timeout,
+	.set_job_timeout = execlist_engine_set_job_timeout,
+	.suspend = execlist_engine_suspend,
+	.suspend_wait = execlist_engine_suspend_wait,
+	.resume = execlist_engine_resume,
+};
+
+int xe_execlist_init(struct xe_gt *gt)
+{
+	/* GuC submission enabled, nothing to do */
+	if (xe_device_guc_submission_enabled(gt_to_xe(gt)))
+		return 0;
+
+	gt->engine_ops = &execlist_engine_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h
new file mode 100644
index 000000000000..6a0442a6eff6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_H_
+#define _XE_EXECLIST_H_
+
+#include "xe_execlist_types.h"
+
+struct xe_device;
+struct xe_gt;
+
+#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock)
+
+int xe_execlist_init(struct xe_gt *gt);
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+						 struct xe_hw_engine *hwe);
+void xe_execlist_port_destroy(struct xe_execlist_port *port);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h
new file mode 100644
index 000000000000..9b1239b47292
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist_types.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_TYPES_H_
+#define _XE_EXECLIST_TYPES_H_
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_engine_types.h"
+
+struct xe_hw_engine;
+struct xe_execlist_engine;
+
+struct xe_execlist_port {
+	struct xe_hw_engine *hwe;
+	/** @lock: protects @active, @last_ctx_id and @running_exl */
+	spinlock_t lock;
+	/** @active: per-priority lists of engines queued to run */
+	struct list_head active[XE_ENGINE_PRIORITY_COUNT];
+	/** @last_ctx_id: SW context ID used for the most recent engine start */
+	u32 last_ctx_id;
+	/** @running_exl: engine last started on this port, NULL when idle */
+	struct xe_execlist_engine *running_exl;
+	/** @irq_fail: timer re-running the port IRQ handler once a second */
+	struct timer_list irq_fail;
+};
+
+struct xe_execlist_engine {
+	struct xe_engine *engine;
+	/** @sched: DRM scheduler set up by execlist_engine_init() */
+	struct drm_gpu_scheduler sched;
+	/** @entity: DRM sched entity running on @sched */
+	struct drm_sched_entity entity;
+	/** @port: execlist port of the backing hw engine */
+	struct xe_execlist_port *port;
+	/** @has_run: true once the engine has been started on @port */
+	bool has_run;
+	/** @fini_async: work item doing the teardown queued by the fini op */
+	struct work_struct fini_async;
+	/** @active_priority: index of the port->active[] list, UNSET if none */
+	enum xe_engine_priority active_priority;
+	struct list_head active_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
new file mode 100644
index 000000000000..0320ce7ba3d1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_util.h>
+
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "gt/intel_gt_regs.h"
+
+#define XE_FORCE_WAKE_ACK_TIMEOUT_MS	50
+
+static struct xe_gt *
+fw_to_gt(struct xe_force_wake *fw)
+{
+	return fw->gt;
+}
+
+static struct xe_device *
+fw_to_xe(struct xe_force_wake *fw)
+{
+	return gt_to_xe(fw_to_gt(fw));
+}
+
+static void domain_init(struct xe_force_wake_domain *domain,
+			enum xe_force_wake_domain_id id,
+			u32 reg, u32 ack, u32 val, u32 mask)
+{
+	domain->id = id;
+	domain->reg_ctl = reg;
+	domain->reg_ack = ack;
+	domain->val = val;
+	domain->mask = mask;
+}
+
+#define FORCEWAKE_ACK_GT_MTL                 _MMIO(0xdfc)
+
+void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 ack_reg;
+
+	fw->gt = gt;
+	mutex_init(&fw->lock);
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+
+	/* Only the ack register of the GT domain differs on Meteor Lake */
+	if (xe->info.platform == XE_METEORLAKE)
+		ack_reg = FORCEWAKE_ACK_GT_MTL.reg;
+	else
+		ack_reg = FORCEWAKE_ACK_GT_GEN9.reg;
+
+	/* Control register, ack register, then the value / mask pair */
+	domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
+		    XE_FW_DOMAIN_ID_GT,
+		    FORCEWAKE_GT_GEN9.reg,
+		    ack_reg,
+		    BIT(0), BIT(16));
+}
+
+void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	int i, j;
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+
+	if (!xe_gt_is_media_type(gt))
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
+			    XE_FW_DOMAIN_ID_RENDER,
+			    FORCEWAKE_RENDER_GEN9.reg,
+			    FORCEWAKE_ACK_RENDER_GEN9.reg,
+			    BIT(0), BIT(16));
+
+	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
+			    XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
+			    FORCEWAKE_MEDIA_VDBOX_GEN11(j).reg,
+			    FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(j).reg,
+			    BIT(0), BIT(16));
+	}
+
+	for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
+			    XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
+			    FORCEWAKE_MEDIA_VEBOX_GEN11(j).reg,
+			    FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(j).reg,
+			    BIT(0), BIT(16));
+	}
+}
+
+void xe_force_wake_prune(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	int i, j;
+
+	/* Call after fuses have been read, prune domains that are fused off */
+
+	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j)
+		if (!(gt->info.engine_mask & BIT(i)))
+			fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j].reg_ctl = 0;
+
+	for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j)
+		if (!(gt->info.engine_mask & BIT(i)))
+			fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j].reg_ctl = 0;
+}
+
+static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+	xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val);
+}
+
+static int domain_wake_wait(struct xe_gt *gt,
+			    struct xe_force_wake_domain *domain)
+{
+	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS);
+}
+
+static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+	xe_mmio_write32(gt, domain->reg_ctl, domain->mask);
+}
+
+static int domain_sleep_wait(struct xe_gt *gt,
+			     struct xe_force_wake_domain *domain)
+{
+	return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val,
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS);
+}
+
+#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
+	for (tmp__ = (mask__); tmp__ ;) \
+		for_each_if((domain__ = ((fw__)->domains + \
+					 __mask_next_bit(tmp__))) && \
+					 domain__->reg_ctl)
+
+int xe_force_wake_get(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains)
+{
+	struct xe_device *xe = fw_to_xe(fw);
+	struct xe_gt *gt = fw_to_gt(fw);
+	struct xe_force_wake_domain *domain;
+	enum xe_force_wake_domains tmp, woken = 0;
+	int ret, ret2 = 0;
+
+	mutex_lock(&fw->lock);
+	for_each_fw_domain_masked(domain, domains, fw, tmp) {
+		if (!domain->ref++) {
+			woken |= BIT(domain->id);
+			domain_wake(gt, domain);
+		}
+	}
+	for_each_fw_domain_masked(domain, woken, fw, tmp) {
+		ret = domain_wake_wait(gt, domain);
+		ret2 |= ret;
+		if (ret)
+			drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n",
+				   domain->id, ret);
+	}
+	fw->awake_domains |= woken;
+	mutex_unlock(&fw->lock);
+
+	return ret2;
+}
+
+int xe_force_wake_put(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains)
+{
+	struct xe_device *xe = fw_to_xe(fw);
+	struct xe_gt *gt = fw_to_gt(fw);
+	struct xe_force_wake_domain *domain;
+	enum xe_force_wake_domains tmp, sleep = 0;
+	int ret, ret2 = 0;
+
+	mutex_lock(&fw->lock);
+	for_each_fw_domain_masked(domain, domains, fw, tmp) {
+		if (!--domain->ref) {
+			sleep |= BIT(domain->id);
+			domain_sleep(gt, domain);
+		}
+	}
+	for_each_fw_domain_masked(domain, sleep, fw, tmp) {
+		ret = domain_sleep_wait(gt, domain);
+		ret2 |= ret;
+		if (ret)
+			drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n",
+				   domain->id, ret);
+	}
+	fw->awake_domains &= ~sleep;
+	mutex_unlock(&fw->lock);
+
+	return ret2;
+}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
new file mode 100644
index 000000000000..5adb8daa3b71
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_H_
+#define _XE_FORCE_WAKE_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_macros.h"
+
+struct xe_gt;
+
+void xe_force_wake_init_gt(struct xe_gt *gt,
+			   struct xe_force_wake *fw);
+void xe_force_wake_init_engines(struct xe_gt *gt,
+				struct xe_force_wake *fw);
+void xe_force_wake_prune(struct  xe_gt *gt,
+			 struct xe_force_wake *fw);
+int xe_force_wake_get(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains);
+int xe_force_wake_put(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains);
+
+/*
+ * Return the reference count of the lowest-numbered domain set in @domain.
+ * @domain must not be empty.
+ */
+static inline int
+xe_force_wake_ref(struct xe_force_wake *fw,
+		  enum xe_force_wake_domains domain)
+{
+	const struct xe_force_wake_domain *entry;
+
+	XE_BUG_ON(!domain);
+	entry = &fw->domains[ffs(domain) - 1];
+
+	return entry->ref;
+}
+
+/* Assert that at least one of the domains in @domain is marked awake. */
+static inline void
+xe_force_wake_assert_held(struct xe_force_wake *fw,
+			  enum xe_force_wake_domains domain)
+{
+	XE_BUG_ON((fw->awake_domains & domain) == 0);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
new file mode 100644
index 000000000000..208dd629d7b1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_TYPES_H_
+#define _XE_FORCE_WAKE_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/*
+ * Index of each force wake domain in &xe_force_wake.domains. The value is also
+ * the bit position of the matching &enum xe_force_wake_domains flag.
+ * XE_FW_DOMAIN_ID_COUNT is the number of domains, not a domain itself.
+ */
+enum xe_force_wake_domain_id {
+	XE_FW_DOMAIN_ID_GT = 0,
+	XE_FW_DOMAIN_ID_RENDER,
+	XE_FW_DOMAIN_ID_MEDIA,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX0,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX1,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX2,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX3,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX4,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX5,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX6,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX7,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX0,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX1,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX2,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX3,
+	XE_FW_DOMAIN_ID_GSC,
+	XE_FW_DOMAIN_ID_COUNT
+};
+
+/*
+ * Bitmask form of &enum xe_force_wake_domain_id: one bit per domain, so
+ * values can be OR-ed together. XE_FORCEWAKE_ALL has every domain bit set.
+ */
+enum xe_force_wake_domains {
+	XE_FW_GT		= BIT(XE_FW_DOMAIN_ID_GT),
+	XE_FW_RENDER		= BIT(XE_FW_DOMAIN_ID_RENDER),
+	XE_FW_MEDIA		= BIT(XE_FW_DOMAIN_ID_MEDIA),
+	XE_FW_MEDIA_VDBOX0	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX0),
+	XE_FW_MEDIA_VDBOX1	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX1),
+	XE_FW_MEDIA_VDBOX2	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX2),
+	XE_FW_MEDIA_VDBOX3	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX3),
+	XE_FW_MEDIA_VDBOX4	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX4),
+	XE_FW_MEDIA_VDBOX5	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX5),
+	XE_FW_MEDIA_VDBOX6	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX6),
+	XE_FW_MEDIA_VDBOX7	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX7),
+	XE_FW_MEDIA_VEBOX0	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX0),
+	XE_FW_MEDIA_VEBOX1	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX1),
+	XE_FW_MEDIA_VEBOX2	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2),
+	XE_FW_MEDIA_VEBOX3	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3),
+	XE_FW_GSC		= BIT(XE_FW_DOMAIN_ID_GSC),
+	XE_FORCEWAKE_ALL	= BIT(XE_FW_DOMAIN_ID_COUNT) - 1
+};
+
+/**
+ * struct xe_force_wake_domain - state of a single force wake domain
+ */
+struct xe_force_wake_domain {
+	/** @id: domain force wake id, also its index in the domains array */
+	enum xe_force_wake_domain_id id;
+	/**
+	 * @reg_ctl: domain wake control register address; a value of zero
+	 * makes the domain iterator skip this domain
+	 */
+	u32 reg_ctl;
+	/** @reg_ack: domain ack register address */
+	u32 reg_ack;
+	/** @val: domain wake write value, also used when waiting for the ack */
+	u32 val;
+	/** @mask: domain mask */
+	u32 mask;
+	/**
+	 * @ref: number of outstanding xe_force_wake_get() references; the
+	 * domain is woken on the 0 -> 1 transition and put to sleep on 1 -> 0
+	 */
+	u32 ref;
+};
+
+/**
+ * struct xe_force_wake - XE force wake state of one GT
+ */
+struct xe_force_wake {
+	/** @gt: back pointer to the GT */
+	struct xe_gt *gt;
+	/** @lock: protects everything in this force wake struct */
+	struct mutex lock;
+	/**
+	 * @awake_domains: mask of all domains currently awake, updated by
+	 * xe_force_wake_get() and xe_force_wake_put()
+	 */
+	enum xe_force_wake_domains awake_domains;
+	/** @domains: force wake domains, indexed by domain id */
+	struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
new file mode 100644
index 000000000000..eab74a509f68
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_ggtt.h"
+
+#include <linux/sizes.h>
+#include <drm/i915_drm.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_wopcm.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+/* FIXME: Common file, preferably auto-gen */
+#define MTL_GGTT_PTE_PAT0	BIT(52)
+#define MTL_GGTT_PTE_PAT1	BIT(53)
+
+/**
+ * xe_ggtt_pte_encode - build the GGTT PTE for one page of a BO
+ * @bo: buffer object backing the page
+ * @bo_offset: offset of the page inside @bo
+ *
+ * Return: the address reported by xe_bo_addr() with the present bit set, plus
+ * the local-memory bit when the page is in local memory and, on Meteor Lake,
+ * both PAT bits.
+ */
+u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	bool in_lmem;
+	u64 entry;
+
+	entry = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &in_lmem);
+	entry |= GEN8_PAGE_PRESENT;
+	if (in_lmem)
+		entry |= GEN12_GGTT_PTE_LM;
+
+	/* FIXME: vfunc + pass in caching rules */
+	if (xe->info.platform == XE_METEORLAKE)
+		entry |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
+
+	return entry;
+}
+
+/*
+ * Read the GGMS field from the GMCH control word in PCI config space and
+ * convert it to a size in bytes (1 MiB << field). Returns 0 when the field
+ * is zero.
+ */
+static unsigned int probe_gsm_size(struct pci_dev *pdev)
+{
+	u16 ctl, ggms;
+
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &ctl);
+	ggms = (ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
+	if (!ggms)
+		return 0;
+
+	return SZ_1M << ggms;
+}
+
+/**
+ * xe_ggtt_set_pte - write one GGTT page table entry
+ * @ggtt: the GGTT
+ * @addr: GGTT address of the page; must be below the GGTT size and have no
+ *	  bits of GEN8_PTE_MASK set
+ * @pte: encoded entry to write
+ */
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
+{
+	u64 __iomem *slot;
+
+	XE_BUG_ON(addr & GEN8_PTE_MASK);
+	XE_BUG_ON(addr >= ggtt->size);
+
+	slot = &ggtt->gsm[addr >> GEN8_PTE_SHIFT];
+	writeq(pte, slot);
+}
+
+/*
+ * Point every page of the GGTT range [start, start + size) at the scratch
+ * page, or write zero entries when no scratch BO exists yet.
+ */
+static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
+{
+	u64 end = start + size - 1;
+	u64 scratch_pte = 0;
+	u64 addr;
+
+	XE_BUG_ON(start >= end);
+
+	if (ggtt->scratch)
+		scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
+
+	for (addr = start; addr < end; addr += GEN8_PAGE_SIZE)
+		xe_ggtt_set_pte(ggtt, addr, scratch_pte);
+}
+
+/*
+ * drmm release action registered by xe_ggtt_init_noalloc(): destroys the lock,
+ * takes down the address allocator and drops the scratch BO.
+ */
+static void ggtt_fini_noalloc(struct drm_device *drm, void *arg)
+{
+	struct xe_ggtt *ggtt = arg;
+
+	mutex_destroy(&ggtt->lock);
+	drm_mm_takedown(&ggtt->mm);
+
+	/*
+	 * scratch is only set by xe_ggtt_init() and may still be NULL here;
+	 * presumably the helper tolerates that -- NOTE(review): confirm.
+	 */
+	xe_bo_unpin_map_no_vm(ggtt->scratch);
+}
+
+/**
+ * xe_ggtt_init_noalloc - set up the GGTT without allocating any BO
+ * @gt: GT owning the GGTT; must not be a media GT
+ * @ggtt: the GGTT to initialize
+ *
+ * Probes the GSM size, locates the PTE array inside the MMIO mapping and
+ * initializes the address space allocator and lock. Teardown is registered as
+ * a drm managed action.
+ *
+ * Return: 0 on success, -ENOMEM if the hardware reports no GSM, or the error
+ * from registering the managed action.
+ */
+int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	unsigned int gsm_size;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	ggtt->gt = gt;
+
+	gsm_size = probe_gsm_size(pdev);
+	if (gsm_size == 0) {
+		drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
+		return -ENOMEM;
+	}
+
+	/* The PTE array is accessed 8 MiB into the GT's register mapping */
+	ggtt->gsm = gt->mmio.regs + SZ_8M;
+	ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE;
+
+	/*
+	 * 8B per entry, each points to a 4KB page.
+	 *
+	 * The GuC owns the WOPCM space, thus we can't allocate GGTT address in
+	 * this area. Even though we likely configure the WOPCM to less than the
+	 * maximum value, to simplify the driver load (no need to fetch HuC +
+	 * GuC firmwares and determine their sizes before initializing the GGTT)
+	 * just start the GGTT allocation above the max WOPCM size. This might
+	 * waste space in the GGTT (WOPCM is 2MB on modern platforms) but we can
+	 * live with this.
+	 *
+	 * Another benefit of this is the GuC bootrom can't access anything
+	 * below the WOPCM max size so anything the bootrom needs to access (e.g.
+	 * a RSA key) needs to be placed in the GGTT above the WOPCM max size.
+	 * Starting the GGTT allocations above the WOPCM max give us the correct
+	 * placement for free.
+	 */
+	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
+		    ggtt->size - xe_wopcm_size(xe));
+	mutex_init(&ggtt->lock);
+
+	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt);
+}
+
+/*
+ * Fill every unallocated range of the GGTT with scratch PTEs and invalidate.
+ * Only holes are touched, so existing allocations keep their entries.
+ */
+static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
+{
+	struct drm_mm_node *node;
+	u64 hole_start, hole_end;
+
+	mutex_lock(&ggtt->lock);
+
+	/* Display may have allocated inside ggtt, so be careful with clearing here */
+	drm_mm_for_each_hole(node, &ggtt->mm, hole_start, hole_end) {
+		xe_ggtt_clear(ggtt, hole_start, hole_end - hole_start);
+	}
+	xe_ggtt_invalidate(ggtt->gt);
+
+	mutex_unlock(&ggtt->lock);
+}
+
+/**
+ * xe_ggtt_init - allocate the scratch page and clear the GGTT
+ * @gt: GT owning the GGTT
+ * @ggtt: the GGTT, already set up by xe_ggtt_init_noalloc()
+ *
+ * Creates and pins a one-page scratch BO, then points all free GGTT ranges at
+ * it. On failure @ggtt->scratch is left NULL.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *scratch;
+	int err;
+
+	scratch = xe_bo_create_locked(xe, gt, NULL, GEN8_PAGE_SIZE,
+				      ttm_bo_type_kernel,
+				      XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				      XE_BO_CREATE_PINNED_BIT);
+	ggtt->scratch = scratch;
+	if (IS_ERR(scratch)) {
+		ggtt->scratch = NULL;
+		return PTR_ERR(scratch);
+	}
+
+	err = xe_bo_pin(scratch);
+	xe_bo_unlock_no_vm(scratch);
+	if (err) {
+		xe_bo_put(scratch);
+		ggtt->scratch = NULL;
+		return err;
+	}
+
+	xe_ggtt_initial_clear(ggtt);
+
+	return 0;
+}
+
+#define GEN12_GUC_TLB_INV_CR                     _MMIO(0xcee8)
+#define   GEN12_GUC_TLB_INV_CR_INVALIDATE        (1 << 0)
+#define PVC_GUC_TLB_INV_DESC0			_MMIO(0xcf7c)
+#define   PVC_GUC_TLB_INV_DESC0_VALID		 (1 << 0)
+#define PVC_GUC_TLB_INV_DESC1			_MMIO(0xcf80)
+#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	 (1 << 6)
+
+/**
+ * xe_ggtt_invalidate - flush GGTT updates and invalidate the GuC TLB
+ * @gt: GT whose GGTT was modified
+ *
+ * Always writes the GFX flush control register; when GuC submission is
+ * enabled it additionally triggers the platform's GuC TLB invalidation.
+ */
+void xe_ggtt_invalidate(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	/* TODO: vfunc for GuC vs. non-GuC */
+
+	/* TODO: i915 makes comments about this being uncached and
+	 * therefore flushing WC buffers.  Is that really true here?
+	 */
+	xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN);
+
+	if (!xe_device_guc_submission_enabled(xe))
+		return;
+
+	/* TODO: also use vfunc here */
+	if (xe->info.platform != XE_PVC) {
+		xe_mmio_write32(gt, GEN12_GUC_TLB_INV_CR.reg,
+				GEN12_GUC_TLB_INV_CR_INVALIDATE);
+		return;
+	}
+
+	xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg,
+			PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+	xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg,
+			PVC_GUC_TLB_INV_DESC0_VALID);
+}
+
+/**
+ * xe_ggtt_printk - dump all GGTT entries that differ from the scratch PTE
+ * @ggtt: the GGTT
+ * @prefix: string printed in front of every line
+ *
+ * Debug helper. Walks the whole GGTT and prints every entry that does not
+ * match the scratch PTE (or zero when no scratch BO exists).
+ */
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
+{
+	u64 addr, scratch_pte = 0;
+
+	/* Same fallback as xe_ggtt_clear(): without a scratch BO empty PTEs are 0 */
+	if (ggtt->scratch)
+		scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
+
+	printk("%sGlobal GTT:", prefix);
+	for (addr = 0; addr < ggtt->size; addr += GEN8_PAGE_SIZE) {
+		unsigned int i = addr / GEN8_PAGE_SIZE;
+		u64 pte;
+
+		XE_BUG_ON(addr > U32_MAX);
+
+		/* gsm is __iomem: use the accessor and sample each entry once */
+		pte = readq(&ggtt->gsm[i]);
+		if (pte == scratch_pte)
+			continue;
+
+		printk("%s    ggtt[0x%08x] = 0x%016llx",
+		       prefix, (u32)addr, pte);
+	}
+}
+
+/**
+ * xe_ggtt_insert_special_node_locked - reserve GGTT address space for a node
+ * @ggtt: the GGTT
+ * @node: node to insert
+ * @size: size of the reservation
+ * @align: alignment of the reservation
+ * @mm_flags: drm_mm insert mode passed through to the allocator
+ *
+ * Only reserves address space; no PTEs are written. This function takes no
+ * lock itself; xe_ggtt_insert_special_node() calls it with @ggtt->lock held.
+ *
+ * Return: result of drm_mm_insert_node_generic().
+ */
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				       u32 size, u32 align, u32 mm_flags)
+{
+	return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0,
+					  mm_flags);
+}
+
+/**
+ * xe_ggtt_insert_special_node - reserve GGTT address space from the top
+ * @ggtt: the GGTT
+ * @node: node to insert
+ * @size: size of the reservation
+ * @align: alignment of the reservation
+ *
+ * Takes @ggtt->lock and inserts @node with DRM_MM_INSERT_HIGH.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				u32 size, u32 align)
+{
+	int err;
+
+	mutex_lock(&ggtt->lock);
+	err = xe_ggtt_insert_special_node_locked(ggtt, node, size, align,
+						 DRM_MM_INSERT_HIGH);
+	mutex_unlock(&ggtt->lock);
+
+	return err;
+}
+
+/**
+ * xe_ggtt_map_bo - write the PTEs for a BO that already has a GGTT node
+ * @ggtt: the GGTT
+ * @bo: buffer object whose ggtt_node gives the target address
+ *
+ * Writes one PTE per page of @bo starting at bo->ggtt_node.start, then
+ * invalidates the GGTT.
+ */
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	const u64 base = bo->ggtt_node.start;
+	u64 ofs;
+
+	for (ofs = 0; ofs < bo->size; ofs += GEN8_PAGE_SIZE)
+		xe_ggtt_set_pte(ggtt, base + ofs, xe_ggtt_pte_encode(bo, ofs));
+
+	xe_ggtt_invalidate(ggtt->gt);
+}
+
+/**
+ * xe_ggtt_insert_bo - allocate GGTT address space for a BO and map it
+ * @ggtt: the GGTT
+ * @bo: buffer object to insert
+ *
+ * Validates @bo, reserves bo->size bytes of GGTT address space in
+ * bo->ggtt_node and writes the PTEs. A BO that already has a GGTT node
+ * triggers a warning and is left untouched.
+ *
+ * Return: 0 on success (or if @bo was already inserted), negative error code
+ * if validation or the address space allocation fails.
+ */
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	int err;
+
+	if (XE_WARN_ON(bo->ggtt_node.size)) {
+		/* Someone's already inserted this BO in the GGTT */
+		XE_BUG_ON(bo->ggtt_node.size != bo->size);
+		return 0;
+	}
+
+	err = xe_bo_validate(bo, NULL, false);
+	if (err)
+		return err;
+
+	mutex_lock(&ggtt->lock);
+	err = drm_mm_insert_node(&ggtt->mm, &bo->ggtt_node, bo->size);
+	if (!err)
+		xe_ggtt_map_bo(ggtt, bo);
+	mutex_unlock(&ggtt->lock);
+
+	/* Report allocation failure: the BO has no valid GGTT node then */
+	return err;
+}
+
+/**
+ * xe_ggtt_remove_node - release a GGTT node and scrub its PTEs
+ * @ggtt: the GGTT
+ * @node: node to remove
+ *
+ * Under @ggtt->lock: rewrites the node's range with scratch PTEs, returns the
+ * range to the allocator, marks the node unused and invalidates the GGTT.
+ */
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+{
+	mutex_lock(&ggtt->lock);
+
+	/* Scrub first: start/size are read from the node before it is removed */
+	xe_ggtt_clear(ggtt, node->start, node->size);
+	drm_mm_remove_node(node);
+	/* A zero size is how xe_ggtt_insert_bo()/remove_bo() detect "not inserted" */
+	node->size = 0;
+
+	xe_ggtt_invalidate(ggtt->gt);
+
+	mutex_unlock(&ggtt->lock);
+}
+
+/**
+ * xe_ggtt_remove_bo - remove a BO from the GGTT
+ * @ggtt: the GGTT
+ * @bo: buffer object to remove
+ *
+ * Warns and does nothing if @bo has no GGTT node.
+ */
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	/* This BO is not currently in the GGTT */
+	if (XE_WARN_ON(!bo->ggtt_node.size))
+		return;
+
+	/* The GGTT node is expected to cover the whole BO */
+	XE_BUG_ON(bo->ggtt_node.size != bo->size);
+
+	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
+}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
new file mode 100644
index 000000000000..289c6852ad1a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_H_
+#define _XE_GGTT_H_
+
+#include "xe_ggtt_types.h"
+
+u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset);
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
+void xe_ggtt_invalidate(struct xe_gt *gt);
+int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt);
+int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt);
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
+
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				u32 size, u32 align);
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
+				       struct drm_mm_node *node,
+				       u32 size, u32 align, u32 mm_flags);
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
new file mode 100644
index 000000000000..e04193001763
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_TYPES_H_
+#define _XE_GGTT_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+struct xe_bo;
+struct xe_gt;
+
+/**
+ * struct xe_ggtt - global GTT state
+ */
+struct xe_ggtt {
+	/** @gt: back pointer to the GT that owns this GGTT */
+	struct xe_gt *gt;
+
+	/** @size: size of the GGTT address space in bytes */
+	u64 size;
+
+	/** @scratch: BO whose page backs unused entries; NULL until created */
+	struct xe_bo *scratch;
+
+	/** @lock: serializes address space allocation and PTE updates */
+	struct mutex lock;
+
+	/** @gsm: mapping of the PTE array, one 64-bit entry per GGTT page */
+	u64 __iomem *gsm;
+
+	/** @mm: allocator for the GGTT address space */
+	struct drm_mm mm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
new file mode 100644
index 000000000000..e4ad1d6ce1d5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gpu_scheduler.h"
+
+/* Kick the message worker unless submission is currently paused. */
+static void xe_sched_process_msg_queue(struct xe_gpu_scheduler *sched)
+{
+	if (READ_ONCE(sched->base.pause_submit))
+		return;
+
+	queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+/* Kick the message worker again if at least one message is still pending. */
+static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched)
+{
+	spin_lock(&sched->base.job_list_lock);
+	if (!list_empty(&sched->msgs))
+		xe_sched_process_msg_queue(sched);
+	spin_unlock(&sched->base.job_list_lock);
+}
+
+/* Dequeue and return the oldest pending message, or NULL if there is none. */
+static struct xe_sched_msg *
+xe_sched_get_msg(struct xe_gpu_scheduler *sched)
+{
+	struct xe_sched_msg *head = NULL;
+
+	spin_lock(&sched->base.job_list_lock);
+	if (!list_empty(&sched->msgs)) {
+		head = list_first_entry(&sched->msgs,
+					struct xe_sched_msg, link);
+		list_del(&head->link);
+	}
+	spin_unlock(&sched->base.job_list_lock);
+
+	return head;
+}
+
+/*
+ * Work item: handle one pending message per invocation and requeue itself
+ * while more messages are waiting. Does nothing while submission is paused.
+ */
+static void xe_sched_process_msg_work(struct work_struct *w)
+{
+	struct xe_gpu_scheduler *sched =
+		container_of(w, struct xe_gpu_scheduler, work_process_msg);
+	struct xe_sched_msg *next;
+
+	if (READ_ONCE(sched->base.pause_submit))
+		return;
+
+	next = xe_sched_get_msg(sched);
+	if (!next)
+		return;
+
+	sched->ops->process_msg(next);
+	xe_sched_process_msg_queue_if_ready(sched);
+}
+
+/**
+ * xe_sched_init - initialize an Xe GPU scheduler
+ * @sched: scheduler to initialize
+ * @ops: DRM scheduler backend ops
+ * @xe_ops: Xe backend ops used for message processing
+ * @submit_wq: workqueue used for submission and message processing
+ * @hw_submission: forwarded to drm_sched_init()
+ * @hang_limit: forwarded to drm_sched_init()
+ * @timeout: forwarded to drm_sched_init()
+ * @timeout_wq: forwarded to drm_sched_init()
+ * @score: forwarded to drm_sched_init()
+ * @name: forwarded to drm_sched_init()
+ * @dev: forwarded to drm_sched_init()
+ *
+ * Sets up the message list and worker, then initializes the embedded DRM
+ * scheduler.
+ *
+ * Return: result of drm_sched_init().
+ */
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+		  const struct drm_sched_backend_ops *ops,
+		  const struct xe_sched_backend_ops *xe_ops,
+		  struct workqueue_struct *submit_wq,
+		  uint32_t hw_submission, unsigned hang_limit,
+		  long timeout, struct workqueue_struct *timeout_wq,
+		  atomic_t *score, const char *name,
+		  struct device *dev)
+{
+	sched->ops = xe_ops;
+	INIT_LIST_HEAD(&sched->msgs);
+	INIT_WORK(&sched->work_process_msg, xe_sched_process_msg_work);
+
+	return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission,
+			      hang_limit, timeout, timeout_wq, score, name,
+			      dev);
+}
+
+/**
+ * xe_sched_fini - tear down an Xe GPU scheduler
+ * @sched: scheduler to tear down
+ *
+ * Stops submission (which also cancels the message worker) before finalizing
+ * the embedded DRM scheduler.
+ */
+void xe_sched_fini(struct xe_gpu_scheduler *sched)
+{
+	xe_sched_submission_stop(sched);
+	drm_sched_fini(&sched->base);
+}
+
+/**
+ * xe_sched_submission_start - (re)start job submission and message processing
+ * @sched: scheduler to start
+ *
+ * The message worker is queued unconditionally; it returns early on its own
+ * if there is nothing to process.
+ */
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_wqueue_start(&sched->base);
+	queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+/**
+ * xe_sched_submission_stop - stop job submission and message processing
+ * @sched: scheduler to stop
+ *
+ * Stops the DRM scheduler work queue, then cancels the message worker and
+ * waits for a running instance to finish.
+ */
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_wqueue_stop(&sched->base);
+	cancel_work_sync(&sched->work_process_msg);
+}
+
+/**
+ * xe_sched_add_msg - queue a message for the scheduler backend
+ * @sched: scheduler receiving the message
+ * @msg: message to append to the pending list
+ *
+ * Appends @msg to the tail of the message list and kicks the message worker
+ * unless submission is paused.
+ */
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+		      struct xe_sched_msg *msg)
+{
+	spin_lock(&sched->base.job_list_lock);
+	list_add_tail(&msg->link, &sched->msgs);
+	spin_unlock(&sched->base.job_list_lock);
+
+	xe_sched_process_msg_queue(sched);
+}
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
new file mode 100644
index 000000000000..10c6bb9c9386
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_H_
+#define _XE_GPU_SCHEDULER_H_
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_sched_job_types.h"
+
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+		  const struct drm_sched_backend_ops *ops,
+		  const struct xe_sched_backend_ops *xe_ops,
+		  struct workqueue_struct *submit_wq,
+		  uint32_t hw_submission, unsigned hang_limit,
+		  long timeout, struct workqueue_struct *timeout_wq,
+		  atomic_t *score, const char *name,
+		  struct device *dev);
+void xe_sched_fini(struct xe_gpu_scheduler *sched);
+
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched);
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
+
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+		      struct xe_sched_msg *msg);
+
+/* Stop the embedded DRM scheduler; no specific bad job is passed (NULL). */
+static inline void xe_sched_stop(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_stop(&sched->base, NULL);
+}
+
+/* Forward to drm_sched_tdr_queue_imm() on the embedded DRM scheduler. */
+static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_tdr_queue_imm(&sched->base);
+}
+
+/* Forward to drm_sched_resubmit_jobs() on the embedded DRM scheduler. */
+static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_resubmit_jobs(&sched->base);
+}
+
+/* Forward to drm_sched_invalidate_job() for the DRM job embedded in @job. */
+static inline bool
+xe_sched_invalidate_job(struct xe_sched_job *job, int threshold)
+{
+	return drm_sched_invalidate_job(&job->drm, threshold);
+}
+
+/*
+ * Insert @job at the head of the DRM scheduler's pending list. No lock is
+ * taken here -- NOTE(review): callers presumably provide the serialization.
+ */
+static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
+					    struct xe_sched_job *job)
+{
+	list_add(&job->drm.list, &sched->base.pending_list);
+}
+
+/* Return the first job on the pending list, or NULL if the list is empty. */
+static inline
+struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
+{
+	return list_first_entry_or_null(&sched->base.pending_list,
+					struct xe_sched_job, drm.list);
+}
+
+/*
+ * Initialize @entity so that it runs on @sched only.
+ *
+ * The scheduler list handed to drm_sched_entity_init() is built from
+ * &sched->base rather than by casting &sched, so this does not depend on
+ * @base being the first member of struct xe_gpu_scheduler.
+ *
+ * Returns the result of drm_sched_entity_init().
+ */
+static inline int
+xe_sched_entity_init(struct xe_sched_entity *entity,
+		     struct xe_gpu_scheduler *sched)
+{
+	struct drm_gpu_scheduler *base = &sched->base;
+
+	return drm_sched_entity_init(entity, 0, &base, 1, NULL);
+}
+
+#define xe_sched_entity_fini drm_sched_entity_fini
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
new file mode 100644
index 000000000000..6731b13da8bb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_TYPES_H_
+#define _XE_GPU_SCHEDULER_TYPES_H_
+
+#include <drm/gpu_scheduler.h>
+
+/**
+ * struct xe_sched_msg - an in-band (relative to GPU scheduler run queue)
+ * message
+ *
+ * Generic enough for backend defined messages, backend can expand if needed.
+ * Messages are queued with xe_sched_add_msg() and handed to the backend's
+ * process_msg() callback in the order they were added.
+ */
+struct xe_sched_msg {
+	/** @link: list link into the gpu scheduler list of messages */
+	struct list_head		link;
+	/**
+	 * @private_data: opaque pointer to message private data (backend defined)
+	 */
+	void				*private_data;
+	/** @opcode: opcode of message (backend defined) */
+	unsigned int			opcode;
+};
+
+/**
+ * struct xe_sched_backend_ops - Define the backend operations called by the
+ * scheduler
+ */
+struct xe_sched_backend_ops {
+	/**
+	 * @process_msg: Process a message. Allowed to block, it is this
+	 * function's responsibility to free message if dynamically allocated.
+	 * Called from the scheduler's message worker after the message has
+	 * been removed from the pending list.
+	 */
+	void (*process_msg)(struct xe_sched_msg *msg);
+};
+
+/**
+ * struct xe_gpu_scheduler - Xe GPU scheduler
+ *
+ * Wraps a DRM GPU scheduler and adds a list of backend messages that are
+ * processed by a worker on the scheduler's submit workqueue.
+ */
+struct xe_gpu_scheduler {
+	/** @base: DRM GPU scheduler */
+	struct drm_gpu_scheduler		base;
+	/** @ops: Xe scheduler ops */
+	const struct xe_sched_backend_ops	*ops;
+	/**
+	 * @msgs: list of messages to be processed in @work_process_msg,
+	 * protected by @base.job_list_lock
+	 */
+	struct list_head			msgs;
+	/** @work_process_msg: processes messages */
+	struct work_struct		work_process_msg;
+};
+
+#define xe_sched_entity		drm_sched_entity
+#define xe_sched_policy		drm_sched_policy
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
new file mode 100644
index 000000000000..5f8fa9d98d5a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -0,0 +1,830 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/minmax.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_clock.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_fence.h"
+#include "xe_irq.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_reg_sr.h"
+#include "xe_ring_ops.h"
+#include "xe_sa.h"
+#include "xe_sched_job.h"
+#include "xe_ttm_gtt_mgr.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_tuning.h"
+#include "xe_uc.h"
+#include "xe_vm.h"
+#include "xe_wa.h"
+#include "xe_wopcm.h"
+
+#include "gt/intel_gt_regs.h"
+
+/*
+ * Find the GT that shares its VRAM with the media GT @gt: the first GT in
+ * iteration order with the same vram_id is returned. Must only be called for
+ * media GTs.
+ */
+struct xe_gt *xe_find_full_gt(struct xe_gt *gt)
+{
+	struct xe_gt *search;
+	u8 id;
+
+	XE_BUG_ON(!xe_gt_is_media_type(gt));
+
+	for_each_gt(search, gt_to_xe(gt), id) {
+		if (search->info.vram_id == gt->info.vram_id)
+			return search;
+	}
+
+	/* The string literal is non-NULL, so this condition is always true */
+	XE_BUG_ON("NOT POSSIBLE");
+	return NULL;
+}
+
+/**
+ * xe_gt_alloc - allocate the per-GT memory managers and ordered workqueue
+ * @xe: the device
+ * @gt: the GT, whose info.type must already be set
+ *
+ * A non-media GT gets its own GGTT, VRAM manager and GTT manager (drm managed
+ * allocations); a media GT shares those of the GT found by
+ * xe_find_full_gt().
+ *
+ * Return: 0 on success, -ENOMEM if an allocation fails.
+ */
+int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
+{
+	struct drm_device *drm = &xe->drm;
+
+	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+
+	if (!xe_gt_is_media_type(gt)) {
+		gt->mem.ggtt = drmm_kzalloc(drm, sizeof(*gt->mem.ggtt),
+					    GFP_KERNEL);
+		if (!gt->mem.ggtt)
+			return -ENOMEM;
+
+		gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr),
+						GFP_KERNEL);
+		if (!gt->mem.vram_mgr)
+			return -ENOMEM;
+
+		gt->mem.gtt_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.gtt_mgr),
+					       GFP_KERNEL);
+		if (!gt->mem.gtt_mgr)
+			return -ENOMEM;
+	} else {
+		struct xe_gt *full_gt = xe_find_full_gt(gt);
+
+		gt->mem.ggtt = full_gt->mem.ggtt;
+		gt->mem.vram_mgr = full_gt->mem.vram_mgr;
+		gt->mem.gtt_mgr = full_gt->mem.gtt_mgr;
+	}
+
+	/* The workqueue is destroyed unconditionally in gt_fini(); never leave it NULL */
+	gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+	if (!gt->ordered_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* FIXME: These should be in a common file */
+#define CHV_PPAT_SNOOP			REG_BIT(6)
+#define GEN8_PPAT_AGE(x)		((x)<<4)
+#define GEN8_PPAT_LLCeLLC		(3<<2)
+#define GEN8_PPAT_LLCELLC		(2<<2)
+#define GEN8_PPAT_LLC			(1<<2)
+#define GEN8_PPAT_WB			(3<<0)
+#define GEN8_PPAT_WT			(2<<0)
+#define GEN8_PPAT_WC			(1<<0)
+#define GEN8_PPAT_UC			(0<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
+#define GEN8_PPAT(i, x)			((u64)(x) << ((i) * 8))
+#define GEN12_PPAT_CLOS(x)              ((x)<<2)
+
+/* Program PAT indices 0-7 with the default (non-PVC, non-MTL) layout. */
+static void tgl_setup_private_ppat(struct xe_gt *gt)
+{
+	/* TGL doesn't support LLC or AGE settings */
+	static const u32 pat[] = {
+		GEN8_PPAT_WB,
+		GEN8_PPAT_WC,
+		GEN8_PPAT_WT,
+		GEN8_PPAT_UC,
+		GEN8_PPAT_WB,
+		GEN8_PPAT_WB,
+		GEN8_PPAT_WB,
+		GEN8_PPAT_WB,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(pat); i++)
+		xe_mmio_write32(gt, GEN12_PAT_INDEX(i).reg, pat[i]);
+}
+
+/* Program PAT indices 0-7 with the PVC layout (indices 4-7 carry a CLOS). */
+static void pvc_setup_private_ppat(struct xe_gt *gt)
+{
+	static const u32 pat[] = {
+		GEN8_PPAT_UC,
+		GEN8_PPAT_WC,
+		GEN8_PPAT_WT,
+		GEN8_PPAT_WB,
+		GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT,
+		GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB,
+		GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT,
+		GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(pat); i++)
+		xe_mmio_write32(gt, GEN12_PAT_INDEX(i).reg, pat[i]);
+}
+
+#define MTL_PPAT_L4_CACHE_POLICY_MASK   REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASK     REG_GENMASK(1, 0)
+#define MTL_PPAT_3_UC   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_1_WT   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_0_WB   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
+/* Program PAT indices 0-4 with the Meteor Lake layout. */
+static void mtl_setup_private_ppat(struct xe_gt *gt)
+{
+	const u32 pat[] = {
+		MTL_PPAT_0_WB,
+		MTL_PPAT_1_WT | MTL_2_COH_1W,
+		MTL_PPAT_3_UC | MTL_2_COH_1W,
+		MTL_PPAT_0_WB | MTL_2_COH_1W,
+		MTL_PPAT_0_WB | MTL_3_COH_2W,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(pat); i++)
+		xe_mmio_write32(gt, GEN12_PAT_INDEX(i).reg, pat[i]);
+}
+
+/* Program the PAT table using the layout that matches the platform. */
+static void setup_private_ppat(struct xe_gt *gt)
+{
+	switch (gt_to_xe(gt)->info.platform) {
+	case XE_METEORLAKE:
+		mtl_setup_private_ppat(gt);
+		break;
+	case XE_PVC:
+		pvc_setup_private_ppat(gt);
+		break;
+	default:
+		tgl_setup_private_ppat(gt);
+		break;
+	}
+}
+
+/*
+ * Initialize the TTM managers of a GT. The GTT manager is sized at 3/4 of
+ * system RAM; with VRAM present the VRAM manager is set up as well and the
+ * GTT size is capped at max(default GTT size, VRAM size).
+ *
+ * Returns 0 on success or the error of the failing manager init.
+ */
+static int gt_ttm_mgr_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct sysinfo si;
+	u64 gtt_size;
+	int err;
+
+	si_meminfo(&si);
+	gtt_size = (u64)si.totalram * si.mem_unit * 3/4;
+
+	if (gt->mem.vram.size) {
+		err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr);
+		if (err)
+			return err;
+
+		gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20),
+				   gt->mem.vram.size),
+			       gtt_size);
+		/* Bit 0 is left alone; VRAM region bits start at bit 1 */
+		xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1;
+	}
+
+	return xe_ttm_gtt_mgr_init(gt, gt->mem.gtt_mgr, gtt_size);
+}
+
+/*
+ * GT teardown callback with the drmm action signature (@drm unused, @arg is
+ * the GT): destroys the ordered workqueue and finishes the hardware fence
+ * IRQ state of every engine class.
+ */
+static void gt_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+	int i;
+
+	destroy_workqueue(gt->ordered_wq);
+
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+}
+
+static void gt_reset_worker(struct work_struct *w);
+
+/*
+ * Submit a batch buffer to which no commands are added here on engine @e and
+ * wait up to HZ jiffies for it to complete.
+ *
+ * Returns 0 on success, -ETIME on timeout, or a negative error code if batch
+ * or job creation or the wait fails.
+ */
+int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
+{
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	struct dma_fence *fence;
+	u64 batch_ofs;
+	long timeout;
+
+	bb = xe_bb_new(gt, 4, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo);
+	job = xe_bb_create_wa_job(e, bb, batch_ofs);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		/* The error lives in job; bb is a valid pointer here */
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	/* Take the fence reference before pushing the job */
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+/*
+ * Submit a batch on engine @e that loads every entry of the engine's LRC
+ * save/restore list (register, set_bits) with a single MI_LOAD_REGISTER_IMM,
+ * and wait up to HZ jiffies for it to complete.
+ *
+ * Returns 0 on success, -ETIME on timeout, or a negative error code if batch
+ * or job creation or the wait fails.
+ */
+int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
+{
+	struct xe_reg_sr *sr = &e->hwe->reg_lrc;
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	struct dma_fence *fence;
+	u64 batch_ofs;
+	long timeout;
+	int count = 0;
+
+	bb = xe_bb_new(gt, SZ_4K, false);	/* Just pick a large BB size */
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	/* First pass only counts: the LRI header needs the register count */
+	xa_for_each(&sr->xa, reg, entry)
+		++count;
+
+	if (count) {
+		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count);
+		xa_for_each(&sr->xa, reg, entry) {
+			bb->cs[bb->len++] = reg;
+			bb->cs[bb->len++] = entry->set_bits;
+		}
+	}
+	bb->cs[bb->len++] = MI_NOOP;
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+
+	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo);
+	job = xe_bb_create_wa_job(e, bb, batch_ofs);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		/* The error lives in job; bb is a valid pointer here */
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	/* Take the fence reference before pushing the job */
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+/**
+ * xe_gt_record_default_lrcs - record a default ("golden") LRC per engine class
+ * @gt: the GT
+ *
+ * For the first hardware engine of each class: run a workaround job on a
+ * fresh engine, switch to a second engine with a nop job, switch back, and
+ * copy the resulting context image into @gt->default_lrc[class].
+ *
+ * Return: 0 on success, negative error code on the first failure.
+ */
+int xe_gt_record_default_lrcs(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		struct xe_engine *e, *nop_e;
+		struct xe_vm *vm;
+		void *default_lrc;
+
+		/* Only one recording per engine class is needed */
+		if (gt->default_lrc[hwe->class])
+			continue;
+
+		xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe);
+		xe_wa_process_lrc(hwe);
+
+		/* drm managed allocation: not freed explicitly on the error paths */
+		default_lrc = drmm_kzalloc(&xe->drm,
+					   xe_lrc_size(xe, hwe->class),
+					   GFP_KERNEL);
+		if (!default_lrc)
+			return -ENOMEM;
+
+		vm = xe_migrate_get_vm(gt->migrate);
+		e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1,
+				     hwe, ENGINE_FLAG_WA);
+		if (IS_ERR(e)) {
+			err = PTR_ERR(e);
+			goto put_vm;
+		}
+
+		/* Prime golden LRC with known good state */
+		err = emit_wa_job(gt, e);
+		if (err)
+			goto put_engine;
+
+		nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance),
+					 1, hwe, ENGINE_FLAG_WA);
+		if (IS_ERR(nop_e)) {
+			err = PTR_ERR(nop_e);
+			goto put_engine;
+		}
+
+		/* Switch to different LRC */
+		err = emit_nop_job(gt, nop_e);
+		if (err)
+			goto put_nop_e;
+
+		/* Reload golden LRC to record the effect of any indirect W/A */
+		err = emit_nop_job(gt, e);
+		if (err)
+			goto put_nop_e;
+
+		xe_map_memcpy_from(xe, default_lrc,
+				   &e->lrc[0].bo->vmap,
+				   xe_lrc_pphwsp_offset(&e->lrc[0]),
+				   xe_lrc_size(xe, hwe->class));
+
+		gt->default_lrc[hwe->class] = default_lrc;
+		/* Success falls through the labels too: references are always dropped */
+put_nop_e:
+		xe_engine_put(nop_e);
+put_engine:
+		xe_engine_put(e);
+put_vm:
+		xe_vm_put(vm);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * xe_gt_init_early - early GT initialization
+ * @gt: the GT
+ *
+ * Initializes GT force wake, reads topology and MCR state with the GT domain
+ * awake, then sets up the GT save/restore list and processes GT workarounds
+ * and tunings.
+ *
+ * Return: 0 on success, or the error from taking or releasing force wake.
+ */
+int xe_gt_init_early(struct xe_gt *gt)
+{
+	int err;
+
+	xe_force_wake_init_gt(gt, gt_to_fw(gt));
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	xe_gt_topology_init(gt);
+	xe_gt_mcr_init(gt);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));
+	xe_wa_process_gt(gt);
+	xe_tuning_process_gt(gt);
+
+	return 0;
+}
+
+/**
+ * xe_gt_init_noalloc - Init GT up to the point where allocations can happen.
+ * @gt: The GT to initialize.
+ *
+ * This function prepares the GT to allow memory allocations to VRAM, but is not
+ * allowed to allocate memory itself. This state is useful for display readout,
+ * because the inherited display framebuffer will otherwise be overwritten as it
+ * is usually put at the start of VRAM.
+ *
+ * Media GTs are skipped and report success.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_gt_init_noalloc(struct xe_gt *gt)
+{
+	int err, err2;
+
+	if (xe_gt_is_media_type(gt))
+		return 0;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_mem_access;
+
+	err = gt_ttm_mgr_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_ggtt_init_noalloc(gt, gt->mem.ggtt);
+
+	/* Success falls through: force wake and mem access are always released */
+err_force_wake:
+	err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	XE_WARN_ON(err2);
+err_mem_access:
+	/* Also reached when force wake failed, so the reference is not leaked */
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return err;
+}
+
+/*
+ * Initialization steps that need only the GT force wake domain: GGTT (on
+ * non-media GTs), uC and hwconfig, per-engine IRQs, MCR and early hw engine
+ * setup.
+ *
+ * Returns 0 on success. On failure the fence IRQ state of every engine class
+ * is finished and a negative error code is returned.
+ */
+static int gt_fw_domain_init(struct xe_gt *gt)
+{
+	int err, i;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_hw_fence_irq;
+
+	if (!xe_gt_is_media_type(gt)) {
+		err = xe_ggtt_init(gt, gt->mem.ggtt);
+		if (err)
+			goto err_force_wake;
+	}
+
+	/* Allow driver to load if uC init fails (likely missing firmware) */
+	err = xe_uc_init(&gt->uc);
+	XE_WARN_ON(err);
+
+	err = xe_uc_init_hwconfig(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	/* Enables per hw engine IRQs */
+	xe_gt_irq_postinstall(gt);
+
+	/* Rerun MCR init as we now have hw engine list */
+	xe_gt_mcr_init(gt);
+
+	err = xe_hw_engines_init_early(gt);
+	if (err)
+		goto err_force_wake;
+
+	/* A failing put is only warned about; init still reports success */
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	XE_WARN_ON(err);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return 0;
+
+err_force_wake:
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_hw_fence_irq:
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return err;
+}
+
+/* Init stage that runs with every forcewake domain (XE_FORCEWAKE_ALL) held. */
+static int all_fw_domain_init(struct xe_gt *gt)
+{
+	int err, i;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_hw_fence_irq;
+
+	setup_private_ppat(gt);
+
+	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+	err = xe_gt_clock_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	xe_mocs_init(gt);
+	err = xe_execlist_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_hw_engines_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_uc_init_post_hwconfig(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	/*
+	 * FIXME: This should be ok as SA should only be used by gt->migrate and
+	 * vm->gt->migrate and both should be pointing to a non-media GT. But to
+	 * be really safe, convert gt->kernel_bb_pool to a pointer and point a
+	 * media GT to the kernel_bb_pool on a real tile.
+	 */
+	if (!xe_gt_is_media_type(gt)) {
+		err = xe_sa_bo_manager_init(gt, &gt->kernel_bb_pool, SZ_1M, 16);
+		if (err)
+			goto err_force_wake;
+
+		/* USM has its own SA pool to not block behind user operations */
+		if (gt_to_xe(gt)->info.supports_usm) {
+			err = xe_sa_bo_manager_init(gt, &gt->usm.bb_pool,
+						    SZ_1M, 16);
+			if (err)
+				goto err_force_wake;
+		}
+	}
+
+	if (!xe_gt_is_media_type(gt)) {
+		gt->migrate = xe_migrate_init(gt);
+		err = PTR_ERR_OR_ZERO(gt->migrate);	/* err was still 0 here */
+		if (err)
+			goto err_force_wake;
+	} else {
+		gt->migrate = xe_find_full_gt(gt)->migrate;
+	}
+
+	err = xe_uc_init_hw(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return 0;
+
+err_force_wake:
+	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+err_hw_fence_irq:
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return err;
+}
+
+int xe_gt_init(struct xe_gt *gt)
+{
+	int err;
+	int i;
+
+	INIT_WORK(&gt->reset.worker, gt_reset_worker);
+	/* Per engine class: look up the ring ops and set up fence IRQ state. */
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
+		gt->ring_ops[i] = xe_ring_ops_get(gt, i);
+		xe_hw_fence_irq_init(&gt->fence_irq[i]);
+	}
+
+	err = xe_gt_pagefault_init(gt);
+	if (err)
+		return err;
+
+	xe_gt_sysfs_init(gt);
+	/* First stage: runs with only the XE_FW_GT forcewake domain held. */
+	err = gt_fw_domain_init(gt);
+	if (err)
+		return err;
+
+	xe_force_wake_init_engines(gt, gt_to_fw(gt));
+	/* Second stage: runs with XE_FORCEWAKE_ALL held. */
+	err = all_fw_domain_init(gt);
+	if (err)
+		return err;
+
+	xe_force_wake_prune(gt, gt_to_fw(gt));
+	/* Register gt_fini as a DRM-managed action for this device. */
+	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/* Request a full GT reset (GEN11_GRDOM_FULL) and wait for the bit to clear. */
+static int do_gt_reset(struct xe_gt *gt)
+{
+	int err;
+
+	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL);
+	err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5);
+	if (err)
+		drm_err(&gt_to_xe(gt)->drm,
+			"GT reset failed to clear GEN11_GRDOM_FULL\n");
+
+	return err;
+}
+
+static int do_gt_restart(struct xe_gt *gt)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err;
+
+	setup_private_ppat(gt);
+	/* Re-apply the GT-level reg_sr list (used by gt_reset() and xe_gt_resume()). */
+	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+	err = xe_wopcm_init(&gt->uc.wopcm);
+	if (err)
+		return err;
+
+	for_each_hw_engine(hwe, gt, id)
+		xe_hw_engine_enable_ring(hwe);
+
+	err = xe_uc_init_hw(&gt->uc);
+	if (err)
+		return err;
+
+	xe_mocs_init(gt);
+	err = xe_uc_start(&gt->uc);
+	if (err)
+		return err;
+	/* Re-apply each valid engine's reg_sr list and register whitelist. */
+	for_each_hw_engine(hwe, gt, id) {
+		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+		xe_reg_sr_apply_whitelist(&hwe->reg_whitelist,
+					  hwe->mmio_base, gt);
+	}
+
+	return 0;
+}
+
+/* Full GT reset: stop the uC, reset the hardware, then restart the GT. */
+static int gt_reset(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	/* We only support GT resets with GuC submission */
+	if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
+		return -ENODEV;
+
+	drm_info(&xe->drm, "GT reset started\n");
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	xe_uc_stop_prepare(&gt->uc);
+	xe_gt_pagefault_reset(gt);
+
+	err = xe_uc_stop(&gt->uc);
+	if (err)
+		goto err_out;
+
+	err = do_gt_reset(gt);
+	if (err)
+		goto err_out;
+
+	err = do_gt_restart(gt);
+	if (err)
+		goto err_out;
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));	/* last: reverse of the get order */
+
+	drm_info(&xe->drm, "GT reset done\n");
+
+	return 0;
+
+err_out:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	XE_WARN_ON(xe_uc_start(&gt->uc));
+	xe_device_mem_access_put(gt_to_xe(gt));
+	drm_err(&xe->drm, "GT reset failed, err=%d\n", err);
+
+	return err;
+}
+
+static void gt_reset_worker(struct work_struct *w)
+{
+	struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);	/* work item is embedded in the GT */
+
+	gt_reset(gt);	/* return value is dropped here */
+}
+
+void xe_gt_reset_async(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	drm_info(&xe->drm, "Try GT reset\n");
+
+	/* Don't do a reset while one is already in flight */
+	if (xe_uc_reset_prepare(&gt->uc))
+		return;
+
+	drm_info(&xe->drm, "Doing GT reset\n");
+	queue_work(gt->ordered_wq, &gt->reset.worker);	/* executed by gt_reset_worker() */
+}
+
+void xe_gt_suspend_prepare(struct xe_gt *gt)
+{
+	xe_device_mem_access_get(gt_to_xe(gt));
+	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	xe_uc_stop_prepare(&gt->uc);
+	/* Release forcewake first, then the mem_access ref (reverse of get). */
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
+}
+
+int xe_gt_suspend(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	/* For now suspend/resume is only allowed with GuC */
+	if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
+		return -ENODEV;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	err = xe_uc_suspend(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));	/* last: reverse of the get order */
+	drm_info(&xe->drm, "GT suspended\n");
+
+	return 0;
+
+err_force_wake:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	drm_err(&xe->drm, "GT suspend failed: %d\n", err);
+
+	return err;
+}
+
+int xe_gt_resume(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	err = do_gt_restart(gt);	/* same restart sequence gt_reset() uses */
+	if (err)
+		goto err_force_wake;
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));	/* last: reverse of the get order */
+	drm_info(&xe->drm, "GT resumed\n");
+
+	return 0;
+
+err_force_wake:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	drm_err(&xe->drm, "GT resume failed: %d\n", err);
+
+	return err;
+}
+
+void xe_gt_migrate_wait(struct xe_gt *gt)
+{
+	xe_migrate_wait(gt->migrate);	/* thin wrapper: forwards this GT's migrate pointer */
+}
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+				     enum xe_engine_class class,
+				     u16 instance, bool logical)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	/* @logical picks which instance field @instance is compared against. */
+	for_each_hw_engine(hwe, gt, id)
+		if (hwe->class == class &&
+		    instance == (logical ? hwe->logical_instance : hwe->instance))
+			return hwe;
+
+	return NULL;
+}
+
+struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
+							 enum xe_engine_class class)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	bool render_or_compute = class == XE_ENGINE_CLASS_RENDER ||
+				 class == XE_ENGINE_CLASS_COMPUTE;
+
+	/*
+	 * Render and compute are treated as one reset domain: a request for
+	 * either class is satisfied by the first engine of either class.
+	 */
+	for_each_hw_engine(hwe, gt, id) {
+		if (hwe->class == class)
+			return hwe;
+		if (render_or_compute && (hwe->class == XE_ENGINE_CLASS_RENDER ||
+					  hwe->class == XE_ENGINE_CLASS_COMPUTE))
+			return hwe;
+	}
+
+	return NULL;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
new file mode 100644
index 000000000000..5dc08a993cfe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_H_
+#define _XE_GT_H_
+
+#include <drm/drm_util.h>
+
+#include "xe_device_types.h"
+#include "xe_hw_engine.h"
+
+#define for_each_hw_engine(hwe__, gt__, id__) \
+	for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \
+	     for_each_if (((hwe__) = (gt__)->hw_engines + (id__)) && \
+			  xe_hw_engine_is_valid((hwe__)))
+
+int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt);
+int xe_gt_init_early(struct xe_gt *gt);
+int xe_gt_init_noalloc(struct xe_gt *gt);
+int xe_gt_init(struct xe_gt *gt);
+int xe_gt_record_default_lrcs(struct xe_gt *gt);
+void xe_gt_suspend_prepare(struct xe_gt *gt);
+int xe_gt_suspend(struct xe_gt *gt);
+int xe_gt_resume(struct xe_gt *gt);
+void xe_gt_reset_async(struct xe_gt *gt);
+void xe_gt_migrate_wait(struct xe_gt *gt);
+
+struct xe_gt *xe_find_full_gt(struct xe_gt *gt);
+
+/**
+ * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
+ * first that matches the same reset domain as @class
+ * @gt: GT structure
+ * @class: hw engine class to lookup
+ */
+struct xe_hw_engine *
+xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class);
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+				     enum xe_engine_class class,
+				     u16 instance,
+				     bool logical);
+
+static inline bool xe_gt_is_media_type(struct xe_gt *gt)
+{
+	return gt->info.type == XE_GT_TYPE_MEDIA;	/* true only for XE_GT_TYPE_MEDIA GTs */
+}
+
+static inline struct xe_device *gt_to_xe(struct xe_gt *gt)
+{
+	return gt->xe;	/* back-pointer to the device that owns this GT */
+}
+
+static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+	if (!gt_to_xe(gt)->info.supports_usm)
+		return false;	/* device does not support USM at all */
+	return hwe->class == XE_ENGINE_CLASS_COPY &&
+		hwe->instance == gt->usm.reserved_bcs_instance;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
new file mode 100644
index 000000000000..575433e9718a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_clock.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+
+static u32 read_reference_ts_freq(struct xe_gt *gt)
+{
+	u32 reg = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg);
+	u32 divider, denominator;
+
+	/* Base part: (divider field + 1) * 1000000 */
+	divider = (reg & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
+		  GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT;
+
+	/* Fractional part: 1000000 / (denominator field + 1) */
+	denominator = (reg &
+		       GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
+		      GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT;
+
+	return (divider + 1) * 1000000 + 1000000 / (denominator + 1);
+}
+
+/*
+ * Translate the crystal clock selector field of @rpm_config_reg into a
+ * frequency in Hz.  An unrecognised selector value trips XE_BUG_ON and
+ * yields 0.
+ */
+static u32 get_crystal_clock_freq(u32 rpm_config_reg)
+{
+	u32 sel = (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
+		  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+
+	switch (sel) {
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
+		return 24000000;
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
+		return 19200000;
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
+		return 38400000;
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
+		return 25000000;
+	default:
+		XE_BUG_ON("NOT_POSSIBLE");
+		return 0;
+	}
+}
+
+int xe_gt_clock_init(struct xe_gt *gt)
+{
+	u32 ctc_mode = xe_mmio_read32(gt, CTC_MODE.reg);
+	u32 rpm_cfg, shift;
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+
+	if ((ctc_mode & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
+		/* Frequency is derived from the TIMESTAMP_OVERRIDE register */
+		gt->info.clock_freq = read_reference_ts_freq(gt);
+		return 0;
+	}
+
+	/*
+	 * Otherwise start from the crystal clock frequency and then figure
+	 * out how the command stream's timestamp register increments from
+	 * that frequency (it might increment only every few clock
+	 * cycles).
+	 */
+	rpm_cfg = xe_mmio_read32(gt, RPM_CONFIG0.reg);
+	shift = 3 - ((rpm_cfg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+		     GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
+	gt->info.clock_freq = get_crystal_clock_freq(rpm_cfg) >> shift;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
new file mode 100644
index 000000000000..511923afd224
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_CLOCK_H_
+#define _XE_GT_CLOCK_H_
+
+struct xe_gt;
+
+int xe_gt_clock_init(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
new file mode 100644
index 000000000000..cd1888784141
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_debugfs.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_engine.h"
+#include "xe_macros.h"
+#include "xe_uc_debugfs.h"
+
+static struct xe_gt *node_to_gt(struct drm_info_node *node)
+{
+	return node->info_ent->data;	/* .data is set to the GT in xe_gt_debugfs_register() */
+}
+
+/* debugfs: print the state of every valid hw engine on this GT. */
+static int hw_engines(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err) {
+		xe_device_mem_access_put(xe);
+		return err;
+	}
+
+	for_each_hw_engine(hwe, gt, id)
+		xe_hw_engine_print_state(hwe, &p);
+
+	/* Drop forcewake before the mem_access ref, mirroring the get order. */
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(xe);
+
+	return err;
+}
+
+static int force_reset(struct seq_file *m, void *data)
+{
+	/*
+	 * Showing this debugfs entry requests an asynchronous GT reset.
+	 */
+	xe_gt_reset_async(node_to_gt(m->private));
+	return 0;
+}
+
+static int sa_info(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+	struct xe_gt *gt = node_to_gt(m->private);
+
+	/* Dump suballocator debug info for this GT's kernel_bb_pool. */
+	drm_suballoc_dump_debug_info(&gt->kernel_bb_pool.base, &printer,
+				     gt->kernel_bb_pool.gpu_addr);
+	return 0;
+}
+
+static int topology(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	/* debugfs: forward to xe_gt_topology_dump() for this GT. */
+	xe_gt_topology_dump(node_to_gt(m->private), &printer);
+
+	return 0;
+}
+
+static int steering(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	/* debugfs: forward to xe_gt_mcr_steering_dump() for this GT. */
+	xe_gt_mcr_steering_dump(node_to_gt(m->private), &printer);
+
+	return 0;
+}
+
+#ifdef CONFIG_DRM_XE_DEBUG
+static int invalidate_tlb(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	int seqno;
+	int ret = 0;
+
+	seqno = xe_gt_tlb_invalidation(gt);
+	XE_WARN_ON(seqno < 0);
+	if (seqno > 0)	/* only wait when a positive seqno was handed back */
+		ret = xe_gt_tlb_invalidation_wait(gt, seqno);
+	XE_WARN_ON(ret < 0);
+
+	return 0;	/* failures are only warned about, never returned */
+}
+#endif
+
+static const struct drm_info_list debugfs_list[] = {
+	{"hw_engines", hw_engines, 0},
+	{"force_reset", force_reset, 0},
+	{"sa_info", sa_info, 0},
+	{"topology", topology, 0},
+	{"steering", steering, 0},
+#ifdef CONFIG_DRM_XE_DEBUG
+	{"invalidate_tlb", invalidate_tlb, 0},
+#endif
+};	/* template: xe_gt_debugfs_register() copies it and sets .data per GT */
+
+void xe_gt_debugfs_register(struct xe_gt *gt)
+{
+	struct drm_minor *minor = gt_to_xe(gt)->drm.primary;
+	struct dentry *root;
+	struct drm_info_list *local;
+	char name[8];
+	int i;
+
+	XE_BUG_ON(!minor->debugfs_root);
+	/* Bounded write: name[] is small, so never rely on the width of the id. */
+	snprintf(name, sizeof(name), "gt%d", gt->info.id);
+	root = debugfs_create_dir(name, minor->debugfs_root);
+	if (IS_ERR(root)) {
+		XE_WARN_ON("Create GT directory failed");
+		return;
+	}
+
+	/*
+	 * Allocate local copy as we need to pass in the GT to the debugfs
+	 * entry and drm_debugfs_create_files just references the drm_info_list
+	 * passed in (e.g. can't define this on the stack).
+	 */
+#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+	local = drmm_kmalloc(&gt_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	if (!local) {
+		XE_WARN_ON("Couldn't allocate memory");
+		return;
+	}
+
+	memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = gt;	/* read back by node_to_gt() */
+
+	drm_debugfs_create_files(local,
+				 ARRAY_SIZE(debugfs_list),
+				 root, minor);
+
+	xe_uc_debugfs_register(&gt->uc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h
new file mode 100644
index 000000000000..5a329f118a57
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_DEBUGFS_H_
+#define _XE_GT_DEBUGFS_H_
+
+struct xe_gt;
+
+void xe_gt_debugfs_register(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
new file mode 100644
index 000000000000..b69c0d6c6b2f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_topology.h"
+#include "xe_gt_types.h"
+#include "xe_mmio.h"
+
+#include "gt/intel_gt_regs.h"
+
+/**
+ * DOC: GT Multicast/Replicated (MCR) Register Support
+ *
+ * Some GT registers are designed as "multicast" or "replicated" registers:
+ * multiple instances of the same register share a single MMIO offset.  MCR
+ * registers are generally used when the hardware needs to potentially track
+ * independent values of a register per hardware unit (e.g., per-subslice,
+ * per-L3bank, etc.).  The specific types of replication that exist vary
+ * per-platform.
+ *
+ * MMIO accesses to MCR registers are controlled according to the settings
+ * programmed in the platform's MCR_SELECTOR register(s).  MMIO writes to MCR
+ * registers can be done in either multicast (i.e., a single write updates all
+ * instances of the register to the same value) or unicast (a write updates only
+ * one specific instance).  Reads of MCR registers always operate in a unicast
+ * manner regardless of how the multicast/unicast bit is set in MCR_SELECTOR.
+ * Selection of a specific MCR instance for unicast operations is referred to
+ * as "steering."
+ *
+ * If MCR register operations are steered toward a hardware unit that is
+ * fused off or currently powered down due to power gating, the MMIO operation
+ * is "terminated" by the hardware.  Terminated read operations will return a
+ * value of zero and terminated unicast write operations will be silently
+ * ignored.
+ */
+
+enum {
+	MCR_OP_READ,	/* rw_flag value: steered read in rw_with_mcr_steering() */
+	MCR_OP_WRITE	/* rw_flag value: steered write in rw_with_mcr_steering() */
+};
+
+static const struct xe_mmio_range xelp_l3bank_steering_table[] = {
+	{ 0x00B100, 0x00B3FF },
+	{},	/* terminator: range walks stop at the first entry with end == 0 */
+};
+
+/*
+ * Although the bspec lists more "MSLICE" ranges than shown here, some of those
+ * are of a "GAM" subclass that has special rules and doesn't need to be
+ * included here.
+ */
+static const struct xe_mmio_range xehp_mslice_steering_table[] = {
+	{ 0x00DD00, 0x00DDFF },
+	{ 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
+	{},	/* terminator (end == 0) */
+};
+
+static const struct xe_mmio_range xehp_lncf_steering_table[] = {
+	{ 0x00B000, 0x00B0FF },
+	{ 0x00D880, 0x00D8FF },
+	{},	/* terminator (end == 0) */
+};
+
+/*
+ * We have several types of MCR registers where steering to (0,0) will always
+ * provide us with a non-terminated value.  We'll stick them all in the same
+ * table for simplicity.
+ */
+static const struct xe_mmio_range xehpc_instance0_steering_table[] = {
+	{ 0x004000, 0x004AFF },		/* HALF-BSLICE */
+	{ 0x008800, 0x00887F },		/* CC */
+	{ 0x008A80, 0x008AFF },		/* TILEPSMI */
+	{ 0x00B000, 0x00B0FF },		/* HALF-BSLICE */
+	{ 0x00B100, 0x00B3FF },		/* L3BANK */
+	{ 0x00C800, 0x00CFFF },		/* HALF-BSLICE */
+	{ 0x00D800, 0x00D8FF },		/* HALF-BSLICE */
+	{ 0x00DD00, 0x00DDFF },		/* BSLICE */
+	{ 0x00E900, 0x00E9FF },		/* HALF-BSLICE */
+	{ 0x00EC00, 0x00EEFF },		/* HALF-BSLICE */
+	{ 0x00F000, 0x00FFFF },		/* HALF-BSLICE */
+	{ 0x024180, 0x0241FF },		/* HALF-BSLICE */
+	{},				/* terminator (end == 0) */
+};
+
+static const struct xe_mmio_range xelpg_instance0_steering_table[] = {
+	{ 0x000B00, 0x000BFF },         /* SQIDI */
+	{ 0x001000, 0x001FFF },         /* SQIDI */
+	{ 0x004000, 0x0048FF },         /* GAM */
+	{ 0x008700, 0x0087FF },         /* SQIDI */
+	{ 0x00B000, 0x00B0FF },         /* NODE */
+	{ 0x00C800, 0x00CFFF },         /* GAM */
+	{ 0x00D880, 0x00D8FF },         /* NODE */
+	{ 0x00DD00, 0x00DDFF },         /* OAAL2 */
+	{},                             /* terminator (end == 0) */
+};
+
+static const struct xe_mmio_range xelpg_l3bank_steering_table[] = {
+	{ 0x00B100, 0x00B3FF },
+	{},	/* terminator (end == 0) */
+};
+
+static const struct xe_mmio_range xelp_dss_steering_table[] = {
+	{ 0x008150, 0x00815F },
+	{ 0x009520, 0x00955F },
+	{ 0x00DE80, 0x00E8FF },
+	{ 0x024A00, 0x024A7F },
+	{},	/* terminator (end == 0) */
+};
+
+/* DSS steering is used for GSLICE ranges as well */
+static const struct xe_mmio_range xehp_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },		/* GSLICE */
+	{ 0x005400, 0x007FFF },		/* GSLICE */
+	{ 0x008140, 0x00815F },		/* GSLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x008D00, 0x008DFF },		/* DSS */
+	{ 0x0094D0, 0x00955F },		/* GSLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00D800, 0x00D87F },		/* GSLICE */
+	{ 0x00DC00, 0x00DCFF },		/* GSLICE */
+	{ 0x00DE80, 0x00E8FF },		/* DSS (0xE000-0xE0FF reserved) */
+	{ 0x017000, 0x017FFF },		/* GSLICE */
+	{ 0x024A00, 0x024A7F },		/* DSS */
+	{},				/* terminator (end == 0) */
+};
+
+/* DSS steering is used for COMPUTE ranges as well */
+static const struct xe_mmio_range xehpc_dss_steering_table[] = {
+	{ 0x008140, 0x00817F },		/* COMPUTE (0x8140-0x814F & 0x8160-0x817F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },		/* COMPUTE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00DC00, 0x00DCFF },		/* COMPUTE */
+	{ 0x00DE80, 0x00E7FF },		/* DSS (0xDF00-0xE1FF reserved) */
+	{},				/* terminator (end == 0) */
+};
+
+/* DSS steering is used for SLICE ranges as well */
+static const struct xe_mmio_range xelpg_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },		/* SLICE */
+	{ 0x005500, 0x007FFF },		/* SLICE */
+	{ 0x008140, 0x00815F },		/* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },		/* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00D800, 0x00D87F },		/* SLICE */
+	{ 0x00DC00, 0x00DCFF },		/* SLICE */
+	{ 0x00DE80, 0x00E8FF },		/* DSS (0xE000-0xE0FF reserved) */
+	{},				/* terminator (end == 0) */
+};
+
+static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = {
+	{ 0x393200, 0x39323F },
+	{ 0x393400, 0x3934FF },
+	{},	/* terminator (end == 0) */
+};
+
+/*
+ * DG2 GAM registers are a special case; this table is checked directly in
+ * xe_gt_mcr_get_nonterminated_steering and is not hooked up via
+ * gt->steering[].
+ */
+static const struct xe_mmio_range dg2_gam_ranges[] = {
+	{ 0x004000, 0x004AFF },
+	{ 0x00C800, 0x00CFFF },
+	{ 0x00F000, 0x00FFFF },
+	{},	/* terminator (end == 0) */
+};
+
+static void init_steering_l3bank(struct xe_gt *gt)
+{
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
+						xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+		u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
+					      xe_mmio_read32(gt, XEHP_FUSE4.reg));
+
+		/*
+		 * Group selects mslice, instance selects bank within mslice.
+		 * Bank 0 is always valid _except_ when the bank mask is 010b.
+		 */
+		gt->steering[L3BANK].group_target = __ffs(mslice_mask);
+		gt->steering[L3BANK].instance_target =
+			bank_mask & BIT(0) ? 0 : 2;
+	} else {
+		u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK,
+					 ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));	/* register value is inverted first */
+
+		gt->steering[L3BANK].group_target = 0;	/* unused */
+		gt->steering[L3BANK].instance_target = __ffs(fuse);	/* lowest set bit of the inverted field */
+	}
+}
+
+static void init_steering_mslice(struct xe_gt *gt)
+{
+	u32 mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
+				 xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+
+	/*
+	 * mslice registers are valid (not terminated) if either the meml3
+	 * associated with the mslice is present, or at least one DSS associated
+	 * with the mslice is present.  There will always be at least one meml3
+	 * so we can just use that to find a non-terminated mslice and ignore
+	 * the DSS fusing.
+	 */
+	gt->steering[MSLICE].group_target = __ffs(mask);	/* lowest set bit of the MEML3 enable field */
+	gt->steering[MSLICE].instance_target = 0;	/* unused */
+
+	/*
+	 * LNCF termination is also based on mslice presence, so we'll set
+	 * it up here.  Either LNCF within a non-terminated mslice will work,
+	 * so we just always pick LNCF 0 here.
+	 */
+	gt->steering[LNCF].group_target = __ffs(mask) << 1;	/* mslice index * 2 */
+	gt->steering[LNCF].instance_target = 0;		/* unused */
+}
+
+static void init_steering_dss(struct xe_gt *gt)
+{
+	unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
+			       xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0));	/* smaller result of the two DSS masks */
+	unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4;	/* 8 on PVC, 4 elsewhere */
+
+	gt->steering[DSS].group_target = dss / dss_per_grp;
+	gt->steering[DSS].instance_target = dss % dss_per_grp;
+}
+
+static void init_steering_oaddrm(struct xe_gt *gt)
+{
+	/*
+	 * First instance is only terminated if the entire first media slice
+	 * is absent (i.e., no VCS0 or VECS0).
+	 */
+	if (gt->info.engine_mask & (XE_HW_ENGINE_VCS0 | XE_HW_ENGINE_VECS0))
+		gt->steering[OADDRM].group_target = 0;
+	else
+		gt->steering[OADDRM].group_target = 1;
+
+	gt->steering[OADDRM].instance_target = 0;	/* unused */
+}
+
+static void init_steering_inst0(struct xe_gt *gt)
+{
+	gt->steering[INSTANCE0].group_target = 0;	/* always steer to (0,0) */
+	gt->steering[INSTANCE0].instance_target = 0;	/* always steer to (0,0) */
+}
+
+static const struct {
+	const char *name;
+	void (*init)(struct xe_gt *);	/* optional hook, called from xe_gt_mcr_init() */
+} xe_steering_types[] = {	/* indexed like gt->steering[] */
+	{ "L3BANK",	init_steering_l3bank },
+	{ "MSLICE",	init_steering_mslice },
+	{ "LNCF",	NULL },		/* initialized by mslice init */
+	{ "DSS",	init_steering_dss },
+	{ "OADDRM",	init_steering_oaddrm },
+	{ "INSTANCE 0",	init_steering_inst0 },
+};
+
+void xe_gt_mcr_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
+
+	spin_lock_init(&gt->mcr_lock);
+	/* Pick the steering range tables that apply to this GT type/platform. */
+	if (gt->info.type == XE_GT_TYPE_MEDIA) {
+		drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
+
+		gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
+	} else if (GRAPHICS_VERx100(xe) >= 1270) {
+		gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
+		gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
+		gt->steering[DSS].ranges = xelpg_dss_steering_table;
+	} else if (xe->info.platform == XE_PVC) {
+		gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
+		gt->steering[DSS].ranges = xehpc_dss_steering_table;
+	} else if (xe->info.platform == XE_DG2) {
+		gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
+		gt->steering[LNCF].ranges = xehp_lncf_steering_table;
+		gt->steering[DSS].ranges = xehp_dss_steering_table;
+	} else {
+		gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
+		gt->steering[DSS].ranges = xelp_dss_steering_table;
+	}
+
+	/* Select non-terminated steering target for each type */
+	for (int i = 0; i < NUM_STEERING_TYPES; i++)
+		if (gt->steering[i].ranges && xe_steering_types[i].init)	/* skip types with no table or no hook */
+			xe_steering_types[i].init(gt);
+}
+
+/*
+ * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
+ *    will steer a register to a non-terminated instance
+ * @gt: GT structure
+ * @reg: register for which the steering is required
+ * @group: return variable for group steering
+ * @instance: return variable for instance steering
+ *
+ * This function returns a group/instance pair that is guaranteed to work for
+ * read steering of the given register. Note that a value will be returned even
+ * if the register is not replicated and therefore does not actually require
+ * steering.
+ *
+ * Returns true if the caller should steer to the @group/@instance values
+ * returned.  Returns false if the caller need not perform any steering (i.e.,
+ * the DG2 GAM range special case).
+ */
+static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+						 i915_mcr_reg_t reg,
+						 u8 *group, u8 *instance)
+{
+	for (int type = 0; type < NUM_STEERING_TYPES; type++) {
+		if (!gt->steering[type].ranges)
+			continue;
+
+		for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {	/* stops at the {} terminator */
+			if (xe_mmio_in_range(&gt->steering[type].ranges[i], reg.reg)) {
+				*group = gt->steering[type].group_target;
+				*instance = gt->steering[type].instance_target;
+				return true;
+			}
+		}
+	}
+
+	/*
+	 * All MCR registers should usually be part of one of the steering
+	 * ranges we're tracking.  However there's one special case:  DG2
+	 * GAM registers are technically multicast registers, but are special
+	 * in a number of ways:
+	 *  - they have their own dedicated steering control register (they
+	 *    don't share 0xFDC with other MCR classes)
+	 *  - all reads should be directed to instance 1 (unicast reads against
+	 *    other instances are not allowed), and instance 1 is already
+	 *    the hardware's default steering target, which we never change
+	 *
+	 * Ultimately this means that we can just treat them as if they were
+	 * unicast registers and all operations will work properly.
+	 */
+	for (int i = 0; dg2_gam_ranges[i].end > 0; i++)
+		if (xe_mmio_in_range(&dg2_gam_ranges[i], reg.reg))
+			return false;	/* @group/@instance are left untouched */
+
+	/*
+	 * Not found in a steering table and not a DG2 GAM register?  We'll
+	 * just steer to 0/0 as a guess and raise a warning.
+	 */
+	drm_WARN(&gt_to_xe(gt)->drm, true,
+		 "Did not find MCR register %#x in any MCR steering table\n",
+		 reg.reg);
+	*group = 0;
+	*instance = 0;
+
+	return true;
+}
+
+#define STEER_SEMAPHORE		0xFD0
+
+/*
+ * Obtain exclusive access to MCR steering.  On MTL and beyond we also need
+ * to synchronize with external clients (e.g., firmware), so a semaphore
+ * register will also need to be taken.
+ */
+static void mcr_lock(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret = 0;	/* stays 0 when no hardware semaphore is taken */
+
+	spin_lock(&gt->mcr_lock);
+
+	/*
+	 * Starting with MTL we also need to grab a semaphore register
+	 * to synchronize with external agents (e.g., firmware) that now
+	 * shares the same steering control register.
+	 */
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		ret = wait_for_us(xe_mmio_read32(gt, STEER_SEMAPHORE) == 0x1, 10);
+
+	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
+}
+
+static void mcr_unlock(struct xe_gt *gt)
+{
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)	/* release hardware semaphore */
+		xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+
+	spin_unlock(&gt->mcr_lock);
+}
+
+/*
+ * Access a register with specific MCR steering
+ * (@rw_flag is MCR_OP_READ or MCR_OP_WRITE; a write returns 0).
+ * Caller needs to make sure the relevant forcewake wells are up.
+ */
+static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag,
+				int group, int instance, u32 value)
+{
+	u32 steer_reg, steer_val, val = 0;
+
+	lockdep_assert_held(&gt->mcr_lock);	/* callers take it via mcr_lock() */
+
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		steer_reg = MTL_MCR_SELECTOR.reg;
+		steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
+			REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
+	} else {
+		steer_reg = GEN8_MCR_SELECTOR.reg;
+		steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, group) |
+			REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, instance);
+	}
+
+	/*
+	 * Always leave the hardware in multicast mode when doing reads
+	 * (see comment about Wa_22013088509 below) and only change it
+	 * to unicast mode when doing writes of a specific instance.
+	 *
+	 * No need to save old steering reg value.
+	 */
+	if (rw_flag == MCR_OP_READ)
+		steer_val |= GEN11_MCR_MULTICAST;
+
+	xe_mmio_write32(gt, steer_reg, steer_val);
+
+	if (rw_flag == MCR_OP_READ)
+		val = xe_mmio_read32(gt, reg.reg);
+	else
+		xe_mmio_write32(gt, reg.reg, value);
+
+	/*
+	 * If we turned off the multicast bit (during a write) we're required
+	 * to turn it back on before finishing.  The group and instance values
+	 * don't matter since they'll be re-programmed on the next MCR
+	 * operation.
+	 */
+	if (rw_flag == MCR_OP_WRITE)
+		xe_mmio_write32(gt, steer_reg, GEN11_MCR_MULTICAST);
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register
+ * @gt: GT structure
+ * @reg: register to read
+ *
+ * Reads a GT MCR register.  The read will be steered to a non-terminated
+ * instance (i.e., one that isn't fused off or powered down by power gating).
+ * This function assumes the caller is already holding any necessary forcewake
+ * domains.
+ *
+ * Returns the value from a non-terminated instance of @reg.
+ */
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg)
+{
+	u8 group, instance;
+	bool steer;
+	u32 val;
+
+	steer = xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
+	if (!steer) {
+		/* DG2 GAM special case rules; treat as if unicast */
+		return xe_mmio_read32(gt, reg.reg);
+	}
+
+	/* A steering target was found: steer and read under the MCR lock */
+	mcr_lock(gt);
+	val = rw_with_mcr_steering(gt, reg, MCR_OP_READ,
+				   group, instance, 0);
+	mcr_unlock(gt);
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read - read a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to read
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Returns the value read from an MCR register after steering toward a specific
+ * group/instance.
+ */
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
+			   i915_mcr_reg_t reg,
+			   int group, int instance)
+{
+	u32 val;
+
+	mcr_lock(gt);
+	val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, group, instance, 0);
+	mcr_unlock(gt);
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_write - write a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to write
+ * @value: value to write
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Write an MCR register in unicast mode after steering toward a specific
+ * group/instance.
+ */
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
+			     int group, int instance)
+{
+	mcr_lock(gt);
+	rw_with_mcr_steering(gt, reg, MCR_OP_WRITE, group, instance, value);
+	mcr_unlock(gt);
+}
+
+/**
+ * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to write
+ * @value: value to write
+ *
+ * Write an MCR register in multicast mode to update all instances.
+ */
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value)
+{
+	/*
+	 * Synchronize with any unicast operations.  Once we have exclusive
+	 * access, the MULTICAST bit should already be set, so there's no need
+	 * to touch the steering register.
+	 */
+	mcr_lock(gt);
+	xe_mmio_write32(gt, reg.reg, value);
+	mcr_unlock(gt);
+}
+
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	for (int type = 0; type < NUM_STEERING_TYPES; type++) {
+		if (!gt->steering[type].ranges)
+			continue;	/* no range table for this steering type */
+		drm_printf(p, "%s steering: group=%#x, instance=%#x\n",
+			   xe_steering_types[type].name,
+			   gt->steering[type].group_target,
+			   gt->steering[type].instance_target);
+		for (int n = 0; gt->steering[type].ranges[n].end; n++)
+			drm_printf(p, "\t0x%06x - 0x%06x\n",
+				   gt->steering[type].ranges[n].start,
+				   gt->steering[type].ranges[n].end);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
new file mode 100644
index 000000000000..62ec6eb654a0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_MCR_H_
+#define _XE_GT_MCR_H_
+
+#include "i915_reg_defs.h"
+
+struct drm_printer;
+struct xe_gt;
+
+void xe_gt_mcr_init(struct xe_gt *gt);
+
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg,
+			   int group, int instance);
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg);
+
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
+			     int group, int instance);
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value);
+
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
+
+#endif /* _XE_GT_MCR_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
new file mode 100644
index 000000000000..7125113b7390
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -0,0 +1,750 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/circ_buf.h>
+
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_gt_pagefault.h"
+#include "xe_migrate.h"
+#include "xe_pt.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+struct pagefault {
+	u64 page_addr;
+	u32 asid;
+	u16 pdata;
+	u8 vfid;
+	u8 access_type;
+	u8 fault_type;
+	u8 fault_level;
+	u8 engine_class;
+	u8 engine_instance;
+	u8 fault_unsuccessful;
+};
+
+enum access_type {
+	ACCESS_TYPE_READ = 0,
+	ACCESS_TYPE_WRITE = 1,
+	ACCESS_TYPE_ATOMIC = 2,
+	ACCESS_TYPE_RESERVED = 3,
+};
+
+enum fault_type {
+	NOT_PRESENT = 0,
+	WRITE_ACCESS_VIOLATION = 1,
+	ATOMIC_ACCESS_VIOLATION = 2,
+};
+
+struct acc {
+	u64 va_range_base;
+	u32 asid;
+	u32 sub_granularity;
+	u8 granularity;
+	u8 vfid;
+	u8 access_type;
+	u8 engine_class;
+	u8 engine_instance;
+};
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static int send_tlb_invalidation(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 action[] = {
+		XE_GUC_ACTION_TLB_INVALIDATION,
+		0,	/* seqno, filled in under ct.lock below */
+		XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
+		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
+		XE_GUC_TLB_INVAL_FLUSH_CACHE,
+	};
+	int seqno;
+	int ret;
+
+	/*
+	 * XXX: The seqno algorithm relies on TLB invalidations being processed
+	 * in order, which they currently are; if that changes it needs updating.
+	 * A seqno is only consumed (0 is skipped) once its request was sent.
+	 */
+	mutex_lock(&guc->ct.lock);
+	seqno = gt->usm.tlb_invalidation_seqno;
+	action[1] = seqno;
+	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
+				    G2H_LEN_DW_TLB_INVALIDATE, 1);
+	if (!ret) {
+		gt->usm.tlb_invalidation_seqno = (seqno + 1) % TLB_INVALIDATION_SEQNO_MAX;
+		if (!gt->usm.tlb_invalidation_seqno)
+			gt->usm.tlb_invalidation_seqno = 1;
+		ret = seqno;	/* success: hand the seqno back to the caller */
+	}
+	mutex_unlock(&guc->ct.lock);
+
+	return ret;
+}
+
+static bool access_is_atomic(enum access_type access_type)
+{
+	return access_type == ACCESS_TYPE_ATOMIC;
+}
+
+static bool vma_is_valid(struct xe_gt *gt, struct xe_vma *vma)
+{
+	/* This GT's bit must be set in gt_present and clear in gt_invalidated */
+	return vma->gt_present & ~vma->usm.gt_invalidated & BIT(gt->info.id);
+}
+
+static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
+{
+	bool starts_by_vma_end = lookup->start <= vma->end;
+	bool ends_by_vma_start = lookup->end >= vma->start;
+
+	/* Inclusive ranges overlap iff neither lies wholly past the other */
+	return starts_by_vma_end && ends_by_vma_start;
+}
+
+static bool only_needs_bo_lock(struct xe_bo *bo)
+{
+	return bo && bo->vm;
+}
+
+static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
+{
+	struct xe_vma *cached = vm->usm.last_fault_vma;
+	struct xe_vma lookup;
+
+	/* Describe the faulting 4K page as a throwaway VMA used only as a key */
+	lookup.start = page_addr;
+	lookup.end = lookup.start + SZ_4K - 1;
+
+	/* Fast lookup: try the VMA recorded by the last successful fault */
+	if (cached && vma_matches(cached, &lookup))
+		return cached;
+
+	return xe_vm_find_overlapping_vma(vm, &lookup);
+}
+
+static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_vm *vm;
+	struct xe_vma *vma = NULL;
+	struct xe_bo *bo;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+	struct ttm_validate_buffer tv_bo, tv_vm;
+	struct ww_acquire_ctx ww;
+	struct dma_fence *fence;
+	bool write_locked, atomic;
+	int ret = 0;
+
+	/* ASID to VM; only take a reference on a VM we are going to service */
+	mutex_lock(&xe->usm.lock);
+	vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
+	if (!vm || !xe_vm_in_fault_mode(vm)) {
+		mutex_unlock(&xe->usm.lock);
+		return -EINVAL;
+	}
+	xe_vm_get(vm);
+	mutex_unlock(&xe->usm.lock);
+
+retry_userptr:
+	/*
+	 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
+	 * start out read-locked?
+	 */
+	down_write(&vm->lock);
+	write_locked = true;
+	vma = lookup_vma(vm, pf->page_addr);
+	if (!vma) {
+		ret = -EINVAL;
+		goto unlock_vm;
+	}
+
+	if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) {
+		downgrade_write(&vm->lock);
+		write_locked = false;
+	}
+
+	trace_xe_vma_pagefault(vma);
+
+	atomic = access_is_atomic(pf->access_type);
+
+	/* Check if VMA is valid */
+	if (vma_is_valid(gt, vma) && !atomic)
+		goto unlock_vm;
+
+	/* TODO: Validate fault */
+
+	if (xe_vma_is_userptr(vma) && write_locked) {
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del_init(&vma->userptr.invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+
+		ret = xe_vma_userptr_pin_pages(vma);
+		if (ret)
+			goto unlock_vm;
+
+		downgrade_write(&vm->lock);
+		write_locked = false;
+	}
+
+	/* Lock VM and BOs dma-resv */
+	bo = vma->bo;
+	if (only_needs_bo_lock(bo)) {
+		/* This path ensures the BO's LRU is updated */
+		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
+	} else {
+		tv_vm.num_shared = xe->info.tile_count;
+		tv_vm.bo = xe_vm_ttm_bo(vm);
+		list_add(&tv_vm.head, &objs);
+		if (bo) {
+			tv_bo.bo = &bo->ttm;
+			tv_bo.num_shared = xe->info.tile_count;
+			list_add(&tv_bo.head, &objs);
+		}
+		ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
+	}
+	if (ret)
+		goto unlock_vm;
+
+	if (atomic) {
+		if (xe_vma_is_userptr(vma)) {
+			ret = -EACCES;
+			goto unlock_dma_resv;
+		}
+
+		/* Migrate to VRAM, move should invalidate the VMA first */
+		ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id);
+		if (ret)
+			goto unlock_dma_resv;
+	} else if (bo) {
+		/* Create backing store if needed */
+		ret = xe_bo_validate(bo, vm, true);
+		if (ret)
+			goto unlock_dma_resv;
+	}
+
+	/* Bind VMA only to the GT that has faulted */
+	trace_xe_vma_pf_bind(vma);
+	fence = __xe_pt_bind_vma(gt, vma, xe_gt_migrate_engine(gt), NULL, 0,
+				 vma->gt_present & BIT(gt->info.id));
+	if (IS_ERR(fence)) {
+		ret = PTR_ERR(fence);
+		goto unlock_dma_resv;
+	}
+
+	/*
+	 * XXX: Should we drop the lock before waiting? This only helps if doing
+	 * GPU binds which is currently only done if we have to wait for more
+	 * than 10ms on a move.
+	 */
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+
+	if (xe_vma_is_userptr(vma))
+		ret = xe_vma_userptr_check_repin(vma);
+	vma->usm.gt_invalidated &= ~BIT(gt->info.id);
+
+unlock_dma_resv:
+	if (only_needs_bo_lock(bo))
+		xe_bo_unlock(bo, &ww);
+	else
+		ttm_eu_backoff_reservation(&ww, &objs);
+unlock_vm:
+	if (!ret)
+		vm->usm.last_fault_vma = vma;
+	if (write_locked)
+		up_write(&vm->lock);
+	else
+		up_read(&vm->lock);
+	if (ret == -EAGAIN)
+		goto retry_userptr;
+
+	if (!ret) {
+		/*
+		 * FIXME: Doing a full TLB invalidation for now, likely could
+		 * defer TLB invalidate + fault response to a callback of fence
+		 * too
+		 */
+		ret = send_tlb_invalidation(&gt->uc.guc);
+		if (ret >= 0)
+			ret = 0;
+	}
+	xe_vm_put(vm);
+
+	return ret;
+}
+
+static int send_pagefault_reply(struct xe_guc *guc,
+				struct xe_guc_pagefault_reply *reply)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
+		reply->dw0,
+		reply->dw1,
+	};
+
+	return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
+{
+	drm_warn(&xe->drm, "\n\tASID: %d\n"
+		 "\tVFID: %d\n"
+		 "\tPDATA: 0x%04x\n"
+		 "\tFaulted Address: 0x%08x%08x\n"
+		 "\tFaultType: %d\n"
+		 "\tAccessType: %d\n"
+		 "\tFaultLevel: %d\n"
+		 "\tEngineClass: %d\n"
+		 "\tEngineInstance: %d\n",
+		 pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
+		 lower_32_bits(pf->page_addr),
+		 pf->fault_type, pf->access_type, pf->fault_level,
+		 pf->engine_class, pf->engine_instance);
+}
+
+#define PF_MSG_LEN_DW	4
+
+static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
+{
+	const struct xe_guc_pagefault_desc *desc;
+	int ret = -1;
+
+	spin_lock_irq(&pf_queue->lock);
+	if (pf_queue->head == pf_queue->tail)
+		goto unlock;	/* nothing queued */
+	desc = (const struct xe_guc_pagefault_desc *)
+		(pf_queue->data + pf_queue->head);
+
+	pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
+	pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
+	pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
+	pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
+		PFD_PDATA_HI_SHIFT;
+	pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
+	pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
+	pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
+	pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
+	pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
+	pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
+		PFD_VIRTUAL_ADDR_HI_SHIFT;
+	pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
+		PFD_VIRTUAL_ADDR_LO_SHIFT;
+
+	/* Consume the descriptor that was just decoded */
+	pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
+		PF_QUEUE_NUM_DW;
+	ret = 0;
+unlock:
+	spin_unlock_irq(&pf_queue->lock);
+
+	return ret;
+}
+
+static bool pf_queue_full(struct pf_queue *pf_queue)
+{
+	lockdep_assert_held(&pf_queue->lock);
+
+	return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
+		PF_MSG_LEN_DW;
+}
+
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct pf_queue *pf_queue;
+	unsigned long flags;
+	u32 asid;
+	bool full;
+
+	if (unlikely(len != PF_MSG_LEN_DW))
+		return -EPROTO;
+
+	asid = FIELD_GET(PFD_ASID, msg[1]);
+	pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];
+
+	spin_lock_irqsave(&pf_queue->lock, flags);
+	full = pf_queue_full(pf_queue);
+	if (!full) {
+		memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
+		pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
+		queue_work(gt->usm.pf_wq, &pf_queue->worker);
+	} else {
+		XE_WARN_ON("PF Queue full, shouldn't be possible");
+	}
+	spin_unlock_irqrestore(&pf_queue->lock, flags);
+
+	return full ? -ENOSPC : 0;
+}
+
+static void pf_queue_work_func(struct work_struct *w)
+{
+	struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
+	struct xe_gt *gt = pf_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc_pagefault_reply reply = {};
+	struct pagefault pf = {};
+	int ret;
+
+	if (get_pagefault(pf_queue, &pf))
+		return;	/* queue drained */
+
+	ret = handle_pagefault(gt, &pf);
+	if (unlikely(ret)) {
+		print_pagefault(xe, &pf);
+		pf.fault_unsuccessful = 1;
+		drm_warn(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
+	}
+
+	reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
+		FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
+		FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+		FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+		FIELD_PREP(PFR_ASID, pf.asid);
+	reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
+		FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
+		FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
+		FIELD_PREP(PFR_PDATA, pf.pdata);
+
+	send_pagefault_reply(&gt->uc.guc, &reply);
+	/* queue_work() is a no-op while pending; rerun until the queue drains */
+	queue_work(gt->usm.pf_wq, w);
+}
+
+static void acc_queue_work_func(struct work_struct *w);
+
+int xe_gt_pagefault_init(struct xe_gt *gt)
+{
+	int i;
+
+	if (!gt_to_xe(gt)->info.supports_usm)
+		return 0;
+
+	gt->usm.tlb_invalidation_seqno = 1;	/* seqno 0 is never sent */
+	for (i = 0; i < NUM_PF_QUEUE; ++i) {
+		gt->usm.pf_queue[i].gt = gt;
+		spin_lock_init(&gt->usm.pf_queue[i].lock);
+		INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func);
+	}
+	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+		gt->usm.acc_queue[i].gt = gt;
+		spin_lock_init(&gt->usm.acc_queue[i].lock);
+		INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
+	}
+
+	gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
+					WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
+	if (!gt->usm.pf_wq)
+		return -ENOMEM;
+
+	gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
+					 WQ_UNBOUND | WQ_HIGHPRI, NUM_ACC_QUEUE);
+	if (!gt->usm.acc_wq) {
+		destroy_workqueue(gt->usm.pf_wq);	/* don't leak the first wq */
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void xe_gt_pagefault_reset(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i;
+
+	if (!xe->info.supports_usm)
+		return;
+
+	for (i = 0; i < NUM_PF_QUEUE; ++i) {
+		spin_lock_irq(&gt->usm.pf_queue[i].lock);
+		gt->usm.pf_queue[i].head = 0;
+		gt->usm.pf_queue[i].tail = 0;
+		spin_unlock_irq(&gt->usm.pf_queue[i].lock);
+	}
+
+	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+		spin_lock(&gt->usm.acc_queue[i].lock);
+		gt->usm.acc_queue[i].head = 0;
+		gt->usm.acc_queue[i].tail = 0;
+		spin_unlock(&gt->usm.acc_queue[i].lock);
+	}
+}
+
+int xe_gt_tlb_invalidation(struct xe_gt *gt)
+{
+	return send_tlb_invalidation(&gt->uc.guc);
+}
+
+static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
+{
+	int delta = seqno - gt->usm.tlb_invalidation_seqno_recv;
+
+	/* A distance beyond half the seqno space means one side has wrapped */
+	if (delta < -(TLB_INVALIDATION_SEQNO_MAX / 2))
+		return false;	/* seqno wrapped, recv has not: still pending */
+	if (delta > TLB_INVALIDATION_SEQNO_MAX / 2)
+		return true;	/* recv wrapped after passing seqno */
+	return delta <= 0;
+}
+
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc *guc = &gt->uc.guc;
+	int ret;
+
+	/*
+	 * XXX: See above, this algorithm only works if seqno are always in
+	 * order
+	 */
+	ret = wait_event_timeout(guc->ct.wq,
+				 tlb_invalidation_seqno_past(gt, seqno),
+				 HZ / 5);
+	if (!ret) {
+		drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
+			seqno, gt->usm.tlb_invalidation_seqno_recv);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int expected_seqno;
+
+	if (unlikely(len != 1))
+		return -EPROTO;
+
+	/* Sanity check on seqno; the sender skips 0 when its counter wraps */
+	expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) %
+		TLB_INVALIDATION_SEQNO_MAX ?: 1;
+	XE_WARN_ON(expected_seqno != msg[0]);
+
+	gt->usm.tlb_invalidation_seqno_recv = msg[0];
+	smp_wmb();
+	wake_up_all(&guc->ct.wq);
+
+	return 0;
+}
+
+static int granularity_in_byte(int val)
+{
+	switch (val) {
+	case 0:
+		return SZ_128K;
+	case 1:
+		return SZ_2M;
+	case 2:
+		return SZ_16M;
+	case 3:
+		return SZ_64M;
+	default:
+		return 0;
+	}
+}
+
+static int sub_granularity_in_byte(int val)
+{
+	return (granularity_in_byte(val) / 32);
+}
+
+static void print_acc(struct xe_device *xe, struct acc *acc)
+{
+	drm_warn(&xe->drm, "Access counter request:\n"
+		 "\tType: %s\n"
+		 "\tASID: %d\n"
+		 "\tVFID: %d\n"
+		 "\tEngine: %d:%d\n"
+		 "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
+		 "\tSub_Granularity Vector: 0x%08x\n"
+		 "\tVA Range base: 0x%016llx\n",
+		 acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
+		 acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
+		 granularity_in_byte(acc->granularity) / SZ_1K,
+		 sub_granularity_in_byte(acc->granularity) / SZ_1K,
+		 acc->sub_granularity, acc->va_range_base);
+}
+
+static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
+{
+	u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
+		sub_granularity_in_byte(acc->granularity);
+	struct xe_vma lookup;
+
+	lookup.start = page_va;
+	lookup.end = lookup.start + SZ_4K - 1;
+
+	return xe_vm_find_overlapping_vma(vm, &lookup);
+}
+
+static int handle_acc(struct xe_gt *gt, struct acc *acc)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_vm *vm;
+	struct xe_vma *vma;
+	struct xe_bo *bo;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+	struct ttm_validate_buffer tv_bo, tv_vm;
+	struct ww_acquire_ctx ww;
+	int ret = 0;
+
+	/* We only support ACC_TRIGGER at the moment */
+	if (acc->access_type != ACC_TRIGGER)
+		return -EINVAL;
+
+	/* ASID to VM; only take a reference on a VM we are going to service */
+	mutex_lock(&xe->usm.lock);
+	vm = xa_load(&xe->usm.asid_to_vm, acc->asid);
+	if (!vm || !xe_vm_in_fault_mode(vm)) {
+		mutex_unlock(&xe->usm.lock);
+		return -EINVAL;
+	}
+	xe_vm_get(vm);
+	mutex_unlock(&xe->usm.lock);
+
+	down_read(&vm->lock);
+	/* Lookup VMA */
+	vma = get_acc_vma(vm, acc);
+	if (!vma) {
+		ret = -EINVAL;
+		goto unlock_vm;
+	}
+
+	trace_xe_vma_acc(vma);
+
+	/* Userptr can't be migrated, nothing to do */
+	if (xe_vma_is_userptr(vma))
+		goto unlock_vm;
+
+	/* Lock VM and BOs dma-resv */
+	bo = vma->bo;
+	if (only_needs_bo_lock(bo)) {
+		/* This path ensures the BO's LRU is updated */
+		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
+	} else {
+		tv_vm.num_shared = xe->info.tile_count;
+		tv_vm.bo = xe_vm_ttm_bo(vm);
+		list_add(&tv_vm.head, &objs);
+		tv_bo.bo = &bo->ttm;
+		tv_bo.num_shared = xe->info.tile_count;
+		list_add(&tv_bo.head, &objs);
+		ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
+	}
+	if (ret)
+		goto unlock_vm;
+
+	/* Migrate to VRAM, move should invalidate the VMA first */
+	ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id);
+
+	if (only_needs_bo_lock(bo))
+		xe_bo_unlock(bo, &ww);
+	else
+		ttm_eu_backoff_reservation(&ww, &objs);
+unlock_vm:
+	up_read(&vm->lock);
+	xe_vm_put(vm);
+
+	return ret;
+}
+
+#define make_u64(hi__, low__)  ((u64)(hi__) << 32 | (u64)(low__))
+#define ACC_MSG_LEN_DW	4
+
+static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
+{
+	const struct xe_guc_acc_desc *desc;
+	int ret = -1;	/* returned when the queue is empty */
+
+	spin_lock(&acc_queue->lock);
+	if (acc_queue->head != acc_queue->tail) {
+		desc = (const struct xe_guc_acc_desc *)
+			(acc_queue->data + acc_queue->head);
+
+		acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
+		acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
+			FIELD_GET(ACC_SUBG_LO, desc->dw0);
+		acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
+		acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
+		acc->asid =  FIELD_GET(ACC_ASID, desc->dw1);
+		acc->vfid =  FIELD_GET(ACC_VFID, desc->dw2);
+		acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
+		acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
+					      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
+		/* Consume the entry so the next call sees the following one */
+		acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) % ACC_QUEUE_NUM_DW;
+		ret = 0;
+	}
+	spin_unlock(&acc_queue->lock);
+
+	return ret;
+}
+
+static void acc_queue_work_func(struct work_struct *w)
+{
+	struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
+	struct xe_gt *gt = acc_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct acc acc = {};
+	int ret;
+
+	ret = get_acc(acc_queue, &acc);
+	if (ret)
+		return;
+
+	ret = handle_acc(gt, &acc);
+	if (unlikely(ret)) {
+		print_acc(xe, &acc);
+		drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
+	}
+}
+
+static bool acc_queue_full(struct acc_queue *acc_queue)
+{
+	lockdep_assert_held(&acc_queue->lock);
+
+	return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <=
+		ACC_MSG_LEN_DW;
+}
+
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct acc_queue *acc_queue;
+	u32 asid;
+	bool full;
+
+	if (unlikely(len != ACC_MSG_LEN_DW))
+		return -EPROTO;
+
+	/* Requests are spread over the queues by ASID */
+	asid = FIELD_GET(ACC_ASID, msg[1]);
+	acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];
+	spin_lock(&acc_queue->lock);
+	full = acc_queue_full(acc_queue);
+	if (!full) {
+		memcpy(acc_queue->data + acc_queue->tail, msg,
+		       len * sizeof(u32));
+		acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
+		queue_work(gt->usm.acc_wq, &acc_queue->worker);
+	} else {
+		drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC\n");
+	}
+	spin_unlock(&acc_queue->lock);
+
+	return full ? -ENOSPC : 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h
new file mode 100644
index 000000000000..35f68027cc9c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_PAGEFAULT_H_
+#define _XE_GT_PAGEFAULT_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+struct xe_guc;
+
+int xe_gt_pagefault_init(struct xe_gt *gt);
+void xe_gt_pagefault_reset(struct xe_gt *gt);
+int xe_gt_tlb_invalidation(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif	/* _XE_GT_PAGEFAULT_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
new file mode 100644
index 000000000000..2d966d935b8e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <drm/drm_managed.h>
+#include "xe_gt.h"
+#include "xe_gt_sysfs.h"
+
+static void xe_gt_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct kobj_gt, base));	/* free the wrapper */
+}
+
+static struct kobj_type xe_gt_sysfs_kobj_type = {
+	.release = xe_gt_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void gt_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	kobject_put(gt->sysfs);
+}
+
+int xe_gt_sysfs_init(struct xe_gt *gt)
+{
+	struct device *dev = gt_to_xe(gt)->drm.dev;
+	struct kobj_gt *kg;
+	int err;
+
+	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+	if (!kg)
+		return -ENOMEM;
+
+	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
+	kg->gt = gt;
+
+	err = kobject_add(&kg->base, &dev->kobj, "gt%d", gt->info.id);
+	if (err) {
+		kobject_put(&kg->base);
+		return err;
+	}
+
+	gt->sysfs = &kg->base;
+
+	/*
+	 * Register gt_sysfs_fini() as a drm-managed action; it drops the
+	 * kobject reference taken above.  The result is returned as-is.
+	 */
+	return drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_sysfs_fini, gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
new file mode 100644
index 000000000000..ecbfcc5c7d42
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_H_
+#define _XE_GT_SYSFS_H_
+
+#include "xe_gt_sysfs_types.h"
+
+int xe_gt_sysfs_init(struct xe_gt *gt);
+
+static inline struct xe_gt *
+kobj_to_gt(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+#endif /* _XE_GT_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
new file mode 100644
index 000000000000..d3bc6b83360f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_TYPES_H_
+#define _XE_GT_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_gt;
+
+/**
+ * struct kobj_gt - A GT's kobject struct that connects the kobject and the GT
+ *
+ * When dealing with multiple GTs, this struct helps to understand which GT
+ * needs to be addressed on a given sysfs call.
+ */
+struct kobj_gt {
+	/** @base: The actual kobject */
+	struct kobject base;
+	/** @gt: A pointer to the GT itself */
+	struct xe_gt *gt;
+};
+
+#endif	/* _XE_GT_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
new file mode 100644
index 000000000000..8e02e362ba27
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_mmio.h"
+
+#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
+#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
+
+#define XELP_EU_ENABLE				0x9134	/* "_DISABLE" on Xe_LP */
+#define   XELP_EU_MASK				REG_GENMASK(7, 0)
+#define XELP_GT_GEOMETRY_DSS_ENABLE		0x913c
+#define XEHP_GT_COMPUTE_DSS_ENABLE		0x9144
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		0x9148
+
+static void
+load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
+{
+	va_list argp;
+	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
+	int i;
+
+	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
+		numregs = XE_MAX_DSS_FUSE_REGS;
+
+	va_start(argp, numregs);
+	for (i = 0; i < numregs; i++)
+		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, u32));
+	va_end(argp);
+
+	bitmap_from_arr32(mask, fuse_val, numregs * 32);
+}
+
+static void
+load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE);
+	u32 val = 0;
+	int i;
+
+	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
+
+	/*
+	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
+	 * of enable).
+	 */
+	if (GRAPHICS_VERx100(xe) < 1250)
+		reg = ~reg & XELP_EU_MASK;
+
+	/* On PVC, one bit = one EU */
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		val = reg;
+	} else {
+		/* All other platforms, one bit = 2 EU; val only holds 16 such bits */
+		for (i = 0; i < min(fls(reg), 16); i++)
+			if (reg & BIT(i))
+				val |= 0x3u << 2 * i;
+	}
+
+	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
+}
+
+void
+xe_gt_topology_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_debug_printer("GT topology");
+	int num_geometry_regs, num_compute_regs;
+
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		num_geometry_regs = 0;
+		num_compute_regs = 2;
+	} else if (GRAPHICS_VERx100(xe) >= 1250) {
+		num_geometry_regs = 1;
+		num_compute_regs = 1;
+	} else {
+		num_geometry_regs = 1;
+		num_compute_regs = 0;
+	}
+
+	load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs,
+		      XELP_GT_GEOMETRY_DSS_ENABLE);
+	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
+		      XEHP_GT_COMPUTE_DSS_ENABLE,
+		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
+	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+
+	xe_gt_topology_dump(gt, &p);
+}
+
+unsigned int
+xe_gt_topology_count_dss(xe_dss_mask_t mask)
+{
+	return bitmap_weight(mask, XE_MAX_DSS_FUSE_BITS);
+}
+
+u64
+xe_gt_topology_dss_group_mask(xe_dss_mask_t mask, int grpsize)
+{
+	unsigned long dss;
+	u64 grpmask = 0;
+
+	WARN_ON(DIV_ROUND_UP(XE_MAX_DSS_FUSE_BITS, grpsize) > BITS_PER_TYPE(grpmask));
+
+	/*
+	 * Walk the set bits rather than shifting @mask in place: the array
+	 * parameter aliases the caller's bitmap, which must not be consumed.
+	 * BIT_ULL() keeps group bits >= 32 valid on 32-bit builds.
+	 */
+	for_each_set_bit(dss, mask, XE_MAX_DSS_FUSE_BITS)
+		grpmask |= BIT_ULL(dss / grpsize);
+
+	return grpmask;
+}
+
+void
+xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	/* %*pb prints a bitmap; the width argument is its length in bits */
+	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.g_dss_mask);
+	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.c_dss_mask);
+
+	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
+		   gt->fuse_topo.eu_mask_per_dss);
+}
+
+/*
+ * Used to obtain the index of the first DSS.  Can start searching from the
+ * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
+ * groupsize and groupnum are non-zero.
+ */
+unsigned int
+xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum)
+{
+	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
new file mode 100644
index 000000000000..7a0abc64084f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_GT_TOPOLOGY_H__
+#define __XE_GT_TOPOLOGY_H__
+
+#include "xe_gt_types.h"
+
+struct drm_printer;
+
+void xe_gt_topology_init(struct xe_gt *gt);
+void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
+
+unsigned int xe_gt_topology_count_dss(xe_dss_mask_t mask);
+u64 xe_gt_topology_dss_group_mask(xe_dss_mask_t mask, int grpsize);
+unsigned int xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum);
+
+#endif /* __XE_GT_TOPOLOGY_H__ */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
new file mode 100644
index 000000000000..c80a9215098d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_TYPES_H_
+#define _XE_GT_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_reg_sr_types.h"
+#include "xe_sa_types.h"
+#include "xe_uc_types.h"
+
+struct xe_engine_ops;
+struct xe_ggtt;
+struct xe_migrate;
+struct xe_ring_ops;
+struct xe_ttm_gtt_mgr;
+struct xe_ttm_vram_mgr;
+
+enum xe_gt_type {
+	XE_GT_TYPE_UNINITIALIZED,
+	XE_GT_TYPE_MAIN,
+	XE_GT_TYPE_REMOTE,
+	XE_GT_TYPE_MEDIA,
+};
+
+#define XE_MAX_DSS_FUSE_REGS	2
+#define XE_MAX_EU_FUSE_REGS	1
+/* Fuse bitmaps: 32 bits per fuse register, each sized by its own register count */
+typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
+typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
+
+struct xe_mmio_range {
+	u32 start;
+	u32 end;
+};
+
+/*
+ * The hardware has multiple kinds of multicast register ranges that need
+ * special register steering (and future platforms are expected to add
+ * additional types).
+ *
+ * During driver startup, we initialize the steering control register to
+ * direct reads to a slice/subslice that are valid for the 'subslice' class
+ * of multicast registers.  If another type of steering does not have any
+ * overlap in valid steering targets with 'subslice' style registers, we will
+ * need to explicitly re-steer reads of registers of the other type.
+ *
+ * Only the replication types that may need additional non-default steering
+ * are listed here.
+ */
+enum xe_steering_type {
+	L3BANK,
+	MSLICE,
+	LNCF,
+	DSS,
+	OADDRM,
+
+	/*
+	 * On some platforms there are multiple types of MCR registers that
+	 * will always return a non-terminated value at instance (0, 0).  We'll
+	 * lump those all into a single category to keep things simple.
+	 */
+	INSTANCE0,
+
+	NUM_STEERING_TYPES
+};
+
+/**
+ * struct xe_gt - Top level struct of a graphics tile
+ *
+ * A graphics tile may be a physical split (duplicate pieces of silicon,
+ * different GGTT + VRAM) or a virtual split (shared GGTT + VRAM). Either way
+ * this structure encapsulates everything a GT is (MMIO, VRAM, memory
+ * management, microcontrollers, and a set of hardware engines).
+ */
+struct xe_gt {
+	/** @xe: backpointer to XE device */
+	struct xe_device *xe;
+
+	/** @info: GT info */
+	struct {
+		/** @type: type of GT */
+		enum xe_gt_type type;
+		/** @id: id of GT */
+		u8 id;
+		/** @vram_id: id of the VRAM for this GT */
+		u8 vram_id;
+		/** @clock_freq: clock frequency */
+		u32 clock_freq;
+		/** @engine_mask: mask of engines present on GT */
+		u64 engine_mask;
+	} info;
+
+	/**
+	 * @mmio: mmio info for GT, can be subset of the global device mmio
+	 * space
+	 */
+	struct {
+		/** @size: size of MMIO space on GT */
+		size_t size;
+		/** @regs: pointer to MMIO space on GT */
+		void *regs;
+		/** @fw: force wake for GT */
+		struct xe_force_wake fw;
+		/**
+		 * @adj_limit: adjust MMIO address if address is below this
+		 * value
+		 */
+		u32 adj_limit;
+		/** @adj_offset: offset to add to MMIO address when adjusting */
+		u32 adj_offset;
+	} mmio;
+
+	/**
+	 * @reg_sr: table with registers to be restored on GT init/resume/reset
+	 */
+	struct xe_reg_sr reg_sr;
+
+	/**
+	 * @mem: memory management info for GT, multiple GTs can point to same
+	 * objects (virtual split)
+	 */
+	struct {
+		/**
+		 * @vram: VRAM info for GT, multiple GTs can point to same info
+		 * (virtual split), can be subset of global device VRAM
+		 */
+		struct {
+			/** @io_start: start address of VRAM */
+			resource_size_t io_start;
+			/** @size: size of VRAM */
+			resource_size_t size;
+			/** @mapping: pointer to VRAM mappable space */
+			void __iomem *mapping;
+		} vram;
+		/** @vram_mgr: VRAM TTM manager */
+		struct xe_ttm_vram_mgr *vram_mgr;
+		/** @gtt_mgr: GTT TTM manager */
+		struct xe_ttm_gtt_mgr *gtt_mgr;
+		/** @ggtt: Global graphics translation table */
+		struct xe_ggtt *ggtt;
+	} mem;
+
+	/** @reset: state for GT resets */
+	struct {
+		/**
+		 * @worker: work so GT resets can be done async, allowing the
+		 * reset code to safely flush all code paths
+		 */
+		struct work_struct worker;
+	} reset;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/**
+		 * @bb_pool: Pool from which batchbuffers, for USM operations
+		 * (e.g. migrations, fixing page tables), are allocated.
+		 * Dedicated pool needed so USM operations do not get blocked
+		 * behind any user operations which may have resulted in a
+		 * fault.
+		 */
+		struct xe_sa_manager bb_pool;
+		/**
+		 * @reserved_bcs_instance: reserved BCS instance used for USM
+		 * operations (e.g. migrations, fixing page tables)
+		 */
+		u16 reserved_bcs_instance;
+		/**
+		 * @tlb_invalidation_seqno: TLB invalidation seqno, protected by
+		 * CT lock
+		 */
+#define TLB_INVALIDATION_SEQNO_MAX	0x100000
+		int tlb_invalidation_seqno;
+		/**
+		 * @tlb_invalidation_seqno_recv: last received TLB invalidation
+		 * seqno, protected by CT lock
+		 */
+		int tlb_invalidation_seqno_recv;
+		/** @pf_wq: page fault work queue, unbound, high priority */
+		struct workqueue_struct *pf_wq;
+		/** @acc_wq: access counter work queue, unbound, high priority */
+		struct workqueue_struct *acc_wq;
+		/**
+		 * @pf_queue: Page fault queue used to sync faults so faults can
+		 * be processed not under the GuC CT lock. The queue is sized so
+		 * it can sync all possible faults (1 per physical engine).
+		 * Multiple queues exist so page faults from different VMs can
+		 * be processed in parallel.
+		 */
+		struct pf_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define PF_QUEUE_NUM_DW	128
+			/** @data: data in the page fault queue */
+			u32 data[PF_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for page fault queue,
+			 * moved by worker which processes faults.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for page fault queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects page fault queue */
+			spinlock_t lock;
+			/** @worker: to process page faults */
+			struct work_struct worker;
+#define NUM_PF_QUEUE	4
+		} pf_queue[NUM_PF_QUEUE];
+		/**
+		 * @acc_queue: Same as page fault queue, cannot process access
+		 * counters under CT lock.
+		 */
+		struct acc_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define ACC_QUEUE_NUM_DW	128
+			/** @data: data in the access counter queue */
+			u32 data[ACC_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for access counter queue,
+			 * moved by worker which processes access counters.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for access counter queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects access counter queue */
+			spinlock_t lock;
+			/** @worker: to process access counters */
+			struct work_struct worker;
+#define NUM_ACC_QUEUE	4
+		} acc_queue[NUM_ACC_QUEUE];
+	} usm;
+
+	/** @ordered_wq: used to serialize GT resets and TDRs */
+	struct workqueue_struct *ordered_wq;
+
+	/** @uc: micro controllers on the GT */
+	struct xe_uc uc;
+
+	/** @engine_ops: submission backend engine operations */
+	const struct xe_engine_ops *engine_ops;
+
+	/**
+	 * @ring_ops: ring operations for this hw engine (1 per engine class)
+	 */
+	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];
+
+	/** @fence_irq: fence IRQs (1 per engine class) */
+	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];
+
+	/** @default_lrc: default LRC state */
+	void *default_lrc[XE_ENGINE_CLASS_MAX];
+
+	/** @hw_engines: hardware engines on the GT */
+	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
+
+	/** @kernel_bb_pool: Pool from which batchbuffers are allocated */
+	struct xe_sa_manager kernel_bb_pool;
+
+	/** @migrate: Migration helper for vram blits and clearing */
+	struct xe_migrate *migrate;
+
+	/** @pcode: GT's PCODE */
+	struct {
+		/** @lock: protecting GT's PCODE mailbox data */
+		struct mutex lock;
+	} pcode;
+
+	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
+	struct kobject *sysfs;
+
+	/** @mocs: info */
+	struct {
+		/** @uc_index: UC index */
+		u8 uc_index;
+		/** @wb_index: WB index, only used on L3_CCS platforms */
+		u8 wb_index;
+	} mocs;
+
+	/** @fuse_topo: GT topology reported by fuse registers */
+	struct {
+		/** @g_dss_mask: dual-subslices usable by geometry */
+		xe_dss_mask_t g_dss_mask;
+
+		/** @c_dss_mask: dual-subslices usable by compute */
+		xe_dss_mask_t c_dss_mask;
+
+		/** @eu_mask_per_dss: EU mask per DSS */
+		xe_eu_mask_t eu_mask_per_dss;
+	} fuse_topo;
+
+	/** @steering: register steering for individual HW units */
+	struct {
+		/** @ranges: register ranges used for this steering type */
+		const struct xe_mmio_range *ranges;
+
+		/** @group_target: target to steer accesses to */
+		u16 group_target;
+		/** @instance_target: instance to steer accesses to */
+		u16 instance_target;
+	} steering[NUM_STEERING_TYPES];
+
+	/**
+	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
+	 *    of a steered operation
+	 */
+	spinlock_t mcr_lock;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
new file mode 100644
index 000000000000..3c285d849ef6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -0,0 +1,875 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_guc.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_guc_log.h"
+#include "xe_guc_reg.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_gt.h"
+#include "xe_platform_types.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+#include "xe_mmio.h"
+#include "xe_force_wake.h"
+#include "i915_reg_defs.h"
+#include "gt/intel_gt_regs.h"
+
+/* PVC MOCS index field ORed into GUC_SHIM_CONTROL. TODO: move to common file */
+#define GUC_PVC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
+#define PVC_MOCS_UC_INDEX		1
+#define PVC_GUC_MOCS_INDEX(index)	REG_FIELD_PREP(GUC_PVC_MOCS_INDEX_MASK,\
+						       index)
+
+/* Map a GuC back to the GT that embeds it as gt->uc.guc. */
+static struct xe_gt *guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+/* Map a GuC to its xe device by way of the GT that embeds it. */
+static struct xe_device *guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
+#define GUC_GGTT_TOP    0xFEE00000
+
+/* Return @bo's GGTT address; BUGs if below the WOPCM size or past GUC_GGTT_TOP. */
+static u32 guc_bo_ggtt_addr(struct xe_guc *guc, struct xe_bo *bo)
+{
+	u32 ggtt_addr = xe_bo_ggtt_addr(bo);
+
+	XE_BUG_ON(ggtt_addr < xe_wopcm_size(guc_to_xe(guc)));
+	XE_BUG_ON(range_overflows_t(u32, ggtt_addr, bo->size, GUC_GGTT_TOP));
+	return ggtt_addr;
+}
+
+/*
+ * GUC_CTL_DEBUG value: the log verbosity for verbose log levels, otherwise
+ * GUC_LOG_DISABLED.
+ */
+static u32 guc_ctl_debug_flags(struct xe_guc *guc)
+{
+	u32 level = xe_guc_log_get_level(&guc->log);
+
+	if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
+		return GUC_LOG_DISABLED;
+
+	return GUC_LOG_LEVEL_TO_VERBOSITY(level) << GUC_LOG_VERBOSITY_SHIFT;
+}
+
+static u32 guc_ctl_feature_flags(struct xe_guc *guc)
+{
+	return GUC_CTL_ENABLE_SLPC;	/* the only GUC_CTL_FEATURE bit set */
+}
+
+static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
+{
+	u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT;
+	u32 flags;
+
+	#if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
+	#define LOG_UNIT SZ_1M
+	#define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
+	#else
+	#define LOG_UNIT SZ_4K
+	#define LOG_FLAG 0
+	#endif
+
+	#if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
+	#define CAPTURE_UNIT SZ_1M
+	#define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
+	#else
+	#define CAPTURE_UNIT SZ_4K
+	#define CAPTURE_FLAG 0
+	#endif
+	/* Compile-time checks: sizes are non-zero, unit-aligned and fit their fields */
+	BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
+	BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
+	BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
+
+	BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
+			(GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
+	BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
+			(GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
+	BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
+			(GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
+	/* Sizes are encoded as (units - 1); the log address as a GGTT page number */
+	flags = GUC_LOG_VALID |
+		GUC_LOG_NOTIFY_ON_HALF_FULL |
+		CAPTURE_FLAG |
+		LOG_FLAG |
+		((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
+		((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
+		((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
+		 GUC_LOG_CAPTURE_SHIFT) |
+		(offset << GUC_LOG_BUF_ADDR_SHIFT);
+
+	#undef LOG_UNIT
+	#undef LOG_FLAG
+	#undef CAPTURE_UNIT
+	#undef CAPTURE_FLAG
+
+	return flags;
+}
+
+/* GUC_CTL_ADS value: GGTT page number of the ADS buffer, shifted into place. */
+static u32 guc_ctl_ads_flags(struct xe_guc *guc)
+{
+	u32 ads_page = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT;
+
+	return ads_page << GUC_ADS_ADDR_SHIFT;
+}
+
+/* Collect the GUC_WA_* flags that apply to this device and GT for GUC_CTL_WA. */
+static u32 guc_ctl_wa_flags(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 flags = 0;
+
+	/* Wa_22012773006:gen11,gen12 < XeHP */
+	if (GRAPHICS_VER(xe) >= 11 &&
+	    GRAPHICS_VERx100(xe) < 1250)
+		flags |= GUC_WA_POLLCS;
+
+	/* Wa_16011759253 */
+	/* Wa_22011383443 */
+	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0) ||
+	    IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0))
+		flags |= GUC_WA_GAM_CREDITS;
+
+	/* Wa_14014475959 */
+	if (IS_PLATFORM_STEP(xe, XE_METEORLAKE, STEP_A0, STEP_B0) ||
+	    xe->info.platform == XE_DG2)
+		flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+
+	/*
+	 * Wa_14012197797
+	 * Wa_22011391025
+	 *
+	 * The same WA bit is used for both and 22011391025 is applicable to
+	 * all DG2.
+	 */
+	if (xe->info.platform == XE_DG2)
+		flags |= GUC_WA_DUAL_QUEUE;
+
+	/*
+	 * Wa_2201180203
+	 * GUC_WA_PRE_PARSER causes media workload hang for PVC A0 and PCIe
+	 * errors. Disable this for PVC A0 steppings.
+	 */
+	if (GRAPHICS_VER(xe) <= 12 &&
+	    !IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0))
+		flags |= GUC_WA_PRE_PARSER;
+
+	/* Wa_16011777198 */
+	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
+	    IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0,
+				STEP_B0))
+		flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
+
+	/*
+	 * Wa_22012727170
+	 * Wa_22012727685
+	 *
+	 * This WA is applicable to PVC CT A0, but causes media regressions.
+	 * Drop the WA for PVC.
+	 */
+	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
+	    IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0,
+				STEP_FOREVER))
+		flags |= GUC_WA_CONTEXT_ISOLATION;
+
+	/* Wa_16015675438, Wa_18020744125 */
+	if (!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
+		flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+
+	/* Wa_1509372804 */
+	if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_C0))
+		flags |= GUC_WA_RENDER_RST_RC6_EXIT;
+
+	return flags;
+}
+
+/* GUC_CTL_DEVID value: device id shifted left by 16, ORed with the revision id. */
+static u32 guc_ctl_devid(struct xe_guc *guc)
+{
+	return ((u32)guc_to_xe(guc)->info.devid << 16) |
+	       guc_to_xe(guc)->info.revid;
+}
+
+static void guc_init_params(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 *params = guc->params;
+	int i;
+
+	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
+	BUILD_BUG_ON(SOFT_SCRATCH_COUNT != GUC_CTL_MAX_DWORDS + 2);
+	/* Compute each boot parameter dword; guc_write_params() writes them out */
+	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
+	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
+	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
+	params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+	params[GUC_CTL_WA] = guc_ctl_wa_flags(guc);
+	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
+	/* Log the whole parameter block */
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+/*
+ * Write the GuC parameter block before starting the firmware transfer:
+ * SOFT_SCRATCH(0) is cleared and params[i] goes to SOFT_SCRATCH(1 + i).
+ * The firmware reads these on startup; they cannot be changed thereafter.
+ */
+static void guc_write_params(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int i;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, SOFT_SCRATCH(0).reg, 0);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]);
+}
+
+#define MEDIA_GUC_HOST_INTERRUPT        _MMIO(0x190304)
+
+/* Init GuC fw/log/ADS/CT/PC state and boot params; returns 0 or the first error. */
+int xe_guc_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	int ret;
+
+	guc->fw.type = XE_UC_FW_TYPE_GUC;
+	ret = xe_uc_fw_init(&guc->fw);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_log_init(&guc->log);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ads_init(&guc->ads);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ct_init(&guc->ct);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_pc_init(&guc->pc);
+	if (ret)
+		goto out;
+
+	guc_init_params(guc);
+
+	guc->notify_reg = xe_gt_is_media_type(gt) ?
+			  MEDIA_GUC_HOST_INTERRUPT.reg :
+			  GEN11_GUC_HOST_INTERRUPT.reg;
+
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out:
+	drm_err(&xe->drm, "GuC init failed with %d\n", ret);
+	return ret;
+}
+
+/**
+ * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
+ * @guc: The GuC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_init_post_hwconfig(struct xe_guc *guc)
+{
+	return xe_guc_ads_init_post_hwconfig(&guc->ads);
+}
+
+/* Calls xe_guc_ads_populate_post_load() on the GuC's ADS; always returns 0. */
+int xe_guc_post_load_init(struct xe_guc *guc)
+{
+	xe_guc_ads_populate_post_load(&guc->ads);
+	return 0;
+}
+
+/*
+ * Reset the GuC via GEN6_GDRST and verify its MIA core then reports being in
+ * reset.  Expects XE_FW_GT forcewake to be held.  Returns 0, the wait error
+ * from xe_mmio_wait32(), or -EIO if GS_MIA_IN_RESET is not set in GUC_STATUS.
+ */
+int xe_guc_reset(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 guc_status;
+	int ret;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC);
+
+	ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5);
+	if (ret) {
+		drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%08x\n",
+			xe_mmio_read32(gt, GEN6_GDRST.reg));
+		return ret;
+	}
+
+	guc_status = xe_mmio_read32(gt, GUC_STATUS.reg);
+	if (!(guc_status & GS_MIA_IN_RESET)) {
+		drm_err(&xe->drm,
+			"GuC status: 0x%x, MIA core expected to be in reset\n",
+			guc_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Program GUC_SHIM_CONTROL and GEN9_GT_PM_CONFIG ahead of the firmware DMA. */
+static void guc_prepare_xfer(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
+		GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
+		GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
+		GUC_ENABLE_MIA_CLOCK_GATING;
+
+	if (GRAPHICS_VERx100(xe) < 1250)
+		shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+				GUC_ENABLE_MIA_CACHING;
+
+	if (xe->info.platform == XE_PVC)
+		shim_flags |= PVC_GUC_MOCS_INDEX(PVC_MOCS_UC_INDEX);
+
+	/* Must program this register before loading the ucode with DMA */
+	xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags);
+	xe_mmio_write32(gt, GEN9_GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE);
+}
+
+/*
+ * Pass the RSA to HW: its GGTT address if rsa_size > 256, else a register copy.
+ */
+static int guc_xfer_rsa(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 rsa[UOS_RSA_SCRATCH_COUNT];
+	size_t copied;
+	int i;
+
+	if (guc->fw.rsa_size > 256) {
+		u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) +
+				    xe_uc_fw_rsa_offset(&guc->fw);
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(0).reg, rsa_ggtt_addr);
+		return 0;
+	}
+
+	copied = xe_uc_fw_copy_rsa(&guc->fw, rsa, sizeof(rsa));
+	if (copied < sizeof(rsa))
+		return -ENOMEM;
+
+	for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(i).reg, rsa[i]);
+
+	return 0;
+}
+
+/*
+ * Read the GuC status register (GUC_STATUS) and store it in the
+ * specified location; then return a boolean indicating whether
+ * the uKernel field of the status equals XE_GUC_LOAD_STATUS_READY,
+ * i.e. the GuC boot process has completed.
+ *
+ * This is used for polling the GuC status in a wait_for()
+ * loop below.
+ */
+static bool guc_ready(struct xe_guc *guc, u32 *status)
+{
+	u32 val = xe_mmio_read32(guc_to_gt(guc), GUC_STATUS.reg);
+	u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
+
+	*status = val;
+	return uk_val == XE_GUC_LOAD_STATUS_READY;
+}
+
+static int guc_wait_ucode(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 status;
+	int ret;
+
+	/*
+	 * Wait for the GuC to start up.
+	 * NB: Docs recommend not using the interrupt for completion.
+	 * Measurements indicate this should take no more than 20ms
+	 * (assuming the GT clock is at maximum frequency). So, a
+	 * timeout here indicates that the GuC has failed and is unusable.
+	 * (Higher levels of the driver may decide to reset the GuC and
+	 * attempt the ucode load again if this happens.)
+	 *
+	 * FIXME: There is a known (but exceedingly unlikely) race condition
+	 * where the asynchronous frequency management code could reduce
+	 * the GT clock while a GuC reload is in progress (during a full
+	 * GT reset). A fix is in progress but there are complex locking
+	 * issues to be resolved. In the meantime bump the timeout to
+	 * 200ms. Even at slowest clock, this should be sufficient. And
+	 * in the working case, a larger timeout makes no difference.
+	 */
+	ret = wait_for(guc_ready(guc, &status), 200);
+	if (ret) {
+		struct drm_device *drm = &xe->drm;
+		struct drm_printer p = drm_info_printer(drm->dev);
+
+		drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
+		drm_info(drm, "GuC load failed: status: Reset = %d, "
+			"BootROM = 0x%02X, UKernel = 0x%02X, "
+			"MIA = 0x%02X, Auth = 0x%02X\n",
+			REG_FIELD_GET(GS_MIA_IN_RESET, status),
+			REG_FIELD_GET(GS_BOOTROM_MASK, status),
+			REG_FIELD_GET(GS_UKERNEL_MASK, status),
+			REG_FIELD_GET(GS_MIA_MASK, status),
+			REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+		if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+			drm_info(drm, "GuC firmware signature verification failed\n");
+			ret = -ENOEXEC;
+		}
+
+		if (REG_FIELD_GET(GS_UKERNEL_MASK, status) ==
+		    XE_GUC_LOAD_STATUS_EXCEPTION) {
+			drm_info(drm, "GuC firmware exception. EIP: %#x\n",
+				 xe_mmio_read32(guc_to_gt(guc),
+						SOFT_SCRATCH(13).reg));
+			ret = -ENXIO;
+		}
+
+		xe_guc_log_print(&guc->log, &p);
+	} else {
+		drm_dbg(&xe->drm, "GuC successfully loaded\n");
+	}
+
+	return ret;
+}
+
+static int __xe_guc_upload(struct xe_guc *guc)
+{
+	int ret;
+
+	guc_write_params(guc);
+	guc_prepare_xfer(guc);
+
+	/*
+	 * Note that GuC needs the CSS header plus uKernel code to be copied
+	 * by the DMA engine in one operation, whereas the RSA signature is
+	 * loaded separately, either by copying it to the UOS_RSA_SCRATCH
+	 * register (if key size <= 256) or through a ggtt-pinned vma (if key
+	 * size > 256). The RSA size and therefore the way we provide it to the
+	 * HW is fixed for each platform and hard-coded in the bootrom.
+	 */
+	ret = guc_xfer_rsa(guc);
+	if (ret)
+		goto out;
+	/*
+	 * Current uCode expects the code to be loaded at 8k; locations below
+	 * this are used for the stack.
+	 */
+	ret = xe_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
+	if (ret)
+		goto out;
+
+	/* Poll GUC_STATUS until the firmware reports ready (or times out) */
+	ret = guc_wait_ucode(guc);
+	if (ret)
+		goto out;
+
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
+	return 0;
+
+out:
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+	return 0	/* FIXME: ret, don't want to stop load currently */;
+}
+
+/**
+ * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table
+ * @guc: The GuC object
+ *
+ * This function uploads a minimal GuC that does not support submissions but
+ * in a state where the hwconfig table can be read. Next, it reads and parses
+ * the hwconfig table so it can be used for subsequent steps in the driver load.
+ * Lastly, it enables CT communication (XXX: this is needed for PFs/VFs only).
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
+{
+	int ret;
+
+	/* Step 1: minimal ADS */
+	xe_guc_ads_populate_minimal(&guc->ads);
+
+	/* Step 2: upload the firmware */
+	ret = __xe_guc_upload(guc);
+	if (ret)
+		return ret;
+
+	/* Step 3: hwconfig table */
+	ret = xe_guc_hwconfig_init(guc);
+	if (ret)
+		return ret;
+
+	/* Step 4: CT communication; its result is the overall result */
+	return xe_guc_enable_communication(guc);
+}
+
+/* Populate the ADS via xe_guc_ads_populate(), then run the common upload path. */
+int xe_guc_upload(struct xe_guc *guc)
+{
+	xe_guc_ads_populate(&guc->ads);
+	return __xe_guc_upload(guc);
+}
+
+static void guc_handle_mmio_msg(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 msg;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	/* Read and clear SOFT_SCRATCH(15), keeping only the bits reported below */
+	msg = xe_mmio_read32(gt, SOFT_SCRATCH(15).reg);
+	msg &= XE_GUC_RECV_MSG_EXCEPTION |
+		XE_GUC_RECV_MSG_CRASH_DUMP_POSTED;
+	xe_mmio_write32(gt, SOFT_SCRATCH(15).reg, 0);
+
+	if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
+		drm_err(&guc_to_xe(guc)->drm,
+			"Received early GuC crash dump notification!\n");
+
+	if (msg & XE_GUC_RECV_MSG_EXCEPTION)
+		drm_err(&guc_to_xe(guc)->drm,
+			"Received early GuC exception notification!\n");
+}
+
+static void guc_enable_irq(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 events = xe_gt_is_media_type(gt) ?
+		REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST)  :
+		REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
+	/* NOTE(review): the enable write uses ENGINE1_MASK for both GT types - confirm */
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg,
+			REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
+	if (xe_gt_is_media_type(gt))
+		xe_mmio_rmw32(gt, GEN11_GUC_SG_INTR_MASK.reg, events, 0);
+	else
+		xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~events);
+}
+
+/* Enable GuC interrupts and CT, then handle any early GuC MMIO message. */
+int xe_guc_enable_communication(struct xe_guc *guc)
+{
+	int err;
+
+	guc_enable_irq(guc);
+
+	xe_mmio_rmw32(guc_to_gt(guc), GEN6_PMINTRMSK.reg,
+		      ARAT_EXPIRED_INTRMSK, 0);
+
+	err = xe_guc_ct_enable(&guc->ct);
+	if (err)
+		return err;
+
+	guc_handle_mmio_msg(guc);
+	return 0;
+}
+
+/* Send CLIENT_SOFT_RESET to the GuC over MMIO and, on success, sanitize it. */
+int xe_guc_suspend(struct xe_guc *guc)
+{
+	u32 action[] = { XE_GUC_ACTION_CLIENT_SOFT_RESET };
+	int ret;
+
+	ret = xe_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	if (ret) {
+		drm_err(&guc_to_xe(guc)->drm,
+			"GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret);
+		return ret;
+	}
+
+	xe_guc_sanitize(guc);
+
+	return 0;
+}
+
+/* Write GUC_SEND_TRIGGER to the notify register chosen in xe_guc_init(). */
+void xe_guc_notify(struct xe_guc *guc)
+{
+	xe_mmio_write32(guc_to_gt(guc), guc->notify_reg,
+			GUC_SEND_TRIGGER);
+}
+
+/*
+ * Send XE_GUC_ACTION_AUTHENTICATE_HUC for @rsa_addr via xe_guc_ct_send_block().
+ */
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
+{
+	u32 action[] = { XE_GUC_ACTION_AUTHENTICATE_HUC, rsa_addr };
+
+	return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+#define MEDIA_SOFT_SCRATCH(n)           _MMIO(0x190310 + (n) * 4)
+#define MEDIA_SOFT_SCRATCH_COUNT        4
+
+/*
+ * Send @request (@len dwords) over the soft scratch registers and poll for
+ * the reply; CT must be disabled.  Returns DATA0 of a success reply, -ENXIO
+ * on a failure reply, -EPROTO on a bad reply, or the wait error on timeout.
+ */
+int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 header;
+	u32 reply_reg = xe_gt_is_media_type(gt) ?
+		MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg;
+	int ret;
+	int i;
+
+	XE_BUG_ON(guc->ct.enabled);
+	XE_BUG_ON(!len);
+	XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT);
+	XE_BUG_ON(len > MEDIA_SOFT_SCRATCH_COUNT);
+	XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != GUC_HXG_ORIGIN_HOST);
+	XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != GUC_HXG_TYPE_REQUEST);
+
+retry:
+	/* Not in critical data-path, just do if else for GT type */
+	if (xe_gt_is_media_type(gt)) {
+		for (i = 0; i < len; ++i)
+			xe_mmio_write32(gt, MEDIA_SOFT_SCRATCH(i).reg,
+					request[i]);
+		xe_mmio_read32(gt, MEDIA_SOFT_SCRATCH(MEDIA_SOFT_SCRATCH_COUNT - 1).reg);
+	} else {
+		for (i = 0; i < len; ++i)
+			xe_mmio_write32(gt, GEN11_SOFT_SCRATCH(i).reg,
+					request[i]);
+		xe_mmio_read32(gt, GEN11_SOFT_SCRATCH(GEN11_SOFT_SCRATCH_COUNT - 1).reg);
+	}
+
+	xe_guc_notify(guc);
+
+	ret = xe_mmio_wait32(gt, reply_reg,
+			     FIELD_PREP(GUC_HXG_MSG_0_ORIGIN,
+					GUC_HXG_ORIGIN_GUC),
+			     GUC_HXG_MSG_0_ORIGIN,
+			     50);
+	if (ret) {
+timeout:
+		drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n",
+			request[0], xe_mmio_read32(gt, reply_reg));
+		return ret;
+	}
+
+	header = xe_mmio_read32(gt, reply_reg);
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
+#define done ({ header = xe_mmio_read32(gt, reply_reg); \
+		FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != \
+		GUC_HXG_ORIGIN_GUC || \
+		FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != \
+		GUC_HXG_TYPE_NO_RESPONSE_BUSY; })
+
+		ret = wait_for(done, 1000);
+		if (unlikely(ret))
+			goto timeout;
+		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
+				       GUC_HXG_ORIGIN_GUC))
+			goto proto;
+#undef done
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+		u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header);
+
+		drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %u\n",
+			request[0], reason);
+		goto retry;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_RESPONSE_FAILURE) {
+		u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
+		u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
+
+		drm_err(&xe->drm, "mmio request %#x: failure %x/%u\n",
+			request[0], error, hint);
+		return -ENXIO;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) !=
+	    GUC_HXG_TYPE_RESPONSE_SUCCESS) {
+proto:
+		drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n",
+			request[0], header);
+		return -EPROTO;
+	}
+
+	/* Use data from the GuC response as our return value */
+	return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header);
+}
+
+static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
+{
+	u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+			   GUC_ACTION_HOST2GUC_SELF_CFG),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) |
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32,
+			   lower_32_bits(val)),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64,
+			   upper_32_bits(val)),
+	};
+	int ret;
+
+	XE_BUG_ON(len > 2);
+	XE_BUG_ON(len == 1 && upper_32_bits(val));
+
+	/* Self config must go over MMIO */
+	ret = xe_guc_send_mmio(guc, request, ARRAY_SIZE(request));
+	/* Only a reply value of exactly 1 counts as success; 0 and >1 become errors */
+	if (unlikely(ret < 0))
+		return ret;
+	if (unlikely(ret > 1))
+		return -EPROTO;
+	if (unlikely(!ret))
+		return -ENOKEY;
+
+	return 0;
+}
+
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val)
+{
+	return guc_self_cfg(guc, key, 1, val);	/* len 1: 32-bit value */
+}
+
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val)
+{
+	return guc_self_cfg(guc, key, 2, val);	/* len 2: 64-bit value */
+}
+
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir)
+{
+	if (iir & GUC_INTR_GUC2HOST)	/* other @iir bits are ignored here */
+		xe_guc_ct_irq_handler(&guc->ct);
+}
+
+void xe_guc_sanitize(struct xe_guc *guc)
+{
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);	/* same status xe_guc_init() sets */
+	xe_guc_ct_disable(&guc->ct);
+}
+
+int xe_guc_reset_prepare(struct xe_guc *guc)
+{
+	return xe_guc_submit_reset_prepare(guc);	/* nothing GuC-level to do here */
+}
+
+void xe_guc_reset_wait(struct xe_guc *guc)
+{
+	xe_guc_submit_reset_wait(guc);	/* nothing GuC-level to do here */
+}
+
+void xe_guc_stop_prepare(struct xe_guc *guc)
+{
+	XE_WARN_ON(xe_guc_pc_stop(&guc->pc));	/* a failed PC stop only warns */
+}
+
+/**
+ * xe_guc_stop - disable CT and stop GuC submission
+ * @guc: The GuC object
+ *
+ * Return: the result of xe_guc_submit_stop().
+ */
+int xe_guc_stop(struct xe_guc *guc)
+{
+	xe_guc_ct_disable(&guc->ct);
+
+	return xe_guc_submit_stop(guc);
+}
+
+/* Start GuC submission, then GuC PC; a PC start failure only warns. */
+int xe_guc_start(struct xe_guc *guc)
+{
+	int ret;
+
+	ret = xe_guc_submit_start(guc);
+	if (ret)
+		return ret;
+
+	XE_WARN_ON(xe_guc_pc_start(&guc->pc));
+
+	return 0;
+}
+
+/* Print firmware info, GUC_STATUS fields, scratch registers, CT and submission state. */
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 status;
+	int err;
+	int i;
+
+	xe_uc_fw_print(&guc->fw, p);
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return;
+
+	status = xe_mmio_read32(gt, GUC_STATUS.reg);
+
+	drm_printf(p, "\nGuC status 0x%08x:\n", status);
+	drm_printf(p, "\tBootrom status = 0x%x\n",
+		   (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
+	drm_printf(p, "\tuKernel status = 0x%x\n",
+		   (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
+	drm_printf(p, "\tMIA Core status = 0x%x\n",
+		   (status & GS_MIA_MASK) >> GS_MIA_SHIFT);
+	drm_printf(p, "\tLog level = %d\n",
+		   xe_guc_log_get_level(&guc->log));
+
+	drm_puts(p, "\nScratch registers:\n");
+	for (i = 0; i < SOFT_SCRATCH_COUNT; i++) {
+		drm_printf(p, "\t%2d: \t0x%x\n",
+			   i, xe_mmio_read32(gt, SOFT_SCRATCH(i).reg));
+	}
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	xe_guc_ct_print(&guc->ct, p);
+	xe_guc_submit_print(guc, p);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
new file mode 100644
index 000000000000..72b71d75566c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_H_
+#define _XE_GUC_H_
+
+#include "xe_hw_engine_types.h"
+#include "xe_guc_types.h"
+#include "xe_macros.h"
+
+struct drm_printer;
+
+int xe_guc_init(struct xe_guc *guc);
+int xe_guc_init_post_hwconfig(struct xe_guc *guc);
+int xe_guc_post_load_init(struct xe_guc *guc);
+int xe_guc_reset(struct xe_guc *guc);
+int xe_guc_upload(struct xe_guc *guc);
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc);
+int xe_guc_enable_communication(struct xe_guc *guc);
+int xe_guc_suspend(struct xe_guc *guc);
+void xe_guc_notify(struct xe_guc *guc);
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
+int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len);
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val);
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val);
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir);
+void xe_guc_sanitize(struct xe_guc *guc);
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p);
+int xe_guc_reset_prepare(struct xe_guc *guc);
+void xe_guc_reset_wait(struct xe_guc *guc);
+void xe_guc_stop_prepare(struct xe_guc *guc);
+int xe_guc_stop(struct xe_guc *guc);
+int xe_guc_start(struct xe_guc *guc);
+
+/*
+ * Translate a driver engine class into the matching GuC engine class.
+ *
+ * XE_ENGINE_CLASS_OTHER and unknown values have no GuC equivalent: they
+ * trigger a warning and yield (u16)-1.
+ */
+static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
+{
+	u16 guc_class;
+
+	switch (class) {
+	case XE_ENGINE_CLASS_RENDER:
+		guc_class = GUC_RENDER_CLASS;
+		break;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		guc_class = GUC_VIDEO_CLASS;
+		break;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		guc_class = GUC_VIDEOENHANCE_CLASS;
+		break;
+	case XE_ENGINE_CLASS_COPY:
+		guc_class = GUC_BLITTER_CLASS;
+		break;
+	case XE_ENGINE_CLASS_COMPUTE:
+		guc_class = GUC_COMPUTE_CLASS;
+		break;
+	case XE_ENGINE_CLASS_OTHER:
+	default:
+		XE_WARN_ON(class);
+		guc_class = -1;
+		break;
+	}
+
+	return guc_class;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
new file mode 100644
index 000000000000..0c08cecaca40
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_reg.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_engine_regs.h"
+
+/* Slack of a few additional entries per engine */
+#define ADS_REGSET_EXTRA_MAX	8
+
+/* Return the struct xe_guc that embeds @ads as its 'ads' member. */
+static struct xe_guc *
+ads_to_guc(struct xe_guc_ads *ads)
+{
+	return container_of(ads, struct xe_guc, ads);
+}
+
+/* Return the struct xe_gt that embeds @ads at uc.guc.ads. */
+static struct xe_gt *
+ads_to_gt(struct xe_guc_ads *ads)
+{
+	return container_of(ads, struct xe_gt, uc.guc.ads);
+}
+
+/* Return the xe device owning the GT that embeds @ads. */
+static struct xe_device *
+ads_to_xe(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+
+	return gt_to_xe(gt);
+}
+
+/* Return the iosys_map (vmap) of the ADS buffer object; ads->bo must be set. */
+static struct iosys_map *
+ads_to_map(struct xe_guc_ads *ads)
+{
+	return &ads->bo->vmap;
+}
+
+/* UM Queue parameters: */
+#define GUC_UM_QUEUE_SIZE       (SZ_64K)
+#define GUC_PAGE_RES_TIMEOUT_US (-1)
+
+/*
+ * The Additional Data Struct (ADS) has pointers for different buffers used by
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
+ * all the extra buffers indirectly linked via the ADS struct's entries.
+ *
+ * Layout of the ADS blob allocated for the GuC:
+ *
+ *      +---------------------------------------+ <== base
+ *      | guc_ads                               |
+ *      +---------------------------------------+
+ *      | guc_policies                          |
+ *      +---------------------------------------+
+ *      | guc_gt_system_info                    |
+ *      +---------------------------------------+
+ *      | guc_engine_usage                      |
+ *      +---------------------------------------+
+ *      | guc_um_init_params                    |
+ *      +---------------------------------------+ <== static
+ *      | guc_mmio_reg[countA] (engine 0.0)     |
+ *      | guc_mmio_reg[countB] (engine 0.1)     |
+ *      | guc_mmio_reg[countC] (engine 1.0)     |
+ *      |   ...                                 |
+ *      +---------------------------------------+ <== dynamic
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | golden contexts                       |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | capture lists                         |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | UM queues                             |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | private data                          |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ */
+/*
+ * Fixed-size head of the ADS blob followed by the MMIO register table. The
+ * regions after @regset are located through the guc_ads_*_offset() helpers
+ * rather than through struct members.
+ */
+struct __guc_ads_blob {
+	struct guc_ads ads;
+	struct guc_policies policies;
+	struct guc_gt_system_info system_info;
+	struct guc_engine_usage engine_usage;
+	struct guc_um_init_params um_init_params;
+	/* From here on, location is dynamic! Refer to above diagram. */
+	struct guc_mmio_reg regset[];
+} __packed;
+
+/* Read @field_ of struct __guc_ads_blob from offset 0 of the ADS BO mapping */
+#define ads_blob_read(ads_, field_) \
+	xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
+			struct __guc_ads_blob, field_)
+
+/* Write @val_ to @field_ of struct __guc_ads_blob in the ADS BO mapping */
+#define ads_blob_write(ads_, field_, val_)			\
+	xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0,	\
+			struct __guc_ads_blob, field_, val_)
+
+/*
+ * Write @val_ to @field_ of a struct guc_gt_system_info; @map_ must already
+ * point at the start of that struct.
+ */
+#define info_map_write(xe_, map_, field_, val_) \
+	xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)
+
+/*
+ * Read @field_ of a struct guc_gt_system_info; @map_ must already point at
+ * the start of that struct.
+ */
+#define info_map_read(xe_, map_, field_) \
+	xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)
+
+/* Size in bytes of the MMIO register table; it must have been calculated. */
+static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
+{
+	size_t size = ads->regset_size;
+
+	XE_BUG_ON(!size);
+
+	return size;
+}
+
+/* Page-aligned size of the golden context region. */
+static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
+{
+	return PAGE_ALIGN(ads->golden_lrc_size);
+}
+
+/* Size of the capture list region: currently a single placeholder page. */
+static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
+{
+	/* FIXME: Allocate a proper capture list */
+	return PAGE_ALIGN(PAGE_SIZE);
+}
+
+/*
+ * Size of the UM queues region: GUC_UM_HW_QUEUE_MAX queues of
+ * GUC_UM_QUEUE_SIZE bytes each, or nothing when the device lacks USM support.
+ */
+static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
+{
+	return ads_to_xe(ads)->info.supports_usm ?
+		GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX : 0;
+}
+
+/* Page-aligned size of the private data region, taken from the GuC firmware. */
+static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
+{
+	struct xe_guc *guc = ads_to_guc(ads);
+
+	return PAGE_ALIGN(guc->fw.private_data_size);
+}
+
+/* Offset of the MMIO register table: right after the fixed-size blob head. */
+static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
+{
+	return offsetof(struct __guc_ads_blob, regset);
+}
+
+/* Page-aligned offset of the golden contexts, which follow the regset. */
+static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
+{
+	size_t regset_end = guc_ads_regset_offset(ads);
+
+	regset_end += guc_ads_regset_size(ads);
+
+	return PAGE_ALIGN(regset_end);
+}
+
+/* Page-aligned offset of the capture lists, which follow the golden contexts. */
+static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
+{
+	size_t golden_end = guc_ads_golden_lrc_offset(ads);
+
+	golden_end += guc_ads_golden_lrc_size(ads);
+
+	return PAGE_ALIGN(golden_end);
+}
+
+/* Page-aligned offset of the UM queues, which follow the capture lists. */
+static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
+{
+	/* size_t like the sibling offset helpers; avoids a needless narrowing */
+	size_t offset;
+
+	offset = guc_ads_capture_offset(ads) +
+		 guc_ads_capture_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+/* Page-aligned offset of the private data, which follows the UM queues. */
+static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
+{
+	size_t um_queues_end = guc_ads_um_queues_offset(ads);
+
+	um_queues_end += guc_ads_um_queues_size(ads);
+
+	return PAGE_ALIGN(um_queues_end);
+}
+
+/* Total size of the ADS blob: the end of its last region (private data). */
+static size_t guc_ads_size(struct xe_guc_ads *ads)
+{
+	size_t total = guc_ads_private_data_offset(ads);
+
+	total += guc_ads_private_data_size(ads);
+
+	return total;
+}
+
+/*
+ * DRM managed release action registered by xe_guc_ads_init(): unpin and unmap
+ * the ADS buffer object. @arg is the struct xe_guc_ads.
+ */
+static void guc_ads_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_ads *ads = arg;
+
+	xe_bo_unpin_map_no_vm(ads->bo);
+}
+
+/*
+ * Upper bound, in bytes, of the MMIO register table: one entry per reg_sr
+ * entry of every engine, plus room for the extra and LNCFCMOCS registers for
+ * XE_NUM_HW_ENGINES engines.
+ */
+static size_t calculate_regset_size(struct xe_gt *gt)
+{
+	struct xe_reg_sr_entry *sr_entry;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	unsigned long sr_idx;
+	unsigned int n_regs = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) {
+			n_regs++;
+		}
+	}
+
+	n_regs += (ADS_REGSET_EXTRA_MAX + LNCFCMOCS_REG_COUNT) * XE_NUM_HW_ENGINES;
+
+	return n_regs * sizeof(struct guc_mmio_reg);
+}
+
+/* Bitmask of the instances of @class among the hardware engines of @gt. */
+static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 instances = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (hwe->class != class)
+			continue;
+
+		instances |= BIT(hwe->instance);
+	}
+
+	return instances;
+}
+
+/*
+ * Sum of the page-aligned LRC sizes of every engine class that has at least
+ * one engine on this GT. XE_ENGINE_CLASS_OTHER is never counted.
+ */
+static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	size_t total = 0;
+	int class;
+
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		size_t lrc_size;
+
+		if (class == XE_ENGINE_CLASS_OTHER ||
+		    !engine_enable_mask(gt, class))
+			continue;
+
+		lrc_size = xe_lrc_size(xe, class);
+		total += PAGE_ALIGN(lrc_size);
+	}
+
+	return total;
+}
+
+#define MAX_GOLDEN_LRC_SIZE	(SZ_4K * 64)
+
+/*
+ * Calculate the initial golden LRC and regset sizes, allocate the ADS buffer
+ * object with MAX_GOLDEN_LRC_SIZE bytes of slack on top of the calculated
+ * size, and register its release as a DRM managed action.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_ads_init(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct xe_bo *bo;
+	size_t bo_size;
+
+	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+	ads->regset_size = calculate_regset_size(gt);
+
+	/* The sizes above feed into guc_ads_size(), so compute it only now */
+	bo_size = guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE;
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, bo_size,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ads->bo = bo;
+
+	return drmm_add_action_or_reset(&xe->drm, guc_ads_fini, ads);
+}
+
+/**
+ * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
+ * @ads: Additional data structures object
+ *
+ * Recalculate golden_lrc_size & regset_size as the number of hardware engines
+ * may have changed after the hwconfig was loaded. Also verify the new sizes
+ * fit in the already allocated ADS buffer object; a violation only triggers a
+ * warning.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+	u32 prev_regset_size = ads->regset_size;
+	size_t regset_growth = 0;
+
+	XE_BUG_ON(!ads->bo);
+
+	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+	ads->regset_size = calculate_regset_size(gt);
+
+	/*
+	 * Only a growing regset eats into the slack. Subtracting
+	 * unconditionally would wrap around in unsigned arithmetic when the
+	 * regset shrinks and raise a bogus warning.
+	 */
+	if (ads->regset_size > prev_regset_size)
+		regset_growth = ads->regset_size - prev_regset_size;
+
+	XE_WARN_ON(ads->golden_lrc_size + regset_growth >
+		   MAX_GOLDEN_LRC_SIZE);
+
+	return 0;
+}
+
+/*
+ * Fill the scheduling policies section of the ADS blob with the default DPC
+ * promote time and work item limit, clear the global flags and mark the
+ * policies as valid.
+ */
+static void guc_policies_init(struct xe_guc_ads *ads)
+{
+	ads_blob_write(ads, policies.dpc_promote_time,
+		       GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
+	ads_blob_write(ads, policies.max_num_work_items,
+		       GLOBAL_POLICY_MAX_NUM_WI);
+	ads_blob_write(ads, policies.global_flags, 0);
+	ads_blob_write(ads, policies.is_valid, 1);
+}
+
+/*
+ * Write the per-GuC-class engine instance masks into the system info section.
+ * @info_map must point at the struct guc_gt_system_info inside the ADS blob.
+ */
+static void fill_engine_enable_masks(struct xe_gt *gt,
+				     struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
+	info_map_write(xe, info_map,
+		       engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
+}
+
+/*
+ * Point every enabled GuC engine class at the start of the golden context
+ * region, using the whole (page-aligned) region minus the LRC skip size as
+ * engine state size. Classes whose engine_enabled_masks entry in the system
+ * info section is zero are left untouched.
+ */
+static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u8 guc_class;
+
+	/*
+	 * Valid classes are 0..GUC_MAX_ENGINE_CLASSES - 1; iterating up to and
+	 * including GUC_MAX_ENGINE_CLASSES would index one element past the
+	 * per-class arrays.
+	 */
+	for (guc_class = 0; guc_class < GUC_MAX_ENGINE_CLASSES; ++guc_class) {
+		if (!info_map_read(xe, &info_map,
+				   engine_enabled_masks[guc_class]))
+			continue;
+
+		ads_blob_write(ads, ads.eng_state_size[guc_class],
+			       guc_ads_golden_lrc_size(ads) -
+			       xe_lrc_skip_size(xe));
+		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+			       xe_bo_ggtt_addr(ads->bo) +
+			       guc_ads_golden_lrc_offset(ads));
+	}
+}
+
+/*
+ * Fill the whole class/instance mapping table with
+ * GUC_MAX_INSTANCES_PER_CLASS, the value used for unused entries.
+ */
+static void guc_mapping_table_init_invalid(struct xe_gt *gt,
+					   struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	unsigned int cls, inst;
+
+	for (cls = 0; cls < GUC_MAX_ENGINE_CLASSES; ++cls) {
+		for (inst = 0; inst < GUC_MAX_INSTANCES_PER_CLASS; ++inst) {
+			/* Table must be set to invalid values for entries not used */
+			info_map_write(xe, info_map, mapping_table[cls][inst],
+				       GUC_MAX_INSTANCES_PER_CLASS);
+		}
+	}
+}
+
+/*
+ * Reset the mapping table to invalid entries, then record for every hardware
+ * engine its instance under [GuC class][logical instance].
+ */
+static void guc_mapping_table_init(struct xe_gt *gt,
+				   struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	guc_mapping_table_init_invalid(gt, info_map);
+
+	for_each_hw_engine(hwe, gt, id) {
+		u8 guc_class = xe_engine_class_to_guc_class(hwe->class);
+
+		info_map_write(xe, info_map,
+			       mapping_table[guc_class][hwe->logical_instance],
+			       hwe->instance);
+	}
+}
+
+/*
+ * Point every capture list entry (per-instance, per-class and global) at the
+ * start of the capture region of the ADS BO.
+ */
+static void guc_capture_list_init(struct xe_guc_ads *ads)
+{
+	u32 capture_ggtt = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads);
+	int list, cls;
+
+	/* FIXME: Populate a proper capture list */
+	for (list = 0; list < GUC_CAPTURE_LIST_INDEX_MAX; list++) {
+		for (cls = 0; cls < GUC_MAX_ENGINE_CLASSES; cls++) {
+			ads_blob_write(ads, ads.capture_instance[list][cls],
+				       capture_ggtt);
+			ads_blob_write(ads, ads.capture_class[list][cls],
+				       capture_ggtt);
+		}
+
+		ads_blob_write(ads, ads.capture_global[list], capture_ggtt);
+	}
+}
+
+/*
+ * Store one register entry (@reg with @flags) in slot @n_entry of the table
+ * that @regset_map points to.
+ */
+static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
+				      struct iosys_map *regset_map,
+				      u32 reg, u32 flags,
+				      unsigned int n_entry)
+{
+	struct guc_mmio_reg entry = {
+		.flags = flags,
+		.offset = reg,
+		/* TODO: steering */
+	};
+	size_t slot_offset = n_entry * sizeof(entry);
+
+	xe_map_memcpy_to(ads_to_xe(ads), regset_map, slot_offset,
+			 &entry, sizeof(entry));
+}
+
+/*
+ * Write the register list of @hwe into @regset_map, starting at entry 0 of
+ * the map, and return the number of entries written.
+ *
+ * The list consists of every entry in hwe->reg_sr, the non-skipped entries of
+ * extra_regs[] and LNCFCMOCS_REG_COUNT LNCFCMOCS registers.
+ */
+static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
+					  struct iosys_map *regset_map,
+					  struct xe_hw_engine *hwe)
+{
+	struct xe_hw_engine *hwe_rcs_reset_domain =
+		xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
+	struct xe_reg_sr_entry *entry;
+	unsigned long idx;
+	unsigned count = 0;
+	const struct {
+		u32 reg;
+		u32 flags;
+		bool skip;
+	} *e, extra_regs[] = {
+		{ .reg = RING_MODE_GEN7(hwe->mmio_base).reg,		},
+		{ .reg = RING_HWS_PGA(hwe->mmio_base).reg,		},
+		{ .reg = RING_IMR(hwe->mmio_base).reg,			},
+		/* Only added for the engine picked for the render reset domain */
+		{ .reg = GEN12_RCU_MODE.reg, .flags = 0x3,
+		  .skip = hwe != hwe_rcs_reset_domain			},
+	};
+	u32 i;
+
+	/* calculate_regset_size() reserves ADS_REGSET_EXTRA_MAX extras per engine */
+	BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
+
+	/* The xarray index is used as the register offset */
+	xa_for_each(&hwe->reg_sr.xa, idx, entry) {
+		u32 flags = entry->masked_reg ? GUC_REGSET_MASKED : 0;
+
+		guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++);
+	}
+
+	for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
+		if (e->skip)
+			continue;
+
+		guc_mmio_regset_write_one(ads, regset_map,
+					  e->reg, e->flags, count++);
+	}
+
+	for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
+		guc_mmio_regset_write_one(ads, regset_map,
+					  GEN9_LNCFCMOCS(i).reg, 0, count++);
+	}
+
+	/* Checked after the fact: the entries above have already been written */
+	XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg)));
+
+	return count;
+}
+
+/*
+ * Fill the MMIO register table of the ADS and, for every engine that has
+ * entries, record the table location and length in ads.reg_state_list.
+ */
+static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
+{
+	size_t table_offset = guc_ads_regset_offset(ads);
+	struct iosys_map cursor = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+							table_offset);
+	u32 ggtt_addr = xe_bo_ggtt_addr(ads->bo) + table_offset;
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		unsigned int n_regs;
+		size_t n_bytes;
+		u8 guc_class;
+
+		/*
+		 * Write all MMIO entries of this engine to the table. Engines
+		 * without entries are skipped; reg_state_list needs no
+		 * clearing here as it has been zero'ed by
+		 * xe_guc_ads_populate().
+		 */
+		n_regs = guc_mmio_regset_write(ads, &cursor, hwe);
+		if (!n_regs)
+			continue;
+
+		/* Record where this engine's entries live and how many */
+		guc_class = xe_engine_class_to_guc_class(hwe->class);
+		ads_blob_write(ads,
+			       ads.reg_state_list[guc_class][hwe->instance].address,
+			       ggtt_addr);
+		ads_blob_write(ads,
+			       ads.reg_state_list[guc_class][hwe->instance].count,
+			       n_regs);
+
+		/* Advance both views of the table past what was just written */
+		n_bytes = n_regs * sizeof(struct guc_mmio_reg);
+		ggtt_addr += n_bytes;
+		iosys_map_incr(&cursor, n_bytes);
+	}
+}
+
+/*
+ * Describe the GUC_UM_HW_QUEUE_MAX UM queues, laid out back to back in the UM
+ * queues region, by their address from xe_bo_main_addr(), their GGTT address
+ * and their size, and set the page response timeout.
+ */
+static void guc_um_init_params(struct xe_guc_ads *ads)
+{
+	u32 queues_offset = guc_ads_um_queues_offset(ads);
+	u32 queue_ggtt = xe_bo_ggtt_addr(ads->bo) + queues_offset;
+	u64 queue_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + queues_offset;
+	int q;
+
+	for (q = 0; q < GUC_UM_HW_QUEUE_MAX; ++q) {
+		ads_blob_write(ads, um_init_params.queue_params[q].base_dpa,
+			       queue_dpa);
+		ads_blob_write(ads, um_init_params.queue_params[q].base_ggtt_address,
+			       queue_ggtt);
+		ads_blob_write(ads, um_init_params.queue_params[q].size_in_bytes,
+			       GUC_UM_QUEUE_SIZE);
+
+		/* Step to the next queue */
+		queue_dpa += GUC_UM_QUEUE_SIZE;
+		queue_ggtt += GUC_UM_QUEUE_SIZE;
+	}
+
+	ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
+		       GUC_PAGE_RES_TIMEOUT_US);
+}
+
+/*
+ * On integrated parts with graphics version 12 or later, report the number of
+ * doorbells per SQIDI (read from GEN12_DIST_DBS_POPULATED, plus one) in the
+ * generic GT system info. Nothing is written on other platforms.
+ */
+static void guc_doorbell_init(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	u32 distdbreg;
+
+	if (GRAPHICS_VER(xe) < 12 || IS_DGFX(xe))
+		return;
+
+	distdbreg = xe_mmio_read32(gt, GEN12_DIST_DBS_POPULATED.reg);
+
+	ads_blob_write(ads,
+		       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
+		       ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT)
+			& GEN12_DOORBELLS_PER_SQIDI) + 1);
+}
+
+/**
+ * xe_guc_ads_populate_minimal - populate minimal ADS
+ * @ads: Additional data structures object
+ *
+ * This function populates a minimal ADS that does not support submissions but
+ * enough so the GuC can load and the hwconfig table can be read. The whole
+ * ADS buffer object is cleared first.
+ */
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map;
+	u32 base;
+
+	/* Validate the BO before anything below dereferences it */
+	XE_BUG_ON(!ads->bo);
+
+	info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	base = xe_bo_ggtt_addr(ads->bo);
+
+	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+	guc_policies_init(ads);
+	guc_prep_golden_lrc_null(ads);
+	guc_mapping_table_init_invalid(gt, &info_map);
+	guc_doorbell_init(ads);
+
+	/* Link the sections into the ADS header by GGTT address */
+	ads_blob_write(ads, ads.scheduler_policies, base +
+		       offsetof(struct __guc_ads_blob, policies));
+	ads_blob_write(ads, ads.gt_system_info, base +
+		       offsetof(struct __guc_ads_blob, system_info));
+	ads_blob_write(ads, ads.private_data, base +
+		       guc_ads_private_data_offset(ads));
+}
+
+/*
+ * Populate the full ADS: clear the buffer object, then fill in policies,
+ * engine enable masks, the MMIO register table, placeholder golden context
+ * pointers, the engine mapping table, capture lists, doorbell info and, when
+ * USM is supported, the UM queue parameters. Finally link the sections into
+ * the ADS header by GGTT address.
+ */
+void xe_guc_ads_populate(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map;
+	u32 base;
+
+	/* Validate the BO before anything below dereferences it */
+	XE_BUG_ON(!ads->bo);
+
+	info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	base = xe_bo_ggtt_addr(ads->bo);
+
+	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+	guc_policies_init(ads);
+	fill_engine_enable_masks(gt, &info_map);
+	guc_mmio_reg_state_init(ads);
+	guc_prep_golden_lrc_null(ads);
+	guc_mapping_table_init(gt, &info_map);
+	guc_capture_list_init(ads);
+	guc_doorbell_init(ads);
+
+	if (xe->info.supports_usm) {
+		guc_um_init_params(ads);
+		ads_blob_write(ads, ads.um_init_data, base +
+			       offsetof(struct __guc_ads_blob, um_init_params));
+	}
+
+	ads_blob_write(ads, ads.scheduler_policies, base +
+		       offsetof(struct __guc_ads_blob, policies));
+	ads_blob_write(ads, ads.gt_system_info, base +
+		       offsetof(struct __guc_ads_blob, system_info));
+	ads_blob_write(ads, ads.private_data, base +
+		       guc_ads_private_data_offset(ads));
+}
+
+/*
+ * Copy the default LRC of every enabled engine class into the golden context
+ * region of the ADS BO, one page-aligned slot per class, and record each
+ * slot's GGTT address and engine state size in the ADS header.
+ */
+static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	size_t total_size = 0, alloc_size, real_size;
+	u32 addr_ggtt, offset;
+	int class;
+
+	offset = guc_ads_golden_lrc_offset(ads);
+	addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
+
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		u8 guc_class;
+
+		if (class == XE_ENGINE_CLASS_OTHER)
+			continue;
+
+		guc_class = xe_engine_class_to_guc_class(class);
+
+		/* Skip classes without any enabled engine */
+		if (!info_map_read(xe, &info_map,
+				   engine_enabled_masks[guc_class]))
+			continue;
+
+		XE_BUG_ON(!gt->default_lrc[class]);
+
+		real_size = xe_lrc_size(xe, class);
+		alloc_size = PAGE_ALIGN(real_size);
+		total_size += alloc_size;
+
+		/*
+		 * This interface is slightly confusing. We need to pass the
+		 * base address of the full golden context and the size of just
+		 * the engine state, which is the section of the context image
+		 * that starts after the execlists LRC registers. This is
+		 * required to allow the GuC to restore just the engine state
+		 * when a watchdog reset occurs.
+		 * We calculate the engine state size by removing the size of
+		 * what comes before it in the context image (which is identical
+		 * on all engines).
+		 */
+		ads_blob_write(ads, ads.eng_state_size[guc_class],
+			       real_size - xe_lrc_skip_size(xe));
+		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+			       addr_ggtt);
+
+		xe_map_memcpy_to(xe, ads_to_map(ads), offset,
+				 gt->default_lrc[class], real_size);
+
+		addr_ggtt += alloc_size;
+		offset += alloc_size;
+	}
+
+	/* Must match what calculate_golden_lrc_size() accounted for */
+	XE_BUG_ON(total_size != ads->golden_lrc_size);
+}
+
+/* Fill the golden context region of the ADS with the GT's default LRCs. */
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
+{
+	guc_populate_golden_lrc(ads);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h
new file mode 100644
index 000000000000..138ef6267671
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_H_
+#define _XE_GUC_ADS_H_
+
+#include "xe_guc_ads_types.h"
+
+int xe_guc_ads_init(struct xe_guc_ads *ads);
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads);
+void xe_guc_ads_populate(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
new file mode 100644
index 000000000000..4afe44bece4b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_TYPES_H_
+#define _XE_GUC_ADS_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_ads - GuC additional data structures (ADS)
+ */
+struct xe_guc_ads {
+	/** @bo: XE BO for GuC ads blob */
+	struct xe_bo *bo;
+	/** @golden_lrc_size: golden LRC size, in bytes */
+	size_t golden_lrc_size;
+	/**
+	 * @regset_size: size, in bytes, of register set passed to GuC for
+	 * save/restore
+	 */
+	u32 regset_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
new file mode 100644
index 000000000000..61a424c41779
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -0,0 +1,1196 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_gt_pagefault.h"
+#include "xe_guc_submit.h"
+#include "xe_map.h"
+#include "xe_trace.h"
+
+/* Used when a CT send wants to block and / or receive data */
+struct g2h_fence {
+	/* NOTE(review): not set up by g2h_fence_init() - confirm who initializes it */
+	wait_queue_head_t wq;
+	/* Caller-provided buffer passed to g2h_fence_init() */
+	u32 *response_buffer;
+	/* ~0x0 until a sequence number is assigned, see g2h_fence_needs_alloc() */
+	u32 seqno;
+	/* Reset to 0 by g2h_fence_init() */
+	u16 response_len;
+	/* error, hint and reason are not reset by g2h_fence_init() */
+	u16 error;
+	u16 hint;
+	u16 reason;
+	/* Status flags, all cleared by g2h_fence_init() */
+	bool retry;
+	bool fail;
+	bool done;
+};
+
+/*
+ * Prepare @g2h_fence for a new send: no sequence number assigned yet (~0x0),
+ * status flags cleared and an empty response in @response_buffer. The wq,
+ * error, hint and reason members are left untouched.
+ */
+static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
+{
+	g2h_fence->seqno = ~0x0;
+
+	g2h_fence->done = false;
+	g2h_fence->retry = false;
+	g2h_fence->fail = false;
+
+	g2h_fence->response_len = 0;
+	g2h_fence->response_buffer = response_buffer;
+}
+
+/* True while the fence still carries the ~0x0 seqno set by g2h_fence_init(). */
+static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
+{
+	return g2h_fence->seqno == ~0x0;
+}
+
+/* Return the struct xe_guc that embeds @ct as its 'ct' member. */
+static struct xe_guc *
+ct_to_guc(struct xe_guc_ct *ct)
+{
+	return container_of(ct, struct xe_guc, ct);
+}
+
+/* Return the struct xe_gt that embeds @ct at uc.guc.ct. */
+static struct xe_gt *
+ct_to_gt(struct xe_guc_ct *ct)
+{
+	return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+/* Return the xe device owning the GT that embeds @ct. */
+static struct xe_device *
+ct_to_xe(struct xe_guc_ct *ct)
+{
+	struct xe_gt *gt = ct_to_gt(ct);
+
+	return gt_to_xe(gt);
+}
+
+/**
+ * DOC: GuC CTB Blob
+ *
+ * We allocate a single blob to hold both CTB descriptors and buffers:
+ *
+ *      +--------+-----------------------------------------------+------+
+ *      | offset | contents                                      | size |
+ *      +========+===============================================+======+
+ *      | 0x0000 | H2G CTB Descriptor (send)                     |      |
+ *      +--------+-----------------------------------------------+  4K  |
+ *      | 0x0800 | G2H CTB Descriptor (g2h)                      |      |
+ *      +--------+-----------------------------------------------+------+
+ *      | 0x1000 | H2G CT Buffer (send)                          | n*4K |
+ *      |        |                                               |      |
+ *      +--------+-----------------------------------------------+------+
+ *      | 0x1000 | G2H CT Buffer (g2h)                           | m*4K |
+ *      | + n*4K |                                               |      |
+ *      +--------+-----------------------------------------------+------+
+ *
+ * Size of each ``CT Buffer`` must be multiple of 4K.
+ * We don't expect too many messages in flight at any time, unless we are
+ * using the GuC submission. In that case each request requires a minimum
+ * 2 dwords which gives us a maximum of 256 queued requests. Hopefully this is
+ * enough space to avoid backpressure on the driver. We increase the size
+ * of the receive buffer (relative to the send) to ensure a G2H response
+ * CTB has a landing spot.
+ */
+
+/* Each CTB descriptor is padded to a multiple of 2K */
+#define CTB_DESC_SIZE		ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_SIZE	(SZ_4K)
+/* The G2H buffer is four times the size of the H2G buffer */
+#define CTB_G2H_BUFFER_SIZE	(4 * CTB_H2G_BUFFER_SIZE)
+/* Quarter of the G2H buffer, used as its reserved space (resv_space) */
+#define G2H_ROOM_BUFFER_SIZE	(CTB_G2H_BUFFER_SIZE / 4)
+
+/* Size of the CT blob: both descriptors followed by the H2G and G2H buffers. */
+static size_t guc_ct_size(void)
+{
+	size_t descs = 2 * CTB_DESC_SIZE;
+	size_t buffers = CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE;
+
+	return descs + buffers;
+}
+
+/*
+ * DRM managed release action registered by xe_guc_ct_init(): destroy the
+ * fence lookup xarray and unpin/unmap the CT buffer object. @arg is the
+ * struct xe_guc_ct.
+ */
+static void guc_ct_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_ct *ct = arg;
+
+	xa_destroy(&ct->fence_lookup);
+	xe_bo_unpin_map_no_vm(ct->bo);
+}
+
+static void g2h_worker_func(struct work_struct *w);
+
+/*
+ * With lockdep enabled, record up front that ct->lock may be taken while in
+ * the fs_reclaim context. Does nothing without CONFIG_LOCKDEP.
+ */
+static void primelockdep(struct xe_guc_ct *ct)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+	might_lock(&ct->lock);
+	fs_reclaim_release(GFP_KERNEL);
+}
+
+/*
+ * Initialize the software state of the CT channel (locks, fence lookup, G2H
+ * worker, wait queue), allocate the buffer object backing the descriptors and
+ * buffers, and register its release as a DRM managed action.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
+	struct xe_bo *bo;
+
+	XE_BUG_ON(guc_ct_size() % PAGE_SIZE);
+
+	mutex_init(&ct->lock);
+	spin_lock_init(&ct->fast_lock);
+	xa_init(&ct->fence_lookup);
+	ct->fence_context = dma_fence_context_alloc(1);
+	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
+	init_waitqueue_head(&ct->wq);
+
+	primelockdep(ct);
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ct_size(),
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ct->bo = bo;
+
+	return drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
+}
+
+/* Read @field_ of the struct guc_ct_buffer_desc behind @guc_ctb__->desc */
+#define desc_read(xe_, guc_ctb__, field_)			\
+	xe_map_rd_field(xe_, &guc_ctb__->desc, 0,		\
+			struct guc_ct_buffer_desc, field_)
+
+/* Write @val_ to @field_ of the struct guc_ct_buffer_desc behind @guc_ctb__->desc */
+#define desc_write(xe_, guc_ctb__, field_, val_)		\
+	xe_map_wr_field(xe_, &guc_ctb__->desc, 0,		\
+			struct guc_ct_buffer_desc, field_, val_)
+
+/*
+ * Reset the software state of the H2G CTB to an empty, unbroken buffer
+ * without reserved space, clear its descriptor at the start of @map and point
+ * its command buffer right behind the two descriptors.
+ */
+static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
+				struct iosys_map *map)
+{
+	h2g->broken = false;
+	h2g->head = 0;
+	h2g->tail = 0;
+	h2g->resv_space = 0;
+	h2g->size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+	/* Derived from the fields set above */
+	h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) -
+		h2g->resv_space;
+
+	h2g->desc = *map;
+	xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+	h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+}
+
+/*
+ * Reset the software state of the G2H CTB to an empty, unbroken buffer with
+ * G2H_ROOM_BUFFER_SIZE bytes held back as reserved space, clear its
+ * descriptor (the second one in @map) and point its command buffer behind the
+ * H2G buffer.
+ */
+static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
+				struct iosys_map *map)
+{
+	g2h->broken = false;
+	g2h->tail = 0;
+	g2h->head = 0;
+	g2h->resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+	g2h->size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+	/* Derived from the fields set above */
+	g2h->space = CIRC_SPACE(g2h->tail, g2h->head, g2h->size) -
+		g2h->resv_space;
+
+	g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
+	xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+	g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
+					    CTB_H2G_BUFFER_SIZE);
+}
+
+/*
+ * Tell the GuC where the H2G descriptor and buffer live in the GGTT and how
+ * large the buffer is, using three self-config KLVs.
+ *
+ * Return: 0 on success, or the error of the first KLV that failed.
+ */
+static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
+{
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 bo_ggtt = xe_bo_ggtt_addr(ct->bo);
+	u32 desc_ggtt = bo_ggtt;
+	u32 buf_ggtt = bo_ggtt + CTB_DESC_SIZE * 2;
+	u32 buf_bytes = ct->ctbs.h2g.size * sizeof(u32);
+	int err;
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
+				desc_ggtt);
+	if (err)
+		return err;
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
+				buf_ggtt);
+	if (err)
+		return err;
+
+	return xe_guc_self_cfg32(guc,
+				 GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY,
+				 buf_bytes);
+}
+
+/*
+ * Tell the GuC where the G2H descriptor and buffer live in the GGTT and how
+ * large the buffer is, using three self-config KLVs.
+ *
+ * Return: 0 on success, or the error of the first KLV that failed.
+ */
+static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
+{
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 bo_ggtt = xe_bo_ggtt_addr(ct->bo);
+	u32 desc_ggtt = bo_ggtt + CTB_DESC_SIZE;
+	u32 buf_ggtt = bo_ggtt + CTB_DESC_SIZE * 2 + CTB_H2G_BUFFER_SIZE;
+	u32 buf_bytes = ct->ctbs.g2h.size * sizeof(u32);
+	int err;
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+				desc_ggtt);
+	if (err)
+		return err;
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+				buf_ggtt);
+	if (err)
+		return err;
+
+	return xe_guc_self_cfg32(guc,
+				 GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+				 buf_bytes);
+}
+
+/*
+ * Send a HOST2GUC_CONTROL_CTB request over MMIO to enable or disable CTB
+ * communication.
+ *
+ * Return: 0 on success, a negative error code from xe_guc_send_mmio(), or
+ * -EPROTO if it returns a positive value.
+ */
+static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
+{
+	u32 control = enable ? GUC_CTB_CONTROL_ENABLE :
+			       GUC_CTB_CONTROL_DISABLE;
+	u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+			   GUC_ACTION_HOST2GUC_CONTROL_CTB),
+		FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL,
+			   control),
+	};
+	int ret;
+
+	ret = xe_guc_send_mmio(ct_to_guc(ct), request, ARRAY_SIZE(request));
+	if (ret > 0)
+		return -EPROTO;
+
+	return ret;
+}
+
+/*
+ * Bring up the CT channel: reset both CTBs, register them with the GuC,
+ * switch CTB communication on, then mark the channel enabled and wake every
+ * waiter on ct->wq. Must not be called while the channel is already enabled.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	int err;
+
+	XE_BUG_ON(ct->enabled);
+
+	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+
+	err = guc_ct_ctb_h2g_register(ct);
+	if (err)
+		goto err_out;
+
+	err = guc_ct_ctb_g2h_register(ct);
+	if (err)
+		goto err_out;
+
+	err = guc_ct_control_toggle(ct, true);
+	if (err)
+		goto err_out;
+
+	/* The enabled flag and outstanding G2H count change under ct->lock */
+	mutex_lock(&ct->lock);
+	ct->g2h_outstanding = 0;
+	ct->enabled = true;
+	mutex_unlock(&ct->lock);
+
+	smp_mb();
+	wake_up_all(&ct->wq);
+	drm_dbg(&xe->drm, "GuC CT communication channel enabled\n");
+
+	return 0;
+
+err_out:
+	drm_err(&xe->drm, "Failed to enabled CT (%d)\n", err);
+
+	return err;
+}
+
+void xe_guc_ct_disable(struct xe_guc_ct *ct)
+{
+	mutex_lock(&ct->lock);
+	ct->enabled = false;	/* later sends fail with -ENODEV */
+	mutex_unlock(&ct->lock);
+
+	xa_destroy(&ct->fence_lookup);	/* empties the seqno -> g2h_fence lookup */
+}
+
+static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
+{
+	struct guc_ctb *ctb = &ct->ctbs.h2g;
+
+	lockdep_assert_held(&ct->lock);
+
+	/* Fast exit: the cached credits already cover the command */
+	if (cmd_len <= ctb->space)
+		return true;
+
+	/* Otherwise refresh head from the descriptor and recount credits */
+	ctb->head = desc_read(ct_to_xe(ct), ctb, head);
+	ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) -
+		ctb->resv_space;
+	return cmd_len <= ctb->space;
+}
+
+static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	lockdep_assert_held(&ct->lock);
+
+	return ct->ctbs.g2h.space > g2h_len;	/* strictly more credits than requested */
+}
+
+static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
+{
+	lockdep_assert_held(&ct->lock);
+
+	/* G2H credits first; the H2G check only runs when those fit */
+	if (g2h_has_room(ct, g2h_len) && h2g_has_room(ct, cmd_len))
+		return 0;
+	return -EBUSY;
+}
+
+static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
+{
+	lockdep_assert_held(&ct->lock);
+	ct->ctbs.h2g.space -= cmd_len;	/* cached credits only, descriptor untouched */
+}
+
+static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+{
+	XE_BUG_ON(g2h_len > ct->ctbs.g2h.space);
+
+	if (!g2h_len)
+		return;	/* no G2H credits requested */
+	spin_lock_irq(&ct->fast_lock);
+	ct->g2h_outstanding += num_g2h;
+	ct->ctbs.g2h.space -= g2h_len;
+	spin_unlock_irq(&ct->fast_lock);
+}
+
+static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+	lockdep_assert_held(&ct->fast_lock);
+	XE_WARN_ON(g2h->space + g2h_len > g2h->size - g2h->resv_space);
+	g2h->space += g2h_len;	/* hand the credits back */
+	ct->g2h_outstanding--;
+}
+
+static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	spin_lock_irq(&ct->fast_lock);	/* wrapper for callers not yet holding fast_lock */
+	__g2h_release_space(ct, g2h_len);
+	spin_unlock_irq(&ct->fast_lock);
+}
+
+/*
+ * Copy one H2G message into the ring at the cached tail and publish the new
+ * tail. Returns -EAGAIN after NOP-filling the ring end when it would wrap.
+ */
+static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		     u32 ct_fence_value, bool want_response)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct guc_ctb *h2g = &ct->ctbs.h2g;
+	u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)];
+	u32 cmd_len = len + GUC_CTB_HDR_LEN;
+	u32 cmd_idx = 0, i;
+	u32 tail = h2g->tail;
+	struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
+							 tail * sizeof(u32));
+
+	lockdep_assert_held(&ct->lock);
+	/* cmd[] receives cmd_len dwords below, headers included; bound that */
+	XE_BUG_ON(cmd_len > ARRAY_SIZE(cmd));
+	XE_BUG_ON(tail > h2g->size);
+
+	/* Command will wrap, zero fill (NOPs), return and check credits again */
+	if (tail + cmd_len > h2g->size) {
+		xe_map_memset(xe, &map, 0, 0, (h2g->size - tail) * sizeof(u32));
+		h2g_reserve_space(ct, (h2g->size - tail));
+		h2g->tail = 0;
+		desc_write(xe, h2g, tail, h2g->tail);
+
+		return -EAGAIN;
+	}
+
+	/*
+	 * dw0: CT header (including fence)
+	 * dw1: HXG header (including action code)
+	 * dw2+: action data
+	 */
+	cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+		FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
+		FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
+	/* REQUEST when a response is wanted, EVENT otherwise */
+	cmd[cmd_idx++] =
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, want_response ?
+			   GUC_HXG_TYPE_REQUEST : GUC_HXG_TYPE_EVENT) |
+		FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+			   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+	for (i = 1; i < len; ++i)
+		cmd[cmd_idx++] = action[i];
+
+	/* Write H2G ensuring visible before descriptor update */
+	xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32));
+	xe_device_wmb(ct_to_xe(ct));
+
+	/* Update local copies */
+	h2g->tail = (tail + cmd_len) % h2g->size;
+	h2g_reserve_space(ct, cmd_len);
+
+	/* Update descriptor */
+	desc_write(xe, h2g, tail, h2g->tail);
+
+	return 0;
+}
+
+/* Send one H2G under ct->lock; does not wait, -EBUSY reports missing credits */
+static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 g2h_len, u32 num_g2h,
+				struct g2h_fence *g2h_fence)
+{
+	int ret;
+
+	XE_BUG_ON(g2h_len && g2h_fence);
+	XE_BUG_ON(num_g2h && g2h_fence);
+	XE_BUG_ON(g2h_len && !num_g2h);
+	XE_BUG_ON(!g2h_len && num_g2h);
+	lockdep_assert_held(&ct->lock);
+
+	if (unlikely(ct->ctbs.h2g.broken)) {
+		ret = -EPIPE;
+		goto out;
+	}
+
+	if (unlikely(!ct->enabled)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (g2h_fence) {
+		g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
+		num_g2h = 1;
+
+		if (g2h_fence_needs_alloc(g2h_fence)) {
+			void *ptr;
+
+			g2h_fence->seqno = (ct->fence_seqno++ & 0xffff);
+			init_waitqueue_head(&g2h_fence->wq);
+			ptr = xa_store(&ct->fence_lookup, g2h_fence->seqno,
+				       g2h_fence, GFP_ATOMIC);
+			/* Failure comes back as an XArray error entry, not ERR_PTR */
+			if (xa_is_err(ptr)) {
+				ret = xa_err(ptr);
+				goto out;
+			}
+		}
+	}
+
+	xe_device_mem_access_get(ct_to_xe(ct));
+retry:
+	ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
+	if (unlikely(ret))
+		goto put_wa;
+
+	ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
+			!!g2h_fence);
+	if (unlikely(ret)) {
+		if (ret == -EAGAIN)
+			goto retry;
+		goto put_wa;
+	}
+
+	g2h_reserve_space(ct, g2h_len, num_g2h);
+	xe_guc_notify(ct_to_guc(ct));
+put_wa:
+	xe_device_mem_access_put(ct_to_xe(ct));
+out:
+	return ret;
+}
+
+static void kick_reset(struct xe_guc_ct *ct)
+{
+	xe_gt_reset_async(ct_to_gt(ct));	/* presumably only schedules the GT reset - TODO confirm */
+}
+
+static int dequeue_one_g2h(struct xe_guc_ct *ct);
+
+/* Send under ct->lock; on -EBUSY wait for credits, -EDEADLK once that fails */
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			      u32 g2h_len, u32 num_g2h,
+			      struct g2h_fence *g2h_fence)
+{
+	struct drm_device *drm = &ct_to_xe(ct)->drm;
+	struct drm_printer p = drm_info_printer(drm->dev);
+	unsigned int sleep_period_ms = 1;
+	int ret;
+
+	XE_BUG_ON(g2h_len && g2h_fence);
+	lockdep_assert_held(&ct->lock);
+
+try_again:
+	ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+				   g2h_fence);
+
+	/*
+	 * We wait to try to restore credits for about 1 second before bailing.
+	 * In the case of H2G credits we have no choice but just to wait for the
+	 * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In
+	 * the case of G2H we process any G2H in the channel, hopefully freeing
+	 * credits as we consume the G2H messages.
+	 */
+	if (unlikely(ret == -EBUSY &&
+		     !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+		struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+		if (sleep_period_ms == 1024)
+			goto broken;
+
+		trace_xe_guc_ct_h2g_flow_control(h2g->head, h2g->tail,
+						 h2g->size, h2g->space,
+						 len + GUC_CTB_HDR_LEN);
+		msleep(sleep_period_ms);
+		sleep_period_ms <<= 1;
+
+		goto try_again;
+	} else if (unlikely(ret == -EBUSY)) {
+		struct xe_device *xe = ct_to_xe(ct);
+		struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+		trace_xe_guc_ct_g2h_flow_control(g2h->head,
+						 desc_read(xe, g2h, tail),
+						 g2h->size, g2h->space,
+						 g2h_fence ?
+						 GUC_CTB_HXG_MSG_MAX_LEN :
+						 g2h_len);
+
+#define g2h_avail(ct)	\
+	(desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.head)
+		if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
+					g2h_avail(ct), HZ))
+			goto broken;
+#undef g2h_avail
+
+		if (dequeue_one_g2h(ct) < 0)
+			goto broken;
+
+		goto try_again;
+	}
+
+	return ret;
+
+broken:
+	drm_err(drm, "No forward process on H2G, reset required\n");
+	xe_guc_ct_print(ct, &p);
+	ct->ctbs.h2g.broken = true;
+	return -EDEADLK;
+}
+
+static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		       u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
+{
+	int err;
+
+	XE_BUG_ON(g2h_len && g2h_fence);
+
+	/* Thin wrapper: take ct->lock around the locked variant */
+	mutex_lock(&ct->lock);
+	err = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
+	mutex_unlock(&ct->lock);
+	return err;
+}
+
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		   u32 g2h_len, u32 num_g2h)
+{
+	int err = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
+
+	/* -EDEADLK: the channel made no progress, ask for a GT reset */
+	if (err == -EDEADLK)
+		kick_reset(ct);
+
+	return err;
+}
+
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			  u32 g2h_len, u32 num_g2h)
+{
+	int err = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
+
+	/* -EDEADLK: the channel made no progress, ask for a GT reset */
+	if (err == -EDEADLK)
+		kick_reset(ct);
+
+	return err;
+}
+
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	int err;
+
+	lockdep_assert_held(&ct->lock);
+
+	/* Sends issued from a G2H handler reserve no G2H credits */
+	err = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
+	if (err == -EDEADLK)
+		kick_reset(ct);
+	return err;
+}
+
+/*
+ * For -EDEADLK, -EPIPE and -ENODEV: wait up to 5 seconds (an arbitrarily picked
+ * upper limit for a GT reset) for the CT to be enabled and unbroken again.
+ */
+static bool retry_failure(struct xe_guc_ct *ct, int ret)
+{
+	if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
+		return false;
+
+#define ct_alive(ct)	\
+	(ct->enabled && !ct->ctbs.h2g.broken && !ct->ctbs.g2h.broken)
+	if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct),  HZ * 5))
+		return false;
+#undef ct_alive
+	/* NOTE(review): also reached if a signal interrupted the wait - confirm */
+	return true;
+}
+
+static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			    u32 *response_buffer, bool no_fail)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct g2h_fence g2h_fence;
+	int ret = 0;
+
+	/*
+	 * A fence implements blocking sends / receiving response data: its seqno
+	 * is sent in the H2G, returned in the G2H and used as the key into the
+	 * fence_lookup xarray. Fields in the fence hold success, failure, retry
+	 * status and the response data. The fence lives on the stack, so its
+	 * xarray entry must be gone before this function returns.
+	 */
+retry:
+	g2h_fence_init(&g2h_fence, response_buffer);
+retry_same_fence:
+	ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
+	if (unlikely(ret == -ENOMEM)) {
+		void *ptr;
+
+		/* Retry allocation w/ GFP_KERNEL; errors are XArray error entries */
+		ptr = xa_store(&ct->fence_lookup, g2h_fence.seqno,
+			       &g2h_fence, GFP_KERNEL);
+		if (xa_is_err(ptr))
+			return xa_err(ptr);
+
+		goto retry_same_fence;
+	} else if (unlikely(ret)) {
+		if (ret == -EDEADLK)
+			kick_reset(ct);
+
+		if (no_fail && retry_failure(ct, ret))
+			goto retry_same_fence;
+
+		if (!g2h_fence_needs_alloc(&g2h_fence))
+			xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+
+		return ret;
+	}
+
+	ret = wait_event_timeout(g2h_fence.wq, g2h_fence.done, HZ);
+	if (!ret) {
+		drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x\n",
+			g2h_fence.seqno, action[0]);
+		/* The G2H side completes fences under ct->lock; serialise with it */
+		mutex_lock(&ct->lock);
+		xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+		mutex_unlock(&ct->lock);
+		return -ETIME;
+	}
+
+	if (g2h_fence.retry) {
+		drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d\n",
+			 action[0], g2h_fence.reason);
+		goto retry;
+	}
+	if (g2h_fence.fail) {
+		drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d\n",
+			action[0], g2h_fence.error, g2h_fence.hint);
+		ret = -EIO;
+	}
+
+	return ret > 0 ? 0 : ret;
+}
+
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			u32 *response_buffer)
+{
+	return guc_ct_send_recv(ct, action, len, response_buffer, false);	/* no_fail off */
+}
+
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 *response_buffer)
+{
+	return guc_ct_send_recv(ct, action, len, response_buffer, true);	/* no_fail on */
+}
+
+static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	lockdep_assert_held(&ct->lock);
+
+	switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) {
+	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		g2h_release_space(ct, len);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/* Complete the g2h_fence matching a G2H response and wake its waiter */
+static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	u32 response_len = len - GUC_CTB_MSG_MIN_LEN;
+	u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
+	u32 hxg = msg[1];
+	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg);
+	struct g2h_fence *g2h_fence;
+
+	lockdep_assert_held(&ct->lock);
+
+	g2h_fence = xa_erase(&ct->fence_lookup, fence);
+	if (unlikely(!g2h_fence)) {
+		/* Don't tear down channel, as send could've timed out */
+		drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence);
+		g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+		return 0;
+	}
+
+	XE_WARN_ON(fence != g2h_fence->seqno);
+
+	/* Failure / retry details are HXG header (dw1) fields, like the type */
+	if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+		g2h_fence->fail = true;
+		g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg);
+		g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg);
+	} else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+		g2h_fence->retry = true;
+		g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg);
+	} else if (g2h_fence->response_buffer) {
+		g2h_fence->response_len = response_len;
+		memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN,
+		       response_len * sizeof(u32));
+	}
+
+	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+
+	g2h_fence->done = true;
+	smp_mb();
+
+	wake_up(&g2h_fence->wq);
+
+	return 0;
+}
+
+static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	u32 hxg, origin, type;
+	int ret;
+
+	lockdep_assert_held(&ct->lock);
+
+	/* dw0 is the CT header; origin and type come from the HXG header, dw1 */
+	hxg = msg[1];
+
+	origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg);
+	if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
+		drm_err(&xe->drm,
+			"G2H channel broken on read, origin=%d, reset required\n",
+			origin);
+		ct->ctbs.g2h.broken = true;
+
+		return -EPROTO;
+	}
+
+	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg);
+	switch (type) {
+	case GUC_HXG_TYPE_EVENT:
+		ret = parse_g2h_event(ct, msg, len);
+		break;
+	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+		ret = parse_g2h_response(ct, msg, len);
+		break;
+	default:
+		drm_err(&xe->drm,
+			"G2H channel broken on read, type=%d, reset required\n",
+			type);
+		ct->ctbs.g2h.broken = true;
+
+		ret = -EOPNOTSUPP;
+	}
+
+	return ret;
+}
+
+static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 hxg = msg[1];
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg);
+	u32 *data = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 data_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	int err = 0;
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg) != GUC_HXG_TYPE_EVENT)
+		return 0;
+
+	switch (action) {
+	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+		err = xe_guc_sched_done_handler(guc, data, data_len);
+		break;
+	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+		err = xe_guc_deregister_done_handler(guc, data, data_len);
+		break;
+	case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+		err = xe_guc_engine_reset_handler(guc, data, data_len);
+		break;
+	case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+		err = xe_guc_engine_reset_failure_handler(guc, data, data_len);
+		break;
+	case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+		err = xe_guc_engine_memory_cat_error_handler(guc, data,
+							     data_len);
+		break;
+	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		err = xe_guc_pagefault_handler(guc, data, data_len);
+		break;
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		err = xe_guc_tlb_invalidation_done_handler(guc, data, data_len);
+		break;
+	case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
+		err = xe_guc_access_counter_notify_handler(guc, data, data_len);
+		break;
+	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+		/* Selftest only at the moment */
+		break;
+	case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+	case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+		/* FIXME: Handle this */
+		break;
+	default:
+		drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
+		break;
+	}
+
+	if (err)
+		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+			action, err);
+
+	/* Handler failures are logged above but never propagated */
+	return 0;
+}
+
+static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct guc_ctb *g2h = &ct->ctbs.g2h;
+	u32 tail, head, len;
+	s32 avail;
+
+	lockdep_assert_held(&ct->fast_lock);
+	/* Returns the dwords consumed, 0 if nothing was consumed, or -errno */
+	if (!ct->enabled)
+		return -ENODEV;
+
+	if (g2h->broken)
+		return -EPIPE;
+
+	/* Dwords available to read: descriptor tail minus the cached head */
+	tail = desc_read(xe, g2h, tail);
+	avail = tail - g2h->head;
+	if (unlikely(avail == 0))
+		return 0;
+
+	if (avail < 0)
+		avail += g2h->size;	/* tail is numerically behind head: wrapped */
+
+	/* Read the CT header dword, which carries the message's dword count */
+	xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->head, sizeof(u32));
+	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
+	if (len > avail) {
+		drm_err(&xe->drm,
+			"G2H channel broken on read, avail=%d, len=%d, reset required\n",
+			avail, len);
+		g2h->broken = true;
+
+		return -EPROTO;
+	}
+
+	head = (g2h->head + 1) % g2h->size;
+	avail = len - 1;	/* from here on: dwords left to copy after the header */
+
+	/* Read the rest of the G2H message, in two copies if it wraps */
+	if (avail + head > g2h->size) {
+		u32 avail_til_wrap = g2h->size - head;
+
+		xe_map_memcpy_from(xe, msg + 1,
+				   &g2h->cmds, sizeof(u32) * head,
+				   avail_til_wrap * sizeof(u32));
+		xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
+				   &g2h->cmds, 0,
+				   (avail - avail_til_wrap) * sizeof(u32));
+	} else {
+		xe_map_memcpy_from(xe, msg + 1,
+				   &g2h->cmds, sizeof(u32) * head,
+				   avail * sizeof(u32));
+	}
+
+	if (fast_path) {
+		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+			return 0;	/* not consumed, head stays put */
+
+		switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) {
+		case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+			break;	/* Process these in fast-path */
+		default:
+			return 0;	/* not consumed, head stays put */
+		}
+	}
+
+	/* Consume the message: update the cached head and the descriptor head */
+	g2h->head = (head + avail) % g2h->size;
+	desc_write(xe, g2h, head, g2h->head);
+
+	return len;
+}
+
+static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	u32 *data = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 data_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	int err = 0;
+
+	/* g2h_read() only lets these two actions through on the fast path */
+	switch (action) {
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		__g2h_release_space(ct, len);
+		err = xe_guc_tlb_invalidation_done_handler(guc, data, data_len);
+		break;
+	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		err = xe_guc_pagefault_handler(guc, data, data_len);
+		break;
+	default:
+		XE_WARN_ON("NOT_POSSIBLE");
+	}
+
+	if (err)
+		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+			action, err);
+}
+
+/**
+ * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
+ * @ct: GuC CT object
+ *
+ * Page fault related G2H are performance critical, so they are consumed
+ * straight from the IRQ. This is safe as these handlers either just wake up
+ * waiters or queue another worker.
+ */
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	int len;
+
+	/* Skipped unless in fault mode with a memory access ongoing */
+	if (!(xe_device_in_fault_mode(xe) && xe_device_mem_access_ongoing(xe)))
+		return;
+
+	spin_lock(&ct->fast_lock);
+
+	/* Drain until g2h_read() consumes nothing more or reports an error */
+	while ((len = g2h_read(ct, ct->fast_msg, true)) > 0)
+		g2h_fast_path(ct, ct->fast_msg, len);
+	spin_unlock(&ct->fast_lock);
+}
+
+/* <0: error, 0: nothing consumed, 1: one message handled, more may follow */
+static int dequeue_one_g2h(struct xe_guc_ct *ct)
+{
+	int nr_dw;
+	int err;
+
+	lockdep_assert_held(&ct->lock);
+
+	/* The ring itself is guarded by fast_lock, shared with the IRQ path */
+	spin_lock_irq(&ct->fast_lock);
+	nr_dw = g2h_read(ct, ct->msg, false);
+	spin_unlock_irq(&ct->fast_lock);
+	if (nr_dw <= 0)
+		return nr_dw;
+
+	/* Credit / fence bookkeeping first, then the per-action handler */
+	err = parse_g2h_msg(ct, ct->msg, nr_dw);
+	if (likely(err >= 0))
+		err = process_g2h_msg(ct, ct->msg, nr_dw);
+	if (unlikely(err < 0))
+		return err;
+
+	return 1;
+}
+
+static void g2h_worker_func(struct work_struct *w)
+{
+	struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+	struct xe_device *xe = ct_to_xe(ct);
+	int status;
+
+	xe_device_mem_access_get(xe);
+	do {
+		mutex_lock(&ct->lock);
+		status = dequeue_one_g2h(ct);
+		mutex_unlock(&ct->lock);
+		/* Malformed G2H: dump the CT state and request a GT reset */
+		if (unlikely(status == -EPROTO || status == -EOPNOTSUPP)) {
+			struct drm_printer p = drm_info_printer(xe->drm.dev);
+
+			xe_guc_ct_print(ct, &p);
+			kick_reset(ct);
+		}
+	} while (status == 1);
+	xe_device_mem_access_put(xe);
+}
+
+/*
+ * Print the host's cached view of a CTB, the values held in its descriptor
+ * and every command dword between the descriptor's head and tail.
+ */
+static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
+			     struct drm_printer *p)
+{
+	u32 rd, wr;
+
+	/* Host-side cached state */
+	drm_printf(p, "\tsize: %d\n", ctb->size);
+	drm_printf(p, "\tresv_space: %d\n", ctb->resv_space);
+	drm_printf(p, "\thead: %d\n", ctb->head);
+	drm_printf(p, "\ttail: %d\n", ctb->tail);
+	drm_printf(p, "\tspace: %d\n", ctb->space);
+	drm_printf(p, "\tbroken: %d\n", ctb->broken);
+
+	/* State as currently held in the descriptor */
+	rd = desc_read(xe, ctb, head);
+	wr = desc_read(xe, ctb, tail);
+	drm_printf(p, "\thead (memory): %d\n", rd);
+	drm_printf(p, "\ttail (memory): %d\n", wr);
+	drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status));
+
+	/* Walk from descriptor head to tail, one dword at a time */
+	while (rd != wr) {
+		u32 dw = xe_map_rd(xe, &ctb->cmds, rd * sizeof(u32), u32);
+
+		drm_printf(p, "\tcmd[%d]: 0x%08x\n", rd, dw);
+		++rd;
+		/* wrap back to the start of the command buffer */
+		if (rd == ctb->size)
+			rd = 0;
+	}
+}
+
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+	if (!ct->enabled) {
+		drm_puts(p, "\nCT disabled\n");
+		return;
+	}
+
+	drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
+	guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p);
+	drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
+	guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p);
+	drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding);
+}
+
+#ifdef XE_GUC_CT_SELFTEST
+/*
+ * Suppress the G2H worker kick in the IRQ handler so that 8192 back-to-back
+ * xe_guc_ct_send() calls run into flow control. Verify forward progress and
+ * that G2H space / outstanding count are back at their idle values on exit.
+ */
+void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+	struct guc_ctb *g2h = &ct->ctbs.g2h;
+	u32 action[] = { XE_GUC_ACTION_SCHED_ENGINE_MODE_SET, 0, 0, 1, };
+	u32 bad_action[] = { XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 0, 0, };
+	int ret;
+	int i;
+
+	ct->suppress_irq_handler = true;	/* IRQ handler stops queueing the G2H worker */
+	drm_puts(p, "Starting GuC CT selftest\n");
+
+	for (i = 0; i < 8192; ++i) {
+		ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1);	/* 4 DW, 1 G2H */
+		if (ret) {
+			drm_printf(p, "Aborted pass %d, ret %d\n", i, ret);
+			xe_guc_ct_print(ct, p);
+			break;
+		}
+	}
+
+	ct->suppress_irq_handler = false;
+	if (!ret) {
+		xe_guc_ct_irq_handler(ct);	/* queue the worker for whatever is still pending */
+		msleep(200);	/* presumably lets the worker drain the G2H - TODO confirm */
+		if (g2h->space !=
+		    CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) {
+			drm_printf(p, "Mismatch on space %d, %d\n",
+				   g2h->space,
+				   CIRC_SPACE(0, 0, g2h->size) -
+				   g2h->resv_space);
+			ret = -EIO;
+		}
+		if (ct->g2h_outstanding) {
+			drm_printf(p, "Outstanding G2H, %d\n",
+				   ct->g2h_outstanding);
+			ret = -EIO;
+		}
+	}
+
+	/* Check failure path for blocking CTs too */
+	xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action));	/* result not checked */
+	if (g2h->space !=
+	    CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) {
+		drm_printf(p, "Mismatch on space %d, %d\n",
+			   g2h->space,
+			   CIRC_SPACE(0, 0, g2h->size) -
+			   g2h->resv_space);
+		ret = -EIO;
+	}
+	if (ct->g2h_outstanding) {
+		drm_printf(p, "Outstanding G2H, %d\n",
+			   ct->g2h_outstanding);
+		ret = -EIO;
+	}
+
+	drm_printf(p, "GuC CT selftest done - %s\n", ret ? "FAIL" : "PASS");
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
new file mode 100644
index 000000000000..49fb74f91e4d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_H_
+#define _XE_GUC_CT_H_
+
+#include "xe_guc_ct_types.h"
+
+struct drm_printer;
+
+int xe_guc_ct_init(struct xe_guc_ct *ct);
+int xe_guc_ct_enable(struct xe_guc_ct *ct);
+void xe_guc_ct_disable(struct xe_guc_ct *ct);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
+
+static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
+{
+	wake_up_all(&ct->wq);	/* waiters on ct->wq re-evaluate their conditions */
+#ifdef XE_GUC_CT_SELFTEST
+	if (!ct->suppress_irq_handler && ct->enabled)	/* the selftest can mute the worker */
+		queue_work(system_unbound_wq, &ct->g2h_worker);
+#else
+	if (ct->enabled)
+		queue_work(system_unbound_wq, &ct->g2h_worker);
+#endif
+	xe_guc_ct_fast_path(ct);	/* fast-path G2H are handled right here */
+}
+
+/* Basic CT send / receives */
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		   u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			  u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	return xe_guc_ct_send_recv(ct, action, len, NULL);	/* NULL: no response buffer */
+}
+
+/* This is the only version of the CT send you can call from a G2H handler */
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action,
+			       u32 len);
+
+/* Can't fail because a GT reset is in progress */
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL);	/* NULL: no response buffer */
+}
+
+#ifdef XE_GUC_CT_SELFTEST
+void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p);
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
new file mode 100644
index 000000000000..17b148bf3735
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_TYPES_H_
+#define _XE_GUC_CT_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock_types.h>
+#include <linux/wait.h>
+#include <linux/xarray.h>
+
+#include "abi/guc_communication_ctb_abi.h"
+
+#define XE_GUC_CT_SELFTEST
+
+struct xe_bo;
+
+/**
+ * struct guc_ctb - GuC command transport buffer (CTB)
+ */
+struct guc_ctb {
+	/** @desc: dma buffer map for CTB descriptor */
+	struct iosys_map desc;
+	/** @cmds: dma buffer map for CTB commands */
+	struct iosys_map cmds;
+	/** @size: size of CTB commands (DW) */
+	u32 size;
+	/** @resv_space: reserved space of CTB commands (DW) */
+	u32 resv_space;
+	/** @head: host cached head of CTB commands (DW) */
+	u32 head;
+	/** @tail: host cached tail of CTB commands (DW) */
+	u32 tail;
+	/** @space: host cached free space in CTB commands (DW) */
+	u32 space;
+	/** @broken: channel broken, H2G sends / G2H reads return -EPIPE */
+	bool broken;
+};
+
+/**
+ * struct xe_guc_ct - GuC command transport (CT) layer
+ *
+ * Includes a pair of CT buffers for bi-directional communication and tracking
+ * for the H2G and G2H requests sent and received through the buffers.
+ */
+struct xe_guc_ct {
+	/** @bo: XE BO for CT */
+	struct xe_bo *bo;
+	/** @lock: protects everything in CT layer */
+	struct mutex lock;
+	/** @fast_lock: protects G2H channel and credits */
+	spinlock_t fast_lock;
+	/** @ctbs: buffers for sending and receiving commands */
+	struct {
+		/** @ctbs.h2g: Host to GuC (H2G, send) channel */
+		struct guc_ctb h2g;
+		/** @ctbs.g2h: GuC to Host (G2H, receive) channel */
+		struct guc_ctb g2h;
+	} ctbs;
+	/** @g2h_outstanding: number of outstanding G2H */
+	u32 g2h_outstanding;
+	/** @g2h_worker: worker to process G2H messages */
+	struct work_struct g2h_worker;
+	/** @enabled: CT enabled */
+	bool enabled;
+	/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
+	u32 fence_seqno;
+	/** @fence_context: context for G2H fence */
+	u64 fence_context;
+	/** @fence_lookup: G2H fence lookup, keyed by fence seqno */
+	struct xarray fence_lookup;
+	/** @wq: wait queue used for reliable CT sends and freeing G2H credits */
+	wait_queue_head_t wq;
+#ifdef XE_GUC_CT_SELFTEST
+	/** @suppress_irq_handler: force flow control to sender */
+	bool suppress_irq_handler;
+#endif
+	/** @msg: Message buffer for G2H dequeued under @lock */
+	u32 msg[GUC_CTB_MSG_MAX_LEN];
+	/** @fast_msg: Message buffer for G2H read in the IRQ fast path */
+	u32 fast_msg[GUC_CTB_MSG_MAX_LEN];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
new file mode 100644
index 000000000000..916e9633b322
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_debugfs.h"
+#include "xe_guc_log.h"
+#include "xe_macros.h"
+
+/* Resolve the GT that embeds this GuC as gt->uc.guc */
+static struct xe_gt *guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+/* Resolve the device by way of the GT that embeds this GuC */
+static struct xe_device *guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+static struct xe_guc *node_to_guc(struct drm_info_node *node)
+{
+	return node->info_ent->data;	/* set to the GuC by xe_guc_debugfs_register() */
+}
+
+static int guc_info(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	/* Hold a mem_access reference across the dump */
+	xe_device_mem_access_get(xe);
+	xe_guc_print_info(guc, &printer);
+	xe_device_mem_access_put(xe);
+	return 0;
+}
+
+static int guc_log(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	/* Hold a mem_access reference across the dump */
+	xe_device_mem_access_get(xe);
+	xe_guc_log_print(&guc->log, &printer);
+	xe_device_mem_access_put(xe);
+	return 0;
+}
+
+#ifdef XE_GUC_CT_SELFTEST
+static int guc_ct_selftest(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	/* Hold a mem_access reference across the selftest */
+	xe_device_mem_access_get(xe);
+	xe_guc_ct_selftest(&guc->ct, &printer);
+	xe_device_mem_access_put(xe);
+	return 0;
+}
+#endif
+
+static const struct drm_info_list debugfs_list[] = {	/* template, copied per GuC */
+	{"guc_info", guc_info, 0},
+	{"guc_log", guc_log, 0},
+#ifdef XE_GUC_CT_SELFTEST
+	{"guc_ct_selftest", guc_ct_selftest, 0},
+#endif
+};
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_minor *minor = xe->drm.primary;
+	struct drm_info_list *files;
+	int n;
+
+	/*
+	 * The template is const and shared, so every GuC gets its own
+	 * drm-managed copy whose ->data can point back at it.
+	 */
+	files = drmm_kmalloc(&xe->drm, sizeof(debugfs_list), GFP_KERNEL);
+	if (!files) {
+		XE_WARN_ON("Couldn't allocate memory");
+		return;
+	}
+
+	memcpy(files, debugfs_list, sizeof(debugfs_list));
+	for (n = 0; n < ARRAY_SIZE(debugfs_list); ++n)
+		files[n].data = guc;
+
+	drm_debugfs_create_files(files, ARRAY_SIZE(debugfs_list), parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.h b/drivers/gpu/drm/xe/xe_guc_debugfs.h
new file mode 100644
index 000000000000..4756dff26fca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_DEBUGFS_H_
+#define _XE_GUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_guc;
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_types.h b/drivers/gpu/drm/xe/xe_guc_engine_types.h
new file mode 100644
index 000000000000..512615d1ce8c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_engine_types.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ENGINE_TYPES_H_
+#define _XE_GUC_ENGINE_TYPES_H_
+
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_gpu_scheduler_types.h"
+
+struct dma_fence;
+struct xe_engine;
+
+/**
+ * struct xe_guc_engine - GuC specific state for an xe_engine
+ */
+struct xe_guc_engine {
+	/** @engine: Backpointer to parent xe_engine */
+	struct xe_engine *engine;
+	/** @sched: GPU scheduler for this xe_engine */
+	struct xe_gpu_scheduler sched;
+	/** @entity: Scheduler entity for this xe_engine */
+	struct xe_sched_entity entity;
+	/**
+	 * @static_msgs: Static messages for this xe_engine, used when a message
+	 * needs to be sent through the GPU scheduler but memory allocations are
+	 * not allowed.
+	 */
+#define MAX_STATIC_MSG_TYPE	3
+	struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
+	/** @fini_async: do final fini async from this worker */
+	struct work_struct fini_async;
+	/** @resume_time: time of last resume */
+	u64 resume_time;
+	/** @state: GuC specific state for this xe_engine */
+	atomic_t state;
+	/** @wqi_head: work queue item head */
+	u32 wqi_head;
+	/** @wqi_tail: work queue item tail */
+	u32 wqi_tail;
+	/** @id: GuC id for this xe_engine */
+	u16 id;
+	/** @suspend_wait: wait queue used to wait on pending suspends */
+	wait_queue_head_t suspend_wait;
+	/** @suspend_pending: a suspend of the engine is pending */
+	bool suspend_pending;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
new file mode 100644
index 000000000000..f562404a6cf7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -0,0 +1,392 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_FWIF_H
+#define _XE_GUC_FWIF_H
+
+#include <linux/bits.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_slpc_abi.h"
+#include "abi/guc_errors_abi.h"
+#include "abi/guc_communication_mmio_abi.h"
+#include "abi/guc_communication_ctb_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "abi/guc_messages_abi.h"
+
+#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET	4
+#define G2H_LEN_DW_DEREGISTER_CONTEXT		3
+#define G2H_LEN_DW_TLB_INVALIDATE		3
+
+#define GUC_CONTEXT_DISABLE		0
+#define GUC_CONTEXT_ENABLE		1
+
+#define GUC_CLIENT_PRIORITY_KMD_HIGH	0
+#define GUC_CLIENT_PRIORITY_HIGH	1
+#define GUC_CLIENT_PRIORITY_KMD_NORMAL	2
+#define GUC_CLIENT_PRIORITY_NORMAL	3
+#define GUC_CLIENT_PRIORITY_NUM		4
+
+#define GUC_RENDER_ENGINE		0
+#define GUC_VIDEO_ENGINE		1
+#define GUC_BLITTER_ENGINE		2
+#define GUC_VIDEOENHANCE_ENGINE		3
+#define GUC_VIDEO_ENGINE2		4
+#define GUC_MAX_ENGINES_NUM		(GUC_VIDEO_ENGINE2 + 1)
+
+#define GUC_RENDER_CLASS		0
+#define GUC_VIDEO_CLASS			1
+#define GUC_VIDEOENHANCE_CLASS		2
+#define GUC_BLITTER_CLASS		3
+#define GUC_COMPUTE_CLASS		4
+#define GUC_GSC_OTHER_CLASS		5
+#define GUC_LAST_ENGINE_CLASS		GUC_GSC_OTHER_CLASS
+#define GUC_MAX_ENGINE_CLASSES		16
+#define GUC_MAX_INSTANCES_PER_CLASS	32
+
+/* Work item for submitting workloads into work queue of GuC. */
+#define WQ_STATUS_ACTIVE		1
+#define WQ_STATUS_SUSPENDED		2
+#define WQ_STATUS_CMD_ERROR		3
+#define WQ_STATUS_ENGINE_ID_NOT_USED	4
+#define WQ_STATUS_SUSPENDED_FROM_RESET	5
+#define WQ_TYPE_NOOP			0x4
+#define WQ_TYPE_MULTI_LRC		0x5
+#define WQ_TYPE_MASK			GENMASK(7, 0)
+#define WQ_LEN_MASK			GENMASK(26, 16)
+
+#define WQ_GUC_ID_MASK			GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
+
+struct guc_wq_item {
+	u32 header;
+	u32 context_desc;
+	u32 submit_element_info;
+	u32 fence_id;
+} __packed;
+
+struct guc_sched_wq_desc {
+	u32 head;
+	u32 tail;
+	u32 error_offset;
+	u32 wq_status;
+	u32 reserved[28];
+} __packed;
+
+/* Helper for context registration H2G */
+struct guc_ctxt_registration_info {
+	u32 flags;
+	u32 context_idx;
+	u32 engine_class;
+	u32 engine_submit_mask;
+	u32 wq_desc_lo;
+	u32 wq_desc_hi;
+	u32 wq_base_lo;
+	u32 wq_base_hi;
+	u32 wq_size;
+	u32 hwlrca_lo;
+	u32 hwlrca_hi;
+};
+#define CONTEXT_REGISTRATION_FLAG_KMD	BIT(0)
+
+/* Two-dword KLV entry (kl dword + value dword), used by policy updates and others */
+struct guc_klv_generic_dw_t {
+	u32 kl;
+	u32 value;
+} __packed;
+
+/* Header of the UPDATE_CONTEXT_POLICIES H2G data packet */
+struct guc_update_engine_policy_header {
+	u32 action;
+	u32 guc_id;
+} __packed;
+
+struct guc_update_engine_policy {
+	struct guc_update_engine_policy_header header;
+	struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
+} __packed;
+
+/* GUC_CTL_* - Parameters for loading the GuC */
+#define GUC_CTL_LOG_PARAMS		0
+#define   GUC_LOG_VALID			BIT(0)
+#define   GUC_LOG_NOTIFY_ON_HALF_FULL	BIT(1)
+#define   GUC_LOG_CAPTURE_ALLOC_UNITS	BIT(2)
+#define   GUC_LOG_LOG_ALLOC_UNITS	BIT(3)
+#define   GUC_LOG_CRASH_SHIFT		4
+#define   GUC_LOG_CRASH_MASK		(0x3 << GUC_LOG_CRASH_SHIFT)
+#define   GUC_LOG_DEBUG_SHIFT		6
+#define   GUC_LOG_DEBUG_MASK	        (0xF << GUC_LOG_DEBUG_SHIFT)
+#define   GUC_LOG_CAPTURE_SHIFT		10
+#define   GUC_LOG_CAPTURE_MASK	        (0x3 << GUC_LOG_CAPTURE_SHIFT)
+#define   GUC_LOG_BUF_ADDR_SHIFT	12
+
+#define GUC_CTL_WA			1
+#define   GUC_WA_GAM_CREDITS		BIT(10)
+#define   GUC_WA_DUAL_QUEUE		BIT(11)
+#define   GUC_WA_RCS_RESET_BEFORE_RC6	BIT(13)
+#define   GUC_WA_CONTEXT_ISOLATION	BIT(15)
+#define   GUC_WA_PRE_PARSER		BIT(14)
+#define   GUC_WA_HOLD_CCS_SWITCHOUT	BIT(17)
+#define   GUC_WA_POLLCS			BIT(18)
+#define   GUC_WA_RENDER_RST_RC6_EXIT	BIT(19)
+#define   GUC_WA_RCS_REGS_IN_CCS_REGS_LIST	BIT(21)
+
+#define GUC_CTL_FEATURE			2
+#define   GUC_CTL_ENABLE_SLPC		BIT(2)
+#define   GUC_CTL_DISABLE_SCHEDULER	BIT(14)
+
+#define GUC_CTL_DEBUG			3
+#define   GUC_LOG_VERBOSITY_SHIFT	0
+#define   GUC_LOG_VERBOSITY_LOW		(0 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_MED		(1 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_HIGH	(2 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_ULTRA	(3 << GUC_LOG_VERBOSITY_SHIFT)
+#define	  GUC_LOG_VERBOSITY_MIN		0
+#define	  GUC_LOG_VERBOSITY_MAX		3
+#define	  GUC_LOG_VERBOSITY_MASK	0x0000000f
+#define	  GUC_LOG_DESTINATION_MASK	(3 << 4)
+#define   GUC_LOG_DISABLED		(1 << 6)
+#define   GUC_PROFILE_ENABLED		(1 << 7)
+
+#define GUC_CTL_ADS			4
+#define   GUC_ADS_ADDR_SHIFT		1
+#define   GUC_ADS_ADDR_MASK		(0xFFFFF << GUC_ADS_ADDR_SHIFT)
+
+#define GUC_CTL_DEVID			5
+
+#define GUC_CTL_MAX_DWORDS		14
+
+/* Scheduling policy settings */
+
+#define GLOBAL_POLICY_MAX_NUM_WI 15
+
+/* Don't reset an engine upon preemption failure */
+#define GLOBAL_POLICY_DISABLE_ENGINE_RESET				BIT(0)
+
+#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
+
+struct guc_policies {
+	u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
+	/* In microseconds. How much time to allow before DPC processing is
+	 * called back via interrupt (to prevent DPC queue drain starving).
+	 * Typically 1000s of microseconds (example only, not granularity). */
+	u32 dpc_promote_time;
+
+	/* Must be set for these new values to be taken. */
+	u32 is_valid;
+
+	/* Max number of work items (WIs) to process per call. A large value
+	 * may keep CS idle. */
+	u32 max_num_work_items;
+
+	u32 global_flags;
+	u32 reserved[4];
+} __packed;
+
+/* GuC MMIO reg state struct */
+struct guc_mmio_reg {
+	u32 offset;
+	u32 value;
+	u32 flags;
+	u32 mask;
+#define GUC_REGSET_MASKED		BIT(0)
+#define GUC_REGSET_MASKED_WITH_VALUE	BIT(2)
+#define GUC_REGSET_RESTORE_ONLY		BIT(3)
+} __packed;
+
+/* GuC register sets */
+struct guc_mmio_reg_set {
+	u32 address;
+	u16 count;
+	u16 reserved;
+} __packed;
+
+/* Generic GT SysInfo data types */
+#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED		0
+#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK	1
+#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI	2
+#define GUC_GENERIC_GT_SYSINFO_MAX			16
+
+/* HW info */
+struct guc_gt_system_info {
+	u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES];
+	u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
+} __packed;
+
+enum {
+	GUC_CAPTURE_LIST_INDEX_PF = 0,
+	GUC_CAPTURE_LIST_INDEX_VF = 1,
+	GUC_CAPTURE_LIST_INDEX_MAX = 2,
+};
+
+/* GuC Additional Data Struct */
+struct guc_ads {
+	struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 reserved0;
+	u32 scheduler_policies;
+	u32 gt_system_info;
+	u32 reserved1;
+	u32 control_data;
+	u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
+	u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
+	u32 private_data;
+	u32 um_init_data;
+	u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
+	u32 reserved[14];
+} __packed;
+
+/* Engine usage stats */
+struct guc_engine_usage_record {
+	u32 current_context_index;
+	u32 last_switch_in_stamp;
+	u32 reserved0;
+	u32 total_runtime;
+	u32 reserved1[4];
+} __packed;
+
+struct guc_engine_usage {
+	struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+} __packed;
+
+/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
+enum xe_guc_recv_message {
+	XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
+	XE_GUC_RECV_MSG_EXCEPTION = BIT(30),
+};
+
+/* Page fault structures */
+struct access_counter_desc {
+	u32 dw0;
+#define ACCESS_COUNTER_TYPE	BIT(0)
+#define ACCESS_COUNTER_SUBG_LO	GENMASK(31, 1)
+
+	u32 dw1;
+#define ACCESS_COUNTER_SUBG_HI	BIT(0)
+#define ACCESS_COUNTER_RSVD0	GENMASK(2, 1)
+#define ACCESS_COUNTER_ENG_INSTANCE	GENMASK(8, 3)
+#define ACCESS_COUNTER_ENG_CLASS	GENMASK(11, 9)
+#define ACCESS_COUNTER_ASID	GENMASK(31, 12)
+
+	u32 dw2;
+#define ACCESS_COUNTER_VFID	GENMASK(5, 0)
+#define ACCESS_COUNTER_RSVD1	GENMASK(7, 6)
+#define ACCESS_COUNTER_GRANULARITY	GENMASK(10, 8)
+#define ACCESS_COUNTER_RSVD2	GENMASK(16, 11)
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)
+
+	u32 dw3;
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
+} __packed;
+
+enum guc_um_queue_type {
+	GUC_UM_HW_QUEUE_PAGE_FAULT = 0,
+	GUC_UM_HW_QUEUE_PAGE_FAULT_RESPONSE,
+	GUC_UM_HW_QUEUE_ACCESS_COUNTER,
+	GUC_UM_HW_QUEUE_MAX
+};
+
+struct guc_um_queue_params {
+	u64 base_dpa;
+	u32 base_ggtt_address;
+	u32 size_in_bytes;
+	u32 rsvd[4];
+} __packed;
+
+struct guc_um_init_params {
+	u64 page_response_timeout_in_us;
+	u32 rsvd[6];
+	struct guc_um_queue_params queue_params[GUC_UM_HW_QUEUE_MAX];
+} __packed;
+
+enum xe_guc_fault_reply_type {
+	PFR_ACCESS = 0,
+	PFR_ENGINE,
+	PFR_VFID,
+	PFR_ALL,
+	PFR_INVALID
+};
+
+enum xe_guc_response_desc_type {
+	TLB_INVALIDATION_DESC = 0,
+	FAULT_RESPONSE_DESC
+};
+
+struct xe_guc_pagefault_desc {
+	u32 dw0;
+#define PFD_FAULT_LEVEL		GENMASK(2, 0)
+#define PFD_SRC_ID		GENMASK(10, 3)
+#define PFD_RSVD_0		GENMASK(17, 11)
+#define XE2_PFD_TRVA_FAULT	BIT(18)
+#define PFD_ENG_INSTANCE	GENMASK(24, 19)
+#define PFD_ENG_CLASS		GENMASK(27, 25)
+#define PFD_PDATA_LO		GENMASK(31, 28)
+
+	u32 dw1;
+#define PFD_PDATA_HI		GENMASK(11, 0)
+#define PFD_PDATA_HI_SHIFT	4
+#define PFD_ASID		GENMASK(31, 12)
+
+	u32 dw2;
+#define PFD_ACCESS_TYPE		GENMASK(1, 0)
+#define PFD_FAULT_TYPE		GENMASK(3, 2)
+#define PFD_VFID		GENMASK(9, 4)
+#define PFD_RSVD_1		GENMASK(11, 10)
+#define PFD_VIRTUAL_ADDR_LO	GENMASK(31, 12)
+#define PFD_VIRTUAL_ADDR_LO_SHIFT 12
+
+	u32 dw3;
+#define PFD_VIRTUAL_ADDR_HI	GENMASK(31, 0)
+#define PFD_VIRTUAL_ADDR_HI_SHIFT 32
+} __packed;
+
+struct xe_guc_pagefault_reply {
+	u32 dw0;
+#define PFR_VALID		BIT(0)
+#define PFR_SUCCESS		BIT(1)
+#define PFR_REPLY		GENMASK(4, 2)
+#define PFR_RSVD_0		GENMASK(9, 5)
+#define PFR_DESC_TYPE		GENMASK(11, 10)
+#define PFR_ASID		GENMASK(31, 12)
+
+	u32 dw1;
+#define PFR_VFID		GENMASK(5, 0)
+#define PFR_RSVD_1		BIT(6)
+#define PFR_ENG_INSTANCE	GENMASK(12, 7)
+#define PFR_ENG_CLASS		GENMASK(15, 13)
+#define PFR_PDATA		GENMASK(31, 16)
+
+	u32 dw2;
+#define PFR_RSVD_2		GENMASK(31, 0)
+} __packed;
+
+struct xe_guc_acc_desc {
+	u32 dw0;
+#define ACC_TYPE	BIT(0)
+#define ACC_TRIGGER	0
+#define ACC_NOTIFY	1
+#define ACC_SUBG_LO	GENMASK(31, 1)
+
+	u32 dw1;
+#define ACC_SUBG_HI	BIT(0)
+#define ACC_RSVD0	GENMASK(2, 1)
+#define ACC_ENG_INSTANCE	GENMASK(8, 3)
+#define ACC_ENG_CLASS	GENMASK(11, 9)
+#define ACC_ASID	GENMASK(31, 12)
+
+	u32 dw2;
+#define ACC_VFID	GENMASK(5, 0)
+#define ACC_RSVD1	GENMASK(7, 6)
+#define ACC_GRANULARITY	GENMASK(10, 8)
+#define ACC_RSVD2	GENMASK(16, 11)
+#define ACC_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)
+
+	u32 dw3;
+#define ACC_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
new file mode 100644
index 000000000000..8dfd48f71a7c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_map.h"
+
+/* Resolve the xe_gt that embeds @guc as its uc.guc member. */
+static struct xe_gt *guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+/* Resolve the xe_device owning the GT that embeds @guc. */
+static struct xe_device *guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+/*
+ * Issue XE_GUC_ACTION_GET_HWCONFIG over MMIO with the given GGTT address and
+ * size, returning whatever xe_guc_send_mmio() returns.
+ */
+static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
+{
+	u32 request[] = { XE_GUC_ACTION_GET_HWCONFIG, lower_32_bits(ggtt_addr),
+			  upper_32_bits(ggtt_addr), size };
+
+	return xe_guc_send_mmio(guc, request, ARRAY_SIZE(request));
+}
+
+/* Query the hwconfig table size via a zero address/size request. */
+static int guc_hwconfig_size(struct xe_guc *guc, u32 *size)
+{
+	int reply = send_get_hwconfig(guc, 0, 0);
+
+	if (reply >= 0)
+		*size = reply;
+
+	return reply < 0 ? reply : 0;
+}
+
+/*
+ * Send the hwconfig request with the BO's GGTT address and the cached size.
+ * Negative results are passed through, anything else becomes 0.
+ */
+static int guc_hwconfig_copy(struct xe_guc *guc)
+{
+	u32 addr = xe_bo_ggtt_addr(guc->hwconfig.bo);
+	int err = send_get_hwconfig(guc, addr, guc->hwconfig.size);
+	return err < 0 ? err : 0;
+}
+
+static void guc_hwconfig_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg; /* as passed to drmm_add_action_or_reset() */
+
+	xe_bo_unpin_map_no_vm(guc->hwconfig.bo);
+}
+
+/**
+ * xe_guc_hwconfig_init - Fetch the hwconfig table from the GuC
+ * @guc: The GuC object
+ *
+ * Queries the table size, creates a pinned BO with the GGTT flag for it,
+ * registers a managed action that unpins the BO and finally sends the
+ * hwconfig request with the BO's address. Nothing is done if the BO already
+ * exists, on GTs other than GT0, or when GRAPHICS_VERx100() is below 1255 on
+ * a platform other than ADL_P.
+ *
+ * Return: 0 on success or when skipped, negative error code on failure.
+ */
+int xe_guc_hwconfig_init(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	u32 size;
+	int err;
+
+	/* Already initialized, or not GT0 (hwconfig is the same on all GTs) */
+	if (guc->hwconfig.bo || gt->info.id != XE_GT0)
+		return 0;
+
+	/* Only ADL_P and DG2+ support the hwconfig table */
+	if (GRAPHICS_VERx100(xe) < 1255 && xe->info.platform != XE_ALDERLAKE_P)
+		return 0;
+
+	err = guc_hwconfig_size(guc, &size);
+	if (err || !size)
+		return err ?: -EINVAL;
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, PAGE_ALIGN(size), ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) | XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	/* Publish the BO first: guc_hwconfig_fini() reads it from @guc */
+	guc->hwconfig.size = size;
+	guc->hwconfig.bo = bo;
+	err = drmm_add_action_or_reset(&xe->drm, guc_hwconfig_fini, guc);
+	return err ? err : guc_hwconfig_copy(guc);
+}
+
+u32 xe_guc_hwconfig_size(struct xe_guc *guc)
+{
+	return guc->hwconfig.bo ? guc->hwconfig.size : 0; /* 0: no hwconfig BO */
+}
+
+/* Copy the stored hwconfig table (guc->hwconfig.size bytes) into @dst. */
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst)
+{
+	struct xe_bo *bo = guc->hwconfig.bo;
+
+	XE_BUG_ON(!bo);
+
+	xe_map_memcpy_from(guc_to_xe(guc), dst, &bo->vmap, 0, guc->hwconfig.size);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
new file mode 100644
index 000000000000..b5794d641900
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_HWCONFIG_H_
+#define _XE_GUC_HWCONFIG_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+
+int xe_guc_hwconfig_init(struct xe_guc *guc);
+u32 xe_guc_hwconfig_size(struct xe_guc *guc);
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
new file mode 100644
index 000000000000..7ec1b2bb1f8e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_guc_log.h"
+#include "xe_map.h"
+#include "xe_module.h"
+
+/* Resolve the xe_gt that embeds @log as its uc.guc.log member. */
+static struct xe_gt *log_to_gt(struct xe_guc_log *log)
+{
+	return container_of(log, struct xe_gt, uc.guc.log);
+}
+
+/* Resolve the xe_device owning the GT that embeds @log. */
+static struct xe_device *log_to_xe(struct xe_guc_log *log)
+{
+	return gt_to_xe(log_to_gt(log));
+}
+
+static size_t guc_log_size(void)
+{
+	/*
+	 *  GuC Log buffer Layout (the returned size is the sum of all regions)
+	 *
+	 *  +===============================+ 00B
+	 *  |    Crash dump state header    |
+	 *  +-------------------------------+ 32B
+	 *  |      Debug state header       |
+	 *  +-------------------------------+ 64B
+	 *  |     Capture state header      |
+	 *  +-------------------------------+ 96B
+	 *  |                               |
+	 *  +===============================+ PAGE_SIZE (4KB)
+	 *  |        Crash Dump logs        |
+	 *  +===============================+ + CRASH_SIZE
+	 *  |          Debug logs           |
+	 *  +===============================+ + DEBUG_SIZE
+	 *  |         Capture logs          |
+	 *  +===============================+ + CAPTURE_SIZE
+	 */
+	return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
+		CAPTURE_BUFFER_SIZE;
+}
+
+/* Dump the entire GuC log BO to @p as hex, four dwords per output line. */
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+{
+	struct xe_device *xe = log_to_xe(log);
+	size_t size;
+	int i, j;
+
+	XE_BUG_ON(!log->bo);
+
+	size = log->bo->size;
+
+#define DW_PER_READ		128
+	XE_BUG_ON(size % (DW_PER_READ * sizeof(u32)));
+	for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
+		u32 chunk[DW_PER_READ];
+
+		/* Stage one chunk on the stack via the xe_map accessor */
+		xe_map_memcpy_from(xe, chunk, &log->bo->vmap, i * sizeof(u32),
+				   sizeof(chunk));
+
+#define DW_PER_PRINT		4
+		for (j = 0; j < DW_PER_READ; j += DW_PER_PRINT)
+			drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+				   chunk[j + 0], chunk[j + 1],
+				   chunk[j + 2], chunk[j + 3]);
+	}
+}
+
+static void guc_log_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_log *log = arg; /* as passed to drmm_add_action_or_reset() */
+
+	xe_bo_unpin_map_no_vm(log->bo);
+}
+
+/**
+ * xe_guc_log_init - Allocate and clear the GuC log buffer
+ * @log: The log object
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_guc_log_init(struct xe_guc_log *log)
+{
+	struct xe_gt *gt = log_to_gt(log);
+	struct xe_device *xe = gt_to_xe(gt);
+	size_t size = guc_log_size();
+	struct xe_bo *bo;
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, size, ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) | XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	xe_map_memset(xe, &bo->vmap, 0, 0, size);
+	log->level = xe_guc_log_level;
+	log->bo = bo;
+
+	return drmm_add_action_or_reset(&xe->drm, guc_log_fini, log);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
new file mode 100644
index 000000000000..2d25ab28b4b3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_H_
+#define _XE_GUC_LOG_H_
+
+#include "xe_guc_log_types.h"
+
+struct drm_printer;
+
+#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
+#define CRASH_BUFFER_SIZE       SZ_1M
+#define DEBUG_BUFFER_SIZE       SZ_8M
+#define CAPTURE_BUFFER_SIZE     SZ_2M
+#else
+#define CRASH_BUFFER_SIZE	SZ_8K
+#define DEBUG_BUFFER_SIZE	SZ_64K
+#define CAPTURE_BUFFER_SIZE	SZ_16K
+#endif
+/*
+ * While we're using plain log level in i915, GuC controls are much more...
+ * "elaborate"? We have a couple of bits for verbosity, separate bit for actual
+ * log enabling, and separate bit for default logging - which "conveniently"
+ * ignores the enable bit.
+ */
+#define GUC_LOG_LEVEL_DISABLED		0
+#define GUC_LOG_LEVEL_NON_VERBOSE	1
+#define GUC_LOG_LEVEL_IS_ENABLED(x)	((x) > GUC_LOG_LEVEL_DISABLED)
+#define GUC_LOG_LEVEL_IS_VERBOSE(x)	((x) > GUC_LOG_LEVEL_NON_VERBOSE)
+#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({		\
+	typeof(x) _x = (x);				\
+	GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0;	\
+})
+#define GUC_VERBOSITY_TO_LOG_LEVEL(x)	((x) + 2)
+#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)
+
+int xe_guc_log_init(struct xe_guc_log *log);
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
+
+/* Accessor for the log level stored in @log. */
+static inline u32 xe_guc_log_get_level(struct xe_guc_log *log)
+{
+	return log->level;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
new file mode 100644
index 000000000000..125080d138a7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_TYPES_H_
+#define _XE_GUC_LOG_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_log - GuC log
+ */
+struct xe_guc_log {
+	/** @level: GuC log level, as returned by xe_guc_log_get_level() */
+	u32 level;
+	/** @bo: XE BO holding the GuC log buffer */
+	struct xe_bo *bo;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
new file mode 100644
index 000000000000..227e30a482e3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_types.h"
+#include "xe_gt_sysfs.h"
+#include "xe_guc_ct.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+#include "i915_reg_defs.h"
+#include "i915_reg.h"
+
+#include "intel_mchbar_regs.h"
+
+/* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */
+#define   RP0_MASK	REG_GENMASK(7, 0)
+#define   RP1_MASK	REG_GENMASK(15, 8)
+#define   RPN_MASK	REG_GENMASK(23, 16)
+
+#define GEN10_FREQ_INFO_REC	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
+#define   RPE_MASK		REG_GENMASK(15, 8)
+
+#include "gt/intel_gt_regs.h"
+/* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */
+#define   REQ_RATIO_MASK	REG_GENMASK(31, 23)
+
+/* For GEN6_GT_CORE_STATUS.reg to be merged when the definition moves to Xe */
+#define   RCN_MASK	REG_GENMASK(2, 0)
+
+#define GEN12_RPSTAT1		_MMIO(0x1381b4)
+#define   GEN12_CAGF_MASK	REG_GENMASK(19, 11)
+
+#define GT_FREQUENCY_MULTIPLIER	50
+#define GEN9_FREQ_SCALER	3
+
+/**
+ * DOC: GuC Power Conservation (PC)
+ *
+ * GuC Power Conservation (PC) supports multiple features for the most
+ * efficient and performing use of the GT when GuC submission is enabled,
+ * including frequency management, Render-C states management, and various
+ * algorithms for power balancing.
+ *
+ * Single Loop Power Conservation (SLPC) is the name given to the suite of
+ * connected power conservation features in the GuC firmware. The firmware
+ * exposes a programming interface to the host for the control of SLPC.
+ *
+ * Frequency management:
+ * =====================
+ *
+ * Xe driver enables SLPC with all of its default features and frequency
+ * selection, which varies per platform.
+ * Xe's GuC PC provides a sysfs API for frequency management:
+ *
+ * device/gt#/freq_* *read-only* files:
+ * - freq_act: The actual resolved frequency decided by PCODE.
+ * - freq_cur: The current one requested by GuC PC to the Hardware.
+ * - freq_rpn: The Render Performance (RP) N level, which is the minimal one.
+ * - freq_rpe: The Render Performance (RP) E level, which is the efficient one.
+ * - freq_rp0: The Render Performance (RP) 0 level, which is the maximum one.
+ *
+ * device/gt#/freq_* *read-write* files:
+ * - freq_min: GuC PC min request.
+ * - freq_max: GuC PC max request.
+ *             If max <= min, then freq_min becomes a fixed frequency request.
+ *
+ * Render-C States:
+ * ================
+ *
+ * Render-C states is also a GuC PC feature that is now enabled in Xe for
+ * all platforms.
+ * Xe's GuC PC provides a sysfs API for Render-C States:
+ *
+ * device/gt#/rc* *read-only* files:
+ * - rc_status: Provide the actual immediate status of Render-C: (rc0 or rc6)
+ * - rc6_residency: Provide the rc6_residency counter in units of 1.28 uSec.
+ *                  Prone to overflows.
+ */
+
+/* Resolve the xe_guc that embeds @pc as its pc member. */
+static struct xe_guc *pc_to_guc(struct xe_guc_pc *pc)
+{
+	return container_of(pc, struct xe_guc, pc);
+}
+
+/* Walk pc -> guc -> gt to reach the owning xe_device. */
+static struct xe_device *pc_to_xe(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt;
+
+	gt = container_of(pc_to_guc(pc), struct xe_gt, uc.guc);
+	return gt_to_xe(gt);
+}
+
+/* Resolve the xe_gt that embeds @pc as its uc.guc.pc member. */
+static struct xe_gt *pc_to_gt(struct xe_guc_pc *pc)
+{
+	return container_of(pc, struct xe_gt, uc.guc.pc);
+}
+
+/* Map a sysfs device to the GuC PC of the GT found via its kobject. */
+static struct xe_guc_pc *dev_to_pc(struct device *dev)
+{
+	return &kobj_to_gt(&dev->kobj)->uc.guc.pc;
+}
+
+/* Mapping of the PC BO, as consumed by the slpc_shared_data_*() accessors. */
+static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc)
+{
+	return &pc->bo->vmap;
+}
+
+#define slpc_shared_data_read(pc_, field_) \
+	xe_map_rd_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+			struct slpc_shared_data, field_)
+
+#define slpc_shared_data_write(pc_, field_, val_) \
+	xe_map_wr_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+			struct slpc_shared_data, field_, val_)
+
+#define SLPC_EVENT(id, count) \
+	(FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \
+	 FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
+
+static bool pc_is_in_state(struct xe_guc_pc *pc, enum slpc_global_state state)
+{
+	xe_device_assert_mem_access(pc_to_xe(pc)); /* shared data is read below */
+	return slpc_shared_data_read(pc, header.global_state) == state;
+}
+
+/* Send the SLPC reset event, passing the shared-data BO's GGTT address. */
+static int pc_action_reset(struct xe_guc_pc *pc)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_RESET, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe\n", ERR_PTR(ret));
+	return ret;
+}
+
+/* Send the SLPC shutdown event, passing the shared-data BO's GGTT address. */
+static int pc_action_shutdown(struct xe_guc_pc *pc)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe\n",
+			ERR_PTR(ret));
+	return ret;
+}
+
+/* Send a blocking SLPC task-state query; -EAGAIN unless SLPC is RUNNING. */
+static int pc_action_query_task_state(struct xe_guc_pc *pc)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+		return -EAGAIN;
+
+	/* Blocking here to ensure the results are ready before reading them */
+	ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm,
+			"GuC PC query task state failed: %pe\n", ERR_PTR(ret));
+	return ret;
+}
+
+/* Send an SLPC parameter-set event for @id/@value; -EAGAIN unless RUNNING. */
+static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+
+	if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+		return -EAGAIN;
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe\n",
+			ERR_PTR(ret));
+	return ret;
+}
+
+/* Send XE_GUC_ACTION_SETUP_PC_GUCRC with the requested @mode. */
+static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	u32 action[] = {
+		XE_GUC_ACTION_SETUP_PC_GUCRC,
+		mode,
+	};
+	int ret;
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe\n", ERR_PTR(ret));
+	return ret;
+}
+
+/* Scale a raw ratio by GT_FREQUENCY_MULTIPLIER / GEN9_FREQ_SCALER, rounded. */
+static u32 decode_freq(u32 raw)
+{
+	return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER);
+}
+
+/*
+ * Extract the min unslice frequency field from the SLPC task state data in
+ * the shared-data BO and decode it.
+ */
+static u32 pc_get_min_freq(struct xe_guc_pc *pc)
+{
+	return decode_freq(FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK,
+				     slpc_shared_data_read(pc, task_state_data.freq)));
+}
+
+/*
+ * Set the SLPC minimum frequency. Only the rpn..rp0 range is validated; when
+ * max < min, min acts as a fixed request.
+ */
+static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	bool below_rpe = freq < pc->rpe_freq;
+
+	if (freq < pc->rpn_freq || freq > pc->rp0_freq)
+		return -EINVAL;
+
+	/*
+	 * GuC policy elevates the minimum to the efficient level; ask it to
+	 * ignore that whenever the admin asks for less. Result is not checked.
+	 */
+	pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, below_rpe);
+
+	return pc_action_set_param(pc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+				   freq);
+}
+
+/*
+ * Extract the max unslice frequency field from the SLPC task state data in
+ * the shared-data BO and decode it.
+ */
+static int pc_get_max_freq(struct xe_guc_pc *pc)
+{
+	return decode_freq(FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK,
+				     slpc_shared_data_read(pc, task_state_data.freq)));
+}
+
+/*
+ * Set the SLPC maximum frequency. Only the rpn..rp0 range is validated; when
+ * max < min, min acts as a fixed request. Overclocking is not supported.
+ */
+static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	bool in_range = freq >= pc->rpn_freq && freq <= pc->rp0_freq;
+
+	if (!in_range)
+		return -EINVAL;
+
+	return pc_action_set_param(pc, SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+				   freq);
+}
+
+/*
+ * Re-read the RPe value from hardware into pc->rpe_freq and lower
+ * pc->rpn_freq to it if RPe is the smaller of the two.
+ */
+static void pc_update_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 raw;
+
+	/*
+	 * PVC: the fused RP1 from PVC_RP_STATE_CAP still serves as the
+	 * approximation for RPe. Elsewhere the resolved RPe comes directly
+	 * from PCODE at a different register.
+	 */
+	raw = xe_mmio_read32(gt, gt_to_xe(gt)->info.platform == XE_PVC ?
+			     PVC_RP_STATE_CAP.reg : GEN10_FREQ_INFO_REC.reg);
+	pc->rpe_freq = REG_FIELD_GET(RPE_MASK, raw) * GT_FREQUENCY_MULTIPLIER;
+
+	/*
+	 * RPe is decided at runtime by PCODE. In the rare case where it is
+	 * below the fused min, trust PCODE and use RPe as our minimum.
+	 */
+	pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
+}
+
+/* sysfs show: decoded CAGF field of GEN12_RPSTAT1, read with forcewake held. */
+static ssize_t freq_act_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_gt *gt = kobj_to_gt(&dev->kobj);
+	u32 rpstat;
+	ssize_t len;
+
+	/*
+	 * When in RC6, actual frequency is 0. Let's block RC6 so we are able
+	 * to verify that our freq requests are really happening.
+	 */
+	len = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (len)
+		return len;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	rpstat = xe_mmio_read32(gt, GEN12_RPSTAT1.reg);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	len = sysfs_emit(buf, "%d\n",
+			 decode_freq(REG_FIELD_GET(GEN12_CAGF_MASK, rpstat)));
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	return len;
+}
+static DEVICE_ATTR_RO(freq_act);
+
+/* sysfs show: decoded request ratio of GEN6_RPNSWREQ, read with forcewake held. */
+static ssize_t freq_cur_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_gt *gt = kobj_to_gt(&dev->kobj);
+	u32 rpnswreq;
+	ssize_t len;
+
+	/*
+	 * GuC SLPC plays with cur freq request when GuCRC is enabled
+	 * Block RC6 for a more reliable read.
+	 */
+	len = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (len)
+		return len;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	rpnswreq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	len = sysfs_emit(buf, "%d\n",
+			 decode_freq(REG_FIELD_GET(REQ_RATIO_MASK, rpnswreq)));
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	return len;
+}
+static DEVICE_ATTR_RO(freq_cur);
+
+/* sysfs show: print the stored pc->rp0_freq value. */
+static ssize_t freq_rp0_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", dev_to_pc(dev)->rp0_freq);
+}
+static DEVICE_ATTR_RO(freq_rp0);
+
+/* sysfs show: re-read RPe over MMIO, under a device memory access reference. */
+static ssize_t freq_rpe_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	pc_update_rp_values(pc);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return sysfs_emit(buf, "%d\n", pc->rpe_freq);
+}
+static DEVICE_ATTR_RO(freq_rpe);
+
+static ssize_t freq_rpn_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", pc->rpn_freq);	/* stored value only */
+}
+static DEVICE_ATTR_RO(freq_rpn);
+
+static ssize_t freq_min_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	struct xe_device *xe = pc_to_xe(pc);
+	struct xe_gt *gt = pc_to_gt(pc);
+	ssize_t len;
+
+	xe_device_mem_access_get(xe);
+	mutex_lock(&pc->freq_lock);
+
+	/* Frequencies are not ready, e.g. in the middle of a gt reset */
+	if (!pc->freq_ready) {
+		len = -EAGAIN;
+		goto unlock;
+	}
+
+	/*
+	 * With GuCRC enabled, GuC SLPC adjusts the min frequency request.
+	 * Hold forcewake (blocking RC6) for a more reliable read.
+	 */
+	len = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (len)
+		goto unlock;
+
+	/* Only emit a value if the task state query succeeded */
+	len = pc_action_query_task_state(pc);
+	if (!len)
+		len = sysfs_emit(buf, "%d\n", pc_get_min_freq(pc));
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+unlock:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(xe);
+	return len;
+}
+
+static ssize_t freq_min_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	struct xe_device *xe = pc_to_xe(pc);
+	ssize_t err;
+	u32 req;
+
+	/* Base 0: kstrtou32() picks the radix from the input's prefix */
+	err = kstrtou32(buff, 0, &req);
+	if (err)
+		return err;
+
+	xe_device_mem_access_get(xe);
+	mutex_lock(&pc->freq_lock);
+
+	/* Frequencies are not ready, e.g. in the middle of a gt reset */
+	if (!pc->freq_ready)
+		err = -EAGAIN;
+	else
+		err = pc_set_min_freq(pc, req);
+
+	/* Stash the request only once it has been applied successfully */
+	if (!err)
+		pc->user_requested_min = req;
+
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(xe);
+	return err ?: count;
+}
+static DEVICE_ATTR_RW(freq_min);
+
+static ssize_t freq_max_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	struct xe_device *xe = pc_to_xe(pc);
+	ssize_t len;
+
+	xe_device_mem_access_get(xe);
+	mutex_lock(&pc->freq_lock);
+
+	/* Frequencies are not ready, e.g. in the middle of a gt reset */
+	if (!pc->freq_ready) {
+		len = -EAGAIN;
+		goto unlock;
+	}
+
+	/* Only emit a value if the task state query succeeded */
+	len = pc_action_query_task_state(pc);
+	if (!len)
+		len = sysfs_emit(buf, "%d\n", pc_get_max_freq(pc));
+unlock:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(xe);
+	return len;
+}
+
+static ssize_t freq_max_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	struct xe_device *xe = pc_to_xe(pc);
+	ssize_t err;
+	u32 req;
+
+	/* Base 0: kstrtou32() picks the radix from the input's prefix */
+	err = kstrtou32(buff, 0, &req);
+	if (err)
+		return err;
+
+	xe_device_mem_access_get(xe);
+	mutex_lock(&pc->freq_lock);
+
+	/* Frequencies are not ready, e.g. in the middle of a gt reset */
+	if (!pc->freq_ready)
+		err = -EAGAIN;
+	else
+		err = pc_set_max_freq(pc, req);
+
+	/* Stash the request only once it has been applied successfully */
+	if (!err)
+		pc->user_requested_max = req;
+
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(xe);
+	return err ?: count;
+}
+static DEVICE_ATTR_RW(freq_max);
+
+static ssize_t rc_status_show(struct device *dev,
+			      struct device_attribute *attr, char *buff)
+{
+	struct xe_gt *gt = pc_to_gt(dev_to_pc(dev));
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 rcn;
+
+	xe_device_mem_access_get(xe);
+	rcn = xe_mmio_read32(gt, GEN6_GT_CORE_STATUS.reg);
+	xe_device_mem_access_put(xe);
+
+	/* Only RC6 and RC0 are reported; any other state is -ENOENT */
+	rcn = REG_FIELD_GET(RCN_MASK, rcn);
+	if (rcn == GEN6_RC6)
+		return sysfs_emit(buff, "rc6\n");
+	if (rcn == GEN6_RC0)
+		return sysfs_emit(buff, "rc0\n");
+
+	return -ENOENT;
+}
+static DEVICE_ATTR_RO(rc_status);
+
+static ssize_t rc6_residency_show(struct device *dev,
+				  struct device_attribute *attr, char *buff)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	struct xe_device *xe = pc_to_xe(pc);
+	struct xe_gt *gt = pc_to_gt(pc);
+	ssize_t len;
+	u32 rc6;
+
+	len = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (len)
+		return len;
+
+	/* The raw register value is emitted as-is, without unit conversion */
+	xe_device_mem_access_get(xe);
+	rc6 = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg);
+	xe_device_mem_access_put(xe);
+	len = sysfs_emit(buff, "%u\n", rc6);
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	return len;
+}
+static DEVICE_ATTR_RO(rc6_residency);
+
+static const struct attribute *pc_attrs[] = {
+	&dev_attr_freq_act.attr,
+	&dev_attr_freq_cur.attr,
+	&dev_attr_freq_rp0.attr,
+	&dev_attr_freq_rpe.attr,
+	&dev_attr_freq_rpn.attr,
+	&dev_attr_freq_min.attr,
+	&dev_attr_freq_max.attr,
+	&dev_attr_rc_status.attr,
+	&dev_attr_rc6_residency.attr,
+	NULL
+};
+
+static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	bool is_pvc = xe->info.platform == XE_PVC;
+	u32 cap;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	/* PVC exposes the RP state capabilities in its own register */
+	cap = xe_mmio_read32(gt, is_pvc ? PVC_RP_STATE_CAP.reg :
+					  GEN6_RP_STATE_CAP.reg);
+	pc->rp0_freq = REG_FIELD_GET(RP0_MASK, cap) * GT_FREQUENCY_MULTIPLIER;
+	pc->rpn_freq = REG_FIELD_GET(RPN_MASK, cap) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
+{
+	int err;
+
+	lockdep_assert_held(&pc->freq_lock);
+
+	err = pc_action_query_task_state(pc);
+	if (err)
+		return err;
+
+	/*
+	 * The RPmax that GuC defaults to is not achievable without
+	 * overclocking, so clamp it to the hardware RP0, the regular maximum.
+	 * The result of the clamp request itself is not checked.
+	 */
+	if (pc->rp0_freq < pc_get_max_freq(pc))
+		pc_set_max_freq(pc, pc->rp0_freq);
+
+	/*
+	 * Server platforms list RPMax as the min as well: clamp it likewise
+	 * (again without checking the result).
+	 */
+	if (pc->rp0_freq < pc_get_min_freq(pc))
+		pc_set_min_freq(pc, pc->rp0_freq);
+
+	return 0;
+}
+
+static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
+{
+	int err = 0;
+
+	lockdep_assert_held(&pc->freq_lock);
+
+	/*
+	 * Re-apply the limits stashed from earlier user requests, min first.
+	 * A stashed value of 0 is skipped.
+	 */
+	if (pc->user_requested_min)
+		err = pc_set_min_freq(pc, pc->user_requested_min);
+	if (err)
+		return err;
+
+	if (pc->user_requested_max)
+		err = pc_set_max_freq(pc, pc->user_requested_max);
+
+	return err;
+}
+
+static int pc_gucrc_disable(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	int err;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	/* Switch GuC RC to host control before touching the registers */
+	err = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
+	if (!err)
+		err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		return err;
+
+	/* Zero the PG enable, RC control and RC state registers */
+	xe_mmio_write32(gt, GEN9_PG_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GEN6_RC_CONTROL.reg, 0);
+	xe_mmio_write32(gt, GEN6_RC_STATE.reg, 0);
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	return 0;
+}
+
+static void pc_init_pcode_freq(struct xe_guc_pc *pc)
+{
+	u32 rpn = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER);
+	u32 rp0 = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER);
+
+	XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), rpn, rp0));
+}
+
+static int pc_init_freqs(struct xe_guc_pc *pc)
+{
+	int err;
+
+	mutex_lock(&pc->freq_lock);
+
+	/* Clamp the GuC defaults first, then restore the user's requests */
+	err = pc_adjust_freq_bounds(pc);
+	if (!err)
+		err = pc_adjust_requested_freq(pc);
+	if (err)
+		goto unlock;
+
+	pc_update_rp_values(pc);
+	pc_init_pcode_freq(pc);
+
+	/*
+	 * Only now, with the user requested values restored, may the
+	 * frequencies be handed out: let the sysfs handlers proceed
+	 * instead of bailing out with -EAGAIN.
+	 */
+	pc->freq_ready = true;
+
+unlock:
+	mutex_unlock(&pc->freq_lock);
+
+	return err;
+}
+
+/**
+ * xe_guc_pc_start - Start GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_guc_pc_start(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+	int ret;
+
+	XE_WARN_ON(!xe_device_guc_submission_enabled(xe));
+	xe_device_mem_access_get(xe);
+	memset(pc->bo->vmap.vaddr, 0, size);
+	slpc_shared_data_write(pc, header.size, size);
+
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out_mem_access;	/* still drop the mem access ref */
+
+	ret = pc_action_reset(pc);
+	if (ret)
+		goto out;
+
+	if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING), 5)) {
+		drm_err(&xe->drm, "GuC PC Start failed\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = pc_init_freqs(pc);
+	if (ret)
+		goto out;
+
+	/* PVC: GuC RC gets disabled; a failure to do so is not reported */
+	if (xe->info.platform == XE_PVC) {
+		pc_gucrc_disable(pc);
+		goto out;
+	}
+
+	ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL);
+out:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out_mem_access:
+	xe_device_mem_access_put(xe);
+	return ret;
+}
+
+/**
+ * xe_guc_pc_stop - Stop GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_stop(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+	int err;
+
+	xe_device_mem_access_get(xe);
+	err = pc_gucrc_disable(pc);
+	if (err)
+		goto put;
+
+	/* From here on the sysfs freq handlers bail out with -EAGAIN */
+	mutex_lock(&pc->freq_lock);
+	pc->freq_ready = false;
+	mutex_unlock(&pc->freq_lock);
+
+	err = pc_action_shutdown(pc);
+	if (err)
+		goto put;
+
+	if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING), 5)) {
+		drm_err(&xe->drm, "GuC PC Shutdown failed\n");
+		err = -EIO;
+	}
+put:
+	xe_device_mem_access_put(xe);
+	return err;
+}
+
+static void pc_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_pc *pc = arg;	/* as registered by xe_guc_pc_init() */
+
+	XE_WARN_ON(xe_guc_pc_stop(pc));
+	sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
+	xe_bo_unpin_map_no_vm(pc->bo);
+}
+
+/**
+ * xe_guc_pc_init - Initialize GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_guc_pc_init(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+	struct xe_bo *bo;
+	int err;
+
+	mutex_init(&pc->freq_lock);
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, size,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	pc->bo = bo;
+	pc_init_fused_rp_values(pc);
+
+	err = sysfs_create_files(gt->sysfs, pc_attrs);
+	if (err) {
+		/* pc_fini() is not registered yet, so drop the BO here */
+		xe_bo_unpin_map_no_vm(bo);
+		return err;
+	}
+
+	/* On failure the action runs immediately, i.e. pc_fini() cleans up */
+	return drmm_add_action_or_reset(&xe->drm, pc_fini, pc);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
new file mode 100644
index 000000000000..da29e4934868
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_H_
+#define _XE_GUC_PC_H_
+
+#include "xe_guc_pc_types.h"
+
+int xe_guc_pc_init(struct xe_guc_pc *pc);
+int xe_guc_pc_start(struct xe_guc_pc *pc);
+int xe_guc_pc_stop(struct xe_guc_pc *pc);
+
+#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
new file mode 100644
index 000000000000..39548e03acf4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_TYPES_H_
+#define _XE_GUC_PC_TYPES_H_
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+/**
+ * struct xe_guc_pc - GuC Power Conservation (PC)
+ */
+struct xe_guc_pc {
+	/** @bo: GGTT buffer object that is shared with GuC PC */
+	struct xe_bo *bo;
+	/** @rp0_freq: HW RP0 frequency - The Maximum one */
+	u32 rp0_freq;
+	/** @rpe_freq: HW RPe frequency - The Efficient one */
+	u32 rpe_freq;
+	/** @rpn_freq: HW RPN frequency - The Minimum one */
+	u32 rpn_freq;
+	/** @user_requested_min: Stash the minimum requested freq by user */
+	u32 user_requested_min;
+	/** @user_requested_max: Stash the maximum requested freq by user */
+	u32 user_requested_max;
+	/** @freq_lock: Let's protect the frequencies */
+	struct mutex freq_lock;
+	/** @freq_ready: Only handle freq changes, if they are really ready */
+	bool freq_ready;
+};
+
+#endif	/* _XE_GUC_PC_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h
new file mode 100644
index 000000000000..1e16a9b76ddc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_reg.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_REG_H_
+#define _XE_GUC_REG_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "i915_reg.h"
+
+/* Definitions of GuC H/W registers, bits, etc */
+
+#define GUC_STATUS			_MMIO(0xc000)
+#define   GS_RESET_SHIFT		0
+#define   GS_MIA_IN_RESET		  (0x01 << GS_RESET_SHIFT)
+#define   GS_BOOTROM_SHIFT		1
+#define   GS_BOOTROM_MASK		  (0x7F << GS_BOOTROM_SHIFT)
+#define   GS_BOOTROM_RSA_FAILED		  (0x50 << GS_BOOTROM_SHIFT)
+#define   GS_BOOTROM_JUMP_PASSED	  (0x76 << GS_BOOTROM_SHIFT)
+#define   GS_UKERNEL_SHIFT		8
+#define   GS_UKERNEL_MASK		  (0xFF << GS_UKERNEL_SHIFT)
+#define   GS_MIA_SHIFT			16
+#define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
+#define   GS_MIA_CORE_STATE		  (0x01 << GS_MIA_SHIFT)
+#define   GS_MIA_HALT_REQUESTED		  (0x02 << GS_MIA_SHIFT)
+#define   GS_MIA_ISR_ENTRY		  (0x04 << GS_MIA_SHIFT)
+#define   GS_AUTH_STATUS_SHIFT		30
+#define   GS_AUTH_STATUS_MASK		  (0x03 << GS_AUTH_STATUS_SHIFT)
+#define   GS_AUTH_STATUS_BAD		  (0x01 << GS_AUTH_STATUS_SHIFT)
+#define   GS_AUTH_STATUS_GOOD		  (0x02 << GS_AUTH_STATUS_SHIFT)
+
+#define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
+#define SOFT_SCRATCH_COUNT		16
+
+#define GEN11_SOFT_SCRATCH(n)		_MMIO(0x190240 + (n) * 4)
+#define GEN11_SOFT_SCRATCH_COUNT	4
+
+#define UOS_RSA_SCRATCH(i)		_MMIO(0xc200 + (i) * 4)
+#define UOS_RSA_SCRATCH_COUNT		64
+
+#define DMA_ADDR_0_LOW			_MMIO(0xc300)
+#define DMA_ADDR_0_HIGH			_MMIO(0xc304)
+#define DMA_ADDR_1_LOW			_MMIO(0xc308)
+#define DMA_ADDR_1_HIGH			_MMIO(0xc30c)
+#define   DMA_ADDRESS_SPACE_WOPCM	  (7 << 16)
+#define   DMA_ADDRESS_SPACE_GTT		  (8 << 16)
+#define DMA_COPY_SIZE			_MMIO(0xc310)
+#define DMA_CTRL			_MMIO(0xc314)
+#define   HUC_UKERNEL			  (1<<9)
+#define   UOS_MOVE			  (1<<4)
+#define   START_DMA			  (1<<0)
+#define DMA_GUC_WOPCM_OFFSET		_MMIO(0xc340)
+#define   GUC_WOPCM_OFFSET_VALID	  (1<<0)
+#define   HUC_LOADING_AGENT_VCR		  (0<<1)
+#define   HUC_LOADING_AGENT_GUC		  (1<<1)
+#define   GUC_WOPCM_OFFSET_SHIFT	14
+#define   GUC_WOPCM_OFFSET_MASK		  (0x3ffff << GUC_WOPCM_OFFSET_SHIFT)
+#define GUC_MAX_IDLE_COUNT		_MMIO(0xC3E4)
+
+#define HUC_STATUS2             _MMIO(0xD3B0)
+#define   HUC_FW_VERIFIED       (1<<7)
+
+#define GEN11_HUC_KERNEL_LOAD_INFO	_MMIO(0xC1DC)
+#define   HUC_LOAD_SUCCESSFUL		  (1 << 0)
+
+#define GUC_WOPCM_SIZE			_MMIO(0xc050)
+#define   GUC_WOPCM_SIZE_LOCKED		  (1<<0)
+#define   GUC_WOPCM_SIZE_SHIFT		12
+#define   GUC_WOPCM_SIZE_MASK		  (0xfffff << GUC_WOPCM_SIZE_SHIFT)
+
+#define GEN8_GT_PM_CONFIG		_MMIO(0x138140)
+#define GEN9LP_GT_PM_CONFIG		_MMIO(0x138140)
+#define GEN9_GT_PM_CONFIG		_MMIO(0x13816c)
+#define   GT_DOORBELL_ENABLE		  (1<<0)
+
+#define GEN8_GTCR			_MMIO(0x4274)
+#define   GEN8_GTCR_INVALIDATE		  (1<<0)
+
+#define GEN12_GUC_TLB_INV_CR		_MMIO(0xcee8)
+#define   GEN12_GUC_TLB_INV_CR_INVALIDATE	(1 << 0)
+
+#define GUC_ARAT_C6DIS			_MMIO(0xA178)
+
+#define GUC_SHIM_CONTROL		_MMIO(0xc064)
+#define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	(1<<0)
+#define   GUC_ENABLE_READ_CACHE_LOGIC		(1<<1)
+#define   GUC_ENABLE_MIA_CACHING		(1<<2)
+#define   GUC_GEN10_MSGCH_ENABLE		(1<<4)
+#define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA	(1<<9)
+#define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	(1<<10)
+#define   GUC_ENABLE_MIA_CLOCK_GATING		(1<<15)
+#define   GUC_GEN10_SHIM_WC_ENABLE		(1<<21)
+
+#define GUC_SEND_INTERRUPT		_MMIO(0xc4c8)
+#define   GUC_SEND_TRIGGER		  (1<<0)
+#define GEN11_GUC_HOST_INTERRUPT	_MMIO(0x1901f0)
+
+#define GUC_NUM_DOORBELLS		256
+
+/* format of the HW-monitored doorbell cacheline */
+struct guc_doorbell_info {
+	u32 db_status;
+#define GUC_DOORBELL_DISABLED		0
+#define GUC_DOORBELL_ENABLED		1
+
+	u32 cookie;
+	u32 reserved[14];
+} __packed;
+
+#define GEN8_DRBREGL(x)			_MMIO(0x1000 + (x) * 8)
+#define   GEN8_DRB_VALID		  (1<<0)
+#define GEN8_DRBREGU(x)			_MMIO(0x1000 + (x) * 8 + 4)
+
+#define GEN12_DIST_DBS_POPULATED		_MMIO(0xd08)
+#define   GEN12_DOORBELLS_PER_SQIDI_SHIFT	16
+#define   GEN12_DOORBELLS_PER_SQIDI		(0xff)
+#define   GEN12_SQIDIS_DOORBELL_EXIST		(0xffff)
+
+#define DE_GUCRMR			_MMIO(0x44054)
+
+#define GUC_BCS_RCS_IER			_MMIO(0xC550)
+#define GUC_VCS2_VCS1_IER		_MMIO(0xC554)
+#define GUC_WD_VECS_IER			_MMIO(0xC558)
+#define GUC_PM_P24C_IER			_MMIO(0xC55C)
+
+/* GuC Interrupt Vector */
+#define GUC_INTR_GUC2HOST		BIT(15)
+#define GUC_INTR_EXEC_ERROR		BIT(14)
+#define GUC_INTR_DISPLAY_EVENT		BIT(13)
+#define GUC_INTR_SEM_SIG		BIT(12)
+#define GUC_INTR_IOMMU2GUC		BIT(11)
+#define GUC_INTR_DOORBELL_RANG		BIT(10)
+#define GUC_INTR_DMA_DONE		BIT(9)
+#define GUC_INTR_FATAL_ERROR		BIT(8)
+#define GUC_INTR_NOTIF_ERROR		BIT(7)
+#define GUC_INTR_SW_INT_6		BIT(6)
+#define GUC_INTR_SW_INT_5		BIT(5)
+#define GUC_INTR_SW_INT_4		BIT(4)
+#define GUC_INTR_SW_INT_3		BIT(3)
+#define GUC_INTR_SW_INT_2		BIT(2)
+#define GUC_INTR_SW_INT_1		BIT(1)
+#define GUC_INTR_SW_INT_0		BIT(0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
new file mode 100644
index 000000000000..e0d424c2b78c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -0,0 +1,1695 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/dma-fence-array.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_engine_types.h"
+#include "xe_guc_submit.h"
+#include "xe_gt.h"
+#include "xe_force_wake.h"
+#include "xe_gpu_scheduler.h"
+#include "xe_hw_engine.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+#include "gt/intel_lrc_reg.h"
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static struct xe_device *
+guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+static struct xe_guc *
+engine_to_guc(struct xe_engine *e)
+{
+	return &e->gt->uc.guc;
+}
+
+/*
+ * Helpers for engine state, using an atomic as some of the bits can transition
+ * at the same time (e.g. a suspend can be happening at the same time as schedule
+ * engine done being processed).
+ */
+#define ENGINE_STATE_REGISTERED		(1 << 0)
+#define ENGINE_STATE_ENABLED		(1 << 1)
+#define ENGINE_STATE_PENDING_ENABLE	(1 << 2)
+#define ENGINE_STATE_PENDING_DISABLE	(1 << 3)
+#define ENGINE_STATE_DESTROYED		(1 << 4)
+#define ENGINE_STATE_SUSPENDED		(1 << 5)
+#define ENGINE_STATE_RESET		(1 << 6)
+#define ENGINE_STATE_KILLED		(1 << 7)
+
+static bool engine_registered(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED;
+}
+
+static void set_engine_registered(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state);
+}
+
+static void clear_engine_registered(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state);
+}
+
+static bool engine_enabled(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED;
+}
+
+static void set_engine_enabled(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_ENABLED, &e->guc->state);
+}
+
+static void clear_engine_enabled(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state);
+}
+
+static bool engine_pending_enable(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE;
+}
+
+static void set_engine_pending_enable(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
+}
+
+static void clear_engine_pending_enable(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
+}
+
+static bool engine_pending_disable(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE;
+}
+
+static void set_engine_pending_disable(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
+}
+
+static void clear_engine_pending_disable(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
+}
+
+static bool engine_destroyed(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED;
+}
+
+static void set_engine_destroyed(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state);
+}
+
+static bool engine_banned(struct xe_engine *e)
+{
+	return (e->flags & ENGINE_FLAG_BANNED);
+}
+
+static void set_engine_banned(struct xe_engine *e)
+{
+	e->flags |= ENGINE_FLAG_BANNED;
+}
+
+static bool engine_suspended(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED;
+}
+
+static void set_engine_suspended(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state);
+}
+
+static void clear_engine_suspended(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state);
+}
+
+static bool engine_reset(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_RESET;
+}
+
+static void set_engine_reset(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_RESET, &e->guc->state);
+}
+
+static bool engine_killed(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED;
+}
+
+static void set_engine_killed(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_KILLED, &e->guc->state);
+}
+
+static bool engine_killed_or_banned(struct xe_engine *e)
+{
+	return engine_killed(e) || engine_banned(e);
+}
+
+static void guc_submit_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xa_destroy(&guc->submission_state.engine_lookup);
+	ida_destroy(&guc->submission_state.guc_ids);
+	bitmap_free(guc->submission_state.guc_ids_bitmap);
+}
+
+#define GUC_ID_MAX		65535
+#define GUC_ID_NUMBER_MLRC	4096
+#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
+#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC
+
+static const struct xe_engine_ops guc_engine_ops;
+
+static void primelockdep(struct xe_guc *guc)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	/* Prime lockdep: the lock can be taken within fs_reclaim context... */
+	fs_reclaim_acquire(GFP_KERNEL);
+	mutex_lock(&guc->submission_state.lock);
+	/* ...and suspend.lock may be acquired while holding it. */
+	might_lock(&guc->submission_state.suspend.lock);
+	mutex_unlock(&guc->submission_state.lock);
+	fs_reclaim_release(GFP_KERNEL);
+}
+
+/* Set up the GuC submission state of @guc; returns 0 or a negative errno. */
+int xe_guc_submit_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	unsigned long *ids;
+
+	/* Multi-LRC (parallel) engines get their IDs from a bitmap */
+	ids = bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
+	guc->submission_state.guc_ids_bitmap = ids;
+	if (!ids)
+		return -ENOMEM;
+
+	gt->engine_ops = &guc_engine_ops;
+
+	/* All other engines use the IDA; the xarray maps an ID to its engine */
+	ida_init(&guc->submission_state.guc_ids);
+	xa_init(&guc->submission_state.engine_lookup);
+	mutex_init(&guc->submission_state.lock);
+
+	spin_lock_init(&guc->submission_state.suspend.lock);
+	guc->submission_state.suspend.context = dma_fence_context_alloc(1);
+
+	primelockdep(guc);
+
+	/* On failure guc_submit_fini() is invoked right away to clean up */
+	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
+}
+
+static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
+{
+	void *bitmap = guc->submission_state.guc_ids_bitmap;
+	int ret;
+
+	/*
+	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
+	 * worst case user gets -ENOMEM on engine create and has to try again.
+	 *
+	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
+	 * failure.
+	 */
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	if (xe_engine_is_parallel(e))
+		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
+					      order_base_2(e->width));
+	else
+		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
+				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
+	if (ret < 0)
+		return ret;
+
+	e->guc->id = ret;
+	if (xe_engine_is_parallel(e))
+		e->guc->id += GUC_ID_START_MLRC;
+
+	/* xa_store() reports failure as an XArray error entry, not ERR_PTR */
+	ret = xa_err(xa_store(&guc->submission_state.engine_lookup,
+			      e->guc->id, e, GFP_NOWAIT));
+	if (!ret)
+		return 0;
+
+	/* Give the ID back to the allocator it was taken from */
+	if (xe_engine_is_parallel(e))
+		bitmap_release_region(bitmap,
+				      e->guc->id - GUC_ID_START_MLRC,
+				      order_base_2(e->width));
+	else
+		ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
+
+	return ret;
+}
+
+static void release_guc_id(struct xe_guc *guc, struct xe_engine *e)
+{
+	mutex_lock(&guc->submission_state.lock);
+	xa_erase(&guc->submission_state.engine_lookup, e->guc->id);
+	if (!xe_engine_is_parallel(e))
+		ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
+	else	/* parallel IDs sit at an offset of GUC_ID_START_MLRC */
+		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
+				      e->guc->id - GUC_ID_START_MLRC,
+				      order_base_2(e->width));
+	mutex_unlock(&guc->submission_state.lock);
+}
+
+struct engine_policy {
+	u32 count;
+	struct guc_update_engine_policy h2g;
+};
+
+static u32 __guc_engine_policy_action_size(struct engine_policy *policy)
+{
+	/* Header plus the KLVs added so far, expressed in dwords */
+	size_t klv_bytes = sizeof(policy->h2g.klv[0]) * policy->count;
+
+	return (sizeof(policy->h2g.header) + klv_bytes) / sizeof(u32);
+}
+
+static void __guc_engine_policy_start_klv(struct engine_policy *policy,
+					  u16 guc_id)
+{
+	policy->h2g.header.action =
+		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
+	policy->h2g.header.guc_id = guc_id;
+	policy->count = 0;
+}
+
+#define MAKE_ENGINE_POLICY_ADD(func, id) \
+static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
+					   u32 data) \
+{ \
+	XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
+ \
+	policy->h2g.klv[policy->count].kl = \
+		FIELD_PREP(GUC_KLV_0_KEY, \
+			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
+		FIELD_PREP(GUC_KLV_0_LEN, 1); \
+	policy->h2g.klv[policy->count].value = data; \
+	policy->count++; \
+}
+
+MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
+MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
+MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
+#undef MAKE_ENGINE_POLICY_ADD
+
+static const int xe_engine_prio_to_guc[] = {
+	[XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
+	[XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
+	[XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
+	[XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
+};
+
+static void init_policies(struct xe_guc *guc, struct xe_engine *e)
+{
+	struct engine_policy policy;
+
+	XE_BUG_ON(!engine_registered(e));
+
+	/* One H2G carrying the priority, timeslice and preemption timeout */
+	__guc_engine_policy_start_klv(&policy, e->guc->id);
+	__guc_engine_policy_add_priority(&policy,
+					 xe_engine_prio_to_guc[e->priority]);
+	__guc_engine_policy_add_execution_quantum(&policy,
+						  e->sched_props.timeslice_us);
+	__guc_engine_policy_add_preemption_timeout(&policy,
+					e->sched_props.preempt_timeout_us);
+	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+		       __guc_engine_policy_action_size(&policy), 0, 0);
+}
+
+static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
+{
+	struct engine_policy policy;
+
+	/* A single-KLV policy update: preemption timeout of 1 */
+	__guc_engine_policy_start_klv(&policy, e->guc->id);
+	__guc_engine_policy_add_preemption_timeout(&policy, 1);
+	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+		       __guc_engine_policy_action_size(&policy), 0, 0);
+}
+
+#define PARALLEL_SCRATCH_SIZE	2048
+#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
+#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
+#define CACHELINE_BYTES		64
+
+struct sync_semaphore {
+	u32 semaphore;
+	u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+struct parallel_scratch {
+	struct guc_sched_wq_desc wq_desc;
+
+	struct sync_semaphore go;
+	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
+
+	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+		sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)];
+
+	u32 wq[WQ_SIZE / sizeof(u32)];
+};
+
+#define parallel_read(xe_, map_, field_) \
+	xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_)
+#define parallel_write(xe_, map_, field_, val_) \
+	xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_)
+
+static void __register_mlrc_engine(struct xe_guc *guc,
+				   struct xe_engine *e,
+				   struct guc_ctxt_registration_info *info)
+{
+#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
+	u32 action[MAX_MLRC_REG_SIZE];
+	int len = 0;
+	int i;
+
+	XE_BUG_ON(!xe_engine_is_parallel(e));
+
+	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+	action[len++] = info->flags;
+	action[len++] = info->context_idx;
+	action[len++] = info->engine_class;
+	action[len++] = info->engine_submit_mask;
+	action[len++] = info->wq_desc_lo;
+	action[len++] = info->wq_desc_hi;
+	action[len++] = info->wq_base_lo;
+	action[len++] = info->wq_base_hi;
+	action[len++] = info->wq_size;
+	action[len++] = e->width;
+	action[len++] = info->hwlrca_lo;
+	action[len++] = info->hwlrca_hi;
+
+	/* 13 fixed dwords above, then lo/hi descriptor of each further LRC */
+	for (i = 1; i < e->width; ++i) {
+		struct xe_lrc *lrc = e->lrc + i;
+
+		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
+		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
+	}
+	XE_BUG_ON(len > MAX_MLRC_REG_SIZE);
+#undef MAX_MLRC_REG_SIZE
+
+	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+}
+
+static void __register_engine(struct xe_guc *guc,
+			      struct guc_ctxt_registration_info *info)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_REGISTER_CONTEXT,
+		info->flags,
+		info->context_idx,
+		info->engine_class,
+		info->engine_submit_mask,
+		info->wq_desc_lo,
+		info->wq_desc_hi,
+		info->wq_base_lo,
+		info->wq_base_hi,
+		info->wq_size,
+		info->hwlrca_lo,
+		info->hwlrca_hi,
+	};
+
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void register_engine(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_lrc *lrc = e->lrc;
+	struct guc_ctxt_registration_info info;
+
+	XE_BUG_ON(engine_registered(e));
+
+	memset(&info, 0, sizeof(info));
+	info.context_idx = e->guc->id;
+	info.engine_class = xe_engine_class_to_guc_class(e->class);
+	info.engine_submit_mask = e->logical_mask;
+	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
+	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
+	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
+
+	if (xe_engine_is_parallel(e)) {
+		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
+		struct iosys_map map = xe_lrc_parallel_map(lrc);
+
+		info.wq_desc_lo = lower_32_bits(ggtt_addr +
+			offsetof(struct parallel_scratch, wq_desc));
+		info.wq_desc_hi = upper_32_bits(ggtt_addr +
+			offsetof(struct parallel_scratch, wq_desc));
+		info.wq_base_lo = lower_32_bits(ggtt_addr +
+			offsetof(struct parallel_scratch, wq[0]));
+		info.wq_base_hi = upper_32_bits(ggtt_addr +
+			offsetof(struct parallel_scratch, wq[0]));
+		info.wq_size = WQ_SIZE;
+
+		e->guc->wqi_head = 0;
+		e->guc->wqi_tail = 0;
+		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
+		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
+	}
+
+	set_engine_registered(e);
+	trace_xe_engine_register(e);
+	if (xe_engine_is_parallel(e))
+		__register_mlrc_engine(guc, e, &info);
+	else
+		__register_engine(guc, &info);
+	init_policies(guc, e);
+}
+
+static u32 wq_space_until_wrap(struct xe_engine *e)
+{
+	return (WQ_SIZE - e->guc->wqi_tail);
+}
+
+static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	unsigned int sleep_period_ms;
+
+#define AVAILABLE_SPACE \
+	CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE)
+	/* Fast path: the cached head already shows enough room */
+	if (wqi_size <= AVAILABLE_SPACE)
+		return 0;
+
+	/* Re-read the head, backing off 1, 2, 4, ... 512 ms between reads */
+	for (sleep_period_ms = 1; ; sleep_period_ms <<= 1) {
+		e->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
+		if (wqi_size <= AVAILABLE_SPACE)
+			return 0;
+		/* Out of patience: reset the GT and give up */
+		if (sleep_period_ms == 1024) {
+			xe_gt_reset_async(e->gt);
+			return -ENODEV;
+		}
+		msleep(sleep_period_ms);
+	}
+#undef AVAILABLE_SPACE
+}
+
+/*
+ * Fill the remainder of the work queue (from the cached tail to the end of
+ * the ring) with a single NOOP item and wrap the cached tail back to 0.
+ *
+ * Return: 0 on success, -ENODEV if the space never became available.
+ */
+static int wq_noop_append(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	u32 pad_bytes = wq_space_until_wrap(e);
+	/* Item length in dwords, not counting the header dword */
+	u32 len_dw = pad_bytes / sizeof(u32) - 1;
+
+	if (wq_wait_for_space(e, pad_bytes) != 0)
+		return -ENODEV;
+
+	XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
+
+	/* Only the header dword is written; the padding is left untouched */
+	parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
+		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
+		       FIELD_PREP(WQ_LEN_MASK, len_dw));
+	e->guc->wqi_tail = 0;
+
+	return 0;
+}
+
+/*
+ * Append a multi-LRC work item for @e to its work queue and publish the new
+ * tail in the work-queue descriptor.
+ *
+ * Item layout (dwords): header (type + length), LRC descriptor of the first
+ * LRC, GuC id + ring tail of the first LRC, a zero dword, then one ring tail
+ * per additional LRC (e->width - 1 of them).
+ *
+ * If the item does not fit before the end of the ring a NOOP item is written
+ * first so that the cached tail wraps to 0. If space cannot be obtained the
+ * function returns without writing anything; no error reaches the caller.
+ */
+static void wq_item_append(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3];
+	u32 wqi_size = (e->width + 3) * sizeof(u32);
+	/* Length field excludes the header dword */
+	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
+	int i = 0, j;
+
+	/* Items are never split across the wrap point: pad and restart at 0 */
+	if (wqi_size > wq_space_until_wrap(e)) {
+		if (wq_noop_append(e))
+			return;
+	}
+	if (wq_wait_for_space(e, wqi_size))
+		return;
+
+	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
+		FIELD_PREP(WQ_LEN_MASK, len_dw);
+	/*
+	 * NOTE(review): register_engine() splits xe_lrc_descriptor() with
+	 * lower_32_bits() / upper_32_bits(); here the value is stored into a
+	 * single u32 - confirm the implicit truncation is intended.
+	 */
+	wqi[i++] = xe_lrc_descriptor(e->lrc);
+	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) |
+		FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64));
+	wqi[i++] = 0;
+	/* Ring tails of the remaining LRCs, in sizeof(u64) units */
+	for (j = 1; j < e->width; ++j) {
+		struct xe_lrc *lrc = e->lrc + j;
+
+		wqi[i++] = lrc->ring.tail / sizeof(u64);
+	}
+
+	XE_BUG_ON(i != wqi_size / sizeof(u32));
+
+	/* Advance the local map copy to the slot at the cached tail */
+	iosys_map_incr(&map, offsetof(struct parallel_scratch,
+					wq[e->guc->wqi_tail / sizeof(u32)]));
+	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
+	e->guc->wqi_tail += wqi_size;
+	XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);
+
+	/* Barrier between the item write above and the tail update below */
+	xe_device_wmb(xe);
+
+	/* The map was advanced above; fetch a fresh one for the descriptor */
+	map = xe_lrc_parallel_map(e->lrc);
+	parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail);
+}
+
+/*
+ * Sentinel stored in resume_time while a schedule enable is in flight; the
+ * schedule-done handler replaces it with ktime_get() once the enable completes.
+ */
+#define RESUME_PENDING	~0x0ull
+/*
+ * Hand the engine's latest ring contents to the GuC.
+ *
+ * Parallel engines get a new work-queue item, all others have the ring tail
+ * written into the context image. If scheduling is not yet enabled (and the
+ * engine is not suspended) a schedule-enable H2G is sent, otherwise a plain
+ * schedule-context H2G. A suspended non-parallel engine only has its tail
+ * updated and nothing is sent.
+ */
+static void submit_engine(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_lrc *lrc = e->lrc;
+	u32 action[3];
+	u32 g2h_len = 0;
+	u32 num_g2h = 0;
+	int len = 0;
+	bool extra_submit = false;
+
+	XE_BUG_ON(!engine_registered(e));
+
+	if (xe_engine_is_parallel(e))
+		wq_item_append(e);
+	else
+		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+
+	if (engine_suspended(e) && !xe_engine_is_parallel(e))
+		return;
+
+	if (!engine_enabled(e) && !engine_suspended(e)) {
+		/* First submission (or after a disable): enable scheduling */
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
+		action[len++] = e->guc->id;
+		action[len++] = GUC_CONTEXT_ENABLE;
+		/* The enable is acknowledged with one G2H message */
+		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
+		num_g2h = 1;
+		/* Parallel engines are sent an extra schedule-context below */
+		if (xe_engine_is_parallel(e))
+			extra_submit = true;
+
+		e->guc->resume_time = RESUME_PENDING;
+		set_engine_pending_enable(e);
+		set_engine_enabled(e);
+		trace_xe_engine_scheduling_enable(e);
+	} else {
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+		action[len++] = e->guc->id;
+		trace_xe_engine_submit(e);
+	}
+
+	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
+
+	if (extra_submit) {
+		len = 0;
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+		action[len++] = e->guc->id;
+		trace_xe_engine_submit(e);
+
+		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+	}
+}
+
+/*
+ * DRM scheduler run_job callback: register the engine on first use, emit the
+ * job into the ring and submit it, unless the engine is killed / banned or the
+ * job already carries an error.
+ *
+ * Return: the job's fence. An extra reference is taken only the first time
+ * the job passes through here, i.e. when JOB_FLAG_SUBMIT was not yet set.
+ */
+static struct dma_fence *
+guc_engine_run_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_engine *e = job->engine;
+	struct dma_fence *fence;
+	bool runnable;
+
+	XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
+		  !engine_banned(e) && !engine_suspended(e));
+
+	trace_xe_sched_job_run(job);
+
+	runnable = !engine_killed_or_banned(e) && !xe_sched_job_is_error(job);
+	if (runnable) {
+		if (!engine_registered(e))
+			register_engine(e);
+		e->ring_ops->emit_job(job);
+		submit_engine(e);
+	}
+
+	fence = job->fence;
+	if (!test_and_set_bit(JOB_FLAG_SUBMIT, &fence->flags))
+		fence = dma_fence_get(fence);
+
+	return fence;
+}
+
+/* DRM scheduler free_job callback: trace, then drop the job reference. */
+static void guc_engine_free_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *xe_job = to_xe_sched_job(drm_job);
+
+	trace_xe_sched_job_free(xe_job);
+	xe_sched_job_put(xe_job);
+}
+
+/* Current value of the submission "stopped" counter (non-zero == stopped). */
+static int guc_read_stopped(struct xe_guc *guc)
+{
+	int stopped = atomic_read(&guc->submission_state.stopped);
+
+	return stopped;
+}
+
+/*
+ * Declare a local "u32 action[]" holding a SCHED_CONTEXT_MODE_SET H2G for
+ * engine @e; @enable_disable is pasted onto GUC_CONTEXT_ (ENABLE / DISABLE).
+ * Expands to a declaration, so it must sit with the other declarations of the
+ * enclosing block. @e is parenthesized so any pointer expression is safe.
+ */
+#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable)			\
+	u32 action[] = {						\
+		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
+		(e)->guc->id,						\
+		GUC_CONTEXT_##enable_disable,				\
+	}
+
+/*
+ * Ask the GuC to disable scheduling for @e as the first step of tearing the
+ * context down; the deregister itself is sent later from the schedule-done
+ * G2H handler.
+ *
+ * Waits up to 5 seconds for a pending enable to complete (or for submission
+ * to be stopped). On timeout no H2G is sent: submission is restarted, a GT
+ * reset is requested and the TDR is queued immediately.
+ */
+static void disable_scheduling_deregister(struct xe_guc *guc,
+					  struct xe_engine *e)
+{
+	MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
+	int ret;
+
+	set_min_preemption_timeout(guc, e);
+	smp_rmb();
+	/* An enable still in flight must be acknowledged before disabling */
+	ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) ||
+				 guc_read_stopped(guc), HZ * 5);
+	if (!ret) {
+		struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+		/* A string literal is always true, so this always warns */
+		XE_WARN_ON("Pending enable failed to respond");
+		xe_sched_submission_start(sched);
+		xe_gt_reset_async(e->gt);
+		xe_sched_tdr_queue_imm(sched);
+		return;
+	}
+
+	clear_engine_enabled(e);
+	set_engine_pending_disable(e);
+	set_engine_destroyed(e);
+	trace_xe_engine_scheduling_disable(e);
+
+	/*
+	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
+	 * handler and we are not allowed to reserve G2H space in handlers.
+	 */
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
+		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
+}
+
+static void guc_engine_print(struct xe_engine *e, struct drm_printer *p);
+
+#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
+/*
+ * Dump GuC CT state, the engine, the matching hardware engines and the VM
+ * through the DRM error printer. Runs at most once per VM (guarded by
+ * vm->error_capture.capture_once) and does nothing for engines without a VM.
+ */
+static void simple_error_capture(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct drm_printer p = drm_err_printer("");
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 adj_logical_mask = e->logical_mask;
+	/* One bit per LRC of the engine */
+	u32 width_mask = (0x1 << e->width) - 1;
+	int i;
+	bool cookie;
+
+	if (e->vm && !e->vm->error_capture.capture_once) {
+		e->vm->error_capture.capture_once = true;
+		cookie = dma_fence_begin_signalling();
+		/*
+		 * Multi-LRC engines only: for every set bit in the logical
+		 * mask, also mark the following width - 1 instances, then
+		 * skip past that group.
+		 */
+		for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+			if (adj_logical_mask & BIT(i)) {
+				adj_logical_mask |= width_mask << i;
+				i += e->width;
+			} else {
+				++i;
+			}
+		}
+
+		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		xe_guc_ct_print(&guc->ct, &p);
+		guc_engine_print(e, &p);
+		/* Only hardware engines of the same class inside the mask */
+		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
+			if (hwe->class != e->hwe->class ||
+			    !(BIT(hwe->logical_instance) & adj_logical_mask))
+				continue;
+			xe_hw_engine_print_state(hwe, &p);
+		}
+		xe_analyze_vm(&p, e->vm, e->gt->info.id);
+		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		dma_fence_end_signalling(cookie);
+	}
+}
+#else
+/* Error capture compiled out: nothing to do. */
+static void simple_error_capture(struct xe_engine *e)
+{
+}
+#endif
+
+/*
+ * DRM scheduler timedout_job callback (TDR).
+ *
+ * Sequence: log (and capture errors for) an unsignaled job, stop submission,
+ * escalate kernel / not-killed VM engines to a GT reset, otherwise ban the
+ * engine and disable its scheduling, then - with fence signaling paused -
+ * re-add the job, restart submission and mark every pending job with an
+ * error (the first with -ETIME or -EIO, the rest with -ECANCELED).
+ *
+ * Always returns DRM_GPU_SCHED_STAT_NOMINAL.
+ */
+static enum drm_gpu_sched_stat
+guc_engine_timedout_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_sched_job *tmp_job;
+	struct xe_engine *e = job->engine;
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+	struct xe_device *xe = guc_to_xe(engine_to_guc(e));
+	int err = -ETIME;
+	int i = 0;
+
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
+		XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
+		XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e));
+
+		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
+			   xe_sched_job_seqno(job), e->guc->id, e->flags);
+		simple_error_capture(e);
+	} else {
+		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
+			 xe_sched_job_seqno(job), e->guc->id, e->flags);
+	}
+	trace_xe_sched_job_timedout(job);
+
+	/* Kill the run_job entry point */
+	xe_sched_submission_stop(sched);
+
+	/*
+	 * Kernel jobs should never fail, nor should VM jobs; if they do,
+	 * something has gone wrong and the GT needs a reset
+	 */
+	if (e->flags & ENGINE_FLAG_KERNEL ||
+	    (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) {
+		if (!xe_sched_invalidate_job(job, 2)) {
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(e->gt);
+			goto out;
+		}
+	}
+
+	/* Engine state now stable, disable scheduling if needed */
+	if (engine_enabled(e)) {
+		struct xe_guc *guc = engine_to_guc(e);
+		int ret;
+
+		/* A reset reported for this engine is surfaced as -EIO */
+		if (engine_reset(e))
+			err = -EIO;
+		set_engine_banned(e);
+		/*
+		 * Extra reference for the banned engine; it is dropped by the
+		 * deregister-done handler or by guc_engine_stop().
+		 */
+		xe_engine_get(e);
+		disable_scheduling_deregister(engine_to_guc(e), e);
+
+		/*
+		 * Must wait for scheduling to be disabled before signalling
+		 * any fences, if GT broken the GT reset code should signal us.
+		 *
+		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
+		 * error) messages which can cause the schedule disable to get
+		 * lost. If this occurs, trigger a GT reset to recover.
+		 */
+		smp_rmb();
+		ret = wait_event_timeout(guc->ct.wq,
+					 !engine_pending_disable(e) ||
+					 guc_read_stopped(guc), HZ * 5);
+		if (!ret) {
+			XE_WARN_ON("Schedule disable failed to respond");
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(e->gt);
+			xe_sched_tdr_queue_imm(sched);
+			goto out;
+		}
+	}
+
+	/* Stop fence signaling */
+	xe_hw_fence_irq_stop(e->fence_irq);
+
+	/*
+	 * Fence state now stable, stop / start scheduler which cleans up any
+	 * fences that are complete
+	 */
+	xe_sched_add_pending_job(sched, job);
+	xe_sched_submission_start(sched);
+	xe_sched_tdr_queue_imm(&e->guc->sched);
+
+	/* Mark all outstanding jobs as bad, thus completing them */
+	spin_lock(&sched->base.job_list_lock);
+	/* Only the first pending job gets err; the rest get -ECANCELED */
+	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
+		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
+	spin_unlock(&sched->base.job_list_lock);
+
+	/* Start fence signaling */
+	xe_hw_fence_irq_start(e->fence_irq);
+
+out:
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+/*
+ * Worker that tears down the GuC backend state of an engine: drops it from
+ * the persistent-engine list if needed, releases its GuC id and finalizes the
+ * scheduler entity and scheduler.
+ *
+ * For non-kernel engines the backend state and the engine itself are released
+ * here. For kernel engines that is left to guc_engine_fini_async(), which
+ * flushes this work first.
+ */
+static void __guc_engine_fini_async(struct work_struct *w)
+{
+	struct xe_guc_engine *ge =
+		container_of(w, struct xe_guc_engine, fini_async);
+	struct xe_engine *e = ge->engine;
+	struct xe_guc *guc = engine_to_guc(e);
+
+	trace_xe_engine_destroy(e);
+
+	if (e->flags & ENGINE_FLAG_PERSISTENT)
+		xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
+	release_guc_id(guc, e);
+	xe_sched_entity_fini(&ge->entity);
+	xe_sched_fini(&ge->sched);
+
+	if (!(e->flags & ENGINE_FLAG_KERNEL)) {
+		kfree(ge);
+		xe_engine_fini(e);
+	}
+}
+
+/*
+ * Queue the asynchronous teardown of @e's GuC backend state on
+ * system_unbound_wq. Kernel engines additionally wait for the work to finish
+ * and are released here rather than in the worker.
+ */
+static void guc_engine_fini_async(struct xe_engine *e)
+{
+	/*
+	 * Sampled before queueing: for non-kernel engines the worker calls
+	 * xe_engine_fini(e), so @e should not be touched afterwards.
+	 */
+	bool kernel = e->flags & ENGINE_FLAG_KERNEL;
+
+	INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
+	queue_work(system_unbound_wq, &e->guc->fini_async);
+
+	/* We must block on kernel engines so slabs are empty on driver unload */
+	if (kernel) {
+		struct xe_guc_engine *ge = e->guc;
+
+		flush_work(&ge->fini_async);
+		kfree(ge);
+		xe_engine_fini(e);
+	}
+}
+
+/* Final teardown entry point for an engine; @guc is currently unused. */
+static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
+{
+	/*
+	 * Might be done from within the GPU scheduler, need to do async as we
+	 * fini the scheduler when the engine is fini'd, the scheduler can't
+	 * complete fini within itself (circular dependency). Async resolves
+	 * this, and we don't really care when everything is fini'd, just that
+	 * it is.
+	 */
+	guc_engine_fini_async(e);
+}
+
+/*
+ * CLEANUP message: an engine never registered with the GuC is finalized
+ * right away, a registered one first has scheduling disabled and is then
+ * deregistered. Must not be used for kernel engines.
+ */
+static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
+{
+	struct xe_engine *e = msg->private_data;
+	struct xe_guc *guc = engine_to_guc(e);
+
+	XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL);
+	trace_xe_engine_cleanup_entity(e);
+
+	if (!engine_registered(e)) {
+		__guc_engine_fini(guc, e);
+		return;
+	}
+
+	disable_scheduling_deregister(guc, e);
+}
+
+/* State changes are only sent for registered engines that are still alive. */
+static bool guc_engine_allowed_to_change_state(struct xe_engine *e)
+{
+	if (engine_killed_or_banned(e))
+		return false;
+
+	return engine_registered(e);
+}
+
+/*
+ * SET_SCHED_PROPS message: push the scheduling policies again if the engine
+ * may still change state. The message was allocated by the sender and is
+ * freed here in every case.
+ */
+static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg)
+{
+	struct xe_engine *engine = msg->private_data;
+	struct xe_guc *guc = engine_to_guc(engine);
+	bool update = guc_engine_allowed_to_change_state(engine);
+
+	if (update)
+		init_policies(guc, engine);
+
+	kfree(msg);
+}
+
+/*
+ * Complete a pending suspend: clear suspend_pending and wake everyone
+ * sleeping on the engine's suspend_wait queue. Only valid while a suspend is
+ * pending and the engine is suspended or killed, or submission is stopped.
+ */
+static void suspend_fence_signal(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+
+	XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) &&
+		  !guc_read_stopped(guc));
+	XE_BUG_ON(!e->guc->suspend_pending);
+
+	e->guc->suspend_pending = false;
+	/* Write barrier between clearing the flag and waking the waiters */
+	smp_wmb();
+	wake_up(&e->guc->suspend_wait);
+}
+
+/*
+ * SUSPEND message: disable scheduling of an enabled, registered engine.
+ *
+ * Waits for any in-flight enable to complete (or submission to stop), then
+ * sleeps for the remainder of the VM's minimum run period before sending the
+ * schedule-disable H2G. If the engine cannot change state (or is already
+ * suspended / not enabled) a pending suspend is signaled immediately.
+ */
+static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg)
+{
+	struct xe_engine *e = msg->private_data;
+	struct xe_guc *guc = engine_to_guc(e);
+
+	if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) &&
+	    engine_enabled(e)) {
+		/* resume_time stays RESUME_PENDING until the enable is acked */
+		wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING ||
+			   guc_read_stopped(guc));
+
+		if (!guc_read_stopped(guc)) {
+			MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
+			s64 since_resume_ms =
+				ktime_ms_delta(ktime_get(),
+					       e->guc->resume_time);
+			s64 wait_ms = e->vm->preempt.min_run_period_ms -
+				since_resume_ms;
+
+			/* resume_time == 0 (set on stop) skips the wait */
+			if (wait_ms > 0 && e->guc->resume_time)
+				msleep(wait_ms);
+
+			set_engine_suspended(e);
+			clear_engine_enabled(e);
+			set_engine_pending_disable(e);
+			trace_xe_engine_scheduling_disable(e);
+
+			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+		}
+	} else if (e->guc->suspend_pending) {
+		/* Nothing to disable: mark suspended and release the waiters */
+		set_engine_suspended(e);
+		suspend_fence_signal(e);
+	}
+}
+
+/*
+ * RESUME message: clear the suspended state and, if the engine may still
+ * change state, send a schedule-enable H2G (acknowledged by one G2H).
+ */
+static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
+{
+	struct xe_engine *e = msg->private_data;
+	struct xe_guc *guc = engine_to_guc(e);
+	MAKE_SCHED_CONTEXT_ACTION(e, ENABLE);
+
+	if (!guc_engine_allowed_to_change_state(e)) {
+		/* Dead or unregistered engine: only drop the suspended bit */
+		clear_engine_suspended(e);
+		return;
+	}
+
+	e->guc->resume_time = RESUME_PENDING;
+	clear_engine_suspended(e);
+	set_engine_pending_enable(e);
+	set_engine_enabled(e);
+	trace_xe_engine_scheduling_enable(e);
+
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+}
+
+/* Opcodes carried in xe_sched_msg.opcode for the GuC backend */
+#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
+#define SET_SCHED_PROPS	2
+#define SUSPEND		3
+#define RESUME		4
+
+/*
+ * Scheduler process_msg callback: dispatch a backend message to its handler
+ * based on the opcode. An unknown opcode trips XE_BUG_ON.
+ */
+static void guc_engine_process_msg(struct xe_sched_msg *msg)
+{
+	trace_xe_sched_msg_recv(msg);
+
+	switch (msg->opcode) {
+	case CLEANUP:
+		__guc_engine_process_msg_cleanup(msg);
+		break;
+	case SET_SCHED_PROPS:
+		__guc_engine_process_msg_set_sched_props(msg);
+		break;
+	case SUSPEND:
+		__guc_engine_process_msg_suspend(msg);
+		break;
+	case RESUME:
+		__guc_engine_process_msg_resume(msg);
+		break;
+	default:
+		XE_BUG_ON("Unknown message type");
+	}
+}
+
+/* DRM scheduler callbacks implemented by the GuC submission backend */
+static const struct drm_sched_backend_ops drm_sched_ops = {
+	.run_job = guc_engine_run_job,
+	.free_job = guc_engine_free_job,
+	.timedout_job = guc_engine_timedout_job,
+};
+
+/* Xe scheduler callbacks: handler for the backend's own messages */
+static const struct xe_sched_backend_ops xe_sched_ops = {
+	.process_msg = guc_engine_process_msg,
+};
+
+/*
+ * Create the GuC submission backend state for @e: allocate the backend
+ * object, set up its scheduler and scheduler entity, allocate a GuC id and
+ * derive the engine name from the engine class and that id.
+ *
+ * The GuC id allocation runs under guc->submission_state.lock; if submission
+ * is currently stopped the new scheduler is stopped too before the lock is
+ * dropped.
+ *
+ * Return: 0 on success, -ENOMEM if the backend state cannot be allocated, or
+ * the error returned by xe_sched_init(), xe_sched_entity_init() or
+ * alloc_guc_id().
+ */
+static int guc_engine_init(struct xe_engine *e)
+{
+	struct xe_gpu_scheduler *sched;
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc_engine *ge;
+	long timeout;
+	int err;
+
+	XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));
+
+	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
+	if (!ge)
+		return -ENOMEM;
+
+	e->guc = ge;
+	ge->engine = e;
+	init_waitqueue_head(&ge->suspend_wait);
+
+	/* No job timeout for VMs that do not use dma-fences, else 5 seconds */
+	timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
+	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
+			     e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+			     64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
+			     e->name, gt_to_xe(e->gt)->drm.dev);
+	if (err)
+		goto err_free;
+
+	sched = &ge->sched;
+	err = xe_sched_entity_init(&ge->entity, sched);
+	if (err)
+		goto err_sched;
+	e->priority = XE_ENGINE_PRIORITY_NORMAL;
+
+	mutex_lock(&guc->submission_state.lock);
+
+	err = alloc_guc_id(guc, e);
+	if (err)
+		goto err_entity;
+
+	e->entity = &ge->entity;
+
+	/* Engines created while submission is stopped start out stopped */
+	if (guc_read_stopped(guc))
+		xe_sched_stop(sched);
+
+	mutex_unlock(&guc->submission_state.lock);
+
+	switch (e->class) {
+	case XE_ENGINE_CLASS_RENDER:
+		sprintf(e->name, "rcs%d", e->guc->id);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		sprintf(e->name, "vcs%d", e->guc->id);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		sprintf(e->name, "vecs%d", e->guc->id);
+		break;
+	case XE_ENGINE_CLASS_COPY:
+		sprintf(e->name, "bcs%d", e->guc->id);
+		break;
+	case XE_ENGINE_CLASS_COMPUTE:
+		sprintf(e->name, "ccs%d", e->guc->id);
+		break;
+	default:
+		XE_WARN_ON(e->class);
+	}
+
+	trace_xe_engine_create(e);
+
+	return 0;
+
+err_entity:
+	/*
+	 * Only reached from the alloc_guc_id() failure, i.e. with
+	 * submission_state.lock held: drop it before unwinding, otherwise
+	 * the mutex stays locked forever.
+	 */
+	mutex_unlock(&guc->submission_state.lock);
+	xe_sched_entity_fini(&ge->entity);
+err_sched:
+	xe_sched_fini(&ge->sched);
+err_free:
+	kfree(ge);
+
+	return err;
+}
+
+/* Mark the engine killed and run its timeout handler as soon as possible. */
+static void guc_engine_kill(struct xe_engine *e)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+	trace_xe_engine_kill(e);
+	set_engine_killed(e);
+	xe_sched_tdr_queue_imm(sched);
+}
+
+/*
+ * Fill in @msg (opcode, owning engine, list linkage) and hand it to the
+ * engine's scheduler for processing by guc_engine_process_msg().
+ */
+static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg,
+			       u32 opcode)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+	msg->private_data = e;
+	msg->opcode = opcode;
+	INIT_LIST_HEAD(&msg->link);
+
+	trace_xe_sched_msg_add(msg);
+	xe_sched_add_msg(sched, msg);
+}
+
+#define STATIC_MSG_CLEANUP	0
+#define STATIC_MSG_SUSPEND	1
+#define STATIC_MSG_RESUME	2
+/*
+ * Engine fini hook: kernel engines are finalized directly, every other engine
+ * goes through a CLEANUP message using its preallocated static message slot.
+ */
+static void guc_engine_fini(struct xe_engine *e)
+{
+	if (e->flags & ENGINE_FLAG_KERNEL) {
+		__guc_engine_fini(engine_to_guc(e), e);
+		return;
+	}
+
+	guc_engine_add_msg(e, &e->guc->static_msgs[STATIC_MSG_CLEANUP],
+			   CLEANUP);
+}
+
+/*
+ * Change the engine priority and ask the backend to re-send the scheduling
+ * policies. A no-op when the priority is unchanged or the engine is killed /
+ * banned.
+ *
+ * Return: 0 on success (including the no-op case), -ENOMEM if the message
+ * cannot be allocated; the priority is left unchanged in that case.
+ */
+static int guc_engine_set_priority(struct xe_engine *e,
+				   enum xe_engine_priority priority)
+{
+	struct xe_sched_msg *msg;
+
+	if (e->priority == priority || engine_killed_or_banned(e))
+		return 0;
+
+	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	/*
+	 * Store the new value before queueing the message, as the timeslice
+	 * and preempt-timeout setters do: once queued, the message may be
+	 * processed at any time and must not see the old priority.
+	 */
+	e->priority = priority;
+	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+/*
+ * Store a new timeslice and ask the backend to re-send the scheduling
+ * policies. Nothing happens if the value is unchanged or the engine is
+ * killed / banned.
+ *
+ * Return: 0 on success (including the no-op case), -ENOMEM if the message
+ * cannot be allocated.
+ */
+static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
+{
+	struct xe_sched_msg *props_msg;
+
+	if (e->sched_props.timeslice_us == timeslice_us)
+		return 0;
+	if (engine_killed_or_banned(e))
+		return 0;
+
+	props_msg = kmalloc(sizeof(*props_msg), GFP_KERNEL);
+	if (!props_msg)
+		return -ENOMEM;
+
+	e->sched_props.timeslice_us = timeslice_us;
+	guc_engine_add_msg(e, props_msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+/*
+ * Store a new preemption timeout and ask the backend to re-send the
+ * scheduling policies. Nothing happens if the value is unchanged or the
+ * engine is killed / banned.
+ *
+ * Return: 0 on success (including the no-op case), -ENOMEM if the message
+ * cannot be allocated.
+ */
+static int guc_engine_set_preempt_timeout(struct xe_engine *e,
+					  u32 preempt_timeout_us)
+{
+	struct xe_sched_msg *props_msg;
+
+	if (e->sched_props.preempt_timeout_us == preempt_timeout_us)
+		return 0;
+	if (engine_killed_or_banned(e))
+		return 0;
+
+	props_msg = kmalloc(sizeof(*props_msg), GFP_KERNEL);
+	if (!props_msg)
+		return -ENOMEM;
+
+	e->sched_props.preempt_timeout_us = preempt_timeout_us;
+	guc_engine_add_msg(e, props_msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+/*
+ * Set the scheduler job timeout of an engine that has not been registered
+ * with the GuC yet and is neither banned nor killed. Always returns 0.
+ */
+static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+	XE_BUG_ON(engine_registered(e));
+	XE_BUG_ON(engine_banned(e));
+	XE_BUG_ON(engine_killed(e));
+
+	/*
+	 * NOTE(review): guc_engine_init() seeds this scheduler timeout with
+	 * "HZ * 5" (jiffies), while a millisecond value is stored here
+	 * unconverted - confirm whether msecs_to_jiffies() is needed.
+	 */
+	sched->base.timeout = job_timeout_ms;
+
+	return 0;
+}
+
+/*
+ * Request an asynchronous suspend of the engine using its preallocated
+ * SUSPEND message; completion is observed with guc_engine_suspend_wait().
+ *
+ * Return: 0 if the request was queued, -EINVAL if the engine is killed /
+ * banned or a suspend is already pending.
+ */
+static int guc_engine_suspend(struct xe_engine *e)
+{
+	struct xe_guc_engine *ge = e->guc;
+
+	if (engine_killed_or_banned(e))
+		return -EINVAL;
+	if (ge->suspend_pending)
+		return -EINVAL;
+
+	ge->suspend_pending = true;
+	guc_engine_add_msg(e, &ge->static_msgs[STATIC_MSG_SUSPEND], SUSPEND);
+
+	return 0;
+}
+
+/* Sleep until the pending suspend has completed or submission is stopped. */
+static void guc_engine_suspend_wait(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc_engine *ge = e->guc;
+
+	wait_event(ge->suspend_wait,
+		   !ge->suspend_pending || guc_read_stopped(guc));
+}
+
+/*
+ * Queue a RESUME for the engine using its preallocated message slot. Must
+ * not be called while a suspend is still pending.
+ */
+static void guc_engine_resume(struct xe_engine *e)
+{
+	struct xe_guc_engine *ge = e->guc;
+
+	XE_BUG_ON(ge->suspend_pending);
+
+	xe_mocs_init_engine(e);
+	guc_engine_add_msg(e, &ge->static_msgs[STATIC_MSG_RESUME], RESUME);
+}
+
+/*
+ * All of these functions are an abstraction layer which other parts of XE can
+ * use to trap into the GuC backend. All of these functions, aside from init,
+ * really shouldn't do much other than trap into the DRM scheduler which
+ * synchronizes these operations (most of them only queue a message that is
+ * handled later by guc_engine_process_msg()).
+ */
+static const struct xe_engine_ops guc_engine_ops = {
+	.init = guc_engine_init,
+	.kill = guc_engine_kill,
+	.fini = guc_engine_fini,
+	.set_priority = guc_engine_set_priority,
+	.set_timeslice = guc_engine_set_timeslice,
+	.set_preempt_timeout = guc_engine_set_preempt_timeout,
+	.set_job_timeout = guc_engine_set_job_timeout,
+	.suspend = guc_engine_suspend,
+	.suspend_wait = guc_engine_suspend_wait,
+	.resume = guc_engine_resume,
+};
+
+/*
+ * Quiesce one engine as part of stopping submission: stop its scheduler,
+ * finish teardown / suspend operations whose G2H replies will never arrive,
+ * reset the backend state bits and decide whether the engine must be banned.
+ */
+static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+	/* Stop scheduling + flush any DRM scheduler operations */
+	xe_sched_submission_stop(sched);
+
+	/* Clean up lost G2H + reset engine state */
+	if (engine_destroyed(e) && engine_registered(e)) {
+		/* Same completion as the deregister-done G2H handler */
+		if (engine_banned(e))
+			xe_engine_put(e);
+		else
+			__guc_engine_fini(guc, e);
+	}
+	if (e->guc->suspend_pending) {
+		set_engine_suspended(e);
+		suspend_fence_signal(e);
+	}
+	/* Keep only the DESTROYED and SUSPENDED bits, clear everything else */
+	atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
+		   &e->guc->state);
+	e->guc->resume_time = 0;
+	trace_xe_engine_stop(e);
+
+	/*
+	 * Ban any engine (aside from kernel and engines used for VM ops) with a
+	 * started but not complete job or if a job has gone through a GT reset
+	 * more than twice.
+	 */
+	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) {
+		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
+
+		if (job) {
+			if ((xe_sched_job_started(job) &&
+			    !xe_sched_job_completed(job)) ||
+			    xe_sched_invalidate_job(job, 2)) {
+				trace_xe_sched_job_ban(job);
+				xe_sched_tdr_queue_imm(&e->guc->sched);
+				set_engine_banned(e);
+			}
+		}
+	}
+}
+
+/*
+ * Mark GuC submission as stopped ahead of a reset and wake every waiter on
+ * the CT wait queue so that waits conditioned on the stopped flag can finish.
+ *
+ * Return: the previous value of the stopped flag, i.e. non-zero if submission
+ * had already been marked stopped.
+ */
+int xe_guc_submit_reset_prepare(struct xe_guc *guc)
+{
+	int ret;
+
+	/*
+	 * Using an atomic here rather than submission_state.lock as this
+	 * function can be called while holding the CT lock (engine reset
+	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
+	 * Atomic is not ideal, but it works to protect against concurrent
+	 * resets and to release any TDRs waiting on
+	 * guc->submission_state.stopped.
+	 */
+	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
+	smp_wmb();
+	wake_up_all(&guc->ct.wq);
+
+	return ret;
+}
+
+/* Block until submission is no longer marked as stopped. */
+void xe_guc_submit_reset_wait(struct xe_guc *guc)
+{
+	wait_event(guc->ct.wq, guc_read_stopped(guc) == 0);
+}
+
+/*
+ * Stop every engine known to the GuC submission backend. Submission must
+ * already have been marked stopped by xe_guc_submit_reset_prepare().
+ * Always returns 0.
+ */
+int xe_guc_submit_stop(struct xe_guc *guc)
+{
+	struct xe_engine *engine;
+	unsigned long guc_id;
+
+	XE_BUG_ON(guc_read_stopped(guc) != 1);
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.engine_lookup, guc_id, engine) {
+		guc_engine_stop(guc, engine);
+	}
+	mutex_unlock(&guc->submission_state.lock);
+
+	/*
+	 * From here on nothing can enter the backend except new engine
+	 * creation, which is serialized by guc->submission_state.lock.
+	 */
+	return 0;
+}
+
+/*
+ * Restart one engine after submission was stopped. Unless the engine is
+ * killed or banned, every LRC's ring head is moved up to its tail and the
+ * scheduler's jobs are resubmitted; the scheduler is restarted in all cases.
+ */
+static void guc_engine_start(struct xe_engine *e)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+	int idx;
+
+	if (engine_killed_or_banned(e))
+		goto start;
+
+	trace_xe_engine_resubmit(e);
+	for (idx = 0; idx < e->width; idx++) {
+		struct xe_lrc *lrc = &e->lrc[idx];
+
+		xe_lrc_set_ring_head(lrc, lrc->ring.tail);
+	}
+	xe_sched_resubmit_jobs(sched);
+
+start:
+	xe_sched_submission_start(sched);
+}
+
+/*
+ * Resume GuC submission after a stop: clear the stopped flag, restart every
+ * engine and wake all waiters on the CT wait queue (which includes callers
+ * of xe_guc_submit_reset_wait()). Always returns 0.
+ */
+int xe_guc_submit_start(struct xe_guc *guc)
+{
+	struct xe_engine *e;
+	unsigned long index;
+
+	XE_BUG_ON(guc_read_stopped(guc) != 1);
+
+	mutex_lock(&guc->submission_state.lock);
+	/* The flag is exactly 1 here (checked above), so this clears it */
+	atomic_dec(&guc->submission_state.stopped);
+	xa_for_each(&guc->submission_state.engine_lookup, index, e)
+		guc_engine_start(e);
+	mutex_unlock(&guc->submission_state.lock);
+
+	wake_up_all(&guc->ct.wq);
+
+	return 0;
+}
+
+/*
+ * Translate a GuC id received in a G2H message into its engine.
+ *
+ * Return: the engine, or NULL (after logging an error) if the id is out of
+ * range or no engine is registered under it.
+ */
+static struct xe_engine *
+g2h_engine_lookup(struct xe_guc *guc, u32 guc_id)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *found;
+
+	if (unlikely(guc_id >= GUC_ID_MAX)) {
+		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
+		return NULL;
+	}
+
+	found = xa_load(&guc->submission_state.engine_lookup, guc_id);
+	if (likely(found)) {
+		XE_BUG_ON(found->guc->id != guc_id);
+		return found;
+	}
+
+	drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
+	return NULL;
+}
+
+/*
+ * Send the deregister-context H2G for @e. Uses the G2H-handler flavour of
+ * the CT send helper, as this is called from the schedule-done handler.
+ */
+static void deregister_engine(struct xe_guc *guc, struct xe_engine *e)
+{
+	u32 action[2];
+
+	action[0] = XE_GUC_ACTION_DEREGISTER_CONTEXT;
+	action[1] = e->guc->id;
+
+	trace_xe_engine_deregister(e);
+
+	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+/**
+ * xe_guc_sched_done_handler - G2H handler for a completed schedule enable /
+ * disable
+ * @guc: GuC object
+ * @msg: message payload, msg[0] is the GuC id of the engine
+ * @len: payload length in dwords, must be at least 2
+ *
+ * For a completed enable the resume time is recorded and waiters on the CT
+ * wait queue are woken. For a completed disable either the pending suspend
+ * is signaled or the engine is deregistered (waking waiters first if the
+ * engine is banned).
+ *
+ * Return: 0 on success, -EPROTO on a short message, an unknown GuC id or an
+ * engine that has neither an enable nor a disable pending.
+ */
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *e;
+	u32 guc_id;
+
+	if (unlikely(len < 2)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	/* Only touch the payload once its length has been validated */
+	guc_id = msg[0];
+
+	e = g2h_engine_lookup(guc, guc_id);
+	if (unlikely(!e))
+		return -EPROTO;
+
+	if (unlikely(!engine_pending_enable(e) &&
+		     !engine_pending_disable(e))) {
+		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+			atomic_read(&e->guc->state));
+		return -EPROTO;
+	}
+
+	trace_xe_engine_scheduling_done(e);
+
+	if (engine_pending_enable(e)) {
+		/* Replaces the RESUME_PENDING sentinel */
+		e->guc->resume_time = ktime_get();
+		clear_engine_pending_enable(e);
+		smp_wmb();
+		wake_up_all(&guc->ct.wq);
+	} else {
+		clear_engine_pending_disable(e);
+		if (e->guc->suspend_pending) {
+			suspend_fence_signal(e);
+		} else {
+			/* The TDR waits on pending_disable of banned engines */
+			if (engine_banned(e)) {
+				smp_wmb();
+				wake_up_all(&guc->ct.wq);
+			}
+			deregister_engine(guc, e);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * xe_guc_deregister_done_handler - G2H handler for a completed context
+ * deregistration
+ * @guc: GuC object
+ * @msg: message payload, msg[0] is the GuC id of the engine
+ * @len: payload length in dwords, must be at least 1
+ *
+ * Clears the registered state and completes the teardown: a banned engine
+ * has a reference dropped, any other engine is finalized.
+ *
+ * Return: 0 on success, -EPROTO on a short message, an unknown GuC id or an
+ * engine that is not in the destroyed-and-disabled state.
+ */
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *e;
+	u32 guc_id;
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	/* Only touch the payload once its length has been validated */
+	guc_id = msg[0];
+
+	e = g2h_engine_lookup(guc, guc_id);
+	if (unlikely(!e))
+		return -EPROTO;
+
+	if (!engine_destroyed(e) || engine_pending_disable(e) ||
+	    engine_pending_enable(e) || engine_enabled(e)) {
+		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+			atomic_read(&e->guc->state));
+		return -EPROTO;
+	}
+
+	trace_xe_engine_deregister_done(e);
+
+	clear_engine_registered(e);
+	if (engine_banned(e))
+		xe_engine_put(e);
+	else
+		__guc_engine_fini(guc, e);
+
+	return 0;
+}
+
+/**
+ * xe_guc_engine_reset_handler - G2H handler for an engine reset notification
+ * @guc: GuC object
+ * @msg: message payload, msg[0] is the GuC id of the engine
+ * @len: payload length in dwords, must be at least 1
+ *
+ * Marks the engine as reset and, unless it is already banned, queues its
+ * timeout handler to run immediately.
+ *
+ * Return: 0 on success, -EPROTO on a short message or an unknown GuC id.
+ */
+int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *e;
+	u32 guc_id;
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	/* Only touch the payload once its length has been validated */
+	guc_id = msg[0];
+
+	e = g2h_engine_lookup(guc, guc_id);
+	if (unlikely(!e))
+		return -EPROTO;
+
+	/* guc_id is a u32, print it as unsigned */
+	drm_info(&xe->drm, "Engine reset: guc_id=%u", guc_id);
+
+	/* FIXME: Do error capture, most likely async */
+
+	trace_xe_engine_reset(e);
+
+	/*
+	 * A banned engine is a NOP at this point (came from
+	 * guc_engine_timedout_job). Otherwise, kick drm scheduler to cancel
+	 * jobs by setting timeout of the job to the minimum value kicking
+	 * guc_engine_timedout_job.
+	 */
+	set_engine_reset(e);
+	if (!engine_banned(e))
+		xe_sched_tdr_queue_imm(&e->guc->sched);
+
+	return 0;
+}
+
+/**
+ * xe_guc_engine_memory_cat_error_handler - G2H handler for a memory CAT error
+ * @guc: GuC object
+ * @msg: message payload, msg[0] is the GuC id of the engine
+ * @len: payload length in dwords, must be at least 1
+ *
+ * Handled like an engine reset: the engine is marked as reset and, unless it
+ * is already banned, its timeout handler is queued to run immediately.
+ *
+ * Return: 0 on success, -EPROTO on a short message or an unknown GuC id.
+ */
+int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					   u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *e;
+	u32 guc_id;
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	/* Only touch the payload once its length has been validated */
+	guc_id = msg[0];
+
+	e = g2h_engine_lookup(guc, guc_id);
+	if (unlikely(!e))
+		return -EPROTO;
+
+	/* guc_id is a u32, print it as unsigned */
+	drm_warn(&xe->drm, "Engine memory cat error: guc_id=%u", guc_id);
+	trace_xe_engine_memory_cat_error(e);
+
+	/* Treat the same as engine reset */
+	set_engine_reset(e);
+	if (!engine_banned(e))
+		xe_sched_tdr_queue_imm(&e->guc->sched);
+
+	return 0;
+}
+
+/*
+ * G2H handler for a failed engine reset request. The payload must be exactly
+ * three dwords: GuC engine class, instance and failure reason. The failure is
+ * logged and an asynchronous GT reset is requested.
+ *
+ * Return: 0 on success, -EPROTO if the payload length is not 3.
+ */
+int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u8 guc_class;
+	u8 instance;
+	u32 reason;
+
+	if (unlikely(len != 3)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	reason = msg[2];
+	instance = msg[1];
+	guc_class = msg[0];
+
+	/* A hardware feature failed unexpectedly, so log a real error */
+	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
+		guc_class, instance, reason);
+
+	xe_gt_reset_async(guc_to_gt(guc));
+
+	return 0;
+}
+
+/*
+ * Print the work-queue state of a parallel engine: cached and in-memory head
+ * and tail, the status word and every dword between head and tail as read
+ * from the descriptor in memory.
+ */
+static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	int i;
+
+	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
+		   e->guc->wqi_head, parallel_read(xe, map, wq_desc.head));
+	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
+		   e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail));
+	drm_printf(p, "\tWQ status: %u\n",
+		   parallel_read(xe, map, wq_desc.wq_status));
+	if (parallel_read(xe, map, wq_desc.head) !=
+	    parallel_read(xe, map, wq_desc.tail)) {
+		/* Walk the ring one dword at a time, wrapping at WQ_SIZE */
+		for (i = parallel_read(xe, map, wq_desc.head);
+		     i != parallel_read(xe, map, wq_desc.tail);
+		     i = (i + sizeof(u32)) % WQ_SIZE)
+			/* i / sizeof(u32) is a size_t, hence %zu */
+			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
+				   parallel_read(xe, map, wq[i / sizeof(u32)]));
+	}
+}
+
+/*
+ * Print the backend state of one engine: identification, scheduling
+ * properties, per-LRC ring / seqno state, state bits and flags, the work
+ * queue for parallel engines and the list of pending jobs.
+ */
+static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+	struct xe_sched_job *job;
+	int i;
+
+	drm_printf(p, "\nGuC ID: %d\n", e->guc->id);
+	drm_printf(p, "\tName: %s\n", e->name);
+	drm_printf(p, "\tClass: %d\n", e->class);
+	drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask);
+	drm_printf(p, "\tWidth: %d\n", e->width);
+	drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount));
+	/*
+	 * NOTE(review): guc_engine_init() seeds this timeout with "HZ * 5";
+	 * confirm that the value printed here really is in milliseconds.
+	 */
+	drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout);
+	drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us);
+	drm_printf(p, "\tPreempt timeout: %u (us)\n",
+		   e->sched_props.preempt_timeout_us);
+	/* One set of lines per LRC of the engine */
+	for (i = 0; i < e->width; ++i ) {
+		struct xe_lrc *lrc = e->lrc + i;
+
+		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
+			   lower_32_bits(xe_lrc_ggtt_addr(lrc)));
+		drm_printf(p, "\tLRC Head: (memory) %u\n",
+			   xe_lrc_ring_head(lrc));
+		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
+			   lrc->ring.tail,
+			   xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL));
+		drm_printf(p, "\tStart seqno: (memory) %d\n",
+			   xe_lrc_start_seqno(lrc));
+		drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc));
+	}
+	drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state));
+	drm_printf(p, "\tFlags: 0x%lx\n", e->flags);
+	if (xe_engine_is_parallel(e))
+		guc_engine_wq_print(e, p);
+
+	/* The pending list is walked under the scheduler's job list lock */
+	spin_lock(&sched->base.job_list_lock);
+	list_for_each_entry(job, &sched->base.pending_list, drm.list)
+		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
+			   xe_sched_job_seqno(job),
+			   dma_fence_is_signaled(job->fence) ? 1 : 0,
+			   dma_fence_is_signaled(&job->drm.s_fence->finished) ?
+			   1 : 0);
+	spin_unlock(&sched->base.job_list_lock);
+}
+
+/*
+ * Print the state of every engine known to the GuC submission backend.
+ * Does nothing when GuC submission is not enabled on the device.
+ */
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
+{
+	struct xe_engine *engine;
+	unsigned long guc_id;
+
+	if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
+		return;
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.engine_lookup, guc_id, engine) {
+		guc_engine_print(engine, p);
+	}
+	mutex_unlock(&guc->submission_state.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
new file mode 100644
index 000000000000..8002734d6f24
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_SUBMIT_H_
+#define _XE_GUC_SUBMIT_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_engine;
+struct xe_guc;
+
+int xe_guc_submit_init(struct xe_guc *guc);
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
+
+int xe_guc_submit_reset_prepare(struct xe_guc *guc);
+void xe_guc_submit_reset_wait(struct xe_guc *guc);
+int xe_guc_submit_stop(struct xe_guc *guc);
+int xe_guc_submit_start(struct xe_guc *guc);
+
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					   u32 len);
+int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
new file mode 100644
index 000000000000..ca177853cc12
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_TYPES_H_
+#define _XE_GUC_TYPES_H_
+
+#include <linux/idr.h>
+#include <linux/xarray.h>
+
+#include "xe_guc_ads_types.h"
+#include "xe_guc_ct_types.h"
+#include "xe_guc_fwif.h"
+#include "xe_guc_log_types.h"
+#include "xe_guc_pc_types.h"
+#include "xe_uc_fw_types.h"
+
+/**
+ * struct xe_guc - Graphics microcontroller (GuC)
+ */
+struct xe_guc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+	/** @log: GuC log */
+	struct xe_guc_log log;
+	/** @ads: GuC ads */
+	struct xe_guc_ads ads;
+	/** @ct: GuC ct */
+	struct xe_guc_ct ct;
+	/** @pc: GuC Power Conservation */
+	struct xe_guc_pc pc;
+	/** @submission_state: GuC submission state */
+	struct {
+		/** @submission_state.engine_lookup: Lookup an xe_engine from guc_id */
+		struct xarray engine_lookup;
+		/** @submission_state.guc_ids: used to allocate new guc_ids, single-lrc */
+		struct ida guc_ids;
+		/** @submission_state.guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
+		unsigned long *guc_ids_bitmap;
+		/** @submission_state.stopped: submissions are stopped */
+		atomic_t stopped;
+		/** @submission_state.lock: protects submission state */
+		struct mutex lock;
+		/** @submission_state.suspend: suspend fence state */
+		struct {
+			/** @submission_state.suspend.lock: suspend fences lock */
+			spinlock_t lock;
+			/** @submission_state.suspend.context: suspend fences context */
+			u64 context;
+			/** @submission_state.suspend.seqno: suspend fences seqno */
+			u32 seqno;
+		} suspend;
+	} submission_state;
+	/** @hwconfig: Hardware config state */
+	struct {
+		/** @hwconfig.bo: buffer object of the hardware config */
+		struct xe_bo *bo;
+		/** @hwconfig.size: size of the hardware config */
+		u32 size;
+	} hwconfig;
+
+	/**
+	 * @notify_reg: Register which is written to notify GuC of H2G messages
+	 */
+	u32 notify_reg;
+	/** @params: Control params for fw initialization */
+	u32 params[GUC_CTL_MAX_DWORDS];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
new file mode 100644
index 000000000000..93b22fac6e14
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_reg.h"
+#include "xe_huc.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+/* Resolve the GT that embeds @huc as its uc.huc member. */
+static struct xe_gt *huc_to_gt(struct xe_huc *huc)
+{
+	return container_of(huc, struct xe_gt, uc.huc);
+}
+
+/* Resolve the device owning @huc by going through its enclosing GT. */
+static struct xe_device *huc_to_xe(struct xe_huc *huc)
+{
+	return gt_to_xe(huc_to_gt(huc));
+}
+
+/* Return the guc member of the struct xe_uc that embeds @huc. */
+static struct xe_guc *huc_to_guc(struct xe_huc *huc)
+{
+	return &container_of(huc, struct xe_uc, huc)->guc;
+}
+
+/* Init HuC firmware; a disabled HuC is logged and treated as success. */
+int xe_huc_init(struct xe_huc *huc)
+{
+	struct xe_device *xe = huc_to_xe(huc);
+	int ret;
+
+	huc->fw.type = XE_UC_FW_TYPE_HUC;
+	ret = xe_uc_fw_init(&huc->fw);
+	if (ret)
+		goto out;
+
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+	return 0;
+
+out:
+	if (xe_uc_fw_is_disabled(&huc->fw)) {
+		drm_info(&xe->drm, "HuC disabled\n");
+		return 0;
+	}
+	drm_err(&xe->drm, "HuC init failed with %d\n", ret);
+	return ret;
+}
+
+int xe_huc_upload(struct xe_huc *huc)
+{
+	if (xe_uc_fw_is_disabled(&huc->fw))
+		return 0;	/* disabled HuC: no upload is attempted */
+	return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
+}
+
+/* Ask the GuC to authenticate the loaded HuC; mark it running on success. */
+int xe_huc_auth(struct xe_huc *huc)
+{
+	struct xe_guc *guc = huc_to_guc(huc);
+	struct xe_device *xe = huc_to_xe(huc);
+	struct xe_gt *gt = huc_to_gt(huc);
+	int ret;
+
+	if (xe_uc_fw_is_disabled(&huc->fw))
+		return 0;
+
+	XE_BUG_ON(xe_uc_fw_is_running(&huc->fw));
+
+	if (!xe_uc_fw_is_loaded(&huc->fw))
+		return -ENOEXEC;
+
+	ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) +
+			      xe_uc_fw_rsa_offset(&huc->fw));
+	if (ret) {
+		drm_err(&xe->drm, "HuC: GuC did not ack Auth request %d\n", ret);
+		goto fail;
+	}
+
+	/* Wait for HUC_LOAD_SUCCESSFUL in the load-info register. */
+	ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg,
+			     HUC_LOAD_SUCCESSFUL, HUC_LOAD_SUCCESSFUL, 100);
+	if (ret) {
+		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
+		goto fail;
+	}
+
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
+	drm_dbg(&xe->drm, "HuC authenticated\n");
+	return 0;
+
+fail:
+	drm_err(&xe->drm, "HuC authentication failed %d\n", ret);
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+
+	return ret;
+}
+
+void xe_huc_sanitize(struct xe_huc *huc)
+{
+	/* Only an enabled HuC is moved back to the LOADABLE state. */
+	if (!xe_uc_fw_is_disabled(&huc->fw))
+		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+/* Print HuC firmware info and, if enabled, the raw load-info register. */
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+
+	xe_uc_fw_print(&huc->fw, p);
+
+	if (xe_uc_fw_is_disabled(&huc->fw))
+		return;
+
+	/* Take XE_FW_GT forcewake around the register read; bail if that fails. */
+	if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+		return;
+
+	drm_printf(p, "\nHuC status: 0x%08x\n",
+		   xe_mmio_read32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg));
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h
new file mode 100644
index 000000000000..5802c43b6ce2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_H_
+#define _XE_HUC_H_
+
+#include "xe_huc_types.h"
+
+struct drm_printer;
+
+int xe_huc_init(struct xe_huc *huc);
+int xe_huc_upload(struct xe_huc *huc);
+int xe_huc_auth(struct xe_huc *huc);
+void xe_huc_sanitize(struct xe_huc *huc);
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
new file mode 100644
index 000000000000..268bac36336a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_huc.h"
+#include "xe_huc_debugfs.h"
+#include "xe_macros.h"
+
+/* Resolve the GT that embeds @huc as its uc.huc member. */
+static struct xe_gt *huc_to_gt(struct xe_huc *huc)
+{
+	return container_of(huc, struct xe_gt, uc.huc);
+}
+
+/* Resolve the device owning @huc by going through its enclosing GT. */
+static struct xe_device *huc_to_xe(struct xe_huc *huc)
+{
+	return gt_to_xe(huc_to_gt(huc));
+}
+
+static struct xe_huc *node_to_huc(struct drm_info_node *node)
+{
+	return node->info_ent->data;	/* .data is set in xe_huc_debugfs_register() */
+}
+
+static int huc_info(struct seq_file *m, void *data)
+{
+	struct xe_huc *huc = node_to_huc(m->private);
+	struct xe_device *xe = huc_to_xe(huc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	/* Bracket the dump with a device mem_access get/put pair. */
+	xe_device_mem_access_get(xe);
+	xe_huc_print_info(huc, &p);
+	xe_device_mem_access_put(xe);
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"huc_info", huc_info, 0},	/* .data is set on a per-HuC copy at registration */
+};
+
+/* Register the HuC debugfs files under @parent with @huc as their data. */
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent)
+{
+	struct drm_minor *minor = huc_to_xe(huc)->drm.primary;
+	struct drm_info_list *local;
+	int i;
+
+#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+	local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	if (!local) {
+		XE_WARN_ON("Couldn't allocate memory");
+		return;
+	}
+
+	/* debugfs_list is const: fill in .data on a drmm-allocated copy. */
+	memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = huc;
+
+	drm_debugfs_create_files(local, ARRAY_SIZE(debugfs_list), parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.h b/drivers/gpu/drm/xe/xe_huc_debugfs.h
new file mode 100644
index 000000000000..ec58f1818804
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_DEBUGFS_H_
+#define _XE_HUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_huc;
+
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h
new file mode 100644
index 000000000000..cae6d19097df
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_TYPES_H_
+#define _XE_HUC_TYPES_H_
+
+#include "xe_uc_fw_types.h"
+
+/**
+ * struct xe_huc - HuC firmware state
+ */
+struct xe_huc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
new file mode 100644
index 000000000000..fd89dd90131c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_engine.h"
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_reg_sr.h"
+#include "xe_sched_job.h"
+#include "xe_wa.h"
+
+#include "gt/intel_engine_regs.h"
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+#define MAX_MMIO_BASES 3
+struct engine_info {
+	const char *name;	/* a NULL name marks an unused table slot */
+	unsigned int class : 8;
+	unsigned int instance : 8;
+	enum xe_force_wake_domains domain;
+	/* mmio bases table *must* be sorted in reverse graphics_ver order */
+	struct engine_mmio_base {
+		unsigned int graphics_ver : 8;	/* lowest version that uses @base */
+		unsigned int base : 24;
+	} mmio_bases[MAX_MMIO_BASES];
+};
+
+static const struct engine_info engine_infos[] = {
+	[XE_HW_ENGINE_RCS0] = {
+		.name = "rcs0",
+		.class = XE_ENGINE_CLASS_RENDER,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 1, .base = RENDER_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS0] = {
+		.name = "bcs0",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 6, .base = BLT_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS1] = {
+		.name = "bcs1",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 1,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS2] = {
+		.name = "bcs2",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 2,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS3] = {
+		.name = "bcs3",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 3,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS4] = {
+		.name = "bcs4",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 4,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS5] = {
+		.name = "bcs5",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 5,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS6] = {
+		.name = "bcs6",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 6,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS7] = {
+		.name = "bcs7",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 7,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS8] = {
+		.name = "bcs8",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 8,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
+		},
+	},
+
+	[XE_HW_ENGINE_VCS0] = {
+		.name = "vcs0",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 0,
+		.domain = XE_FW_MEDIA_VDBOX0,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_BSD_RING_BASE },
+			{ .graphics_ver = 6, .base = GEN6_BSD_RING_BASE },
+			{ .graphics_ver = 4, .base = BSD_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS1] = {
+		.name = "vcs1",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 1,
+		.domain = XE_FW_MEDIA_VDBOX1,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE },
+			{ .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS2] = {
+		.name = "vcs2",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 2,
+		.domain = XE_FW_MEDIA_VDBOX2,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS3] = {
+		.name = "vcs3",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 3,
+		.domain = XE_FW_MEDIA_VDBOX3,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS4] = {
+		.name = "vcs4",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 4,
+		.domain = XE_FW_MEDIA_VDBOX4,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS5] = {
+		.name = "vcs5",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 5,
+		.domain = XE_FW_MEDIA_VDBOX5,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS6] = {
+		.name = "vcs6",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 6,
+		.domain = XE_FW_MEDIA_VDBOX6,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS7] = {
+		.name = "vcs7",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 7,
+		.domain = XE_FW_MEDIA_VDBOX7,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VECS0] = {
+		.name = "vecs0",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 0,
+		.domain = XE_FW_MEDIA_VEBOX0,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE },
+			{ .graphics_ver = 7, .base = VEBOX_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VECS1] = {
+		.name = "vecs1",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 1,
+		.domain = XE_FW_MEDIA_VEBOX1,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VECS2] = {
+		.name = "vecs2",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 2,
+		.domain = XE_FW_MEDIA_VEBOX2,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VECS3] = {
+		.name = "vecs3",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 3,
+		.domain = XE_FW_MEDIA_VEBOX3,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_CCS0] = {
+		.name = "ccs0",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE },
+		},
+	},
+	[XE_HW_ENGINE_CCS1] = {
+		.name = "ccs1",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 1,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE },
+		},
+	},
+	[XE_HW_ENGINE_CCS2] = {
+		.name = "ccs2",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 2,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE },
+		},
+	},
+	[XE_HW_ENGINE_CCS3] = {
+		.name = "ccs3",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 3,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE },
+		},
+	},
+};
+
+/* Pick the first mmio_bases[] entry whose graphics_ver is <= @graphics_ver. */
+static u32 engine_info_mmio_base(const struct engine_info *info,
+				 unsigned int graphics_ver)
+{
+	const struct engine_mmio_base *bases = info->mmio_bases;
+	int idx = 0;
+
+	while (idx < MAX_MMIO_BASES && graphics_ver < bases[idx].graphics_ver)
+		idx++;
+
+	XE_BUG_ON(idx == MAX_MMIO_BASES);
+	XE_BUG_ON(!bases[idx].base);
+	return bases[idx].base;
+}
+
+static void hw_engine_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_hw_engine *hwe = arg;
+
+	/* exl_port is only created when GuC submission is not enabled. */
+	if (hwe->exl_port)
+		xe_execlist_port_destroy(hwe->exl_port);
+	xe_lrc_finish(&hwe->kernel_lrc);
+	xe_bo_unpin_map_no_vm(hwe->hwsp);
+
+	hwe->gt = NULL;
+}
+
+static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, u32 reg, u32 val)
+{
+	XE_BUG_ON(reg & hwe->mmio_base);
+	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+	/* @reg is engine-relative; the engine's mmio_base is added here. */
+	xe_mmio_write32(hwe->gt, reg + hwe->mmio_base, val);
+}
+
+static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, u32 reg)
+{
+	XE_BUG_ON(reg & hwe->mmio_base);
+	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+	/* @reg is engine-relative; the engine's mmio_base is added here. */
+	return xe_mmio_read32(hwe->gt, reg + hwe->mmio_base);
+}
+
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
+{
+	u32 ccs_mask =
+		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask & BIT(0))
+		xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg,
+				_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
+	/* The per-engine accesses below assert hwe->domain forcewake is held. */
+	hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0);
+	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg,
+			       xe_bo_ggtt_addr(hwe->hwsp));
+	hw_engine_mmio_write32(hwe, RING_MODE_GEN7(0).reg,
+			       _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+	hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg,
+			       _MASKED_BIT_DISABLE(STOP_RING));
+	hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg);	/* value unused */
+}
+
+/* Fill in the static description of engine @id and build its reg tables. */
+static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
+				 enum xe_hw_engine_id id)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	const struct engine_info *info;
+
+	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
+		return;
+
+	if (!(gt->info.engine_mask & BIT(id)))
+		return;
+
+	info = &engine_infos[id];
+	XE_BUG_ON(hwe->gt);
+
+	hwe->engine_id = id;
+	hwe->gt = gt;
+	hwe->name = info->name;
+	hwe->class = info->class;
+	hwe->instance = info->instance;
+	hwe->domain = info->domain;
+	hwe->mmio_base = engine_info_mmio_base(info, GRAPHICS_VER(xe));
+	hwe->fence_irq = &gt->fence_irq[info->class];
+
+	xe_reg_sr_init(&hwe->reg_sr, hwe->name, xe);
+	xe_wa_process_engine(hwe);
+
+	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, xe);
+	xe_reg_whitelist_process_engine(hwe);
+}
+
+/* Set up @hwe's runtime state: HWSP bo, kernel LRC and execlist port. */
+static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
+			  enum xe_hw_engine_id id)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name);
+	XE_BUG_ON(!(gt->info.engine_mask & BIT(id)));
+
+	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+	xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt);
+
+	hwe->hwsp = xe_bo_create_locked(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel,
+					XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(hwe->hwsp)) {
+		err = PTR_ERR(hwe->hwsp);
+		goto err_name;
+	}
+
+	err = xe_bo_pin(hwe->hwsp);
+	if (err)
+		goto err_unlock_put_hwsp;
+
+	err = xe_bo_vmap(hwe->hwsp);
+	if (err)
+		goto err_unpin_hwsp;
+
+	xe_bo_unlock_no_vm(hwe->hwsp);
+
+	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
+	if (err)
+		goto err_hwsp;
+
+	if (!xe_device_guc_submission_enabled(xe)) {
+		hwe->exl_port = xe_execlist_port_create(xe, hwe);
+		if (IS_ERR(hwe->exl_port)) {
+			err = PTR_ERR(hwe->exl_port);
+			goto err_kernel_lrc;
+		}
+	}
+
+	if (xe_device_guc_submission_enabled(xe))
+		xe_hw_engine_enable_ring(hwe);
+
+	/* We reserve the highest BCS instance for USM */
+	if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY)
+		gt->usm.reserved_bcs_instance = hwe->instance;
+
+	return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
+
+err_kernel_lrc:
+	xe_lrc_finish(&hwe->kernel_lrc);
+err_hwsp:
+	/* HWSP is pinned and unlocked here: release it as hw_engine_fini() does. */
+	xe_bo_unpin_map_no_vm(hwe->hwsp);
+	goto err_name;
+err_unpin_hwsp:
+	xe_bo_unpin(hwe->hwsp);
+err_unlock_put_hwsp:
+	/* HWSP is still locked on these paths; it is unlocked only after vmap. */
+	xe_bo_unlock_no_vm(hwe->hwsp);
+	xe_bo_put(hwe->hwsp);
+err_name:
+	hwe->name = NULL;
+
+	return err;
+}
+
+static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
+{
+	int class;
+
+	/* FIXME: Doing a simple logical mapping that works for most hardware */
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id id;
+		int logical_instance = 0;
+		/* Number this class's engines from 0 in iteration order. */
+		for_each_hw_engine(hwe, gt, id)
+			if (hwe->class == class)
+				hwe->logical_instance = logical_instance++;
+	}
+}
+
+/* Clear engines that the fuse registers report as absent from engine_mask. */
+static void read_fuses(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 media_fuse, bcs_mask;
+	u16 vdbox_mask, vebox_mask;
+	int i, j;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	/*
+	 * FIXME: Hack job, thinking we should have table of vfuncs for each
+	 * class which picks the correct vfunc based on IP version.
+	 */
+
+	media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg);
+	if (GRAPHICS_VERx100(xe) < 1250)
+		media_fuse = ~media_fuse;	/* below: set bit == engine present */
+
+	vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
+	vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
+		      GEN11_GT_VEBOX_DISABLE_SHIFT;
+
+	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!(BIT(j) & vdbox_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "vcs%u fused off\n", j);
+		}
+	}
+
+	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!(BIT(j) & vebox_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "vecs%u fused off\n", j);
+		}
+	}
+
+	bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg);
+	bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask);
+
+	/* The walk starts at BCS1, so j == 0 is bcs1; two engines per mask bit. */
+	for (i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!(BIT(j / 2) & bcs_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "bcs%u fused off\n", j + 1);
+		}
+	}
+
+	/* TODO: compute engines */
+}
+
+int xe_hw_engines_init_early(struct xe_gt *gt)
+{
+	int i;
+
+	read_fuses(gt);
+	/* Fuses first: read_fuses() trims engine_mask, which the loop checks. */
+	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
+		hw_engine_init_early(gt, &gt->hw_engines[i], i);
+
+	return 0;
+}
+
+int xe_hw_engines_init(struct xe_gt *gt)
+{
+	int err;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		err = hw_engine_init(gt, hwe, id);
+		if (err)
+			return err;
+	}
+	/* Reached only if every engine initialised; the first error returns above. */
+	hw_engine_setup_logical_mapping(gt);
+
+	return 0;
+}
+
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
+{
+	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
+	/* irq_handler is optional; fence_irq runs only for user interrupts. */
+	if (hwe->irq_handler)
+		hwe->irq_handler(hwe, intr_vec);
+
+	if (intr_vec & GT_RENDER_USER_INTERRUPT)
+		xe_hw_fence_irq_run(hwe->fence_irq);
+}
+
+/* Dump @hwe's ring registers; each read asserts hwe->domain forcewake is held. */
+void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
+{
+	if (!xe_hw_engine_is_valid(hwe))
+		return;
+
+	drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name,
+		hwe->logical_instance);
+	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
+		hwe->domain,
+		xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain));
+	drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base);
+
+	drm_printf(p, "\tHWSTAM: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_HWSTAM(0).reg));
+	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_HWS_PGA(0).reg));
+
+	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0).reg));
+	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg));
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
+		hw_engine_mmio_read32(hwe,
+					 RING_EXECLIST_SQ_CONTENTS(0).reg));
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
+		hw_engine_mmio_read32(hwe,
+					 RING_EXECLIST_SQ_CONTENTS(0).reg + 4));
+	drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg));
+
+	drm_printf(p, "\tRING_START: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_START(0).reg));
+	drm_printf(p, "\tRING_HEAD:  0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_HEAD(0).reg) & HEAD_ADDR);
+	drm_printf(p, "\tRING_TAIL:  0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_TAIL(0).reg) & TAIL_ADDR);
+	drm_printf(p, "\tRING_CTL: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_CTL(0).reg));
+	drm_printf(p, "\tRING_MODE: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg));
+	drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_MODE_GEN7(0).reg));
+
+	drm_printf(p, "\tRING_IMR:   0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_IMR(0).reg));
+	drm_printf(p, "\tRING_ESR:   0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_ESR(0).reg));
+	drm_printf(p, "\tRING_EMR:   0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EMR(0).reg));
+	drm_printf(p, "\tRING_EIR:   0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EIR(0).reg));
+
+	drm_printf(p, "\tACTHD:  0x%08x_%08x\n",
+		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0).reg),
+		hw_engine_mmio_read32(hwe, RING_ACTHD(0).reg));
+	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n",
+		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0).reg),
+		hw_engine_mmio_read32(hwe, RING_BBADDR(0).reg));
+	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0).reg),
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0).reg));
+
+	drm_printf(p, "\tIPEIR: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, IPEIR(0).reg));
+	drm_printf(p, "\tIPEHR: 0x%08x\n\n",
+		hw_engine_mmio_read32(hwe, IPEHR(0).reg));
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+		drm_printf(p, "\tGEN12_RCU_MODE: 0x%08x\n",
+			xe_mmio_read32(hwe->gt, GEN12_RCU_MODE.reg));
+}
+
+/* Bitmask of @engine_class instances that are set in the GT's engine_mask. */
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+				enum xe_engine_class engine_class)
+{
+	enum xe_hw_engine_id id;
+	u32 mask = 0;
+
+	for (id = 0; id < XE_NUM_HW_ENGINES; ++id)
+		if (gt->info.engine_mask & BIT(id) &&
+		    engine_infos[id].class == engine_class)
+			mask |= BIT(engine_infos[id].instance);
+	return mask;
+}
+
+/* True when @hwe is the copy instance the GT recorded as reserved for USM. */
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+
+	return gt_to_xe(gt)->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+		hwe->instance == gt->usm.reserved_bcs_instance;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
new file mode 100644
index 000000000000..ceab65397256
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_H_
+#define _XE_HW_ENGINE_H_
+
+#include "xe_hw_engine_types.h"
+
+struct drm_printer;
+
+int xe_hw_engines_init_early(struct xe_gt *gt);
+int xe_hw_engines_init(struct xe_gt *gt);
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
+void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p);
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+				enum xe_engine_class engine_class);
+
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
+static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
+{
+	return hwe->name;	/* a non-NULL name is what marks the engine as valid */
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
new file mode 100644
index 000000000000..05a2fdc381d7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_TYPES_H_
+#define _XE_HW_ENGINE_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_lrc_types.h"
+#include "xe_reg_sr_types.h"
+
+/* See "Engine ID Definition" struct in the Icelake PRM */
+enum xe_engine_class {
+	XE_ENGINE_CLASS_RENDER = 0,
+	XE_ENGINE_CLASS_VIDEO_DECODE = 1,
+	XE_ENGINE_CLASS_VIDEO_ENHANCE = 2,
+	XE_ENGINE_CLASS_COPY = 3,
+	XE_ENGINE_CLASS_OTHER = 4,
+	XE_ENGINE_CLASS_COMPUTE = 5,
+	XE_ENGINE_CLASS_MAX = 6,	/* one past the last class value above */
+};
+
+enum xe_hw_engine_id {
+	XE_HW_ENGINE_RCS0,
+	XE_HW_ENGINE_BCS0,
+	XE_HW_ENGINE_BCS1,
+	XE_HW_ENGINE_BCS2,
+	XE_HW_ENGINE_BCS3,
+	XE_HW_ENGINE_BCS4,
+	XE_HW_ENGINE_BCS5,
+	XE_HW_ENGINE_BCS6,
+	XE_HW_ENGINE_BCS7,
+	XE_HW_ENGINE_BCS8,
+	XE_HW_ENGINE_VCS0,
+	XE_HW_ENGINE_VCS1,
+	XE_HW_ENGINE_VCS2,
+	XE_HW_ENGINE_VCS3,
+	XE_HW_ENGINE_VCS4,
+	XE_HW_ENGINE_VCS5,
+	XE_HW_ENGINE_VCS6,
+	XE_HW_ENGINE_VCS7,
+	XE_HW_ENGINE_VECS0,
+	XE_HW_ENGINE_VECS1,
+	XE_HW_ENGINE_VECS2,
+	XE_HW_ENGINE_VECS3,
+	XE_HW_ENGINE_CCS0,
+	XE_HW_ENGINE_CCS1,
+	XE_HW_ENGINE_CCS2,
+	XE_HW_ENGINE_CCS3,
+	XE_NUM_HW_ENGINES,	/* count of the ids above, not an engine itself */
+};
+
+/* FIXME: s/XE_HW_ENGINE_MAX_INSTANCE/XE_HW_ENGINE_MAX_COUNT */
+#define XE_HW_ENGINE_MAX_INSTANCE	9
+
+struct xe_bo;
+struct xe_execlist_port;
+struct xe_gt;
+
+/**
+ * struct xe_hw_engine - Hardware engine
+ *
+ * Contains all the hardware engine state for physical instances.
+ */
+struct xe_hw_engine {
+	/** @gt: graphics tile this hw engine belongs to */
+	struct xe_gt *gt;
+	/** @name: name of this hw engine */
+	const char *name;
+	/** @class: class of this hw engine */
+	enum xe_engine_class class;
+	/** @instance: physical instance of this hw engine */
+	u16 instance;
+	/** @logical_instance: logical instance of this hw engine */
+	u16 logical_instance;
+	/** @mmio_base: MMIO base address of this hw engine */
+	u32 mmio_base;
+	/**
+	 * @reg_sr: table with registers to be restored on GT init/resume/reset
+	 */
+	struct xe_reg_sr reg_sr;
+	/**
+	 * @reg_whitelist: table with registers to be whitelisted
+	 */
+	struct xe_reg_sr reg_whitelist;
+	/**
+	 * @reg_lrc: LRC workaround registers
+	 */
+	struct xe_reg_sr reg_lrc;
+	/** @domain: force wake domain of this hw engine */
+	enum xe_force_wake_domains domain;
+	/** @hwsp: hardware status page buffer object */
+	struct xe_bo *hwsp;
+	/** @kernel_lrc: Kernel LRC (should be replaced with an xe_engine) */
+	struct xe_lrc kernel_lrc;
+	/** @exl_port: execlists port */
+	struct xe_execlist_port *exl_port;
+	/** @fence_irq: fence IRQ to run when a hw engine IRQ is received */
+	struct xe_hw_fence_irq *fence_irq;
+	/** @irq_handler: IRQ handler to run when hw engine IRQ is received */
+	void (*irq_handler)(struct xe_hw_engine *, u16);
+	/** @engine_id: id for this hw engine */
+	enum xe_hw_engine_id engine_id;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
new file mode 100644
index 000000000000..e56ca2867545
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_fence.h"
+
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_trace.h"
+
+static struct kmem_cache *xe_hw_fence_slab;
+
+int __init xe_hw_fence_module_init(void)
+{
+	/* Dedicated slab cache that every struct xe_hw_fence comes from */
+	struct kmem_cache *slab =
+		kmem_cache_create("xe_hw_fence", sizeof(struct xe_hw_fence),
+				  0, SLAB_HWCACHE_ALIGN, NULL);
+
+	xe_hw_fence_slab = slab;
+	return slab ? 0 : -ENOMEM;
+}
+
+void xe_hw_fence_module_exit(void)
+{
+	rcu_barrier();	/* let queued call_rcu() fence_free() callbacks finish */
+	kmem_cache_destroy(xe_hw_fence_slab);
+}
+
+static struct xe_hw_fence *fence_alloc(void)
+{
+	return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL); /* NULL on failure */
+}
+
+static void fence_free(struct rcu_head *rcu)
+{
+	struct xe_hw_fence *hw_fence =	/* fence embedding this rcu head */
+		container_of(rcu, typeof(*hw_fence), dma.rcu);
+
+	if (!WARN_ON_ONCE(!hw_fence))
+		kmem_cache_free(xe_hw_fence_slab, hw_fence);
+}
+
+static void hw_fence_irq_run_cb(struct irq_work *work)
+{
+	struct xe_hw_fence_irq *irq = container_of(work, typeof(*irq), work);
+	struct xe_hw_fence *cur, *tmp;
+	bool cookie = dma_fence_begin_signalling();
+
+	spin_lock(&irq->lock);
+	/* While signaling is stopped, leave every fence on the pending list */
+	if (!irq->enabled)
+		goto unlock;
+	list_for_each_entry_safe(cur, tmp, &irq->pending, irq_link) {
+		trace_xe_hw_fence_try_signal(cur);
+		if (!dma_fence_is_signaled_locked(&cur->dma))
+			continue;
+		/* Signaled: unlink and drop the enable_signaling() reference */
+		trace_xe_hw_fence_signal(cur);
+		list_del_init(&cur->irq_link);
+		dma_fence_put(&cur->dma);
+	}
+unlock:
+	spin_unlock(&irq->lock);
+	dma_fence_end_signalling(cookie);
+}
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq)
+{
+	INIT_LIST_HEAD(&irq->pending);
+	spin_lock_init(&irq->lock);
+	irq->enabled = true;	/* signaling starts out enabled */
+	init_irq_work(&irq->work, hw_fence_irq_run_cb);
+}
+
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq)
+{
+	struct xe_hw_fence *cur, *tmp;
+	unsigned long irq_flags;
+	bool cookie;
+	int ret;
+
+	if (!XE_WARN_ON(!list_empty(&irq->pending)))
+		return;	/* expected case: nothing is left pending */
+	cookie = dma_fence_begin_signalling();
+	spin_lock_irqsave(&irq->lock, irq_flags);
+	list_for_each_entry_safe(cur, tmp, &irq->pending, irq_link) {
+		list_del_init(&cur->irq_link);
+		ret = dma_fence_signal_locked(&cur->dma);
+		dma_fence_put(&cur->dma);
+		XE_WARN_ON(ret);
+	}
+	spin_unlock_irqrestore(&irq->lock, irq_flags);
+	dma_fence_end_signalling(cookie);
+}
+
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq)
+{
+	irq_work_queue(&irq->work);	/* work item runs hw_fence_irq_run_cb() */
+}
+
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_irq(&irq->lock);
+	irq->enabled = false;	/* hw_fence_irq_run_cb() now skips the list */
+	spin_unlock_irq(&irq->lock);
+}
+
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_irq(&irq->lock);
+	irq->enabled = true;
+	spin_unlock_irq(&irq->lock);
+
+	irq_work_queue(&irq->work);	/* re-check the pending list right away */
+}
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+			  struct xe_hw_fence_irq *irq, const char *name)
+{
+	ctx->gt = gt;
+	ctx->irq = irq;
+	ctx->dma_fence_ctx = dma_fence_context_alloc(1);
+	ctx->next_seqno = 1;	/* first fence created on this context gets 1 */
+	snprintf(ctx->name, sizeof(ctx->name), "%s", name); /* truncate, never overflow */
+}
+
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx)
+{	/* currently a no-op: xe_hw_fence_ctx_init() only fills in fields */
+}
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence);
+
+static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
+{	/* xe_hw_fence_create() points dma.lock at the irq's lock member */
+	return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock);
+}
+
+static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence_ctx *ctx = to_xe_hw_fence(dma_fence)->ctx;
+
+	return dev_name(gt_to_xe(ctx->gt)->drm.dev);	/* name of the drm device */
+}
+
+static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence_ctx *ctx = to_xe_hw_fence(dma_fence)->ctx;
+
+	return ctx->name;	/* filled in by xe_hw_fence_ctx_init() */
+}
+
+static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+	struct xe_device *xe = gt_to_xe(fence->ctx->gt);
+	u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);
+
+	/* Errored fences count as signaled; seqnos are compared modulo 2^32 */
+	return dma_fence->error || (s32)((u32)fence->dma.seqno - seqno) <= 0;
+}
+
+static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *hw_fence = to_xe_hw_fence(dma_fence);
+	struct xe_hw_fence_irq *fence_irq = xe_hw_fence_irq(hw_fence);
+
+	/* The pending list holds its own reference until the fence signals */
+	dma_fence_get(dma_fence);
+	list_add_tail(&hw_fence->irq_link, &fence_irq->pending);
+
+	/* Already complete, so no HW IRQ will come: kick the handler now */
+	if (xe_hw_fence_signaled(dma_fence))
+		xe_hw_fence_irq_run(fence_irq);
+	return true;
+}
+
+static void xe_hw_fence_release(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *hw_fence = to_xe_hw_fence(dma_fence);
+
+	trace_xe_hw_fence_free(hw_fence);
+	XE_BUG_ON(!list_empty(&hw_fence->irq_link)); /* must be off the pending list */
+	call_rcu(&dma_fence->rcu, fence_free);	/* fence_free() returns it to the slab */
+}
+
+static const struct dma_fence_ops xe_hw_fence_ops = {
+	.get_driver_name = xe_hw_fence_get_driver_name,
+	.get_timeline_name = xe_hw_fence_get_timeline_name,
+	.enable_signaling = xe_hw_fence_enable_signaling, /* adds to irq->pending */
+	.signaled = xe_hw_fence_signaled,	/* error set or seqno reached */
+	.release = xe_hw_fence_release,	/* frees through call_rcu() */
+};
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence)
+{
+	/* Only fences using xe_hw_fence_ops are embedded in a struct xe_hw_fence */
+	if (!XE_WARN_ON(fence->ops != &xe_hw_fence_ops))
+		return container_of(fence, struct xe_hw_fence, dma);
+	return NULL;
+}
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+				       struct iosys_map seqno_map)
+{
+	struct xe_hw_fence *hw_fence = fence_alloc();
+
+	if (!hw_fence)
+		return ERR_PTR(-ENOMEM);
+
+	/* The fence shares the irq's lock and takes the context's next seqno */
+	dma_fence_init(&hw_fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
+		       ctx->dma_fence_ctx, ctx->next_seqno++);
+
+	INIT_LIST_HEAD(&hw_fence->irq_link);
+	hw_fence->seqno_map = seqno_map;
+	hw_fence->ctx = ctx;
+
+	trace_xe_hw_fence_create(hw_fence);
+
+	return hw_fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h
new file mode 100644
index 000000000000..07f202db6526
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_H_
+#define _XE_HW_FENCE_H_
+
+#include "xe_hw_fence_types.h"
+
+int xe_hw_fence_module_init(void);
+void xe_hw_fence_module_exit(void);
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq);
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+			  struct xe_hw_fence_irq *irq, const char *name);
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx);
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+				       struct iosys_map seqno_map);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h
new file mode 100644
index 000000000000..a78e50eb3cb8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_TYPES_H_
+#define _XE_HW_FENCE_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/dma-fence.h>
+#include <linux/irq_work.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct xe_gt;
+
+/**
+ * struct xe_hw_fence_irq - hardware fence IRQ handler
+ *
+ * One per engine class, signals completed xe_hw_fences, triggered via hw engine
+ * interrupt. On each trigger, walk the pending list and signal what completed.
+ */
+struct xe_hw_fence_irq {
+	/** @lock: protects the pending list; also the dma_fence lock of each fence */
+	spinlock_t lock;
+	/** @work: IRQ worker run to signal the fences */
+	struct irq_work work;
+	/** @pending: xe_hw_fences with signaling enabled, via xe_hw_fence.irq_link */
+	struct list_head pending;
+	/** @enabled: fence signaling enabled; the worker skips the list when false */
+	bool enabled;
+};
+
+#define MAX_FENCE_NAME_LEN	16
+
+/**
+ * struct xe_hw_fence_ctx - hardware fence context
+ *
+ * The context for a hardware fence. 1 to 1 relationship with xe_engine. Points
+ * to a xe_hw_fence_irq and hands out monotonically increasing seqnos.
+ */
+struct xe_hw_fence_ctx {
+	/** @gt: graphics tile of hardware fence context */
+	struct xe_gt *gt;
+	/** @irq: fence irq handler */
+	struct xe_hw_fence_irq *irq;
+	/** @dma_fence_ctx: dma fence context for hardware fence */
+	u64 dma_fence_ctx;
+	/** @next_seqno: seqno the next fence created on this context will get */
+	u32 next_seqno;
+	/** @name: NUL-terminated name of hardware fence context */
+	char name[MAX_FENCE_NAME_LEN];
+};
+
+/**
+ * struct xe_hw_fence - hardware fence
+ *
+ * Used to indicate a xe_sched_job is complete via a seqno written to memory.
+ * Signals on error or once the seqno in memory has reached the fence's seqno.
+ */
+struct xe_hw_fence {
+	/** @dma: base dma fence for hardware fence context */
+	struct dma_fence dma;
+	/** @ctx: hardware fence context */
+	struct xe_hw_fence_ctx *ctx;
+	/** @seqno_map: I/O map of the location the current seqno is read from */
+	struct iosys_map seqno_map;
+	/** @irq_link: Link in struct xe_hw_fence_irq.pending */
+	struct list_head irq_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
new file mode 100644
index 000000000000..df2e3573201d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -0,0 +1,565 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/sched/clock.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_drv.h"
+#include "xe_guc.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_mmio.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
+{
+	const u32 stale = xe_mmio_read32(gt, reg.reg);
+
+	if (!stale)
+		return;
+
+	/* Complain about the leftover bits, then clear twice with read backs */
+	drm_WARN(&gt_to_xe(gt)->drm, 1,
+		 "Interrupt register 0x%x is not zero: 0x%08x\n", reg.reg, stale);
+	xe_mmio_write32(gt, reg.reg, 0xffffffff);
+	xe_mmio_read32(gt, reg.reg);
+	xe_mmio_write32(gt, reg.reg, 0xffffffff);
+	xe_mmio_read32(gt, reg.reg);
+}
+
+static void gen3_irq_init(struct xe_gt *gt,
+			  i915_reg_t imr, u32 imr_val,
+			  i915_reg_t ier, u32 ier_val,
+			  i915_reg_t iir)
+{
+	gen3_assert_iir_is_zero(gt, iir);	/* warn about and clear stale IIR bits */
+
+	xe_mmio_write32(gt, ier.reg, ier_val);	/* IER is programmed before IMR */
+	xe_mmio_write32(gt, imr.reg, imr_val);
+	xe_mmio_read32(gt, imr.reg);	/* read back after the IMR write */
+}
+#define GEN3_IRQ_INIT(gt, type, imr_val, ier_val) \
+	gen3_irq_init((gt), \
+		      type##IMR, imr_val, \
+		      type##IER, ier_val, \
+		      type##IIR)
+
+static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
+			   i915_reg_t ier)
+{
+	xe_mmio_write32(gt, imr.reg, 0xffffffff);	/* all ones into IMR */
+	xe_mmio_read32(gt, imr.reg);
+
+	xe_mmio_write32(gt, ier.reg, 0);	/* zero IER */
+
+	/* IIR can theoretically queue up two events, so clear it twice. */
+	xe_mmio_write32(gt, iir.reg, 0xffffffff);
+	xe_mmio_read32(gt, iir.reg);
+	xe_mmio_write32(gt, iir.reg, 0xffffffff);
+	xe_mmio_read32(gt, iir.reg);
+}
+#define GEN3_IRQ_RESET(gt, type) \
+	gen3_irq_reset((gt), type##IMR, type##IIR, type##IER)
+
+static u32 gen11_intr_disable(struct xe_gt *gt)
+{
+	xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, 0);	/* master off */
+
+	/*
+	 * Now with master disabled, get a sample of level indications
+	 * for this interrupt. Indications will be cleared on related acks.
+	 * New indications can and will light up during processing,
+	 * and will generate a new interrupt after enabling master.
+	 */
+	return xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+}
+
+static u32
+gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
+{
+	u32 pending = 0;
+
+	/* Only touch the GU_MISC IIR when the master control flags it */
+	if (master_ctl & GEN11_GU_MISC_IRQ) {
+		pending = xe_mmio_read32(gt, GEN11_GU_MISC_IIR.reg);
+		if (likely(pending))
+			xe_mmio_write32(gt, GEN11_GU_MISC_IIR.reg, pending);
+	}
+
+	return pending;
+}
+
+static inline void gen11_intr_enable(struct xe_gt *gt, bool stall)
+{
+	xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, GEN11_MASTER_IRQ);
+	if (stall)	/* caller asked for a read back of the write above */
+		xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+}
+
+static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	u32 irqs, dmask, smask;
+	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+	u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+
+	if (xe_device_guc_submission_enabled(xe)) {
+		irqs = GT_RENDER_USER_INTERRUPT |
+			GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
+	} else {
+		irqs = GT_RENDER_USER_INTERRUPT |
+		       GT_CS_MASTER_ERROR_INTERRUPT |
+		       GT_CONTEXT_SWITCH_INTERRUPT |
+		       GT_WAIT_SEMAPHORE_INTERRUPT;
+	}
+
+	dmask = irqs << 16 | irqs;	/* irqs in both 16-bit halves */
+	smask = irqs << 16;	/* irqs in the upper 16-bit half only */
+
+	/* Enable RCS, BCS, VCS and VECS class interrupts. */
+	xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, dmask);
+	xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, dmask);
+	if (ccs_mask)
+		xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, smask);
+
+	/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
+	xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~smask);
+	xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~smask);
+	if (bcs_mask & (BIT(1)|BIT(2)))
+		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask);
+	if (bcs_mask & (BIT(3)|BIT(4)))
+		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~dmask);
+	if (bcs_mask & (BIT(5)|BIT(6)))
+		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask);
+	if (bcs_mask & (BIT(7)|BIT(8)))
+		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask);
+	xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~dmask);
+	xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~dmask);
+	//if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
+	//	intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask);
+	//if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
+	//	intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask);
+	xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~dmask);
+	//if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
+	//	intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask);
+	if (ccs_mask & (BIT(0)|BIT(1)))
+		xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~dmask);
+	if (ccs_mask & (BIT(2)|BIT(3)))
+		xe_mmio_write32(gt,  GEN12_CCS2_CCS3_INTR_MASK.reg, ~dmask);
+
+	/*
+	 * RPS interrupts will get enabled/disabled on demand when RPS itself
+	 * is enabled/disabled. Until then they stay disabled and fully masked.
+	 */
+	/* TODO: gt->pm_ier, gt->pm_imr */
+	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
+
+	/* Same thing for GuC interrupts */
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg,  ~0);
+}
+
+static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	/* TODO: PCH */
+
+	gen11_gt_irq_postinstall(xe, gt);
+
+	GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
+		      GEN11_GU_MISC_GSE);
+
+	gen11_intr_enable(gt, true);	/* master enable comes last, with read back */
+}
+
+static u32
+gen11_gt_engine_identity(struct xe_device *xe,
+			 struct xe_gt *gt,
+			 const unsigned int bank,
+			 const unsigned int bit)
+{
+	u32 timeout_ts;
+	u32 ident;
+
+	lockdep_assert_held(&xe->irq.lock);	/* caller must hold xe->irq.lock */
+
+	xe_mmio_write32(gt, GEN11_IIR_REG_SELECTOR(bank).reg, BIT(bit));
+
+	/*
+	 * NB: Specs do not specify how long to spin wait,
+	 * so we do ~100us as an educated guess.
+	 */
+	timeout_ts = (local_clock() >> 10) + 100;	/* ns >> 10 ~= us */
+	do {
+		ident = xe_mmio_read32(gt, GEN11_INTR_IDENTITY_REG(bank).reg);
+	} while (!(ident & GEN11_INTR_DATA_VALID) &&
+		 !time_after32(local_clock() >> 10, timeout_ts));
+
+	if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) {
+		drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n",
+			bank, bit, ident);
+		return 0;	/* timed out without a valid identity */
+	}
+
+	xe_mmio_write32(gt, GEN11_INTR_IDENTITY_REG(bank).reg,
+			GEN11_INTR_DATA_VALID);	/* write the valid bit back */
+
+	return ident;
+}
+
+#define   OTHER_MEDIA_GUC_INSTANCE           16
+
+static void
+gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
+{
+	/* Each GT only services the GuC instance matching its own type */
+	const u8 guc_instance = xe_gt_is_media_type(gt) ?
+		OTHER_MEDIA_GUC_INSTANCE : OTHER_GUC_INSTANCE;
+
+	if (instance == guc_instance)
+		xe_guc_irq_handler(&gt->uc.guc, iir);
+	else if (instance != OTHER_GUC_INSTANCE &&
+		 instance != OTHER_MEDIA_GUC_INSTANCE)
+		WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
+			  instance, iir);
+}
+
+static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
+				 u32 master_ctl, long unsigned int *intr_dw,
+				 u32 *identity)
+{
+	unsigned int bank, bit;
+	u16 instance, intr_vec;
+	enum xe_engine_class class;
+	struct xe_hw_engine *hwe;
+
+	spin_lock(&xe->irq.lock);	/* gen11_gt_engine_identity() asserts it */
+
+	for (bank = 0; bank < 2; bank++) {
+		if (!(master_ctl & GEN11_GT_DW_IRQ(bank)))
+			continue;
+
+		if (!xe_gt_is_media_type(gt)) {	/* media GTs reuse caller's arrays */
+			intr_dw[bank] =
+				xe_mmio_read32(gt, GEN11_GT_INTR_DW(bank).reg);
+			for_each_set_bit(bit, intr_dw + bank, 32)
+				identity[bit] = gen11_gt_engine_identity(xe, gt,
+									 bank,
+									 bit);
+			xe_mmio_write32(gt, GEN11_GT_INTR_DW(bank).reg,
+					intr_dw[bank]);	/* write back what was read */
+		}
+
+		for_each_set_bit(bit, intr_dw + bank, 32) {
+			class = GEN11_INTR_ENGINE_CLASS(identity[bit]);
+			instance = GEN11_INTR_ENGINE_INSTANCE(identity[bit]);
+			intr_vec = GEN11_INTR_ENGINE_INTR(identity[bit]);
+
+			if (class == XE_ENGINE_CLASS_OTHER) {
+				gen11_gt_other_irq_handler(gt, instance,
+							   intr_vec);
+				continue;
+			}
+
+			hwe = xe_gt_hw_engine(gt, class, instance, false);
+			if (!hwe)	/* no such engine on this GT: skip the bit */
+				continue;
+
+			xe_hw_engine_handle_irq(hwe, intr_vec);
+		}
+	}
+
+	spin_unlock(&xe->irq.lock);
+}
+
+static irqreturn_t gen11_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);	/* Only 1 GT here */
+	u32 master_ctl, gu_misc_iir;
+	long unsigned int intr_dw[2];	/* scratch for gen11_gt_irq_handler() */
+	u32 identity[32];	/* scratch for gen11_gt_irq_handler() */
+
+	master_ctl = gen11_intr_disable(gt);
+	if (!master_ctl) {	/* nothing flagged: re-enable and report IRQ_NONE */
+		gen11_intr_enable(gt, false);
+		return IRQ_NONE;
+	}
+
+	gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+
+	gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); /* value unused so far */
+
+	gen11_intr_enable(gt, false);
+
+	return IRQ_HANDLED;
+}
+
+static u32 dg1_intr_disable(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	u32 tiles;
+
+	/* Turn the master tile interrupt off first */
+	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, 0);
+
+	/*
+	 * Sample the indication levels; when any are set, write them back
+	 * to ack the master unit.
+	 */
+	tiles = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
+	if (likely(tiles))
+		xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, tiles);
+	return tiles;
+}
+
+static void dg1_intr_enable(struct xe_device *xe, bool stall)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);	/* register is reached via GT 0 */
+
+	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, DG1_MSTR_IRQ);
+	if (stall)	/* caller asked for a read back of the write above */
+		xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
+}
+
+static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	gen11_gt_irq_postinstall(xe, gt);
+
+	GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
+		      GEN11_GU_MISC_GSE);
+
+	if (gt->info.id + 1 == xe->info.tile_count)	/* highest GT id only */
+		dg1_intr_enable(xe, true);
+}
+
+static irqreturn_t dg1_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_gt *gt;
+	u32 master_tile_ctl, master_ctl = 0, gu_misc_iir;
+	long unsigned int intr_dw[2];	/* scratch shared across the GT loop */
+	u32 identity[32];	/* scratch shared across the GT loop */
+	u8 id;
+
+	/* TODO: This really shouldn't be copied+pasted */
+
+	master_tile_ctl = dg1_intr_disable(xe);
+	if (!master_tile_ctl) {	/* nothing flagged: re-enable, report IRQ_NONE */
+		dg1_intr_enable(xe, false);
+		return IRQ_NONE;
+	}
+
+	for_each_gt(gt, xe, id) {
+		if ((master_tile_ctl & DG1_MSTR_TILE(gt->info.vram_id)) == 0)
+			continue;
+
+		if (!xe_gt_is_media_type(gt))	/* media GTs keep the previous value */
+			master_ctl = xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+
+		/*
+		 * We might be in irq handler just when PCIe DPC is initiated
+		 * and all MMIO reads will be returned with all 1's. Ignore this
+		 * irq as device is inaccessible.
+		 */
+		if (master_ctl == REG_GENMASK(31, 0)) {
+			dev_dbg(gt_to_xe(gt)->drm.dev,
+				"Ignore this IRQ as device might be in DPC containment.\n");
+			return IRQ_HANDLED;	/* note: master is not re-enabled here */
+		}
+
+		if (!xe_gt_is_media_type(gt))
+			xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, master_ctl);
+		gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+	}
+
+	gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); /* value unused so far */
+
+	dg1_intr_enable(xe, false);
+
+	return IRQ_HANDLED;
+}
+
+static void gen11_gt_irq_reset(struct xe_gt *gt)
+{
+	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+	u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+
+	/* Disable RCS, BCS, VCS and VECS class interrupts. */
+	xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg,	 0);
+	xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg,	 0);
+	if (ccs_mask)
+		xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, 0);
+
+	/* Restore irq masks on RCS, BCS, VCS and VECS engines. */
+	xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg,	~0);
+	xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg,	~0);
+	if (bcs_mask & (BIT(1)|BIT(2)))
+		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0);
+	if (bcs_mask & (BIT(3)|BIT(4)))
+		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~0);
+	if (bcs_mask & (BIT(5)|BIT(6)))
+		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0);
+	if (bcs_mask & (BIT(7)|BIT(8)))
+		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0);
+	xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg,	~0);
+	xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg,	~0);
+//	if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
+//		xe_mmio_write32(xe, GEN12_VCS4_VCS5_INTR_MASK.reg,   ~0);
+//	if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
+//		xe_mmio_write32(xe, GEN12_VCS6_VCS7_INTR_MASK.reg,   ~0);
+	xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg,	~0);
+//	if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
+//		xe_mmio_write32(xe, GEN12_VECS2_VECS3_INTR_MASK.reg, ~0);
+	if (ccs_mask & (BIT(0)|BIT(1)))
+		xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~0);
+	if (ccs_mask & (BIT(2)|BIT(3)))
+		xe_mmio_write32(gt,  GEN12_CCS2_CCS3_INTR_MASK.reg, ~0);
+
+	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg,	 0);
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg,		~0);
+}
+
+static void gen11_irq_reset(struct xe_gt *gt)
+{
+	gen11_intr_disable(gt);	/* master off first; sampled value is ignored */
+
+	gen11_gt_irq_reset(gt);
+
+	GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
+	GEN3_IRQ_RESET(gt, GEN8_PCU_);
+}
+
+static void dg1_irq_reset(struct xe_gt *gt)
+{
+	if (gt->info.id == 0)	/* master tile interrupt is disabled once, on GT 0 */
+		dg1_intr_disable(gt_to_xe(gt));
+
+	gen11_gt_irq_reset(gt);
+
+	GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
+	GEN3_IRQ_RESET(gt, GEN8_PCU_);
+}
+
+void xe_irq_reset(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id) {	/* per-GT reset, chosen by graphics version */
+		if (GRAPHICS_VERx100(xe) >= 1210) {
+			dg1_irq_reset(gt);
+		} else if (GRAPHICS_VER(xe) >= 11) {
+			gen11_irq_reset(gt);
+		} else {
+			drm_err(&xe->drm, "No interrupt reset hook\n");
+		}
+	}
+}
+
+void xe_gt_irq_postinstall(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (GRAPHICS_VERx100(xe) >= 1210)	/* 12.10+ takes the DG1-style path */
+		dg1_irq_postinstall(xe, gt);
+	else if (GRAPHICS_VER(xe) >= 11)
+		gen11_irq_postinstall(xe, gt);
+	else
+		drm_err(&xe->drm, "No interrupt postinstall hook\n");
+}
+
+static void xe_irq_postinstall(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)	/* run the postinstall hook on every GT */
+		xe_gt_irq_postinstall(gt);
+}
+
+static irq_handler_t xe_irq_handler(struct xe_device *xe)
+{
+	/* Check the newest platforms first: 12.10+ uses the DG1-style handler */
+	if (GRAPHICS_VERx100(xe) >= 1210)
+		return dg1_irq_handler;
+	if (GRAPHICS_VER(xe) >= 11)
+		return gen11_irq_handler;
+
+	return NULL;	/* nothing older has a handler */
+}
+
+static void irq_uninstall(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	/* Interrupts already disabled: nothing to undo */
+	if (!xe->irq.enabled)
+		return;
+
+	xe->irq.enabled = false;
+	xe_irq_reset(xe);
+	free_irq(pdev->irq, xe);
+	if (pdev->msi_enabled)
+		pci_disable_msi(pdev);
+}
+
+int xe_irq_install(struct xe_device *xe)
+{
+	int irq = to_pci_dev(xe->drm.dev)->irq;
+	irq_handler_t irq_handler;	/* per call: not shared between devices */
+	int err;
+
+	irq_handler = xe_irq_handler(xe);
+	if (!irq_handler) {
+		drm_err(&xe->drm, "No supported interrupt handler\n");
+		return -EINVAL;
+	}
+
+	/* irq_uninstall() only acts while this flag is set */
+	xe->irq.enabled = true;
+
+	xe_irq_reset(xe);
+
+	err = request_irq(irq, irq_handler,
+			  IRQF_SHARED, DRIVER_NAME, xe);
+	if (err < 0) {
+		xe->irq.enabled = false;
+		return err;
+	}
+
+	/*
+	 * If registering the action fails, irq_uninstall() is run right away.
+	 */
+	return drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
+}
+
+void xe_irq_shutdown(struct xe_device *xe)
+{
+	irq_uninstall(&xe->drm, xe);	/* returns early if IRQs are not enabled */
+}
+
+void xe_irq_suspend(struct xe_device *xe)
+{
+	spin_lock_irq(&xe->irq.lock);
+	xe->irq.enabled = false;	/* flag and hardware reset change together */
+	xe_irq_reset(xe);
+	spin_unlock_irq(&xe->irq.lock);
+}
+
+void xe_irq_resume(struct xe_device *xe)
+{
+	spin_lock_irq(&xe->irq.lock);
+	xe->irq.enabled = true;
+	xe_irq_reset(xe);	/* reset first, then reprogram every GT */
+	xe_irq_postinstall(xe);
+	spin_unlock_irq(&xe->irq.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h
new file mode 100644
index 000000000000..34ecf22b32d3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_IRQ_H_
+#define _XE_IRQ_H_
+
+struct xe_device;
+struct xe_gt;
+
+int xe_irq_install(struct xe_device *xe);
+void xe_gt_irq_postinstall(struct xe_gt *gt);
+void xe_irq_shutdown(struct xe_device *xe);
+void xe_irq_suspend(struct xe_device *xe);
+void xe_irq_resume(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
new file mode 100644
index 000000000000..056c2c5a0b81
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_lrc.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine_types.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_hw_fence.h"
+#include "xe_vm.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_regs.h"
+
+#define GEN8_CTX_VALID				(1 << 0)
+#define GEN8_CTX_L3LLC_COHERENT			(1 << 5)
+#define GEN8_CTX_PRIVILEGE			(1 << 8)
+#define GEN8_CTX_ADDRESSING_MODE_SHIFT		3
+#define INTEL_LEGACY_64B_CONTEXT		3
+
+#define GEN11_ENGINE_CLASS_SHIFT		61
+#define GEN11_ENGINE_INSTANCE_SHIFT		48
+
+static struct xe_device *
+lrc_to_xe(struct xe_lrc *lrc)
+{
+	return gt_to_xe(lrc->fence_ctx.gt);	/* device found via the fence ctx's GT */
+}
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
+{
+	switch (class) {	/* NOTE(review): @xe is not consulted here */
+	case XE_ENGINE_CLASS_RENDER:
+	case XE_ENGINE_CLASS_COMPUTE:
+		/* 14 pages since graphics_ver == 11 */
+		return 14 * SZ_4K;
+	default:
+		WARN(1, "Unknown engine class: %d", class);
+		fallthrough;	/* after warning, use the 2-page size below */
+	case XE_ENGINE_CLASS_COPY:
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return 2 * SZ_4K;
+	}
+}
+
+/*
+ * The per-platform tables are u8-encoded in @data. Decode @data and set the
+ * addresses' offset and commands in @regs. The following encoding is used
+ * for each byte. There are 2 steps: decoding commands and decoding addresses.
+ *
+ * Commands:
+ * [7]: create NOPs - number of NOPs are set in lower bits
+ * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
+ *      MI_LRI_FORCE_POSTED
+ * [5:0]: Number of NOPs or registers to set values to in case of
+ *        MI_LOAD_REGISTER_IMM
+ *
+ * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
+ * number of registers. They are set by using the REG/REG16 macros: the former
+ * is used for offsets smaller than 0x200 while the latter is for offsets of
+ * 0x200 and above. Those macros already set all the bits documented below:
+ *
+ * [7]: When a register offset needs more than 7 bits, use additional bytes, to
+ *      follow, for the lower bits
+ * [6:0]: Register offset, without considering the engine base.
+ *
+ * This function only tweaks the commands and register offsets. Values are not
+ * filled out.
+ */
+static void set_offsets(u32 *regs,
+			const u8 *data,
+			const struct xe_hw_engine *hwe)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count) | \
+			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+	(((x) >> 2) & 0x7f)
+#define END 0
+{
+	const u32 base = hwe->mmio_base;
+
+	while (*data) {	/* a zero byte (END) terminates the table */
+		u8 count, flags;
+
+		if (*data & BIT(7)) { /* skip */
+			count = *data++ & ~BIT(7);
+			regs += count;
+			continue;
+		}
+
+		count = *data & 0x3f;
+		flags = *data >> 6;
+		data++;
+
+		*regs = MI_LOAD_REGISTER_IMM(count);
+		if (flags & POSTED)
+			*regs |= MI_LRI_FORCE_POSTED;
+		*regs |= MI_LRI_LRM_CS_MMIO;
+		regs++;
+
+		XE_BUG_ON(!count);
+		do {
+			u32 offset = 0;
+			u8 v;
+
+			do {	/* 7 bits per byte, bit 7 set means more follow */
+				v = *data++;
+				offset <<= 7;
+				offset |= v & ~BIT(7);
+			} while (v & BIT(7));
+
+			regs[0] = base + (offset << 2);
+			regs += 2;	/* step over the value dword, left untouched */
+		} while (--count);
+	}
+
+	*regs = MI_BATCH_BUFFER_END | BIT(0);
+}
+
+static const u8 gen12_xcs_offsets[] = {
+	NOP(1),	/* set_offsets() skips 1 dword */
+	LRI(13, POSTED),	/* 13 register offsets follow */
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),	/* skip 5 dwords */
+	LRI(9, POSTED),	/* 9 register offsets follow */
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	END	/* zero byte: set_offsets() stops decoding here */
+};
+
+static const u8 dg2_xcs_offsets[] = {
+	NOP(1),	/* set_offsets() skips 1 dword */
+	LRI(15, POSTED),	/* 15 register offsets follow */
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),	/* skip 1 dword */
+	LRI(9, POSTED),	/* 9 register offsets follow */
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	END	/* zero byte: set_offsets() stops decoding here */
+};
+
+static const u8 gen12_rcs_offsets[] = {
+	NOP(1),	/* set_offsets() skips 1 dword */
+	LRI(13, POSTED),	/* 13 register offsets follow */
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),	/* skip 5 dwords */
+	LRI(9, POSTED),	/* 9 register offsets follow */
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),	/* 3 register offsets follow */
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),	/* skip 6 dwords */
+	LRI(1, 0),	/* 1 register offset follows, not force-posted */
+	REG(0x0c8),
+	NOP(3 + 9 + 1),	/* skip 13 dwords */
+
+	LRI(51, POSTED),	/* 51 register offsets follow */
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG(0x028),
+	REG(0x09c),
+	REG(0x0c0),
+	REG(0x178),
+	REG(0x17c),
+	REG16(0x358),
+	REG(0x170),
+	REG(0x150),
+	REG(0x154),
+	REG(0x158),
+	REG16(0x41c),
+	REG16(0x600),
+	REG16(0x604),
+	REG16(0x608),
+	REG16(0x60c),
+	REG16(0x610),
+	REG16(0x614),
+	REG16(0x618),
+	REG16(0x61c),
+	REG16(0x620),
+	REG16(0x624),
+	REG16(0x628),
+	REG16(0x62c),
+	REG16(0x630),
+	REG16(0x634),
+	REG16(0x638),
+	REG16(0x63c),
+	REG16(0x640),
+	REG16(0x644),
+	REG16(0x648),
+	REG16(0x64c),
+	REG16(0x650),
+	REG16(0x654),
+	REG16(0x658),
+	REG16(0x65c),
+	REG16(0x660),
+	REG16(0x664),
+	REG16(0x668),
+	REG16(0x66c),
+	REG16(0x670),
+	REG16(0x674),
+	REG16(0x678),
+	REG16(0x67c),
+	REG(0x068),
+	REG(0x084),
+	NOP(1),	/* skip 1 dword */
+
+	END	/* zero byte: set_offsets() stops decoding here */
+};
+
+static const u8 xehp_rcs_offsets[] = {
+	NOP(1),	/* set_offsets() skips 1 dword */
+	LRI(13, POSTED),	/* 13 register offsets follow */
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),	/* skip 5 dwords */
+	LRI(9, POSTED),	/* 9 register offsets follow */
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),	/* 3 register offsets follow */
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),	/* skip 6 dwords */
+	LRI(1, 0),	/* 1 register offset follows, not force-posted */
+	REG(0x0c8),
+
+	END	/* zero byte: set_offsets() stops decoding here */
+};
+
+static const u8 dg2_rcs_offsets[] = {
+	NOP(1),	/* set_offsets() skips 1 dword */
+	LRI(15, POSTED),	/* 15 register offsets follow */
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),	/* skip 1 dword */
+	LRI(9, POSTED),	/* 9 register offsets follow */
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),	/* 3 register offsets follow */
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),	/* skip 6 dwords */
+	LRI(1, 0),	/* 1 register offset follows, not force-posted */
+	REG(0x0c8),
+
+	END	/* zero byte: set_offsets() stops decoding here */
+};
+
+static const u8 mtl_rcs_offsets[] = { /* render class, GRAPHICS_VERx100 >= 1270 (see reg_offsets()) */
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	NOP(2),
+	LRI(2, POSTED),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
+{
+	/* Every non-render class shares one of two layouts. */
+	if (class != XE_ENGINE_CLASS_RENDER)
+		return GRAPHICS_VERx100(xe) >= 1255 ? dg2_xcs_offsets :
+						     gen12_xcs_offsets;
+
+	/*
+	 * Render class: the first threshold met, newest first, picks the table.
+	 */
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		return mtl_rcs_offsets;
+	if (GRAPHICS_VERx100(xe) >= 1255)
+		return dg2_rcs_offsets;
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		return xehp_rcs_offsets;
+	return gen12_rcs_offsets;
+}
+
+static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
+{
+	u32 ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+
+	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+	regs[CTX_CONTEXT_CONTROL] = ctl | CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+	/* TODO: Timestamp */
+}
+
+static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+
+	/*
+	 * regs[] index used by reset_stop_ring(): 0x70 from VERx100 1250 on.
+	 */
+	return GRAPHICS_VERx100(xe) >= 1250 ? 0x70 : 0x60;
+}
+
+static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
+{
+	/* The dword edited is the one right after lrc_ring_mi_mode()'s index. */
+	u32 *mi_mode = &regs[lrc_ring_mi_mode(hwe) + 1];
+
+	/* Clear STOP_RING itself and set the same bit 16 positions higher. */
+	*mi_mode = (*mi_mode & ~STOP_RING) | (STOP_RING << 16);
+}
+
+static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
+{
+	return 0; /* the ring occupies the start of lrc->bo */
+}
+
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
+{
+	return lrc->ring.size; /* the PPHWSP follows the ring, which starts at offset 0 */
+}
+
+/* Make the magic macros work */
+#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
+
+#define LRC_SEQNO_PPHWSP_OFFSET 512 /* byte offset of the seqno inside the PPHWSP */
+#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) /* parenthesised so it is safe in any expression */
+#define LRC_PARALLEL_PPHWSP_OFFSET 2048 /* byte offset of the parallel area inside the PPHWSP */
+#define LRC_PPHWSP_SIZE SZ_4K
+
+static size_t lrc_reg_size(struct xe_device *xe)
+{
+	/* 96 dwords of register state from VERx100 1250 on, 80 before that. */
+	const size_t num_dwords = GRAPHICS_VERx100(xe) >= 1250 ? 96 : 80;
+
+	return num_dwords * sizeof(u32);
+}
+
+size_t xe_lrc_skip_size(struct xe_device *xe)
+{
+	return LRC_PPHWSP_SIZE + lrc_reg_size(xe); /* bytes of PPHWSP plus register state */
+}
+
+static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
+{
+	/* The seqno lives LRC_SEQNO_PPHWSP_OFFSET bytes into the PPHWSP (driver-defined part) */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
+{
+	/* The start seqno sits 8 bytes after the seqno, in the driver-defined part of the PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
+{
+	/* The parallel area starts LRC_PARALLEL_PPHWSP_OFFSET bytes into the PPHWSP (driver-defined part) */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
+{
+	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; /* register state follows the PPHWSP */
+}
+
+#define DECL_MAP_ADDR_HELPERS(elem) /* emits __xe_lrc_<elem>_map() and __xe_lrc_<elem>_ggtt_addr() */ \
+static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
+{ \
+	struct iosys_map map = lrc->bo->vmap; /* copy, so the BO's own map is not advanced */ \
+\
+	XE_BUG_ON(iosys_map_is_null(&map)); \
+	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
+	return map; \
+} \
+static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
+{ \
+	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
+} \
+
+DECL_MAP_ADDR_HELPERS(ring)
+DECL_MAP_ADDR_HELPERS(pphwsp)
+DECL_MAP_ADDR_HELPERS(seqno)
+DECL_MAP_ADDR_HELPERS(regs)
+DECL_MAP_ADDR_HELPERS(start_seqno)
+DECL_MAP_ADDR_HELPERS(parallel)
+
+#undef DECL_MAP_ADDR_HELPERS
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_pphwsp_ggtt_addr(lrc); /* the LRC's GGTT address is that of its PPHWSP */
+}
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
+{
+	/* Start at the register state, then step reg_nr dwords into it. */
+	struct iosys_map reg = __xe_lrc_regs_map(lrc);
+
+	iosys_map_incr(&reg, reg_nr * sizeof(u32));
+
+	return xe_map_read32(lrc_to_xe(lrc), &reg);
+}
+
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
+{
+	/* Start at the register state, then step reg_nr dwords into it. */
+	struct iosys_map reg = __xe_lrc_regs_map(lrc);
+
+	iosys_map_incr(&reg, reg_nr * sizeof(u32));
+
+	xe_map_write32(lrc_to_xe(lrc), &reg, val);
+}
+
+static void *empty_lrc_data(struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+	void *data;
+	u32 *regs;
+
+	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL); /* xe_lrc_init() kfree()s it after copying */
+	if (!data)
+		return NULL;
+
+	/* The first LRC_PPHWSP_SIZE bytes are the per-process HW status page; registers follow */
+	regs = data + LRC_PPHWSP_SIZE;
+	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
+	set_context_control(regs, hwe);
+	reset_stop_ring(regs, hwe);
+
+	return data;
+}
+
+static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
+{
+	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->full_gt); /* split across the two PDP0 context dwords */
+
+	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
+	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
+}
+
+#define PVC_CTX_ASID		(0x2e + 1)
+#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
+#define ACC_GRANULARITY_S       20
+#define ACC_NOTIFY_S            16
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+		struct xe_engine *e, struct xe_vm *vm, u32 ring_size)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct iosys_map map;
+	void *init_data = NULL;
+	u32 arb_enable;
+	int err;
+
+	lrc->flags = 0;
+
+	lrc->bo = xe_bo_create_locked(xe, hwe->gt, vm, /* one BO: ring first, then PPHWSP and context state */
+				      ring_size + xe_lrc_size(xe, hwe->class),
+				      ttm_bo_type_kernel,
+				      XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) |
+				      XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(lrc->bo))
+		return PTR_ERR(lrc->bo);
+
+	if (xe_gt_is_media_type(hwe->gt))
+		lrc->full_gt = xe_find_full_gt(hwe->gt);
+	else
+		lrc->full_gt = hwe->gt;
+
+	/*
+	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
+	 * via VM bind calls.
+	 */
+	err = xe_bo_pin(lrc->bo);
+	if (err)
+		goto err_unlock_put_bo;
+	lrc->flags |= XE_LRC_PINNED; /* xe_lrc_finish() and the error path key off this flag */
+
+	err = xe_bo_vmap(lrc->bo);
+	if (err)
+		goto err_unpin_bo;
+
+	xe_bo_unlock_vm_held(lrc->bo); /* from here on failures go through xe_lrc_finish() */
+
+	lrc->ring.size = ring_size;
+	lrc->ring.tail = 0;
+
+	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
+			     hwe->fence_irq, hwe->name);
+
+	if (!gt->default_lrc[hwe->class]) { /* no default image for this class yet: build an empty one */
+		init_data = empty_lrc_data(hwe);
+		if (!init_data) {
+			xe_lrc_finish(lrc);
+			return -ENOMEM;
+		}
+	}
+
+	/*
+	 * Initialise the per-process HW status page and the LRC / context
+	 * state to known values: either a zeroed PPHWSP plus the GT's default
+	 * context image, or the freshly built empty image.
+	 */
+	map = __xe_lrc_pphwsp_map(lrc);
+	if (!init_data) {
+		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
+		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
+				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
+				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
+	} else {
+		xe_map_memcpy_to(xe, &map, 0, init_data,
+				 xe_lrc_size(xe, hwe->class));
+		kfree(init_data);
+	}
+
+	if (vm)
+		xe_lrc_set_ppgtt(lrc, vm);
+
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
+			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
+	if (xe->info.supports_usm && vm) { /* NOTE(review): dereferences e; confirm callers never pass e == NULL with a vm */
+		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
+				     (e->usm.acc_granularity <<
+				      ACC_GRANULARITY_S) | vm->usm.asid);
+		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
+				     (e->usm.acc_notify << ACC_NOTIFY_S) |
+				     e->usm.acc_trigger);
+	}
+
+	lrc->desc = GEN8_CTX_VALID;
+	lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	/* TODO: Priority */
+
+	/* While this appears to have something about privileged batches or
+	 * some such, it really just means PPGTT mode.
+	 */
+	if (vm)
+		lrc->desc |= GEN8_CTX_PRIVILEGE;
+
+	if (GRAPHICS_VERx100(xe) < 1250) { /* engine instance/class are only encoded in desc before 12.50 */
+		lrc->desc |= (u64)hwe->instance << GEN11_ENGINE_INSTANCE_SHIFT;
+		lrc->desc |= (u64)hwe->class << GEN11_ENGINE_CLASS_SHIFT;
+	}
+
+	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; /* first dword placed in the new ring */
+	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
+
+	return 0;
+
+err_unpin_bo:
+	if (lrc->flags & XE_LRC_PINNED)
+		xe_bo_unpin(lrc->bo);
+err_unlock_put_bo:
+	xe_bo_unlock_vm_held(lrc->bo); /* both labels are reached with the BO still locked */
+	xe_bo_put(lrc->bo);
+	return err;
+}
+
+void xe_lrc_finish(struct xe_lrc *lrc)
+{
+	struct ww_acquire_ctx ww;
+
+	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
+	if (lrc->flags & XE_LRC_PINNED) { /* set by xe_lrc_init() once xe_bo_pin() succeeded */
+		if (lrc->bo->vm) /* take the vm lock when the BO has a vm, else the BO's own lock */
+			xe_vm_lock(lrc->bo->vm, &ww, 0, false);
+		else
+			xe_bo_lock_no_vm(lrc->bo, NULL);
+		xe_bo_unpin(lrc->bo);
+		if (lrc->bo->vm)
+			xe_vm_unlock(lrc->bo->vm, &ww);
+		else
+			xe_bo_unlock_no_vm(lrc->bo);
+	}
+	xe_bo_put(lrc->bo); /* drops the reference taken when xe_lrc_init() created the BO */
+}
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
+{
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); /* stored in the context image held in lrc->bo */
+}
+
+u32 xe_lrc_ring_head(struct xe_lrc *lrc)
+{
+	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; /* keep only the HEAD_ADDR bits */
+}
+
+u32 xe_lrc_ring_space(struct xe_lrc *lrc)
+{
+	const u32 hw_head = xe_lrc_ring_head(lrc);
+	const u32 wrap_mask = lrc->ring.size - 1;
+	const u32 gap = hw_head - lrc->ring.tail - 1; /* u32 maths wraps when tail >= head */
+
+	return (gap & wrap_mask) + 1;
+}
+
+static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
+				const void *data, size_t size)
+{
+	/* 'ring' arrives by value, so advancing it does not affect the caller. */
+	iosys_map_incr(&ring, lrc->ring.tail);
+	xe_map_memcpy_to(lrc_to_xe(lrc), &ring, 0, data, size);
+	/* Move the tail past the copy, masking it with ring.size - 1. */
+	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
+}
+
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
+{
+	struct iosys_map ring;
+	size_t head_part;
+
+	XE_BUG_ON(!IS_ALIGNED(size, 4));
+	ring = __xe_lrc_ring_map(lrc);
+	XE_BUG_ON(lrc->ring.tail >= lrc->ring.size);
+
+	/* Bytes that still fit between the tail and the end of the ring. */
+	head_part = lrc->ring.size - lrc->ring.tail;
+	if (head_part > size)
+		head_part = size;
+
+	/* The helper wraps ring.tail, so any leftover lands at the ring start. */
+	__xe_lrc_write_ring(lrc, ring, data, head_part);
+	if (head_part < size)
+		__xe_lrc_write_ring(lrc, ring, data + head_part, size - head_part);
+
+	/* A size that is not a multiple of 8 is padded with one MI_NOOP dword. */
+	if (ALIGN(size, 8) > size) {
+		u32 noop = MI_NOOP;
+
+		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
+	}
+}
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc)
+{
+	return lrc->desc | xe_lrc_ggtt_addr(lrc); /* flag bits from xe_lrc_init() OR'ed with the LRC's GGTT address */
+}
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_seqno_ggtt_addr(lrc); /* GGTT address of the seqno slot in the PPHWSP */
+}
+
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
+{
+	return &xe_hw_fence_create(&lrc->fence_ctx, /* NOTE(review): an error return is only preserved if ->dma is at offset 0 — confirm */
+				   __xe_lrc_seqno_map(lrc))->dma;
+}
+
+s32 xe_lrc_seqno(struct xe_lrc *lrc)
+{
+	struct iosys_map map = __xe_lrc_seqno_map(lrc); /* points at the seqno slot in the PPHWSP */
+
+	return xe_map_read32(lrc_to_xe(lrc), &map); /* the u32 read is returned as s32 */
+}
+
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
+{
+	struct iosys_map map = __xe_lrc_start_seqno_map(lrc); /* points at the start-seqno slot in the PPHWSP */
+
+	return xe_map_read32(lrc_to_xe(lrc), &map); /* the u32 read is returned as s32 */
+}
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_start_seqno_ggtt_addr(lrc); /* GGTT address of the start-seqno slot in the PPHWSP */
+}
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_parallel_ggtt_addr(lrc); /* GGTT address of the parallel area in the PPHWSP */
+}
+
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
+{
+	return __xe_lrc_parallel_map(lrc); /* by-value CPU mapping that starts at the parallel area */
+}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
new file mode 100644
index 000000000000..e37f89e75ef8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _XE_LRC_H_
+#define _XE_LRC_H_
+
+#include "xe_lrc_types.h"
+
+struct xe_device;
+struct xe_engine;
+enum xe_engine_class;
+struct xe_hw_engine;
+struct xe_vm;
+
+#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+		struct xe_engine *e, struct xe_vm *vm, u32 ring_size);
+void xe_lrc_finish(struct xe_lrc *lrc);
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class);
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head);
+u32 xe_lrc_ring_head(struct xe_lrc *lrc);
+u32 xe_lrc_ring_space(struct xe_lrc *lrc);
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
+u32 *xe_lrc_regs(struct xe_lrc *lrc);
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr);
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val);
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc);
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc);
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc);
+s32 xe_lrc_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc);
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc);
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
+
+size_t xe_lrc_skip_size(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
new file mode 100644
index 000000000000..2827efa2091d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_LRC_TYPES_H_
+#define _XE_LRC_TYPES_H_
+
+#include "xe_hw_fence_types.h"
+
+struct xe_bo;
+
+/**
+ * struct xe_lrc - Logical ring context (LRC) and submission ring object
+ */
+struct xe_lrc {
+	/**
+	 * @bo: buffer object (memory) holding, in this order, the submission
+	 * ring, the per process HW status page and the logical ring context.
+	 */
+	struct xe_bo *bo;
+
+	/** @full_gt: full GT which this LRC belongs to */
+	struct xe_gt *full_gt;
+
+	/** @flags: LRC flags */
+	u32 flags;
+#define XE_LRC_PINNED BIT(1)
+
+	/** @ring: submission ring state */
+	struct {
+		/** @ring.size: size of submission ring */
+		u32 size;
+		/** @ring.tail: tail of submission ring */
+		u32 tail;
+		/** @ring.old_tail: shadow of tail */
+		u32 old_tail;
+	} ring;
+
+	/** @desc: LRC descriptor */
+	u64 desc;
+
+	/** @fence_ctx: context for hw fence */
+	struct xe_hw_fence_ctx fence_ctx;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
new file mode 100644
index 000000000000..0d24c124d202
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_MACROS_H_
+#define _XE_MACROS_H_
+
+#include <linux/bug.h>
+
+#define XE_EXTRA_DEBUG 1
+#define XE_WARN_ON WARN_ON
+#define XE_BUG_ON BUG_ON
+
+#define XE_IOCTL_ERR(xe, cond) \
+	((cond) && (drm_info(&(xe)->drm, \
+			    "Ioctl argument check failed at %s:%d: %s", \
+			    __FILE__, __LINE__, #cond), 1))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
new file mode 100644
index 000000000000..0bac1f73a80d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_MAP_H__
+#define __XE_MAP_H__
+
+#include <linux/iosys-map.h>
+
+#include <xe_device.h>
+
+/**
+ * DOC: Map layer
+ *
+ * All access to any memory shared with a device (both sysmem and vram) in the
+ * XE driver should go through this layer (xe_map). This layer is built on top
+ * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory`
+ * with extra hooks into the XE driver that allow adding asserts to memory
+ * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics).
+ */
+
+static inline void xe_map_memcpy_to(struct xe_device *xe, struct iosys_map *dst,
+				    size_t dst_offset, const void *src,
+				    size_t len)
+{
+	xe_device_assert_mem_access(xe); /* driver hook run before every access made through this layer */
+	iosys_map_memcpy_to(dst, dst_offset, src, len); /* copy len bytes from src to dst + dst_offset */
+}
+
+static inline void xe_map_memcpy_from(struct xe_device *xe, void *dst,
+				      const struct iosys_map *src,
+				      size_t src_offset, size_t len)
+{
+	xe_device_assert_mem_access(xe); /* driver hook run before every access made through this layer */
+	iosys_map_memcpy_from(dst, src, src_offset, len); /* copy len bytes from src + src_offset to dst */
+}
+
+static inline void xe_map_memset(struct xe_device *xe,
+				 struct iosys_map *dst, size_t offset,
+				 int value, size_t len)
+{
+	xe_device_assert_mem_access(xe); /* driver hook run before every access made through this layer */
+	iosys_map_memset(dst, offset, value, len); /* fill len bytes at dst + offset with value */
+}
+
+/* FIXME: We likely should kill these two functions sooner or later */
+static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map)
+{
+	xe_device_assert_mem_access(xe);
+
+	if (map->is_iomem)
+		return readl(map->vaddr_iomem); /* I/O memory: use the MMIO accessor */
+	else
+		return READ_ONCE(*(u32 *)map->vaddr); /* system memory: one 32-bit load at the map's current position */
+}
+
+static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map,
+				  u32 val)
+{
+	xe_device_assert_mem_access(xe);
+
+	if (map->is_iomem)
+		writel(val, map->vaddr_iomem); /* I/O memory: use the MMIO accessor */
+	else
+		*(u32 *)map->vaddr = val; /* NOTE(review): plain store, while xe_map_read32() uses READ_ONCE() — confirm intended */
+}
+
+#define xe_map_rd(xe__, map__, offset__, type__) ({			\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_rd(map__, offset__, type__);				\
+})
+
+#define xe_map_wr(xe__, map__, offset__, type__, val__) ({		\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_wr(map__, offset__, type__, val__);			\
+})
+
+#define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({	\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_rd_field(map__, struct_offset__, struct_type__, field__);		\
+})
+
+#define xe_map_wr_field(xe__, map__, struct_offset__, struct_type__, field__, val__) ({	\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__);	\
+})
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
new file mode 100644
index 000000000000..7fc40e8009c3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -0,0 +1,1168 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+#include "xe_migrate.h"
+
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_engine.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+#include <linux/sizes.h>
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "gt/intel_gpu_commands.h"
+
+struct xe_migrate {
+	struct xe_engine *eng; /* engine created in xe_migrate_init(); eng->vm is the migration vm */
+	struct xe_gt *gt; /* GT this context was created for */
+	struct mutex job_mutex; /* initialised in xe_migrate_init(), destroyed in xe_migrate_fini() */
+	struct xe_bo *pt_bo; /* pinned page-table BO built by xe_migrate_prepare_vm() */
+	struct xe_bo *cleared_bo; /* zero-filled BO; only created when the device has flat CCS */
+	u64 batch_base_ofs; /* base offset in the migration vm used for gt->kernel_bb_pool.bo */
+	u64 usm_batch_base_ofs; /* same for gt->usm.bb_pool.bo; only set on DGFX with USM */
+	u64 cleared_vram_ofs; /* offset of cleared_bo inside the VRAM identity map */
+	struct dma_fence *fence; /* reference dropped in xe_migrate_fini() */
+	struct drm_suballoc_manager vm_update_sa; /* set up at the end of xe_migrate_prepare_vm() */
+};
+
+#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
+#define NUM_KERNEL_PDE 17
+#define NUM_PT_SLOTS 32
+#define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M)
+
+struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt)
+{
+	return gt->migrate->eng; /* no reference is taken; assumes gt->migrate is already set up */
+}
+
+static void xe_migrate_fini(struct drm_device *dev, void *arg)
+{
+	struct xe_migrate *m = arg; /* registered as the drmm action argument in xe_migrate_init() */
+	struct ww_acquire_ctx ww;
+
+	xe_vm_lock(m->eng->vm, &ww, 0, false); /* both unpins happen under the migration vm lock */
+	xe_bo_unpin(m->pt_bo);
+	if (m->cleared_bo) /* NULL when the device has no flat CCS */
+		xe_bo_unpin(m->cleared_bo);
+	xe_vm_unlock(m->eng->vm, &ww);
+
+	dma_fence_put(m->fence);
+	if (m->cleared_bo)
+		xe_bo_put(m->cleared_bo);
+	xe_bo_put(m->pt_bo);
+	drm_suballoc_manager_fini(&m->vm_update_sa);
+	mutex_destroy(&m->job_mutex);
+	xe_vm_close_and_put(m->eng->vm); /* the vm is reached through eng, so it goes before the engine */
+	xe_engine_put(m->eng);
+}
+
+static u64 xe_migrate_vm_addr(u64 slot, u32 level)
+{
+	XE_BUG_ON(slot >= NUM_PT_SLOTS);
+
+	/* Slot 0 of the address range maps the PT bo and bb, so usable slots start at 1 */
+	return (slot + 1ULL) << xe_pt_shift(level + 1);
+}
+
+static u64 xe_migrate_vram_ofs(u64 addr)
+{
+	return addr + (256ULL << xe_pt_shift(2)); /* VRAM is identity mapped from level-2 entry 256 (see xe_migrate_prepare_vm()) */
+}
+
+/*
+ * For flat CCS clearing we need a cleared chunk of memory to copy from,
+ * since the CCS clearing mode of XY_FAST_COLOR_BLT appears to be buggy
+ * (it clears only 14 bytes in each chunk of 16).
+ * If clearing the main surface one can use the part of the main surface
+ * already cleared, but for clearing as part of copying non-compressed
+ * data out of system memory, we don't readily have a cleared part of
+ * VRAM to copy from, so create one to use for that case.
+ */
+static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
+{
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = vm->xe;
+	size_t cleared_size;
+	u64 vram_addr;
+	bool is_vram;
+
+	if (!xe_device_has_flat_ccs(xe))
+		return 0; /* nothing to do: m->cleared_bo stays NULL */
+
+	cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER); /* CCS bytes for the largest single transfer */
+	cleared_size = PAGE_ALIGN(cleared_size);
+	m->cleared_bo = xe_bo_create_pin_map(xe, gt, vm, cleared_size,
+					     ttm_bo_type_kernel,
+					     XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					     XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(m->cleared_bo))
+		return PTR_ERR(m->cleared_bo); /* NOTE(review): m->cleared_bo keeps the ERR_PTR value — confirm no later user tests it for NULL */
+
+	xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
+	vram_addr = xe_bo_addr(m->cleared_bo, 0, GEN8_PAGE_SIZE, &is_vram);
+	XE_BUG_ON(!is_vram);
+	m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr); /* address inside the VRAM identity map */
+
+	return 0;
+}
+
+static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
+				 struct xe_vm *vm)
+{
+	u8 id = gt->info.id;
+	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
+	u32 map_ofs, level, i;
+	struct xe_device *xe = gt_to_xe(m->gt);
+	struct xe_bo *bo, *batch = gt->kernel_bb_pool.bo;
+	u64 entry;
+	int ret;
+
+	/* Can't bump NUM_PT_SLOTS too high */
+	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/GEN8_PAGE_SIZE);
+	/* Must be a multiple of 64K to support all platforms */
+	BUILD_BUG_ON(NUM_PT_SLOTS * GEN8_PAGE_SIZE % SZ_64K);
+	/* And one slot reserved for the 4KiB page table updates */
+	BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
+
+	/* Need to be sure everything fits in the first PT, or create more */
+	XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M); /* NOTE(review): batch_base_ofs is only assigned further down — confirm */
+
+	bo = xe_bo_create_pin_map(vm->xe, m->gt, vm,
+				  num_entries * GEN8_PAGE_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ret = xe_migrate_create_cleared_bo(m, vm);
+	if (ret) {
+		xe_bo_put(bo); /* NOTE(review): bo was created pinned and is not unpinned here — confirm */
+		return ret;
+	}
+
+	entry = gen8_pde_encode(bo, bo->size - GEN8_PAGE_SIZE, XE_CACHE_WB); /* root entry 0 -> last page of bo */
+	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
+
+	map_ofs = (num_entries - num_level) * GEN8_PAGE_SIZE; /* the last num_level pages of bo hold the page tables */
+
+	/* Map the entire BO in our level 0 pt */
+	for (i = 0, level = 0; i < num_entries; level++) { /* 'level' doubles as the PTE index here */
+		entry = gen8_pte_encode(NULL, bo, i * GEN8_PAGE_SIZE,
+					XE_CACHE_WB, 0, 0);
+
+		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
+
+		if (vm->flags & XE_VM_FLAGS_64K)
+			i += 16;
+		else
+			i += 1;
+	}
+
+	if (!IS_DGFX(xe)) {
+		XE_BUG_ON(xe->info.supports_usm);
+
+		/* Write out batch too */
+		m->batch_base_ofs = NUM_PT_SLOTS * GEN8_PAGE_SIZE;
+		for (i = 0; i < batch->size;
+		     i += vm->flags & XE_VM_FLAGS_64K ? GEN8_64K_PAGE_SIZE :
+			     GEN8_PAGE_SIZE) {
+			entry = gen8_pte_encode(NULL, batch, i,
+						XE_CACHE_WB, 0, 0);
+
+			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
+				  entry);
+			level++; /* continues the PTE index left by the loop above */
+		}
+	} else {
+		bool is_lmem;
+		u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_lmem);
+
+		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); /* batches are reached through the VRAM identity map */
+
+		if (xe->info.supports_usm) {
+			batch = gt->usm.bb_pool.bo;
+			batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE,
+						&is_lmem);
+			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
+		}
+	}
+
+	for (level = 1; level < num_level; level++) { /* chain each higher level to the page one level below it */
+		u32 flags = 0;
+
+		if (vm->flags & XE_VM_FLAGS_64K && level == 1)
+			flags = GEN12_PDE_64K;
+
+		entry = gen8_pde_encode(bo, map_ofs + (level - 1) *
+					GEN8_PAGE_SIZE, XE_CACHE_WB);
+		xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE * level, u64,
+			  entry | flags);
+	}
+
+	/* Write PDE's that point to our BO. */
+	for (i = 0; i < num_entries - num_level; i++) {
+		entry = gen8_pde_encode(bo, i * GEN8_PAGE_SIZE,
+					XE_CACHE_WB);
+
+		xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE +
+			  (i + 1) * 8, u64, entry); /* PDE i + 1 -> page i, matching xe_migrate_vm_addr() */
+	}
+
+	/* Identity map the entire vram at 256GiB offset */
+	if (IS_DGFX(xe)) {
+		u64 pos, ofs, flags;
+
+		level = 2;
+		ofs = map_ofs + GEN8_PAGE_SIZE * level + 256 * 8; /* entry 256 of the level-2 table */
+		flags = GEN8_PAGE_RW | GEN8_PAGE_PRESENT | PPAT_CACHED |
+			GEN12_PPGTT_PTE_LM | GEN8_PDPE_PS_1G;
+
+		/*
+		 * Use 1GB pages, it shouldn't matter the physical amount of
+		 * vram is less, when we don't access it.
+		 */
+		for (pos = 0; pos < xe->mem.vram.size; pos += SZ_1G, ofs += 8)
+			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+	}
+
+	/*
+	 * Example layout created above, with root level = 3:
+	 * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
+	 * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's
+	 * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's
+	 * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2]
+	 *
+	 * This makes the lowest part of the VM point to the pagetables.
+	 * Hence the lowest 2M in the vm should point to itself; with a few writes
+	 * and flushes, other parts of the VM can be used either for copying or
+	 * clearing.
+	 *
+	 * For performance, the kernel reserves PDE's, so about 20 are left
+	 * for async VM updates.
+	 *
+	 * To make it easier to work, each scratch PT is put in slot (1 + PT #)
+	 * everywhere, this allows lockless updates to scratch pages by using
+	 * the different addresses in VM.
+	 */
+#define NUM_VMUSA_UNIT_PER_PAGE	32
+#define VM_SA_UPDATE_UNIT_SIZE	(GEN8_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
+#define NUM_VMUSA_WRITES_PER_UNIT	(VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
+	drm_suballoc_manager_init(&m->vm_update_sa,
+				  (map_ofs / GEN8_PAGE_SIZE - NUM_KERNEL_PDE) *
+				  NUM_VMUSA_UNIT_PER_PAGE, 0);
+
+	m->pt_bo = bo; /* unpinned and put in xe_migrate_fini() */
+	return 0;
+}
+
+struct xe_migrate *xe_migrate_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_migrate *m;
+	struct xe_vm *vm;
+	struct ww_acquire_ctx ww;
+	int err;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); /* drm-managed: no kfree() on the error paths */
+	if (!m)
+		return ERR_PTR(-ENOMEM);
+
+	m->gt = gt;
+
+	/* Special layout, filled in by xe_migrate_prepare_vm() below. */
+	vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | XE_VM_FLAG_SET_GT_ID(gt));
+	if (IS_ERR(vm))
+		return ERR_CAST(vm);
+
+	xe_vm_lock(vm, &ww, 0, false);
+	err = xe_migrate_prepare_vm(gt, m, vm);
+	xe_vm_unlock(vm, &ww);
+	if (err) {
+		xe_vm_close_and_put(vm);
+		return ERR_PTR(err);
+	}
+
+	if (xe->info.supports_usm) {
+		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
+							   XE_ENGINE_CLASS_COPY,
+							   gt->usm.reserved_bcs_instance,
+							   false);
+		if (!hwe) {
+			/* Drop the vm here too, as the other failure paths do. */
+			xe_vm_close_and_put(vm);
+			return ERR_PTR(-EINVAL);
+		}
+
+		m->eng = xe_engine_create(xe, vm, BIT(hwe->logical_instance),
+					  1, hwe, ENGINE_FLAG_KERNEL);
+	} else {
+		m->eng = xe_engine_create_class(xe, gt, vm, XE_ENGINE_CLASS_COPY,
+						ENGINE_FLAG_KERNEL);
+	}
+	if (IS_ERR(m->eng)) { /* NOTE(review): pt_bo/cleared_bo from prepare_vm are not released on these paths — confirm */
+		xe_vm_close_and_put(vm);
+		return ERR_CAST(m->eng);
+	}
+
+	mutex_init(&m->job_mutex);
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m); /* on failure xe_migrate_fini() has already run */
+	if (err)
+		return ERR_PTR(err);
+
+	return m; /* callers must test the result with IS_ERR() */
+}
+
+static void emit_arb_clear(struct xe_bb *bb)
+{
+	/* Appends exactly 1 dword to the batch */
+	bb->cs[bb->len++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+}
+
+static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur)
+{
+	/*
+	 * For VRAM we use identity mapped pages so we are limited to current
+	 * cursor size. For system we program the pages ourselves so we have no
+	 * such limitation.
+	 */
+	return min_t(u64, MAX_PREEMPTDISABLE_TRANSFER, /* never more than one preempt-disabled transfer */
+		     mem_type_is_vram(cur->mem_type) ? cur->size :
+		     cur->remaining);
+}
+
+static u32 pte_update_size(struct xe_migrate *m,
+			   bool is_vram,
+			   struct xe_res_cursor *cur,
+			   u64 *L0, u64 *L0_ofs, u32 *L0_pt,
+			   u32 cmd_size, u32 pt_ofs, u32 avail_pts)
+{
+	u32 cmds = 0; /* running count of batch dwords; this is the return value */
+
+	*L0_pt = pt_ofs;
+	if (!is_vram) {
+		/* Clip L0 to available size */
+		u64 size = min(*L0, (u64)avail_pts * SZ_2M);
+		u64 num_4k_pages = DIV_ROUND_UP(size, GEN8_PAGE_SIZE);
+
+		*L0 = size;
+		*L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);
+
+		/* MI_STORE_DATA_IMM: 3 header dwords per chunk of up to 0x1ff PTEs (see emit_pte()) */
+		cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff);
+
+		/* PTE qwords: two dwords per 4 KiB page (see emit_pte()) */
+		cmds += num_4k_pages * 2;
+
+		/* Each chunk has a single blit command */
+		cmds += cmd_size;
+	} else {
+		/* Offset into identity map. */
+		*L0_ofs = xe_migrate_vram_ofs(cur->start);
+		cmds += cmd_size;
+	}
+
+	return cmds;
+}
+
+static void emit_pte(struct xe_migrate *m,
+		     struct xe_bb *bb, u32 at_pt,
+		     bool is_vram,
+		     struct xe_res_cursor *cur,
+		     u32 size, struct xe_bo *bo)
+{
+	u32 ptes;
+	u64 ofs = at_pt * GEN8_PAGE_SIZE; /* store address: page at_pt of the low, self-mapped part of the vm */
+	u64 cur_ofs;
+
+	/*
+	 * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently
+	 * we're only emitting VRAM PTEs during sanity tests, so when
+	 * that's moved to a Kunit test, we should condition VRAM PTEs
+	 * on running tests.
+	 */
+
+	ptes = DIV_ROUND_UP(size, GEN8_PAGE_SIZE); /* one PTE per 4 KiB of size */
+
+	while (ptes) {
+		u32 chunk = min(0x1ffU, ptes); /* at most 0x1ff PTEs per MI_STORE_DATA_IMM */
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
+			(chunk * 2 + 1);
+		bb->cs[bb->len++] = ofs; /* only the low 32 bits of ofs are stored */
+		bb->cs[bb->len++] = 0;
+
+		cur_ofs = ofs;
+		ofs += chunk * 8; /* each PTE is 8 bytes */
+		ptes -= chunk;
+
+		while (chunk--) {
+			u64 addr;
+
+			XE_BUG_ON(cur->start & (PAGE_SIZE - 1));
+
+			if (is_vram) {
+				addr = cur->start;
+
+				/* Is this a 64K PTE entry? */
+				if ((m->eng->vm->flags & XE_VM_FLAGS_64K) &&
+				    !(cur_ofs & (16 * 8 - 1))) { /* true for every 16th PTE slot */
+					XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K));
+					addr |= GEN12_PTE_PS64;
+				}
+
+				addr |= GEN12_PPGTT_PTE_LM;
+			} else {
+				addr = xe_res_dma(cur);
+			}
+			addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+
+			xe_res_next(cur, PAGE_SIZE); /* advances the caller's cursor by one page */
+			cur_ofs += 8;
+		}
+	}
+}
+
+#define EMIT_COPY_CCS_DW 5 /* dwords written by emit_copy_ccs() */
+static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
+			  u64 dst_ofs, bool dst_is_indirect,
+			  u64 src_ofs, bool src_is_indirect,
+			  u32 size)
+{
+	u32 *cs = bb->cs + bb->len; /* write cursor at the current end of the batch */
+	u32 num_ccs_blks;
+	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+
+	num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
+				    NUM_CCS_BYTES_PER_BLOCK);
+	XE_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER);
+	*cs++ = XY_CTRL_SURF_COPY_BLT |
+		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | /* access type: indirect = 0, direct = 1 */
+		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
+		((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT; /* size field holds the block count minus one */
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs) |
+		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+	*cs++ = lower_32_bits(dst_ofs);
+	*cs++ = upper_32_bits(dst_ofs) |
+		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+
+	bb->len = cs - bb->cs; /* commit the dwords written through cs */
+}
+
+#define EMIT_COPY_DW 10 /* dwords written by emit_copy() */
+static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
+		      u64 src_ofs, u64 dst_ofs, unsigned int size,
+		      unsigned pitch)
+{
+	XE_BUG_ON(size / pitch > S16_MAX); /* row count must fit the signed 16-bit field below */
+	XE_BUG_ON(pitch / 4 > S16_MAX);
+	XE_BUG_ON(pitch > U16_MAX);
+
+	bb->cs[bb->len++] = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); /* 10 is EMIT_COPY_DW */
+	bb->cs[bb->len++] = BLT_DEPTH_32 | pitch;
+	bb->cs[bb->len++] = 0;
+	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; /* size / pitch rows of pitch / 4 32-bit pixels */
+	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
+	bb->cs[bb->len++] = upper_32_bits(dst_ofs);
+	bb->cs[bb->len++] = 0;
+	bb->cs[bb->len++] = pitch;
+	bb->cs[bb->len++] = lower_32_bits(src_ofs);
+	bb->cs[bb->len++] = upper_32_bits(src_ofs);
+}
+
+/*
+ * Add the fences of @resv selected by @usage as scheduler dependencies of
+ * @job. Returns whatever drm_sched_job_add_resv_dependencies() returns.
+ */
+static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
+			enum dma_resv_usage usage)
+{
+	int ret;
+
+	ret = drm_sched_job_add_resv_dependencies(&job->drm, resv, usage);
+
+	return ret;
+}
+
+/*
+ * Pick the batch base offset of @m: the USM one when @usm is set, the
+ * regular one otherwise.
+ */
+static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
+{
+	if (usm)
+		return m->usm_batch_base_ofs;
+
+	return m->batch_base_ofs;
+}
+
+/*
+ * Emit the CCS command, if any, that goes along with the copy of one chunk.
+ *
+ * - @copy_ccs set: CCS data is copied between @src_ofs and @dst_ofs, with
+ *   @ccs_ofs substituted for whichever side is not VRAM.
+ * - @copy_ccs clear, device has flat CCS and the destination is VRAM: the
+ *   destination's CCS is overwritten from m->cleared_vram_ofs.
+ * - Otherwise nothing is emitted.
+ *
+ * Return: MI_FLUSH_DW_CCS when a CCS command targeting VRAM was emitted,
+ * 0 otherwise.
+ */
+static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
+			       struct xe_bb *bb,
+			       u64 src_ofs, bool src_is_vram,
+			       u64 dst_ofs, bool dst_is_vram, u32 dst_size,
+			       u64 ccs_ofs, bool copy_ccs)
+{
+	struct xe_gt *gt = m->gt;
+	bool has_flat_ccs = xe_device_has_flat_ccs(gt_to_xe(gt));
+
+	if (!copy_ccs) {
+		if (!has_flat_ccs || !dst_is_vram)
+			return 0;
+
+		/*
+		 * The bo has no CCS metadata attached, but the CCS backing
+		 * its new VRAM location still has to be cleared for security
+		 * reasons.
+		 */
+		emit_copy_ccs(gt, bb, dst_ofs, true, m->cleared_vram_ofs, false,
+			      dst_size);
+		return MI_FLUSH_DW_CCS;
+	}
+
+	/*
+	 * Copying CCS metadata from system to system is not supported at
+	 * the moment.
+	 */
+	XE_BUG_ON(!src_is_vram && !dst_is_vram);
+
+	/* The side living in system memory is addressed through @ccs_ofs. */
+	if (!src_is_vram)
+		src_ofs = ccs_ofs;
+	else if (!dst_is_vram)
+		dst_ofs = ccs_ofs;
+
+	emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs, src_is_vram,
+		      dst_size);
+
+	return dst_is_vram ? MI_FLUSH_DW_CCS : 0;
+}
+
+/**
+ * xe_migrate_copy() - Copy the content of a buffer object between resources.
+ * @m: The migration context.
+ * @bo: The buffer object to copy; bo->size bytes are transferred.
+ * @src: The source TTM resource.
+ * @dst: The destination TTM resource.
+ *
+ * The copy is split into passes, one scheduler job per pass. Each job's
+ * batch first rewrites the migrate VM PTEs for the sides that are not in
+ * VRAM (using the bo's sg table), ends that part with MI_BATCH_BUFFER_END,
+ * and then carries the blit plus the optional CCS command starting at the
+ * index handed to xe_bb_create_migration_job().
+ *
+ * Only the first job takes the bo's reservation object fences
+ * (DMA_RESV_USAGE_BOOKKEEP) as dependencies. m->fence is updated to the
+ * fence of the most recently pushed job.
+ *
+ * Return: A reference to the finished fence of the last job, NULL if
+ * bo->size is zero, or an ERR_PTR() on failure. On failure, any part of the
+ * copy that was already submitted is waited for before returning.
+ */
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst)
+{
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = bo->size;
+	struct xe_res_cursor src_it, dst_it, ccs_it;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u32 src_L0_pt, dst_L0_pt;
+	u64 src_L0, dst_L0;
+	int pass = 0;
+	int err;
+	bool src_is_vram = mem_type_is_vram(src->mem_type);
+	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+	bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(bo);
+	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
+
+	/* A side in system memory is walked through the bo's sg table. */
+	if (!src_is_vram)
+		xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it);
+	else
+		xe_res_first(src, 0, bo->size, &src_it);
+	if (!dst_is_vram)
+		xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &dst_it);
+	else
+		xe_res_first(dst, 0, bo->size, &dst_it);
+
+	/* The CCS pages of the bo live in its sg table as well. */
+	if (copy_system_ccs)
+		xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo),
+				PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
+				&ccs_it);
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags;
+		u32 update_idx;
+		/*
+		 * ccs_ofs is handed to xe_migrate_ccs_copy() by value on
+		 * every pass, but only assigned when copy_system_ccs is set;
+		 * give it a defined value so that no indeterminate value is
+		 * ever read.
+		 */
+		u64 ccs_ofs = 0, ccs_size;
+		u32 ccs_pt;
+		bool usm = xe->info.supports_usm;
+
+		src_L0 = xe_migrate_res_sizes(&src_it);
+		dst_L0 = xe_migrate_res_sizes(&dst_it);
+
+		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
+			pass++, src_L0, dst_L0);
+
+		/* Both sides advance in lockstep, use the smaller chunk. */
+		src_L0 = min(src_L0, dst_L0);
+
+		batch_size += pte_update_size(m, src_is_vram, &src_it, &src_L0,
+					      &src_L0_ofs, &src_L0_pt, 0, 0,
+					      NUM_PT_PER_BLIT);
+
+		/* The destination uses the same chunk size as the source. */
+		batch_size += pte_update_size(m, dst_is_vram, &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt, 0,
+					      NUM_PT_PER_BLIT, NUM_PT_PER_BLIT);
+
+		if (copy_system_ccs) {
+			ccs_size = xe_device_ccs_bytes(xe, src_L0);
+			batch_size += pte_update_size(m, false, &ccs_it, &ccs_size,
+						      &ccs_ofs, &ccs_pt, 0,
+						      2 * NUM_PT_PER_BLIT,
+						      NUM_PT_PER_BLIT);
+		}
+
+		/* Add copy commands size here */
+		batch_size += EMIT_COPY_DW +
+			(xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		/* Preemption is enabled again by the ring ops. */
+		if (!src_is_vram || !dst_is_vram)
+			emit_arb_clear(bb);
+
+		/*
+		 * Sides in system memory get their PTEs written (which also
+		 * advances the cursor); VRAM sides only advance the cursor.
+		 */
+		if (!src_is_vram)
+			emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0,
+				 bo);
+		else
+			xe_res_next(&src_it, src_L0);
+
+		if (!dst_is_vram)
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0,
+				 bo);
+		else
+			xe_res_next(&dst_it, src_L0);
+
+		if (copy_system_ccs)
+			emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, bo);
+
+		/* End of the PTE update part; the copy part starts here. */
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, GEN8_PAGE_SIZE);
+		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
+						  dst_L0_ofs, dst_is_vram,
+						  src_L0, ccs_ofs, copy_ccs);
+
+		mutex_lock(&m->job_mutex);
+		job = xe_bb_create_migration_job(m->eng, bb,
+						 xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+		/* Only the first job waits on the bo's reservation object. */
+		if (!fence) {
+			err = job_add_deps(job, bo->ttm.base.resv,
+					   DMA_RESV_USAGE_BOOKKEEP);
+			if (err)
+				goto err_job;
+		}
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= src_L0;
+		continue;
+
+err_job:
+		xe_sched_job_put(job);
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		/* Sync partial copy if any. */
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+/*
+ * Append one XY_FAST_COLOR_BLT command to @bb that fills @size bytes at
+ * @src_ofs with @value, using a 32 bit depth and the uncached MOCS value of
+ * @gt. The command is 11 dwords long below graphics version 12.50 and
+ * XY_FAST_COLOR_BLT_DW dwords long otherwise.
+ *
+ * Return: Always 0.
+ */
+static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+		      u32 size, u32 pitch, u32 value, bool is_vram)
+{
+	const u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+	const u32 len = GRAPHICS_VERx100(gt->xe) < 1250 ?
+		11 : XY_FAST_COLOR_BLT_DW;
+	u32 *cs = bb->cs + bb->len;
+	u32 zeros, i;
+
+	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
+		(len - 2);
+	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
+		(pitch - 1);
+	*cs++ = 0;
+	*cs++ = (size / pitch) << 16 | pitch / 4;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	*cs++ = (is_vram ? 0x0 : 0x1) <<  XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
+	*cs++ = value;
+
+	/*
+	 * The remainder of the command is zero: three dwords for the short
+	 * form, five more on top of that for the long form.
+	 */
+	zeros = len > 11 ? 8 : 3;
+	for (i = 0; i < zeros; i++)
+		*cs++ = 0;
+
+	/* What was written has to match the length put in the header. */
+	XE_BUG_ON(cs - bb->cs != len + bb->len);
+	bb->len += len;
+
+	return 0;
+}
+
+/**
+ * xe_migrate_clear() - Fill the content of a buffer object with a value.
+ * @m: The migration context.
+ * @bo: The buffer object to clear; bo->size bytes are written.
+ * @dst: The TTM resource backing @bo that is to be cleared.
+ * @value: The 32 bit value handed to the fill blit.
+ *
+ * The clear is split into passes, one scheduler job per pass. For a
+ * resource in system memory the batch first rewrites the migrate VM PTEs
+ * from the bo's sg table; the fill blit follows after the
+ * MI_BATCH_BUFFER_END that terminates the PTE update part. On devices with
+ * flat CCS, clearing VRAM additionally overwrites the CCS of the range from
+ * m->cleared_vram_ofs. m->fence is updated to the fence of the most
+ * recently pushed job.
+ *
+ * Return: A reference to the finished fence of the last job, NULL if
+ * nothing was submitted, or an ERR_PTR() on failure. On failure, any part
+ * of the clear that was already submitted is waited for before returning.
+ */
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst,
+				   u32 value)
+{
+	bool clear_vram = mem_type_is_vram(dst->mem_type);
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = bo->size;
+	struct xe_res_cursor src_it;
+	struct ttm_resource *src = dst;
+	int err;
+	int pass = 0;
+
+	/* System memory is walked through the bo's sg table. */
+	if (!clear_vram)
+		xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it);
+	else
+		xe_res_first(src, 0, bo->size, &src_it);
+
+	while (size) {
+		u64 clear_L0_ofs;
+		u32 clear_L0_pt;
+		u32 flush_flags = 0;
+		u64 clear_L0;
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 batch_size, update_idx;
+		bool usm = xe->info.supports_usm;
+
+		clear_L0 = xe_migrate_res_sizes(&src_it);
+		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
+
+		/* Calculate final sizes and batch size.. */
+		batch_size = 2 +
+			pte_update_size(m, clear_vram, &src_it,
+					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
+					XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT);
+		if (xe_device_has_flat_ccs(xe) && clear_vram)
+			batch_size += EMIT_COPY_CCS_DW;
+
+		/* Clear commands */
+
+		/* A zero sized chunk would never let the loop terminate. */
+		if (WARN_ON_ONCE(!clear_L0))
+			break;
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		size -= clear_L0;
+
+		/* TODO: Add dependencies here */
+
+		/* Preemption is enabled again by the ring ops. */
+		if (!clear_vram) {
+			emit_arb_clear(bb);
+			emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0,
+				 bo);
+		} else {
+			xe_res_next(&src_it, clear_L0);
+		}
+		/* End of the PTE update part; the clear part starts here. */
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE,
+			   value, clear_vram);
+		if (xe_device_has_flat_ccs(xe) && clear_vram) {
+			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
+				      m->cleared_vram_ofs, false, clear_L0);
+			flush_flags = MI_FLUSH_DW_CCS;
+		}
+
+		mutex_lock(&m->job_mutex);
+		job = xe_bb_create_migration_job(m->eng, bb,
+						 xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		continue;
+
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+err_sync:
+		/*
+		 * Sync partial clears if any. Wait on the local fence, for
+		 * which a reference is held: m->fence may only be accessed
+		 * under job_mutex and can be replaced by another submitter
+		 * at any time.
+		 */
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+/*
+ * Emit MI_STORE_DATA_IMM commands into @bb which write the page-table
+ * entries described by @update. The qword payload of each command is filled
+ * in by the populate() callback of @pt_update.
+ *
+ * @ppgtt_ofs is the address at which the page table is reachable from the
+ * batch. If it is 0, the address is derived from the page-table BO itself
+ * through xe_bo_addr() and xe_migrate_vram_ofs(), which requires the BO to
+ * be in local memory.
+ */
+static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs,
+			  const struct xe_vm_pgtable_update *update,
+			  struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	u32 chunk;
+	u32 ofs = update->ofs, size = update->qwords;
+
+	/*
+	 * If we have 512 entries (max), we would populate it ourselves,
+	 * and update the PDE above it to the new pointer.
+	 * The only time this can happen is if we have to update the top
+	 * PDE. This requires a BO that is almost vm->size big.
+	 *
+	 * This shouldn't be possible in practice.. might change when 16K
+	 * pages are used. Hence the BUG_ON.
+	 */
+	XE_BUG_ON(update->qwords > 0x1ff);
+	if (!ppgtt_ofs) {
+		bool is_lmem;
+
+		ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
+							   GEN8_PAGE_SIZE,
+							   &is_lmem));
+		XE_BUG_ON(!is_lmem);
+	}
+
+	do {
+		u64 addr = ppgtt_ofs + ofs * 8;
+
+		/*
+		 * Clamp against what is left to write rather than against
+		 * the total, so that any iteration after the first one emits
+		 * the right number of qwords.
+		 */
+		chunk = min(size, 0x1ffU);
+
+		/*
+		 * Ensure populatefn can do memset64 by aligning bb->cs: the
+		 * command header below is three dwords, so it has to start
+		 * at an odd index for the payload to start at an even one.
+		 */
+		if (!(bb->len & 1))
+			bb->cs[bb->len++] = MI_NOOP;
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
+			(chunk * 2 + 1);
+		bb->cs[bb->len++] = lower_32_bits(addr);
+		bb->cs[bb->len++] = upper_32_bits(addr);
+		ops->populate(pt_update, gt, NULL, bb->cs + bb->len, ofs, chunk,
+			      update);
+
+		bb->len += chunk * 2;
+		ofs += chunk;
+		size -= chunk;
+	} while (size);
+}
+
+/**
+ * xe_migrate_get_vm() - Get the VM used by the migrate engine.
+ * @m: The migration context.
+ *
+ * Return: The result of xe_vm_get() on the VM of @m's engine.
+ */
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
+{
+	struct xe_vm *vm = m->eng->vm;
+
+	return xe_vm_get(vm);
+}
+
+/*
+ * Try to apply the page-table @updates directly with the CPU.
+ *
+ * The fences of @bo (DMA_RESV_USAGE_KERNEL) and, if @wait_vm is set, of
+ * @vm (DMA_RESV_USAGE_BOOKKEEP) are each waited for with a timeout of
+ * HZ / 100 jiffies. The pre_commit() callback, if any, runs before the
+ * entries are written through the populate() callback using the vmap of
+ * each page-table BO.
+ *
+ * Return: The stub fence on success, ERR_PTR(-ETIME) if one of the waits
+ * did not complete successfully in time, or the error returned by
+ * pre_commit() wrapped in an ERR_PTR().
+ */
+static struct dma_fence *
+xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
+			       struct xe_vm *vm, struct xe_bo *bo,
+			       const struct  xe_vm_pgtable_update *updates,
+			       u32 num_updates, bool wait_vm,
+			       struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	long remaining;
+	u32 idx;
+
+	/* Wait on BO moves for 10 ms, then fall back to GPU job */
+	if (bo) {
+		remaining = dma_resv_wait_timeout(bo->ttm.base.resv,
+						  DMA_RESV_USAGE_KERNEL,
+						  true, HZ / 100);
+		if (remaining <= 0)
+			return ERR_PTR(-ETIME);
+	}
+
+	/* Same budget for everything still running on the VM. */
+	if (wait_vm) {
+		remaining = dma_resv_wait_timeout(&vm->resv,
+						  DMA_RESV_USAGE_BOOKKEEP,
+						  true, HZ / 100);
+		if (remaining <= 0)
+			return ERR_PTR(-ETIME);
+	}
+
+	if (ops->pre_commit) {
+		int err = ops->pre_commit(pt_update);
+
+		if (err)
+			return ERR_PTR(err);
+	}
+
+	for (idx = 0; idx < num_updates; idx++)
+		ops->populate(pt_update, m->gt, &updates[idx].pt_bo->vmap,
+			      NULL, updates[idx].ofs, updates[idx].qwords,
+			      &updates[idx]);
+
+	trace_xe_vm_cpu_bind(vm);
+	xe_device_wmb(vm->xe);
+
+	return dma_fence_get_stub();
+}
+
+/*
+ * Check whether none of the @num_syncs entries in @syncs carries a fence
+ * that has not signaled yet. Entries without a fence are ignored.
+ */
+static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	int idx;
+
+	for (idx = 0; idx < num_syncs; idx++) {
+		struct dma_fence *in_fence = syncs[idx].fence;
+
+		if (!in_fence)
+			continue;
+
+		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &in_fence->flags))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * An engine counts as idle if there is none, if the next seqno of its
+ * first LRC is still 1, or if the current seqno of that LRC equals the
+ * next seqno to be handed out.
+ */
+static bool engine_is_idle(struct xe_engine *e)
+{
+	if (!e)
+		return true;
+
+	if (e->lrc[0].fence_ctx.next_seqno == 1)
+		return true;
+
+	return xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno;
+}
+
+/**
+ * xe_migrate_update_pgtables() - Apply a set of page-table updates.
+ * @m: The migration context.
+ * @vm: The VM whose page tables are updated.
+ * @bo: The buffer object being bound, may be NULL. Its
+ * DMA_RESV_USAGE_KERNEL fences are waited for.
+ * @eng: The engine to run the update on. If NULL, the migrate engine is
+ * used and submission is serialized with m->job_mutex.
+ * @updates: Array describing the page-table updates.
+ * @num_updates: Number of entries in @updates.
+ * @syncs: Array of sync entries the update has to wait for.
+ * @num_syncs: Number of entries in @syncs.
+ * @pt_update: Callback argument carrying the populate() and pre_commit()
+ * callbacks and the vma.
+ *
+ * If no in-sync is pending and @eng is idle, the update is first attempted
+ * with the CPU; its result is returned unless it failed with an error other
+ * than -EAGAIN, in which case a GPU job is built instead.
+ *
+ * For the GPU job on devices without VRAM, the first part of the batch maps
+ * the page-table BOs into the migrate VM and the part after
+ * MI_BATCH_BUFFER_END writes the entries through that mapping. With VRAM
+ * the entries are written at the address of the page-table BOs directly.
+ *
+ * Return: A reference to the fence signaling completion of the update, or
+ * an ERR_PTR() on failure.
+ */
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_engine *eng,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	struct drm_suballoc *sa_bo = NULL;
+	struct xe_vma *vma = pt_update->vma;
+	struct xe_bb *bb;
+	u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
+	u64 addr;
+	int err = 0;
+	bool usm = !eng && xe->info.supports_usm;
+	bool first_munmap_rebind = vma && vma->first_munmap_rebind;
+
+	/* Use the CPU if no in syncs and engine is idle */
+	if (no_in_syncs(syncs, num_syncs) && engine_is_idle(eng)) {
+		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
+							num_updates,
+							first_munmap_rebind,
+							pt_update);
+		/* Any failure other than -EAGAIN falls back to a GPU job. */
+		if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN))
+			return fence;
+	}
+
+	/* fixed + PTE entries */
+	if (IS_DGFX(xe))
+		batch_size = 2;
+	else
+		batch_size = 6 + num_updates * 2;
+
+	for (i = 0; i < num_updates; i++) {
+		u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);
+
+		/* align noop + MI_STORE_DATA_IMM cmd prefix */
+		batch_size += 4 * num_cmds + updates[i].qwords * 2;
+	}
+
+	/*
+	 * XXX: Create temp bo to copy from, if batch_size becomes too big?
+	 *
+	 * Worst case: Sum(2 * (each lower level page size) + (top level page size))
+	 * Should be reasonably bound..
+	 */
+	XE_BUG_ON(batch_size >= SZ_128K);
+
+	bb = xe_bb_new(gt, batch_size, usm);
+	if (IS_ERR(bb))
+		return ERR_CAST(bb);
+
+	/* For sysmem PTE's, need to map them in our hole.. */
+	if (!IS_DGFX(xe)) {
+		/* Kernel binds use the last kernel PDE slot. */
+		ppgtt_ofs = NUM_KERNEL_PDE - 1;
+		if (eng) {
+			XE_BUG_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT);
+
+			sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
+						 GFP_KERNEL, true, 0);
+			if (IS_ERR(sa_bo)) {
+				err = PTR_ERR(sa_bo);
+				/*
+				 * drm_suballoc_free() must not see the error
+				 * pointer, and the batch buffer allocated
+				 * above has to be released as well. No job
+				 * mutex is held here since eng is set.
+				 */
+				sa_bo = NULL;
+				goto err_bb;
+			}
+
+			ppgtt_ofs = NUM_KERNEL_PDE +
+				(drm_suballoc_soffset(sa_bo) /
+				 NUM_VMUSA_UNIT_PER_PAGE);
+			page_ofs = (drm_suballoc_soffset(sa_bo) %
+				    NUM_VMUSA_UNIT_PER_PAGE) *
+				VM_SA_UPDATE_UNIT_SIZE;
+		}
+
+		/* Preemption is enabled again by the ring ops. */
+		emit_arb_clear(bb);
+
+		/* Map our PT's to gtt */
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
+			(num_updates * 2 + 1);
+		bb->cs[bb->len++] = ppgtt_ofs * GEN8_PAGE_SIZE + page_ofs;
+		bb->cs[bb->len++] = 0; /* upper_32_bits */
+
+		for (i = 0; i < num_updates; i++) {
+			struct xe_bo *pt_bo = updates[i].pt_bo;
+
+			BUG_ON(pt_bo->size != SZ_4K);
+
+			addr = gen8_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB,
+					       0, 0);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+		}
+
+		/* End of the mapping part; the PTE writes start here. */
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		/* Each qword written above maps one page in the migrate VM. */
+		addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
+			(page_ofs / sizeof(u64)) * GEN8_PAGE_SIZE;
+		for (i = 0; i < num_updates; i++)
+			write_pgtable(m->gt, bb, addr + i * GEN8_PAGE_SIZE,
+				      &updates[i], pt_update);
+	} else {
+		/* phys pages, no preamble required */
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		/* Preemption is enabled again by the ring ops. */
+		emit_arb_clear(bb);
+		for (i = 0; i < num_updates; i++)
+			write_pgtable(m->gt, bb, 0, &updates[i], pt_update);
+	}
+
+	/* Jobs on the migrate engine are serialized by the job mutex. */
+	if (!eng)
+		mutex_lock(&m->job_mutex);
+
+	job = xe_bb_create_migration_job(eng ?: m->eng, bb,
+					 xe_migrate_batch_base(m, usm),
+					 update_idx);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err_bb;
+	}
+
+	/* Wait on BO move */
+	if (bo) {
+		err = job_add_deps(job, bo->ttm.base.resv,
+				   DMA_RESV_USAGE_KERNEL);
+		if (err)
+			goto err_job;
+	}
+
+	/*
+	 * Munmap style VM unbind, need to wait for all jobs to be complete /
+	 * trigger preempts before moving forward
+	 */
+	if (first_munmap_rebind) {
+		err = job_add_deps(job, &vm->resv,
+				   DMA_RESV_USAGE_BOOKKEEP);
+		if (err)
+			goto err_job;
+	}
+
+	for (i = 0; !err && i < num_syncs; i++)
+		err = xe_sync_entry_add_deps(&syncs[i], job);
+
+	if (err)
+		goto err_job;
+
+	if (ops->pre_commit) {
+		err = ops->pre_commit(pt_update);
+		if (err)
+			goto err_job;
+	}
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	if (!eng)
+		mutex_unlock(&m->job_mutex);
+
+	/* Batch buffer and suballocation are released once fence signals. */
+	xe_bb_free(bb, fence);
+	drm_suballoc_free(sa_bo, fence);
+
+	return fence;
+
+err_job:
+	xe_sched_job_put(job);
+err_bb:
+	if (!eng)
+		mutex_unlock(&m->job_mutex);
+	xe_bb_free(bb, NULL);
+	drm_suballoc_free(sa_bo, NULL);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_migrate_wait() - Wait for the fence recorded in the migration context.
+ * @m: The migration context.
+ *
+ * Does nothing if no fence has been recorded in @m. The wait is
+ * uninterruptible.
+ */
+void xe_migrate_wait(struct xe_migrate *m)
+{
+	if (!m->fence)
+		return;
+
+	dma_fence_wait(m->fence, false);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_migrate.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
new file mode 100644
index 000000000000..267057a3847f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __XE_MIGRATE__
+#define __XE_MIGRATE__
+
+#include <drm/drm_mm.h>
+
+struct dma_fence;
+struct iosys_map;
+struct ttm_resource;
+
+struct xe_bo;
+struct xe_gt;
+struct xe_engine;
+struct xe_migrate;
+struct xe_migrate_pt_update;
+struct xe_sync_entry;
+struct xe_pt;
+struct xe_vm;
+struct xe_vm_pgtable_update;
+struct xe_vma;
+
+/**
+ * struct xe_migrate_pt_update_ops - Callbacks for the
+ * xe_migrate_update_pgtables() function.
+ */
+struct xe_migrate_pt_update_ops {
+	/**
+	 * populate() - Populate a command buffer or page-table with ptes.
+	 * @pt_update: Embeddable callback argument.
+	 * @gt: The gt for the current operation.
+	 * @map: struct iosys_map into the memory to be populated.
+	 * @pos: If @map is NULL, pointer into the memory to be populated.
+	 * @ofs: qword offset into @map, unused if @map is NULL.
+	 * @num_qwords: Number of qwords to write.
+	 * @update: Information about the PTEs to be inserted.
+	 *
+	 * This interface is intended to be used as a callback into the
+	 * page-table system to populate command buffers or shared
+	 * page-tables with PTEs.
+	 */
+	void (*populate)(struct xe_migrate_pt_update *pt_update,
+			 struct xe_gt *gt, struct iosys_map *map,
+			 void *pos, u32 ofs, u32 num_qwords,
+			 const struct xe_vm_pgtable_update *update);
+
+	/**
+	 * pre_commit(): Callback to be called just before arming the
+	 * sched_job.
+	 * @pt_update: Pointer to embeddable callback argument.
+	 *
+	 * This callback is optional and may be left NULL.
+	 *
+	 * Return: 0 on success, negative error code on error.
+	 */
+	int (*pre_commit)(struct xe_migrate_pt_update *pt_update);
+};
+
+/**
+ * struct xe_migrate_pt_update - Argument to the
+ * struct xe_migrate_pt_update_ops callbacks.
+ *
+ * Intended to be embedded in a larger structure by the caller, as the
+ * callbacks describe it as an embeddable argument.
+ */
+struct xe_migrate_pt_update {
+	/** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */
+	const struct xe_migrate_pt_update_ops *ops;
+	/** @vma: The vma the update is for, may be NULL */
+	struct xe_vma *vma;
+};
+
+struct xe_migrate *xe_migrate_init(struct xe_gt *gt);
+
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst);
+
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst,
+				   u32 value);
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
+
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_engine *eng,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update);
+
+void xe_migrate_wait(struct xe_migrate *m);
+
+struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
new file mode 100644
index 000000000000..6a68fdff08dc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_DOC_H_
+#define _XE_MIGRATE_DOC_H_
+
+/**
+ * DOC: Migrate Layer
+ *
+ * The XE migrate layer is used to generate jobs which can copy memory
+ * (eviction), clear memory, or program tables (binds). This layer exists in
+ * every GT, has a migrate engine, and uses a special VM for all generated jobs.
+ *
+ * Special VM details
+ * ==================
+ *
+ * The special VM is configured with a page structure where we can dynamically
+ * map BOs which need to be copied and cleared, dynamically map other VM's page
+ * table BOs for updates, and identity map the entire device's VRAM with 1 GB
+ * pages.
+ *
+ * Currently the page structure consists of 48 physical pages with 16 being
+ * reserved for BO mapping during copies and clears, 1 reserved for kernel
+ * binds, several pages needed to set up the identity mappings (exact number
+ * based on how many bits of address space the device has), and the rest
+ * reserved for user bind operations.
+ *
+ * TODO: Diagram of layout
+ *
+ * Bind jobs
+ * =========
+ *
+ * A bind job consists of two batches and runs either on the migrate engine
+ * (kernel binds) or the bind engine passed in (user binds). In both cases the
+ * VM of the engine is the migrate VM.
+ *
+ * The first batch is used to update the migration VM page structure to point to
+ * the bind VM page table BOs which need to be updated. A physical page is
+ * required for this. If it is a user bind, the page is allocated from the pool
+ * of pages reserved for user bind operations, with drm_suballoc managing this
+ * pool. If it is a kernel bind, the page reserved for kernel binds is used.
+ *
+ * The first batch is only required for devices without VRAM as when the device
+ * has VRAM the bind VM page table BOs are in VRAM and the identity mapping can
+ * be used.
+ *
+ * The second batch is used to program the page table updates in the bind VM.
+ * Why not just one batch? Well, the TLBs need to be invalidated between these
+ * two batches and that can only be done from the ring.
+ *
+ * When the bind job completes, the allocated page is returned to the pool of
+ * pages reserved for user bind operations if it is a user bind. There is no
+ * need to do this for kernel binds as the reserved kernel page is used serially
+ * by each job.
+ *
+ * Copy / clear jobs
+ * =================
+ *
+ * A copy or clear job consists of two batches and runs on the migrate engine.
+ *
+ * Like binds, the first batch is used to update the migration VM page
+ * structure. In copy jobs, we need to map the source and destination of the BO
+ * into the page structure. In clear jobs, we just need to add 1 mapping of the
+ * BO into the page structure. We use the 16 reserved pages in the migration VM
+ * for mappings, this gives us a maximum copy size of 16 MB and a maximum clear
+ * size of 32 MB.
+ *
+ * The second batch is used to do either the copy or the clear. Again similar
+ * to binds, two batches are required as the TLBs need to be invalidated from
+ * the ring between the batches.
+ *
+ * More than one job will be generated if the BO is larger than maximum copy /
+ * clear size.
+ *
+ * Future work
+ * ===========
+ *
+ * Update copy and clear code to use identity mapped VRAM.
+ *
+ * Can we rework the use of the pages for async binds to use all the entries in
+ * each page?
+ *
+ * Using large pages for sysmem mappings.
+ *
+ * Is it possible to identity map the sysmem? We should explore this.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
new file mode 100644
index 000000000000..42e2405f2f48
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_mmio.h"
+
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+
+#include "i915_reg.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+
+#define XEHP_MTCFG_ADDR		_MMIO(0x101800)
+#define TILE_COUNT		REG_GENMASK(15, 8)
+#define GEN12_LMEM_BAR		2
+
+/*
+ * Set up the DMA parameters of the device: the maximum segment size, and
+ * both the streaming and the coherent DMA mask, the latter two derived from
+ * xe->info.dma_mask_size.
+ *
+ * Return: 0 on success, or the error of the mask setter that failed.
+ */
+static int xe_set_dma_info(struct xe_device *xe)
+{
+	unsigned int mask_size = xe->info.dma_mask_size;
+	int err;
+
+	/*
+	 * There is no real limit on the segment size, so use the maximum
+	 * to keep the sg debugging layer from complaining.
+	 */
+	dma_set_max_seg_size(xe->drm.dev, UINT_MAX);
+
+	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+	if (!err)
+		err = dma_set_coherent_mask(xe->drm.dev,
+					    DMA_BIT_MASK(mask_size));
+	if (!err)
+		return 0;
+
+	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+	return err;
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * Resize BAR @resno to @size bytes, releasing the resource first if it
+ * currently has a length.
+ *
+ * Return: 1 if the BAR was resized, -1 if the resize failed.
+ */
+static int
+_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int bar_size = pci_rebar_bytes_to_size(size);
+	int ret;
+
+	if (pci_resource_len(pdev, resno))
+		pci_release_resource(pdev, resno);
+
+	ret = pci_resize_resource(pdev, resno, bar_size);
+	if (!ret) {
+		drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
+		return 1;
+	}
+
+	drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe)\n",
+		 resno, 1 << bar_size, ERR_PTR(ret));
+	return -1;
+}
+
+/*
+ * Try to resize the LMEM BAR so that it covers @lmem_size bytes, or the
+ * size requested through xe_force_lmem_bar_size (given in MiB) if that
+ * size is supported by the BAR and smaller than the rounded up @lmem_size.
+ *
+ * Return: 0 if no resize is needed, 1 if the BAR was resized, -1 if the
+ * resize failed or the platform offers no suitable root bus resource.
+ */
+static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_bus *root = pdev->bus;
+	struct resource *root_res;
+	resource_size_t rebar_size;
+	resource_size_t current_size;
+	u32 pci_cmd;
+	int i;
+	int ret;
+	u64 force_lmem_bar_size = xe_force_lmem_bar_size;
+
+	current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR));
+
+	if (force_lmem_bar_size) {
+		u32 bar_sizes;
+
+		/* The forced size is given in MiB. */
+		rebar_size = force_lmem_bar_size * (resource_size_t)SZ_1M;
+		bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
+
+		if (rebar_size == current_size)
+			return 0;
+
+		/* Fall back to the full size on an unusable request. */
+		if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) ||
+		    rebar_size >= roundup_pow_of_two(lmem_size)) {
+			rebar_size = lmem_size;
+			drm_info(&xe->drm,
+				 "Given bar size is not within supported size, setting it to default: %llu\n",
+				 (u64)lmem_size >> 20);
+		}
+	} else {
+		rebar_size = current_size;
+
+		/* Nothing to do if the BAR already covers all of LMEM. */
+		if (rebar_size != roundup_pow_of_two(lmem_size))
+			rebar_size = lmem_size;
+		else
+			return 0;
+	}
+
+	/* Walk up to the root bus. */
+	while (root->parent)
+		root = root->parent;
+
+	/* Look for a root memory resource that starts above 4 GiB. */
+	pci_bus_for_each_resource(root, root_res, i) {
+		if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
+		    root_res->start > 0x100000000ull)
+			break;
+	}
+
+	if (!root_res) {
+		drm_info(&xe->drm, "Can't resize LMEM BAR - platform support is missing\n");
+		return -1;
+	}
+
+	/* Memory decoding is switched off for the duration of the resize. */
+	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
+
+	ret = _resize_bar(xe, GEN12_LMEM_BAR, rebar_size);
+
+	/* Reassign resources and restore the saved command register. */
+	pci_assign_unassigned_bus_resources(pdev->bus);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+	return ret;
+}
+#else
+/* Without CONFIG_64BIT no BAR resize is attempted: report "nothing done". */
+static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size)
+{
+	return 0;
+}
+#endif
+
+/*
+ * A BAR is considered valid if it has flags, is not marked as
+ * IORESOURCE_UNSET, and has a non-zero length.
+ */
+static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
+{
+	bool has_flags = pci_resource_flags(pdev, bar);
+
+	if (!has_flags || (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET))
+		return false;
+
+	return pci_resource_len(pdev, bar) != 0;
+}
+
+/**
+ * xe_mmio_probe_vram() - Determine size and location of VRAM and map it.
+ * @xe: The xe device.
+ *
+ * On devices without VRAM all VRAM information of the device and of each
+ * GT is zeroed. Otherwise the amount of local memory is read from the
+ * hardware, a resize of the LMEM BAR is attempted, and the usable VRAM is
+ * limited to what the BAR exposes and to the start of the flat CCS area.
+ * The result is recorded in the device and split between the GTs.
+ *
+ * Return: 0 on success, -ENXIO if the LMEM BAR is not valid, -EIO if no
+ * VRAM is usable, or the error of a failed forcewake get/put.
+ */
+int xe_mmio_probe_vram(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct xe_gt *gt;
+	u8 id;
+	u64 lmem_size;
+	u64 original_size;
+	u64 current_size;
+	u64 flat_ccs_base;
+	int resize_result;
+
+	/* No VRAM: clear the VRAM information everywhere and be done. */
+	if (!IS_DGFX(xe)) {
+		xe->mem.vram.mapping = 0;
+		xe->mem.vram.size = 0;
+		xe->mem.vram.io_start = 0;
+
+		for_each_gt(gt, xe, id) {
+			gt->mem.vram.mapping = 0;
+			gt->mem.vram.size = 0;
+			gt->mem.vram.io_start = 0;
+		}
+		return 0;
+	}
+
+	if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) {
+		drm_err(&xe->drm, "pci resource is not valid\n");
+		return -ENXIO;
+	}
+
+	gt = xe_device_get_gt(xe, 0);
+	lmem_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg);
+
+	/* Remember the BAR size from before any resize, for the log below. */
+	original_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+
+	if (xe->info.has_flat_ccs)  {
+		int err;
+		u32 reg;
+
+		err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+		if (err)
+			return err;
+		/* With flat CCS the size comes from the tile range, in GiB. */
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE);
+		lmem_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
+		/* The flat CCS base is reported in units of 64K. */
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+		flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+
+		drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n",
+			 lmem_size, flat_ccs_base);
+
+		err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+		if (err)
+			return err;
+	} else {
+		/* No flat CCS area: the clamp further down is a no-op. */
+		flat_ccs_base = lmem_size;
+	}
+
+	resize_result = xe_resize_lmem_bar(xe, lmem_size);
+	current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
+
+	/* Only what the BAR exposes can be used. */
+	xe->mem.vram.size = min(current_size, lmem_size);
+
+	if (!xe->mem.vram.size)
+		return -EIO;
+
+	if (resize_result > 0)
+		drm_info(&xe->drm, "Successfully resize LMEM from %lluMiB to %lluMiB\n",
+			 (u64)original_size >> 20,
+			 (u64)current_size >> 20);
+	else if (xe->mem.vram.size < lmem_size && !xe_force_lmem_bar_size)
+		drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n",
+			 (u64)xe->mem.vram.size >> 20);
+	if (xe->mem.vram.size < lmem_size)
+		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
+			 lmem_size, xe->mem.vram.size);
+
+	/*
+	 * The mapping is only set up with CONFIG_64BIT and covers the size
+	 * as it is before the clamp to the flat CCS base below.
+	 * NOTE(review): the result of ioremap_wc() is not checked here.
+	 */
+#ifdef CONFIG_64BIT
+	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size);
+#endif
+
+	/* Usable VRAM ends where the flat CCS area starts. */
+	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, flat_ccs_base);
+
+	drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size);
+
+	/* FIXME: Assuming equally partitioned VRAM, incorrect */
+	if (xe->info.tile_count > 1) {
+		u8 adj_tile_count = xe->info.tile_count;
+		resource_size_t size, io_start;
+
+		/* Media GTs do not get a VRAM slice of their own. */
+		for_each_gt(gt, xe, id)
+			if (xe_gt_is_media_type(gt))
+				--adj_tile_count;
+
+		XE_BUG_ON(!adj_tile_count);
+
+		size = xe->mem.vram.size / adj_tile_count;
+		io_start = xe->mem.vram.io_start;
+
+		for_each_gt(gt, xe, id) {
+			/*
+			 * Advance to the next slice for every non-media GT
+			 * but the first; a media GT reuses the current one.
+			 */
+			if (id && !xe_gt_is_media_type(gt))
+				io_start += size;
+
+			gt->mem.vram.size = size;
+			gt->mem.vram.io_start = io_start;
+			gt->mem.vram.mapping = xe->mem.vram.mapping +
+				(io_start - xe->mem.vram.io_start);
+
+			drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n",
+				 id, gt->info.vram_id, &gt->mem.vram.io_start,
+				 &gt->mem.vram.size);
+		}
+	} else {
+		/* Single tile: GT 0, looked up above, gets all of it. */
+		gt->mem.vram.size = xe->mem.vram.size;
+		gt->mem.vram.io_start = xe->mem.vram.io_start;
+		gt->mem.vram.mapping = xe->mem.vram.mapping;
+
+		drm_info(&xe->drm, "VRAM: %pa\n", &gt->mem.vram.size);
+	}
+	return 0;
+}
+
+/*
+ * Read the number of tiles from the hardware, update xe->info.tile_count
+ * accordingly, and give each GT its window into the register mapping.
+ * Nothing is done if the device is set up with a single tile.
+ */
+static void xe_mmio_probe_tiles(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	u32 mtcfg;
+	u8 adj_tile_count;
+	u8 id;
+
+	if (xe->info.tile_count == 1)
+		return;
+
+	/* The register field holds the tile count minus one. */
+	mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg);
+	adj_tile_count = xe->info.tile_count =
+		REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
+	/*
+	 * From media version 13 on, tile_count is doubled while
+	 * adj_tile_count keeps the value read from the hardware -
+	 * presumably one extra media GT per tile, TODO confirm.
+	 */
+	if (xe->info.media_ver >= 13)
+		xe->info.tile_count *= 2;
+
+	drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n",
+		 xe->info.tile_count, adj_tile_count);
+
+	if (xe->info.tile_count > 1) {
+		const int mmio_bar = 0;
+		size_t size;
+		void *regs;
+
+		/*
+		 * Replace the existing mapping by one with 16M per tile.
+		 * NOTE(review): the result of pci_iomap() is not checked.
+		 */
+		if (adj_tile_count > 1) {
+			pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
+			xe->mmio.size = SZ_16M * adj_tile_count;
+			xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev),
+						  mmio_bar, xe->mmio.size);
+		}
+
+		size = xe->mmio.size / adj_tile_count;
+		regs = xe->mmio.regs;
+
+		for_each_gt(gt, xe, id) {
+			/*
+			 * Advance to the next window for every non-media GT
+			 * but the first; a media GT reuses the current one.
+			 */
+			if (id && !xe_gt_is_media_type(gt))
+				regs += size;
+			gt->mmio.size = size;
+			gt->mmio.regs = regs;
+		}
+	}
+}
+
+static void mmio_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;	/* cookie registered by xe_mmio_init() */
+
+	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
+	if (xe->mem.vram.mapping)	/* NULL when VRAM was never ioremap'ed */
+		iounmap(xe->mem.vram.mapping);
+}
+
+int xe_mmio_init(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	const int mmio_bar = 0;
+	int err;
+
+	/*
+	 * Map the entire BAR, which includes registers (0-4MB), reserved space
+	 * (4MB-8MB), and GGTT (8MB-16MB). Other parts of the driver (GTs,
+	 * GGTTs) will derive the pointers they need from the mapping in the
+	 * device structure.
+	 */
+	xe->mmio.size = SZ_16M;
+	xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar,
+				  xe->mmio.size);
+	if (xe->mmio.regs == NULL) {
+		drm_err(&xe->drm, "failed to map registers\n");
+		return -EIO;
+	}
+	/* Managed cleanup: mmio_fini() undoes the mapping, so later errors just return. */
+	err = drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
+	if (err)
+		return err;
+
+	/* 1 GT for now, 1 to 1 mapping, may change on multi-GT devices */
+	gt->mmio.size = xe->mmio.size;
+	gt->mmio.regs = xe->mmio.regs;
+
+	/*
+	 * The boot firmware initializes local memory and assesses its health.
+	 * If memory training fails, the punit will have been instructed to
+	 * keep the GT powered down; we won't be able to communicate with it
+	 * and we should not continue with driver initialization.
+	 */
+	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) {
+		drm_err(&xe->drm, "LMEM not initialized by firmware\n");
+		return -ENODEV;
+	}
+
+	err = xe_set_dma_info(xe);
+	if (err)
+		return err;
+	/* On multi-tile parts this remaps the BAR and re-splits it per GT. */
+	xe_mmio_probe_tiles(xe);
+
+	return 0;
+}
+
+#define VALID_MMIO_FLAGS (\
+	DRM_XE_MMIO_BITS_MASK |\
+	DRM_XE_MMIO_READ |\
+	DRM_XE_MMIO_WRITE)
+
+static const i915_reg_t mmio_read_whitelist[] = {	/* readable without CAP_SYS_ADMIN */
+	RING_TIMESTAMP(RENDER_RING_BASE),
+};
+
+int xe_mmio_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_mmio *args = data;
+	unsigned int bits_flag, bytes;
+	bool allowed;
+	int ret = 0;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS))
+		return -EINVAL;
+	/* A request without the WRITE flag must not carry an input value. */
+	if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value))
+		return -EINVAL;
+	/* Without CAP_SYS_ADMIN only pure reads of whitelisted registers pass. */
+	allowed = capable(CAP_SYS_ADMIN);
+	if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) == DRM_XE_MMIO_READ)) {
+		unsigned int i;
+
+		for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) {
+			if (mmio_read_whitelist[i].reg == args->addr) {
+				allowed = true;
+				break;
+			}
+		}
+	}
+
+	if (XE_IOCTL_ERR(xe, !allowed))
+		return -EPERM;
+	/* Sum in 64 bits so a huge addr cannot wrap past the bounds check. */
+	bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK;
+	bytes = 1 << bits_flag;
+	if (XE_IOCTL_ERR(xe, (u64)args->addr + bytes > xe->mmio.size))
+		return -EINVAL;
+	/* No early return past this point: the forcewake taken here must be dropped. */
+	xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
+
+	if (args->flags & DRM_XE_MMIO_WRITE) {
+		switch (bits_flag) {
+		case DRM_XE_MMIO_8BIT:
+		case DRM_XE_MMIO_16BIT:
+			ret = -EINVAL; /* TODO */
+			break;
+		case DRM_XE_MMIO_32BIT:
+			if (XE_IOCTL_ERR(xe, args->value > U32_MAX))
+				ret = -EINVAL;
+			else
+				xe_mmio_write32(to_gt(xe), args->addr, args->value);
+			break;
+		case DRM_XE_MMIO_64BIT:
+			xe_mmio_write64(to_gt(xe), args->addr, args->value);
+			break;
+		default:
+			drm_WARN(&xe->drm, 1, "Invalid MMIO bit size");
+			ret = -EINVAL;
+			break;
+		}
+	}
+	/* Skip the read-back when the write half already failed. */
+	if (!ret && (args->flags & DRM_XE_MMIO_READ)) {
+		switch (bits_flag) {
+		case DRM_XE_MMIO_8BIT:
+		case DRM_XE_MMIO_16BIT:
+			ret = -EINVAL; /* TODO */
+			break;
+		case DRM_XE_MMIO_32BIT:
+			args->value = xe_mmio_read32(to_gt(xe), args->addr);
+			break;
+		case DRM_XE_MMIO_64BIT:
+			args->value = xe_mmio_read64(to_gt(xe), args->addr);
+			break;
+		default:
+			drm_WARN(&xe->drm, 1, "Invalid MMIO bit size");
+			ret = -EINVAL;
+		}
+	}
+
+	xe_force_wake_put(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
new file mode 100644
index 000000000000..09d24467096f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_MMIO_H_
+#define _XE_MMIO_H_
+
+#include <linux/delay.h>
+
+#include "xe_gt_types.h"
+
+/*
+ * FIXME: This header has been deemed evil and we need to kill it. Temporarily
+ * including so we can use 'wait_for' and unblock initial development. A follow-up
+ * should replace 'wait_for' with a sane version and drop including this header.
+ */
+#include "i915_utils.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+
+int xe_mmio_init(struct xe_device *xe);
+
+static inline u8 xe_mmio_read8(struct xe_gt *gt, u32 reg)
+{
+	/* Offsets below adj_limit are shifted by adj_offset before the access. */
+	const u32 offset = reg < gt->mmio.adj_limit ? reg + gt->mmio.adj_offset : reg;
+
+	return readb(gt->mmio.regs + offset);
+}
+
+static inline void xe_mmio_write32(struct xe_gt *gt,
+				   u32 reg, u32 val)
+{
+	/* Offsets below adj_limit are shifted by adj_offset before the access. */
+	const u32 offset = reg < gt->mmio.adj_limit ? reg + gt->mmio.adj_offset : reg;
+
+	writel(val, gt->mmio.regs + offset);
+}
+
+static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg)
+{
+	/* Offsets below adj_limit are shifted by adj_offset before the access. */
+	const u32 offset = reg < gt->mmio.adj_limit ? reg + gt->mmio.adj_offset : reg;
+
+	return readl(gt->mmio.regs + offset);
+}
+
+static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 mask,
+				 u32 val)
+{
+	/* @mask selects the bits that are KEPT; @val is then OR-ed on top. */
+	const u32 prev = xe_mmio_read32(gt, reg);
+
+	xe_mmio_write32(gt, reg, (prev & mask) | val);
+
+	/* Callers get the value the register held before the update. */
+	return prev;
+}
+
+static inline void xe_mmio_write64(struct xe_gt *gt,
+				   u32 reg, u64 val)
+{
+	/* Offsets below adj_limit are shifted by adj_offset before the access. */
+	const u32 offset = reg < gt->mmio.adj_limit ? reg + gt->mmio.adj_offset : reg;
+
+	writeq(val, gt->mmio.regs + offset);
+}
+
+static inline u64 xe_mmio_read64(struct xe_gt *gt, u32 reg)
+{
+	/* Offsets below adj_limit are shifted by adj_offset before the access. */
+	const u32 offset = reg < gt->mmio.adj_limit ? reg + gt->mmio.adj_offset : reg;
+
+	return readq(gt->mmio.regs + offset);
+}
+
+static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
+					     u32 reg, u32 val,
+					     u32 mask, u32 eval)
+{
+	/* Write @val, read the register back, compare its @mask bits to @eval. */
+	xe_mmio_write32(gt, reg, val);
+	if ((xe_mmio_read32(gt, reg) & mask) == eval)
+		return 0;
+
+	return -EINVAL;
+}
+
+static inline int xe_mmio_wait32(struct xe_gt *gt,
+				 u32 reg, u32 val,
+				 u32 mask, u32 timeout_ms)
+{
+	/* Poll via wait_for() until (reg & mask) == val; returns its result. */
+	return wait_for(val == (xe_mmio_read32(gt, reg) & mask), timeout_ms);
+}
+
+int xe_mmio_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file);
+
+static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg)
+{
+	return range ? (range->start <= reg && reg <= range->end) : false; /* inclusive */
+}
+
+int xe_mmio_probe_vram(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
new file mode 100644
index 000000000000..86b966fffbe5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_platform_types.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_step_types.h"
+
+#include "gt/intel_gt_regs.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define mocs_dbg drm_dbg
+#else /* !CONFIG_DRM_XE_DEBUG */
+__printf(2, 3)
+static inline void mocs_dbg(const struct drm_device *dev,
+			    const char *format, ...)
+{ /* noop: MOCS debug output is compiled out without CONFIG_DRM_XE_DEBUG */ }
+#endif /* CONFIG_DRM_XE_DEBUG */
+
+/*
+ * MOCS indexes used for GPU surfaces, defining the cacheability of the
+ * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
+ */
+enum xe_mocs_info_index {
+	/*
+	 * Not cached anywhere, coherency between CPU and GPU accesses is
+	 * guaranteed.
+	 */
+	XE_MOCS_UNCACHED,
+	/*
+	 * Cacheability and coherency controlled by the kernel automatically
+	 * based on an IOCTL setting (TODO: name it; "xxxx" was a placeholder)
+	 * and the current usage of the surface (display scanout or not).
+	 */
+	XE_MOCS_PTE,	/* default unused_entries_index in get_mocs_settings() */
+	/*
+	 * Cached in all GPU caches available on the platform.
+	 * Coherency between CPU and GPU accesses to the surface is not
+	 * guaranteed without extra synchronization.
+	 */
+	XE_MOCS_CACHED,
+};
+
+enum {	/* flag bits returned by get_mocs_settings() */
+	HAS_GLOBAL_MOCS = BIT(0),	/* program the GLOBAL_MOCS control table (non-dGFX) */
+	HAS_RENDER_L3CC = BIT(1),	/* program the LNCFCMOCS l3cc table */
+};
+
+struct xe_mocs_entry {
+	u32 control_value;	/* written to a GLOBAL_MOCS register by __init_mocs_table() */
+	u16 l3cc_value;		/* one 16-bit half of an LNCFCMOCS register */
+	u16 used;		/* 1 when the slot was defined through MOCS_ENTRY() */
+};
+
+struct xe_mocs_info {
+	unsigned int size;		/* ARRAY_SIZE() of @table */
+	unsigned int n_entries;		/* number of MOCS slots that get programmed */
+	const struct xe_mocs_entry *table;
+	u8 uc_index;			/* copied to gt->mocs.uc_index by xe_mocs_init() */
+	u8 wb_index;			/* copied to gt->mocs.wb_index by xe_mocs_init() */
+	u8 unused_entries_index;	/* @table slot whose values fill unused entries */
+};
+
+/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
+#define _LE_CACHEABILITY(value)	((value) << 0)
+#define _LE_TGT_CACHE(value)	((value) << 2)
+#define LE_LRUM(value)		((value) << 4)
+#define LE_AOM(value)		((value) << 6)
+#define LE_RSC(value)		((value) << 7)
+#define LE_SCC(value)		((value) << 8)
+#define LE_PFM(value)		((value) << 11)
+#define LE_SCF(value)		((value) << 14)
+#define LE_COS(value)		((value) << 15)
+#define LE_SSE(value)		((value) << 17)
+
+/* Defines for the tables (LNCFCMOCS0 - LNCFCMOCS31) - two entries per word */
+#define L3_ESC(value)		((value) << 0)
+#define L3_SCC(value)		((value) << 1)
+#define _L3_CACHEABILITY(value)	((value) << 4)
+#define L3_GLBGO(value)		((value) << 6)
+#define L3_LKUP(value)		((value) << 7)
+
+/* Helper defines */
+#define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
+#define PVC_NUM_MOCS_ENTRIES	3
+#define MTL_NUM_MOCS_ENTRIES    16
+
+/* (e)LLC caching options */
+/*
+ * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
+ * the same as LE_UC
+ */
+#define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
+#define LE_1_UC			_LE_CACHEABILITY(1)
+#define LE_2_WT			_LE_CACHEABILITY(2)
+#define LE_3_WB			_LE_CACHEABILITY(3)
+
+/* Target cache */
+#define LE_TC_0_PAGETABLE	_LE_TGT_CACHE(0)
+#define LE_TC_1_LLC		_LE_TGT_CACHE(1)
+#define LE_TC_2_LLC_ELLC	_LE_TGT_CACHE(2)
+#define LE_TC_3_LLC_ELLC_ALT	_LE_TGT_CACHE(3)
+
+/* L3 caching options */
+#define L3_0_DIRECT		_L3_CACHEABILITY(0)
+#define L3_1_UC			_L3_CACHEABILITY(1)
+#define L3_2_RESERVED		_L3_CACHEABILITY(2)
+#define L3_3_WB			_L3_CACHEABILITY(3)
+
+#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
+	[__idx] = { \
+		.control_value = __control_value, \
+		.l3cc_value = __l3cc_value, \
+		.used = 1, \
+	}
+
+/*
+ * MOCS tables
+ *
+ * These are the MOCS tables that are programmed across all the rings.
+ * The control value is programmed to all the rings that support the
+ * MOCS registers. While the l3cc_values are only programmed to the
+ * LNCFCMOCS0 - LNCFCMOCS31 registers.
+ *
+ * These tables are intended to be kept reasonably consistent across
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
+ *
+ * Entries not part of the following tables are undefined as far as
+ * userspace is concerned and shouldn't be relied upon.  For Gen < 12
+ * they will be initialized to PTE. Gen >= 12 don't have a setting for
+ * PTE and those platforms except TGL/RKL will be initialized L3 WB to
+ * catch accidental use of reserved and unused mocs indexes.
+ *
+ * The last few entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
+ *
+ * NOTE1: These tables are part of bspec and defined as part of hardware
+ *       interface for ICL+. For older platforms, they are part of kernel
+ *       ABI. It is expected that, for specific hardware platform, existing
+ *       entries will remain constant and the table will only be updated by
+ *       adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS
+ *       indices have been set to L3 WB. These reserved entries should never
+ *       be used, they may be changed to low performant variants with better
+ *       coherency in the future if more entries are needed.
+ *       For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
+ */
+
+#define GEN11_MOCS_ENTRIES \
+	/* Entries 0 and 1 are defined per-platform */ \
+	/* Base - L3 + LLC */ \
+	MOCS_ENTRY(2, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		L3_3_WB), \
+	/* Base - Uncached */ \
+	MOCS_ENTRY(3, \
+		LE_1_UC | LE_TC_1_LLC, \
+		L3_1_UC), \
+	/* Base - L3 */ \
+	MOCS_ENTRY(4, \
+		LE_1_UC | LE_TC_1_LLC, \
+		L3_3_WB), \
+	/* Base - LLC */ \
+	MOCS_ENTRY(5, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		L3_1_UC), \
+	/* Age 0 - LLC */ \
+	MOCS_ENTRY(6, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+		L3_1_UC), \
+	/* Age 0 - L3 + LLC */ \
+	MOCS_ENTRY(7, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+		L3_3_WB), \
+	/* Age: Don't Chg. - LLC */ \
+	MOCS_ENTRY(8, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+		L3_1_UC), \
+	/* Age: Don't Chg. - L3 + LLC */ \
+	MOCS_ENTRY(9, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+		L3_3_WB), \
+	/* No AOM - LLC */ \
+	MOCS_ENTRY(10, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+		L3_1_UC), \
+	/* No AOM - L3 + LLC */ \
+	MOCS_ENTRY(11, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+		L3_3_WB), \
+	/* No AOM; Age 0 - LLC */ \
+	MOCS_ENTRY(12, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+		L3_1_UC), \
+	/* No AOM; Age 0 - L3 + LLC */ \
+	MOCS_ENTRY(13, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+		L3_3_WB), \
+	/* No AOM; Age:DC - LLC */ \
+	MOCS_ENTRY(14, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+		L3_1_UC), \
+	/* No AOM; Age:DC - L3 + LLC */ \
+	MOCS_ENTRY(15, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+		L3_3_WB), \
+	/* Self-Snoop - L3 + LLC */ \
+	MOCS_ENTRY(18, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(12.5%) */ \
+	MOCS_ENTRY(19, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(25%) */ \
+	MOCS_ENTRY(20, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(50%) */ \
+	MOCS_ENTRY(21, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(75%) */ \
+	MOCS_ENTRY(22, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(87.5%) */ \
+	MOCS_ENTRY(23, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
+		L3_3_WB), \
+	/* HW Reserved - SW program but never use */ \
+	MOCS_ENTRY(62, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		L3_1_UC), \
+	/* HW Reserved - SW program but never use */ \
+	MOCS_ENTRY(63, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		L3_1_UC)
+
+static const struct xe_mocs_entry tgl_mocs_desc[] = {
+	/*
+	 * NOTE:
+	 * Only index XE_MOCS_PTE(1) is programmed explicitly here; the table
+	 * init helpers take care of programming every unused index with this
+	 * entry (get_mocs_settings() leaves unused_entries_index at
+	 * XE_MOCS_PTE for Tiger Lake). Reserved entries should never be
+	 * used, they may be changed to low performant variants with better
+	 * coherency in the future if more entries are needed.
+	 */
+	MOCS_ENTRY(XE_MOCS_PTE,
+		   LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
+		   L3_1_UC),
+	GEN11_MOCS_ENTRIES,
+
+	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+	MOCS_ENTRY(48,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + L3 */
+	MOCS_ENTRY(49,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + LLC */
+	MOCS_ENTRY(50,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Implicitly enable L1 - HDC:L1 */
+	MOCS_ENTRY(51,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* HW Special Case (CCS) */
+	MOCS_ENTRY(60,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Special Case (Displayable) */
+	MOCS_ENTRY(61,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+};
+
+static const struct xe_mocs_entry dg1_mocs_desc[] = {	/* every control_value is 0 */
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+	/* WB - L3 (also the unused_entries_index chosen in get_mocs_settings()) */
+	MOCS_ENTRY(5, 0, L3_3_WB),
+	/* WB - L3 50% */
+	MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
+	/* WB - L3 25% */
+	MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
+	/* WB - L3 12.5% */
+	MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),
+
+	/* HDC:L1 + L3 */
+	MOCS_ENTRY(48, 0, L3_3_WB),
+	/* HDC:L1 */
+	MOCS_ENTRY(49, 0, L3_1_UC),
+
+	/* HW Reserved */
+	MOCS_ENTRY(60, 0, L3_1_UC),
+	MOCS_ENTRY(61, 0, L3_1_UC),
+	MOCS_ENTRY(62, 0, L3_1_UC),
+	MOCS_ENTRY(63, 0, L3_1_UC),
+};
+
+static const struct xe_mocs_entry gen12_mocs_desc[] = {	/* Alder Lake S/P; slots 0 and 1 unset */
+	GEN11_MOCS_ENTRIES,
+	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+	MOCS_ENTRY(48,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + L3 */
+	MOCS_ENTRY(49,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + LLC */
+	MOCS_ENTRY(50,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Implicitly enable L1 - HDC:L1 */
+	MOCS_ENTRY(51,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* HW Special Case (CCS) */
+	MOCS_ENTRY(60,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Special Case (Displayable) */
+	MOCS_ENTRY(61,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc[] = {	/* also used for Meteor Lake */
+	/* UC - Coherent; GO:L3 */
+	MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
+	/* UC - Coherent; GO:Memory */
+	MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Non-Coherent; GO:Memory */
+	MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+	/* WB - LC (unused_entries_index for DG2 and Meteor Lake) */
+	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = {	/* DG2-G10, graphics step A0..B0 */
+	/* Wa_14011441408: Set Go to Memory for MOCS#0 */
+	MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Coherent; GO:Memory */
+	MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Non-Coherent; GO:Memory */
+	MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+	/* WB - LC (unused_entries_index for DG2) */
+	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry pvc_mocs_desc[] = {	/* PVC_NUM_MOCS_ENTRIES slots */
+	/* Error */
+	MOCS_ENTRY(0, 0, L3_3_WB),
+
+	/* UC (uc_index) */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+
+	/* WB (wb_index and unused_entries_index) */
+	MOCS_ENTRY(2, 0, L3_3_WB),
+};
+
+static unsigned int get_mocs_settings(struct xe_device *xe,
+				      struct xe_mocs_info *info)
+{
+	unsigned int flags;
+
+	memset(info, 0, sizeof(*info));
+	/* Default slot for unused entries; most platforms override it below. */
+	info->unused_entries_index = XE_MOCS_PTE;
+	switch (xe->info.platform) {
+	case XE_PVC:
+		info->size = ARRAY_SIZE(pvc_mocs_desc);
+		info->table = pvc_mocs_desc;
+		info->n_entries = PVC_NUM_MOCS_ENTRIES;
+		info->uc_index = 1;
+		info->wb_index = 2;
+		info->unused_entries_index = 2;
+		break;
+	case XE_METEORLAKE:
+		info->size = ARRAY_SIZE(dg2_mocs_desc);
+		info->table = dg2_mocs_desc;
+		info->n_entries = MTL_NUM_MOCS_ENTRIES;
+		info->uc_index = 1;
+		info->unused_entries_index = 3;
+		break;
+	case XE_DG2:
+		if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 &&
+		    xe->info.step.graphics >= STEP_A0 &&
+		    xe->info.step.graphics <= STEP_B0) {
+			info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax);
+			info->table = dg2_mocs_desc_g10_ax;
+		} else {
+			info->size = ARRAY_SIZE(dg2_mocs_desc);
+			info->table = dg2_mocs_desc;
+		}
+		info->uc_index = 1;
+		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->unused_entries_index = 3;
+		break;
+	case XE_DG1:
+		info->size = ARRAY_SIZE(dg1_mocs_desc);
+		info->table = dg1_mocs_desc;
+		info->uc_index = 1;
+		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->unused_entries_index = 5;
+		break;
+	case XE_TIGERLAKE:
+		info->size  = ARRAY_SIZE(tgl_mocs_desc);
+		info->table = tgl_mocs_desc;
+		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->uc_index = 3;
+		/* unused slots keep the XE_MOCS_PTE default chosen above */
+		break;
+	case XE_ALDERLAKE_S:
+	case XE_ALDERLAKE_P:
+		info->size  = ARRAY_SIZE(gen12_mocs_desc);
+		info->table = gen12_mocs_desc;
+		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->uc_index = 3;
+		info->unused_entries_index = 2;
+		break;
+	default:
+		drm_err(&xe->drm, "Platform that should have a MOCS table does not.\n");
+		return 0;
+	}
+	/* A table larger than the slot count is a bug: report no flags at all. */
+	if (XE_WARN_ON(info->size > info->n_entries))
+		return 0;
+	/* Everyone programs L3CC; only non-dGFX parts get GLOBAL_MOCS as well. */
+	flags = HAS_RENDER_L3CC;
+	if (!IS_DGFX(xe))
+		flags |= HAS_GLOBAL_MOCS;
+
+	return flags;
+}
+
+/*
+ * Return the control_value to program for MOCS slot @index. Slots the table
+ * does not define (past info->size, or not marked used) take the value of the
+ * entry at info->unused_entries_index instead.
+ */
+static u32 get_entry_control(const struct xe_mocs_info *info,
+			     unsigned int index)
+{
+	if (index >= info->size || !info->table[index].used)
+		index = info->unused_entries_index;
+	return info->table[index].control_value;
+}
+
+static void __init_mocs_table(struct xe_gt *gt,
+			      const struct xe_mocs_info *info,
+			      u32 addr)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	unsigned int idx;
+
+	mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries);
+	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
+		      "Unused entries index should have been defined\n");
+
+	/* One 32-bit control register per slot, 4 bytes apart starting at @addr. */
+	for (idx = 0; idx < info->n_entries; idx++) {
+		const u32 mocs = get_entry_control(info, idx);
+
+		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", idx, _MMIO(addr + idx * 4).reg, mocs);
+		xe_mmio_write32(gt, _MMIO(addr + idx * 4).reg, mocs);
+	}
+}
+
+/*
+ * Return the l3cc_value to program for MOCS slot @index. Slots the table
+ * does not define (past info->size, or not marked used) take the value of the
+ * entry at info->unused_entries_index instead.
+ */
+static u16 get_entry_l3cc(const struct xe_mocs_info *info,
+			  unsigned int index)
+{
+	if (index >= info->size || !info->table[index].used)
+		index = info->unused_entries_index;
+	return info->table[index].l3cc_value;
+}
+
+static u32 l3cc_combine(u16 low, u16 high)
+{
+	return ((u32)high << 16) | low;	/* @low -> bits 15:0, @high -> bits 31:16 */
+}
+
+static void init_l3cc_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	const unsigned int n_pairs = (info->n_entries + 1) / 2;
+	unsigned int pair;
+
+	mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries);
+	/* Each LNCFCMOCS register packs two slots: even in the low half, odd in the high. */
+	for (pair = 0; pair < n_pairs; pair++) {
+		const u32 l3cc = l3cc_combine(get_entry_l3cc(info, 2 * pair),
+					      get_entry_l3cc(info, 2 * pair + 1));
+
+		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", pair, GEN9_LNCFCMOCS(pair).reg, l3cc);
+		xe_mmio_write32(gt, GEN9_LNCFCMOCS(pair).reg, l3cc);
+	}
+}
+
+void xe_mocs_init_engine(const struct xe_engine *engine)
+{
+	struct xe_mocs_info table;
+	unsigned int flags = get_mocs_settings(engine->gt->xe, &table);
+
+	/* Only a render-class engine reprograms the L3CC table here. */
+	if (!(flags & HAS_RENDER_L3CC))
+		return;
+	if (engine->class != XE_ENGINE_CLASS_RENDER)
+		return;
+	init_l3cc_table(engine->gt, &table);
+}
+
+void xe_mocs_init(struct xe_gt *gt)
+{
+	struct xe_mocs_info table;
+	unsigned int flags;
+
+	/*
+	 * LLC/eDRAM control values do not apply to dgfx: no HAS_GLOBAL_MOCS.
+	 */
+	flags = get_mocs_settings(gt->xe, &table);
+	mocs_dbg(&gt->xe->drm, "flag:0x%x\n", flags);
+	gt->mocs.uc_index = table.uc_index;
+	gt->mocs.wb_index = table.wb_index;
+
+	if (flags & HAS_GLOBAL_MOCS)
+		__init_mocs_table(gt, &table, GEN12_GLOBAL_MOCS(0).reg);
+
+	/*
+	 * Initialize the L3CC table as part of mocs initialization to make
+	 * sure the LNCFCMOCSx registers are programmed for the subsequent
+	 * memory transactions including guc transactions
+	 */
+	if (flags & HAS_RENDER_L3CC)
+		init_l3cc_table(gt, &table);
+}
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
new file mode 100644
index 000000000000..aba1abe216ab
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MOCS_H_
+#define _XE_MOCS_H_
+
+#include <linux/types.h>
+
+struct xe_engine;
+struct xe_gt;
+
+void xe_mocs_init_engine(const struct xe_engine *engine);
+void xe_mocs_init(struct xe_gt *gt);
+
+/**
+ * xe_mocs_index_to_value - Translate a MOCS index to the MOCS value expected
+ * by most blitter commands.
+ * @mocs_index: index into the mocs tables
+ *
+ * Return: @mocs_index shifted left by one bit, ready to be programmed.
+ */
+static inline u32 xe_mocs_index_to_value(u32 mocs_index)
+{
+	return mocs_index * 2;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
new file mode 100644
index 000000000000..cc862553a252
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "xe_drv.h"
+#include "xe_hw_fence.h"
+#include "xe_module.h"
+#include "xe_pci.h"
+#include "xe_sched_job.h"
+
+bool enable_guc = true;
+module_param_named_unsafe(enable_guc, enable_guc, bool, 0444);
+MODULE_PARM_DESC(enable_guc, "Enable GuC submission");
+
+u32 xe_force_lmem_bar_size;
+module_param_named(lmem_bar_size, xe_force_lmem_bar_size, uint, 0600);
+MODULE_PARM_DESC(lmem_bar_size, "Set the lmem bar size(in MiB)");
+
+int xe_guc_log_level = 5;
+module_param_named(guc_log_level, xe_guc_log_level, int, 0600);
+MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
+
+char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE;
+module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400);
+MODULE_PARM_DESC(force_probe,
+		 "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
+
+struct init_funcs {
+	int (*init)(void);	/* non-zero return aborts xe_init() */
+	void (*exit)(void);	/* undoes @init; called in reverse table order */
+};
+#define MAKE_INIT_EXIT_FUNCS(name)		\
+	{ .init = xe_##name##_module_init,	\
+	  .exit = xe_##name##_module_exit, }
+static const struct init_funcs init_funcs[] = {	/* initialised in this order */
+	MAKE_INIT_EXIT_FUNCS(hw_fence),
+	MAKE_INIT_EXIT_FUNCS(sched_job),
+};
+
+static int __init xe_init(void)
+{
+	int err = 0, i;
+
+	for (i = 0; i < ARRAY_SIZE(init_funcs) && !err; i++)
+		err = init_funcs[i].init();
+	if (err)
+		i--;	/* the entry that just failed has nothing to undo */
+	else
+		err = xe_register_pci_driver();
+	/* On any failure, unwind the completed inits in reverse order. */
+	while (err && i--)
+		init_funcs[i].exit();
+	return err;
+}
+
+static void __exit xe_exit(void)
+{
+	int idx = ARRAY_SIZE(init_funcs);
+
+	xe_unregister_pci_driver();
+	/* Tear the sub-modules down in the reverse of their init order. */
+	while (idx--)
+		init_funcs[idx].exit();
+}
+
+module_init(xe_init);
+module_exit(xe_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
new file mode 100644
index 000000000000..2c6ee46f5595
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/init.h>
+
+/* Module modprobe variables */
+extern bool enable_guc;
+extern bool enable_display;
+extern u32 xe_force_lmem_bar_size;
+extern int xe_guc_log_level;
+extern char *xe_param_force_probe;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
new file mode 100644
index 000000000000..55d8a597a068
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_pci.h"
+
+#include <linux/device/driver.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_color_mgmt.h>
+#include <drm/xe_pciids.h>
+
+#include "xe_drv.h"
+#include "xe_device.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+#include "xe_pm.h"
+#include "xe_step.h"
+
+#include "i915_reg.h"
+
+#define DEV_INFO_FOR_EACH_FLAG(func) \
+	func(require_force_probe); \
+	func(is_dgfx); \
+	/* Keep has_* in alphabetical order */ \
+
+struct xe_subplatform_desc {
+	enum xe_subplatform subplatform;
+	const char *name;
+	const u16 *pciidlist;
+};
+
+struct xe_gt_desc {
+	enum xe_gt_type type;
+	u8 vram_id;
+	u64 engine_mask;
+	u32 mmio_adj_limit;
+	u32 mmio_adj_offset;
+};
+
+struct xe_device_desc {
+	u8 graphics_ver;
+	u8 graphics_rel;
+	u8 media_ver;
+	u8 media_rel;
+
+	u64 platform_engine_mask; /* Engines supported by the HW */
+
+	enum xe_platform platform;
+	const char *platform_name;
+	const struct xe_subplatform_desc *subplatforms;
+	const struct xe_gt_desc *extra_gts;
+
+	u8 dma_mask_size; /* available DMA address bits */
+
+	u8 gt; /* GT number, 0 if undefined */
+
+#define DEFINE_FLAG(name) u8 name:1
+	DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
+#undef DEFINE_FLAG
+
+	u8 vram_flags;
+	u8 max_tiles;
+	u8 vm_max_level;
+
+	bool supports_usm;
+	bool has_flat_ccs;
+	bool has_4tile;
+};
+
+#define PLATFORM(x)		\
+	.platform = (x),	\
+	.platform_name = #x
+
+#define NOP(x)	x
+
+/* Keep in gen based order, and chronological order within a gen */
+#define GEN12_FEATURES \
+	.require_force_probe = true, \
+	.graphics_ver = 12, \
+	.media_ver = 12, \
+	.dma_mask_size = 39, \
+	.max_tiles = 1, \
+	.vm_max_level = 3, \
+	.vram_flags = 0
+
+static const struct xe_device_desc tgl_desc = {
+	GEN12_FEATURES,
+	PLATFORM(XE_TIGERLAKE),
+	.platform_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+		BIT(XE_HW_ENGINE_VCS2),
+};
+
+static const struct xe_device_desc adl_s_desc = {
+	GEN12_FEATURES,
+	PLATFORM(XE_ALDERLAKE_S),
+	.platform_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+		BIT(XE_HW_ENGINE_VCS2),
+};
+
+static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
+
+static const struct xe_device_desc adl_p_desc = {
+	GEN12_FEATURES,
+	PLATFORM(XE_ALDERLAKE_P),
+	.platform_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+		BIT(XE_HW_ENGINE_VCS2),
+	.subplatforms = (const struct xe_subplatform_desc[]) {
+		{ XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids },
+		{},
+	},
+};
+
+#define DGFX_FEATURES \
+	.is_dgfx = 1
+
+static const struct xe_device_desc dg1_desc = {
+	GEN12_FEATURES,
+	DGFX_FEATURES,
+	.graphics_rel = 10,
+	PLATFORM(XE_DG1),
+	.platform_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+		BIT(XE_HW_ENGINE_VCS2),
+};
+
+#define XE_HP_FEATURES \
+	.require_force_probe = true, \
+	.graphics_ver = 12, \
+	.graphics_rel = 50, \
+	.has_flat_ccs = true, \
+	.dma_mask_size = 46, \
+	.max_tiles = 1, \
+	.vm_max_level = 3
+
+#define XE_HPM_FEATURES \
+	.media_ver = 12, \
+	.media_rel = 50
+
+static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
+static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 };
+static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
+
+#define DG2_FEATURES \
+	DGFX_FEATURES, \
+	.graphics_rel = 55, \
+	.media_rel = 55, \
+	PLATFORM(XE_DG2), \
+	.subplatforms = (const struct xe_subplatform_desc[]) { \
+		{ XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \
+		{ XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \
+		{ XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \
+		{ } \
+	}, \
+	.platform_engine_mask = \
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1) | \
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
+		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
+		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \
+	.require_force_probe = true, \
+	.vram_flags = XE_VRAM_FLAGS_NEED64K, \
+	.has_4tile = 1
+
+static const struct xe_device_desc ats_m_desc = {
+	XE_HP_FEATURES,
+	XE_HPM_FEATURES,
+
+	DG2_FEATURES,
+};
+
+static const struct xe_device_desc dg2_desc = {
+	XE_HP_FEATURES,
+	XE_HPM_FEATURES,
+
+	DG2_FEATURES,
+};
+
+#define PVC_ENGINES \
+	BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | \
+	BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | \
+	BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | \
+	BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | \
+	BIT(XE_HW_ENGINE_BCS8) | \
+	BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | \
+	BIT(XE_HW_ENGINE_VCS2) | \
+	BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
+	BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3)
+
+static const struct xe_gt_desc pvc_gts[] = {
+	{
+		.type = XE_GT_TYPE_REMOTE,
+		.vram_id = 1,
+		.engine_mask = PVC_ENGINES,
+		.mmio_adj_limit = 0,
+		.mmio_adj_offset = 0,
+	},
+};
+
+static const __maybe_unused struct xe_device_desc pvc_desc = {
+	XE_HP_FEATURES,
+	XE_HPM_FEATURES,
+	DGFX_FEATURES,
+	PLATFORM(XE_PVC),
+	.extra_gts = pvc_gts,
+	.graphics_rel = 60,
+	.has_flat_ccs = 0,
+	.media_rel = 60,
+	.platform_engine_mask = PVC_ENGINES,
+	.vram_flags = XE_VRAM_FLAGS_NEED64K,
+	.dma_mask_size = 52,
+	.max_tiles = 2,
+	.vm_max_level = 4,
+	.supports_usm = true,
+};
+
+#define MTL_MEDIA_ENGINES \
+	BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
+	BIT(XE_HW_ENGINE_VECS0)	/* TODO: GSC0 */
+
+static const struct xe_gt_desc xelpmp_gts[] = {
+	{
+		.type = XE_GT_TYPE_MEDIA,
+		.vram_id = 0,
+		.engine_mask = MTL_MEDIA_ENGINES,
+		.mmio_adj_limit = 0x40000,
+		.mmio_adj_offset = 0x380000,
+	},
+};
+
+#define MTL_MAIN_ENGINES \
+	BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \
+	BIT(XE_HW_ENGINE_CCS0)
+
+static const struct xe_device_desc mtl_desc = {
+	/*
+	 * Real graphics IP version will be obtained from hardware GMD_ID
+	 * register.  Value provided here is just for sanity checking.
+	 */
+	.require_force_probe = true,
+	.graphics_ver = 12,
+	.graphics_rel = 70,
+	.dma_mask_size = 46,
+	.max_tiles = 2,
+	.vm_max_level = 3,
+	.media_ver = 13,
+	PLATFORM(XE_METEORLAKE),
+	.extra_gts = xelpmp_gts,
+	.platform_engine_mask = MTL_MAIN_ENGINES,
+};
+
+#undef PLATFORM
+
+#define INTEL_VGA_DEVICE(id, info) {			\
+	PCI_DEVICE(PCI_VENDOR_ID_INTEL, id),		\
+	PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16,	\
+	(unsigned long) info }
+
+/*
+ * Make sure any device matches here are from most specific to most
+ * general.  For example, since the Quanta match is based on the subsystem
+ * and subvendor IDs, we need it to come before the more general IVB
+ * PCI ID matches, otherwise we'll use the wrong info struct above.
+ */
+static const struct pci_device_id pciidlist[] = {
+	XE_TGL_GT2_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
+	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
+	XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
+	XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+	XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, pciidlist);
+
+#undef INTEL_VGA_DEVICE
+
+/* is device_id present in comma separated list of ids */
+static bool device_id_in_list(u16 device_id, const char *devices, bool negative)
+{
+	char *s, *p, *tok;
+	bool ret;
+
+	if (!devices || !*devices)
+		return false;
+
+	/* match everything */
+	if (negative && strcmp(devices, "!*") == 0)
+		return true;
+	if (!negative && strcmp(devices, "*") == 0)
+		return true;
+
+	s = kstrdup(devices, GFP_KERNEL);
+	if (!s)
+		return false;
+
+	for (p = s, ret = false; (tok = strsep(&p, ",")) != NULL; ) {
+		u16 val;
+
+		if (negative && tok[0] == '!')
+			tok++;
+		else if ((negative && tok[0] != '!') ||
+			 (!negative && tok[0] == '!'))
+			continue;
+
+		if (kstrtou16(tok, 16, &val) == 0 && val == device_id) {
+			ret = true;
+			break;
+		}
+	}
+
+	kfree(s);
+
+	return ret;
+}
+
+static bool id_forced(u16 device_id)
+{
+	return device_id_in_list(device_id, xe_param_force_probe, false);
+}
+
+static bool id_blocked(u16 device_id)
+{
+	return device_id_in_list(device_id, xe_param_force_probe, true);
+}
+
+static const struct xe_subplatform_desc *
+subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc)
+{
+	const struct xe_subplatform_desc *sp;
+	const u16 *id;
+
+	for (sp = desc->subplatforms; sp && sp->subplatform; sp++)
+		for (id = sp->pciidlist; *id; id++)
+			if (*id == xe->info.devid)
+				return sp;
+
+	return NULL;
+}
+
+static void xe_pci_remove(struct pci_dev *pdev)
+{
+	struct xe_device *xe;
+
+	xe = pci_get_drvdata(pdev);
+	if (!xe) /* driver load aborted, nothing to cleanup */
+		return;
+
+	xe_device_remove(xe);
+	pci_set_drvdata(pdev, NULL);
+}
+
+static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	const struct xe_device_desc *desc = (void *)ent->driver_data;
+	const struct xe_subplatform_desc *spd;
+	struct xe_device *xe;
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	if (desc->require_force_probe && !id_forced(pdev->device)) {
+		dev_info(&pdev->dev,
+			 "Your graphics device %04x is not officially supported\n"
+			 "by xe driver in this kernel version. To force Xe probe,\n"
+			 "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n"
+			 "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n"
+			 "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n",
+			 pdev->device, pdev->device, pdev->device,
+			 pdev->device, pdev->device);
+		return -ENODEV;
+	}
+
+	if (id_blocked(pdev->device)) {
+		dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n",
+			 pdev->vendor, pdev->device);
+		return -ENODEV;
+	}
+
+	xe = xe_device_create(pdev, ent);
+	if (IS_ERR(xe))
+		return PTR_ERR(xe);
+
+	xe->info.graphics_verx100 = desc->graphics_ver * 100 +
+				    desc->graphics_rel;
+	xe->info.media_verx100 = desc->media_ver * 100 +
+				 desc->media_rel;
+	xe->info.is_dgfx = desc->is_dgfx;
+	xe->info.platform = desc->platform;
+	xe->info.dma_mask_size = desc->dma_mask_size;
+	xe->info.vram_flags = desc->vram_flags;
+	xe->info.tile_count = desc->max_tiles;
+	xe->info.vm_max_level = desc->vm_max_level;
+	xe->info.media_ver = desc->media_ver;
+	xe->info.supports_usm = desc->supports_usm;
+	xe->info.has_flat_ccs = desc->has_flat_ccs;
+	xe->info.has_4tile = desc->has_4tile;
+
+	spd = subplatform_get(xe, desc);
+	xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;
+	xe->info.step = xe_step_get(xe);
+
+	for (id = 0; id < xe->info.tile_count; ++id) {
+		gt = xe->gt + id;
+		gt->info.id = id;
+		gt->xe = xe;
+
+		if (id == 0) {
+			gt->info.type = XE_GT_TYPE_MAIN;
+			gt->info.vram_id = id;
+			gt->info.engine_mask = desc->platform_engine_mask;
+			gt->mmio.adj_limit = 0;
+			gt->mmio.adj_offset = 0;
+		} else {
+			gt->info.type = desc->extra_gts[id - 1].type;
+			gt->info.vram_id = desc->extra_gts[id - 1].vram_id;
+			gt->info.engine_mask =
+				desc->extra_gts[id - 1].engine_mask;
+			gt->mmio.adj_limit =
+				desc->extra_gts[id - 1].mmio_adj_limit;
+			gt->mmio.adj_offset =
+				desc->extra_gts[id - 1].mmio_adj_offset;
+		}
+	}
+
+	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d",
+		desc->platform_name, spd ? spd->name : "",
+		xe->info.devid, xe->info.revid,
+		xe->info.is_dgfx, xe->info.graphics_verx100,
+		xe->info.media_verx100,
+		xe->info.dma_mask_size, xe->info.tile_count);
+
+	drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
+		xe_step_name(xe->info.step.graphics),
+		xe_step_name(xe->info.step.media),
+		xe_step_name(xe->info.step.display),
+		xe_step_name(xe->info.step.basedie));
+
+	pci_set_drvdata(pdev, xe);
+	err = pci_enable_device(pdev);
+	if (err) {
+		drm_dev_put(&xe->drm);
+		return err;
+	}
+
+	pci_set_master(pdev);
+
+	if (pci_enable_msi(pdev) < 0)
+		drm_dbg(&xe->drm, "can't enable MSI");
+
+	err = xe_device_probe(xe);
+	if (err) {
+		pci_disable_device(pdev);
+		return err;
+	}
+
+	xe_pm_runtime_init(xe);
+
+	return 0;
+}
+
+static void xe_pci_shutdown(struct pci_dev *pdev)
+{
+	xe_device_shutdown(pdev_to_xe_device(pdev));
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int xe_pci_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	err = xe_pm_suspend(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+
+	err = pci_set_power_state(pdev, PCI_D3hot);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int xe_pci_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	pci_restore_state(pdev);
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	pci_set_master(pdev);
+
+	err = xe_pm_resume(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	return 0;
+}
+#endif
+
+static int xe_pci_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = xe_pm_runtime_suspend(xe);
+	if (err)
+		return err;
+
+	pci_save_state(pdev);
+
+	if (xe->d3cold_allowed) {
+		pci_disable_device(pdev);
+		pci_ignore_hotplug(pdev);
+		pci_set_power_state(pdev, PCI_D3cold);
+	} else {
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+
+	return 0;
+}
+
+static int xe_pci_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	pci_restore_state(pdev);
+
+	if (xe->d3cold_allowed) {
+		err = pci_enable_device(pdev);
+		if (err)
+			return err;
+
+		pci_set_master(pdev);
+	}
+
+	return xe_pm_runtime_resume(xe);
+}
+
+static int xe_pci_runtime_idle(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+
+	/*
+	 * FIXME: d3cold should be allowed (true) if
+	 * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe))
+	 * however the change to the buddy allocator broke the
+	 * xe_bo_restore_kernel when the pci device is disabled
+	 */
+	xe->d3cold_allowed = false;
+
+	return 0;
+}
+
+static const struct dev_pm_ops xe_pm_ops = {
+	/*
+	 * xe_pci_suspend/xe_pci_resume only exist under CONFIG_PM_SLEEP; the
+	 * macro wires them to suspend/resume, freeze/thaw and poweroff/restore
+	 * and expands to nothing when sleep support is compiled out.
+	 */
+	SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume)
+	.runtime_suspend = xe_pci_runtime_suspend,
+	.runtime_resume = xe_pci_runtime_resume,
+	.runtime_idle = xe_pci_runtime_idle,
+};
+
+static struct pci_driver xe_pci_driver = {
+	.name = DRIVER_NAME,
+	.id_table = pciidlist,
+	.probe = xe_pci_probe,
+	.remove = xe_pci_remove,
+	.shutdown = xe_pci_shutdown,
+	.driver.pm = &xe_pm_ops,
+};
+
+int xe_register_pci_driver(void)
+{
+	return pci_register_driver(&xe_pci_driver);
+}
+
+void xe_unregister_pci_driver(void)
+{
+	pci_unregister_driver(&xe_pci_driver);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+/* driver_for_each_device() callback: run @data (an xe_device_fn) on @dev. */
+static int dev_to_xe_device_fn(struct device *dev, void *data)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	int (*xe_fn)(struct xe_device *xe) = data;
+	int ret = 0;
+	int idx;
+
+	if (drm_dev_enter(drm, &idx)) {
+		ret = xe_fn(to_xe_device(drm));
+		drm_dev_exit(idx); /* only valid after a successful enter */
+	}
+	return ret;
+}
+
+/**
+ * xe_call_for_each_device - Iterate over all devices this driver binds to
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterates over all devices this driver binds to, and calls
+ * @xe_fn for each one of them. If the called function returns anything else
+ * than 0, iteration is stopped and the return value is returned by this
+ * function. Across each function call, drm_dev_enter() / drm_dev_exit() is
+ * called for the corresponding drm device.
+ *
+ * Return: Zero or the error code of a call to @xe_fn returning an error
+ * code.
+ */
+int xe_call_for_each_device(xe_device_fn xe_fn)
+{
+	return driver_for_each_device(&xe_pci_driver.driver, NULL,
+				      xe_fn, dev_to_xe_device_fn);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
new file mode 100644
index 000000000000..9e3089549d5f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_PCI_H_
+#define _XE_PCI_H_
+
+#include "tests/xe_test.h"
+
+int xe_register_pci_driver(void);
+void xe_unregister_pci_driver(void);
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+struct xe_device;
+
+typedef int (*xe_device_fn)(struct xe_device *);
+
+int xe_call_for_each_device(xe_device_fn xe_fn);
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
new file mode 100644
index 000000000000..236159c8a6c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pcode_api.h"
+#include "xe_pcode.h"
+
+#include "xe_gt.h"
+#include "xe_mmio.h"
+
+#include <linux/errno.h>
+
+/**
+ * DOC: PCODE
+ *
+ * Xe PCODE is the component responsible for interfacing with the PCODE
+ * firmware.
+ * It shall provide a very simple ABI to other Xe components, but be the
+ * single and consolidated place that will communicate with PCODE. All read
+ * and write operations to PCODE will be internal and private to this component.
+ *
+ * What's next:
+ * - PCODE hw metrics
+ * - PCODE for display operations
+ */
+
+static int pcode_mailbox_status(struct xe_gt *gt)
+{
+	u32 err;
+	static const struct pcode_err_decode err_decode[] = {
+		[PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"},
+		[PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"},
+		[PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"},
+		[PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"},
+		[PCODE_LOCKED] = {-EBUSY, "PCODE Locked"},
+		[PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW,
+			"GT ratio out of range"},
+		[PCODE_REJECTED] = {-EACCES, "PCODE Rejected"},
+		[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
+	};
+
+	lockdep_assert_held(&gt->pcode.lock);
+
+	err = xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_ERROR_MASK;
+	if (err) {
+		drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
+			err_decode[err].str ?: "Unknown");
+		return err_decode[err].errno ?: -EPROTO;
+	}
+
+	return 0;
+}
+
+static bool pcode_mailbox_done(struct xe_gt *gt)
+{
+	lockdep_assert_held(&gt->pcode.lock);
+	return (xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) == 0;
+}
+
+static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+			    unsigned int timeout, bool return_data, bool atomic)
+{
+	int err;
+
+	lockdep_assert_held(&gt->pcode.lock);
+	if (!pcode_mailbox_done(gt))
+		return -EAGAIN; /* PCODE_READY still set from an earlier command */
+
+	xe_mmio_write32(gt, PCODE_DATA0.reg, *data0);
+	xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0);
+	xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox);
+	if (atomic)
+		err = _wait_for_atomic(pcode_mailbox_done(gt), timeout * 1000, 1);
+	else
+		err = wait_for(pcode_mailbox_done(gt), timeout);
+	if (err) /* still busy: bits 7:0 hold the command, not a status code */
+		return err;
+	if (return_data) {
+		*data0 = xe_mmio_read32(gt, PCODE_DATA0.reg);
+		if (data1)
+			*data1 = xe_mmio_read32(gt, PCODE_DATA1.reg);
+	}
+	return pcode_mailbox_status(gt);
+}
+
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false);
+	mutex_unlock(&gt->pcode.lock);
+
+	return err;
+}
+
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false);
+	mutex_unlock(&gt->pcode.lock);
+
+	return err;
+}
+
+static bool xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
+				  u32 request, u32 reply_mask, u32 reply,
+				  u32 *status, bool atomic)
+{
+	*status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, atomic);
+
+	return (*status == 0) && ((request & reply_mask) == reply);
+}
+
+/**
+ * xe_pcode_request - send PCODE request until acknowledgment
+ * @gt: gt
+ * @mbox: PCODE mailbox ID the request is targeted for
+ * @request: request ID
+ * @reply_mask: mask used to check for request acknowledgment
+ * @reply: value used to check for request acknowledgment
+ * @timeout_base_ms: timeout for polling with preemption enabled
+ *
+ * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
+ * The request is acknowledged once the PCODE reply dword equals @reply after
+ * applying @reply_mask. Polling is first attempted with preemption enabled
+ * for @timeout_base_ms and if this times out for another 50 ms with
+ * preemption disabled.
+ *
+ * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
+ * other error as reported by PCODE.
+ */
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms)
+{
+	u32 status;
+	int ret;
+	bool atomic = false;
+
+	mutex_lock(&gt->pcode.lock);
+
+#define COND \
+	xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, atomic)
+
+	/*
+	 * Prime the PCODE by doing a request first. Normally it guarantees
+	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
+	 * _wait_for() doesn't guarantee when its passed condition is evaluated
+	 * first, so send the first request explicitly.
+	 */
+	if (COND) {
+		ret = 0;
+		goto out;
+	}
+	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
+	if (!ret)
+		goto out;
+
+	/*
+	 * The above can time out if the number of requests was low (2 in the
+	 * worst case) _and_ PCODE was busy for some reason even after a
+	 * (queued) request and @timeout_base_ms delay. As a workaround retry
+	 * the poll with preemption disabled to maximize the number of
+	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
+	 * account for interrupts that could reduce the number of these
+	 * requests, and for any quirks of the PCODE firmware that delays
+	 * the request completion.
+	 */
+	drm_err(&gt_to_xe(gt)->drm,
+		"PCODE timeout, retrying with preemption disabled\n");
+	drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
+	preempt_disable();
+	atomic = true;
+	ret = wait_for_atomic(COND, 50);
+	atomic = false;
+	preempt_enable();
+
+out:
+	mutex_unlock(&gt->pcode.lock);
+	return status ? status : ret;
+#undef COND
+}
+/**
+ * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table
+ * @gt: gt instance
+ * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz.
+ * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz.
+ *
+ * This function initializes PCODE's QOS frequency table for a proper minimal
+ * frequency/power steering decision, depending on the current requested GT
+ * frequency. For older platforms this was a more complete table including
+ * the IA freq. However for the latest platforms this table became a simple
+ * 1-1 Ring vs GT frequency. Even so, without setting it, PCODE might
+ * not take the right decisions for some memory frequencies and affect latency.
+ *
+ * It returns 0 on success, and -ERROR number on failure, -EINVAL if max
+ * frequency is not higher than the minimal, and other errors directly
+ * translated from the PCODE Error returns:
+ * - -ENXIO: "Illegal Command"
+ * - -ETIMEDOUT: "Timed out"
+ * - -EINVAL: "Illegal Data"
+ * - -ENXIO, "Illegal Subcommand"
+ * - -EBUSY: "PCODE Locked"
+ * - -EOVERFLOW, "GT ratio out of range"
+ * - -EACCES, "PCODE Rejected"
+ * - -EPROTO, "Unknown"
+ */
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq)
+{
+	int ret;
+	u32 freq;
+
+	if (IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	if (max_gt_freq <= min_gt_freq)
+		return -EINVAL;
+
+	mutex_lock(&gt->pcode.lock);
+	for (freq = min_gt_freq; freq <= max_gt_freq; freq++) {
+		u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq;
+
+		ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE,
+				       &data, NULL, 1, false, false);
+		if (ret)
+			goto unlock;
+	}
+
+unlock:
+	mutex_unlock(&gt->pcode.lock);
+	return ret;
+}
+
+static bool pcode_dgfx_status_complete(struct xe_gt *gt)
+{
+	u32 data = DGFX_GET_INIT_STATUS;
+	int status = pcode_mailbox_rw(gt, DGFX_PCODE_STATUS,
+				      &data, NULL, 1, true, false);
+
+	return status == 0 &&
+		(data & DGFX_INIT_STATUS_COMPLETE) == DGFX_INIT_STATUS_COMPLETE;
+}
+
+/**
+ * xe_pcode_init - Ensure PCODE is initialized
+ * @gt: gt instance
+ *
+ * This function ensures that PCODE is properly initialized. To be called during
+ * probe and resume paths.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_init(struct xe_gt *gt)
+{
+	int timeout = 180000; /* 3 min */
+	int ret;
+
+	if (!IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	mutex_lock(&gt->pcode.lock);
+	ret = wait_for(pcode_dgfx_status_complete(gt), timeout);
+	mutex_unlock(&gt->pcode.lock);
+
+	if (ret)
+		drm_err(&gt_to_xe(gt)->drm,
+			"PCODE initialization timedout after: %d min\n",
+			timeout / 60000);
+
+	return ret;
+}
+
+/**
+ * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
+ * @gt: gt instance
+ *
+ * This function initializes the xe_pcode component, and when needed, it ensures
+ * that PCODE has properly performed its initialization and it is really ready
+ * to go. To be called once only during probe.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_probe(struct xe_gt *gt)
+{
+	mutex_init(&gt->pcode.lock);
+
+	if (!IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	return xe_pcode_init(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
new file mode 100644
index 000000000000..3b4aa8c1a3ba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCODE_H_
+#define _XE_PCODE_H_
+
+#include <linux/types.h>
+struct xe_gt;
+
+int xe_pcode_probe(struct xe_gt *gt);
+int xe_pcode_init(struct xe_gt *gt);
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq);
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val,
+			   int timeout_ms);
+#define xe_pcode_write(gt, mbox, val) \
+	xe_pcode_write_timeout(gt, mbox, val, 1)
+
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		     u32 reply_mask, u32 reply, int timeout_ms);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
new file mode 100644
index 000000000000..0762c8a912c7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+/* Internal to xe_pcode */
+
+#define PCODE_MAILBOX			_MMIO(0x138124)
+#define   PCODE_READY			REG_BIT(31)
+#define   PCODE_MB_PARAM2		REG_GENMASK(23, 16)
+#define   PCODE_MB_PARAM1		REG_GENMASK(15, 8)
+#define   PCODE_MB_COMMAND		REG_GENMASK(7, 0)
+#define   PCODE_ERROR_MASK		0xFF
+#define     PCODE_SUCCESS		0x0
+#define     PCODE_ILLEGAL_CMD		0x1
+#define     PCODE_TIMEOUT		0x2
+#define     PCODE_ILLEGAL_DATA		0x3
+#define     PCODE_ILLEGAL_SUBCOMMAND	0x4
+#define     PCODE_LOCKED		0x6
+#define     PCODE_GT_RATIO_OUT_OF_RANGE	0x10
+#define     PCODE_REJECTED		0x11
+
+#define PCODE_DATA0			_MMIO(0x138128)
+#define PCODE_DATA1			_MMIO(0x13812C)
+
+/* Min Freq QOS Table */
+#define   PCODE_WRITE_MIN_FREQ_TABLE	0x8
+#define   PCODE_READ_MIN_FREQ_TABLE	0x9
+#define   PCODE_FREQ_RING_RATIO_SHIFT	16
+
+/* PCODE Init */
+#define   DGFX_PCODE_STATUS		0x7E
+#define     DGFX_GET_INIT_STATUS	0x0
+#define     DGFX_INIT_STATUS_COMPLETE	0x1
+
+struct pcode_err_decode {
+	int errno;
+	const char *str;
+};
+
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
new file mode 100644
index 000000000000..72612c832e88
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PLATFORM_INFO_TYPES_H_
+#define _XE_PLATFORM_INFO_TYPES_H_
+
+/* Keep in gen based order, and chronological order within a gen */
+enum xe_platform {
+	XE_PLATFORM_UNINITIALIZED = 0,
+	/* gen12 */
+	XE_TIGERLAKE,
+	XE_ROCKETLAKE,
+	XE_DG1,
+	XE_DG2,
+	XE_PVC,
+	XE_ALDERLAKE_S,
+	XE_ALDERLAKE_P,
+	XE_METEORLAKE,
+};
+
+enum xe_subplatform {
+	XE_SUBPLATFORM_UNINITIALIZED = 0,
+	XE_SUBPLATFORM_NONE,
+	XE_SUBPLATFORM_DG2_G10,
+	XE_SUBPLATFORM_DG2_G11,
+	XE_SUBPLATFORM_DG2_G12,
+	XE_SUBPLATFORM_ADLP_RPLU,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
new file mode 100644
index 000000000000..fb0355530e7b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/pm_runtime.h>
+
+#include <drm/ttm/ttm_placement.h>
+
+#include "xe_bo.h"
+#include "xe_bo_evict.h"
+#include "xe_device.h"
+#include "xe_pm.h"
+#include "xe_gt.h"
+#include "xe_ggtt.h"
+#include "xe_irq.h"
+#include "xe_pcode.h"
+
+/**
+ * DOC: Xe Power Management
+ *
+ * Xe PM shall be guided by simplicity.
+ * Use the simplest hook options whenever possible.
+ * Let's not reinvent the runtime_pm references and hooks.
+ * Shall have a clear separation of display and gt underneath this component.
+ *
+ * What's next:
+ *
+ * For now s2idle and s3 are only working in integrated devices. The next step
+ * is to iterate through all VRAM's BO backing them up into the system memory
+ * before allowing the system suspend.
+ *
+ * Also runtime_pm needs to be here from the beginning.
+ *
+ * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
+ * and no wait boost. Frequency optimizations should come on a next stage.
+ */
+
+/**
+ * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
+ * @xe: xe device instance
+ *
+ * Return: 0 on success
+ */
+int xe_pm_suspend(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	for_each_gt(gt, xe, id)
+		xe_gt_suspend_prepare(gt);
+
+	/* FIXME: Super racey... */
+	err = xe_bo_evict_all(xe);
+	if (err)
+		return err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_suspend(gt);
+		if (err)
+			return err;
+	}
+
+	xe_irq_suspend(xe);
+
+	return 0;
+}
+
+/**
+ * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
+ * @xe: xe device instance
+ *
+ * Return: 0 on success
+ */
+int xe_pm_resume(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_pcode_init(gt);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * This only restores pinned memory which is the memory required for the
+	 * GT(s) to resume.
+	 */
+	err = xe_bo_restore_kernel(xe);
+	if (err)
+		return err;
+
+	xe_irq_resume(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_resume(gt);
+
+	err = xe_bo_restore_user(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void xe_pm_runtime_init(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 1000);
+	pm_runtime_set_active(dev);
+	pm_runtime_allow(dev);
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+}
+
+int xe_pm_runtime_suspend(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	if (xe->d3cold_allowed) {
+		if (xe_device_mem_access_ongoing(xe))
+			return -EBUSY;
+
+		err = xe_bo_evict_all(xe);
+		if (err)
+			return err;
+	}
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_suspend(gt);
+		if (err)
+			return err;
+	}
+
+	xe_irq_suspend(xe);
+
+	return 0;
+}
+
+int xe_pm_runtime_resume(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	if (xe->d3cold_allowed) {
+		for_each_gt(gt, xe, id) {
+			err = xe_pcode_init(gt);
+			if (err)
+				return err;
+		}
+
+		/*
+		 * This only restores pinned memory which is the memory
+		 * required for the GT(s) to resume.
+		 */
+		err = xe_bo_restore_kernel(xe);
+		if (err)
+			return err;
+	}
+
+	xe_irq_resume(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_resume(gt);
+
+	if (xe->d3cold_allowed) {
+		err = xe_bo_restore_user(xe);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int xe_pm_runtime_get(struct xe_device *xe)
+{
+	return pm_runtime_get_sync(xe->drm.dev);
+}
+
+int xe_pm_runtime_put(struct xe_device *xe)
+{
+	pm_runtime_mark_last_busy(xe->drm.dev);
+	return pm_runtime_put_autosuspend(xe->drm.dev);
+}
+
+/* Return true if resume operation happened and usage count was increased */
+bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe)
+{
+	/* In case we are suspended we need to immediately wake up */
+	if (pm_runtime_suspended(xe->drm.dev))
+		return !pm_runtime_resume_and_get(xe->drm.dev);
+
+	return false;
+}
+
+int xe_pm_runtime_get_if_active(struct xe_device *xe)
+{
+	WARN_ON(pm_runtime_suspended(xe->drm.dev));
+	return pm_runtime_get_if_active(xe->drm.dev, true);
+}
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
new file mode 100644
index 000000000000..b8c5f9558e26
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PM_H_
+#define _XE_PM_H_
+
+#include <linux/pm_runtime.h>
+
+struct xe_device;
+
+int xe_pm_suspend(struct xe_device *xe);
+int xe_pm_resume(struct xe_device *xe);
+
+void xe_pm_runtime_init(struct xe_device *xe);
+int xe_pm_runtime_suspend(struct xe_device *xe);
+int xe_pm_runtime_resume(struct xe_device *xe);
+int xe_pm_runtime_get(struct xe_device *xe);
+int xe_pm_runtime_put(struct xe_device *xe);
+bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe);
+int xe_pm_runtime_get_if_active(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
new file mode 100644
index 000000000000..6ab9ff442766
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/slab.h>
+
+#include "xe_engine.h"
+#include "xe_preempt_fence.h"
+#include "xe_vm.h"
+
+static void preempt_fence_work_func(struct work_struct *w)
+{
+	struct xe_preempt_fence *pf = container_of(w, typeof(*pf), preempt_work);
+	struct xe_engine *engine = pf->engine;
+	bool cookie;
+
+	/* Everything up to dma_fence_end_signalling() is on the signalling path. */
+	cookie = dma_fence_begin_signalling();
+	if (!pf->error)
+		engine->ops->suspend_wait(engine);
+	else
+		dma_fence_set_error(&pf->base, pf->error);
+	dma_fence_signal(&pf->base);
+	dma_fence_end_signalling(cookie);
+
+	/* Kick the VM's rebind worker, then drop the engine ref taken at arm time. */
+	queue_work(system_unbound_wq, &engine->vm->preempt.rebind_work);
+	xe_engine_put(engine);
+}
+
+static const char *
+preempt_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "xe";	/* same for every preempt fence; @fence is not looked at */
+}
+
+static const char *
+preempt_fence_get_timeline_name(struct dma_fence *fence)
+{
+	return "preempt";	/* same for every preempt fence; @fence is not looked at */
+}
+
+static bool preempt_fence_enable_signaling(struct dma_fence *fence)
+{
+	struct xe_preempt_fence *pf = container_of(fence, typeof(*pf), base);
+	struct xe_engine *engine = pf->engine;
+
+	/* Start the suspend now; the worker waits for it and signals the fence. */
+	pf->error = engine->ops->suspend(engine);
+	queue_work(system_unbound_wq, &pf->preempt_work);
+	return true;
+}
+
+static const struct dma_fence_ops preempt_fence_ops = {
+	.get_driver_name = preempt_fence_get_driver_name,
+	.get_timeline_name = preempt_fence_get_timeline_name,
+	.enable_signaling = preempt_fence_enable_signaling,
+};
+
+/**
+ * xe_preempt_fence_alloc() - Allocate a preempt fence with minimal
+ * initialization
+ *
+ * The fence is kmalloc'ed (not zeroed); only its list head and its work
+ * item are initialized here. Once armed with xe_preempt_fence_arm() the
+ * fence must be released with dma_fence_put(); a fence that was never
+ * armed must be released with xe_preempt_fence_free() instead.
+ *
+ * Return: A struct xe_preempt_fence pointer to hand to
+ * xe_preempt_fence_arm() or xe_preempt_fence_free(), or
+ * ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+struct xe_preempt_fence *xe_preempt_fence_alloc(void)
+{
+	struct xe_preempt_fence *pf = kmalloc(sizeof(*pf), GFP_KERNEL);
+
+	if (!pf)
+		return ERR_PTR(-ENOMEM);
+
+	/* Only @link and @preempt_work are set up; the rest is left to arming. */
+	INIT_WORK(&pf->preempt_work, preempt_fence_work_func);
+	INIT_LIST_HEAD(&pf->link);
+
+	return pf;
+}
+
+/**
+ * xe_preempt_fence_free() - Free a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: Pointer obtained from xe_preempt_fence_alloc(). Must not be armed.
+ *
+ * Unlinks @pfence from any list its link is on and frees it with kfree().
+ */
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence)
+{
+	list_del(&pfence->link);
+	kfree(pfence);
+}
+
+/**
+ * xe_preempt_fence_arm() - Arm a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: The struct xe_preempt_fence pointer returned from
+ *          xe_preempt_fence_alloc().
+ * @e: The struct xe_engine to preempt; a reference to it is taken.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Takes @pfence off any list it is linked on, records @e as the engine to
+ * be preempted and initializes the embedded dma-fence on @context's timeline.
+ *
+ * Return: A pointer to the struct dma_fence embedded into the preempt fence.
+ * This function doesn't error.
+ */
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e,
+		     u64 context, u32 seqno)
+{
+	struct dma_fence *fence = &pfence->base;
+
+	pfence->engine = xe_engine_get(e);	/* dropped by the fence worker */
+	list_del_init(&pfence->link);
+	dma_fence_init(fence, &preempt_fence_ops, &e->compute.lock, context, seqno);
+	return fence;
+}
+
+/**
+ * xe_preempt_fence_create() - Helper to create and arm a preempt fence.
+ * @e: The struct xe_engine used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Shorthand for xe_preempt_fence_alloc() immediately followed by
+ * xe_preempt_fence_arm() with @e, @context and @seqno.
+ *
+ * Return: A pointer to the resulting struct dma_fence on success. An error
+ * pointer on error; in particular ERR_PTR(-ENOMEM) if the allocation
+ * fails.
+ */
+struct dma_fence *
+xe_preempt_fence_create(struct xe_engine *e,
+			u64 context, u32 seqno)
+{
+	struct xe_preempt_fence *pf = xe_preempt_fence_alloc();
+
+	/* Arming cannot fail, so the allocation is the only error source. */
+	if (!IS_ERR(pf))
+		return xe_preempt_fence_arm(pf, e, context, seqno);
+
+	return ERR_CAST(pf);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence)
+{
+	return fence->ops == &preempt_fence_ops;	/* the ops table identifies the type */
+}
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h
new file mode 100644
index 000000000000..4f3966103203
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_H_
+#define _XE_PREEMPT_FENCE_H_
+
+#include "xe_preempt_fence_types.h"
+
+struct list_head;
+
+struct dma_fence *
+xe_preempt_fence_create(struct xe_engine *e,
+			u64 context, u32 seqno);
+
+struct xe_preempt_fence *xe_preempt_fence_alloc(void);
+
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence);
+
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e,
+		     u64 context, u32 seqno);
+
+static inline struct xe_preempt_fence *
+to_preempt_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct xe_preempt_fence, base); /* unchecked downcast */
+}
+
+/**
+ * xe_preempt_fence_link() - Return a link used to keep unarmed preempt
+ * fences on a list.
+ * @pfence: Pointer to the preempt fence.
+ *
+ * The link is embedded in the struct xe_preempt_fence. Use
+ * to_preempt_fence_from_link() to convert back to the preempt fence.
+ *
+ * Return: A pointer to an embedded struct list_head.
+ */
+static inline struct list_head *
+xe_preempt_fence_link(struct xe_preempt_fence *pfence)
+{
+	return &pfence->link;
+}
+
+/**
+ * to_preempt_fence_from_link() - Convert back to a preempt fence pointer
+ * from a link obtained with xe_preempt_fence_link().
+ * @link: The struct list_head obtained from xe_preempt_fence_link().
+ *
+ * Return: A pointer to the embedding struct xe_preempt_fence.
+ */
+static inline struct xe_preempt_fence *
+to_preempt_fence_from_link(struct list_head *link)
+{
+	return container_of(link, struct xe_preempt_fence, link);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
new file mode 100644
index 000000000000..9d9efd8ff0ed
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_TYPES_H_
+#define _XE_PREEMPT_FENCE_TYPES_H_
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+struct xe_engine;
+
+/**
+ * struct xe_preempt_fence - XE preempt fence
+ *
+ * A preemption fence which, once signaling is enabled, suspends the execution
+ * of an xe_engine on the hardware and signals when that suspend has finished.
+ */
+struct xe_preempt_fence {
+	/** @base: dma fence base */
+	struct dma_fence base;
+	/** @link: link for keeping not yet armed preempt fences on a list */
+	struct list_head link;
+	/** @engine: xe engine to suspend; a reference is held once armed */
+	struct xe_engine *engine;
+	/** @preempt_work: worker which waits for the suspend and signals the fence */
+	struct work_struct preempt_work;
+	/** @error: result of the engine's suspend op; set as fence error if non-zero */
+	int error;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
new file mode 100644
index 000000000000..81193ddd0af7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -0,0 +1,1542 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_migrate.h"
+#include "xe_pt.h"
+#include "xe_pt_types.h"
+#include "xe_pt_walk.h"
+#include "xe_vm.h"
+#include "xe_res_cursor.h"
+
+struct xe_pt_dir {
+	struct xe_pt pt;
+	/** @dir: Directory structure for the xe_pt_walk functionality */
+	struct xe_ptw_dir dir;
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
+#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
+#else
+#define xe_pt_set_addr(__xe_pt, __addr)
+#define xe_pt_addr(__xe_pt) 0ull
+#endif
+
+static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48};
+static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};
+
+#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1)
+
+static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
+{
+	return container_of(pt, struct xe_pt_dir, pt);	/* only valid for level > 0 */
+}
+
+static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
+{
+	return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
+}
+
+/**
+ * gen8_pde_encode() - Encode a page-table directory entry pointing to
+ * another page-table.
+ * @bo: The page-table bo of the page-table to point to.
+ * @bo_offset: Offset in the page-table bo to point to.
+ * @level: The cache level of @bo (not a page-table level).
+ *
+ * TODO: Rename.
+ *
+ * Return: The address of @bo at @bo_offset with PDE flag bits set. No errors.
+ */
+u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
+		    const enum xe_cache_level level)
+{
+	bool is_lmem;
+	u64 addr = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem);
+	u64 ppat;
+
+	/* On discrete devices the page-table is expected in local memory. */
+	XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_lmem);
+
+	/* FIXME: I don't think the PPAT handling is correct for MTL */
+
+	if (level == XE_CACHE_NONE)
+		ppat = PPAT_UNCACHED;
+	else
+		ppat = PPAT_CACHED_PDE;
+
+	/* Directory entries are always present and writable. */
+	return addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW | ppat;
+}
+
+static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
+			   size_t page_size, bool *is_lmem)
+{
+	struct xe_res_cursor cur;
+	u64 in_page, page_start;
+
+	/* BO-backed vmas: address and placement both come from xe_bo_addr(). */
+	if (!xe_vma_is_userptr(vma))
+		return xe_bo_addr(vma->bo, offset, page_size, is_lmem);
+
+	/* Userptr: never reported as lmem; look the page up in the sg table. */
+	*is_lmem = false;
+	in_page = offset & (PAGE_SIZE - 1);
+	page_start = (offset >> PAGE_SHIFT) << PAGE_SHIFT;
+
+	xe_res_first_sg(vma->userptr.sg, page_start, page_size, &cur);
+	return xe_res_dma(&cur) + in_page;
+}
+
+static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
+			     u32 pt_level)
+{
+	/* XXX: Does hw support 1 GiB pages? */
+	XE_BUG_ON(pt_level > 2);
+
+	/* Present always; writable unless a read-only mapping was requested. */
+	if (likely(!(flags & PTE_READ_ONLY)))
+		pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+	else
+		pte = (pte | GEN8_PAGE_PRESENT) & ~GEN8_PAGE_RW;
+
+	/* FIXME: I don't think the PPAT handling is correct for MTL */
+	if (cache == XE_CACHE_NONE)
+		pte |= PPAT_UNCACHED;
+	else if (cache == XE_CACHE_WT)
+		pte |= PPAT_DISPLAY_ELLC;
+	else
+		pte |= PPAT_CACHED;
+
+	/* Entries above level 0 get a page-size bit: 2M at level 1, 1G at 2. */
+	switch (pt_level) {
+	case 1:
+		pte |= GEN8_PDE_PS_2M;
+		break;
+	case 2:
+		pte |= GEN8_PDPE_PS_1G;
+		break;
+	}
+
+	return pte;
+}
+
+/**
+ * gen8_pte_encode() - Encode a page-table entry pointing to memory.
+ * @vma: The vma representing the memory to point to.
+ * @bo: Only used if @vma is NULL: the bo representing the memory to point to.
+ * @offset: The offset into @vma or @bo.
+ * @cache: The cache level to encode into the entry.
+ * @flags: Currently only supports PTE_READ_ONLY for read-only access.
+ * @pt_level: The page-table level of the page-table into which the entry
+ * is to be inserted.
+ *
+ * TODO: Rename.
+ *
+ * Return: An encoded page-table entry. No errors.
+ */
+u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
+		    u64 offset, enum xe_cache_level cache,
+		    u32 flags, u32 pt_level)
+{
+	u64 pte;
+	bool is_vram;
+
+	if (vma)
+		pte = vma_addr(vma, offset, GEN8_PAGE_SIZE, &is_vram);
+	else
+		pte = xe_bo_addr(bo, offset, GEN8_PAGE_SIZE, &is_vram);
+
+	if (is_vram) {
+		pte |= GEN12_PPGTT_PTE_LM;
+		if (vma && vma->use_atomic_access_pte_bit)
+			pte |= GEN12_USM_PPGTT_PTE_AE;
+	}
+
+	return __gen8_pte_encode(pte, cache, flags, pt_level);
+}
+
+static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm,
+			     unsigned int level)
+{
+	u8 id = gt->info.id;
+	u64 pte;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+	/* Without a scratch bo, empty entries are simply zero. */
+	if (!vm->scratch_bo[id])
+		return 0;
+
+	/* Directories point at the scratch page-table one level below. */
+	if (level)
+		return gen8_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0,
+				       XE_CACHE_WB);
+
+	/* Leaf entries point at the scratch bo itself. */
+	pte = gen8_pte_encode(NULL, vm->scratch_bo[id], 0, XE_CACHE_WB, 0, 0);
+	if (vm->flags & XE_VM_FLAGS_64K)
+		pte |= GEN12_PTE_PS64;
+	return pte;
+}
+
+/**
+ * xe_pt_create() - Create a page-table.
+ * @vm: The vm to create for.
+ * @gt: The gt to create for.
+ * @level: The page-table level.
+ *
+ * Allocate and initialize a single struct xe_pt metadata structure. Also
+ * create the corresponding page-table bo, but don't initialize it. If the
+ * level is greater than zero, then it's assumed to be a directory page-
+ * table and the directory structure is also allocated and initialized to
+ * NULL pointers.
+ *
+ * Return: A valid struct xe_pt pointer on success, an ERR_PTR()-encoded
+ * error code on error.
+ */
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
+			   unsigned int level)
+{
+	struct xe_pt *pt;
+	struct xe_bo *bo;
+	size_t size;
+	int err;
+
+	size = !level ?  sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) +
+		GEN8_PDES * sizeof(struct xe_ptw *);
+	pt = kzalloc(size, GFP_KERNEL);
+	if (!pt)
+		return ERR_PTR(-ENOMEM);
+
+	bo = xe_bo_create_pin_map(vm->xe, gt, vm, SZ_4K,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto err_kfree;
+	}
+	pt->bo = bo;
+	pt->level = level;
+	pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
+
+	XE_BUG_ON(level > XE_VM_MAX_LEVEL);
+
+	return pt;
+
+err_kfree:
+	kfree(pt);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
+ * entries.
+ * @gt: The gt the scratch pagetable of which to use.
+ * @vm: The vm we populate for.
+ * @pt: The pagetable the bo of which to initialize.
+ *
+ * Populate the page-table bo of @pt with entries pointing into the gt's
+ * scratch page-table tree if any. Otherwise populate with zeros.
+ */
+void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
+			  struct xe_pt *pt)
+{
+	struct iosys_map *map = &pt->bo->vmap;
+	u64 empty;
+	int i;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	if (!vm->scratch_bo[gt->info.id]) {
+		/*
+		 * FIXME: Some memory is probably already zeroed at allocation?
+		 * Find out which memory that is and avoid this memset...
+		 */
+		xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
+	} else {
+		empty = __xe_pt_empty_pte(gt, vm, pt->level);
+		for (i = 0; i < GEN8_PDES; i++)
+			xe_pt_write(vm->xe, map, i, empty);
+	}
+}
+
+/**
+ * xe_pt_shift() - Return the ilog2 value of the size of the address range of
+ * a page-table at a certain level.
+ * @level: The level.
+ *
+ * Return: GEN8_PTE_SHIFT + GEN8_PDE_SHIFT * @level. NOTE(review): presumably
+ * the range mapped by a single entry at @level; confirm against callers.
+ */
+unsigned int xe_pt_shift(unsigned int level)
+{
+	return GEN8_PTE_SHIFT + GEN8_PDE_SHIFT * level;
+}
+
+/**
+ * xe_pt_destroy() - Destroy a page-table tree.
+ * @pt: The root of the page-table tree to destroy. May be NULL (no-op).
+ * @flags: vm flags. Currently unused, only passed on to the recursion.
+ * @deferred: List head of lockless list for deferred putting. NULL for
+ *            immediate putting.
+ *
+ * Unpins and puts the page-table bo, recursively calls xe_pt_destroy on all
+ * children and finally frees @pt. TODO: Can we remove the @flags argument?
+ */
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
+{
+	struct xe_pt *child;
+	int i;
+
+	if (!pt)
+		return;
+
+	XE_BUG_ON(!list_empty(&pt->bo->vmas));
+	xe_bo_unpin(pt->bo);
+	xe_bo_put_deferred(pt->bo, deferred);
+
+	/* Only directories with live entries have children to tear down. */
+	if (pt->level > 0 && pt->num_live) {
+		for (i = 0; i < GEN8_PDES; i++) {
+			child = xe_pt_entry(as_xe_pt_dir(pt), i);
+			if (child)
+				xe_pt_destroy(child, flags, deferred);
+		}
+	}
+	kfree(pt);
+}
+
+/**
+ * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the
+ * given gt and vm.
+ * @xe: xe device.
+ * @gt: gt to set up for.
+ * @vm: vm to set up for. Its pt_root for @gt must already exist.
+ *
+ * Sets up one page-table per level below the root plus a single leaf bo;
+ * every entry points to the page-table (or, for L0, the bo) one level below.
+ * Nothing is unwound on error: @vm's scratch slots may then hold an ERR_PTR.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
+			 struct xe_vm *vm)
+{
+	u8 id = gt->info.id;
+	int i;
+
+	vm->scratch_bo[id] = xe_bo_create(xe, gt, vm, SZ_4K,
+					  ttm_bo_type_kernel,
+					  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+					  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(vm->scratch_bo[id]))
+		return PTR_ERR(vm->scratch_bo[id]);
+	xe_bo_pin(vm->scratch_bo[id]);	/* NOTE(review): any return value is ignored */
+
+	for (i = 0; i < vm->pt_root[id]->level; i++) {
+		vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i);
+		if (IS_ERR(vm->scratch_pt[id][i]))
+			return PTR_ERR(vm->scratch_pt[id][i]);
+
+		xe_pt_populate_empty(gt, vm, vm->scratch_pt[id][i]);
+	}
+
+	return 0;
+}
+
+/**
+ * DOC: Pagetable building
+ *
+ * Below we use the term "page-table" for both page-directories, containing
+ * pointers to lower level page-directories or page-tables, and level 0
+ * page-tables that contain only page-table-entries pointing to memory pages.
+ *
+ * When inserting an address range in an already existing page-table tree
+ * there will typically be a set of page-tables that are shared with other
+ * address ranges, and a set that are private to this address range.
+ * The set of shared page-tables can be at most two per level,
+ * and those can't be updated immediately because the entries of those
+ * page-tables may still be in use by the gpu for other mappings. Therefore
+ * when inserting entries into those, we instead stage those insertions by
+ * adding insertion data into struct xe_vm_pgtable_update structures. This
+ * data, (subtrees for the cpu and page-table-entries for the gpu) is then
+ * added in a separate commit step. CPU-data is committed while still under the
+ * vm lock, the object lock and for userptr, the notifier lock in read mode.
+ * The GPU async data is committed either by the GPU or CPU after fulfilling
+ * relevant dependencies.
+ * For non-shared page-tables (and, in fact, for shared ones that aren't
+ * existing at the time of staging), we add the data in-place without the
+ * special update structures. This private part of the page-table tree will
+ * remain disconnected from the vm page-table tree until data is committed to
+ * the shared page tables of the vm tree in the commit phase.
+ */
+
+struct xe_pt_update {
+	/** @update: The update structure we're building for this parent. */
+	struct xe_vm_pgtable_update *update;
+	/** @parent: The parent. Used to detect a parent change. */
+	struct xe_pt *parent;
+	/** @preexisting: Whether the parent was pre-existing or allocated */
+	bool preexisting;
+};
+
+struct xe_pt_stage_bind_walk {
+	/** @base: The base class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @vm: The vm we're building for. */
+	struct xe_vm *vm;
+	/** @gt: The gt we're building for. */
+	struct xe_gt *gt;
+	/** @cache: Desired cache level for the ptes */
+	enum xe_cache_level cache;
+	/** @default_pte: PTE flag only template. No address is associated */
+	u64 default_pte;
+	/** @dma_offset: DMA offset to add to the PTE. */
+	u64 dma_offset;
+	/**
+	 * @needs_64K: This address range enforces 64K alignment and
+	 * granularity.
+	 */
+	bool needs_64K;
+	/**
+	 * @pte_flags: Flags determining PTE setup. These are not flags
+	 * encoded directly in the PTE. See @default_pte for those.
+	 */
+	u32 pte_flags;
+
+	/* Also input, but is updated during the walk */
+	/** @curs: The DMA address cursor. */
+	struct xe_res_cursor *curs;
+	/** @va_curs_start: The Virtual address corresponding to @curs->start */
+	u64 va_curs_start;
+
+	/* Output */
+	struct xe_walk_update {
+		/** @wupd.entries: Caller provided storage. */
+		struct xe_vm_pgtable_update *entries;
+		/** @wupd.num_used_entries: Number of update @entries used. */
+		unsigned int num_used_entries;
+		/** @wupd.updates: Tracks the update entry at a given level */
+		struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1];
+	} wupd;
+
+	/* Walk state */
+	/**
+	 * @l0_end_addr: The end address of the current l0 leaf. Used for
+	 * 64K granularity detection.
+	 */
+	u64 l0_end_addr;
+	/** @addr_64K: The start address of the current 64K chunk. */
+	u64 addr_64K;
+	/** @found_64K: Whether @addr_64K actually points to a 64K chunk. */
+	bool found_64K;
+};
+
+static int
+xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
+		 pgoff_t offset, bool alloc_entries)
+{
+	struct xe_pt_update *upd = &wupd->updates[parent->level];
+	struct xe_vm_pgtable_update *entry;
+
+	/*
+	 * For *each level*, we could only have one active
+	 * struct xe_pt_update at any one time. Once we move on to a
+	 * new parent and page-directory, the old one is complete, and
+	 * updates are either already stored in the build tree or in
+	 * @wupd->entries.
+	 */
+	if (likely(upd->parent == parent))
+		return 0;
+
+	upd->parent = parent;
+	upd->preexisting = true;
+
+	if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1)
+		return -EINVAL;
+
+	entry = wupd->entries + wupd->num_used_entries++;
+	upd->update = entry;
+	entry->ofs = offset;
+	entry->pt_bo = parent->bo;
+	entry->pt = parent;
+	entry->flags = 0;
+	entry->qwords = 0;
+
+	if (alloc_entries) {
+		entry->pt_entries = kmalloc_array(GEN8_PDES,
+						  sizeof(*entry->pt_entries),
+						  GFP_KERNEL);
+		if (!entry->pt_entries)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Write @pte at @offset of @parent: in place for a new @parent, staged for a
+ * pre-existing one. NOTE: This is a very frequently called function, so the
+ * fastpath (non-pre-existing pagetable, leaf ptes) has branch prediction hints.
+ */
+static int
+xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
+		   pgoff_t offset, struct xe_pt *xe_child, u64 pte)
+{
+	struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level];
+	struct xe_pt_update *child_upd = xe_child ?
+		&xe_walk->wupd.updates[xe_child->level] : NULL;
+	int ret;
+
+	ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true);
+	if (unlikely(ret))
+		return ret;
+
+	/*
+	 * Register this new pagetable so that it won't be recognized as
+	 * a shared pagetable by a subsequent insertion.
+	 */
+	if (unlikely(child_upd)) {
+		child_upd->update = NULL;
+		child_upd->parent = xe_child;
+		child_upd->preexisting = false;
+	}
+
+	if (likely(!upd->preexisting)) {
+		/* Continue building a non-connected subtree. */
+		struct iosys_map *map = &parent->bo->vmap;
+
+		if (unlikely(xe_child))
+			parent->base.dir->entries[offset] = &xe_child->base;
+
+		xe_pt_write(xe_walk->vm->xe, map, offset, pte);
+		parent->num_live++;
+	} else {
+		/* Shared pt. Stage update. */
+		unsigned int idx;
+		struct xe_vm_pgtable_update *entry = upd->update;
+
+		idx = offset - entry->ofs;
+		entry->pt_entries[idx].pt = xe_child;
+		entry->pt_entries[idx].pte = pte;
+		entry->qwords++;
+	}
+
+	return 0;
+}
+
+static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
+				   struct xe_pt_stage_bind_walk *xe_walk)
+{
+	struct xe_res_cursor *curs = xe_walk->curs;
+	u64 size, dma;
+
+	/* The requested virtual range has to cover a huge pte at @level ... */
+	if (!xe_pt_covers(addr, next, level, &xe_walk->base))
+		return false;
+
+	/* ... the current DMA segment has to cover the whole pte ... */
+	if (next - xe_walk->va_curs_start > curs->size)
+		return false;
+
+	/* ... and the DMA address has to be aligned to the huge pte size. */
+	dma = addr - xe_walk->va_curs_start + xe_res_dma(curs);
+	size = next - addr;
+	return IS_ALIGNED(dma, size);
+}
+
+/*
+ * Check whether the mapping of [addr, next) can be done entirely with
+ * 64K PTEs. The walk's cursor itself is not advanced.
+ */
+static bool
+xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	/* Work on a private copy so the walk's own cursor is left untouched. */
+	struct xe_res_cursor curs = *xe_walk->curs;
+
+	if (!IS_ALIGNED(addr, SZ_64K) || next > xe_walk->l0_end_addr)
+		return false;
+
+	xe_res_next(&curs, addr - xe_walk->va_curs_start);
+	while (addr < next) {
+		/* Each chunk needs a 64K-aligned DMA address and 64K of segment. */
+		if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
+			return false;
+		xe_res_next(&curs, SZ_64K);
+		addr += SZ_64K;
+	}
+
+	/* False if the range was not a whole number of 64K chunks. */
+	return addr == next;
+}
+
+/*
+ * For non-compact "normal" 4K level-0 pagetables, we want to try to group
+ * addresses together in 64K-contiguous regions to add a 64K TLB hint for the
+ * device to the PTE.
+ * This function determines whether the address is part of such a
+ * segment. For VRAM in normal pagetables, this is strictly necessary on
+ * some devices.
+ */
+static bool
+xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	/* Address is within an already found 64k region */
+	if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K)
+		return true;
+
+	xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk);
+	xe_walk->addr_64K = addr;
+
+	return xe_walk->found_64K;
+}
+
+static int
+xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
+		       unsigned int level, u64 addr, u64 next,
+		       struct xe_ptw **child,
+		       enum page_walk_action *action,
+		       struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_bind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
+	struct xe_pt *xe_child;
+	bool covers;
+	int ret = 0;
+	u64 pte;
+
+	/* Leaf entry: level 0, or a huge pte is possible at this level. */
+	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
+		struct xe_res_cursor *curs = xe_walk->curs;
+
+		XE_WARN_ON(xe_walk->va_curs_start != addr);
+
+		pte = __gen8_pte_encode(xe_res_dma(curs) + xe_walk->dma_offset,
+					xe_walk->cache, xe_walk->pte_flags,
+					level);
+		pte |= xe_walk->default_pte;
+
+		/*
+		 * Set the GEN12_PTE_PS64 hint if possible, otherwise if
+		 * this device *requires* 64K PTE size for VRAM, fail.
+		 */
+		if (level == 0 && !xe_parent->is_compact) {
+			if (xe_pt_is_pte_ps64K(addr, next, xe_walk))
+				pte |= GEN12_PTE_PS64;
+			else if (XE_WARN_ON(xe_walk->needs_64K))
+				return -EINVAL;
+		}
+
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
+		if (unlikely(ret))
+			return ret;
+
+		xe_res_next(curs, next - addr);
+		xe_walk->va_curs_start = next;
+		*action = ACTION_CONTINUE;
+
+		return ret;
+	}
+
+	/*
+	 * Descending to lower level. Determine if we need to allocate a
+	 * new page table or -directory, which we do if there is no
+	 * previous one or there is one we can completely replace.
+	 */
+	if (level == 1) {
+		walk->shifts = xe_normal_pt_shifts;
+		xe_walk->l0_end_addr = next;
+	}
+
+	covers = xe_pt_covers(addr, next, level, &xe_walk->base);
+	if (covers || !*child) {
+		u64 flags = 0;
+
+		xe_child = xe_pt_create(xe_walk->vm, xe_walk->gt, level - 1);
+		if (IS_ERR(xe_child))
+			return PTR_ERR(xe_child);
+
+		xe_pt_set_addr(xe_child,
+			       round_down(addr, 1ull << walk->shifts[level]));
+
+		if (!covers)
+			xe_pt_populate_empty(xe_walk->gt, xe_walk->vm, xe_child);
+
+		*child = &xe_child->base;
+
+		/*
+		 * Prefer the compact pagetable layout for L0 if possible.
+		 * TODO: Suballocate the pt bo to avoid wasting a lot of
+		 * memory.
+		 */
+		if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 &&
+		    covers && xe_pt_scan_64K(addr, next, xe_walk)) {
+			walk->shifts = xe_compact_pt_shifts;
+			flags |= GEN12_PDE_64K;
+			xe_child->is_compact = true;
+		}
+
+		pte = gen8_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags;
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
+					 pte);
+	}
+
+	*action = ACTION_SUBTREE;
+	return ret;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
+	.pt_entry = xe_pt_stage_bind_entry,
+};
+
+/**
+ * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
+ * range.
+ * @gt: The gt we're building for.
+ * @vma: The vma indicating the address range. Must not be NULL.
+ * @entries: Storage for the update entries used for connecting the tree to
+ * the main tree at commit time.
+ * @num_entries: On output contains the number of @entries used.
+ *
+ * This function builds a disconnected page-table tree for a given address
+ * range. The tree is connected to the main vm tree for the gpu using
+ * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
+ * The function builds xe_vm_pgtable_update structures for already existing
+ * shared page-tables, and non-existing shared and non-shared page-tables
+ * are built and populated directly.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int
+xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
+		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
+{
+	struct xe_bo *bo = vma->bo;
+	bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo);
+	struct xe_res_cursor curs;
+	struct xe_pt_stage_bind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_bind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.vm = vma->vm,
+		.gt = gt,
+		.curs = &curs,
+		.va_curs_start = vma->start,
+		.pte_flags = vma->pte_flags,
+		.wupd.entries = entries,
+		.needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram,
+	};
+	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+	int ret;
+
+	/* VRAM mappings carry the local-memory PTE bit and a per-gt DMA offset. */
+	if (is_vram) {
+		xe_walk.default_pte = GEN12_PPGTT_PTE_LM;
+		if (vma->use_atomic_access_pte_bit)
+			xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE;
+		xe_walk.dma_offset = gt->mem.vram.io_start -
+			gt_to_xe(gt)->mem.vram.io_start;
+		xe_walk.cache = XE_CACHE_WB;
+	} else {
+		if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT)
+			xe_walk.cache = XE_CACHE_WT;
+		else
+			xe_walk.cache = XE_CACHE_WB;
+	}
+
+	/* Point the cursor at the backing store: userptr sg, VRAM or bo sg. */
+	xe_bo_assert_held(bo);
+	if (xe_vma_is_userptr(vma))
+		xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1, &curs);
+	else if (xe_bo_is_vram(bo))
+		xe_res_first(bo->ttm.resource, vma->bo_offset,
+			     vma->end - vma->start + 1, &curs);
+	else
+		xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset,
+				vma->end - vma->start + 1, &curs);
+
+	ret = xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1, &xe_walk.base);
+
+	*num_entries = xe_walk.wupd.num_used_entries;
+	return ret;
+}
+
+/**
+ * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
+ * shared pagetable.
+ * @addr: The start address within the shared pagetable.
+ * @end: The end address within the shared pagetable.
+ * @level: The level of the shared pagetable.
+ * @walk: Walk info. Supplies the address shift for @level.
+ * @action: next action to perform (see enum page_walk_action)
+ * @offset: Ignored on input, First non-shared entry on output.
+ * @end_offset: Ignored on input, Last non-shared entry + 1 on output.
+ *
+ * A shared page-table has some entries that belong to the address range
+ * and others that don't. This function determines the entries that belong
+ * fully to the address range. Depending on level, some entries may
+ * partially belong to the address range (that can't happen at level 0).
+ * The function detects that and adjusts those offsets to not include those
+ * partial entries. Iff it does detect partial entries, we know that there must
+ * be shared page tables also at lower levels, so it adjusts the walk action
+ * accordingly.
+ *
+ * Return: true if there were non-shared entries, false otherwise.
+ */
+static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
+				    struct xe_pt_walk *walk,
+				    enum page_walk_action *action,
+				    pgoff_t *offset, pgoff_t *end_offset)
+{
+	u64 size = 1ull << walk->shifts[level];
+
+	*offset = xe_pt_offset(addr, level, walk);
+	*end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset;
+
+	if (!level)
+		return true;
+
+	/*
+	 * If addr or end are not size aligned, there are shared pts at lower
+	 * level, so in that case traverse down the subtree
+	 */
+	*action = ACTION_CONTINUE;
+	if (!IS_ALIGNED(addr, size)) {
+		*action = ACTION_SUBTREE;
+		(*offset)++;
+	}
+
+	if (!IS_ALIGNED(end, size)) {
+		*action = ACTION_SUBTREE;
+		(*end_offset)--;
+	}
+
+	return *end_offset > *offset;
+}
+
+struct xe_pt_zap_ptes_walk {
+	/** @base: The walk base-class */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @gt: The gt we're zapping ptes for */
+	struct xe_gt *gt;
+
+	/* Output */
+	/** @needs_invalidate: Set once ptes were zeroed, so a TLB flush is due */
+	bool needs_invalidate;
+};
+
+static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_zap_ptes_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+
+	XE_BUG_ON(!*child);
+	XE_BUG_ON(!level && xe_child->is_compact);
+
+	/*
+	 * This is an entry callback, so @level is the level of @parent. The
+	 * ptes we zero live in the child page table, one level further
+	 * down, hence the pre-decrement of @level in the call below.
+	 */
+	if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
+				    &end_offset)) {
+		xe_map_memset(gt_to_xe(xe_walk->gt), &xe_child->bo->vmap,
+			      offset * sizeof(u64), 0,
+			      (end_offset - offset) * sizeof(u64));
+		xe_walk->needs_invalidate = true;
+	}
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
+	.pt_entry = xe_pt_zap_ptes_entry,
+};
+
+/**
+ * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
+ * @gt: The gt we're zapping for.
+ * @vma: GPU VMA detailing address range.
+ *
+ * Eviction and Userptr invalidation need to be able to zap the
+ * gpu ptes of a given address range in pagefaulting mode.
+ * In order to be able to do that, that function needs access to the shared
+ * page-table entries so it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the necessary locks to ensure neither the page-table connectivity
+ * nor the page-table entries of the range is updated from under us.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
+ */
+bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma)
+{
+	struct xe_pt_zap_ptes_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_zap_ptes_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.gt = gt,
+	};
+	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+
+	if (!(vma->gt_present & BIT(gt->info.id)))
+		return false;
+
+	(void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
+				 &xe_walk.base);
+
+	return xe_walk.needs_invalidate;
+}
+
+static void
+xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt,
+		       struct iosys_map *map, void *data,
+		       u32 qword_ofs, u32 num_qwords,
+		       const struct xe_vm_pgtable_update *update)
+{
+	const struct xe_pt_entry *src = update->pt_entries;
+	u64 *dst = data;	/* only used when no @map is given */
+	u32 n;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	for (n = 0; n < num_qwords; n++) {
+		if (!map)
+			dst[n] = src[n].pte;
+		else
+			xe_map_wr(gt_to_xe(gt), map, (qword_ofs + n) *
+				  sizeof(u64), u64, src[n].pte);
+	}
+}
+
+static void xe_pt_abort_bind(struct xe_vma *vma,
+			     struct xe_vm_pgtable_update *entries,
+			     u32 num_entries)
+{
+	struct xe_vm_pgtable_update *upd;
+	u32 q;
+
+	for (upd = entries; upd < entries + num_entries; upd++) {
+		if (!upd->pt_entries)
+			continue;	/* nothing was staged for this update */
+		for (q = 0; q < upd->qwords; q++)
+			xe_pt_destroy(upd->pt_entries[q].pt, vma->vm->flags, NULL);
+		kfree(upd->pt_entries);
+	}
+}
+
+static void xe_pt_commit_locks_assert(struct xe_vma *vma)
+{
+	struct xe_vm *vm = vma->vm;
+
+	lockdep_assert_held(&vm->lock);
+	/* Userptr vmas need the notifier lock, all others the bo's resv. */
+	if (xe_vma_is_userptr(vma))
+		lockdep_assert_held_read(&vm->userptr.notifier_lock);
+	else
+		dma_resv_assert_held(vma->bo->ttm.base.resv);
+
+	dma_resv_assert_held(&vm->resv);
+}
+
+static void xe_pt_commit_bind(struct xe_vma *vma,
+			      struct xe_vm_pgtable_update *entries,
+			      u32 num_entries, bool rebind,
+			      struct llist_head *deferred)
+{
+	u32 i, j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (i = 0; i < num_entries; i++) {
+		struct xe_pt *pt = entries[i].pt;
+		struct xe_pt_dir *pt_dir;
+
+		if (!rebind)
+			pt->num_live += entries[i].qwords;
+		/* Level 0 has no child tables to hook up; just drop the array. */
+		if (!pt->level) {
+			kfree(entries[i].pt_entries);
+			continue;
+		}
+
+		pt_dir = as_xe_pt_dir(pt);
+		for (j = 0; j < entries[i].qwords; j++) {
+			u32 j_ = j + entries[i].ofs;
+			struct xe_pt *newpte = entries[i].pt_entries[j].pt;
+
+			if (xe_pt_entry(pt_dir, j_))
+				xe_pt_destroy(xe_pt_entry(pt_dir, j_),
+					      vma->vm->flags, deferred);
+			/* Record the new child in the directory's entry array. */
+			pt_dir->dir.entries[j_] = &newpte->base;
+		}
+		kfree(entries[i].pt_entries);
+	}
+}
+
+static int
+xe_pt_prepare_bind(struct xe_gt *gt, struct xe_vma *vma,
+		   struct xe_vm_pgtable_update *entries, u32 *num_entries,
+		   bool rebind)
+{
+	int err;
+
+	*num_entries = 0;
+	err = xe_pt_stage_bind(gt, vma, entries, num_entries);
+	if (!err)
+		BUG_ON(!*num_entries);
+	else /* abort! */
+		xe_pt_abort_bind(vma, entries, *num_entries);
+
+	return err;
+}
+
+static void xe_vm_dbg_print_entries(struct xe_device *xe,
+				    const struct xe_vm_pgtable_update *entries,
+				    unsigned int num_entries)
+#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
+{
+	unsigned int i;
+
+	vm_dbg(&xe->drm, "%u entries to update\n", num_entries);
+	for (i = 0; i < num_entries; i++) {
+		const struct xe_vm_pgtable_update *entry = &entries[i];
+		struct xe_pt *xe_pt = entry->pt;
+		u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
+		u64 end;
+		u64 start;
+
+		XE_BUG_ON(entry->pt->is_compact);
+		start = entry->ofs * page_size;
+		end = start + page_size * entry->qwords;
+		vm_dbg(&xe->drm,
+		       "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
+		       i, xe_pt->level, entry->ofs, entry->qwords,
+		       xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0);
+	}
+}
+#else
+{}
+#endif
+
+#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
+/* Debug aid: fake a userptr invalidation on every divisor'th call. */
+static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+{
+	u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2;
+	static u32 count;	/* shared by all vmas, not synchronized here */
+
+	if (count++ % divisor == divisor - 1) {
+		struct xe_vm *vm = vma->vm;
+
+		vma->userptr.divisor = divisor << 1;
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&vma->userptr.invalidate_link,
+			       &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+		return true;
+	}
+
+	return false;
+}
+
+#else
+
+static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+{
+	return false;
+}
+
+#endif
+
+/**
+ * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks
+ * @base: Base we derive from.
+ * @bind: Whether this is a bind or an unbind operation. A bind operation
+ *        makes the pre-commit callback error with -EAGAIN if it detects a
+ *        pending invalidation.
+ * @locked: Whether the pre-commit callback locked the userptr notifier lock
+ *          and it needs unlocking.
+ */
+struct xe_pt_migrate_pt_update {
+	struct xe_migrate_pt_update base;
+	bool bind;
+	bool locked;
+};
+
+static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_pt_migrate_pt_update *userptr_update =
+		container_of(pt_update, typeof(*userptr_update), base);
+	struct xe_vma *vma = pt_update->vma;
+	unsigned long notifier_seq = vma->userptr.notifier_seq;
+	struct xe_vm *vm = vma->vm;
+
+	userptr_update->locked = false;
+
+	/*
+	 * Wait until nobody is running the invalidation notifier, and
+	 * since we're exiting the loop holding the notifier lock,
+	 * nobody can proceed invalidating either.
+	 *
+	 * Note that we don't update the vma->userptr.notifier_seq since
+	 * we don't update the userptr pages.
+	 */
+	do {
+		down_read(&vm->userptr.notifier_lock);
+		if (!mmu_interval_read_retry(&vma->userptr.notifier,
+					     notifier_seq))
+			break;
+
+		up_read(&vm->userptr.notifier_lock);
+
+		if (userptr_update->bind)
+			return -EAGAIN;
+
+		notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+	} while (true);
+
+	/* Inject errors to test whether they are handled correctly */
+	if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) {
+		up_read(&vm->userptr.notifier_lock);
+		return -EAGAIN;
+	}
+
+	userptr_update->locked = true;
+
+	return 0;
+}
+
+static const struct xe_migrate_pt_update_ops bind_ops = {
+	.populate = xe_vm_populate_pgtable,
+};
+
+static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
+	.populate = xe_vm_populate_pgtable,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
+ * address range.
+ * @gt: The gt to bind for.
+ * @vma: The vma to bind.
+ * @e: The engine with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before binding the built tree to the live vm tree.
+ * @num_syncs: Number of @syncs entries.
+ * @rebind: Whether we're rebinding this vma to the same address range without
+ * an unbind in-between.
+ *
+ * This function builds a page-table tree (see xe_pt_stage_bind() for more
+ * information on page-table building), and the xe_vm_pgtable_update entries
+ * abstracting the operations needed to attach it to the main vm tree. It
+ * then takes the relevant locks and updates the metadata side of the main
+ * vm tree and submits the operations for pipelined attachment of the
+ * gpu page-table to the vm main tree, (which can be done either by the
+ * cpu or the GPU).
+ *
+ * Return: A valid dma-fence representing the pipelined attachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool rebind)
+{
+	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+	struct xe_pt_migrate_pt_update bind_pt_update = {
+		.base = {
+			.ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
+			.vma = vma,
+		},
+		.bind = true,
+	};
+	struct xe_vm *vm = vma->vm;
+	u32 num_entries;
+	struct dma_fence *fence;
+	int err;
+
+	bind_pt_update.locked = false;
+	xe_bo_assert_held(vma->bo);
+	xe_vm_assert_held(vm);
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	/* vma->end is inclusive; print the exclusive end to match "[a...b)". */
+	vm_dbg(&vma->vm->xe->drm,
+	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
+	       vma->start, vma->end + 1, e);
+	err = xe_pt_prepare_bind(gt, vma, entries, &num_entries, rebind);
+	if (err)
+		goto err;
+	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
+
+	xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
+
+	fence = xe_migrate_update_pgtables(gt->migrate,
+					   vm, vma->bo,
+					   e ? e : vm->eng[gt->info.id],
+					   entries, num_entries,
+					   syncs, num_syncs,
+					   &bind_pt_update.base);
+	if (!IS_ERR(fence)) {
+		LLIST_HEAD(deferred);
+
+		/* add shared fence now for pagetable delayed destroy */
+		dma_resv_add_fence(&vm->resv, fence, !rebind &&
+				   vma->last_munmap_rebind ?
+				   DMA_RESV_USAGE_KERNEL :
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
+			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		xe_pt_commit_bind(vma, entries, num_entries, rebind,
+				  bind_pt_update.locked ? &deferred : NULL);
+
+		/* This vma is live (again?) now */
+		vma->gt_present |= BIT(gt->info.id);
+
+		if (bind_pt_update.locked) {
+			vma->userptr.initial_bind = true;
+			up_read(&vm->userptr.notifier_lock);
+			xe_bo_put_commit(&deferred);
+		}
+		if (!rebind && vma->last_munmap_rebind &&
+		    xe_vm_in_compute_mode(vm))
+			queue_work(vm->xe->ordered_wq,
+				   &vm->preempt.rebind_work);
+	} else {
+		if (bind_pt_update.locked)
+			up_read(&vm->userptr.notifier_lock);
+		xe_pt_abort_bind(vma, entries, num_entries);
+	}
+
+	return fence;
+
+err:
+	return ERR_PTR(err);
+}
+
+struct xe_pt_stage_unbind_walk {
+	/** @base: The pagewalk base-class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @gt: The gt we're unbinding from. */
+	struct xe_gt *gt;
+
+	/**
+	 * @modified_start: Walk range start, modified to include any
+	 * shared pagetables that we're the only user of and can thus
+	 * treat as private.
+	 */
+	u64 modified_start;
+	/** @modified_end: Walk range end, modified like @modified_start. */
+	u64 modified_end;
+
+	/* Output */
+	/** @wupd: Structure to track the page-table updates we're building */
+	struct xe_walk_update wupd;
+};
+
+/*
+ * Check whether this range is the only one populating this pagetable, and
+ * in that case widen the walk's modified range so that higher levels don't
+ * view us as a shared pagetable. Returns true if the pagetable can be killed.
+ */
+static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
+			     const struct xe_pt *child,
+			     enum page_walk_action *action,
+			     struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	unsigned int shift = walk->shifts[level];
+	u64 size = 1ull << shift;
+
+	if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) &&
+	    ((next - addr) >> shift) == child->num_live) {
+		u64 parent_size = 1ull << walk->shifts[level + 1];
+
+		*action = ACTION_CONTINUE;
+		/* Grow the range to the bounds of the entry one level up. */
+		if (xe_walk->modified_start >= addr)
+			xe_walk->modified_start = round_down(addr, parent_size);
+		if (xe_walk->modified_end <= next)
+			xe_walk->modified_end = round_up(next, parent_size);
+
+		return true;
+	}
+
+	return false;
+}
+
+static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
+				    unsigned int level, u64 addr, u64 next,
+				    struct xe_ptw **child,
+				    enum page_walk_action *action,
+				    struct xe_pt_walk *walk)
+{
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+
+	XE_BUG_ON(!*child);
+	XE_BUG_ON(!level && xe_child->is_compact);
+	/* @level is the parent's level; the child sits one level below. */
+	xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
+
+	return 0;
+}
+
+static int
+xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+	u64 size = 1ull << walk->shifts[--level];
+
+	if (!IS_ALIGNED(addr, size))
+		addr = xe_walk->modified_start;
+	if (!IS_ALIGNED(next, size))
+		next = xe_walk->modified_end;
+
+	/* Parent == *child is the root pt. Don't kill it. */
+	if (parent != *child &&
+	    xe_pt_check_kill(addr, next, level, xe_child, action, walk))
+		return 0;
+	/* Not killable: stage an update for the entries fully in range. */
+	if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset,
+				     &end_offset))
+		return 0;
+
+	(void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false);
+	xe_walk->wupd.updates[level].update->qwords = end_offset - offset;
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
+	.pt_entry = xe_pt_stage_unbind_entry,
+	.pt_post_descend = xe_pt_stage_unbind_post_descend,
+};
+
+/**
+ * xe_pt_stage_unbind() - Build page-table update structures for an unbind
+ * operation
+ * @gt: The gt we're unbinding for.
+ * @vma: The vma we're unbinding.
+ * @entries: Caller-provided storage for the update structures.
+ *
+ * Builds page-table update structures for an unbind operation. The function
+ * will attempt to remove all page-tables that we're the only user
+ * of, and for that to work, the unbind operation must be committed in the
+ * same critical section that blocks racing binds to the same page-table tree.
+ *
+ * Return: The number of entries used.
+ */
+static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma,
+				       struct xe_vm_pgtable_update *entries)
+{
+	struct xe_pt_stage_unbind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_unbind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.gt = gt,
+		.modified_start = vma->start,
+		.modified_end = vma->end + 1,
+		.wupd.entries = entries,
+	};
+	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+
+	(void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
+				 &xe_walk.base);
+
+	return xe_walk.wupd.num_used_entries;
+}
+
+static void
+xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
+				  struct xe_gt *gt, struct iosys_map *map,
+				  void *ptr, u32 qword_ofs, u32 num_qwords,
+				  const struct xe_vm_pgtable_update *update)
+{
+	struct xe_vma *vma = pt_update->vma;
+	u64 empty = __xe_pt_empty_pte(gt, vma->vm, update->pt->level);
+	u32 i;	/* unsigned, to match @num_qwords */
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+	/* iomem must go through xe_map_wr(); other memory can use memset64(). */
+	if (map && map->is_iomem)
+		for (i = 0; i < num_qwords; ++i)
+			xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
+				  sizeof(u64), u64, empty);
+	else if (map)
+		memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
+			 num_qwords);
+	else
+		memset64(ptr, empty, num_qwords);
+}
+
+static void
+xe_pt_commit_unbind(struct xe_vma *vma,
+		    struct xe_vm_pgtable_update *entries, u32 num_entries,
+		    struct llist_head *deferred)
+{
+	u32 n;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (n = 0; n < num_entries; ++n) {
+		struct xe_vm_pgtable_update *upd = &entries[n];
+		struct xe_pt *pt = upd->pt;
+		struct xe_pt_dir *dir;
+		u32 slot;
+
+		pt->num_live -= upd->qwords;
+		if (!pt->level)
+			continue;	/* level 0: no child tables to drop */
+
+		dir = as_xe_pt_dir(pt);
+		for (slot = upd->ofs; slot < upd->ofs + upd->qwords; slot++) {
+			if (xe_pt_entry(dir, slot))
+				xe_pt_destroy(xe_pt_entry(dir, slot),
+					      vma->vm->flags, deferred);
+
+			dir->dir.entries[slot] = NULL;
+		}
+	}
+}
+
+static const struct xe_migrate_pt_update_ops unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+};
+
+static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
+ * address range.
+ * @gt: The gt to unbind for.
+ * @vma: The vma to unbind.
+ * @e: The engine with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
+ * @num_syncs: Number of @syncs entries.
+ *
+ * This function builds the xe_vm_pgtable_update entries abstracting the
+ * operations needed to detach the page-table tree to be destroyed from the
+ * main vm tree.
+ * It then takes the relevant locks and submits the operations for
+ * pipelined detachment of the gpu page-table from the vm main tree,
+ * (which can be done either by the cpu or the GPU). Finally it frees the
+ * detached page-table tree.
+ *
+ * Return: A valid dma-fence representing the pipelined detachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+		   struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+	struct xe_pt_migrate_pt_update unbind_pt_update = {
+		.base = {
+			.ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops :
+			&unbind_ops,
+			.vma = vma,
+		},
+	};
+	struct xe_vm *vm = vma->vm;
+	u32 num_entries;
+	struct dma_fence *fence = NULL;
+	LLIST_HEAD(deferred);
+
+	xe_bo_assert_held(vma->bo);
+	xe_vm_assert_held(vm);
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	/* vma->end is inclusive; print the exclusive end to match "[a...b)". */
+	vm_dbg(&vma->vm->xe->drm,
+	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
+	       vma->start, vma->end + 1, e);
+	num_entries = xe_pt_stage_unbind(gt, vma, entries);
+	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
+
+	xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
+
+	/*
+	 * Even if we were already evicted and unbind to destroy, we need to
+	 * clear again here. The eviction may have updated pagetables at a
+	 * lower level, because it needs to be more conservative.
+	 */
+	fence = xe_migrate_update_pgtables(gt->migrate,
+					   vm, NULL, e ? e :
+					   vm->eng[gt->info.id],
+					   entries, num_entries,
+					   syncs, num_syncs,
+					   &unbind_pt_update.base);
+	if (!IS_ERR(fence)) {
+		/* add shared fence now for pagetable delayed destroy */
+		dma_resv_add_fence(&vm->resv, fence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		/* This fence will be installed by caller when doing eviction */
+		if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
+			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		xe_pt_commit_unbind(vma, entries, num_entries,
+				    unbind_pt_update.locked ? &deferred : NULL);
+		vma->gt_present &= ~BIT(gt->info.id);
+	}
+
+	if (!vma->gt_present)
+		list_del_init(&vma->rebind_link);
+
+	if (unbind_pt_update.locked) {
+		XE_WARN_ON(!xe_vma_is_userptr(vma));
+
+		if (!vma->gt_present) {
+			spin_lock(&vm->userptr.invalidated_lock);
+			list_del_init(&vma->userptr.invalidate_link);
+			spin_unlock(&vm->userptr.invalidated_lock);
+		}
+		up_read(&vm->userptr.notifier_lock);
+		xe_bo_put_commit(&deferred);
+	}
+
+	return fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
new file mode 100644
index 000000000000..1152043e5c63
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_PT_H_
+#define _XE_PT_H_
+
+#include <linux/types.h>
+
+#include "xe_pt_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_device;
+struct xe_engine;
+struct xe_gt;
+struct xe_sync_entry;
+struct xe_vm;
+struct xe_vma;
+
+#define xe_pt_write(xe, map, idx, data) \
+	xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)
+
+unsigned int xe_pt_shift(unsigned int level);
+
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
+			   unsigned int level);
+
+int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
+			 struct xe_vm *vm);
+
+void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
+			  struct xe_pt *pt);
+
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
+
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool rebind);
+
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+		   struct xe_sync_entry *syncs, u32 num_syncs);
+
+bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma);
+
+u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
+		    const enum xe_cache_level level);
+
+u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
+		    u64 offset, enum xe_cache_level cache,
+		    u32 flags, u32 pt_level);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
new file mode 100644
index 000000000000..2ed64c0a4485
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PT_TYPES_H_
+#define _XE_PT_TYPES_H_
+
+#include "xe_pt_walk.h"
+
+enum xe_cache_level {
+	XE_CACHE_NONE,
+	XE_CACHE_WT,
+	XE_CACHE_WB,
+};
+
+#define XE_VM_MAX_LEVEL 4
+
+struct xe_pt {
+	struct xe_ptw base;
+	struct xe_bo *bo;
+	unsigned int level;
+	unsigned int num_live;
+	bool rebind;
+	bool is_compact;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+	/** @addr: Virtual start address of the PT. */
+	u64 addr;
+#endif
+};
+
+struct xe_pt_entry {
+	struct xe_pt *pt;
+	u64 pte;
+};
+
+struct xe_vm_pgtable_update {
+	/** @pt_bo: page table bo to write to */
+	struct xe_bo *pt_bo;
+
+	/** @ofs: offset inside this page table to begin writing to (in qwords) */
+	u32 ofs;
+
+	/** @qwords: number of PTE's to write */
+	u32 qwords;
+
+	/** @pt: opaque pointer useful for the caller of xe_migrate_update_pgtables */
+	struct xe_pt *pt;
+
+	/** @pt_entries: Newly added pagetable entries */
+	struct xe_pt_entry *pt_entries;
+
+	/** @flags: Target flags */
+	u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c
new file mode 100644
index 000000000000..0def89af4372
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#include "xe_pt_walk.h"
+
+/**
+ * DOC: GPU page-table tree walking.
+ * The utilities in this file are similar to the CPU page-table walk
+ * utilities in mm/pagewalk.c. The main difference is that we distinguish
+ * the various levels of a page-table tree with an unsigned integer rather
+ * than by name. 0 is the lowest level, and page-tables with level 0 can
+ * not be directories pointing to lower levels, whereas all other levels
+ * can. The user of the utilities determines the highest level.
+ *
+ * Nomenclature:
+ * Each struct xe_ptw, regardless of level is referred to as a page table, and
+ * multiple page tables typically form a page table tree with page tables at
+ * intermediate levels being page directories pointing at page tables at lower
+ * levels. A shared page table for a given address range is a page-table which
+ * is neither fully within nor fully outside the address range and that can
+ * thus be shared by two or more address ranges.
+ *
+ * Please keep this code generic so that it can be used as a drm-wide
+ * page-table walker should other drivers find use for it.
+ */
+static u64 xe_pt_addr_end(u64 addr, u64 end, unsigned int level,
+			  const struct xe_pt_walk *walk)
+{
+	u64 entry_size = 1ull << walk->shifts[level];
+	u64 boundary = round_up(addr + 1, entry_size);	/* next entry start */
+
+	return min_t(u64, boundary, end);
+}
+
+static bool xe_pt_next(pgoff_t *offset, u64 *addr, u64 next, u64 end,
+		       unsigned int level, const struct xe_pt_walk *walk)
+{
+	pgoff_t step = 1;
+
+	/* Shared pt walk skips to the last pagetable */
+	if (unlikely(walk->shared_pt_mode)) {
+		unsigned int shift = walk->shifts[level];
+		u64 skip_to = round_down(end, 1ull << shift);
+
+		if (skip_to > next) {
+			step += (skip_to - next) >> shift;
+			next = skip_to;
+		}
+	}
+	/* Advance the caller's cursor; returning false ends the walk loop. */
+	*addr = next;
+	*offset += step;
+
+	return next != end;
+}
+
+/**
+ * xe_pt_walk_range() - Walk a range of a gpu page table tree with callbacks
+ * for each page-table entry in all levels.
+ * @parent: The root page table for walk start.
+ * @level: The root page table level.
+ * @addr: Virtual address start.
+ * @end: Virtual address end + 1.
+ * @walk: Walk info.
+ *
+ * Similar to the CPU page-table walker, this is a helper to walk
+ * a gpu page table and call a provided callback function for each entry.
+ *
+ * Return: 0 on success, negative error code on error. The error is
+ * propagated from the callback and on error the walk is terminated.
+ */
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+		     u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+	pgoff_t offset = xe_pt_offset(addr, level, walk);
+	struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL;
+	const struct xe_pt_walk_ops *ops = walk->ops;
+	enum page_walk_action action;
+	struct xe_ptw *child;
+	int err = 0;
+	u64 next;
+
+	do {
+		next = xe_pt_addr_end(addr, end, level, walk);
+		if (walk->shared_pt_mode && xe_pt_covers(addr, next, level,
+							 walk))
+			continue;
+again:
+		action = ACTION_SUBTREE;
+		child = entries ? entries[offset] : NULL;
+		err = ops->pt_entry(parent, offset, level, addr, next,
+				    &child, &action, walk);
+		if (err)
+			break;
+
+		/* Probably not needed yet for gpu pagetable walk. */
+		if (unlikely(action == ACTION_AGAIN))
+			goto again;
+
+		if (likely(!level || !child || action == ACTION_CONTINUE))
+			continue;
+
+		err = xe_pt_walk_range(child, level - 1, addr, next, walk);
+		/* Subtree done; run the post-descend callback, if any. */
+		if (!err && ops->pt_post_descend)
+			err = ops->pt_post_descend(parent, offset, level, addr,
+						   next, &child, &action, walk);
+		if (err)
+			break;
+
+	} while (xe_pt_next(&offset, &addr, next, end, level, walk));
+
+	return err;
+}
+
+/**
+ * xe_pt_walk_shared() - Walk shared page tables of a page-table tree.
+ * @parent: Root page table directory.
+ * @level: Level of the root.
+ * @addr: Start address.
+ * @end: Last address + 1.
+ * @walk: Walk info.
+ *
+ * This function is similar to xe_pt_walk_range() but it skips page tables
+ * that are private to the range. Since the root (or @parent) page table is
+ * typically also a shared page table this function is different in that it
+ * calls the pt_entry callback and the post_descend callback also for the
+ * root. The root can be detected in the callbacks by checking whether
+ * parent == *child.
+ * Walking only the shared page tables is common for unbind-type operations
+ * where the page-table entries for an address range are cleared or detached
+ * from the main page-table tree.
+ *
+ * Return: 0 on success, negative error code on error: If a callback
+ * returns an error, the walk will be terminated and the error returned by
+ * this function.
+ */
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+		      u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+	const struct xe_pt_walk_ops *ops = walk->ops;
+	enum page_walk_action action = ACTION_SUBTREE;
+	struct xe_ptw *child = parent;
+	int err;
+
+	walk->shared_pt_mode = true;
+	err = ops->pt_entry(parent, 0, level + 1, addr, end,
+			    &child, &action, walk);
+	/* The root callback may fail or decline the descent. */
+	if (err || action != ACTION_SUBTREE)
+		return err;
+
+	err = xe_pt_walk_range(parent, level, addr, end, walk);
+	if (err || !ops->pt_post_descend)
+		return err;
+
+	return ops->pt_post_descend(parent, 0, level + 1, addr, end,
+				    &child, &action, walk);
+}
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h
new file mode 100644
index 000000000000..42c51fa601ec
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef __XE_PT_WALK__
+#define __XE_PT_WALK__
+
+#include <linux/pagewalk.h>
+#include <linux/types.h>
+
+struct xe_ptw_dir;
+
+/**
+ * struct xe_ptw - base class for driver pagetable subclassing.
+ * @dir: Pointer to an array of children if any.
+ *
+ * Drivers could subclass this, and if it's a page-directory, typically
+ * embed the xe_ptw_dir::entries array in the same allocation.
+ */
+struct xe_ptw {
+	struct xe_ptw_dir *dir;
+};
+
+/**
+ * struct xe_ptw_dir - page directory structure
+ * @entries: Array holding page directory children.
+ *
+ * It is the responsibility of the user to ensure @entries is
+ * correctly sized.
+ */
+struct xe_ptw_dir {
+	struct xe_ptw *entries[0];
+};
+
+/**
+ * struct xe_pt_walk - Embeddable struct for walk parameters
+ */
+struct xe_pt_walk {
+	/** @ops: The walk ops used for the pagewalk */
+	const struct xe_pt_walk_ops *ops;
+	/**
+	 * @shifts: Array of page-table entry shifts used for the
+	 * different levels, starting out with the leaf level 0
+	 * page-shift as the first entry. It's legal for this pointer to be
+	 * changed during the walk.
+	 */
+	const u64 *shifts;
+	/** @max_level: Highest populated level in @shifts */
+	unsigned int max_level;
+	/**
+	 * @shared_pt_mode: Whether to skip all entries that are private
+	 * to the address range and call the callbacks only for entries that
+	 * are shared with other address ranges. Such entries are referred to
+	 * as shared pagetables.
+	 */
+	bool shared_pt_mode;
+};
+
+/**
+ * typedef xe_pt_entry_fn - gpu page-table-walk callback-function
+ * @parent: The parent page table.
+ * @offset: The offset (number of entries) into the page table.
+ * @level: The level of @parent.
+ * @addr: The virtual address.
+ * @next: The virtual address for the next call, or end address.
+ * @child: Pointer to pointer to child page-table at this @offset. The
+ * function may modify the value pointed to if, for example, allocating a
+ * child page table.
+ * @action: The walk action to take upon return. See <linux/pagewalk.h>.
+ * @walk: The walk parameters.
+ */
+typedef int (*xe_pt_entry_fn)(struct xe_ptw *parent, pgoff_t offset,
+			      unsigned int level, u64 addr, u64 next,
+			      struct xe_ptw **child,
+			      enum page_walk_action *action,
+			      struct xe_pt_walk *walk);
+
+/**
+ * struct xe_pt_walk_ops - Walk callbacks.
+ */
+struct xe_pt_walk_ops {
+	/**
+	 * @pt_entry: Callback to be called for each page table entry prior
+	 * to descending to the next level. The returned value of the action
+	 * function parameter is honored.
+	 */
+	xe_pt_entry_fn pt_entry;
+	/**
+	 * @pt_post_descend: Callback to be called for each page table entry
+	 * after return from descending to the next level. The returned value
+	 * of the action function parameter is ignored.
+	 */
+	xe_pt_entry_fn pt_post_descend;
+};
+
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+		     u64 addr, u64 end, struct xe_pt_walk *walk);
+
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+		      u64 addr, u64 end, struct xe_pt_walk *walk);
+
+/**
+ * xe_pt_covers - Whether the address range covers an entire entry in @level
+ * @addr: Start of the range.
+ * @end: End of range + 1.
+ * @level: Page table level.
+ * @walk: Page table walk info.
+ *
+ * This function is a helper to aid in determining whether a leaf page table
+ * entry can be inserted at this @level.
+ *
+ * Return: Whether the range provided covers exactly an entry at this level.
+ */
+static inline bool xe_pt_covers(u64 addr, u64 end, unsigned int level,
+				const struct xe_pt_walk *walk)
+{
+	u64 pt_size = 1ull << walk->shifts[level];
+
+	return end - addr == pt_size && IS_ALIGNED(addr, pt_size);
+}
+
+/**
+ * xe_pt_num_entries: Number of page-table entries of a given range at this
+ * level
+ * @addr: Start address.
+ * @end: End address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The number of page table entries at this level between @addr and
+ * @end.
+ */
+static inline pgoff_t
+xe_pt_num_entries(u64 addr, u64 end, unsigned int level,
+		  const struct xe_pt_walk *walk)
+{
+	u64 pt_size = 1ull << walk->shifts[level];
+
+	return (round_up(end, pt_size) - round_down(addr, pt_size)) >>
+		walk->shifts[level];
+}
+
+/**
+ * xe_pt_offset: Offset of the page-table entry for a given address.
+ * @addr: The address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The page table entry offset for the given address in a
+ * page table with size indicated by @level.
+ */
+static inline pgoff_t
+xe_pt_offset(u64 addr, unsigned int level, const struct xe_pt_walk *walk)
+{
+	if (level < walk->max_level)
+		addr &= ((1ull << walk->shifts[level + 1]) - 1);
+
+	return addr >> walk->shifts[level];
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
new file mode 100644
index 000000000000..6e904e97f456
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/xe_drm.h>
+#include <drm/ttm/ttm_placement.h>
+#include <linux/nospec.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_query.h"
+#include "xe_ggtt.h"
+#include "xe_guc_hwconfig.h"
+
+static const enum xe_engine_class xe_to_user_engine_class[] = {
+	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
+	[XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
+	[XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
+	[XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
+};
+
+static size_t calc_hw_engine_info_size(struct xe_device *xe)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_gt *gt;
+	u8 gt_id;
+	int i = 0;
+
+	for_each_gt(gt, xe, gt_id)
+		for_each_hw_engine(hwe, gt, id) {
+			if (xe_hw_engine_is_reserved(hwe))
+				continue;
+			i++;
+		}
+
+	return i * sizeof(struct drm_xe_engine_class_instance);
+}
+
+static int query_engines(struct xe_device *xe,
+			 struct drm_xe_device_query *query)
+{
+	size_t size = calc_hw_engine_info_size(xe);
+	struct drm_xe_engine_class_instance __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_engine_class_instance *hw_engine_info;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_gt *gt;
+	u8 gt_id;
+	int i = 0;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	hw_engine_info = kmalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !hw_engine_info))
+		return -ENOMEM;
+
+	for_each_gt(gt, xe, gt_id)
+		for_each_hw_engine(hwe, gt, id) {
+			if (xe_hw_engine_is_reserved(hwe))
+				continue;
+
+			hw_engine_info[i].engine_class =
+				xe_to_user_engine_class[hwe->class];
+			hw_engine_info[i].engine_instance =
+				hwe->logical_instance;
+			hw_engine_info[i++].gt_id = gt->info.id;
+		}
+
+	if (copy_to_user(query_ptr, hw_engine_info, size)) {
+		kfree(hw_engine_info);
+		return -EFAULT;
+	}
+	kfree(hw_engine_info);
+
+	return 0;
+}
+
+static size_t calc_memory_usage_size(struct xe_device *xe)
+{
+	u32 num_managers = 1;
+	int i;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i)
+		if (ttm_manager_type(&xe->ttm, i))
+			num_managers++;
+
+	return offsetof(struct drm_xe_query_mem_usage, regions[num_managers]);
+}
+
+static int query_memory_usage(struct xe_device *xe,
+			      struct drm_xe_device_query *query)
+{
+	size_t size = calc_memory_usage_size(xe);
+	struct drm_xe_query_mem_usage *usage;
+	struct drm_xe_query_mem_usage __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct ttm_resource_manager *man;
+	int ret, i;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+	/* Zero-fill: every byte of this buffer is copied out to userspace. */
+	usage = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !usage))
+		return -ENOMEM;
+
+	usage->pad = 0;
+
+	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
+	usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM;
+	usage->regions[0].instance = 0;
+	usage->regions[0].pad = 0;
+	usage->regions[0].min_page_size = PAGE_SIZE;
+	usage->regions[0].max_page_size = PAGE_SIZE;
+	usage->regions[0].total_size = man->size << PAGE_SHIFT;
+	usage->regions[0].used = ttm_resource_manager_usage(man);
+	usage->num_regions = 1;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man) {
+			usage->regions[usage->num_regions].mem_class =
+				XE_MEM_REGION_CLASS_VRAM;
+			usage->regions[usage->num_regions].instance =
+				usage->num_regions;
+			usage->regions[usage->num_regions].pad = 0;
+			usage->regions[usage->num_regions].min_page_size =
+				xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
+				SZ_64K : PAGE_SIZE;
+			usage->regions[usage->num_regions].max_page_size =
+				SZ_1G;
+			usage->regions[usage->num_regions].total_size =
+				man->size;
+			usage->regions[usage->num_regions++].used =
+				ttm_resource_manager_usage(man);
+		}
+	}
+
+	if (!copy_to_user(query_ptr, usage, size))
+		ret = 0;
+	else
+		ret = -ENOSPC;
+
+	kfree(usage);
+	return ret;
+}
+
+static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+	u32 num_params = XE_QUERY_CONFIG_NUM_PARAM;
+	size_t size =
+		sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
+	struct drm_xe_query_config __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_config *config;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	config = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !config))
+		return -ENOMEM;
+
+	config->num_params = num_params;
+	config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
+		xe->info.devid | (xe->info.revid << 16);
+	if (to_gt(xe)->mem.vram.size)
+		config->info[XE_QUERY_CONFIG_FLAGS] =
+			XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
+	if (xe->info.enable_guc)
+		config->info[XE_QUERY_CONFIG_FLAGS] |=
+			XE_QUERY_CONFIG_FLAGS_USE_GUC;
+	config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] =
+		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+	config->info[XE_QUERY_CONFIG_VA_BITS] = 12 +
+		(9 * (xe->info.vm_max_level + 1));
+	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count;
+	config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
+		hweight_long(xe->info.mem_region_mask);
+
+	if (copy_to_user(query_ptr, config, size)) {
+		kfree(config);
+		return -EFAULT;
+	}
+	kfree(config);
+
+	return 0;
+}
+
+static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+	struct xe_gt *gt;
+	size_t size = sizeof(struct drm_xe_query_gts) +
+		xe->info.tile_count * sizeof(struct drm_xe_query_gt);
+	struct drm_xe_query_gts __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_gts *gts;
+	u8 id;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	gts = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !gts))
+		return -ENOMEM;
+
+	gts->num_gt = xe->info.tile_count;
+	for_each_gt(gt, xe, id) {
+		if (id == 0)
+			gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN;
+		else if (xe_gt_is_media_type(gt))
+			gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA;
+		else
+			gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE;
+		gts->gts[id].instance = id;
+		gts->gts[id].clock_freq = gt->info.clock_freq;
+		if (!IS_DGFX(xe))
+			gts->gts[id].native_mem_regions = 0x1;
+		else
+			gts->gts[id].native_mem_regions =
+				BIT(gt->info.vram_id) << 1;
+		gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^
+			gts->gts[id].native_mem_regions;
+	}
+
+	if (copy_to_user(query_ptr, gts, size)) {
+		kfree(gts);
+		return -EFAULT;
+	}
+	kfree(gts);
+
+	return 0;
+}
+
+static int query_hwconfig(struct xe_device *xe,
+			  struct drm_xe_device_query *query)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	void *hwconfig;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	hwconfig = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !hwconfig))
+		return -ENOMEM;
+
+	xe_device_mem_access_get(xe);
+	xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
+	xe_device_mem_access_put(xe);
+
+	if (copy_to_user(query_ptr, hwconfig, size)) {
+		kfree(hwconfig);
+		return -EFAULT;
+	}
+	kfree(hwconfig);
+
+	return 0;
+}
+
+static size_t calc_topo_query_size(struct xe_device *xe)
+{
+	return xe->info.tile_count *
+		(3 * sizeof(struct drm_xe_query_topology_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+}
+
+static void __user *copy_mask(void __user *ptr,
+			      struct drm_xe_query_topology_mask *topo,
+			      void *mask, size_t mask_size)
+{
+	topo->num_bytes = mask_size;
+
+	if (copy_to_user(ptr, topo, sizeof(*topo)))
+		return ERR_PTR(-EFAULT);
+	ptr += sizeof(*topo);	/* skip the header just written, not a pointer's width */
+
+	if (copy_to_user(ptr, mask, mask_size))
+		return ERR_PTR(-EFAULT);
+	ptr += mask_size;
+
+	return ptr;
+}
+
+static int query_gt_topology(struct xe_device *xe,
+			     struct drm_xe_device_query *query)
+{
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	size_t size = calc_topo_query_size(xe);
+	struct drm_xe_query_topology_mask topo;
+	struct xe_gt *gt;
+	int id;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	for_each_gt(gt, xe, id) {
+		topo.gt_id = id;
+
+		topo.type = XE_TOPO_DSS_GEOMETRY;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.g_dss_mask,
+				      sizeof(gt->fuse_topo.g_dss_mask));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+
+		topo.type = XE_TOPO_DSS_COMPUTE;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.c_dss_mask,
+				      sizeof(gt->fuse_topo.c_dss_mask));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+
+		topo.type = XE_TOPO_EU_PER_DSS;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.eu_mask_per_dss,
+				      sizeof(gt->fuse_topo.eu_mask_per_dss));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+	}
+
+	return 0;
+}
+
+static int (* const xe_query_funcs[])(struct xe_device *xe,
+				      struct drm_xe_device_query *query) = {
+	query_engines,
+	query_memory_usage,
+	query_config,
+	query_gts,
+	query_hwconfig,
+	query_gt_topology,
+};
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_device_query *query = data;
+	u32 idx;
+
+	if (XE_IOCTL_ERR(xe, query->extensions != 0))
+		return -EINVAL;
+	/* Valid query ids are 0..ARRAY_SIZE()-1; ARRAY_SIZE() itself is invalid. */
+	if (XE_IOCTL_ERR(xe, query->query >= ARRAY_SIZE(xe_query_funcs)))
+		return -EINVAL;
+
+	idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
+	if (XE_IOCTL_ERR(xe, !xe_query_funcs[idx]))
+		return -EINVAL;
+
+	return xe_query_funcs[idx](xe, query);
+}
diff --git a/drivers/gpu/drm/xe/xe_query.h b/drivers/gpu/drm/xe/xe_query.h
new file mode 100644
index 000000000000..beeb7a8192b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_QUERY_H_
+#define _XE_QUERY_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
new file mode 100644
index 000000000000..16e025dcf2cc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_reg_sr.h"
+
+#include <linux/align.h>
+#include <linux/string_helpers.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_print.h>
+#include <drm/drm_managed.h>
+
+#include "xe_rtp_types.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+
+#define XE_REG_SR_GROW_STEP_DEFAULT	16
+
+static void reg_sr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_reg_sr *sr = arg;
+
+	xa_destroy(&sr->xa);
+	kfree(sr->pool.arr);
+	memset(&sr->pool, 0, sizeof(sr->pool));
+}
+
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
+{
+	xa_init(&sr->xa);
+	memset(&sr->pool, 0, sizeof(sr->pool));
+	sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT;
+	sr->name = name;
+
+	return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
+}
+
+int xe_reg_sr_dump_kv(struct xe_reg_sr *sr,
+		      struct xe_reg_sr_kv **dst)
+{
+	struct xe_reg_sr_kv *iter;
+	struct xe_reg_sr_entry *entry;
+	unsigned long idx;
+
+	if (xa_empty(&sr->xa)) {
+		*dst = NULL;
+		return 0;
+	}
+
+	*dst = kmalloc_array(sr->pool.used, sizeof(**dst), GFP_KERNEL);
+	if (!*dst)
+		return -ENOMEM;
+
+	iter = *dst;
+	xa_for_each(&sr->xa, idx, entry) {
+		iter->k = idx;
+		iter->v = *entry;
+		iter++;
+	}
+
+	return 0;
+}
+
+static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
+{
+	if (sr->pool.used == sr->pool.allocated) {
+		struct xe_reg_sr_entry *arr;
+
+		arr = krealloc_array(sr->pool.arr,
+				     ALIGN(sr->pool.allocated + 1, sr->pool.grow_step),
+				     sizeof(*arr), GFP_KERNEL);
+		if (!arr)
+			return NULL;
+		/* NOTE(review): arr may have moved; sr->xa still holds old pointers */
+		sr->pool.arr = arr;
+		sr->pool.allocated += sr->pool.grow_step;
+	}
+
+	return &sr->pool.arr[sr->pool.used++];
+}
+
+static bool compatible_entries(const struct xe_reg_sr_entry *e1,
+			       const struct xe_reg_sr_entry *e2)
+{
+	/*
+	 * Don't allow overwriting values: clr_bits/set_bits should be disjoint
+	 * when operating in the same register
+	 */
+	if (e1->clr_bits & e2->clr_bits || e1->set_bits & e2->set_bits ||
+	    e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits)
+		return false;
+
+	if (e1->masked_reg != e2->masked_reg)
+		return false;
+
+	if (e1->reg_type != e2->reg_type)
+		return false;
+
+	return true;
+}
+
+int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
+		  const struct xe_reg_sr_entry *e)
+{
+	unsigned long idx = reg;
+	struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
+	int ret;
+
+	if (pentry) {
+		if (!compatible_entries(pentry, e)) {
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		pentry->clr_bits |= e->clr_bits;
+		pentry->set_bits |= e->set_bits;
+		pentry->read_mask |= e->read_mask;
+
+		return 0;
+	}
+
+	pentry = alloc_entry(sr);
+	if (!pentry) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	*pentry = *e;
+	ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
+	if (ret)
+		goto fail;
+
+	return 0;
+
+fail:
+	DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n",
+		  idx, e->clr_bits, e->set_bits,
+		  str_yes_no(e->masked_reg), ret);
+
+	return ret;
+}
+
+static void apply_one_mmio(struct xe_gt *gt, u32 reg,
+			   struct xe_reg_sr_entry *entry)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 val;
+
+	/*
+	 * If this is a masked register, the upper 16 bits select which of the
+	 * lower 16 bits get written: that mask is clr_bits (FIELD_SET and WR)
+	 * or, when clr_bits is empty, set_bits (SET), shifted into place.
+	 *
+	 * When it's not masked, we have to read it from hardware, unless we are
+	 * supposed to set all bits.
+	 */
+	if (entry->masked_reg)
+		val = (entry->clr_bits ?: entry->set_bits) << 16;
+	else if (entry->clr_bits + 1)
+		val = (entry->reg_type == XE_RTP_REG_MCR ?
+		       xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) :
+		       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+	else
+		val = 0;
+
+	/*
+	 * TODO: add selftest to validate all tables, regardless of platform:
+	 *   - Masked registers can't have set_bits with upper bits set
+	 *   - set_bits must be contained in clr_bits
+	 */
+	val |= entry->set_bits;
+
+	drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val);
+
+	if (entry->reg_type == XE_RTP_REG_MCR)
+		xe_gt_mcr_multicast_write(gt, MCR_REG(reg), val);
+	else
+		xe_mmio_write32(gt, reg, val);
+}
+
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+	int err;
+
+	drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_force_wake;
+
+	xa_for_each(&sr->xa, reg, entry)
+		apply_one_mmio(gt, reg, entry);
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	return;
+
+err_force_wake:
+	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+}
+
+void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
+			       struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+	unsigned int slot = 0;
+	int err;
+
+	drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_force_wake;
+	/* NOTE(review): slot is not checked against RING_MAX_NONPRIV_SLOTS here */
+	xa_for_each(&sr->xa, reg, entry) {
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg,
+				reg | entry->set_bits);
+		slot++;
+	}
+
+	/* And clear the rest just in case of garbage */
+	for (; slot < RING_MAX_NONPRIV_SLOTS; slot++)
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg,
+				RING_NOPID(mmio_base).reg);
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	return;
+
+err_force_wake:
+	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
new file mode 100644
index 000000000000..c3a9db251e92
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_
+#define _XE_REG_SR_
+
+#include "xe_reg_sr_types.h"
+
+/*
+ * Reg save/restore bookkeeping
+ */
+
+struct xe_device;
+struct xe_gt;
+
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
+int xe_reg_sr_dump_kv(struct xe_reg_sr *sr,
+		      struct xe_reg_sr_kv **dst);
+
+int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
+		  const struct xe_reg_sr_entry *e);
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt);
+void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
+			       struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
new file mode 100644
index 000000000000..2fa7ff3966ba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_TYPES_
+#define _XE_REG_SR_TYPES_
+
+#include <linux/xarray.h>
+#include <linux/types.h>
+
+#include "i915_reg_defs.h"
+
+struct xe_reg_sr_entry {
+	u32		clr_bits;
+	u32		set_bits;
+	/* Mask for bits to consider when reading value back */
+	u32		read_mask;
+	/*
+	 * "Masked registers" are marked in spec as register with the upper 16
+	 * bits as a mask for the bits that is being updated on the lower 16
+	 * bits when writing to it.
+	 */
+	u8		masked_reg;
+	u8		reg_type;
+};
+
+struct xe_reg_sr_kv {
+	u32			k;
+	struct xe_reg_sr_entry	v;
+};
+
+struct xe_reg_sr {
+	struct {
+		struct xe_reg_sr_entry *arr;
+		unsigned int used;
+		unsigned int allocated;
+		unsigned int grow_step;
+	} pool;
+	struct xarray xa;
+	const char *name;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
new file mode 100644
index 000000000000..2e0c87b72395
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_reg_whitelist.h"
+
+#include "xe_platform_types.h"
+#include "xe_gt_types.h"
+#include "xe_rtp.h"
+
+#include "../i915/gt/intel_engine_regs.h"
+#include "../i915/gt/intel_gt_regs.h"
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x)	_XE_RTP_REG(x)
+#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+
+static bool match_not_render(const struct xe_gt *gt,
+			     const struct xe_hw_engine *hwe)
+{
+	return hwe->class != XE_ENGINE_CLASS_RENDER;
+}
+
+static const struct xe_rtp_entry register_whitelist[] = {
+	{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_WHITELIST_REGISTER(PS_INVOCATION_COUNT,
+				RING_FORCE_TO_NONPRIV_ACCESS_RD |
+				RING_FORCE_TO_NONPRIV_RANGE_4)
+	},
+	{ XE_RTP_NAME("1508744258, 14012131227, 1808121037"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_WHITELIST_REGISTER(GEN7_COMMON_SLICE_CHICKEN1, 0)
+	},
+	{ XE_RTP_NAME("1806527549"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_WHITELIST_REGISTER(HIZ_CHICKEN, 0)
+	},
+	{ XE_RTP_NAME("allow_read_ctx_timestamp"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)),
+	  XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0),
+				RING_FORCE_TO_NONPRIV_ACCESS_RD,
+				XE_RTP_FLAG(ENGINE_BASE))
+	},
+	{ XE_RTP_NAME("16014440446_part_1"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_WHITELIST_REGISTER(_MMIO(0x4400),
+				RING_FORCE_TO_NONPRIV_DENY |
+				RING_FORCE_TO_NONPRIV_RANGE_64)
+	},
+	{ XE_RTP_NAME("16014440446_part_2"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_WHITELIST_REGISTER(_MMIO(0x4500),
+				RING_FORCE_TO_NONPRIV_DENY |
+				RING_FORCE_TO_NONPRIV_RANGE_64)
+	},
+	{}
+};
+
+/**
+ * xe_reg_whitelist_process_engine - process table of registers to whitelist
+ * @hwe: engine instance to process whitelist for
+ *
+ * Process whitelist table for this platform, saving in @hwe all the
+ * registers that need to be whitelisted by the hardware so they can be accessed
+ * by userspace.
+ */
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
+{
+	xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h
new file mode 100644
index 000000000000..6e861b1bdb01
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_REG_WHITELIST_
+#define _XE_REG_WHITELIST_
+
+struct xe_hw_engine;
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
new file mode 100644
index 000000000000..f54409850d74
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XE_RES_CURSOR_H__
+#define __XE_RES_CURSOR_H__
+
+#include <linux/scatterlist.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_macros.h"
+#include "xe_ttm_vram_mgr.h"
+
+/* state back for walking over vram_mgr and gtt_mgr allocations */
+struct xe_res_cursor {
+	u64 start;
+	u64 size;
+	u64 remaining;
+	void *node;
+	u32 mem_type;
+	struct scatterlist *sgl;
+};
+
+/**
+ * xe_res_first - initialize a xe_res_cursor
+ *
+ * @res: TTM resource object to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations starting at @start, @size long.
+ */
+static inline void xe_res_first(struct ttm_resource *res,
+				u64 start, u64 size,
+				struct xe_res_cursor *cur)
+{
+	struct drm_buddy_block *block;
+	struct list_head *head, *next;
+
+	cur->sgl = NULL;
+	if (!res)
+		goto fallback;
+
+	XE_BUG_ON(start + size > res->size);
+
+	cur->mem_type = res->mem_type;
+
+	switch (cur->mem_type) {
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		head = &to_xe_ttm_vram_mgr_resource(res)->blocks;
+
+		block = list_first_entry_or_null(head,
+						 struct drm_buddy_block,
+						 link);
+		if (!block)
+			goto fallback;
+
+		while (start >= xe_ttm_vram_mgr_block_size(block)) {
+			start -= xe_ttm_vram_mgr_block_size(block);
+
+			next = block->link.next;
+			if (next != head)
+				block = list_entry(next, struct drm_buddy_block,
+						   link);
+		}
+
+		cur->start = xe_ttm_vram_mgr_block_start(block) + start;
+		cur->size = min(xe_ttm_vram_mgr_block_size(block) - start,
+				size);
+		cur->remaining = size;
+		cur->node = block;
+		break;
+	default:
+		goto fallback;
+	}
+
+	return;
+
+fallback:
+	cur->start = start;
+	cur->size = size;
+	cur->remaining = size;
+	cur->node = NULL;
+	cur->mem_type = XE_PL_TT;
+	XE_WARN_ON(res && start + size > res->size);
+	return;
+}
+
+static inline void __xe_res_sg_next(struct xe_res_cursor *cur)
+{
+	struct scatterlist *sgl = cur->sgl;
+	u64 start = cur->start;
+
+	while (start >= sg_dma_len(sgl)) {
+		start -= sg_dma_len(sgl);
+		sgl = sg_next(sgl);
+		XE_BUG_ON(!sgl);
+	}
+
+	cur->start = start;
+	cur->size = sg_dma_len(sgl) - start;
+	cur->sgl = sgl;
+}
+
+/**
+ * xe_res_first_sg - initialize a xe_res_cursor with a scatter gather table
+ *
+ * @sg: scatter gather table to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations starting at @start, @size long.
+ */
+static inline void xe_res_first_sg(const struct sg_table *sg,
+				   u64 start, u64 size,
+				   struct xe_res_cursor *cur)
+{
+	XE_BUG_ON(!sg);
+	XE_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE) ||
+		  !IS_ALIGNED(size, PAGE_SIZE));
+	cur->node = NULL;
+	cur->start = start;
+	cur->remaining = size;
+	cur->size = 0;
+	cur->sgl = sg->sgl;
+	cur->mem_type = XE_PL_TT;
+	__xe_res_sg_next(cur);
+}
+
+/**
+ * xe_res_next - advance the cursor
+ *
+ * @cur: the cursor to advance
+ * @size: number of bytes to move forward
+ *
+ * Move the cursor @size bytes forward, walking to the next node if necessary.
+ */
+static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
+{
+	struct drm_buddy_block *block;
+	struct list_head *next;
+	u64 start;
+
+	XE_BUG_ON(size > cur->remaining);
+
+	cur->remaining -= size;
+	if (!cur->remaining)
+		return;
+
+	if (cur->size > size) {
+		cur->size -= size;
+		cur->start += size;
+		return;
+	}
+
+	if (cur->sgl) {
+		cur->start += size;
+		__xe_res_sg_next(cur);
+		return;
+	}
+
+	switch (cur->mem_type) {
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		start = size - cur->size;
+		block = cur->node;
+
+		next = block->link.next;
+		block = list_entry(next, struct drm_buddy_block, link);
+
+
+		while (start >= xe_ttm_vram_mgr_block_size(block)) {
+			start -= xe_ttm_vram_mgr_block_size(block);
+
+			next = block->link.next;
+			block = list_entry(next, struct drm_buddy_block, link);
+		}
+
+		cur->start = xe_ttm_vram_mgr_block_start(block) + start;
+		cur->size = min(xe_ttm_vram_mgr_block_size(block) - start,
+				cur->remaining);
+		cur->node = block;
+		break;
+	default:
+		return;
+	}
+}
+
+/**
+ * xe_res_dma - return dma address of cursor at current position
+ *
+ * @cur: the cursor to return the dma address from
+ */
+static inline u64 xe_res_dma(const struct xe_res_cursor *cur)
+{
+	return cur->sgl ? sg_dma_address(cur->sgl) + cur->start : cur->start;
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
new file mode 100644
index 000000000000..fda7978a63e0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_engine_types.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_ring_ops.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc_reg.h"
+
+static u32 preparser_disable(bool state)
+{
+	return MI_ARB_CHECK | BIT(8) | state;
+}
+
+static int emit_aux_table_inv(struct xe_gt *gt, u32 addr, u32 *dw, int i)
+{
+	dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
+	dw[i++] = addr + gt->mmio.adj_offset;
+	dw[i++] = AUX_INV;
+	dw[i++] = MI_NOOP;
+
+	return i;
+}
+
+static int emit_user_interrupt(u32 *dw, int i)
+{
+	dw[i++] = MI_USER_INTERRUPT;
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	dw[i++] = MI_ARB_CHECK;
+
+	return i;
+}
+
+static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+{
+	dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
+	dw[i++] = addr;
+	dw[i++] = 0;
+	dw[i++] = value;
+
+	return i;
+}
+
+static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+{
+	dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = value;
+
+	return i;
+}
+
+static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
+{
+	dw[i++] = MI_BATCH_BUFFER_START_GEN8 | ppgtt_flag;
+	dw[i++] = lower_32_bits(batch_addr);
+	dw[i++] = upper_32_bits(batch_addr);
+
+	return i;
+}
+
+static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
+{
+	/* Command dword: caller's @flag OR'ed into the fixed MI_FLUSH_DW bits */
+	u32 cmd = (MI_FLUSH_DW + 1) | flag | MI_INVALIDATE_TLB |
+		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+
+	dw[i++] = cmd;
+	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = ~0U;
+
+	return i;
+}
+
+static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
+{
+	u32 flags = PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
+		PIPE_CONTROL_TLB_INVALIDATE |
+		PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+		PIPE_CONTROL_VF_CACHE_INVALIDATE |
+		PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+		PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+		PIPE_CONTROL_QW_WRITE |
+		PIPE_CONTROL_STORE_DATA_INDEX;
+
+	flags &= ~mask_flags;
+
+	dw[i++] = GFX_OP_PIPE_CONTROL(6);
+	dw[i++] = flags;
+	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
+#define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21))
+
+static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
+				       u32 *dw, int i)
+{
+	dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED;
+	dw[i++] = lower_32_bits(addr);
+	dw[i++] = upper_32_bits(addr);
+	dw[i++] = lower_32_bits(value);
+	dw[i++] = upper_32_bits(value);
+
+	return i;
+}
+
+static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
+			      int i)
+{
+	dw[i++] = GFX_OP_PIPE_CONTROL(6);
+	dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL :
+		   PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) |
+		PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
+	dw[i++] = addr;
+	dw[i++] = 0;
+	dw[i++] = value;
+	dw[i++] = 0; /* We're thrashing one extra dword. */
+
+	return i;
+}
+
+static u32 get_ppgtt_flag(struct xe_sched_job *job)
+{
+	return (job->engine->flags & ENGINE_FLAG_WA) ? 0 : BIT(8);
+}
+
+static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
+				  u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+
+	/* XXX: Conditional flushing possible */
+	dw[i++] = preparser_disable(true);
+	i = emit_flush_invalidate(0, dw, i);
+	dw[i++] = preparser_disable(false);
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
+				   u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_gt *gt = job->engine->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+
+	/* XXX: Conditional flushing possible */
+	dw[i++] = preparser_disable(true);
+	i = emit_flush_invalidate(decode ? MI_INVALIDATE_BSD : 0, dw, i);
+	/* Wa_1809175790 */
+	if (!xe->info.has_flat_ccs) {
+		if (decode)
+			i = emit_aux_table_inv(gt, GEN12_VD0_AUX_INV.reg, dw, i);
+		else
+			i = emit_aux_table_inv(gt, GEN12_VE0_AUX_INV.reg, dw, i);
+	}
+	dw[i++] = preparser_disable(false);
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+/*
+ * 3D-related flags that can't be set on _engines_ that lack access to the 3D
+ * pipeline (i.e., CCS engines).
+ */
+#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
+		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
+		PIPE_CONTROL_TILE_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_STALL | \
+		PIPE_CONTROL_STALL_AT_SCOREBOARD | \
+		PIPE_CONTROL_PSD_SYNC | \
+		PIPE_CONTROL_AMFS_FLUSH | \
+		PIPE_CONTROL_VF_CACHE_INVALIDATE | \
+		PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
+
+/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */
+#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
+		PIPE_CONTROL_3D_ENGINE_FLAGS | \
+		PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
+		PIPE_CONTROL_FLUSH_ENABLE | \
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+		PIPE_CONTROL_DC_FLUSH_ENABLE)
+
+static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
+					    struct xe_lrc *lrc,
+					    u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_gt *gt = job->engine->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool pvc = xe->info.platform == XE_PVC;
+	u32 mask_flags = 0;
+
+	/* XXX: Conditional flushing possible */
+	dw[i++] = preparser_disable(true);
+	if (pvc)
+		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
+	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
+		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
+	i = emit_pipe_invalidate(mask_flags, dw, i);
+	/* Wa_1809175790 */
+	if (!xe->info.has_flat_ccs)
+		i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i);
+	dw[i++] = preparser_disable(false);
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, pvc, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_migration_job_gen12(struct xe_sched_job *job,
+				     struct xe_lrc *lrc, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);
+
+	dw[i++] = preparser_disable(true);
+	i = emit_flush_invalidate(0, dw, i);
+	dw[i++] = preparser_disable(false);
+
+	i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);
+
+	dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
+		   MI_FLUSH_DW_OP_STOREDW) + 1;
+	dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = seqno; /* value */
+
+	i = emit_user_interrupt(dw, i);
+
+	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_job_gen12_copy(struct xe_sched_job *job)
+{
+	int i;
+
+	if (xe_sched_job_is_migration(job->engine)) {
+		emit_migration_job_gen12(job, job->engine->lrc,
+					 xe_sched_job_seqno(job));
+		return;
+	}
+
+	for (i = 0; i < job->engine->width; ++i)
+		__emit_job_gen12_copy(job, job->engine->lrc + i,
+				      job->batch_addr[i],
+				      xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_video(struct xe_sched_job *job)
+{
+	int i;
+
+	/* FIXME: Not doing parallel handshake for now */
+	for (i = 0; i < job->engine->width; ++i)
+		__emit_job_gen12_video(job, job->engine->lrc + i,
+				       job->batch_addr[i],
+				       xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_render_compute(struct xe_sched_job *job)
+{
+	int i;
+
+	for (i = 0; i < job->engine->width; ++i)
+		__emit_job_gen12_render_compute(job, job->engine->lrc + i,
+						job->batch_addr[i],
+						xe_sched_job_seqno(job));
+}
+
+static const struct xe_ring_ops ring_ops_gen12_copy = {
+	.emit_job = emit_job_gen12_copy,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_video = {
+	.emit_job = emit_job_gen12_video,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_render_compute = {
+	.emit_job = emit_job_gen12_render_compute,
+};
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_COPY:
+		return &ring_ops_gen12_copy;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return &ring_ops_gen12_video;
+	case XE_ENGINE_CLASS_RENDER:
+	case XE_ENGINE_CLASS_COMPUTE:
+		return &ring_ops_gen12_render_compute;
+	default:
+		return NULL;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.h b/drivers/gpu/drm/xe/xe_ring_ops.h
new file mode 100644
index 000000000000..e942735d76a6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_H_
+#define _XE_RING_OPS_H_
+
+#include "xe_hw_engine_types.h"
+#include "xe_ring_ops_types.h"
+
+struct xe_gt;
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h
new file mode 100644
index 000000000000..1ae56e2ee7b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_TYPES_H_
+#define _XE_RING_OPS_TYPES_H_
+
+struct xe_sched_job;
+
+#define MAX_JOB_SIZE_DW 48
+#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)
+
+/**
+ * struct xe_ring_ops - Ring operations
+ */
+struct xe_ring_ops {
+	/** @emit_job: Write job to ring */
+	void (*emit_job)(struct xe_sched_job *job);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
new file mode 100644
index 000000000000..9e8d0e43c643
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_rtp.h"
+
+#include <drm/xe_drm.h>
+
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_reg_sr.h"
+
+/**
+ * DOC: Register Table Processing
+ *
+ * Internal infrastructure to define how registers should be updated based on
+ * rules and actions. This can be used to define tables with multiple entries
+ * (one per register) that will be walked over at some point in time to apply
+ * the values to the registers that have matching rules.
+ */
+
+/* AND together all rules of @entry; engine rules never match a NULL @hwe. */
+static bool rule_matches(struct xe_gt *gt,
+			 struct xe_hw_engine *hwe,
+			 const struct xe_rtp_entry *entry)
+{
+	const struct xe_device *xe = gt_to_xe(gt);
+	const struct xe_rtp_rule *r;
+	unsigned int i;
+	bool match;
+
+	for (r = entry->rules, i = 0; i < entry->n_rules;
+	     r = &entry->rules[++i]) {
+		switch (r->match_type) {
+		case XE_RTP_MATCH_PLATFORM:
+			match = xe->info.platform == r->platform;
+			break;
+		case XE_RTP_MATCH_SUBPLATFORM:
+			match = xe->info.platform == r->platform &&
+				xe->info.subplatform == r->subplatform;
+			break;
+		case XE_RTP_MATCH_GRAPHICS_VERSION:
+			/* TODO: match display */
+			match = xe->info.graphics_verx100 == r->ver_start;
+			break;
+		case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE:
+			match = xe->info.graphics_verx100 >= r->ver_start &&
+				xe->info.graphics_verx100 <= r->ver_end;
+			break;
+		case XE_RTP_MATCH_MEDIA_VERSION:
+			match = xe->info.media_verx100 == r->ver_start;
+			break;
+		case XE_RTP_MATCH_MEDIA_VERSION_RANGE:
+			match = xe->info.media_verx100 >= r->ver_start &&
+				xe->info.media_verx100 <= r->ver_end;
+			break;
+		case XE_RTP_MATCH_STEP:
+			/* TODO: match media/display */
+			match = xe->info.step.graphics >= r->step_start &&
+				xe->info.step.graphics < r->step_end;
+			break;
+		case XE_RTP_MATCH_ENGINE_CLASS:
+			match = hwe && hwe->class == r->engine_class;
+			break;
+		case XE_RTP_MATCH_NOT_ENGINE_CLASS:
+			match = hwe && hwe->class != r->engine_class;
+			break;
+		case XE_RTP_MATCH_FUNC:
+			match = r->match_func(gt, hwe);
+			break;
+		case XE_RTP_MATCH_INTEGRATED:
+			match = !xe->info.is_dgfx;
+			break;
+		case XE_RTP_MATCH_DISCRETE:
+			match = xe->info.is_dgfx;
+			break;
+		default:
+			XE_WARN_ON(r->match_type);
+			match = false; /* unknown rule type never matches */
+		}
+		if (!match)
+			return false;
+	}
+
+	return true;
+}
+
+static void rtp_add_sr_entry(const struct xe_rtp_entry *entry,
+			     struct xe_gt *gt,
+			     u32 mmio_base,
+			     struct xe_reg_sr *sr)
+{
+	u32 reg = entry->regval.reg + mmio_base;
+	struct xe_reg_sr_entry sr_entry = {
+		.clr_bits = entry->regval.clr_bits,
+		.set_bits = entry->regval.set_bits,
+		.read_mask = entry->regval.read_mask,
+		.masked_reg = entry->regval.flags & XE_RTP_FLAG_MASKED_REG,
+		.reg_type = entry->regval.reg_type,
+	};
+
+	xe_reg_sr_add(sr, reg, &sr_entry);
+}
+
+/**
+ * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr
+ * @entries: Table with RTP definitions
+ * @sr: Where to add an entry to with the values for matching. This can be
+ *      viewed as the "coalesced view" of multiple tables. The bits for each
+ *      register set are expected not to collide with previously added entries
+ * @gt: The GT to be used for matching rules
+ * @hwe: Engine instance to use for matching rules and as mmio base
+ *
+ * Walk the table pointed by @entries (with an empty sentinel) and add all
+ * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is
+ * used to calculate the right register offset
+ */
+void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
+		    struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+	const struct xe_rtp_entry *entry;
+	struct xe_hw_engine *each_hwe;
+	enum xe_hw_engine_id id;
+
+	for (entry = entries; entry && entry->name; entry++) {
+		const u8 flags = entry->regval.flags;
+
+		if (flags & XE_RTP_FLAG_FOREACH_ENGINE) {
+			/* Evaluate the rules once per engine of @gt */
+			for_each_hw_engine(each_hwe, gt, id) {
+				if (rule_matches(gt, each_hwe, entry))
+					rtp_add_sr_entry(entry, gt,
+							 each_hwe->mmio_base, sr);
+			}
+			continue;
+		}
+
+		if (!rule_matches(gt, hwe, entry))
+			continue;
+		rtp_add_sr_entry(entry, gt, (flags & XE_RTP_FLAG_ENGINE_BASE) ?
+				 hwe->mmio_base : 0, sr);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
new file mode 100644
index 000000000000..d4e11fdde77f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_
+#define _XE_RTP_
+
+#include <linux/xarray.h>
+#include <linux/types.h>
+
+#include "xe_rtp_types.h"
+
+#include "i915_reg_defs.h"
+
+/*
+ * Register table poke infrastructure
+ */
+
+struct xe_hw_engine;
+struct xe_gt;
+struct xe_reg_sr;
+
+/*
+ * Helper macros - not to be used outside this header.
+ */
+/* This counts to 12. Any more, it will return 13th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
+#define __CALL_FOR_EACH_1(MACRO_, x, ...) MACRO_(x)
+#define __CALL_FOR_EACH_2(MACRO_, x, ...)					\
+	MACRO_(x) __CALL_FOR_EACH_1(MACRO_, ##__VA_ARGS__)
+#define __CALL_FOR_EACH_3(MACRO_, x, ...)					\
+	MACRO_(x) __CALL_FOR_EACH_2(MACRO_, ##__VA_ARGS__)
+#define __CALL_FOR_EACH_4(MACRO_, x, ...)					\
+	MACRO_(x) __CALL_FOR_EACH_3(MACRO_, ##__VA_ARGS__)
+
+#define _CALL_FOR_EACH(NARGS_, MACRO_, x, ...)					\
+	CONCATENATE(__CALL_FOR_EACH_, NARGS_)(MACRO_, x, ##__VA_ARGS__)
+#define CALL_FOR_EACH(MACRO_, x, ...)						\
+	_CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__)
+
+#define _XE_RTP_REG(x_)	(x_),						\
+			.reg_type = XE_RTP_REG_REGULAR
+#define _XE_RTP_MCR_REG(x_) (x_),					\
+			    .reg_type = XE_RTP_REG_MCR
+
+/*
+ * Helper macros for concatenating prefix - do not use them directly outside
+ * this header
+ */
+#define __ADD_XE_RTP_FLAG_PREFIX(x) CONCATENATE(XE_RTP_FLAG_, x) |
+#define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) ,
+
+/*
+ * Macros to encode rules to match against platform, IP version, stepping, etc.
+ * Shouldn't be used directly - see XE_RTP_RULES()
+ */
+
+#define _XE_RTP_RULE_PLATFORM(plat__)						\
+	{ .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ }
+
+#define _XE_RTP_RULE_SUBPLATFORM(plat__, sub__)					\
+	{ .match_type = XE_RTP_MATCH_SUBPLATFORM,				\
+	  .platform = plat__, .subplatform = sub__ }
+
+#define _XE_RTP_RULE_STEP(start__, end__)					\
+	{ .match_type = XE_RTP_MATCH_STEP,					\
+	  .step_start = start__, .step_end = end__ }
+
+#define _XE_RTP_RULE_ENGINE_CLASS(cls__)					\
+	{ .match_type = XE_RTP_MATCH_ENGINE_CLASS,				\
+	  .engine_class = (cls__) }
+
+/**
+ * XE_RTP_RULE_PLATFORM - Create rule matching platform
+ * @plat_: platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_PLATFORM(plat_)						\
+	_XE_RTP_RULE_PLATFORM(XE_##plat_)
+
+/**
+ * XE_RTP_RULE_SUBPLATFORM - Create rule matching platform and sub-platform
+ * @plat_: platform to match
+ * @sub_: sub-platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_SUBPLATFORM(plat_, sub_)					\
+	_XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_)
+
+/**
+ * XE_RTP_RULE_STEP - Create rule matching platform stepping
+ * @start_: First stepping matching the rule
+ * @end_: First stepping that does not match the rule
+ *
+ * Note that the range matching this rule [ @start_, @end_ ), i.e. inclusive on
+ * the left, exclusive on the right.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_STEP(start_, end_)						\
+	_XE_RTP_RULE_STEP(STEP_##start_, STEP_##end_)
+
+/**
+ * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class
+ * @cls_: Engine class to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_ENGINE_CLASS(cls_)						\
+	_XE_RTP_RULE_ENGINE_CLASS(XE_ENGINE_CLASS_##cls_)
+
+/**
+ * XE_RTP_RULE_FUNC - Create rule using callback function for match
+ * @func__: Function to call to decide if rule matches
+ *
+ * This allows more complex checks to be performed. The ``XE_RTP``
+ * infrastructure will simply call the function @func__ passed to decide if this
+ * rule matches the device.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_FUNC(func__)						\
+	{ .match_type = XE_RTP_MATCH_FUNC,					\
+	  .match_func = (func__) }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION - Create rule matching graphics version
+ * @ver__: Graphics IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION(ver__)					\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION,				\
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION_RANGE - Create rule matching a range of graphics version
+ * @ver_start__: First graphics IP version to match
+ * @ver_end__: Last graphics IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on both sides
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION_RANGE(ver_start__, ver_end__)		\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,			\
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version
+ * @ver__: Media IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION(ver__)					\
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION,				\
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION_RANGE - Create rule matching a range of media version
+ * @ver_start__: First media IP version to match
+ * @ver_end__: Last media IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on both sides
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION_RANGE(ver_start__, ver_end__)			\
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE,			\
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_INTEGRATED						\
+	{ .match_type = XE_RTP_MATCH_INTEGRATED }
+
+/**
+ * XE_RTP_RULE_IS_DISCRETE - Create a rule matching discrete graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_DISCRETE							\
+	{ .match_type = XE_RTP_MATCH_DISCRETE }
+
+/**
+ * XE_RTP_WR - Helper to write a value to the register, overriding all the bits
+ * @reg_: Register
+ * @val_: Value to set
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * The correspondent notation in bspec is:
+ *
+ *	REGNAME = VALUE
+ */
+#define XE_RTP_WR(reg_, val_, ...)						\
+	.regval = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_),		\
+		    .read_mask = (~0u), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_SET - Set bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to set in the register
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 1
+ *	REGNAME[5] = 1
+ */
+#define XE_RTP_SET(reg_, val_, ...)						\
+	.regval = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_),	\
+		    .read_mask = (val_), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_CLR: Clear bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to clear in the register
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 0
+ *	REGNAME[5] = 0
+ */
+#define XE_RTP_CLR(reg_, val_, ...)						\
+	.regval = { .reg = reg_, .clr_bits = (val_), .set_bits = 0,		\
+		    .read_mask = (val_), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_FIELD_SET: Set a bit range, defined by @mask_bits_, to @val_
+ * @reg_: Register
+ * @mask_bits_: Mask of bits to be changed in the register, forming a field
+ * @val_: Value to set in the field denoted by @mask_bits_
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is:
+ *
+ *	REGNAME[<end>:<start>] = VALUE
+ */
+#define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...)				\
+	.regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
+		    .read_mask = (mask_bits_), ##__VA_ARGS__ }
+
+#define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...)		\
+	.regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
+		    .read_mask = 0, ##__VA_ARGS__ }
+
+/**
+ * XE_WHITELIST_REGISTER - Add register to userspace whitelist
+ * @reg_: Register
+ * @flags_: Whitelist-specific flags to set
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * Add a register to the whitelist, allowing userspace to modify the register
+ * with regular user privileges.
+ */
+#define XE_WHITELIST_REGISTER(reg_, flags_, ...)				\
+	/* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\
+	.regval = { .reg = reg_, .set_bits = (flags_),			\
+		    .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID,		\
+		    ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_NAME - Helper to set the name in xe_rtp_entry
+ * @s_: Name describing this rule, often a HW-specific number
+ *
+ * TODO: maybe move this behind a debug config?
+ */
+#define XE_RTP_NAME(s_)	.name = (s_)
+
+/**
+ * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_regval entry
+ * @f1_: Last part of a ``XE_RTP_FLAG_*``
+ * @...: Additional flags, defined like @f1_
+ *
+ * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be
+ * easily used to define struct xe_rtp_regval entries. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_FLAG(FOREACH_ENGINE, MASKED_REG),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_FLAG(f1_, ...)							\
+	.flags = (CALL_FOR_EACH(__ADD_XE_RTP_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0)
+
+/**
+ * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry
+ * @r1: Last part of XE_RTP_RULE_*
+ * @...: Additional rules, defined like @r1
+ *
+ * At least one rule is needed and up to 4 are supported. Multiple rules are
+ * AND'ed together, i.e. all the rules must evaluate to true for the entry to
+ * be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_RULES(r1, ...)							\
+	.n_rules = COUNT_ARGS(r1, ##__VA_ARGS__),				\
+	.rules = (struct xe_rtp_rule[]) {					\
+		CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__)	\
+	}
+
+void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
+		    struct xe_gt *gt, struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
new file mode 100644
index 000000000000..b55b556a2495
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_TYPES_
+#define _XE_RTP_TYPES_
+
+#include <linux/types.h>
+
+#include "i915_reg_defs.h"
+
+struct xe_hw_engine;
+struct xe_gt;
+
+enum {
+	XE_RTP_REG_REGULAR,
+	XE_RTP_REG_MCR,
+};
+
+/**
+ * struct xe_rtp_regval - register and value for rtp table
+ */
+struct xe_rtp_regval {
+	/** @reg: Register */
+	u32		reg;
+	/*
+	 * TODO: maybe we need a union here with a func pointer for cases
+	 * that are too specific to be generalized
+	 */
+	/** @clr_bits: bits to clear when updating register */
+	u32		clr_bits;
+	/** @set_bits: bits to set when updating register */
+	u32		set_bits;
+#define XE_RTP_NOCHECK		.read_mask = 0
+	/** @read_mask: mask for bits to consider when reading value back */
+	u32		read_mask;
+#define XE_RTP_FLAG_FOREACH_ENGINE	BIT(0)
+#define XE_RTP_FLAG_MASKED_REG		BIT(1)
+#define XE_RTP_FLAG_ENGINE_BASE		BIT(2)
+	/** @flags: flags to apply on rule evaluation or action */
+	u8		flags;
+	/** @reg_type: register type, see ``XE_RTP_REG_*`` */
+	u8		reg_type;
+};
+
+enum {
+	XE_RTP_MATCH_PLATFORM,
+	XE_RTP_MATCH_SUBPLATFORM,
+	XE_RTP_MATCH_GRAPHICS_VERSION,
+	XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,
+	XE_RTP_MATCH_MEDIA_VERSION,
+	XE_RTP_MATCH_MEDIA_VERSION_RANGE,
+	XE_RTP_MATCH_INTEGRATED,
+	XE_RTP_MATCH_DISCRETE,
+	XE_RTP_MATCH_STEP,
+	XE_RTP_MATCH_ENGINE_CLASS,
+	XE_RTP_MATCH_NOT_ENGINE_CLASS,
+	XE_RTP_MATCH_FUNC,
+};
+
+/** struct xe_rtp_rule - match rule for processing entry */
+struct xe_rtp_rule {
+	u8 match_type;
+
+	/* match filters */
+	union {
+		/* MATCH_PLATFORM / MATCH_SUBPLATFORM */
+		struct {
+			u8 platform;
+			u8 subplatform;
+		};
+		/*
+		 * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE /
+		 * MATCH_MEDIA_VERSION  / XE_RTP_MATCH_MEDIA_VERSION_RANGE
+		 */
+		struct {
+			u32 ver_start;
+#define XE_RTP_END_VERSION_UNDEFINED	U32_MAX
+			u32 ver_end;
+		};
+		/* MATCH_STEP */
+		struct {
+			u8 step_start;
+			u8 step_end;
+		};
+		/* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */
+		struct {
+			u8 engine_class;
+		};
+		/* MATCH_FUNC */
+		bool (*match_func)(const struct xe_gt *gt,
+				   const struct xe_hw_engine *hwe);
+	};
+};
+
+/** struct xe_rtp_entry - Entry in an rtp table */
+struct xe_rtp_entry {
+	const char *name;
+	const struct xe_rtp_regval regval;
+	const struct xe_rtp_rule *rules;
+	unsigned int n_rules;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
new file mode 100644
index 000000000000..7403410cd806
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_sa.h"
+
+static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_sa_manager *sa_manager = arg;
+	struct xe_bo *bo = sa_manager->bo;
+
+	if (!bo) {
+		drm_err(drm, "no bo for sa manager\n");
+		return;
+	}
+
+	drm_suballoc_manager_fini(&sa_manager->base);
+
+	if (bo->vmap.is_iomem)
+		kvfree(sa_manager->cpu_ptr);
+
+	xe_bo_unpin_map_no_vm(bo);
+	sa_manager->bo = NULL;
+}
+
+int xe_sa_bo_manager_init(struct xe_gt *gt,
+			  struct xe_sa_manager *sa_manager,
+			  u32 size, u32 align)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 managed_size = size - SZ_4K;
+	struct xe_bo *bo;
+
+	sa_manager->bo = NULL;
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, size, ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo)) {
+		drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
+			PTR_ERR(bo));
+		return PTR_ERR(bo);
+	}
+	sa_manager->bo = bo;
+
+	drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
+	sa_manager->gpu_addr = xe_bo_ggtt_addr(bo);
+
+	if (bo->vmap.is_iomem) {
+		sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL);
+		if (!sa_manager->cpu_ptr) {
+			xe_bo_unpin_map_no_vm(sa_manager->bo);
+			sa_manager->bo = NULL;
+			return -ENOMEM;
+		}
+	} else {
+		sa_manager->cpu_ptr = bo->vmap.vaddr;
+		memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
+	}
+
+	return drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
+					sa_manager);
+}
+
+/* Suballocate @size bytes from the range managed by @sa_manager. */
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size)
+{
+	return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0);
+}
+
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo)
+{
+	struct xe_bo *bo = to_xe_sa_manager(sa_bo->manager)->bo;
+	struct xe_device *xe = gt_to_xe(bo->gt);
+
+	/* Nothing to copy unless the BO is mapped as iomem */
+	if (!bo->vmap.is_iomem)
+		return;
+
+	xe_map_memcpy_to(xe, &bo->vmap, drm_suballoc_soffset(sa_bo),
+			 xe_sa_bo_cpu_addr(sa_bo), drm_suballoc_size(sa_bo));
+}
+
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+		   struct dma_fence *fence)
+{
+	drm_suballoc_free(sa_bo, fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
new file mode 100644
index 000000000000..742282ef7179
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_H_
+#define _XE_SA_H_
+
+#include "xe_sa_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_gt;
+
+int xe_sa_bo_manager_init(struct xe_gt *gt,
+			  struct xe_sa_manager *sa_manager,
+			  u32 size, u32 align);
+
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
+				  u32 size);
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo);
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+		   struct dma_fence *fence);
+
+/* Convert an embedded drm_suballoc_manager (the base member) to its xe_sa_manager */
+static inline struct xe_sa_manager *
+to_xe_sa_manager(struct drm_suballoc_manager *mng)
+{
+	return container_of(mng, struct xe_sa_manager, base);
+}
+
+/* GPU address of a suballocation: manager's gpu_addr plus the start offset */
+static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa)
+{
+	struct xe_sa_manager *mgr = to_xe_sa_manager(sa->manager);
+
+	return mgr->gpu_addr + drm_suballoc_soffset(sa);
+}
+
+/* CPU address of a suballocation: manager's cpu_ptr plus the start offset */
+static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
+{
+	struct xe_sa_manager *mgr = to_xe_sa_manager(sa->manager);
+
+	return mgr->cpu_ptr + drm_suballoc_soffset(sa);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
new file mode 100644
index 000000000000..2ef896aeca1d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_TYPES_H_
+#define _XE_SA_TYPES_H_
+
+#include <drm/drm_suballoc.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_sa_manager - suballocation manager backed by a single BO
+ */
+struct xe_sa_manager {
+	/** @base: DRM suballocation manager; see to_xe_sa_manager() */
+	struct drm_suballoc_manager base;
+	/** @bo: backing buffer object, NULL when not initialized */
+	struct xe_bo *bo;
+	/** @gpu_addr: GGTT address of @bo */
+	u64 gpu_addr;
+	/**
+	 * @cpu_ptr: CPU pointer used for writes; either the mapping of @bo or
+	 * a separately allocated buffer when @bo is mapped as I/O memory
+	 */
+	void *cpu_ptr;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
new file mode 100644
index 000000000000..ab81bfe17e8a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sched_job.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/slab.h>
+
+#include "xe_device_types.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct kmem_cache *xe_sched_job_slab;
+static struct kmem_cache *xe_sched_job_parallel_slab;
+
+/*
+ * Create the two job slabs: one sized for a single batch address and one
+ * sized for XE_HW_ENGINE_MAX_INSTANCE batch addresses.
+ *
+ * Returns 0 on success or -ENOMEM if either cache cannot be created.
+ */
+int __init xe_sched_job_module_init(void)
+{
+	const size_t single_sz = sizeof(struct xe_sched_job) + sizeof(u64);
+	const size_t parallel_sz = sizeof(struct xe_sched_job) +
+		sizeof(u64) * XE_HW_ENGINE_MAX_INSTANCE;
+
+	xe_sched_job_slab = kmem_cache_create("xe_sched_job", single_sz, 0,
+					      SLAB_HWCACHE_ALIGN, NULL);
+	if (!xe_sched_job_slab)
+		return -ENOMEM;
+
+	xe_sched_job_parallel_slab =
+		kmem_cache_create("xe_sched_job_parallel", parallel_sz, 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (xe_sched_job_parallel_slab)
+		return 0;
+
+	/* Second cache failed: undo the first one */
+	kmem_cache_destroy(xe_sched_job_slab);
+	return -ENOMEM;
+}
+
+/* Destroy both job slabs created by xe_sched_job_module_init() */
+void xe_sched_job_module_exit(void)
+{
+	kmem_cache_destroy(xe_sched_job_slab);
+	kmem_cache_destroy(xe_sched_job_parallel_slab);
+}
+
+/* Allocate a zeroed job from the parallel slab if @parallel, else the regular one */
+static struct xe_sched_job *job_alloc(bool parallel)
+{
+	struct kmem_cache *slab = xe_sched_job_slab;
+
+	if (parallel)
+		slab = xe_sched_job_parallel_slab;
+
+	return kmem_cache_zalloc(slab, GFP_KERNEL);
+}
+
+/*
+ * True when @e has a VM flagged XE_VM_FLAG_MIGRATION and the engine itself is
+ * not flagged ENGINE_FLAG_WA.
+ */
+bool xe_sched_job_is_migration(struct xe_engine *e)
+{
+	if (!e->vm || !(e->vm->flags & XE_VM_FLAG_MIGRATION))
+		return false;
+
+	return !(e->flags & ENGINE_FLAG_WA);
+}
+
+/*
+ * Return @job to the slab it came from. The slab is chosen with the same
+ * parallel-or-migration test that xe_sched_job_create() passes to job_alloc(),
+ * so job->engine must still be valid here.
+ */
+static void job_free(struct xe_sched_job *job)
+{
+	struct xe_engine *e = job->engine;
+	struct kmem_cache *slab;
+
+	if (xe_engine_is_parallel(e) || xe_sched_job_is_migration(e))
+		slab = xe_sched_job_parallel_slab;
+	else
+		slab = xe_sched_job_slab;
+
+	kmem_cache_free(slab, job);
+}
+
+/**
+ * xe_sched_job_create() - Allocate and initialize an XE schedule job
+ * @e: engine the job is created for; a reference is taken for the job
+ * @batch_addr: batch addresses copied into the job; e->width entries are
+ *	read, or 2 entries for migration engines
+ *
+ * Initializes the base DRM scheduler job and creates the completion fence:
+ * a single LRC seqno fence for non-parallel engines, or a dma_fence_array of
+ * one seqno fence per LRC for parallel engines.
+ *
+ * Return: the new job with its refcount initialized by kref_init(), or an
+ * ERR_PTR() on failure.
+ */
+struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
+					 u64 *batch_addr)
+{
+	struct xe_sched_job *job;
+	struct dma_fence **fences;
+	bool is_migration = xe_sched_job_is_migration(e);
+	int err;
+	int i, j;
+	u32 width;
+
+	/* Migration and kernel engines have their own locking */
+	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM |
+			  ENGINE_FLAG_WA))) {
+		lockdep_assert_held(&e->vm->lock);
+		if (!xe_vm_no_dma_fences(e->vm))
+			xe_vm_assert_held(e->vm);
+	}
+
+	/* Must match the slab selection done in job_free() */
+	job = job_alloc(xe_engine_is_parallel(e) || is_migration);
+	if (!job)
+		return ERR_PTR(-ENOMEM);
+
+	job->engine = e;
+	kref_init(&job->refcount);
+	xe_engine_get(job->engine);
+
+	err = drm_sched_job_init(&job->drm, e->entity, 1, NULL);
+	if (err)
+		goto err_free;
+
+	if (!xe_engine_is_parallel(e)) {
+		job->fence = xe_lrc_create_seqno_fence(e->lrc);
+		if (IS_ERR(job->fence)) {
+			err = PTR_ERR(job->fence);
+			goto err_sched_job;
+		}
+	} else {
+		struct dma_fence_array *cf;
+
+		fences = kmalloc_array(e->width, sizeof(*fences), GFP_KERNEL);
+		if (!fences) {
+			err = -ENOMEM;
+			goto err_sched_job;
+		}
+
+		/* On failure j is the count of fences created so far */
+		for (j = 0; j < e->width; ++j) {
+			fences[j] = xe_lrc_create_seqno_fence(e->lrc + j);
+			if (IS_ERR(fences[j])) {
+				err = PTR_ERR(fences[j]);
+				goto err_fences;
+			}
+		}
+
+		cf = dma_fence_array_create(e->width, fences,
+					    e->parallel.composite_fence_ctx,
+					    e->parallel.composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			/* Undo the seqno increment made in the call above */
+			--e->parallel.composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+
+		/* Sanity check */
+		for (j = 0; j < e->width; ++j)
+			XE_BUG_ON(cf->base.seqno != fences[j]->seqno);
+
+		job->fence = &cf->base;
+	}
+
+	/* Migration jobs always carry two batch addresses */
+	width = e->width;
+	if (is_migration)
+		width = 2;
+
+	for (i = 0; i < width; ++i)
+		job->batch_addr[i] = batch_addr[i];
+
+	trace_xe_sched_job_create(job);
+	return job;
+
+err_fences:
+	/* Rewind the per-LRC seqno and drop each fence that was created */
+	for (j = j - 1; j >= 0; --j) {
+		--e->lrc[j].fence_ctx.next_seqno;
+		dma_fence_put(fences[j]);
+	}
+	kfree(fences);
+err_sched_job:
+	drm_sched_job_cleanup(&job->drm);
+err_free:
+	xe_engine_put(e);
+	job_free(job);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_sched_job_destroy - Destroy XE schedule job
+ * @ref: reference to XE schedule job
+ *
+ * Called when ref == 0, drop a reference to job's fence, cleanup base DRM
+ * schedule job, free memory for XE schedule job and finally drop the job's
+ * reference to its xe_engine.
+ */
+void xe_sched_job_destroy(struct kref *ref)
+{
+	struct xe_sched_job *job =
+		container_of(ref, struct xe_sched_job, refcount);
+	struct xe_engine *e = job->engine;
+
+	dma_fence_put(job->fence);
+	drm_sched_job_cleanup(&job->drm);
+	/*
+	 * job_free() dereferences job->engine to pick the slab, so the engine
+	 * reference must only be dropped afterwards; this may be the last one.
+	 */
+	job_free(job);
+	xe_engine_put(e);
+}
+
+/**
+ * xe_sched_job_set_error() - Mark a job's fence(s) with an error
+ * @job: XE schedule job
+ * @error: error code stored with dma_fence_set_error()
+ *
+ * Does nothing if the job's fence is already signaled. Otherwise sets @error
+ * on the job fence and, when that fence is a dma_fence_array, on every child
+ * fence that is not yet signaled. Software signaling is then enabled on the
+ * job fence and the engine's hardware fence IRQ handler is run.
+ */
+void xe_sched_job_set_error(struct xe_sched_job *job, int error)
+{
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
+		return;
+
+	dma_fence_set_error(job->fence, error);
+
+	if (dma_fence_is_array(job->fence)) {
+		struct dma_fence_array *array =
+			to_dma_fence_array(job->fence);
+		struct dma_fence **child = array->fences;
+		unsigned int nchild = array->num_fences;
+
+		/* do/while: relies on the array holding at least one fence */
+		do {
+			struct dma_fence *current_fence = *child++;
+
+			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &current_fence->flags))
+				continue;
+			dma_fence_set_error(current_fence, error);
+		} while (--nchild);
+	}
+
+	trace_xe_sched_job_set_error(job);
+
+	dma_fence_enable_sw_signaling(job->fence);
+	xe_hw_fence_irq_run(job->engine->fence_irq);
+}
+
+/*
+ * True once the start seqno of the engine's first LRC has reached the job's
+ * seqno.
+ *
+ * NOTE(review): plain >= compare; presumably not safe across seqno
+ * wrap-around - confirm.
+ */
+bool xe_sched_job_started(struct xe_sched_job *job)
+{
+	u32 job_seqno = xe_sched_job_seqno(job);
+
+	return xe_lrc_start_seqno(job->engine->lrc) >= job_seqno;
+}
+
+/*
+ * True once the seqno of the engine's first LRC has reached the job's seqno.
+ *
+ * Can safely check just LRC[0] seqno as that is last seqno written when
+ * parallel handshake is done.
+ */
+bool xe_sched_job_completed(struct xe_sched_job *job)
+{
+	u32 job_seqno = xe_sched_job_seqno(job);
+
+	return xe_lrc_seqno(job->engine->lrc) >= job_seqno;
+}
+
+/* Arm the base DRM scheduler job; wrapper around drm_sched_job_arm() */
+void xe_sched_job_arm(struct xe_sched_job *job)
+{
+	drm_sched_job_arm(&job->drm);
+}
+
+/*
+ * Push the job to its scheduler entity. A temporary job reference is held
+ * across the trace and push calls and dropped again afterwards.
+ */
+void xe_sched_job_push(struct xe_sched_job *job)
+{
+	xe_sched_job_get(job);
+	trace_xe_sched_job_exec(job);
+	drm_sched_entity_push_job(&job->drm);
+	xe_sched_job_put(job);
+}
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
new file mode 100644
index 000000000000..5315ad8656c2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_H_
+#define _XE_SCHED_JOB_H_
+
+#include "xe_sched_job_types.h"
+
+#define XE_SCHED_HANG_LIMIT 1
+#define XE_SCHED_JOB_TIMEOUT LONG_MAX
+
+int xe_sched_job_module_init(void);
+void xe_sched_job_module_exit(void);
+
+struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
+					 u64 *batch_addr);
+void xe_sched_job_destroy(struct kref *ref);
+
+/**
+ * xe_sched_job_get - get reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Increment XE schedule job's reference count
+ *
+ * Return: @job, to allow call chaining.
+ */
+static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job)
+{
+	kref_get(&job->refcount);
+	return job;
+}
+
+/**
+ * xe_sched_job_put - put reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Decrement XE schedule job's reference count, call xe_sched_job_destroy when
+ * reference count == 0. The caller must not use @job after this call unless
+ * it holds another reference.
+ */
+static inline void xe_sched_job_put(struct xe_sched_job *job)
+{
+	kref_put(&job->refcount, xe_sched_job_destroy);
+}
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error);
+/* True if the job's fence carries a negative error code */
+static inline bool xe_sched_job_is_error(struct xe_sched_job *job)
+{
+	return job->fence->error < 0;
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job);
+bool xe_sched_job_completed(struct xe_sched_job *job);
+
+void xe_sched_job_arm(struct xe_sched_job *job);
+void xe_sched_job_push(struct xe_sched_job *job);
+
+/* Convert an embedded drm_sched_job (the drm member) to its xe_sched_job */
+static inline struct xe_sched_job *
+to_xe_sched_job(struct drm_sched_job *drm)
+{
+	return container_of(drm, struct xe_sched_job, drm);
+}
+
+/*
+ * Seqno of the job's fence, returned as u32.
+ * NOTE(review): presumably truncates a wider dma_fence seqno - confirm.
+ */
+static inline u32 xe_sched_job_seqno(struct xe_sched_job *job)
+{
+	return job->fence->seqno;
+}
+
+/* Store @flags as the job's migrate_flush_flags, replacing any previous value */
+static inline void
+xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
+{
+	job->migrate_flush_flags = flags;
+}
+
+bool xe_sched_job_is_migration(struct xe_engine *e);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
new file mode 100644
index 000000000000..fd1d75996127
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_TYPES_H_
+#define _XE_SCHED_JOB_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+struct xe_engine;
+
+/**
+ * struct xe_sched_job - XE schedule job (batch buffer tracking)
+ */
+struct xe_sched_job {
+	/** @drm: base DRM scheduler job */
+	struct drm_sched_job drm;
+	/** @engine: XE submission engine */
+	struct xe_engine *engine;
+	/** @refcount: ref count of this job */
+	struct kref refcount;
+/* Job flag bit, defined as the first dma-fence user flag bit */
+#define JOB_FLAG_SUBMIT		DMA_FENCE_FLAG_USER_BITS
+	/**
+	 * @fence: dma fence to indicate completion. 1 way relationship - job
+	 * can safely reference fence, fence cannot safely reference job.
+	 */
+	struct dma_fence *fence;
+	/** @user_fence: write back value when BB is complete */
+	struct {
+		/** @user_fence.used: user fence is used */
+		bool used;
+		/** @user_fence.addr: address to write to */
+		u64 addr;
+		/** @user_fence.value: write back value */
+		u64 value;
+	} user_fence;
+	/** @migrate_flush_flags: Additional flush flags for migration jobs */
+	u32 migrate_flush_flags;
+	/**
+	 * @batch_addr: batch buffer address(es) of job; flexible array sized
+	 * by the slab the job is allocated from
+	 */
+	u64 batch_addr[];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
new file mode 100644
index 000000000000..ca77d0971529
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_step.h"
+
+#include "xe_device.h"
+#include "xe_platform_types.h"
+
+/*
+ * Provide mapping between PCI's revision ID to the individual GMD
+ * (Graphics/Media/Display) stepping values that can be compared numerically.
+ *
+ * Some platforms may have unusual ways of mapping PCI revision ID to GMD
+ * steppings.  E.g., in some cases a higher PCI revision may translate to a
+ * lower stepping of the GT and/or display IP.
+ *
+ * Also note that some revisions/steppings may have been set aside as
+ * placeholders but never materialized in real hardware; in those cases there
+ * may be jumps in the revision IDs or stepping values in the tables below.
+ */
+
+/*
+ * Some platforms always have the same stepping value for GT and media, and
+ * some for GT, media and display; use macros to define these to make it
+ * easier to identify the platforms where the steppings can deviate.
+ */
+#define COMMON_GT_MEDIA_STEP(x_)	\
+	.graphics = STEP_##x_,		\
+	.media = STEP_##x_
+
+/* Same stepping for graphics, media and display */
+#define COMMON_STEP(x_)			\
+	COMMON_GT_MEDIA_STEP(x_),	\
+	.display = STEP_##x_
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
+/*
+ * The same GT stepping in tgl_uy_revids and tgl_revids doesn't mean the same
+ * HW.
+ *
+ * All tables below are indexed by revision ID. Indices without an initializer
+ * are zero-filled, i.e. STEP_NONE, and are treated as gaps by xe_step_get().
+ */
+static const struct xe_step_info tgl_revids[] = {
+	[0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+	[1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info dg1_revids[] = {
+	[0] = { COMMON_STEP(A0) },
+	[1] = { COMMON_STEP(B0) },
+};
+
+static const struct xe_step_info adls_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g10_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g11_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 },
+	[0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g12_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 },
+};
+
+/* Indexed by bits 2:0 of the PCI revision ID; only graphics is provided */
+static const struct xe_step_info pvc_revid_step_tbl[] = {
+	[0x3] = { .graphics = STEP_A0 },
+	[0x5] = { .graphics = STEP_B0 },
+	[0x6] = { .graphics = STEP_B1 },
+	[0x7] = { .graphics = STEP_C0 },
+};
+
+/* Indexed by bits 5:3 of the PCI revision ID */
+static const int pvc_basedie_subids[] = {
+	[0x0] = STEP_A0,
+	[0x3] = STEP_B0,
+	[0x4] = STEP_B1,
+	[0x5] = STEP_B3,
+};
+
+__diag_pop();
+
+/**
+ * xe_step_get() - Translate the device's revision ID into stepping values
+ * @xe: xe device; reads info.platform, info.subplatform and info.revid
+ *
+ * Picks the revid table for the platform/subplatform and looks up the
+ * revision ID in it. For PVC the revision ID is split into a base die ID
+ * (bits 5:3) and a table index (bits 2:0). A revision ID that hits a gap in
+ * the table uses the next populated entry; one past the end of the table
+ * yields STEP_FUTURE for graphics and display.
+ *
+ * Return: the stepping info; all fields are zero (STEP_NONE) for platforms
+ * without a table.
+ */
+struct xe_step_info xe_step_get(struct xe_device *xe)
+{
+	const struct xe_step_info *revids = NULL;
+	struct xe_step_info step = {};
+	u16 revid = xe->info.revid;
+	int size = 0;
+	const int *basedie_info = NULL;
+	int basedie_size = 0;
+	int baseid = 0;
+
+	if (xe->info.platform == XE_PVC) {
+		baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid);
+		revid = FIELD_GET(GENMASK(2, 0), xe->info.revid);
+		revids = pvc_revid_step_tbl;
+		size = ARRAY_SIZE(pvc_revid_step_tbl);
+		basedie_info = pvc_basedie_subids;
+		basedie_size = ARRAY_SIZE(pvc_basedie_subids);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10) {
+		revids = dg2_g10_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g10_revid_step_tbl);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G11) {
+		revids = dg2_g11_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g11_revid_step_tbl);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) {
+		revids = dg2_g12_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g12_revid_step_tbl);
+	} else if (xe->info.platform == XE_ALDERLAKE_S) {
+		revids = adls_revids;
+		size = ARRAY_SIZE(adls_revids);
+	} else if (xe->info.platform == XE_DG1) {
+		revids = dg1_revids;
+		size = ARRAY_SIZE(dg1_revids);
+	} else if (xe->info.platform == XE_TIGERLAKE) {
+		revids = tgl_revids;
+		size = ARRAY_SIZE(tgl_revids);
+	}
+
+	/* Not using the stepping scheme for the platform yet. */
+	if (!revids)
+		return step;
+
+	/* A populated entry always has a non-STEP_NONE graphics stepping */
+	if (revid < size && revids[revid].graphics != STEP_NONE) {
+		step = revids[revid];
+	} else {
+		drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid);
+
+		/*
+		 * If we hit a gap in the revid array, use the information for
+		 * the next revid.
+		 *
+		 * This may be wrong in all sorts of ways, especially if the
+		 * steppings in the array are not monotonically increasing, but
+		 * it's better than defaulting to 0.
+		 */
+		while (revid < size && revids[revid].graphics == STEP_NONE)
+			revid++;
+
+		if (revid < size) {
+			drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n",
+				revid);
+			step = revids[revid];
+		} else {
+			/* NOTE(review): media is left at STEP_NONE here - confirm intended */
+			drm_dbg(&xe->drm, "Using future steppings\n");
+			step.graphics = STEP_FUTURE;
+			step.display = STEP_FUTURE;
+		}
+	}
+
+	drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE);
+
+	/* Base die stepping is only looked up when a base die table was selected */
+	if (basedie_info && basedie_size) {
+		if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) {
+			step.basedie = basedie_info[baseid];
+		} else {
+			drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid);
+			step.basedie = STEP_FUTURE;
+		}
+	}
+
+	return step;
+}
+
+/* Expands to a switch case returning the stepping's name as a string */
+#define STEP_NAME_CASE(name)	\
+	case STEP_##name:	\
+		return #name;
+
+/**
+ * xe_step_name() - Name of a stepping value
+ * @step: stepping to name
+ *
+ * Return: the stepping name (e.g. "A0") for every entry of STEP_NAME_LIST,
+ * "**" for any other value.
+ */
+const char *xe_step_name(enum xe_step step)
+{
+	switch (step) {
+	STEP_NAME_LIST(STEP_NAME_CASE);
+
+	default:
+		return "**";
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h
new file mode 100644
index 000000000000..0c596c8579fb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_H_
+#define _XE_STEP_H_
+
+#include <linux/types.h>
+
+#include "xe_step_types.h"
+
+struct xe_device;
+
+struct xe_step_info xe_step_get(struct xe_device *xe);
+const char *xe_step_name(enum xe_step step);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h
new file mode 100644
index 000000000000..b7859f9647ca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step_types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_TYPES_H_
+#define _XE_STEP_TYPES_H_
+
+#include <linux/types.h>
+
+/* Per-IP stepping values of a device; each field holds an enum xe_step value */
+struct xe_step_info {
+	u8 graphics;
+	u8 media;
+	u8 display;
+	u8 basedie;
+};
+
+/* Expands one STEP_NAME_LIST entry into an enum xe_step enumerator */
+#define STEP_ENUM_VAL(name)  STEP_##name,
+
+/*
+ * X-macro listing all named steppings in increasing order; @func is applied
+ * to each name.
+ */
+#define STEP_NAME_LIST(func)		\
+	func(A0)			\
+	func(A1)			\
+	func(A2)			\
+	func(B0)			\
+	func(B1)			\
+	func(B2)			\
+	func(B3)			\
+	func(C0)			\
+	func(C1)			\
+	func(D0)			\
+	func(D1)			\
+	func(E0)			\
+	func(F0)			\
+	func(G0)			\
+	func(H0)			\
+	func(I0)			\
+	func(I1)			\
+	func(J0)
+
+/*
+ * Stepping values. STEP_NONE is 0 so that unpopulated table entries read as
+ * "no stepping"; the named steppings follow in STEP_NAME_LIST order.
+ *
+ * STEP_FUTURE and STEP_FOREVER are symbolic steppings that do not match the
+ * hardware. These are valid both as gt and display steppings as symbolic
+ * names.
+ */
+enum xe_step {
+	STEP_NONE = 0,
+	STEP_NAME_LIST(STEP_ENUM_VAL)
+	STEP_FUTURE,
+	STEP_FOREVER,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
new file mode 100644
index 000000000000..0fbd8d0978cf
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sync.h"
+
+#include <linux/kthread.h>
+#include <linux/sched/mm.h>
+#include <linux/uaccess.h>
+#include <drm/xe_drm.h>
+#include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+
+#include "xe_device_types.h"
+#include "xe_sched_job_types.h"
+#include "xe_macros.h"
+
+#define SYNC_FLAGS_TYPE_MASK 0x3
+#define SYNC_FLAGS_FENCE_INSTALLED	0x10000
+
+/* State for writing a value to a user address once a dma-fence signals */
+struct user_fence {
+	/* Device providing ordered_wq and ufence_wq */
+	struct xe_device *xe;
+	struct kref refcount;
+	/* Callback installed on the dma-fence in xe_sync_entry_signal() */
+	struct dma_fence_cb cb;
+	/* Work item that performs the user-space write */
+	struct work_struct worker;
+	/* mm of the creating task, pinned with mmgrab() */
+	struct mm_struct *mm;
+	/* User address written by the worker */
+	u64 __user *addr;
+	/* Value written to @addr */
+	u64 value;
+};
+
+/* kref release: drop the mm grabbed in user_fence_create() and free the fence */
+static void user_fence_destroy(struct kref *kref)
+{
+	struct user_fence *ufence = container_of(kref, struct user_fence,
+						 refcount);
+
+	mmdrop(ufence->mm);
+	kfree(ufence);
+}
+
+/* Take a reference on @ufence */
+static void user_fence_get(struct user_fence *ufence)
+{
+	kref_get(&ufence->refcount);
+}
+
+/* Drop a reference on @ufence; the last put runs user_fence_destroy() */
+static void user_fence_put(struct user_fence *ufence)
+{
+	kref_put(&ufence->refcount, user_fence_destroy);
+}
+
+/*
+ * Allocate a user fence that will write @value to user address @addr.
+ * The current task's mm is recorded and pinned with mmgrab().
+ *
+ * Returns the new fence holding one reference, or NULL on allocation failure.
+ */
+static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
+					    u64 value)
+{
+	struct user_fence *uf = kmalloc(sizeof(*uf), GFP_KERNEL);
+
+	if (!uf)
+		return NULL;
+
+	uf->xe = xe;
+	uf->addr = u64_to_user_ptr(addr);
+	uf->value = value;
+	kref_init(&uf->refcount);
+
+	uf->mm = current->mm;
+	mmgrab(uf->mm);
+
+	return uf;
+}
+
+/*
+ * Work handler: write the fence value to the user address using the recorded
+ * mm (skipped if that mm's user count already dropped to zero), wake all
+ * waiters on the device's ufence_wq and drop the reference taken for the
+ * callback in xe_sync_entry_signal().
+ */
+static void user_fence_worker(struct work_struct *w)
+{
+	struct user_fence *ufence = container_of(w, struct user_fence, worker);
+
+	if (mmget_not_zero(ufence->mm)) {
+		/* Temporarily adopt the user mm so copy_to_user() targets it */
+		kthread_use_mm(ufence->mm);
+		if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value)))
+			XE_WARN_ON("Copy to user failed");
+		kthread_unuse_mm(ufence->mm);
+		mmput(ufence->mm);
+	}
+
+	wake_up_all(&ufence->xe->ufence_wq);
+	user_fence_put(ufence);
+}
+
+/*
+ * Queue the user fence worker on the device's ordered workqueue and drop the
+ * reference on @fence that xe_sync_entry_signal() took for the callback.
+ */
+static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
+{
+	INIT_WORK(&ufence->worker, user_fence_worker);
+	queue_work(ufence->xe->ordered_wq, &ufence->worker);
+	dma_fence_put(fence);
+}
+
+/* dma-fence callback: hand off to kick_ufence() for the owning user fence */
+static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct user_fence *ufence = container_of(cb, struct user_fence, cb);
+
+	kick_ufence(ufence, fence);
+}
+
+/**
+ * xe_sync_entry_parse() - Parse a user-supplied sync entry
+ * @xe: xe device
+ * @xef: xe file the syncobj handles are looked up in
+ * @sync: entry to fill in
+ * @sync_user: user pointer to the struct drm_xe_sync to parse
+ * @exec: for user fences, true stores only the address in @sync, false
+ *	creates a struct user_fence
+ * @no_dma_fences: reject syncobj and timeline syncobj entries
+ *
+ * Copies the entry from user space, validates its flags and, depending on the
+ * sync type, looks up the syncobj, fetches the fence to wait on, allocates a
+ * chain fence for timeline signaling, or sets up a user fence.
+ *
+ * On error, references already stored in @sync are not released here.
+ * NOTE(review): callers presumably run xe_sync_entry_cleanup() on the entry
+ * even when parsing fails - confirm.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+			struct xe_sync_entry *sync,
+			struct drm_xe_sync __user *sync_user,
+			bool exec, bool no_dma_fences)
+{
+	struct drm_xe_sync sync_in;
+	int err;
+
+	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
+		return -EFAULT;
+
+	/* Only the type bits and the signal flag may be set by user space */
+	if (XE_IOCTL_ERR(xe, sync_in.flags &
+			 ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)))
+		return -EINVAL;
+
+	switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) {
+	case DRM_XE_SYNC_SYNCOBJ:
+		if (XE_IOCTL_ERR(xe, no_dma_fences))
+			return -ENOTSUPP;
+
+		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
+			return -EINVAL;
+
+		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+		if (XE_IOCTL_ERR(xe, !sync->syncobj))
+			return -ENOENT;
+
+		/* Wait entries need the syncobj's current fence */
+		if (!(sync_in.flags & DRM_XE_SYNC_SIGNAL)) {
+			sync->fence = drm_syncobj_fence_get(sync->syncobj);
+			if (XE_IOCTL_ERR(xe, !sync->fence))
+				return -EINVAL;
+		}
+		break;
+
+	case DRM_XE_SYNC_TIMELINE_SYNCOBJ:
+		if (XE_IOCTL_ERR(xe, no_dma_fences))
+			return -ENOTSUPP;
+
+		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
+			return -EINVAL;
+
+		if (XE_IOCTL_ERR(xe, sync_in.timeline_value == 0))
+			return -EINVAL;
+
+		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+		if (XE_IOCTL_ERR(xe, !sync->syncobj))
+			return -ENOENT;
+
+		if (sync_in.flags & DRM_XE_SYNC_SIGNAL) {
+			/* Pre-allocate the chain node used when signaling */
+			sync->chain_fence = dma_fence_chain_alloc();
+			if (!sync->chain_fence)
+				return -ENOMEM;
+		} else {
+			sync->fence = drm_syncobj_fence_get(sync->syncobj);
+			if (XE_IOCTL_ERR(xe, !sync->fence))
+				return -EINVAL;
+
+			/* Narrow the fence to the requested timeline point */
+			err = dma_fence_chain_find_seqno(&sync->fence,
+							 sync_in.timeline_value);
+			if (err)
+				return err;
+		}
+		break;
+
+	case DRM_XE_SYNC_DMA_BUF:
+		/* Not implemented: the string is always true, so this always fails */
+		if (XE_IOCTL_ERR(xe, "TODO"))
+			return -EINVAL;
+		break;
+
+	case DRM_XE_SYNC_USER_FENCE:
+		/* User fences can only be signaled, not waited on */
+		if (XE_IOCTL_ERR(xe, !(sync_in.flags & DRM_XE_SYNC_SIGNAL)))
+			return -ENOTSUPP;
+
+		/* The address must be 8-byte aligned */
+		if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7))
+			return -EINVAL;
+
+		if (exec) {
+			sync->addr = sync_in.addr;
+		} else {
+			sync->ufence = user_fence_create(xe, sync_in.addr,
+							 sync_in.timeline_value);
+			if (XE_IOCTL_ERR(xe, !sync->ufence))
+				return -ENOMEM;
+		}
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	sync->flags = sync_in.flags;
+	sync->timeline_value = sync_in.timeline_value;
+
+	return 0;
+}
+
+/*
+ * Interruptibly wait for the entry's fence, if it has one.
+ *
+ * Always returns 0.
+ * NOTE(review): the result of dma_fence_wait() is discarded, so an
+ * interrupted wait is reported as success - confirm this is intended.
+ */
+int xe_sync_entry_wait(struct xe_sync_entry *sync)
+{
+	struct dma_fence *fence = sync->fence;
+
+	if (!fence)
+		return 0;
+
+	dma_fence_wait(fence, true);
+	return 0;
+}
+
+/**
+ * xe_sync_entry_add_deps() - Add the entry's fence as a job dependency
+ * @sync: sync entry
+ * @job: job that must wait for the entry's fence
+ *
+ * Does nothing for entries without a fence. Otherwise an extra fence
+ * reference is taken and handed to drm_sched_job_add_dependency(), which
+ * consumes it on both success and failure; it must therefore not be put
+ * again here on error. The entry keeps its own reference.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
+{
+	if (sync->fence)
+		return drm_sched_job_add_dependency(&job->drm,
+						    dma_fence_get(sync->fence));
+
+	return 0;
+}
+
+/**
+ * xe_sync_entry_signal() - Install a fence into a signaling sync entry
+ * @sync: sync entry
+ * @job: job that receives the user fence address/value for exec-style user
+ *	fences
+ * @fence: fence to install
+ *
+ * Depending on what the entry holds, @fence is added as a timeline point,
+ * replaces the syncobj's fence, gets a callback that triggers the user fence
+ * write, or the user fence address and value are recorded in @job.
+ *
+ * Return: true if the entry was handled now, false if the entry is not a
+ * signaling entry or a fence was already installed.
+ */
+bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
+			  struct dma_fence *fence)
+{
+	if (!(sync->flags & DRM_XE_SYNC_SIGNAL) ||
+	    sync->flags & SYNC_FLAGS_FENCE_INSTALLED)
+		return false;
+
+	if (sync->chain_fence) {
+		drm_syncobj_add_point(sync->syncobj, sync->chain_fence,
+				      fence, sync->timeline_value);
+		/*
+		 * The chain's ownership is transferred to the
+		 * timeline.
+		 */
+		sync->chain_fence = NULL;
+	} else if (sync->syncobj) {
+		drm_syncobj_replace_fence(sync->syncobj, fence);
+	} else if (sync->ufence) {
+		int err;
+
+		/* References for the callback; released via kick_ufence()/worker */
+		dma_fence_get(fence);
+		user_fence_get(sync->ufence);
+		err = dma_fence_add_callback(fence, &sync->ufence->cb,
+					     user_fence_cb);
+		if (err == -ENOENT) {
+			/* Fence already signaled: run the write path directly */
+			kick_ufence(sync->ufence, fence);
+		} else if (err) {
+			XE_WARN_ON("failed to add user fence");
+			user_fence_put(sync->ufence);
+			dma_fence_put(fence);
+		}
+	} else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) ==
+		   DRM_XE_SYNC_USER_FENCE) {
+		/* User fence parsed with exec == true: only an address is stored */
+		job->user_fence.used = true;
+		job->user_fence.addr = sync->addr;
+		job->user_fence.value = sync->timeline_value;
+	}
+
+	/* TODO: external BO? */
+
+	sync->flags |= SYNC_FLAGS_FENCE_INSTALLED;
+
+	return true;
+}
+
+/*
+ * Drop every reference held by @sync: syncobj, fence, unused chain fence and
+ * user fence. Members that are NULL are skipped.
+ */
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
+{
+	if (sync->syncobj)
+		drm_syncobj_put(sync->syncobj);
+	if (sync->fence)
+		dma_fence_put(sync->fence);
+	if (sync->chain_fence)
+		dma_fence_put(&sync->chain_fence->base);
+	if (sync->ufence)
+		user_fence_put(sync->ufence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
new file mode 100644
index 000000000000..4cbcf7a19911
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_H_
+#define _XE_SYNC_H_
+
+#include "xe_sync_types.h"
+
+struct xe_device;
+struct xe_file;
+struct xe_sched_job;
+
+/* The last parameter is named no_dma_fences to match the definition in xe_sync.c */
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+			struct xe_sync_entry *sync,
+			struct drm_xe_sync __user *sync_user,
+			bool exec, bool no_dma_fences);
+int xe_sync_entry_wait(struct xe_sync_entry *sync);
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
+			   struct xe_sched_job *job);
+bool xe_sync_entry_signal(struct xe_sync_entry *sync,
+			  struct xe_sched_job *job,
+			  struct dma_fence *fence);
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h
new file mode 100644
index 000000000000..24fccc26cb53
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_TYPES_H_
+#define _XE_SYNC_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_syncobj;
+struct dma_fence;
+struct dma_fence_chain;
+struct drm_xe_sync;
+struct user_fence;
+
+/* Kernel-side state of one parsed struct drm_xe_sync */
+struct xe_sync_entry {
+	/* Syncobj looked up from the user handle (syncobj and timeline types) */
+	struct drm_syncobj *syncobj;
+	/* Fence to wait on for non-signaling entries */
+	struct dma_fence *fence;
+	/* Pre-allocated chain node for signaling a timeline point */
+	struct dma_fence_chain *chain_fence;
+	/* User fence created when parsing with exec == false */
+	struct user_fence *ufence;
+	/* User fence address stored when parsing with exec == true */
+	u64 addr;
+	/* Timeline point, or the value written for user fences */
+	u64 timeline_value;
+	/* User flags plus the internal fence-installed marker */
+	u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c
new file mode 100644
index 000000000000..2570c0b859c4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "xe_trace.h"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
new file mode 100644
index 000000000000..a5f963f1f6eb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -0,0 +1,513 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xe
+
+#if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _XE_TRACE_H_
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include "xe_bo_types.h"
+#include "xe_engine_types.h"
+#include "xe_gpu_scheduler_types.h"
+#include "xe_gt_types.h"
+#include "xe_guc_engine_types.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+DECLARE_EVENT_CLASS(xe_bo,
+		    TP_PROTO(struct xe_bo *bo),
+		    TP_ARGS(bo),
+
+		    TP_STRUCT__entry(
+			     __field(size_t, size)
+			     __field(u32, flags)
+			     __field(u64, vm)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->size = bo->size;
+			   __entry->flags = bo->flags;
+			   __entry->vm = (u64)bo->vm;
+			   ),
+
+		    TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx",
+			      __entry->size, __entry->flags, __entry->vm)
+);
+
+DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
+	     TP_PROTO(struct xe_bo *bo),
+	     TP_ARGS(bo)
+);
+
+DEFINE_EVENT(xe_bo, xe_bo_move,
+	     TP_PROTO(struct xe_bo *bo),
+	     TP_ARGS(bo)
+);
+
+DECLARE_EVENT_CLASS(xe_engine,
+		    TP_PROTO(struct xe_engine *e),
+		    TP_ARGS(e),
+
+		    TP_STRUCT__entry(
+			     __field(enum xe_engine_class, class)
+			     __field(u32, logical_mask)
+			     __field(u8, gt_id)
+			     __field(u16, width)
+			     __field(u16, guc_id)
+			     __field(u32, guc_state)
+			     __field(u32, flags)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->class = e->class;
+			   __entry->logical_mask = e->logical_mask;
+			   __entry->gt_id = e->gt->info.id;
+			   __entry->width = e->width;
+			   __entry->guc_id = e->guc->id;
+			   __entry->guc_state = atomic_read(&e->guc->state);
+			   __entry->flags = e->flags;
+			   ),
+
+		    TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
+			      __entry->class, __entry->logical_mask,
+			      __entry->gt_id, __entry->width, __entry->guc_id,
+			      __entry->guc_state, __entry->flags)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_create,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_supress_resume,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_submit,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_scheduling_enable,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_scheduling_disable,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_scheduling_done,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_register,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_deregister,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_deregister_done,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_close,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_kill,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_cleanup_entity,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_destroy,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_reset,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_memory_cat_error,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_stop,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_resubmit,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_job,
+		    TP_PROTO(struct xe_sched_job *job),
+		    TP_ARGS(job),
+
+		    TP_STRUCT__entry(
+			     __field(u32, seqno)
+			     __field(u16, guc_id)
+			     __field(u32, guc_state)
+			     __field(u32, flags)
+			     __field(int, error)
+			     __field(u64, fence)
+			     __field(u64, batch_addr)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->seqno = xe_sched_job_seqno(job);
+			   __entry->guc_id = job->engine->guc->id;
+			   __entry->guc_state =
+			   atomic_read(&job->engine->guc->state);
+			   __entry->flags = job->engine->flags;
+			   __entry->error = job->fence->error;
+			   __entry->fence = (u64)job->fence;
+			   __entry->batch_addr = (u64)job->batch_addr[0];
+			   ),
+
+		    TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+			      __entry->fence, __entry->seqno, __entry->guc_id,
+			      __entry->batch_addr, __entry->guc_state,
+			      __entry->flags, __entry->error)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_create,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_exec,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_run,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_free,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_timedout,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_set_error,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_ban,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_msg,
+		    TP_PROTO(struct xe_sched_msg *msg),
+		    TP_ARGS(msg),
+
+		    TP_STRUCT__entry(
+			     __field(u32, opcode)
+			     __field(u16, guc_id)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->opcode = msg->opcode;
+			   __entry->guc_id =
+			   ((struct xe_engine *)msg->private_data)->guc->id;
+			   ),
+
+		    TP_printk("guc_id=%d, opcode=%u", __entry->guc_id,
+			      __entry->opcode)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_add,
+	     TP_PROTO(struct xe_sched_msg *msg),
+	     TP_ARGS(msg)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_recv,
+	     TP_PROTO(struct xe_sched_msg *msg),
+	     TP_ARGS(msg)
+);
+
+DECLARE_EVENT_CLASS(xe_hw_fence,
+		    TP_PROTO(struct xe_hw_fence *fence),
+		    TP_ARGS(fence),
+
+		    TP_STRUCT__entry(
+			     __field(u64, ctx)
+			     __field(u32, seqno)
+			     __field(u64, fence)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->ctx = fence->dma.context;
+			   __entry->seqno = fence->dma.seqno;
+			   __entry->fence = (u64)fence;
+			   ),
+
+		    TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u",
+			      __entry->ctx, __entry->fence, __entry->seqno)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_signal,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DECLARE_EVENT_CLASS(xe_vma,
+		    TP_PROTO(struct xe_vma *vma),
+		    TP_ARGS(vma),
+
+		    TP_STRUCT__entry(
+			     __field(u64, vma)
+			     __field(u32, asid)
+			     __field(u64, start)
+			     __field(u64, end)
+			     __field(u64, ptr)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->vma = (u64)vma;
+			   __entry->asid = vma->vm->usm.asid;
+			   __entry->start = vma->start;
+			   __entry->end = vma->end;
+			   __entry->ptr = (u64)vma->userptr.ptr;
+			   ),
+
+		    TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx",
+			      __entry->vma, __entry->asid, __entry->start,
+			      __entry->end, __entry->ptr)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_flush,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pagefault,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_acc,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_fail,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_bind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pf_bind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_unbind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_worker,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_exec,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_evict,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DECLARE_EVENT_CLASS(xe_vm,
+		    TP_PROTO(struct xe_vm *vm),
+		    TP_ARGS(vm),
+
+		    TP_STRUCT__entry(
+			     __field(u64, vm)
+			     __field(u32, asid)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->vm = (u64)vm;
+			   __entry->asid = vm->usm.asid;
+			   ),
+
+		    TP_printk("vm=0x%016llx, asid=0x%05x",  __entry->vm,
+			      __entry->asid)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_create,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_free,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_cpu_bind,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_restart,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+TRACE_EVENT(xe_guc_ct_h2g_flow_control,
+	    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+	    TP_ARGS(_head, _tail, size, space, len),
+
+	    TP_STRUCT__entry(
+		     __field(u32, _head)
+		     __field(u32, _tail)
+		     __field(u32, size)
+		     __field(u32, space)
+		     __field(u32, len)
+		     ),
+
+	    TP_fast_assign(
+		   __entry->_head = _head;
+		   __entry->_tail = _tail;
+		   __entry->size = size;
+		   __entry->space = space;
+		   __entry->len = len;
+		   ),
+
+	    TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u",
+		      __entry->_head, __entry->_tail, __entry->size,
+		      __entry->space, __entry->len)
+);
+
+TRACE_EVENT(xe_guc_ct_g2h_flow_control,
+	    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+	    TP_ARGS(_head, _tail, size, space, len),
+
+	    TP_STRUCT__entry(
+		     __field(u32, _head)
+		     __field(u32, _tail)
+		     __field(u32, size)
+		     __field(u32, space)
+		     __field(u32, len)
+		     ),
+
+	    TP_fast_assign(
+		   __entry->_head = _head;
+		   __entry->_tail = _tail;
+		   __entry->size = size;
+		   __entry->space = space;
+		   __entry->len = len;
+		   ),
+
+	    TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u",
+		      __entry->_head, __entry->_tail, __entry->size,
+		      __entry->space, __entry->len)
+);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe
+#define TRACE_INCLUDE_FILE xe_trace
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
new file mode 100644
index 000000000000..a0ba8bba84d1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2022 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_ttm_gtt_mgr.h"
+
+struct xe_ttm_gtt_node {
+	struct ttm_buffer_object *tbo;
+	struct ttm_range_mgr_node base;
+};
+
+static inline struct xe_ttm_gtt_mgr *
+to_gtt_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_gtt_mgr, manager);
+}
+
+static inline struct xe_ttm_gtt_node *
+to_xe_ttm_gtt_node(struct ttm_resource *res)
+{
+	return container_of(res, struct xe_ttm_gtt_node, base.base);
+}
+
+static int xe_ttm_gtt_mgr_new(struct ttm_resource_manager *man,
+			      struct ttm_buffer_object *tbo,
+			      const struct ttm_place *place,
+			      struct ttm_resource **res)
+{
+	struct xe_ttm_gtt_node *node;
+	int r;
+
+	node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	node->tbo = tbo;
+	ttm_resource_init(tbo, place, &node->base.base);
+
+	if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+	    ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) {
+		r = -ENOSPC;
+		goto err_fini;
+	}
+
+	node->base.mm_nodes[0].start = 0;
+	node->base.mm_nodes[0].size = PFN_UP(node->base.base.size);
+	node->base.base.start = XE_BO_INVALID_OFFSET;
+
+	*res = &node->base.base;
+
+	return 0;
+
+err_fini:
+	ttm_resource_fini(man, &node->base.base);
+	kfree(node);
+	return r;
+}
+
+static void xe_ttm_gtt_mgr_del(struct ttm_resource_manager *man,
+			       struct ttm_resource *res)
+{
+	struct xe_ttm_gtt_node *node = to_xe_ttm_gtt_node(res);
+
+	ttm_resource_fini(man, res);
+	kfree(node);
+}
+
+static void xe_ttm_gtt_mgr_debug(struct ttm_resource_manager *man,
+				 struct drm_printer *printer)
+{
+
+}
+
+static const struct ttm_resource_manager_func xe_ttm_gtt_mgr_func = {
+	.alloc = xe_ttm_gtt_mgr_new,
+	.free = xe_ttm_gtt_mgr_del,
+	.debug = xe_ttm_gtt_mgr_debug
+};
+
+static void ttm_gtt_mgr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_ttm_gtt_mgr *mgr = arg;
+	struct xe_device *xe = gt_to_xe(mgr->gt);
+	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
+
+	ttm_resource_manager_set_used(man, false);
+
+	err = ttm_resource_manager_evict_all(&xe->ttm, man);
+	if (err)
+		return;
+
+	ttm_resource_manager_cleanup(man);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL);
+}
+
+/*
+ * Set up @mgr as the XE_PL_TT resource manager of @gt's device with a
+ * capacity of @gtt_size >> PAGE_SHIFT pages, and queue ttm_gtt_mgr_fini() as
+ * a drm-managed release action. Returns drmm_add_action_or_reset()'s result.
+ */
+int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr,
+			u64 gtt_size)
+{
+	struct ttm_resource_manager *man = &mgr->manager;
+	struct xe_device *xe = gt_to_xe(gt);
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	mgr->gt = gt;
+	man->func = &xe_ttm_gtt_mgr_func;
+	man->use_tt = true;
+
+	ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man);
+	ttm_resource_manager_set_used(man, true);
+
+	/* On failure the helper itself invokes ttm_gtt_mgr_fini() for us */
+	return drmm_add_action_or_reset(&xe->drm, ttm_gtt_mgr_fini, mgr);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
new file mode 100644
index 000000000000..d1d57cb9c2b8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTGM_GTT_MGR_H_
+#define _XE_TTGM_GTT_MGR_H_
+
+#include "xe_ttm_gtt_mgr_types.h"
+
+struct xe_gt;
+
+int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr,
+			u64 gtt_size);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
new file mode 100644
index 000000000000..c66737488326
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_GTT_MGR_TYPES_H_
+#define _XE_TTM_GTT_MGR_TYPES_H_
+
+#include <drm/ttm/ttm_device.h>
+
+struct xe_gt;
+
+struct xe_ttm_gtt_mgr {
+	struct xe_gt *gt;
+	struct ttm_resource_manager manager;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
new file mode 100644
index 000000000000..e391e81d3640
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2022 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_placement.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_res_cursor.h"
+#include "xe_ttm_vram_mgr.h"
+
+static inline struct xe_ttm_vram_mgr *
+to_vram_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_vram_mgr, manager);
+}
+
+static inline struct xe_gt *
+mgr_to_gt(struct xe_ttm_vram_mgr *mgr)
+{
+	return mgr->gt;
+}
+
+static inline struct drm_buddy_block *
+xe_ttm_vram_mgr_first_block(struct list_head *list)
+{
+	return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head)
+{
+	struct drm_buddy_block *block;
+	u64 start, size;
+
+	block = xe_ttm_vram_mgr_first_block(head);
+	if (!block)
+		return false;
+
+	while (head != block->link.next) {
+		start = xe_ttm_vram_mgr_block_start(block);
+		size = xe_ttm_vram_mgr_block_size(block);
+
+		block = list_entry(block->link.next, struct drm_buddy_block,
+				   link);
+		if (start + size != xe_ttm_vram_mgr_block_start(block))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * TTM .alloc hook: back @tbo with drm_buddy blocks from the VRAM manager,
+ * honouring the range/contiguous/top-down hints in @place. On success *@res
+ * points at the new resource and 0 is returned; otherwise -ENOMEM, -ENOSPC
+ * or the drm_buddy_alloc_blocks() error is returned and nothing is leaked.
+ */
+static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
+			       struct ttm_buffer_object *tbo,
+			       const struct ttm_place *place,
+			       struct ttm_resource **res)
+{
+	u64 max_bytes, cur_size = 0, min_block_size;
+	struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres;
+	u64 size = 0, remaining_size, lpfn, fpfn;
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+	unsigned long pages_per_block;
+	int r;
+
+	lpfn = (u64)place->lpfn << PAGE_SHIFT;
+	if (!lpfn)
+		lpfn = man->size;
+
+	fpfn = (u64)place->fpfn << PAGE_SHIFT;
+
+	max_bytes = mgr->gt->mem.vram.size;
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		pages_per_block = ~0ul;
+	} else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		pages_per_block = HPAGE_PMD_NR;
+#else
+		/* default to 2MB */
+		pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+
+		pages_per_block = max_t(uint32_t, pages_per_block,
+					tbo->page_alignment);
+	}
+
+	vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+	if (!vres)
+		return -ENOMEM;
+
+	ttm_resource_init(tbo, place, &vres->base);
+	remaining_size = vres->base.size;
+
+	/* bail out quickly if there's likely not enough VRAM for this BO */
+	if (ttm_resource_manager_usage(man) > max_bytes) {
+		r = -ENOSPC;
+		goto error_fini;
+	}
+
+	INIT_LIST_HEAD(&vres->blocks);
+
+	if (place->flags & TTM_PL_FLAG_TOPDOWN)
+		vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+	if (fpfn || lpfn != man->size)
+		/* Allocate blocks in desired range */
+		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+	mutex_lock(&mgr->lock);
+	while (remaining_size) {
+		if (tbo->page_alignment)
+			min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT;
+		else
+			min_block_size = mgr->default_page_size;
+
+		XE_BUG_ON(min_block_size < mm->chunk_size);
+
+		/* Limit maximum size to 2GiB due to SG table limitations */
+		size = min(remaining_size, 2ULL << 30);
+
+		if (size >= pages_per_block << PAGE_SHIFT)
+			min_block_size = pages_per_block << PAGE_SHIFT;
+
+		cur_size = size;
+
+		if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
+			/*
+			 * Except for actual range allocation, modify the size and
+			 * min_block_size conforming to continuous flag enablement
+			 */
+			if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+				size = roundup_pow_of_two(size);
+				min_block_size = size;
+			/*
+			 * Modify the size value if size is not
+			 * aligned with min_block_size
+			 */
+			} else if (!IS_ALIGNED(size, min_block_size)) {
+				size = round_up(size, min_block_size);
+			}
+		}
+
+		r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size,
+					   min_block_size, &vres->blocks,
+					   vres->flags);
+		if (unlikely(r))
+			goto error_free_blocks;
+
+		if (size > remaining_size)
+			remaining_size = 0;
+		else
+			remaining_size -= size;
+	}
+	mutex_unlock(&mgr->lock);
+
+	if (cur_size != size) {
+		struct list_head *trim_list;
+		u64 original_size;
+		LIST_HEAD(temp);
+
+		trim_list = &vres->blocks;
+		original_size = vres->base.size;
+
+		/*
+		 * If size value is rounded up to min_block_size, trim the last
+		 * block to the required size
+		 */
+		if (!list_is_singular(&vres->blocks)) {
+			block = list_last_entry(&vres->blocks, typeof(*block), link);
+			list_move_tail(&block->link, &temp);
+			trim_list = &temp;
+			/*
+			 * Compute the original_size value by subtracting the
+			 * last block size with (aligned size - original size)
+			 */
+			original_size = xe_ttm_vram_mgr_block_size(block) -
+				(size - cur_size);
+		}
+
+		mutex_lock(&mgr->lock);
+		drm_buddy_block_trim(mm, original_size, trim_list);
+		mutex_unlock(&mgr->lock);
+
+		if (!list_empty(&temp))
+			list_splice_tail(trim_list, &vres->blocks);
+	}
+
+	vres->base.start = 0;
+	list_for_each_entry(block, &vres->blocks, link) {
+		unsigned long start;
+
+		start = xe_ttm_vram_mgr_block_start(block) +
+			xe_ttm_vram_mgr_block_size(block);
+		start >>= PAGE_SHIFT;
+
+		if (start > PFN_UP(vres->base.size))
+			start -= PFN_UP(vres->base.size);
+		else
+			start = 0;
+		vres->base.start = max(vres->base.start, start);
+	}
+
+	if (xe_is_vram_mgr_blocks_contiguous(&vres->blocks))
+		vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+
+	*res = &vres->base;
+	return 0;
+
+error_free_blocks:
+	drm_buddy_free_list(mm, &vres->blocks);
+	mutex_unlock(&mgr->lock);
+error_fini:
+	ttm_resource_fini(man, &vres->base);
+	kfree(vres);
+
+	return r;
+}
+
+static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
+				struct ttm_resource *res)
+{
+	struct xe_ttm_vram_mgr_resource *vres =
+		to_xe_ttm_vram_mgr_resource(res);
+	struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+	struct drm_buddy *mm = &mgr->mm;
+
+	mutex_lock(&mgr->lock);
+	drm_buddy_free_list(mm, &vres->blocks);
+	mutex_unlock(&mgr->lock);
+
+	ttm_resource_fini(man, res);
+
+	kfree(vres);
+}
+
+static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
+				  struct drm_printer *printer)
+{
+	struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+	struct drm_buddy *mm = &mgr->mm;
+
+	mutex_lock(&mgr->lock);
+	drm_buddy_print(mm, printer);
+	mutex_unlock(&mgr->lock);
+	drm_printf(printer, "man size:%llu\n", man->size);
+}
+
+static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = {
+	.alloc	= xe_ttm_vram_mgr_new,
+	.free	= xe_ttm_vram_mgr_del,
+	.debug	= xe_ttm_vram_mgr_debug
+};
+
+static void ttm_vram_mgr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_ttm_vram_mgr *mgr = arg;
+	struct xe_device *xe = gt_to_xe(mgr->gt);
+	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
+
+	ttm_resource_manager_set_used(man, false);
+
+	err = ttm_resource_manager_evict_all(&xe->ttm, man);
+	if (err)
+		return;
+
+	drm_buddy_fini(&mgr->mm);
+
+	ttm_resource_manager_cleanup(man);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + mgr->gt->info.vram_id,
+			       NULL);
+}
+
+/*
+ * Back @gt's VRAM with a PAGE_SIZE-chunk drm_buddy allocator, publish @mgr as
+ * placement XE_PL_VRAM0 + gt->info.vram_id and queue ttm_vram_mgr_fini() as a
+ * drm-managed release action. Returns 0, or the failing helper's error code.
+ */
+int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr)
+{
+	struct ttm_resource_manager *man = &mgr->manager;
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	mgr->gt = gt;
+	man->func = &xe_ttm_vram_mgr_func;
+
+	ttm_resource_manager_init(man, &xe->ttm, gt->mem.vram.size);
+	err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+	if (err)
+		return err;
+
+	mgr->default_page_size = PAGE_SIZE;
+	mutex_init(&mgr->lock);
+
+	ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + gt->info.vram_id, man);
+	ttm_resource_manager_set_used(man, true);
+
+	return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
+}
+
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+			      struct ttm_resource *res,
+			      u64 offset, u64 length,
+			      struct device *dev,
+			      enum dma_data_direction dir,
+			      struct sg_table **sgt)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0);
+	struct xe_res_cursor cursor;
+	struct scatterlist *sg;
+	int num_entries = 0;
+	int i, r;
+
+	*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
+	if (!*sgt)
+		return -ENOMEM;
+
+	/* Determine the number of DRM_BUDDY blocks to export */
+	xe_res_first(res, offset, length, &cursor);
+	while (cursor.remaining) {
+		num_entries++;
+		xe_res_next(&cursor, cursor.size);
+	}
+
+	r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
+	if (r)
+		goto error_free;
+
+	/* Initialize scatterlist nodes of sg_table */
+	for_each_sgtable_sg((*sgt), sg, i)
+		sg->length = 0;
+
+	/*
+	 * Walk down DRM_BUDDY blocks to populate scatterlist nodes
+	 * @note: Use iterator api to get first the DRM_BUDDY block
+	 * and the number of bytes from it. Access the following
+	 * DRM_BUDDY block(s) if more buffer needs to exported
+	 */
+	xe_res_first(res, offset, length, &cursor);
+	for_each_sgtable_sg((*sgt), sg, i) {
+		phys_addr_t phys = cursor.start + gt->mem.vram.io_start;
+		size_t size = cursor.size;
+		dma_addr_t addr;
+
+		addr = dma_map_resource(dev, phys, size, dir,
+					DMA_ATTR_SKIP_CPU_SYNC);
+		r = dma_mapping_error(dev, addr);
+		if (r)
+			goto error_unmap;
+
+		sg_set_page(sg, NULL, size, 0);
+		sg_dma_address(sg) = addr;
+		sg_dma_len(sg) = size;
+
+		xe_res_next(&cursor, cursor.size);
+	}
+
+	return 0;
+
+error_unmap:
+	for_each_sgtable_sg((*sgt), sg, i) {
+		if (!sg->length)
+			continue;
+
+		dma_unmap_resource(dev, sg->dma_address,
+				   sg->length, dir,
+				   DMA_ATTR_SKIP_CPU_SYNC);
+	}
+	sg_free_table(*sgt);
+
+error_free:
+	kfree(*sgt);
+	return r;
+}
+
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+			      struct sg_table *sgt)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sgtable_sg(sgt, sg, i)
+		dma_unmap_resource(dev, sg->dma_address,
+				   sg->length, dir,
+				   DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
new file mode 100644
index 000000000000..537fccec4318
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_H_
+#define _XE_TTM_VRAM_MGR_H_
+
+#include "xe_ttm_vram_mgr_types.h"
+
+enum dma_data_direction;
+struct xe_device;
+struct xe_gt;
+
+int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr);
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+			      struct ttm_resource *res,
+			      u64 offset, u64 length,
+			      struct device *dev,
+			      enum dma_data_direction dir,
+			      struct sg_table **sgt);
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+			      struct sg_table *sgt);
+
+static inline u64 xe_ttm_vram_mgr_block_start(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_offset(block);
+}
+
+static inline u64 xe_ttm_vram_mgr_block_size(struct drm_buddy_block *block)
+{
+	return (u64)PAGE_SIZE << drm_buddy_block_order(block); /* 64-bit shift */
+}
+
+static inline struct xe_ttm_vram_mgr_resource *
+to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
+{
+	return container_of(res, struct xe_ttm_vram_mgr_resource, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
new file mode 100644
index 000000000000..39b93c71c21b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_TYPES_H_
+#define _XE_TTM_VRAM_MGR_TYPES_H_
+
+#include <drm/drm_buddy.h>
+#include <drm/ttm/ttm_device.h>
+
+struct xe_gt;
+
+/**
+ * struct xe_ttm_vram_mgr - XE TTM VRAM manager
+ *
+ * Manages placement of TTM resource in VRAM.
+ */
+struct xe_ttm_vram_mgr {
+	/** @gt: Graphics tile which the VRAM belongs to */
+	struct xe_gt *gt;
+	/** @manager: Base TTM resource manager */
+	struct ttm_resource_manager manager;
+	/** @mm: DRM buddy allocator which manages the VRAM */
+	struct drm_buddy mm;
+	/** @default_page_size: default page size */
+	u64 default_page_size;
+	/** @lock: protects allocations of VRAM */
+	struct mutex lock;
+};
+
+/**
+ * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource
+ */
+struct xe_ttm_vram_mgr_resource {
+	/** @base: Base TTM resource */
+	struct ttm_resource base;
+	/** @blocks: list of DRM buddy blocks */
+	struct list_head blocks;
+	/** @flags: flags associated with the resource */
+	unsigned long flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
new file mode 100644
index 000000000000..e043db037368
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wa.h"
+
+#include "xe_platform_types.h"
+#include "xe_gt_types.h"
+#include "xe_rtp.h"
+
+#include "gt/intel_gt_regs.h"
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x)	_XE_RTP_REG(x)
+#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+
+static const struct xe_rtp_entry gt_tunings[] = {
+	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_SET(XEHP_SQCM, EN_32B_ACCESS)
+	},
+	{}
+};
+
+static const struct xe_rtp_entry context_tunings[] = {
+	{ XE_RTP_NAME("1604555607"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
+	  XE_RTP_FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
+					FF_MODE2_TDS_TIMER_128)
+	},
+	{}
+};
+
+void xe_tuning_process_gt(struct xe_gt *gt)
+{
+	xe_rtp_process(gt_tunings, &gt->reg_sr, gt, NULL);
+}
diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h
new file mode 100644
index 000000000000..66dbc93192bd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TUNING_
+#define _XE_TUNING_
+
+struct xe_gt;
+
+void xe_tuning_process_gt(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
new file mode 100644
index 000000000000..938d14698003
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_device.h"
+#include "xe_huc.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_uc.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+
+/* Recover the xe_gt that embeds @uc as its 'uc' member. */
+static struct xe_gt *uc_to_gt(struct xe_uc *uc)
+{
+	return container_of(uc, struct xe_gt, uc);
+}
+
+/* Look up the xe_device for @uc by way of the GT that contains it. */
+static struct xe_device *uc_to_xe(struct xe_uc *uc)
+{
+	return gt_to_xe(uc_to_gt(uc));
+}
+
+/**
+ * xe_uc_init - one-time init of the uC components
+ * @uc: The UC object
+ *
+ * Should be called once at driver load only. Nothing is done (and 0 returned)
+ * when GuC submission is not enabled. Otherwise GuC, HuC, WOPCM and GuC
+ * submission are initialised in that order, stopping at the first failure.
+ *
+ * Return: 0 on success, else the failing step's error code.
+ */
+int xe_uc_init(struct xe_uc *uc)
+{
+	struct xe_device *xe = uc_to_xe(uc);
+	int err;
+
+	if (!xe_device_guc_submission_enabled(xe))
+		return 0;
+
+	err = xe_guc_init(&uc->guc);
+	if (!err)
+		err = xe_huc_init(&uc->huc);
+	if (!err)
+		err = xe_wopcm_init(&uc->wopcm);
+	if (!err)
+		err = xe_guc_submit_init(&uc->guc);
+	if (!err)
+		return 0;
+
+	/* If any uC firmwares not found, fall back to execlists */
+	xe_device_guc_submission_disable(xe);
+
+	return err;
+}
+
+/**
+ * xe_uc_init_post_hwconfig - init Uc post hwconfig load
+ * @uc: The UC object
+ *
+ * Return: 0 on success or if GuC submission is disabled, else the GuC error.
+ */
+int xe_uc_init_post_hwconfig(struct xe_uc *uc)
+{
+	/* Only done with GuC submission enabled; otherwise report success */
+	if (xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return xe_guc_init_post_hwconfig(&uc->guc);
+
+	return 0;
+}
+
+/*
+ * Reset the GuC; a failure is logged via drm_err() and its code returned.
+ */
+static int uc_reset(struct xe_uc *uc)
+{
+	struct xe_device *xe = uc_to_xe(uc);
+	int err = xe_guc_reset(&uc->guc);
+
+	if (err)
+		drm_err(&xe->drm, "Failed to reset GuC, ret = %d\n", err);
+
+	return err;
+}
+
+/* Sanitize the HuC, then the GuC, and finish with uc_reset(); returns its result. */
+static int uc_sanitize(struct xe_uc *uc)
+{
+	xe_huc_sanitize(&uc->huc);
+	xe_guc_sanitize(&uc->guc);
+	return uc_reset(uc);
+}
+
+/**
+ * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig
+ * @uc: The UC object
+ *
+ * Does nothing when GuC submission is not enabled on the device; otherwise
+ * the whole job is delegated to xe_guc_min_load_for_hwconfig().
+ *
+ * Return: 0 on success or when GuC submission is disabled, otherwise the
+ * error returned by the minimal GuC load.
+ */
+int xe_uc_init_hwconfig(struct xe_uc *uc)
+{
+	struct xe_device *xe = uc_to_xe(uc);
+
+	if (!xe_device_guc_submission_enabled(xe))
+		return 0;
+
+	/* Zero and non-zero results alike are handed straight to the caller */
+	return xe_guc_min_load_for_hwconfig(&uc->guc);
+}
+
+/*
+ * Should be called during driver load, after every GT reset, and after every
+ * suspend to reload / auth the firmwares.
+ *
+ * Steps, in order: sanitize + GuC reset, HuC upload, GuC upload, enable GuC
+ * communication, record default LRCs, GuC post-load init, GuC PC start and
+ * finally HuC auth. All steps but the last abort the sequence on error; a
+ * HuC auth failure only triggers XE_WARN_ON().
+ *
+ * Returns 0 on success or when GuC submission is disabled, otherwise the
+ * error code of the first step that failed.
+ */
+int xe_uc_init_hw(struct xe_uc *uc)
+{
+	struct xe_guc *guc = &uc->guc;
+	struct xe_huc *huc = &uc->huc;
+	int err;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	/* Each step below only runs if every earlier one returned 0 */
+	err = uc_sanitize(uc);
+	if (!err)
+		err = xe_huc_upload(huc);
+	if (!err)
+		err = xe_guc_upload(guc);
+	if (!err)
+		err = xe_guc_enable_communication(guc);
+	if (!err)
+		err = xe_gt_record_default_lrcs(uc_to_gt(uc));
+	if (!err)
+		err = xe_guc_post_load_init(guc);
+	if (!err)
+		err = xe_guc_pc_start(&guc->pc);
+
+	if (err)
+		return err;
+
+	/* We don't fail the driver load if HuC fails to auth, but let's warn */
+	err = xe_huc_auth(huc);
+	XE_WARN_ON(err);
+
+	return 0;
+}
+
+int xe_uc_reset_prepare(struct xe_uc *uc)
+{
+	/* Forwarded to the GuC only when GuC submission is enabled */
+	if (xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return xe_guc_reset_prepare(&uc->guc);
+
+	return 0;
+}
+
+void xe_uc_stop_prepare(struct xe_uc *uc)
+{
+	xe_guc_stop_prepare(&uc->guc); /* NOTE(review): no GuC-submission check here */
+}
+
+int xe_uc_stop(struct xe_uc *uc)
+{
+	/* With GuC submission disabled this is a no-op that returns 0 */
+	if (xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return xe_guc_stop(&uc->guc);
+
+	return 0;
+}
+
+int xe_uc_start(struct xe_uc *uc)
+{
+	struct xe_device *xe = uc_to_xe(uc);
+
+	/* Without GuC submission there is nothing to start: report success */
+	return xe_device_guc_submission_enabled(xe) ?
+		xe_guc_start(&uc->guc) : 0;
+}
+
+/*
+ * Call xe_guc_reset_wait() followed by xe_uc_reset_prepare(), and repeat the
+ * pair for as long as the prepare step reports an error.
+ * NOTE(review): the retry is unbounded -- confirm it cannot spin forever.
+ */
+static void uc_reset_wait(struct xe_uc *uc)
+{
+	do {
+		xe_guc_reset_wait(&uc->guc);
+	} while (xe_uc_reset_prepare(uc));
+}
+
+/*
+ * Suspend path: run uc_reset_wait(), stop the uC, then suspend the GuC.
+ * Returns 0 immediately when GuC submission is disabled.
+ */
+int xe_uc_suspend(struct xe_uc *uc)
+{
+	int err;
+
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	uc_reset_wait(uc);
+
+	err = xe_uc_stop(uc);
+	return err ? err : xe_guc_suspend(&uc->guc);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
new file mode 100644
index 000000000000..380e722f95fc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_H_
+#define _XE_UC_H_
+
+#include "xe_uc_types.h"
+
+int xe_uc_init(struct xe_uc *uc);
+int xe_uc_init_hwconfig(struct xe_uc *uc);
+int xe_uc_init_post_hwconfig(struct xe_uc *uc);
+int xe_uc_init_hw(struct xe_uc *uc);
+int xe_uc_reset_prepare(struct xe_uc *uc);
+void xe_uc_stop_prepare(struct xe_uc *uc);
+int xe_uc_stop(struct xe_uc *uc);
+int xe_uc_start(struct xe_uc *uc);
+int xe_uc_suspend(struct xe_uc *uc);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c
new file mode 100644
index 000000000000..0a39ec5a6e99
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_gt.h"
+#include "xe_guc_debugfs.h"
+#include "xe_huc_debugfs.h"
+#include "xe_macros.h"
+#include "xe_uc_debugfs.h"
+
+/* Create the "uc" debugfs dir under @parent and register GuC/HuC below it. */
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent)
+{
+	struct dentry *root = debugfs_create_dir("uc", parent);
+
+	if (IS_ERR(root)) {
+		/* NOTE(review): the argument is a string literal, never false */
+		XE_WARN_ON("Create UC directory failed");
+		return;
+	}
+	xe_guc_debugfs_register(&uc->guc, root);
+	xe_huc_debugfs_register(&uc->huc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.h b/drivers/gpu/drm/xe/xe_uc_debugfs.h
new file mode 100644
index 000000000000..a13382df2bd7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_DEBUGFS_H_
+#define _XE_UC_DEBUGFS_H_
+
+struct dentry;
+struct xe_uc;
+
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
new file mode 100644
index 000000000000..86c47b7f0901
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_reg.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+/* Map @uc_fw to its GT; @type tells which embedded fw (GuC or HuC) it is. */
+static struct xe_gt *
+__uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
+{
+	if (type == XE_UC_FW_TYPE_GUC)
+		return container_of(uc_fw, struct xe_gt, uc.guc.fw);
+	XE_BUG_ON(type != XE_UC_FW_TYPE_HUC);
+	return container_of(uc_fw, struct xe_gt, uc.huc.fw);
+}
+
+static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw)
+{
+	return __uc_fw_to_gt(uc_fw, uc_fw->type); /* dispatch on the stored type */
+}
+
+static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
+{
+	return gt_to_xe(uc_fw_to_gt(uc_fw)); /* device is reached through the GT */
+}
+
+/*
+ * Required GuC and HuC binaries per-platform, as (platform, first revid, blob).
+ * Must be ordered based on platform + revid, from newer to older.
+ */
+#define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
+	fw_def(METEORLAKE,   0, guc_def(mtl,  70, 5, 2)) \
+	fw_def(ALDERLAKE_P,  0, guc_def(adlp,  70, 5, 2)) \
+	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 5, 2)) \
+	fw_def(PVC,          0, guc_def(pvc,  70, 5, 2)) \
+	fw_def(DG2,          0, guc_def(dg2,  70, 5, 2)) \
+	fw_def(DG1,          0, guc_def(dg1,  70, 5, 2)) \
+	fw_def(TIGERLAKE,    0, guc_def(tgl,  70, 5, 2))
+
+#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
+	fw_def(DG1,          0, huc_def(dg1,  7, 9, 3)) \
+	fw_def(TIGERLAKE,    0, huc_def(tgl,  7, 9, 3))
+
+/* Major-only blob name: "xe/<prefix>_<name>_<major>.bin" */
+#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \
+	"xe/" __stringify(prefix_) "_" name_ "_" __stringify(major_) ".bin"
+
+/* Full-version blob name: "xe/<prefix><name><major>.<minor>.<patch>.bin" */
+#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
+	"xe/" __stringify(prefix_) name_ \
+	__stringify(major_) "." __stringify(minor_) "." \
+	__stringify(patch_) ".bin"
+
+#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
+	__MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_) /* minor_, patch_ unused */
+
+#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
+	__MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
+
+/* All blobs need to be declared via MODULE_FIRMWARE(); only uc_ is used here */
+#define XE_UC_MODULE_FW(platform_, revid_, uc_) \
+	MODULE_FIRMWARE(uc_);
+
+XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH)
+
+/* The below structs and macros are used to iterate across the list of blobs */
+struct __packed uc_fw_blob {
+	u8 major; /* copied to xe_uc_fw.major_ver_wanted on selection */
+	u8 minor; /* copied to xe_uc_fw.minor_ver_wanted on selection */
+	const char *path; /* NOTE(review): __packed presumably leaves this unaligned */
+};
+
+#define UC_FW_BLOB(major_, minor_, path_) \
+	{ .major = major_, .minor = minor_, .path = path_ }
+
+#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \
+	UC_FW_BLOB(major_, minor_, \
+		   MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_))
+
+#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \
+	UC_FW_BLOB(major_, minor_, \
+		   MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_))
+
+struct __packed uc_fw_platform_requirement {
+	enum xe_platform p; /* platform this entry applies to */
+	u8 rev; /* first platform rev using this FW */
+	const struct uc_fw_blob blob; /* blob wanted for that platform/rev */
+};
+
+#define MAKE_FW_LIST(platform_, revid_, uc_) \
+{ \
+	.p = XE_##platform_, \
+	.rev = revid_, \
+	.blob = uc_, \
+},
+
+struct fw_blobs_by_type {
+	const struct uc_fw_platform_requirement *blobs; /* table for one fw type */
+	u32 count; /* number of entries in @blobs */
+};
+
+/* Pick the blob for @xe's platform/revid; no match leaves @uc_fw untouched. */
+static void uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
+{
+	static const struct uc_fw_platform_requirement blobs_guc[] = {
+		XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
+	};
+	static const struct uc_fw_platform_requirement blobs_huc[] = {
+		XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
+	};
+	static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = {
+		[XE_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
+		[XE_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
+	};
+	/* Per-call cursor: a function-static pointer is shared by all callers */
+	const struct uc_fw_platform_requirement *fw_blobs;
+	enum xe_platform p = xe->info.platform;
+	u8 rev = xe->info.revid;
+	u32 fw_count, i;
+
+	XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
+	fw_blobs = blobs_all[uc_fw->type].blobs;
+	fw_count = blobs_all[uc_fw->type].count;
+
+	/* The table runs from newer to older platforms: stop once past @p */
+	for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
+		if (p != fw_blobs[i].p || rev < fw_blobs[i].rev)
+			continue;
+
+		uc_fw->path = fw_blobs[i].blob.path;
+		uc_fw->major_ver_wanted = fw_blobs[i].blob.major;
+		uc_fw->minor_ver_wanted = fw_blobs[i].blob.minor;
+		break;
+	}
+}
+
+/**
+ * xe_uc_fw_copy_rsa - copy fw RSA to buffer
+ *
+ * @uc_fw: uC firmware
+ * @dst: dst buffer
+ * @max_len: max number of bytes to copy; capped at the firmware's rsa_size
+ *
+ * Return: number of copied bytes, i.e. min(rsa_size, @max_len).
+ */
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	u32 size = min_t(u32, uc_fw->rsa_size, max_len);
+
+	XE_BUG_ON(size % 4); /* copy length must be a multiple of 4 bytes */
+	XE_BUG_ON(!xe_uc_fw_is_available(uc_fw)); /* status must be >= AVAILABLE */
+
+	xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap,
+			   xe_uc_fw_rsa_offset(uc_fw), size);
+
+	return size;
+}
+
+/* drmm action for a uc_fw: unpin/unmap its BO and step back to SELECTED. */
+static void uc_fw_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_uc_fw *uc_fw = arg;
+
+	if (xe_uc_fw_is_available(uc_fw)) {
+		xe_bo_unpin_map_no_vm(uc_fw->bo);
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
+	}
+}
+
+/*
+ * Select the firmware blob for @uc_fw, fetch it with request_firmware(),
+ * validate its CSS header and version, and copy it into a newly created BO.
+ * Returns 0 on success, an error code otherwise.
+ */
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct device *dev = xe->drm.dev;
+	const struct firmware *fw = NULL;
+	struct uc_css_header *css;
+	struct xe_bo *obj;
+	size_t size;
+	int err;
+
+	/*
+	 * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
+	 * before we've looked at the HW caps to see if we have uc support
+	 */
+	BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
+	XE_BUG_ON(uc_fw->status);
+	XE_BUG_ON(uc_fw->path);
+
+	uc_fw_auto_select(xe, uc_fw);
+	xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
+			       XE_UC_FIRMWARE_SELECTED :
+			       XE_UC_FIRMWARE_DISABLED :
+			       XE_UC_FIRMWARE_NOT_SUPPORTED);
+
+	/* Transform no huc in the list into firmware disabled */
+	if (uc_fw->type == XE_UC_FW_TYPE_HUC && !xe_uc_fw_is_supported(uc_fw)) {
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
+		return -ENOPKG;
+	}
+	err = request_firmware(&fw, uc_fw->path, dev);
+	if (err)
+		goto fail;
+
+	/* Check the size of the blob before examining buffer contents */
+	if (unlikely(fw->size < sizeof(struct uc_css_header))) {
+		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw->size, sizeof(struct uc_css_header));
+		err = -ENODATA;
+		goto fail;
+	}
+
+	css = (struct uc_css_header *)fw->data;
+
+	/* Check integrity of size values inside CSS header */
+	size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
+		css->exponent_size_dw) * sizeof(u32);
+	if (unlikely(size != sizeof(struct uc_css_header))) {
+		drm_warn(&xe->drm,
+			 "%s firmware %s: unexpected header size: %zu != %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 size, sizeof(struct uc_css_header));
+		err = -EPROTO;
+		goto fail;
+	}
+
+	/* uCode size must be calculated from other sizes */
+	uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+	/* now RSA */
+	uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+	/* At least, it should have header, uCode and RSA. Size of all three. */
+	size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
+		uc_fw->rsa_size;
+	if (unlikely(fw->size < size)) {
+		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw->size, size);
+		err = -ENOEXEC;
+		goto fail;
+	}
+
+	/* Get version numbers from the CSS header */
+	uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+					   css->sw_version);
+	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+					   css->sw_version);
+
+	if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
+	    uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
+		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
+			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+		if (!xe_uc_fw_is_overridden(uc_fw)) {
+			err = -ENOEXEC;
+			goto fail;
+		}
+	}
+
+	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
+		uc_fw->private_data_size = css->private_data_size;
+
+	obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size,
+				     ttm_bo_type_kernel,
+				     XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				     XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(obj)) {
+		drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo\n",
+			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+		err = PTR_ERR(obj);
+		goto fail;
+	}
+
+	uc_fw->bo = obj;
+	uc_fw->size = fw->size;
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE);
+
+	release_firmware(fw);
+
+	return drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw);
+
+fail:
+	xe_uc_fw_change_status(uc_fw, err == -ENOENT ?
+			       XE_UC_FIRMWARE_MISSING :
+			       XE_UC_FIRMWARE_ERROR);
+
+	drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+	drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n",
+		 xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
+
+	release_firmware(fw);		/* OK even if fw is NULL */
+	return err;
+}
+
+static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw)
+{
+	return xe_bo_ggtt_addr(uc_fw->bo); /* GGTT address of the fw BO, as u32 */
+}
+
+/*
+ * Copy the CSS header plus uCode from the firmware BO's GGTT address to WOPCM
+ * @offset through the DMA_* registers. Returns the xe_mmio_wait32() result.
+ */
+static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	u32 src_offset;
+	int ret;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	/* Set the source address for the uCode */
+	src_offset = uc_fw_ggtt_offset(uc_fw);
+	xe_mmio_write32(gt, DMA_ADDR_0_LOW.reg, lower_32_bits(src_offset));
+	xe_mmio_write32(gt, DMA_ADDR_0_HIGH.reg, upper_32_bits(src_offset));
+
+	/* Set the DMA destination */
+	xe_mmio_write32(gt, DMA_ADDR_1_LOW.reg, offset);
+	xe_mmio_write32(gt, DMA_ADDR_1_HIGH.reg, DMA_ADDRESS_SPACE_WOPCM);
+
+	/* Transfer size: the header plus uCode only, no other components */
+	xe_mmio_write32(gt, DMA_COPY_SIZE.reg,
+			sizeof(struct uc_css_header) + uc_fw->ucode_size);
+
+	/* Start the DMA */
+	xe_mmio_write32(gt, DMA_CTRL.reg,
+			_MASKED_BIT_ENABLE(dma_flags | START_DMA));
+
+	/* Wait for DMA to finish */
+	ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100);
+	if (ret)
+		drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
+			xe_uc_fw_type_repr(uc_fw->type),
+			xe_mmio_read32(gt, DMA_CTRL.reg));
+
+	/* Disable the bits once DMA is over */
+	xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags));
+	return ret;
+}
+
+/*
+ * Transfer @uc_fw with uc_fw_xfer(). Status becomes TRANSFERRED on success and
+ * LOAD_FAIL on a transfer error; returns -ENOEXEC if the fw is not loadable.
+ */
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	int err;
+
+	/* make sure the status was cleared the last time we reset the uc */
+	XE_BUG_ON(xe_uc_fw_is_loaded(uc_fw));
+
+	if (!xe_uc_fw_is_loadable(uc_fw))
+		return -ENOEXEC;
+
+	err = uc_fw_xfer(uc_fw, offset, dma_flags);
+	if (!err) {
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_TRANSFERRED);
+		return 0;
+	}
+
+	drm_err(&xe->drm, "Failed to load %s firmware %s (%d)\n",
+		xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOAD_FAIL);
+	return err;
+}
+
+/* Print path, status, wanted/found version and uCode/RSA sizes of @uc_fw to @p. */
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
+{
+	drm_printf(p, "%s firmware: %s\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+	drm_printf(p, "\tstatus: %s\n",
+		   xe_uc_fw_status_repr(uc_fw->status));
+	drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
+		   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
+		   uc_fw->major_ver_found, uc_fw->minor_ver_found);
+	drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
+	drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
new file mode 100644
index 000000000000..b0df5064b27d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_H_
+#define _XE_UC_FW_H_
+
+#include <linux/errno.h>
+
+#include "xe_uc_fw_types.h"
+#include "xe_uc_fw_abi.h"
+#include "xe_macros.h"
+
+struct drm_printer;
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw);
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len);
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags);
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p);
+
+static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw)
+{
+	return sizeof(struct uc_css_header) + uc_fw->ucode_size; /* RSA follows header + uCode */
+}
+
+static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw,
+					  enum xe_uc_fw_status status)
+{
+	uc_fw->__status = status; /* writable alias of the const 'status' field */
+}
+
+static inline
+const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status)
+{
+	switch (status) { /* no default label: every enumerator is listed */
+	case XE_UC_FIRMWARE_NOT_SUPPORTED:
+		return "N/A";
+	case XE_UC_FIRMWARE_UNINITIALIZED:
+		return "UNINITIALIZED";
+	case XE_UC_FIRMWARE_DISABLED:
+		return "DISABLED";
+	case XE_UC_FIRMWARE_SELECTED:
+		return "SELECTED";
+	case XE_UC_FIRMWARE_MISSING:
+		return "MISSING";
+	case XE_UC_FIRMWARE_ERROR:
+		return "ERROR";
+	case XE_UC_FIRMWARE_AVAILABLE:
+		return "AVAILABLE";
+	case XE_UC_FIRMWARE_INIT_FAIL:
+		return "INIT FAIL";
+	case XE_UC_FIRMWARE_LOADABLE:
+		return "LOADABLE";
+	case XE_UC_FIRMWARE_LOAD_FAIL:
+		return "LOAD FAIL";
+	case XE_UC_FIRMWARE_TRANSFERRED:
+		return "TRANSFERRED";
+	case XE_UC_FIRMWARE_RUNNING:
+		return "RUNNING";
+	}
+	return "<invalid>"; /* reached only for a value outside the enum */
+}
+
+static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status)
+{
+	switch (status) { /* no default label: every enumerator is listed */
+	case XE_UC_FIRMWARE_NOT_SUPPORTED:
+		return -ENODEV;
+	case XE_UC_FIRMWARE_UNINITIALIZED:
+		return -EACCES;
+	case XE_UC_FIRMWARE_DISABLED:
+		return -EPERM;
+	case XE_UC_FIRMWARE_MISSING:
+		return -ENOENT;
+	case XE_UC_FIRMWARE_ERROR:
+		return -ENOEXEC;
+	case XE_UC_FIRMWARE_INIT_FAIL:
+	case XE_UC_FIRMWARE_LOAD_FAIL:
+		return -EIO;
+	case XE_UC_FIRMWARE_SELECTED:
+		return -ESTALE;
+	case XE_UC_FIRMWARE_AVAILABLE:
+	case XE_UC_FIRMWARE_LOADABLE:
+	case XE_UC_FIRMWARE_TRANSFERRED:
+	case XE_UC_FIRMWARE_RUNNING:
+		return 0; /* these four states are not reported as errors */
+	}
+	return -EINVAL; /* reached only for a value outside the enum */
+}
+
+static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type)
+{
+	switch (type) { /* no default label: both enumerators are listed */
+	case XE_UC_FW_TYPE_GUC:
+		return "GuC";
+	case XE_UC_FW_TYPE_HUC:
+		return "HuC";
+	}
+	return "uC"; /* generic name for a type value outside the enum */
+}
+
+static inline enum xe_uc_fw_status
+__xe_uc_fw_status(struct xe_uc_fw *uc_fw)
+{
+	/* shouldn't call this before checking hw/blob availability */
+	XE_BUG_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED);
+	return uc_fw->status; /* read through the const view of the status */
+}
+
+static inline bool xe_uc_fw_is_supported(struct xe_uc_fw *uc_fw)
+{ /* true for every status except NOT_SUPPORTED */
+	return __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_NOT_SUPPORTED;
+}
+
+static inline bool xe_uc_fw_is_enabled(struct xe_uc_fw *uc_fw)
+{ /* relies on enum order: anything ranked after DISABLED counts */
+	return __xe_uc_fw_status(uc_fw) > XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_disabled(struct xe_uc_fw *uc_fw)
+{ /* exact match on DISABLED */
+	return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw)
+{ /* relies on enum order: AVAILABLE and everything ranked after it */
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_AVAILABLE;
+}
+
+static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw)
+{ /* relies on enum order: LOADABLE and everything ranked after it */
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE;
+}
+
+static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw)
+{ /* relies on enum order: TRANSFERRED or RUNNING */
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_TRANSFERRED;
+}
+
+static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw)
+{ /* exact match on RUNNING */
+	return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING;
+}
+
+static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw)
+{
+	return uc_fw->user_overridden; /* plain field read, no status check */
+}
+
+static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw)
+{
+	if (xe_uc_fw_is_loaded(uc_fw)) /* TRANSFERRED or RUNNING only */
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+static inline u32 __xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+	return sizeof(struct uc_css_header) + uc_fw->ucode_size; /* no status check */
+}
+
+/**
+ * xe_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded.
+ * @uc_fw: uC firmware.
+ *
+ * Get the size of the firmware and header that will be uploaded to WOPCM.
+ *
+ * Return: Upload firmware size, or zero while the firmware is not available.
+ */
+static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+	bool fetched = xe_uc_fw_is_available(uc_fw);
+
+	/* A size is only reported once the status is AVAILABLE or later */
+	return fetched ? __xe_uc_fw_get_upload_size(uc_fw) : 0;
+}
+
+#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/xe"
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
new file mode 100644
index 000000000000..dafd26cb0c41
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_ABI_H
+#define _XE_UC_FW_ABI_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/**
+ * DOC: Firmware Layout
+ *
+ * The GuC/HuC firmware layout looks like this::
+ *
+ *      +======================================================================+
+ *      |  Firmware blob                                                       |
+ *      +===============+===============+============+============+============+
+ *      |  CSS header   |     uCode     |  RSA key   |  modulus   |  exponent  |
+ *      +===============+===============+============+============+============+
+ *       <-header size->                 <---header size continued ----------->
+ *       <--- size ----------------------------------------------------------->
+ *                                       <-key size->
+ *                                                    <-mod size->
+ *                                                                 <-exp size->
+ *
+ * The firmware may or may not have modulus key and exponent data. The header,
+ * uCode and RSA signature are must-have components that will be used by the
+ * driver. The length of each component, all in dwords, can be found in the
+ * header. When modulus and exponent are not present in the fw (a.k.a. a
+ * truncated image), their length values still appear in the header.
+ *
+ * Driver will do some basic fw size validation based on the following rules:
+ *
+ * 1. Header, uCode and RSA are must-have components.
+ * 2. All firmware components, if present, are in the sequence illustrated
+ *    in the layout table above.
+ * 3. Length info of each component can be found in header, in dwords.
+ * 4. Modulus and exponent key are not required by driver. They may not appear
+ *    in fw. So driver will load a truncated firmware in this case.
+ */
+
+struct uc_css_header {
+	u32 module_type;
+	/*
+	 * header_size includes all non-uCode bits, including css_header, rsa
+	 * key, modulus key and exponent data.
+	 */
+	u32 header_size_dw;
+	u32 header_version;
+	u32 module_id;
+	u32 module_vendor;
+	u32 date;
+#define CSS_DATE_DAY			(0xFFU << 0)
+#define CSS_DATE_MONTH			(0xFFU << 8)
+#define CSS_DATE_YEAR			(0xFFFFU << 16) /* unsigned: reaches bit 31 */
+	u32 size_dw; /* uCode plus header_size_dw */
+	u32 key_size_dw;
+	u32 modulus_size_dw;
+	u32 exponent_size_dw;
+	u32 time; /* NOTE(review): two masks below are named DATE_ but follow 'time' */
+#define CSS_TIME_HOUR			(0xFFU << 0)
+#define CSS_DATE_MIN			(0xFFU << 8)
+#define CSS_DATE_SEC			(0xFFFFU << 16) /* unsigned: reaches bit 31 */
+	char username[8];
+	char buildnumber[12];
+	u32 sw_version;
+#define CSS_SW_VERSION_UC_MAJOR		(0xFFU << 16)
+#define CSS_SW_VERSION_UC_MINOR		(0xFFU << 8)
+#define CSS_SW_VERSION_UC_PATCH		(0xFFU << 0)
+	u32 reserved0[13];
+	union {
+		u32 private_data_size; /* only applies to GuC */
+		u32 reserved1;
+	};
+	u32 header_info;
+} __packed;
+static_assert(sizeof(struct uc_css_header) == 128);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
new file mode 100644
index 000000000000..1cfd30a655df
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_TYPES_H_
+#define _XE_UC_FW_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/*
+ * +------------+---------------------------------------------------+
+ * |   PHASE    |           FIRMWARE STATUS TRANSITIONS             |
+ * +============+===================================================+
+ * |            |               UNINITIALIZED                       |
+ * +------------+-               /   |   \                         -+
+ * |            |   DISABLED <--/    |    \--> NOT_SUPPORTED        |
+ * | init_early |                    V                              |
+ * |            |                 SELECTED                          |
+ * +------------+-               /   |   \                         -+
+ * |            |    MISSING <--/    |    \--> ERROR                |
+ * |   fetch    |                    V                              |
+ * |            |                 AVAILABLE                         |
+ * +------------+-                   |   \                         -+
+ * |            |                    |    \--> INIT FAIL            |
+ * |   init     |                    V                              |
+ * |            |        /------> LOADABLE <----<-----------\       |
+ * +------------+-       \         /    \        \           \     -+
+ * |            |    LOAD FAIL <--<      \--> TRANSFERRED     \     |
+ * |   upload   |                  \           /   \          /     |
+ * |            |                   \---------/     \--> RUNNING    |
+ * +------------+---------------------------------------------------+
+ */
+
+/*
+ * FIXME: Ported from the i915 and this state machine is way too complicated.
+ * Circle back and simplify this.
+ */
+enum xe_uc_fw_status {
+	XE_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */
+	XE_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */
+	XE_UC_FIRMWARE_DISABLED, /* disabled */
+	XE_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */
+	XE_UC_FIRMWARE_MISSING, /* blob not found on the system */
+	XE_UC_FIRMWARE_ERROR, /* invalid format or version */
+	XE_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+	XE_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
+	XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
+	XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
+	XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
+	XE_UC_FIRMWARE_RUNNING /* init/auth done */
+};
+
+enum xe_uc_fw_type {
+	XE_UC_FW_TYPE_GUC = 0,
+	XE_UC_FW_TYPE_HUC
+};
+#define XE_UC_FW_NUM_TYPES 2 /* keep equal to the number of enumerators above */
+
+/**
+ * struct xe_uc_fw - XE micro controller firmware
+ */
+struct xe_uc_fw {
+	/** @type: type uC firmware */
+	enum xe_uc_fw_type type;
+	union {
+		/** @status: firmware load status */
+		const enum xe_uc_fw_status status;
+		/**
+		 * @__status: private firmware load status - only to be used
+		 * by firmware loading code
+		 */
+		enum xe_uc_fw_status __status;
+	};
+	/** @path: path to uC firmware */
+	const char *path;
+	/** @user_overridden: user provided path to uC firmware via modparam */
+	bool user_overridden;
+	/** @size: size of uC firmware including css header */
+	size_t size;
+
+	/** @bo: XE BO for uC firmware */
+	struct xe_bo *bo;
+
+	/*
+	 * The firmware build process will generate a version header file with
+	 * major and minor version defined. The versions are built into CSS
+	 * header of firmware. The xe kernel driver set the minimal firmware
+	 * version required per platform.
+	 */
+
+	/** @major_ver_wanted: major firmware version wanted by platform */
+	u16 major_ver_wanted;
+	/** @minor_ver_wanted: minor firmware version wanted by platform */
+	u16 minor_ver_wanted;
+	/** @major_ver_found: major version found in firmware blob */
+	u16 major_ver_found;
+	/** @minor_ver_found: minor version found in firmware blob */
+	u16 minor_ver_found;
+
+	/** @rsa_size: RSA size */
+	u32 rsa_size;
+	/** @ucode_size: micro kernel size */
+	u32 ucode_size;
+
+	/** @private_data_size: size of private data found in uC css header */
+	u32 private_data_size;
+};
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h
new file mode 100644
index 000000000000..49bef6498b85
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_TYPES_H_
+#define _XE_UC_TYPES_H_
+
+#include "xe_guc_types.h"
+#include "xe_huc_types.h"
+#include "xe_wopcm_types.h"
+
+/**
+ * struct xe_uc - XE micro controllers
+ */
+struct xe_uc {
+	/** @guc: Graphics micro controller (GuC) */
+	struct xe_guc guc;
+	/** @huc: HuC micro controller */
+	struct xe_huc huc;
+	/** @wopcm: WOPCM state */
+	struct xe_wopcm wopcm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
new file mode 100644
index 000000000000..d47a8617c5b6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -0,0 +1,3407 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_vm.h"
+
+#include <linux/dma-fence-array.h>
+
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_preempt_fence.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_sync.h"
+
+#define TEST_VM_ASYNC_OPS_ERROR	/* NOTE(review): test knob defined unconditionally - confirm */
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @vma: The userptr vma
+ *
+ * Check whether the userptr vma has been invalidated since its last
+ * successful repin. The check is advisory only and may be made without the
+ * vm->userptr.notifier_lock held; the result can go stale at once, so it is
+ * typically followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_vma *vma)
+{
+	if (mmu_interval_check_retry(&vma->userptr.notifier,
+				     vma->userptr.notifier_seq))
+		return -EAGAIN;
+	return 0;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_vma *vma)
+{
+	struct xe_vm *vm = vma->vm;
+	struct xe_device *xe = vm->xe;
+	const unsigned long num_pages =
+		(vma->end - vma->start + 1) >> PAGE_SHIFT;
+	struct page **pages;
+	bool in_kthread = !current->mm;
+	unsigned long notifier_seq;
+	int pinned, ret, i;
+	bool read_only = vma->pte_flags & PTE_READ_ONLY;
+
+	lockdep_assert_held(&vm->lock);
+	XE_BUG_ON(!xe_vma_is_userptr(vma));
+retry:
+	if (vma->destroyed)
+		return 0;
+	/* Pages only need (re)pinning if the notifier sequence has moved on. */
+	notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+	if (notifier_seq == vma->userptr.notifier_seq)
+		return 0;
+
+	pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+	/* Tear down the DMA mapping and sg table left by a previous pin. */
+	if (vma->userptr.sg) {
+		dma_unmap_sgtable(xe->drm.dev,
+				  vma->userptr.sg,
+				  read_only ? DMA_TO_DEVICE :
+				  DMA_BIDIRECTIONAL, 0);
+		sg_free_table(vma->userptr.sg);
+		vma->userptr.sg = NULL;
+	}
+	/* A kthread has no mm of its own, so it borrows the notifier's mm. */
+	pinned = ret = 0;
+	if (in_kthread) {
+		if (!mmget_not_zero(vma->userptr.notifier.mm)) {
+			ret = -EFAULT;
+			goto mm_closed;
+		}
+		kthread_use_mm(vma->userptr.notifier.mm);
+	}
+	/* get_user_pages_fast() may pin fewer pages than asked for, so loop. */
+	while (pinned < num_pages) {
+		ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE,
+					  num_pages - pinned,
+					  read_only ? 0 : FOLL_WRITE,
+					  &pages[pinned]);
+		if (ret < 0) {
+			if (in_kthread)
+				ret = 0;	/* NOTE(review): error dropped - confirm */
+			break;
+		}
+
+		pinned += ret;
+		ret = 0;
+	}
+
+	if (in_kthread) {
+		kthread_unuse_mm(vma->userptr.notifier.mm);
+		mmput(vma->userptr.notifier.mm);
+	}
+mm_closed:
+	if (ret)
+		goto out;
+	/* Build an sg table over the pages obtained and map it for DMA. */
+	ret = sg_alloc_table_from_pages(&vma->userptr.sgt, pages, pinned,
+					0, (u64)pinned << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (ret) {
+		vma->userptr.sg = NULL;
+		goto out;
+	}
+	vma->userptr.sg = &vma->userptr.sgt;
+
+	ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
+			      read_only ? DMA_TO_DEVICE :
+			      DMA_BIDIRECTIONAL,
+			      DMA_ATTR_SKIP_CPU_SYNC |
+			      DMA_ATTR_NO_KERNEL_MAPPING);
+	if (ret) {
+		sg_free_table(vma->userptr.sg);
+		vma->userptr.sg = NULL;
+		goto out;
+	}
+	/* Writable mappings get their pages dirtied; all pages are marked accessed. */
+	for (i = 0; i < pinned; ++i) {
+		if (!read_only) {
+			lock_page(pages[i]);
+			set_page_dirty(pages[i]);
+			unlock_page(pages[i]);
+		}
+
+		mark_page_accessed(pages[i]);
+	}
+
+out:
+	release_pages(pages, pinned);	/* also on success: refs are not kept */
+	kvfree(pages);
+	/* Record the sequence just pinned against; start over if it is stale. */
+	if (!(ret < 0)) {
+		vma->userptr.notifier_seq = notifier_seq;
+		if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
+			goto retry;
+	}
+
+	return ret < 0 ? ret : 0;
+}
+
+/* True if any engine lacks a preempt fence or has one with signaling enabled. */
+static bool preempt_fences_waiting(struct xe_vm *vm)
+{
+	struct xe_engine *engine;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(engine, &vm->preempt.engines, compute.link) {
+		struct dma_fence *pf = engine->compute.pfence;
+
+		if (!pf || test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &pf->flags))
+			return true;
+	}
+
+	return false;
+}
+
+static void free_preempt_fences(struct list_head *list)
+{
+	struct list_head *cur, *tmp;
+
+	list_for_each_safe(cur, tmp, list)	/* entries are freed while walking */
+		xe_preempt_fence_free(to_preempt_fence_from_link(cur));
+}
+
+/*
+ * Allocate preempt fences onto @list until *@count equals @vm's engine count.
+ */
+static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
+				unsigned int *count)
+{
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	while (*count < vm->preempt.num_engines) {
+		struct xe_preempt_fence *fresh = xe_preempt_fence_alloc();
+
+		if (IS_ERR(fresh))
+			return PTR_ERR(fresh);
+		list_move_tail(xe_preempt_fence_link(fresh), list);
+		++(*count);
+	}
+
+	return 0;
+}
+
+/* Wait for, then drop, the current preempt fence of every engine on @vm. */
+static int wait_for_existing_preempt_fences(struct xe_vm *vm)
+{
+	struct xe_engine *engine;
+
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(engine, &vm->preempt.engines, compute.link) {
+		if (!engine->compute.pfence)
+			continue;
+
+		if (dma_fence_wait(engine->compute.pfence, false) < 0)
+			return -ETIME;
+		dma_fence_put(engine->compute.pfence);
+		engine->compute.pfence = NULL;
+	}
+
+	return 0;
+}
+
+/* Arm one pre-allocated fence from @list per engine, replacing its old pfence. */
+static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
+{
+	struct xe_engine *engine;
+
+	list_for_each_entry(engine, &vm->preempt.engines, compute.link) {
+		struct list_head *head = list->next;
+		struct dma_fence *armed;
+
+		XE_BUG_ON(head == list);
+
+		armed = xe_preempt_fence_arm(to_preempt_fence_from_link(head),
+					     engine, engine->compute.context,
+					     ++engine->compute.seqno);
+		dma_fence_put(engine->compute.pfence);
+		engine->compute.pfence = armed;
+	}
+}
+
+/* Add every engine's current preempt fence on @vm to the resv of @bo. */
+static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_engine *engine;
+	int ret = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true);
+
+	if (ret)
+		return ret;
+
+	list_for_each_entry(engine, &vm->preempt.engines, compute.link) {
+		if (!engine->compute.pfence)
+			continue;
+		dma_resv_add_fence(bo->ttm.base.resv, engine->compute.pfence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+	}
+
+	xe_bo_unlock(bo, &ww);
+	return 0;
+}
+
+/**
+ * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv
+ * @vm: The vm.
+ * @fence: The fence to add.
+ * @usage: The resv usage for the fence.
+ *
+ * Walks the vm's external object list and adds @fence, with the given
+ * @usage, to the reservation object of the buffer object behind each
+ * entry.
+ */
+void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
+			     enum dma_resv_usage usage)
+{
+	struct xe_vma *ext;
+
+	list_for_each_entry(ext, &vm->extobj.list, extobj.link)
+		dma_resv_add_fence(ext->bo->ttm.base.resv, fence, usage);
+}
+
+/* Resume each engine on @vm and add its pfence to the vm and extobj resvs. */
+static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
+{
+	struct xe_engine *engine;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(engine, &vm->preempt.engines, compute.link) {
+		engine->ops->resume(engine);
+		dma_resv_add_fence(&vm->resv, engine->compute.pfence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+		xe_vm_fence_all_extobjs(vm, engine->compute.pfence,
+					DMA_RESV_USAGE_BOOKKEEP);
+	}
+}
+
+/*
+ * Add engine @e to compute-mode @vm: create its preempt fence and add it to
+ * the vm's resv and to every external object's resv. Returns 0 or -errno.
+ */
+int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
+{
+	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+	struct ttm_validate_buffer *tv;
+	struct ww_acquire_ctx ww;
+	struct list_head objs;
+	struct dma_fence *pfence;
+	int err;
+
+	XE_BUG_ON(!xe_vm_in_compute_mode(vm));
+
+	down_write(&vm->lock);
+
+	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
+	if (err)
+		goto out_unlock_outer;
+
+	pfence = xe_preempt_fence_create(e, e->compute.context,
+					 ++e->compute.seqno);
+	/* Failure may be signalled as NULL or as an ERR_PTR(); handle both. */
+	if (IS_ERR_OR_NULL(pfence)) {
+		err = pfence ? PTR_ERR(pfence) : -ENOMEM;
+		goto out_unlock;
+	}
+
+	list_add(&e->compute.link, &vm->preempt.engines);
+	++vm->preempt.num_engines;
+	e->compute.pfence = pfence;
+
+	down_read(&vm->userptr.notifier_lock);
+
+	dma_resv_add_fence(&vm->resv, pfence, DMA_RESV_USAGE_BOOKKEEP);
+	xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
+
+	/*
+	 * If a preemption or userptr invalidation is in flight on the VM,
+	 * trigger this fence too so it syncs with the VM's other preempt fences.
+	 */
+	if (__xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm))
+		dma_fence_enable_sw_signaling(pfence);
+
+	up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
+out_unlock_outer:
+	up_write(&vm->lock);
+
+	return err;
+}
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM has userptrs that need
+ * repinning.
+ * @vm: The VM.
+ *
+ * Must be called with vm->userptr.notifier_lock held for read.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+	if (!list_empty(&vm->userptr.repin_list) ||
+	    !list_empty(&vm->userptr.invalidated))
+		return -EAGAIN;
+	return 0;
+}
+
+/**
+ * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
+ * objects of the vm's external buffer objects.
+ * @vm: The vm.
+ * @ww: Pointer to a struct ww_acquire_ctx locking context.
+ * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct
+ * ttm_validate_buffers used for locking.
+ * @tv: On success, set to the storage actually used (@tv_onstack or allocated).
+ * @objs: List head for the buffer objects locked.
+ * @intr: Whether to lock interruptible.
+ * @num_shared: Number of dma-fence slots to reserve in the locked objects.
+ *
+ * Locks the vm dma-resv objects and all the dma-resv objects of the
+ * buffer objects on the vm external object list. The TTM utilities require
+ * a list of struct ttm_validate_buffers pointing to the actual buffer
+ * objects to lock. Storage for those struct ttm_validate_buffers should
+ * be provided in @tv_onstack, and is typically reserved on the stack
+ * of the caller. If the size of @tv_onstack isn't sufficient, then
+ * storage will be allocated internally using kvmalloc().
+ *
+ * The function performs deadlock handling internally, and after a
+ * successful return the ww locking transaction should be considered
+ * sealed. The caller must hold vm->lock.
+ *
+ * Return: 0 on success, Negative error code on error. In particular if
+ * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case
+ * of error, any locking performed has been reverted.
+ */
+int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+			struct ttm_validate_buffer *tv_onstack,
+			struct ttm_validate_buffer **tv,
+			struct list_head *objs,
+			bool intr,
+			unsigned int num_shared)
+{
+	struct ttm_validate_buffer *tv_vm, *tv_bo;
+	struct xe_vma *vma, *next;
+	LIST_HEAD(dups);
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+	/* Room is needed for the vm's own entry plus one per external object. */
+	if (vm->extobj.entries < XE_ONSTACK_TV) {
+		tv_vm = tv_onstack;
+	} else {
+		tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm),
+				       GFP_KERNEL);
+		if (!tv_vm)
+			return -ENOMEM;
+	}
+	tv_bo = tv_vm + 1;
+
+	INIT_LIST_HEAD(objs);
+	list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
+		tv_bo->num_shared = num_shared;
+		tv_bo->bo = &vma->bo->ttm;
+
+		list_add_tail(&tv_bo->head, objs);
+		tv_bo++;
+	}
+	tv_vm->num_shared = num_shared;
+	tv_vm->bo = xe_vm_ttm_bo(vm);
+	list_add_tail(&tv_vm->head, objs);
+	err = ttm_eu_reserve_buffers(ww, objs, intr, &dups);
+	if (err)
+		goto out_err;
+	/* Transfer vmas queued on the notifier rebind list to vm->rebind_list. */
+	spin_lock(&vm->notifier.list_lock);
+	list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
+				 notifier.rebind_link) {
+		xe_bo_assert_held(vma->bo);
+
+		list_del_init(&vma->notifier.rebind_link);
+		if (vma->gt_present && !vma->destroyed)
+			list_move_tail(&vma->rebind_link, &vm->rebind_list);
+	}
+	spin_unlock(&vm->notifier.list_lock);
+
+	*tv = tv_vm;
+	return 0;
+
+out_err:
+	if (tv_vm != tv_onstack)
+		kvfree(tv_vm);
+
+	return err;
+}
+
+/**
+ * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by
+ * xe_vm_lock_dma_resv()
+ * @vm: The vm.
+ * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv().
+ * @tv: The value of *@tv given by xe_vm_lock_dma_resv().
+ * @ww: The ww_acquire_context used for locking.
+ * @objs: The list returned from xe_vm_lock_dma_resv().
+ *
+ * Backs off the reservation of every object on @objs and frees @tv when
+ * xe_vm_lock_dma_resv() had to allocate it instead of using @tv_onstack.
+ */
+void xe_vm_unlock_dma_resv(struct xe_vm *vm,
+			   struct ttm_validate_buffer *tv_onstack,
+			   struct ttm_validate_buffer *tv,
+			   struct ww_acquire_ctx *ww,
+			   struct list_head *objs)
+{
+	/*
+	 * The notifier rebind list must still be empty: adding to it needs
+	 * the dma_resv of an external object, all of which we have held
+	 * since locking, and we don't add entries ourselves.
+	 */
+	XE_WARN_ON(!list_empty(&vm->notifier.rebind_list));
+
+	ttm_eu_backoff_reservation(ww, objs);
+	/* kvfree(NULL) is a no-op, so only the on-stack case needs excluding. */
+	if (tv != tv_onstack)
+		kvfree(tv);
+}
+
+/*
+ * Rebind worker for compute-mode VMs: repin invalidated userptrs, validate and
+ * rebind the vmas on the rebind list, then arm fresh preempt fences and resume.
+ */
+static void preempt_rebind_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+	struct xe_vma *vma;
+	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+	struct ttm_validate_buffer *tv;
+	struct ww_acquire_ctx ww;
+	struct list_head objs;
+	struct dma_fence *rebind_fence;
+	unsigned int fence_count = 0;
+	LIST_HEAD(preempt_fences);
+	int err;
+	int __maybe_unused tries = 0;
+
+	XE_BUG_ON(!xe_vm_in_compute_mode(vm));
+	trace_xe_vm_rebind_worker_enter(vm);
+
+	if (xe_vm_is_closed(vm)) {
+		trace_xe_vm_rebind_worker_exit(vm);
+		return;
+	}
+
+	down_write(&vm->lock);
+
+retry:
+	err = 0;	/* never carry a stale -EAGAIN (or garbage) into this pass */
+	if (vm->async_ops.error)
+		goto out_unlock_outer;
+
+	/*
+	 * Extreme corner where we exit a VM error state with a munmap style VM
+	 * unbind inflight which requires a rebind. The queued async worker must
+	 * install fences first: drop vm->lock, flush it, relock and start over.
+	 */
+	if (vm->async_ops.munmap_rebind_inflight) {
+		up_write(&vm->lock);
+		flush_work(&vm->async_ops.work);
+		down_write(&vm->lock);
+		goto retry;
+	}
+
+	if (xe_vm_userptr_check_repin(vm)) {
+		err = xe_vm_userptr_pin(vm);
+		if (err)
+			goto out_unlock_outer;
+	}
+
+	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
+				  false, vm->preempt.num_engines);
+	if (err)
+		goto out_unlock_outer;
+
+	/* Fresh preempt fences already installed. Everything is running. */
+	if (!preempt_fences_waiting(vm))
+		goto out_unlock;
+
+	/*
+	 * This makes sure vm is completely suspended and also balances
+	 * xe_engine suspend- and resume; we resume *all* vm engines below.
+	 */
+	err = wait_for_existing_preempt_fences(vm);
+	if (err)
+		goto out_unlock;
+
+	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
+	if (err)
+		goto out_unlock;
+
+	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
+		if (xe_vma_is_userptr(vma) || vma->destroyed)
+			continue;
+
+		err = xe_bo_validate(vma->bo, vm, false);
+		if (err)
+			goto out_unlock;
+	}
+
+	rebind_fence = xe_vm_rebind(vm, true);
+	if (IS_ERR(rebind_fence)) {
+		err = PTR_ERR(rebind_fence);
+		goto out_unlock;
+	}
+
+	if (rebind_fence) {
+		dma_fence_wait(rebind_fence, false);
+		dma_fence_put(rebind_fence);
+	}
+
+	/* Wait on munmap style VM unbinds */
+	if (dma_resv_wait_timeout(&vm->resv, DMA_RESV_USAGE_KERNEL,
+				  false, MAX_SCHEDULE_TIMEOUT) <= 0) {
+		err = -ETIME;
+		goto out_unlock;
+	}
+
+#define retry_required(__tries, __vm) \
+	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
+	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
+	__xe_vm_userptr_needs_repin(__vm))
+
+	down_read(&vm->userptr.notifier_lock);
+	if (retry_required(tries, vm)) {
+		up_read(&vm->userptr.notifier_lock);
+		err = -EAGAIN;
+		goto out_unlock;
+	}
+#undef retry_required
+
+	/* Point of no return. */
+	arm_preempt_fences(vm, &preempt_fences);
+	resume_and_reinstall_preempt_fences(vm);
+	up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
+out_unlock_outer:
+	if (err == -EAGAIN) {
+		trace_xe_vm_rebind_worker_retry(vm);
+		goto retry;
+	}
+	up_write(&vm->lock);
+
+	free_preempt_fences(&preempt_fences);
+
+	XE_WARN_ON(err < 0);	/* TODO: Kill VM or put in error state */
+	trace_xe_vm_rebind_worker_exit(vm);
+}
+
+struct async_op_fence;
+static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_engine *e, struct xe_sync_entry *syncs,
+			u32 num_syncs, struct async_op_fence *afence);
+
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+				   const struct mmu_notifier_range *range,
+				   unsigned long cur_seq)
+{
+	struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
+	struct xe_vm *vm = vma->vm;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	long err;
+
+	XE_BUG_ON(!xe_vma_is_userptr(vma));
+	trace_xe_vma_userptr_invalidate(vma);
+	/* The waits below can sleep, so non-blockable invalidations are refused. */
+	if (!mmu_notifier_range_blockable(range))
+		return false;
+
+	down_write(&vm->userptr.notifier_lock);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	/* No need to stop gpu access if the userptr is not yet bound. */
+	if (!vma->userptr.initial_bind) {
+		up_write(&vm->userptr.notifier_lock);
+		return true;
+	}
+
+	/*
+	 * Tell exec and rebind worker they need to repin and rebind this
+	 * userptr.
+	 */
+	if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) {
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&vma->userptr.invalidate_link,
+			       &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	}
+
+	up_write(&vm->userptr.notifier_lock);
+
+	/*
+	 * Preempt fences turn into schedule disables, pipeline these.
+	 * Note that even in fault mode, we need to wait for binds and
+	 * unbinds to complete, and those are attached as BOOKKEEP fences
+	 * to the vm.
+	 */
+	dma_resv_iter_begin(&cursor, &vm->resv,
+			    DMA_RESV_USAGE_BOOKKEEP);
+	dma_resv_for_each_fence_unlocked(&cursor, fence)
+		dma_fence_enable_sw_signaling(fence);
+	dma_resv_iter_end(&cursor);
+	/* Then wait, non-interruptibly and without timeout, for all of them. */
+	err = dma_resv_wait_timeout(&vm->resv,
+				    DMA_RESV_USAGE_BOOKKEEP,
+				    false, MAX_SCHEDULE_TIMEOUT);
+	XE_WARN_ON(err <= 0);
+
+	if (xe_vm_in_fault_mode(vm)) {
+		err = xe_vm_invalidate_vma(vma);
+		XE_WARN_ON(err);
+	}
+
+	trace_xe_vma_userptr_invalidate_complete(vma);
+
+	return true;
+}
+
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
+	.invalidate = vma_userptr_invalidate,	/* installed by xe_vma_create() */
+};
+
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+	struct xe_vma *vma, *next;
+	int err = 0;
+	LIST_HEAD(tmp_evict);
+
+	lockdep_assert_held_write(&vm->lock);
+
+	/* Collect invalidated userptrs onto the repin list */
+	spin_lock(&vm->userptr.invalidated_lock);
+	list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
+				 userptr.invalidate_link) {
+		list_del_init(&vma->userptr.invalidate_link);
+		list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
+	}
+	spin_unlock(&vm->userptr.invalidated_lock);
+
+	/* Pin and move to temporary list; stop at the first failure */
+	list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) {
+		err = xe_vma_userptr_pin_pages(vma);
+		if (err < 0)
+			goto out_err;
+
+		list_move_tail(&vma->userptr_link, &tmp_evict);
+	}
+
+	/* Take lock and move to rebind_list for rebinding. */
+	err = dma_resv_lock_interruptible(&vm->resv, NULL);
+	if (err)
+		goto out_err;
+
+	list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) {
+		list_del_init(&vma->userptr_link);
+		list_move_tail(&vma->rebind_link, &vm->rebind_list);
+	}
+
+	dma_resv_unlock(&vm->resv);
+
+	return 0;
+
+out_err:
+	list_splice_tail(&tmp_evict, &vm->userptr.repin_list);	/* pinned ones go back */
+
+	return err;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * Advisory, lockless check of the VM's repin and invalidated lists.
+ *
+ * Return: 0 if nothing appears to need repinning, -EAGAIN otherwise.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+	if (!list_empty_careful(&vm->userptr.repin_list) ||
+	    !list_empty_careful(&vm->userptr.invalidated))
+		return -EAGAIN;
+	return 0;
+}
+
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
+	       struct xe_sync_entry *syncs, u32 num_syncs);
+
+/* Rebind every vma on vm->rebind_list; returns the last bind fence or ERR_PTR. */
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+{
+	struct xe_vma *cur, *tmp;
+	struct dma_fence *last = NULL;
+
+	lockdep_assert_held(&vm->lock);
+	if (xe_vm_no_dma_fences(vm) && !rebind_worker)
+		return NULL;
+
+	xe_vm_assert_held(vm);
+	list_for_each_entry_safe(cur, tmp, &vm->rebind_list, rebind_link) {
+		XE_WARN_ON(!cur->gt_present);
+		list_del_init(&cur->rebind_link);
+		dma_fence_put(last);
+		if (rebind_worker)
+			trace_xe_vma_rebind_worker(cur);
+		else
+			trace_xe_vma_rebind_exec(cur);
+		last = xe_vm_bind_vma(cur, NULL, NULL, 0);
+		if (IS_ERR(last))
+			break;
+	}
+
+	return last;
+}
+
+/*
+ * Allocate and set up a vma spanning [@start, @end] in @vm. With a @bo it maps
+ * the bo from offset @bo_offset_or_userptr; without one it is a userptr vma at
+ * user address @bo_offset_or_userptr. Returns the new vma or an ERR_PTR().
+ */
+static struct xe_vma *xe_vma_create(struct xe_vm *vm,
+				    struct xe_bo *bo,
+				    u64 bo_offset_or_userptr,
+				    u64 start, u64 end,
+				    bool read_only,
+				    u64 gt_mask)
+{
+	struct xe_vma *vma;
+	struct xe_gt *gt;
+	u8 id;
+
+	XE_BUG_ON(start >= end);
+	XE_BUG_ON(end >= vm->size);
+
+	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (!vma)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&vma->rebind_link);
+	INIT_LIST_HEAD(&vma->unbind_link);
+	INIT_LIST_HEAD(&vma->userptr_link);
+	INIT_LIST_HEAD(&vma->userptr.invalidate_link);
+	INIT_LIST_HEAD(&vma->notifier.rebind_link);
+	INIT_LIST_HEAD(&vma->extobj.link);
+
+	vma->vm = vm;
+	vma->start = start;
+	vma->end = end;
+	if (read_only)
+		vma->pte_flags = PTE_READ_ONLY;
+
+	/* No explicit mask: default to every non-media gt. */
+	vma->gt_mask = gt_mask;
+	if (!gt_mask) {
+		for_each_gt(gt, vm->xe, id)
+			if (!xe_gt_is_media_type(gt))
+				vma->gt_mask |= 0x1 << id;
+	}
+
+	if (vm->xe->info.platform == XE_PVC)
+		vma->use_atomic_access_pte_bit = true;
+
+	if (bo) {
+		xe_bo_assert_held(bo);
+		vma->bo_offset = bo_offset_or_userptr;
+		vma->bo = xe_bo_get(bo);
+		list_add_tail(&vma->bo_link, &bo->vmas);
+	} else /* userptr */ {
+		int err;
+
+		vma->userptr.ptr = bo_offset_or_userptr;
+		err = mmu_interval_notifier_insert(&vma->userptr.notifier,
+						   current->mm,
+						   vma->userptr.ptr, end - start + 1,
+						   &vma_userptr_notifier_ops);
+		if (err) {
+			kfree(vma);
+			return ERR_PTR(err);
+		}
+
+		vma->userptr.notifier_seq = LONG_MAX;
+		xe_vm_get(vm);
+	}
+
+	return vma;
+}
+
+/* Unlink @vma from its vm's extobj list; returns false if it was not on it. */
+static bool vm_remove_extobj(struct xe_vma *vma)
+{
+	if (list_empty(&vma->extobj.link))
+		return false;
+	vma->vm->extobj.entries--;
+	list_del_init(&vma->extobj.link);
+	return true;
+}
+
+/*
+ * Final stage of vma destruction: release what backs the vma (userptr DMA
+ * mapping and notifier, or the bo reference) and free the vma itself.
+ */
+static void xe_vma_destroy_late(struct xe_vma *vma)
+{
+	struct xe_vm *vm = vma->vm;
+	enum dma_data_direction dir = (vma->pte_flags & PTE_READ_ONLY) ?
+		DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+
+	if (!xe_vma_is_userptr(vma)) {
+		xe_bo_put(vma->bo);
+	} else {
+		if (vma->userptr.sg) {
+			dma_unmap_sgtable(vm->xe->drm.dev, vma->userptr.sg, dir, 0);
+			sg_free_table(vma->userptr.sg);
+			vma->userptr.sg = NULL;
+		}
+
+		/*
+		 * Userptr pages are not pinned, so the notifier has to stay
+		 * in place until the GPU can no longer be accessing them.
+		 */
+		mmu_interval_notifier_remove(&vma->userptr.notifier);
+		xe_vm_put(vm);
+	}
+
+	kfree(vma);
+}
+
+/* Work item handler: run the deferred part of vma destruction. */
+static void vma_destroy_work_func(struct work_struct *w)
+{
+	struct xe_vma *vma = container_of(w, struct xe_vma, destroy_work);
+
+	xe_vma_destroy_late(vma);
+}
+
+/* First live vma of @bo in @vm other than @ignore, or NULL if there is none. */
+static struct xe_vma *
+bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
+			    struct xe_vma *ignore)
+{
+	struct xe_vma *it;
+
+	list_for_each_entry(it, &bo->vmas, bo_link)
+		if (it != ignore && it->vm == vm && !it->destroyed)
+			return it;
+
+	return NULL;
+}
+
+/* Locking wrapper around bo_has_vm_references_locked(). */
+static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
+				 struct xe_vma *ignore)
+{
+	struct ww_acquire_ctx ww;
+	bool found;
+
+	xe_bo_lock(bo, &ww, 0, false);
+	found = bo_has_vm_references_locked(bo, vm, ignore) != NULL;
+	xe_bo_unlock(bo, &ww);
+	return found;
+}
+
+static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
+{
+	vm->extobj.entries++;	/* counterpart: vm_remove_extobj() */
+	list_add(&vma->extobj.link, &vm->extobj.list);
+}
+
+/*
+ * Put @vma on @vm's external object list, unless another live vma of the same
+ * bo is already found in @vm (in which case nothing is added).
+ */
+static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
+{
+	lockdep_assert_held_write(&vm->lock);
+
+	if (!bo_has_vm_references(vma->bo, vm, vma))
+		__vm_insert_extobj(vm, vma);
+}
+
+/* Fence callback: defer the final vma teardown to a workqueue. */
+static void vma_destroy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct xe_vma *victim = container_of(cb, struct xe_vma, destroy_cb);
+
+	INIT_WORK(&victim->destroy_work, vma_destroy_work_func);
+	queue_work(system_unbound_wq, &victim->destroy_work);
+}
+
+static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
+{
+	struct xe_vm *vm = vma->vm;
+
+	lockdep_assert_held_write(&vm->lock);
+	XE_BUG_ON(!list_empty(&vma->unbind_link));
+	/* Unlink from the lists specific to userptr or bo-backed vmas. */
+	if (xe_vma_is_userptr(vma)) {
+		XE_WARN_ON(!vma->destroyed);
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del_init(&vma->userptr.invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+		list_del(&vma->userptr_link);
+	} else {
+		xe_bo_assert_held(vma->bo);
+		list_del(&vma->bo_link);
+
+		spin_lock(&vm->notifier.list_lock);
+		list_del(&vma->notifier.rebind_link);
+		spin_unlock(&vm->notifier.list_lock);
+		/* If this vma was on the extobj list, let another vma of the bo take over. */
+		if (!vma->bo->vm && vm_remove_extobj(vma)) {
+			struct xe_vma *other;
+
+			other = bo_has_vm_references_locked(vma->bo, vm, NULL);
+
+			if (other)
+				__vm_insert_extobj(vm, other);
+		}
+	}
+
+	xe_vm_assert_held(vm);
+	if (!list_empty(&vma->rebind_link))
+		list_del(&vma->rebind_link);
+	/* With a fence, the final teardown is deferred to its callback. */
+	if (fence) {
+		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
+						 vma_destroy_cb);
+
+		if (ret) {
+			XE_WARN_ON(ret != -ENOENT);
+			xe_vma_destroy_late(vma);	/* callback not installed */
+		}
+	} else {
+		xe_vma_destroy_late(vma);
+	}
+}
+
+static void xe_vma_destroy_unlocked(struct xe_vma *vma)
+{
+	struct ttm_validate_buffer tv[2];
+	struct ww_acquire_ctx ww;
+	struct xe_bo *bo = vma->bo;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+	int err;
+
+	memset(tv, 0, sizeof(tv));
+	tv[0].bo = xe_vm_ttm_bo(vma->vm);
+	list_add(&tv[0].head, &objs);
+	/* A bo-backed vma also locks its bo, holding a reference meanwhile. */
+	if (bo) {
+		tv[1].bo = &xe_bo_get(bo)->ttm;
+		list_add(&tv[1].head, &objs);
+	}
+	err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
+	XE_WARN_ON(err);	/* NOTE(review): failure is only warned about */
+
+	xe_vma_destroy(vma, NULL);
+
+	ttm_eu_backoff_reservation(&ww, &objs);
+	if (bo)
+		xe_bo_put(bo);
+}
+
+static struct xe_vma *to_xe_vma(const struct rb_node *node)
+{
+	BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);	/* cast relies on it */
+	return (struct xe_vma *)node;
+}
+
+/* Order vmas by address range; overlapping ranges compare as equal (0). */
+static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b)
+{
+	if (a->end < b->start)
+		return -1;
+	if (b->end < a->start)
+		return 1;
+
+	return 0;
+}
+
+static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b)
+{
+	return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0;	/* rb_add() ordering */
+}
+
+/* rb_find() comparator: 0 when the key vma overlaps @node's vma, else +/-1. */
+int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
+{
+	const struct xe_vma *wanted = key;
+	struct xe_vma *entry = to_xe_vma(node);
+
+	if (wanted->start > entry->end)
+		return 1;
+	if (wanted->end < entry->start)
+		return -1;
+
+	return 0;
+}
+
+/* Look up a vma in @vm overlapping @vma's range; NULL if none or vm closed. */
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
+{
+	struct rb_node *hit;
+
+	if (xe_vm_is_closed(vm))
+		return NULL;
+
+	XE_BUG_ON(vma->end >= vm->size);
+	lockdep_assert_held(&vm->lock);
+
+	hit = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
+	return hit ? to_xe_vma(hit) : NULL;
+}
+
+static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	XE_BUG_ON(vma->vm != vm);
+	lockdep_assert_held(&vm->lock);
+	/* Position in the tree is decided by xe_vma_less_cb(). */
+	rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
+}
+
+static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	XE_BUG_ON(vma->vm != vm);
+	lockdep_assert_held(&vm->lock);
+	/* Erase from the tree and clear last_fault_vma if it points at this vma. */
+	rb_erase(&vma->vm_node, &vm->vmas);
+	if (vm->usm.last_fault_vma == vma)
+		vm->usm.last_fault_vma = NULL;
+}
+
+static void async_op_work_func(struct work_struct *w);
+static void vm_destroy_work_func(struct work_struct *w);
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
+{
+	struct xe_vm *vm;
+	int err, i = 0, number_gts = 0;
+	struct xe_gt *gt;
+	u8 id;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return ERR_PTR(-ENOMEM);
+	/* Software state first: refcount, resv, size, locks, lists, work items. */
+	vm->xe = xe;
+	kref_init(&vm->refcount);
+	dma_resv_init(&vm->resv);
+
+	vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);
+
+	vm->vmas = RB_ROOT;
+	vm->flags = flags;
+
+	init_rwsem(&vm->lock);
+
+	INIT_LIST_HEAD(&vm->rebind_list);
+
+	INIT_LIST_HEAD(&vm->userptr.repin_list);
+	INIT_LIST_HEAD(&vm->userptr.invalidated);
+	init_rwsem(&vm->userptr.notifier_lock);
+	spin_lock_init(&vm->userptr.invalidated_lock);
+
+	INIT_LIST_HEAD(&vm->notifier.rebind_list);
+	spin_lock_init(&vm->notifier.list_lock);
+
+	INIT_LIST_HEAD(&vm->async_ops.pending);
+	INIT_WORK(&vm->async_ops.work, async_op_work_func);
+	spin_lock_init(&vm->async_ops.lock);
+
+	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
+
+	INIT_LIST_HEAD(&vm->preempt.engines);
+	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
+
+	INIT_LIST_HEAD(&vm->extobj.list);
+
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		/* We need to immediately exit from any D3 state */
+		xe_pm_runtime_get(xe);
+		xe_device_mem_access_get(xe);
+	}
+	/* Page-table setup below runs with the vm's resv locked. */
+	err = dma_resv_lock_interruptible(&vm->resv, NULL);
+	if (err)
+		goto err_put;
+
+	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		vm->flags |= XE_VM_FLAGS_64K;
+
+	for_each_gt(gt, xe, id) {
+		if (xe_gt_is_media_type(gt))
+			continue;
+
+		if (flags & XE_VM_FLAG_MIGRATION &&
+		    gt->info.id != XE_VM_FLAG_GT_ID(flags))
+			continue;
+
+		vm->pt_root[id] = xe_pt_create(vm, gt, xe->info.vm_max_level);
+		if (IS_ERR(vm->pt_root[id])) {
+			err = PTR_ERR(vm->pt_root[id]);
+			vm->pt_root[id] = NULL;
+			goto err_destroy_root;
+		}
+	}
+
+	if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
+		for_each_gt(gt, xe, id) {
+			if (!vm->pt_root[id])
+				continue;
+
+			err = xe_pt_create_scratch(xe, gt, vm);
+			if (err)
+				goto err_scratch_pt;
+		}
+	}
+	/* NOTE(review): DRM_XE_VM_CREATE_* and XE_VM_FLAG_* bits share @flags. */
+	if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) {
+		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+		vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
+	}
+
+	if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) {
+		vm->async_ops.fence.context = dma_fence_context_alloc(1);
+		vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
+	}
+
+	/* Fill pt_root after allocating scratch tables */
+	for_each_gt(gt, xe, id) {
+		if (!vm->pt_root[id])
+			continue;
+
+		xe_pt_populate_empty(gt, vm, vm->pt_root[id]);
+	}
+	dma_resv_unlock(&vm->resv);
+
+	/* Kernel migration VM shouldn't have a circular loop.. */
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		for_each_gt(gt, xe, id) {
+			struct xe_vm *migrate_vm;
+			struct xe_engine *eng;
+
+			if (!vm->pt_root[id])
+				continue;
+
+			migrate_vm = xe_migrate_get_vm(gt->migrate);
+			eng = xe_engine_create_class(xe, gt, migrate_vm,
+						     XE_ENGINE_CLASS_COPY,
+						     ENGINE_FLAG_VM);
+			xe_vm_put(migrate_vm);
+			if (IS_ERR(eng)) {
+				xe_vm_close_and_put(vm);
+				return ERR_CAST(eng);
+			}
+			vm->eng[id] = eng;
+			number_gts++;
+		}
+	}
+
+	if (number_gts > 1)
+		vm->composite_fence_ctx = dma_fence_context_alloc(1);
+	/* Account the new vm in the per-device fault / non-fault mode counters. */
+	mutex_lock(&xe->usm.lock);
+	if (flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode++;
+	else if (!(flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode++;
+	mutex_unlock(&xe->usm.lock);
+
+	trace_xe_vm_create(vm);
+
+	return vm;
+
+err_scratch_pt:
+	for_each_gt(gt, xe, id) {
+		if (!vm->pt_root[id])
+			continue;
+		/* NOTE(review): scratch_bo[id] may be unset for later gts - confirm. */
+		i = vm->pt_root[id]->level;
+		while (i)
+			if (vm->scratch_pt[id][--i])
+				xe_pt_destroy(vm->scratch_pt[id][i],
+					      vm->flags, NULL);
+		xe_bo_unpin(vm->scratch_bo[id]);
+		xe_bo_put(vm->scratch_bo[id]);
+	}
+err_destroy_root:
+	for_each_gt(gt, xe, id) {
+		if (vm->pt_root[id])
+			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+	}
+	dma_resv_unlock(&vm->resv);
+err_put:
+	dma_resv_fini(&vm->resv);
+	kfree(vm);
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		xe_device_mem_access_put(xe);
+		xe_pm_runtime_put(xe);
+	}
+	return ERR_PTR(err);
+}
+
+/*
+ * Queue the VM's async bind worker on the unbound system workqueue and wait
+ * for that work item to finish executing.
+ */
+static void flush_async_ops(struct xe_vm *vm)
+{
+	struct work_struct *worker = &vm->async_ops.work;
+
+	queue_work(system_unbound_wq, worker);
+	flush_work(worker);
+}
+
+/*
+ * Copy a description of a failed bind operation to the user address that was
+ * registered in vm->async_ops.error_capture.addr, then wake everyone sleeping
+ * on the error capture wait queue.
+ *
+ * @vm: VM the failed operation belongs to
+ * @err: error code of the failed operation
+ * @op: bind operation that failed
+ * @addr: address of the failed operation
+ * @size: size of the failed operation
+ *
+ * When called without an mm of its own (current->mm is NULL) the function
+ * temporarily adopts the mm recorded at registration time; if that mm can no
+ * longer be referenced the copy is skipped and only the wake-up happens.
+ */
+static void vm_error_capture(struct xe_vm *vm, int err,
+			     u32 op, u64 addr, u64 size)
+{
+	/*
+	 * This record is copied verbatim to userspace. Use an initializer so
+	 * that any member not named here is zeroed instead of carrying stale
+	 * kernel stack contents.
+	 */
+	struct drm_xe_vm_bind_op_error_capture capture = {
+		.error = err,
+		.op = op,
+		.addr = addr,
+		.size = size,
+	};
+	u64 __user *address =
+		u64_to_user_ptr(vm->async_ops.error_capture.addr);
+	bool in_kthread = !current->mm;
+
+	if (in_kthread) {
+		/* The registering process may already be gone */
+		if (!mmget_not_zero(vm->async_ops.error_capture.mm))
+			goto mm_closed;
+		kthread_use_mm(vm->async_ops.error_capture.mm);
+	}
+
+	if (copy_to_user(address, &capture, sizeof(capture)))
+		XE_WARN_ON("Copy to user failed");
+
+	if (in_kthread) {
+		kthread_unuse_mm(vm->async_ops.error_capture.mm);
+		mmput(vm->async_ops.error_capture.mm);
+	}
+
+mm_closed:
+	wake_up_all(&vm->async_ops.error_capture.wq);
+}
+
+/*
+ * Close a VM and drop one reference to it.
+ *
+ * Sets vm->size to zero, flushes the async bind worker (and the rebind worker
+ * for compute-mode VMs), kills and releases the per-GT engines, destroys all
+ * VMAs and the scratch page tables, and finally calls xe_vm_put(). The page
+ * table roots are not destroyed here.
+ *
+ * The VM must not have any preempt engines attached (XE_BUG_ON otherwise).
+ */
+void xe_vm_close_and_put(struct xe_vm *vm)
+{
+	struct rb_root contested = RB_ROOT;
+	struct ww_acquire_ctx ww;
+	struct xe_device *xe = vm->xe;
+	struct xe_gt *gt;
+	u8 id;
+
+	XE_BUG_ON(vm->preempt.num_engines);
+
+	/*
+	 * A zero size is what vm_destroy_work_func() later checks to verify
+	 * that this function ran; presumably it is also what marks the VM as
+	 * closed for the workers flushed below - TODO confirm against
+	 * xe_vm_is_closed().
+	 */
+	vm->size = 0;
+	smp_mb();
+	flush_async_ops(vm);
+	if (xe_vm_in_compute_mode(vm))
+		flush_work(&vm->preempt.rebind_work);
+
+	/* Kill and release the per-GT engines stored in vm->eng[] */
+	for_each_gt(gt, xe, id) {
+		if (vm->eng[id]) {
+			xe_engine_kill(vm->eng[id]);
+			xe_engine_put(vm->eng[id]);
+			vm->eng[id] = NULL;
+		}
+	}
+
+	down_write(&vm->lock);
+	xe_vm_lock(vm, &ww, 0, false);
+	/* Drain the VMA tree one root node at a time */
+	while (vm->vmas.rb_node) {
+		struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);
+
+		if (xe_vma_is_userptr(vma)) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->destroyed = true;
+			up_read(&vm->userptr.notifier_lock);
+		}
+
+		rb_erase(&vma->vm_node, &vm->vmas);
+
+		/*
+		 * Easy case: userptr VMAs and VMAs whose BO has a VM set
+		 * (bo->vm) are destroyed right away under the VM lock.
+		 */
+		if (xe_vma_is_userptr(vma) || vma->bo->vm) {
+			xe_vma_destroy(vma, NULL);
+			continue;
+		}
+
+		/* Everything else is parked and destroyed after unlocking */
+		rb_add(&vma->vm_node, &contested, xe_vma_less_cb);
+	}
+
+	/*
+	 * All vm operations will add shared fences to resv.
+	 * The only exception is eviction for a shared object,
+	 * but even so, the unbind when evicted would still
+	 * install a fence to resv. Hence it's safe to
+	 * destroy the pagetables immediately.
+	 */
+	for_each_gt(gt, xe, id) {
+		if (vm->scratch_bo[id]) {
+			u32 i;
+
+			xe_bo_unpin(vm->scratch_bo[id]);
+			xe_bo_put(vm->scratch_bo[id]);
+			/* One scratch table per level below the root's level */
+			for (i = 0; i < vm->pt_root[id]->level; i++)
+				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
+					      NULL);
+		}
+	}
+	xe_vm_unlock(vm, &ww);
+
+	if (contested.rb_node) {
+
+		/*
+		 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
+		 * Since we hold a refcount to the bo, we can remove and free
+		 * the members safely without locking.
+		 */
+		while (contested.rb_node) {
+			struct xe_vma *vma = to_xe_vma(contested.rb_node);
+
+			rb_erase(&vma->vm_node, &contested);
+			xe_vma_destroy_unlocked(vma);
+		}
+	}
+
+	/* Release anyone still waiting on the error capture wait queue */
+	if (vm->async_ops.error_capture.addr)
+		wake_up_all(&vm->async_ops.error_capture.wq);
+
+	XE_WARN_ON(!list_empty(&vm->extobj.list));
+	up_write(&vm->lock);
+
+	xe_vm_put(vm);
+}
+
+/*
+ * Final VM destruction, run from the destroy_work item queued by
+ * xe_vm_free() so that it is allowed to sleep.
+ *
+ * For non-migration VMs this drops the device memory access and runtime PM
+ * references and removes the VM from the ASID table. It then destroys the
+ * per-GT page table roots, updates the fault / non-fault mode VM counters and
+ * frees the VM.
+ */
+static void vm_destroy_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm =
+		container_of(w, struct xe_vm, destroy_work);
+	struct ww_acquire_ctx ww;
+	struct xe_device *xe = vm->xe;
+	struct xe_gt *gt;
+	u8 id;
+
+	/* xe_vm_close_and_put was not called? */
+	XE_WARN_ON(vm->size);
+
+	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+		xe_device_mem_access_put(xe);
+		xe_pm_runtime_put(xe);
+
+		/*
+		 * Only remove the ASID mapping if it actually points at this
+		 * VM. A VM that is torn down before an ASID was assigned to
+		 * it (e.g. on a creation error path) still carries whatever
+		 * vm->usm.asid was initialised to, and erasing that slot
+		 * unconditionally would drop another VM's mapping.
+		 */
+		mutex_lock(&xe->usm.lock);
+		if (xa_load(&xe->usm.asid_to_vm, vm->usm.asid) == vm)
+			xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+		mutex_unlock(&xe->usm.lock);
+	}
+
+	/*
+	 * XXX: We delay destroying the PT root until the VM is freed as PT root
+	 * is needed for xe_vm_lock to work. If we remove that dependency this
+	 * can be moved to xe_vm_close_and_put.
+	 */
+	xe_vm_lock(vm, &ww, 0, false);
+	for_each_gt(gt, xe, id) {
+		if (vm->pt_root[id]) {
+			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+			vm->pt_root[id] = NULL;
+		}
+	}
+	xe_vm_unlock(vm, &ww);
+
+	/* Mirror of the accounting done when the VM was created */
+	mutex_lock(&xe->usm.lock);
+	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode--;
+	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode--;
+	mutex_unlock(&xe->usm.lock);
+
+	trace_xe_vm_free(vm);
+	dma_fence_put(vm->rebind_fence);
+	dma_resv_fini(&vm->resv);
+	kfree(vm);
+
+}
+
+/*
+ * Release hook for the VM's embedded refcount. Nothing is torn down here:
+ * destroying a VM needs to be able to sleep, so the VM's destroy_work item is
+ * queued on the unbound system workqueue instead.
+ */
+void xe_vm_free(struct kref *ref)
+{
+	struct work_struct *destroy =
+		&container_of(ref, struct xe_vm, refcount)->destroy_work;
+
+	queue_work(system_unbound_wq, destroy);
+}
+
+/*
+ * Look up a VM by its per-file id and take a reference on it.
+ *
+ * @xef: file the id belongs to
+ * @id: VM id
+ *
+ * Returns the VM with an extra reference held, or NULL if @id is not in the
+ * file's VM table. The caller must drop the reference with xe_vm_put().
+ */
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_vm *vm;
+
+	/*
+	 * The reference must be taken while xef->vm.lock is still held:
+	 * once the lock is dropped a concurrent destroy can erase the entry
+	 * and release the VM, leaving us with a dangling pointer.
+	 */
+	mutex_lock(&xef->vm.lock);
+	vm = xa_load(&xef->vm.xa, id);
+	if (vm)
+		xe_vm_get(vm);
+	mutex_unlock(&xef->vm.lock);
+
+	return vm;
+}
+
+/*
+ * Encode the BO backing the VM's page table root for @full_gt as a PDE
+ * (offset 0, XE_CACHE_WB) and return the encoded value.
+ *
+ * @full_gt must not be a media GT (XE_BUG_ON otherwise); its info.id selects
+ * the entry in vm->pt_root[].
+ */
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt)
+{
+	XE_BUG_ON(xe_gt_is_media_type(full_gt));
+
+	return gen8_pde_encode(vm->pt_root[full_gt->info.id]->bo, 0,
+			       XE_CACHE_WB);
+}
+
+/*
+ * Unbind @vma from every GT it is currently present on (vma->gt_present).
+ *
+ * @vma: VMA to unbind
+ * @e: engine to use, may be NULL; advanced along its multi_gt_list as the GTs
+ *     are walked
+ * @syncs: sync entries to signal with the resulting fence
+ * @num_syncs: number of entries in @syncs
+ *
+ * Returns a fence for the unbind: the single per-GT fence, a fence array
+ * wrapping all per-GT fences when more than one GT is involved, or the stub
+ * fence when the VMA was not present on any GT. Returns an ERR_PTR on
+ * failure.
+ */
+static struct dma_fence *
+xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
+		 struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_gt *gt;
+	struct dma_fence *fence = NULL;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct xe_vm *vm = vma->vm;
+	int cur_fence = 0, i;
+	int number_gts = hweight_long(vma->gt_present);
+	int err;
+	u8 id;
+
+	trace_xe_vma_unbind(vma);
+
+	/* Per-GT fences only need collecting when there is more than one */
+	if (number_gts > 1) {
+		fences = kmalloc_array(number_gts, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_gt(gt, vm->xe, id) {
+		if (!(vma->gt_present & BIT(id)))
+			goto next;
+
+		XE_BUG_ON(xe_gt_is_media_type(gt));
+
+		fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		/* Step to the engine for the next GT that has a page table */
+		if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
+			e = list_next_entry(e, multi_gt_list);
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_gts, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--vm->composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	/*
+	 * Resolve the fence that represents this unbind before signalling
+	 * the syncs. If the VMA was not present on any GT no fence was
+	 * produced above; fall back to the stub fence so the sync entries
+	 * are never handed a NULL fence.
+	 */
+	if (cf)
+		fence = &cf->base;
+	else if (!fence)
+		fence = dma_fence_get_stub();
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], NULL, fence);
+
+	return fence;
+
+err_fences:
+	if (fences) {
+		while (cur_fence) {
+			/* FIXME: Rewind the previous binds? */
+			dma_fence_put(fences[--cur_fence]);
+		}
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Bind @vma on every GT selected by vma->gt_mask.
+ *
+ * @vma: VMA to bind
+ * @e: engine to use, may be NULL; advanced along its multi_gt_list as the GTs
+ *     are walked
+ * @syncs: sync entries to signal with the resulting fence
+ * @num_syncs: number of entries in @syncs
+ *
+ * Returns the per-GT bind fence, or a fence array wrapping all per-GT fences
+ * when more than one GT is selected. Returns an ERR_PTR on failure.
+ *
+ * NOTE(review): 'fence' is not initialised, so the return value is undefined
+ * if vma->gt_mask selects no GT at all - presumably callers guarantee a
+ * non-empty mask; confirm.
+ */
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
+	       struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_gt *gt;
+	struct dma_fence *fence;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct xe_vm *vm = vma->vm;
+	int cur_fence = 0, i;
+	int number_gts = hweight_long(vma->gt_mask);
+	int err;
+	u8 id;
+
+	trace_xe_vma_bind(vma);
+
+	/* Per-GT fences only need collecting when there is more than one */
+	if (number_gts > 1) {
+		fences = kmalloc_array(number_gts, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_gt(gt, vm->xe, id) {
+		if (!(vma->gt_mask & BIT(id)))
+			goto next;
+
+		XE_BUG_ON(xe_gt_is_media_type(gt));
+		/* Last argument: whether the VMA is already present on this GT */
+		fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs,
+					 vma->gt_present & BIT(id));
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		/* Step to the engine for the next GT that has a page table */
+		if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
+			e = list_next_entry(e, multi_gt_list);
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_gts, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			/* Give back the sequence number consumed above */
+			--vm->composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
+
+	return cf ? &cf->base : fence;
+
+err_fences:
+	if (fences) {
+		while (cur_fence) {
+			/* FIXME: Rewind the previous binds? */
+			dma_fence_put(fences[--cur_fence]);
+		}
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Fence handed out for an asynchronous bind operation.
+ *
+ * @fence: the dma-fence itself, initialised with async_op_fence_ops
+ * @cb: callback installed on the real bind / unbind fence; when that fence
+ *      signals, @fence is signalled too (see async_op_fence_cb())
+ * @vm: VM the operation belongs to
+ * @wq: wait queue used by xe_vm_async_fence_wait_start()
+ * @started: set once the operation has been started (or flushed); waiters on
+ *           @wq wait for this to become true
+ */
+struct async_op_fence {
+	struct dma_fence fence;
+	struct dma_fence_cb cb;
+	struct xe_vm *vm;
+	wait_queue_head_t wq;
+	bool started;
+};
+
+/* dma_fence_ops.get_driver_name hook for async op fences: always "xe". */
+static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	return "xe";
+}
+
+/*
+ * dma_fence_ops.get_timeline_name hook for async op fences: every such fence
+ * reports the same fixed timeline name.
+ */
+static const char *
+async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	return "async_op_fence";
+}
+
+/*
+ * Fence ops for struct async_op_fence. The address of this table also serves
+ * as the type tag checked by xe_vm_async_fence_wait_start().
+ */
+static const struct dma_fence_ops async_op_fence_ops = {
+	.get_driver_name = async_op_fence_get_driver_name,
+	.get_timeline_name = async_op_fence_get_timeline_name,
+};
+
+/*
+ * Callback installed by add_async_op_fence_cb() on the underlying bind fence.
+ * Signals the async op fence that embeds @cb, then drops the VM and fence
+ * references that were taken when the callback was installed.
+ */
+static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct async_op_fence *op_fence;
+
+	op_fence = container_of(cb, struct async_op_fence, cb);
+
+	dma_fence_signal(&op_fence->fence);
+	xe_vm_put(op_fence->vm);
+	dma_fence_put(&op_fence->fence);
+}
+
+/*
+ * Chain @afence behind @fence so that @afence signals once @fence does.
+ *
+ * @vm: VM the operation belongs to
+ * @fence: fence of the actual bind / unbind
+ * @afence: async op fence that was handed out for the operation
+ */
+static void add_async_op_fence_cb(struct xe_vm *vm,
+				  struct dma_fence *fence,
+				  struct async_op_fence *afence)
+{
+	int ret;
+
+	/* Release anyone blocked in xe_vm_async_fence_wait_start() */
+	if (!xe_vm_no_dma_fences(vm)) {
+		afence->started = true;
+		smp_wmb();
+		wake_up_all(&afence->wq);
+	}
+
+	/* References held on behalf of the callback, see async_op_fence_cb() */
+	afence->vm = xe_vm_get(vm);
+	dma_fence_get(&afence->fence);
+	ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
+	/* -ENOENT: @fence has already signalled, so signal @afence directly */
+	if (ret == -ENOENT)
+		dma_fence_signal(&afence->fence);
+	/* The callback will never run; drop the references taken above */
+	if (ret) {
+		xe_vm_put(vm);
+		dma_fence_put(&afence->fence);
+	}
+	XE_WARN_ON(ret && ret != -ENOENT);
+}
+
+/*
+ * If @fence is an async op fence, sleep (interruptibly) until its operation
+ * has been marked as started. Any other kind of fence returns 0 immediately.
+ *
+ * Returns 0 on success or the error from wait_event_interruptible().
+ */
+int xe_vm_async_fence_wait_start(struct dma_fence *fence)
+{
+	struct async_op_fence *afence;
+
+	/* Fences of any other type have nothing to wait for */
+	if (fence->ops != &async_op_fence_ops)
+		return 0;
+
+	afence = container_of(fence, struct async_op_fence, fence);
+
+	XE_BUG_ON(xe_vm_no_dma_fences(afence->vm));
+
+	smp_rmb();
+	return wait_event_interruptible(afence->wq, afence->started);
+}
+
+/*
+ * Bind @vma and, when an async op fence is supplied, chain that fence behind
+ * the bind fence. The VM must be held by the caller.
+ *
+ * Returns 0 on success or a negative error code from xe_vm_bind_vma().
+ */
+static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_engine *e, struct xe_sync_entry *syncs,
+			u32 num_syncs, struct async_op_fence *afence)
+{
+	struct dma_fence *done;
+	int ret = 0;
+
+	xe_vm_assert_held(vm);
+
+	done = xe_vm_bind_vma(vma, e, syncs, num_syncs);
+	if (IS_ERR(done)) {
+		ret = PTR_ERR(done);
+	} else {
+		if (afence)
+			add_async_op_fence_cb(vm, done, afence);
+
+		/* The bind fence itself is not kept around */
+		dma_fence_put(done);
+	}
+
+	return ret;
+}
+
+/*
+ * Validate @bo (when there is one) and then bind @vma via __xe_vm_bind().
+ * Both the VM and the BO must be held by the caller.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
+		      struct xe_bo *bo, struct xe_sync_entry *syncs,
+		      u32 num_syncs, struct async_op_fence *afence)
+{
+	int err = 0;
+
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(bo);
+
+	/* Binds without a BO have nothing to validate */
+	if (bo)
+		err = xe_bo_validate(bo, vm, true);
+
+	if (err)
+		return err;
+
+	return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence);
+}
+
+/*
+ * Unbind @vma, chain the optional async op fence behind the unbind fence and
+ * destroy the VMA. The VM and the VMA's BO must be held by the caller.
+ *
+ * Returns 0 on success or a negative error code from xe_vm_unbind_vma(), in
+ * which case the VMA is left untouched.
+ */
+static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_engine *e, struct xe_sync_entry *syncs,
+			u32 num_syncs, struct async_op_fence *afence)
+{
+	struct dma_fence *unbound;
+	int ret = 0;
+
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(vma->bo);
+
+	unbound = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
+	if (IS_ERR(unbound)) {
+		ret = PTR_ERR(unbound);
+	} else {
+		if (afence)
+			add_async_op_fence_cb(vm, unbound, afence);
+
+		/* The VMA is destroyed with the unbind fence attached */
+		xe_vma_destroy(vma, unbound);
+		dma_fence_put(unbound);
+	}
+
+	return ret;
+}
+
+/*
+ * Property handler: register the user address that failed async bind
+ * operations are reported to (see vm_error_capture()).
+ *
+ * @xe: device, used for ioctl error reporting
+ * @vm: VM to configure
+ * @value: user address; must be non-zero
+ *
+ * Returns 0 on success, -EINVAL for a zero address and -ENOTSUPP when the VM
+ * was not created with async bind ops or an address is already registered.
+ *
+ * NOTE(review): current->mm is stored without taking a reference in this
+ * function; vm_error_capture() later calls mmget_not_zero() on the stored
+ * pointer. Confirm the mm_struct is guaranteed to outlive the VM.
+ */
+static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
+					u64 value)
+{
+	if (XE_IOCTL_ERR(xe, !value))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
+		return -ENOTSUPP;
+
+	/* The address can only be set once per VM */
+	if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr))
+		return -ENOTSUPP;
+
+	vm->async_ops.error_capture.mm = current->mm;
+	vm->async_ops.error_capture.addr = value;
+	init_waitqueue_head(&vm->async_ops.error_capture.wq);
+
+	return 0;
+}
+
+/* Handler for one VM property: applies @value to @vm, returns 0 or -errno. */
+typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
+				     u64 value);
+
+/* Property handlers, indexed by the property id supplied by userspace. */
+static const xe_vm_set_property_fn vm_set_property_funcs[] = {
+	[XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
+		vm_set_error_capture_address,
+};
+
+/*
+ * Extension handler for XE_VM_EXTENSION_SET_PROPERTY: read the set-property
+ * extension from userspace and dispatch to the matching property handler.
+ *
+ * @xe: device, used for ioctl error reporting
+ * @vm: VM to configure
+ * @extension: user address of a struct drm_xe_ext_vm_set_property
+ *
+ * Returns 0 on success, -EFAULT if the extension cannot be read, -EINVAL for
+ * an unknown property, or the error returned by the property handler.
+ */
+static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
+				    u64 extension)
+{
+	u64 __user *address = u64_to_user_ptr(extension);
+	struct drm_xe_ext_vm_set_property ext;
+	int err;
+
+	/*
+	 * @extension is a raw value supplied by userspace that nothing has
+	 * range-checked, so use the checked copy_from_user() rather than
+	 * __copy_from_user(), which relies on a prior access_ok().
+	 */
+	err = copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_ERR(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_ERR(xe, ext.property >=
+			 ARRAY_SIZE(vm_set_property_funcs)))
+		return -EINVAL;
+
+	return vm_set_property_funcs[ext.property](xe, vm, ext.value);
+}
+
+/*
+ * Handler for one user extension: @extension is the user address of the
+ * extension structure. Returns 0 or -errno.
+ */
+typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
+				       u64 extension);
+
+/*
+ * Extension handlers, indexed by the extension name supplied by userspace.
+ * Declared with the extension handler type; the property handler typedef
+ * happens to share the same signature but describes a different contract.
+ */
+static const xe_vm_user_extension_fn vm_user_extension_funcs[] = {
+	[XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS	16
+/*
+ * Walk the chain of user extensions starting at @extensions and apply each
+ * one to @vm.
+ *
+ * @xe: device, used for ioctl error reporting
+ * @vm: VM to configure
+ * @extensions: user address of the first struct xe_user_extension
+ * @ext_number: number of extensions already processed; pass 0
+ *
+ * Returns 0 on success, -E2BIG if the chain has MAX_USER_EXTENSIONS or more
+ * entries, -EFAULT if an extension cannot be read, -EINVAL for an unknown
+ * extension name, or the error returned by an extension handler.
+ */
+static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
+			      u64 extensions, int ext_number)
+{
+	struct xe_user_extension ext;
+	int err;
+
+	/*
+	 * The chain is followed iteratively; there is no need to spend a
+	 * kernel stack frame per user-supplied link.
+	 */
+	do {
+		u64 __user *address = u64_to_user_ptr(extensions);
+
+		if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
+			return -E2BIG;
+
+		/*
+		 * The address comes straight from userspace and has not
+		 * been range-checked, so the checked copy_from_user() is
+		 * required here.
+		 */
+		err = copy_from_user(&ext, address, sizeof(ext));
+		if (XE_IOCTL_ERR(xe, err))
+			return -EFAULT;
+
+		if (XE_IOCTL_ERR(xe, ext.name >=
+				 ARRAY_SIZE(vm_user_extension_funcs)))
+			return -EINVAL;
+
+		err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
+		if (XE_IOCTL_ERR(xe, err))
+			return err;
+
+		extensions = ext.next_extension;
+		ext_number++;
+	} while (extensions);
+
+	return 0;
+}
+
+#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
+				    DRM_XE_VM_CREATE_COMPUTE_MODE | \
+				    DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
+				    DRM_XE_VM_CREATE_FAULT_MODE)
+
+/*
+ * VM create ioctl.
+ *
+ * Validates the requested flags, creates the VM, applies any user
+ * extensions, assigns an ASID and finally publishes the VM in the file's VM
+ * table, returning the new id in args->vm_id.
+ *
+ * Returns 0 on success, -EINVAL for an invalid flag combination, or a
+ * negative error code from VM creation, extension processing or id / ASID
+ * allocation.
+ */
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_create *args = data;
+	struct xe_vm *vm;
+	u32 id, asid;
+	int err;
+	u32 flags = 0;
+
+	if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
+			 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
+			 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+			 xe_device_in_non_fault_mode(xe)))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
+			 xe_device_in_fault_mode(xe)))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+			 !xe->info.supports_usm))
+		return -EINVAL;
+
+	/* Translate the uAPI flags into the driver-internal VM flags */
+	if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
+		flags |= XE_VM_FLAG_SCRATCH_PAGE;
+	if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
+		flags |= XE_VM_FLAG_COMPUTE_MODE;
+	if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
+		flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
+	if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
+		flags |= XE_VM_FLAG_FAULT_MODE;
+
+	vm = xe_vm_create(xe, flags);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+
+	if (args->extensions) {
+		err = vm_user_extensions(xe, vm, args->extensions, 0);
+		if (XE_IOCTL_ERR(xe, err)) {
+			xe_vm_close_and_put(vm);
+			return err;
+		}
+	}
+
+	/*
+	 * xa_alloc_cyclic() returns 1 (not 0) when the allocation succeeded
+	 * after wrapping around, so only negative values are errors.
+	 */
+	mutex_lock(&xe->usm.lock);
+	err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+			      XA_LIMIT(0, XE_MAX_ASID - 1),
+			      &xe->usm.next_asid, GFP_KERNEL);
+	mutex_unlock(&xe->usm.lock);
+	if (err < 0) {
+		xe_vm_close_and_put(vm);
+		return err;
+	}
+	vm->usm.asid = asid;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
+	/* Warning: Security issue - never enable by default */
+	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, GEN8_PAGE_SIZE);
+#endif
+
+	/*
+	 * Publishing the id must be the very last step: as soon as the VM is
+	 * in xef->vm.xa another thread can look it up or destroy it, so the
+	 * VM must neither be touched nor torn down by this function
+	 * afterwards.
+	 */
+	mutex_lock(&xef->vm.lock);
+	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->vm.lock);
+	if (err) {
+		xe_vm_close_and_put(vm);
+		return err;
+	}
+
+	args->vm_id = id;
+
+	return 0;
+}
+
+/*
+ * VM destroy ioctl.
+ *
+ * Removes the VM identified by args->vm_id from the file's VM table and
+ * closes it.
+ *
+ * Returns 0 on success, -EINVAL if args->pad is set, -ENOENT if the id is
+ * unknown and -EBUSY if the VM still has preempt engines attached.
+ */
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_destroy *args = data;
+	struct xe_vm *vm;
+	int err = 0;
+
+	if (XE_IOCTL_ERR(xe, args->pad))
+		return -EINVAL;
+
+	/*
+	 * Look up, check and unpublish the VM in one critical section. The
+	 * table entry keeps the VM alive while the lock is held, and only
+	 * the caller that actually removes the entry goes on to close the
+	 * VM, so two racing destroy calls cannot both drop the reference.
+	 */
+	mutex_lock(&xef->vm.lock);
+	vm = xa_load(&xef->vm.xa, args->vm_id);
+	if (XE_IOCTL_ERR(xe, !vm))
+		err = -ENOENT;
+	/* FIXME: Extend this check to non-compute mode VMs */
+	else if (XE_IOCTL_ERR(xe, vm->preempt.num_engines))
+		err = -EBUSY;
+	else
+		xa_erase(&xef->vm.xa, args->vm_id);
+	mutex_unlock(&xef->vm.lock);
+
+	if (!err)
+		xe_vm_close_and_put(vm);
+
+	return err;
+}
+
+/*
+ * Memory placement to migrate to for each prefetch region, indexed by the
+ * region number passed to xe_vm_prefetch().
+ */
+static const u32 region_to_mem_type[] = {
+	XE_PL_TT,
+	XE_PL_VRAM0,
+	XE_PL_VRAM1,
+};
+
+/*
+ * Prefetch @vma into @region: migrate the backing BO (userptr VMAs have
+ * nothing to migrate) and rebind the VMA if it is not currently bound and
+ * valid on every GT in its mask.
+ *
+ * @vm: VM the VMA belongs to
+ * @vma: VMA to prefetch
+ * @e: engine to use for the rebind, may be NULL
+ * @region: index into region_to_mem_type[]
+ * @syncs: sync entries to signal
+ * @num_syncs: number of entries in @syncs
+ * @afence: optional async op fence
+ *
+ * Returns 0 on success or a negative error code from the migration or the
+ * bind.
+ */
+static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
+			  struct xe_engine *e, u32 region,
+			  struct xe_sync_entry *syncs, u32 num_syncs,
+			  struct async_op_fence *afence)
+{
+	int err;
+
+	/* @region is used as an array index, so ARRAY_SIZE itself is invalid */
+	XE_BUG_ON(region >= ARRAY_SIZE(region_to_mem_type));
+
+	if (!xe_vma_is_userptr(vma)) {
+		err = xe_bo_migrate(vma->bo, region_to_mem_type[region]);
+		if (err)
+			return err;
+	}
+
+	if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) {
+		return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs,
+				  afence);
+	} else {
+		int i;
+
+		/* Nothing to do, signal fences now */
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL,
+					     dma_fence_get_stub());
+		if (afence)
+			dma_fence_signal(&afence->fence);
+		return 0;
+	}
+}
+
+/* Extract the operation code (low 16 bits) from a bind op value */
+#define VM_BIND_OP(op)	((op) & 0xffff)
+
+/*
+ * Dispatch a single bind operation to the function implementing it.
+ *
+ * @vm: VM to operate on
+ * @vma: VMA the operation applies to
+ * @e: engine to use, may be NULL
+ * @bo: BO for XE_VM_BIND_OP_MAP; unused by the other operations
+ * @op: bind operation, decoded with VM_BIND_OP()
+ * @region: prefetch region, only used by XE_VM_BIND_OP_PREFETCH
+ * @syncs: sync entries to signal
+ * @num_syncs: number of entries in @syncs
+ * @afence: optional async op fence
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
+			   struct xe_engine *e, struct xe_bo *bo, u32 op,
+			   u32 region, struct xe_sync_entry *syncs,
+			   u32 num_syncs, struct async_op_fence *afence)
+{
+	switch (VM_BIND_OP(op)) {
+	case XE_VM_BIND_OP_MAP:
+		return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence);
+	case XE_VM_BIND_OP_UNMAP:
+	case XE_VM_BIND_OP_UNMAP_ALL:
+		return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence);
+	case XE_VM_BIND_OP_MAP_USERPTR:
+		/* userptr mappings have no BO to validate */
+		return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence);
+	case XE_VM_BIND_OP_PREFETCH:
+		return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs,
+				      afence);
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
+		return -EINVAL;
+	}
+}
+
+/*
+ * Return the TTM buffer object of one of the VM's page table roots: the root
+ * of the GT encoded in the VM flags for a migration VM, root 0 otherwise.
+ */
+struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
+{
+	int idx = 0;
+
+	/* Safe to use index 0 as all BO in the VM share a single dma-resv lock */
+	if (vm->flags & XE_VM_FLAG_MIGRATION)
+		idx = XE_VM_FLAG_GT_ID(vm->flags);
+
+	return &vm->pt_root[idx]->bo->ttm;
+}
+
+/*
+ * Fill in a TTM validate buffer entry for the VM itself: the BO returned by
+ * xe_vm_ttm_bo() with a single shared slot.
+ */
+static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
+{
+	struct ttm_buffer_object *vm_bo = xe_vm_ttm_bo(vm);
+
+	tv->bo = vm_bo;
+	tv->num_shared = 1;
+}
+
+/* True if @op decodes to one of the map operations (BO or userptr). */
+static bool is_map_op(u32 op)
+{
+	switch (VM_BIND_OP(op)) {
+	case XE_VM_BIND_OP_MAP:
+	case XE_VM_BIND_OP_MAP_USERPTR:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* True if @op decodes to one of the unmap operations. */
+static bool is_unmap_op(u32 op)
+{
+	switch (VM_BIND_OP(op)) {
+	case XE_VM_BIND_OP_UNMAP:
+	case XE_VM_BIND_OP_UNMAP_ALL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Execute one bind operation synchronously: reserve the VM (and the VMA's BO
+ * if it has one), run the operation through __vm_bind_ioctl() and back off
+ * the reservation again.
+ *
+ * @vm: VM to operate on; vm->lock must be held
+ * @vma: VMA the operation applies to; must not be on an unbind list
+ * @e: engine to use, may be NULL
+ * @bo: BO passed on to the operation
+ * @bind_op: the bind operation
+ * @syncs: sync entries to signal
+ * @num_syncs: number of entries in @syncs
+ * @afence: optional async op fence
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
+			 struct xe_engine *e, struct xe_bo *bo,
+			 struct drm_xe_vm_bind_op *bind_op,
+			 struct xe_sync_entry *syncs, u32 num_syncs,
+			 struct async_op_fence *afence)
+{
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+	struct ttm_validate_buffer tv_bo, tv_vm;
+	struct ww_acquire_ctx ww;
+	struct xe_bo *vbo;
+	int err, i;
+
+	lockdep_assert_held(&vm->lock);
+	XE_BUG_ON(!list_empty(&vma->unbind_link));
+
+	/* Binds deferred to faults, signal fences now */
+	if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) &&
+	    !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) {
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL,
+					     dma_fence_get_stub());
+		if (afence)
+			dma_fence_signal(&afence->fence);
+		return 0;
+	}
+
+	/* Build the list of objects to reserve: the VM, then the VMA's BO */
+	xe_vm_tv_populate(vm, &tv_vm);
+	list_add_tail(&tv_vm.head, &objs);
+	vbo = vma->bo;
+	if (vbo) {
+		/*
+		 * An unbind can drop the last reference to the BO and
+		 * the BO is needed for ttm_eu_backoff_reservation so
+		 * take a reference here.
+		 */
+		xe_bo_get(vbo);
+
+		tv_bo.bo = &vbo->ttm;
+		tv_bo.num_shared = 1;
+		list_add(&tv_bo.head, &objs);
+	}
+
+again:
+	err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
+	if (!err) {
+		err = __vm_bind_ioctl(vm, vma, e, bo,
+				      bind_op->op, bind_op->region, syncs,
+				      num_syncs, afence);
+		ttm_eu_backoff_reservation(&ww, &objs);
+		/*
+		 * -EAGAIN on a userptr VMA: re-pin its pages with the
+		 * reservation dropped and retry the whole operation.
+		 */
+		if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
+			lockdep_assert_held_write(&vm->lock);
+			err = xe_vma_userptr_pin_pages(vma);
+			if (!err)
+				goto again;
+		}
+	}
+	/* Drop the reference taken above; vbo is NULL if the VMA had no BO */
+	xe_bo_put(vbo);
+
+	return err;
+}
+
+/*
+ * One queued asynchronous bind operation.
+ *
+ * @vma: VMA the operation applies to
+ * @engine: engine to use, may be NULL; released in async_op_cleanup()
+ * @bo: BO passed on to the operation; released in async_op_cleanup()
+ * @bind_op: copy of the bind operation
+ * @syncs: sync entries; cleaned up and freed in async_op_cleanup()
+ * @num_syncs: number of entries in @syncs
+ * @link: entry in vm->async_ops.pending
+ * @fence: fence handed out for the operation, NULL if there are no syncs
+ */
+struct async_op {
+	struct xe_vma *vma;
+	struct xe_engine *engine;
+	struct xe_bo *bo;
+	struct drm_xe_vm_bind_op bind_op;
+	struct xe_sync_entry *syncs;
+	u32 num_syncs;
+	struct list_head link;
+	struct async_op_fence *fence;
+};
+
+/*
+ * Release everything an async op holds - its sync entries, BO, engine, VM
+ * reference and fence reference - and free the op itself.
+ */
+static void async_op_cleanup(struct xe_vm *vm, struct async_op *op)
+{
+	u32 remaining = op->num_syncs;
+
+	/* Sync entries are cleaned up last to first */
+	while (remaining--)
+		xe_sync_entry_cleanup(&op->syncs[remaining]);
+	kfree(op->syncs);
+
+	xe_bo_put(op->bo);
+	if (op->engine)
+		xe_engine_put(op->engine);
+	xe_vm_put(vm);
+
+	if (op->fence)
+		dma_fence_put(&op->fence->fence);
+
+	kfree(op);
+}
+
+/*
+ * Return the first op on the VM's pending list without removing it, or NULL
+ * if the list is empty.
+ */
+static struct async_op *next_async_op(struct xe_vm *vm)
+{
+	struct list_head *pending = &vm->async_ops.pending;
+
+	return list_first_entry_or_null(pending, struct async_op, link);
+}
+
+/*
+ * Record @err as the VM's async bind error. vm->lock must be held. While an
+ * error is recorded, the async worker stops processing ops of an open VM and
+ * newly queued ops do not kick the worker.
+ */
+static void vm_set_async_error(struct xe_vm *vm, int err)
+{
+	lockdep_assert_held(&vm->lock);
+	vm->async_ops.error = err;
+}
+
+/*
+ * Worker processing the VM's queue of pending async bind operations.
+ *
+ * Ops are taken off vm->async_ops.pending one at a time. On an open VM each
+ * op is executed through vm_bind_ioctl(); on failure the op is put back at
+ * the head of the queue, the error is recorded and reported to userspace if
+ * an error capture address is registered, and the worker stops. On a closed
+ * VM the ops are only flushed: unmaps destroy their VMA and unsignalled op
+ * fences are signalled.
+ */
+static void async_op_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
+
+	for (;;) {
+		struct async_op *op;
+		int err;
+
+		/* An open VM in the error state processes nothing further */
+		if (vm->async_ops.error && !xe_vm_is_closed(vm))
+			break;
+
+		spin_lock_irq(&vm->async_ops.lock);
+		op = next_async_op(vm);
+		if (op)
+			list_del_init(&op->link);
+		spin_unlock_irq(&vm->async_ops.lock);
+
+		if (!op)
+			break;
+
+		if (!xe_vm_is_closed(vm)) {
+			bool first, last;
+
+			down_write(&vm->lock);
+again:
+			/* Sampled before the op runs; an unbind destroys the VMA */
+			first = op->vma->first_munmap_rebind;
+			last = op->vma->last_munmap_rebind;
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+#define FORCE_ASYNC_OP_ERROR	BIT(31)
+			if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) {
+				err = vm_bind_ioctl(vm, op->vma, op->engine,
+						    op->bo, &op->bind_op,
+						    op->syncs, op->num_syncs,
+						    op->fence);
+			} else {
+				/* Fail once; the flag is cleared so a retry runs */
+				err = -ENOMEM;
+				op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR;
+			}
+#else
+			err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo,
+					    &op->bind_op, op->syncs,
+					    op->num_syncs, op->fence);
+#endif
+			/*
+			 * In order for the fencing to work (stall behind
+			 * existing jobs / prevent new jobs from running) all
+			 * the dma-resv slots need to be programmed in a batch
+			 * relative to execs / the rebind worker. The vm->lock
+			 * ensure this.
+			 */
+			if (!err && ((first && VM_BIND_OP(op->bind_op.op) ==
+				      XE_VM_BIND_OP_UNMAP) ||
+				     vm->async_ops.munmap_rebind_inflight)) {
+				if (last) {
+					/* End of the unmap + rebind sequence */
+					op->vma->last_munmap_rebind = false;
+					vm->async_ops.munmap_rebind_inflight =
+						false;
+				} else {
+					vm->async_ops.munmap_rebind_inflight =
+						true;
+
+					async_op_cleanup(vm, op);
+
+					/*
+					 * Run the next op of the sequence
+					 * without dropping vm->lock.
+					 */
+					spin_lock_irq(&vm->async_ops.lock);
+					op = next_async_op(vm);
+					XE_BUG_ON(!op);
+					list_del_init(&op->link);
+					spin_unlock_irq(&vm->async_ops.lock);
+
+					goto again;
+				}
+			}
+			if (err) {
+				trace_xe_vma_fail(op->vma);
+				drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d",
+					 VM_BIND_OP(op->bind_op.op),
+					 err);
+
+				/* Put the failed op back at the head of the queue */
+				spin_lock_irq(&vm->async_ops.lock);
+				list_add(&op->link, &vm->async_ops.pending);
+				spin_unlock_irq(&vm->async_ops.lock);
+
+				vm_set_async_error(vm, err);
+				up_write(&vm->lock);
+
+				if (vm->async_ops.error_capture.addr)
+					vm_error_capture(vm, err,
+							 op->bind_op.op,
+							 op->bind_op.addr,
+							 op->bind_op.range);
+				break;
+			}
+			up_write(&vm->lock);
+		} else {
+			/* VM is closed: flush the op instead of executing it */
+			trace_xe_vma_flush(op->vma);
+
+			if (is_unmap_op(op->bind_op.op)) {
+				down_write(&vm->lock);
+				xe_vma_destroy_unlocked(op->vma);
+				up_write(&vm->lock);
+			}
+
+			if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+						   &op->fence->fence.flags)) {
+				if (!xe_vm_no_dma_fences(vm)) {
+					op->fence->started = true;
+					smp_wmb();
+					wake_up_all(&op->fence->wq);
+				}
+				dma_fence_signal(&op->fence->fence);
+			}
+		}
+
+		async_op_cleanup(vm, op);
+	}
+}
+
+/*
+ * Queue one bind operation for asynchronous execution by the VM's async
+ * worker.
+ *
+ * @vm: VM to operate on; vm->lock must be held
+ * @vma: VMA the operation applies to
+ * @e: engine to use, may be NULL
+ * @bo: BO passed on to the operation
+ * @bind_op: the bind operation; copied into the queued op
+ * @syncs: sync entries; stored in the queued op and freed by
+ *         async_op_cleanup()
+ * @num_syncs: number of entries in @syncs
+ *
+ * When there are syncs, a fence is created for the operation and installed
+ * in them. The references on @vm, @e and @bo that async_op_cleanup() drops
+ * are not taken here.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
+				 struct xe_engine *e, struct xe_bo *bo,
+				 struct drm_xe_vm_bind_op *bind_op,
+				 struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct async_op *op;
+	bool installed = false;
+	u64 seqno;
+	int i;
+
+	lockdep_assert_held(&vm->lock);
+
+	op = kmalloc(sizeof(*op), GFP_KERNEL);
+	if (!op) {
+		return -ENOMEM;
+	}
+
+	if (num_syncs) {
+		op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL);
+		if (!op->fence) {
+			kfree(op);
+			return -ENOMEM;
+		}
+
+		/* Fence context / seqno come from the engine if there is one */
+		seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
+		dma_fence_init(&op->fence->fence, &async_op_fence_ops,
+			       &vm->async_ops.lock, e ? e->bind.fence_ctx :
+			       vm->async_ops.fence.context, seqno);
+
+		/* Only initialised here for VMs that use dma-fences */
+		if (!xe_vm_no_dma_fences(vm)) {
+			op->fence->vm = vm;
+			op->fence->started = false;
+			init_waitqueue_head(&op->fence->wq);
+		}
+	} else {
+		op->fence = NULL;
+	}
+	op->vma = vma;
+	op->engine = e;
+	op->bo = bo;
+	op->bind_op = *bind_op;
+	op->syncs = syncs;
+	op->num_syncs = num_syncs;
+	INIT_LIST_HEAD(&op->link);
+
+	/* Not entered when num_syncs is 0, i.e. when op->fence is NULL */
+	for (i = 0; i < num_syncs; i++)
+		installed |= xe_sync_entry_signal(&syncs[i], NULL,
+						  &op->fence->fence);
+
+	/* The fence was installed nowhere, so signal it right away */
+	if (!installed && op->fence)
+		dma_fence_signal(&op->fence->fence);
+
+	spin_lock_irq(&vm->async_ops.lock);
+	list_add_tail(&op->link, &vm->async_ops.pending);
+	spin_unlock_irq(&vm->async_ops.lock);
+
+	/* In the error state the op stays queued and the worker is not kicked */
+	if (!vm->async_ops.error)
+		queue_work(system_unbound_wq, &vm->async_ops.work);
+
+	return 0;
+}
+
+/*
+ * Queue a bind operation - or, for an unmap / prefetch covering several
+ * VMAs, the whole sequence of unbinds and rebinds - for asynchronous
+ * execution.
+ *
+ * @vm: VM to operate on; vm->lock must be held
+ * @vma: VMA the operation applies to; further VMAs of a sequence are chained
+ *       on vma->unbind_link
+ * @e: engine to use, may be NULL
+ * @bo: BO passed on to the operation
+ * @bind_op: the bind operation; its op field is rewritten for the rebinds
+ * @syncs: sync entries
+ * @num_syncs: number of entries in @syncs
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
+			       struct xe_engine *e, struct xe_bo *bo,
+			       struct drm_xe_vm_bind_op *bind_op,
+			       struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_vma *__vma, *next;
+	struct list_head rebind_list;
+	struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL;
+	u32 num_in_syncs = 0, num_out_syncs = 0;
+	bool first = true, last;
+	int err;
+	int i;
+
+	lockdep_assert_held(&vm->lock);
+
+	/* Not a linked list of unbinds + rebinds, easy */
+	if (list_empty(&vma->unbind_link))
+		return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op,
+					     syncs, num_syncs);
+
+	/*
+	 * Linked list of unbinds + rebinds, decompose syncs into 'in / out'
+	 * passing the 'in' to the first operation and 'out' to the last. Also
+	 * the reference counting is a little tricky, increment the VM / bind
+	 * engine ref count on all but the last operation and increment the BOs
+	 * ref count on each rebind.
+	 */
+
+	XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP &&
+		  VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL &&
+		  VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH);
+
+	/* Decompose syncs */
+	if (num_syncs) {
+		/* kmalloc_array() checks the size multiplication for overflow */
+		in_syncs = kmalloc_array(num_syncs, sizeof(*in_syncs),
+					 GFP_KERNEL);
+		out_syncs = kmalloc_array(num_syncs, sizeof(*out_syncs),
+					  GFP_KERNEL);
+		if (!in_syncs || !out_syncs) {
+			err = -ENOMEM;
+			goto out_error;
+		}
+
+		for (i = 0; i < num_syncs; ++i) {
+			bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL;
+
+			if (signal)
+				out_syncs[num_out_syncs++] = syncs[i];
+			else
+				in_syncs[num_in_syncs++] = syncs[i];
+		}
+	}
+
+	/* Do unbinds + move rebinds to new list */
+	INIT_LIST_HEAD(&rebind_list);
+	list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) {
+		if (__vma->destroyed ||
+		    VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) {
+			list_del_init(&__vma->unbind_link);
+			xe_bo_get(bo);
+			err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma,
+						    e ? xe_engine_get(e) : NULL,
+						    bo, bind_op, first ?
+						    in_syncs : NULL,
+						    first ? num_in_syncs : 0);
+			if (err) {
+				xe_bo_put(bo);
+				xe_vm_put(vm);
+				if (e)
+					xe_engine_put(e);
+				goto out_error;
+			}
+			/* The queued op owns the array now */
+			in_syncs = NULL;
+			first = false;
+		} else {
+			list_move_tail(&__vma->unbind_link, &rebind_list);
+		}
+	}
+	last = list_empty(&rebind_list);
+	if (!last) {
+		xe_vm_get(vm);
+		if (e)
+			xe_engine_get(e);
+	}
+	err = __vm_bind_ioctl_async(vm, vma, e,
+				    bo, bind_op,
+				    first ? in_syncs :
+				    last ? out_syncs : NULL,
+				    first ? num_in_syncs :
+				    last ? num_out_syncs : 0);
+	if (err) {
+		if (!last) {
+			xe_vm_put(vm);
+			if (e)
+				xe_engine_put(e);
+		}
+		goto out_error;
+	}
+	in_syncs = NULL;
+
+	/* Do rebinds */
+	list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) {
+		list_del_init(&__vma->unbind_link);
+		last = list_empty(&rebind_list);
+
+		if (xe_vma_is_userptr(__vma)) {
+			bind_op->op = XE_VM_BIND_FLAG_ASYNC |
+				XE_VM_BIND_OP_MAP_USERPTR;
+		} else {
+			bind_op->op = XE_VM_BIND_FLAG_ASYNC |
+				XE_VM_BIND_OP_MAP;
+			xe_bo_get(__vma->bo);
+		}
+
+		if (!last) {
+			xe_vm_get(vm);
+			if (e)
+				xe_engine_get(e);
+		}
+
+		err = __vm_bind_ioctl_async(vm, __vma, e,
+					    __vma->bo, bind_op, last ?
+					    out_syncs : NULL,
+					    last ? num_out_syncs : 0);
+		if (err) {
+			if (!last) {
+				xe_vm_put(vm);
+				if (e)
+					xe_engine_put(e);
+			}
+			goto out_error;
+		}
+	}
+
+	/* The entries were copied into the in / out arrays above */
+	kfree(syncs);
+	return 0;
+
+out_error:
+	kfree(in_syncs);
+	kfree(out_syncs);
+	kfree(syncs);
+
+	return err;
+}
+
+/*
+ * Check that the range [addr, addr + range - 1] is in a state that allows
+ * the bind operation @op. vm->lock must be held.
+ *
+ * Map operations require that no existing VMA overlaps the range (-EBUSY
+ * otherwise). Unmap and prefetch require an overlapping VMA, which must
+ * match the range exactly unless the operation is asynchronous (-EINVAL
+ * otherwise). Unmap-all is always accepted.
+ *
+ * Returns 0 if the operation may proceed or a negative error code.
+ */
+static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
+				      u64 addr, u64 range, u32 op)
+{
+	struct xe_device *xe = vm->xe;
+	bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
+	struct xe_vma lookup, *vma;
+
+	lockdep_assert_held(&vm->lock);
+
+	/* Only the range of the on-stack lookup key is filled in */
+	lookup.start = addr;
+	lookup.end = addr + range - 1;
+
+	switch (VM_BIND_OP(op)) {
+	case XE_VM_BIND_OP_MAP:
+	case XE_VM_BIND_OP_MAP_USERPTR:
+		vma = xe_vm_find_overlapping_vma(vm, &lookup);
+		if (XE_IOCTL_ERR(xe, vma))
+			return -EBUSY;
+		return 0;
+	case XE_VM_BIND_OP_UNMAP:
+	case XE_VM_BIND_OP_PREFETCH:
+		vma = xe_vm_find_overlapping_vma(vm, &lookup);
+		if (XE_IOCTL_ERR(xe, !vma))
+			return -EINVAL;
+		if (XE_IOCTL_ERR(xe, (vma->start != addr ||
+				 vma->end != addr + range - 1) && !async))
+			return -EINVAL;
+		return 0;
+	case XE_VM_BIND_OP_UNMAP_ALL:
+		return 0;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
+		return -EINVAL;
+	}
+}
+
+/*
+ * Mark @vma as destroyed (under the userptr notifier lock, taken for read)
+ * and remove it from the VM.
+ */
+static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma)
+{
+	down_read(&vm->userptr.notifier_lock);
+	vma->destroyed = true;
+	up_read(&vm->userptr.notifier_lock);
+	xe_vm_remove_vma(vm, vma);
+}
+
+/*
+ * Prepare a newly created replacement VMA. If it is backed by a BO that has
+ * no VM set (bo->vm is NULL), the VMA is added to the VM's external object
+ * tracking and preempt fences are added for the BO.
+ *
+ * Returns 0 on success or the error from add_preempt_fences().
+ */
+static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	/* Nothing to do without a BO, or with a BO that has a VM set */
+	if (!vma->bo || vma->bo->vm)
+		return 0;
+
+	vm_insert_extobj(vm, vma);
+
+	return add_preempt_fences(vm, vma->bo);
+}
+
+/*
+ * Find all VMAs overlapping the lookup range and chain them onto a list in the
+ * returned VMA; every VMA found will be unbound. If the first / last VMA is
+ * only partially covered by the range, up to 2 new VMAs are created for the
+ * remainder and need to be bound. This is akin to how munmap works.
+ */
+static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
+					    struct xe_vma *lookup)
+{
+	struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup);
+	struct rb_node *node;
+	struct xe_vma *first = vma, *last = vma, *new_first = NULL,
+		      *new_last = NULL, *__vma, *next;
+	int err = 0;
+	bool first_munmap_rebind = false;
+
+	lockdep_assert_held(&vm->lock);
+	XE_BUG_ON(!vma);
+
+	node = &vma->vm_node;
+	while ((node = rb_next(node))) {
+		if (!xe_vma_cmp_vma_cb(lookup, node)) {
+			__vma = to_xe_vma(node);
+			list_add_tail(&__vma->unbind_link, &vma->unbind_link);
+			last = __vma;
+		} else {
+			break;
+		}
+	}
+
+	node = &vma->vm_node;
+	while ((node = rb_prev(node))) {
+		if (!xe_vma_cmp_vma_cb(lookup, node)) {
+			__vma = to_xe_vma(node);
+			list_add(&__vma->unbind_link, &vma->unbind_link);
+			first = __vma;
+		} else {
+			break;
+		}
+	}
+
+	if (first->start != lookup->start) {
+		struct ww_acquire_ctx ww;
+
+		if (first->bo)
+			err = xe_bo_lock(first->bo, &ww, 0, true);
+		if (err)
+			goto unwind;
+		new_first = xe_vma_create(first->vm, first->bo,
+					  first->bo ? first->bo_offset :
+					  first->userptr.ptr,
+					  first->start,
+					  lookup->start - 1,
+					  (first->pte_flags & PTE_READ_ONLY),
+					  first->gt_mask);
+		if (first->bo)
+			xe_bo_unlock(first->bo, &ww);
+		if (!new_first) {
+			err = -ENOMEM;
+			goto unwind;
+		}
+		if (!first->bo) {
+			err = xe_vma_userptr_pin_pages(new_first);
+			if (err)
+				goto unwind;
+		}
+		err = prep_replacement_vma(vm, new_first);
+		if (err)
+			goto unwind;
+	}
+
+	if (last->end != lookup->end) {
+		struct ww_acquire_ctx ww;
+		u64 chunk = lookup->end + 1 - last->start;
+
+		if (last->bo)
+			err = xe_bo_lock(last->bo, &ww, 0, true);
+		if (err)
+			goto unwind;
+		new_last = xe_vma_create(last->vm, last->bo,
+					 last->bo ? last->bo_offset + chunk :
+					 last->userptr.ptr + chunk,
+					 last->start + chunk,
+					 last->end,
+					 (last->pte_flags & PTE_READ_ONLY),
+					 last->gt_mask);
+		if (last->bo)
+			xe_bo_unlock(last->bo, &ww);
+		if (!new_last) {
+			err = -ENOMEM;
+			goto unwind;
+		}
+		if (!last->bo) {
+			err = xe_vma_userptr_pin_pages(new_last);
+			if (err)
+				goto unwind;
+		}
+		err = prep_replacement_vma(vm, new_last);
+		if (err)
+			goto unwind;
+	}
+
+	prep_vma_destroy(vm, vma);
+	if (list_empty(&vma->unbind_link) && (new_first || new_last))
+		vma->first_munmap_rebind = true;
+	list_for_each_entry(__vma, &vma->unbind_link, unbind_link) {
+		if ((new_first || new_last) && !first_munmap_rebind) {
+			__vma->first_munmap_rebind = true;
+			first_munmap_rebind = true;
+		}
+		prep_vma_destroy(vm, __vma);
+	}
+	if (new_first) {
+		xe_vm_insert_vma(vm, new_first);
+		list_add_tail(&new_first->unbind_link, &vma->unbind_link);
+		if (!new_last)
+			new_first->last_munmap_rebind = true;
+	}
+	if (new_last) {
+		xe_vm_insert_vma(vm, new_last);
+		list_add_tail(&new_last->unbind_link, &vma->unbind_link);
+		new_last->last_munmap_rebind = true;
+	}
+
+	return vma;
+
+unwind:
+	list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link)
+		list_del_init(&__vma->unbind_link);
+	if (new_last) {
+		prep_vma_destroy(vm, new_last);
+		xe_vma_destroy_unlocked(new_last);
+	}
+	if (new_first) {
+		prep_vma_destroy(vm, new_first);
+		xe_vma_destroy_unlocked(new_first);
+	}
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Prefetch counterpart of vm_unbind_lookup_vmas: gather every VMA overlapping
+ * the lookup range onto the unbind_link list of the first VMA found, which is
+ * returned. Fails with -EINVAL, unlinking whatever was gathered so far, as
+ * soon as a BO-backed VMA cannot migrate to the memory type selected by
+ * @region.
+ */
+static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
+					      struct xe_vma *lookup,
+					      u32 region)
+{
+	struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup);
+	struct xe_vma *cur, *tmp;
+	struct rb_node *node;
+
+	/* Nothing has been linked yet, so a plain return is enough here */
+	if (!xe_vma_is_userptr(vma) &&
+	    !xe_bo_can_migrate(vma->bo, region_to_mem_type[region]))
+		return ERR_PTR(-EINVAL);
+
+	/* Tree successors of the initial hit, appended at the list tail */
+	for (node = rb_next(&vma->vm_node); node; node = rb_next(node)) {
+		if (xe_vma_cmp_vma_cb(lookup, node))
+			break;
+
+		cur = to_xe_vma(node);
+		if (!xe_vma_is_userptr(cur) &&
+		    !xe_bo_can_migrate(cur->bo, region_to_mem_type[region]))
+			goto flush_list;
+
+		list_add_tail(&cur->unbind_link, &vma->unbind_link);
+	}
+
+	/* Tree predecessors of the initial hit, inserted at the list front */
+	for (node = rb_prev(&vma->vm_node); node; node = rb_prev(node)) {
+		if (xe_vma_cmp_vma_cb(lookup, node))
+			break;
+
+		cur = to_xe_vma(node);
+		if (!xe_vma_is_userptr(cur) &&
+		    !xe_bo_can_migrate(cur->bo, region_to_mem_type[region]))
+			goto flush_list;
+
+		list_add(&cur->unbind_link, &vma->unbind_link);
+	}
+
+	return vma;
+
+flush_list:
+	list_for_each_entry_safe(cur, tmp, &vma->unbind_link, unbind_link)
+		list_del_init(&cur->unbind_link);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Collect every VMA of @bo that belongs to @vm. Each one goes through
+ * prep_vma_destroy(); the first becomes the list head and the rest are chained
+ * onto its unbind_link. Requires vm->lock and the BO lock to be held.
+ *
+ * Returns the head VMA, or NULL when @bo has no VMA in @vm.
+ */
+static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm,
+						struct xe_bo *bo)
+{
+	struct xe_vma *head = NULL;
+	struct xe_vma *cur;
+
+	lockdep_assert_held(&vm->lock);
+	xe_bo_assert_held(bo);
+
+	list_for_each_entry(cur, &bo->vmas, bo_link) {
+		if (cur->vm == vm) {
+			prep_vma_destroy(vm, cur);
+			if (head)
+				list_add_tail(&cur->unbind_link,
+					      &head->unbind_link);
+			else
+				head = cur;
+		}
+	}
+
+	return head;
+}
+
+/*
+ * Produce the VMA (or list of VMAs, chained through unbind_link) that one
+ * bind operation acts on. MAP and MAP_USERPTR create a new VMA and insert it
+ * into @vm; UNMAP, PREFETCH and UNMAP_ALL delegate to the matching
+ * *_lookup_vmas() helper. The caller must hold vm->lock.
+ *
+ * @bo_offset_or_userptr is passed straight to xe_vma_create(): an offset into
+ * @bo for MAP, the user pointer for MAP_USERPTR.
+ *
+ * Returns the VMA on success or an ERR_PTR() on failure.
+ */
+static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
+					       struct xe_bo *bo,
+					       u64 bo_offset_or_userptr,
+					       u64 addr, u64 range, u32 op,
+					       u64 gt_mask, u32 region)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_vma *vma, lookup;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+
+	/* Lookup key, only consumed by the UNMAP and PREFETCH cases */
+	lookup.start = addr;
+	lookup.end = addr + range - 1;
+
+	switch (VM_BIND_OP(op)) {
+	case XE_VM_BIND_OP_MAP:
+		XE_BUG_ON(!bo);
+
+		/* The VMA is created with the BO locked */
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return ERR_PTR(err);
+		vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr,
+				    addr + range - 1,
+				    op & XE_VM_BIND_FLAG_READONLY,
+				    gt_mask);
+		xe_bo_unlock(bo, &ww);
+		if (!vma)
+			return ERR_PTR(-ENOMEM);
+
+		xe_vm_insert_vma(vm, vma);
+		/* Same extra setup as prep_replacement_vma() for bo->vm == NULL */
+		if (!bo->vm) {
+			vm_insert_extobj(vm, vma);
+			err = add_preempt_fences(vm, bo);
+			if (err) {
+				prep_vma_destroy(vm, vma);
+				xe_vma_destroy_unlocked(vma);
+
+				return ERR_PTR(err);
+			}
+		}
+		break;
+	case XE_VM_BIND_OP_UNMAP:
+		vma = vm_unbind_lookup_vmas(vm, &lookup);
+		break;
+	case XE_VM_BIND_OP_PREFETCH:
+		vma = vm_prefetch_lookup_vmas(vm, &lookup, region);
+		break;
+	case XE_VM_BIND_OP_UNMAP_ALL:
+		XE_BUG_ON(!bo);
+
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return ERR_PTR(err);
+		vma = vm_unbind_all_lookup_vmas(vm, bo);
+		/* The BO has no mapping in this VM */
+		if (!vma)
+			vma = ERR_PTR(-EINVAL);
+		xe_bo_unlock(bo, &ww);
+		break;
+	case XE_VM_BIND_OP_MAP_USERPTR:
+		XE_BUG_ON(bo);
+
+		vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr,
+				    addr + range - 1,
+				    op & XE_VM_BIND_FLAG_READONLY,
+				    gt_mask);
+		if (!vma)
+			return ERR_PTR(-ENOMEM);
+
+		/* The VMA only enters the VM once its pages are pinned */
+		err = xe_vma_userptr_pin_pages(vma);
+		if (err) {
+			xe_vma_destroy(vma, NULL);
+
+			return ERR_PTR(err);
+		} else {
+			xe_vm_insert_vma(vm, vma);
+		}
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
+		vma = ERR_PTR(-EINVAL);
+	}
+
+	return vma;
+}
+
+/*
+ * Bits of a bind op word that vm_bind_ioctl_check_args() accepts: the flag
+ * bits plus the low 16 bits (0xffff), presumably the opcode field that
+ * VM_BIND_OP() extracts - confirm against its definition. Builds with
+ * TEST_VM_ASYNC_OPS_ERROR additionally accept FORCE_ASYNC_OP_ERROR.
+ */
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+#define SUPPORTED_FLAGS	\
+	(FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
+	 XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
+#else
+#define SUPPORTED_FLAGS	\
+	(XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
+	 XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
+#endif
+/* Low 16 bits; a value ANDed with this must be 0 to be 64K aligned */
+#define XE_64K_PAGE_MASK 0xffffull
+
+/* Upper bound on drm_xe_vm_bind.num_binds accepted per ioctl call */
+#define MAX_BINDS	512	/* FIXME: Picking random upper limit */
+
+/*
+ * Fetch and sanity check the bind operations described by @args.
+ *
+ * With a single bind, *@bind_ops is pointed at the operation embedded in
+ * @args. With several binds, an array is allocated and filled from the user
+ * pointer in args->vector_of_binds; on success the caller owns that array and
+ * must kfree() it, on failure it is freed here.
+ *
+ * *@async is taken from the first operation; every further operation must be
+ * async as well and must not be a RESTART.
+ *
+ * Returns 0 on success, -EINVAL for invalid arguments, -ENOMEM if the array
+ * cannot be allocated and -EFAULT if it cannot be copied from user space.
+ */
+static int vm_bind_ioctl_check_args(struct xe_device *xe,
+				    struct drm_xe_vm_bind *args,
+				    struct drm_xe_vm_bind_op **bind_ops,
+				    bool *async)
+{
+	int err;
+	int i;
+
+	if (XE_IOCTL_ERR(xe, args->extensions) ||
+	    XE_IOCTL_ERR(xe, !args->num_binds) ||
+	    XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS))
+		return -EINVAL;
+
+	if (args->num_binds > 1) {
+		u64 __user *bind_user =
+			u64_to_user_ptr(args->vector_of_binds);
+
+		*bind_ops = kmalloc_array(args->num_binds,
+					  sizeof(struct drm_xe_vm_bind_op),
+					  GFP_KERNEL);
+		if (!*bind_ops)
+			return -ENOMEM;
+
+		/*
+		 * The user pointer has not been through access_ok(), so the
+		 * checking copy_from_user() is required here rather than
+		 * __copy_from_user().
+		 */
+		err = copy_from_user(*bind_ops, bind_user,
+				     sizeof(struct drm_xe_vm_bind_op) *
+				     args->num_binds);
+		if (XE_IOCTL_ERR(xe, err)) {
+			err = -EFAULT;
+			goto free_bind_ops;
+		}
+	} else {
+		*bind_ops = &args->bind;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = (*bind_ops)[i].range;
+		u64 addr = (*bind_ops)[i].addr;
+		u32 op = (*bind_ops)[i].op;
+		u32 obj = (*bind_ops)[i].obj;
+		u64 obj_offset = (*bind_ops)[i].obj_offset;
+		u32 region = (*bind_ops)[i].region;
+
+		/* The first op decides sync vs async, the rest must be async */
+		if (i == 0) {
+			*async = !!(op & XE_VM_BIND_FLAG_ASYNC);
+		} else if (XE_IOCTL_ERR(xe, !*async) ||
+			   XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
+			   XE_IOCTL_ERR(xe, VM_BIND_OP(op) ==
+					XE_VM_BIND_OP_RESTART)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		/* UNMAP_ALL and PREFETCH are only accepted in async mode */
+		if (XE_IOCTL_ERR(xe, !*async &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_ERR(xe, !*async &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		/*
+		 * Per-opcode requirements on obj / addr / range / region.
+		 * region comes straight from user space, so it is bounded
+		 * before BIT(region) is evaluated: shifting by the width of
+		 * the type or more is undefined.
+		 */
+		if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) >
+				 XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) ||
+		    XE_IOCTL_ERR(xe, !obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) ||
+		    XE_IOCTL_ERR(xe, !obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_ERR(xe, addr &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_ERR(xe, range &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_ERR(xe, obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_ERR(xe, obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_ERR(xe, region &&
+				 VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_ERR(xe, region >= BITS_PER_LONG) ||
+		    XE_IOCTL_ERR(xe, !(BIT(region) &
+				       xe->info.mem_region_mask)) ||
+		    XE_IOCTL_ERR(xe, obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		/* Page alignment; only RESTART and UNMAP_ALL may have no range */
+		if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) ||
+		    XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) ||
+		    XE_IOCTL_ERR(xe, range & ~PAGE_MASK) ||
+		    XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) !=
+				 XE_VM_BIND_OP_RESTART &&
+				 VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+	}
+
+	return 0;
+
+free_bind_ops:
+	/* Only the multi-bind case allocated anything */
+	if (args->num_binds > 1)
+		kfree(*bind_ops);
+	return err;
+}
+
+/**
+ * xe_vm_bind_ioctl - VM bind ioctl entry point
+ * @dev: DRM device
+ * @data: ioctl payload, a struct drm_xe_vm_bind
+ * @file: DRM file the ioctl was issued on
+ *
+ * Validates the arguments, looks up the VM, the optional bind engine and the
+ * BOs referenced by the operations, parses the user sync entries, and then
+ * hands every operation to vm_bind_ioctl_async() (async mode) or the single
+ * operation to vm_bind_ioctl() (sync mode). XE_VM_BIND_OP_RESTART is handled
+ * separately: it calls vm_set_async_error(vm, 0) and re-queues the workers.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_bind *args = data;
+	struct drm_xe_sync __user *syncs_user;
+	struct xe_bo **bos = NULL;
+	struct xe_vma **vmas = NULL;
+	struct xe_vm *vm;
+	struct xe_engine *e = NULL;
+	u32 num_syncs;
+	struct xe_sync_entry *syncs = NULL;
+	struct drm_xe_vm_bind_op *bind_ops;
+	bool async;
+	int err;
+	int i, j = 0;
+
+	err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
+	if (err)
+		return err;
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_ERR(xe, !vm)) {
+		err = -EINVAL;
+		goto free_objs;
+	}
+
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
+		DRM_ERROR("VM closed while we began looking up?\n");
+		err = -ENOENT;
+		goto put_vm;
+	}
+
+	if (args->engine_id) {
+		e = xe_engine_lookup(xef, args->engine_id);
+		if (XE_IOCTL_ERR(xe, !e)) {
+			err = -ENOENT;
+			goto put_vm;
+		}
+		if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) {
+			err = -EINVAL;
+			goto put_engine;
+		}
+	}
+
+	if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
+		/* err is 0 on entry; the first failing check below sets it */
+		if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
+			err = -ENOTSUPP;
+		if (XE_IOCTL_ERR(xe, !err && args->num_syncs))
+			err = -EINVAL;
+		if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error))
+			err = -EPROTO;
+
+		if (!err) {
+			down_write(&vm->lock);
+			trace_xe_vm_restart(vm);
+			vm_set_async_error(vm, 0);
+			up_write(&vm->lock);
+
+			queue_work(system_unbound_wq, &vm->async_ops.work);
+
+			/* Rebinds may have been blocked, give worker a kick */
+			if (xe_vm_in_compute_mode(vm))
+				queue_work(vm->xe->ordered_wq,
+					   &vm->preempt.rebind_work);
+		}
+
+		goto put_engine;
+	}
+
+	/* Unless an async error is pending, op mode must match the VM mode */
+	if (XE_IOCTL_ERR(xe, !vm->async_ops.error &&
+			 async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
+		err = -ENOTSUPP;
+		goto put_engine;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+
+		/* Written so that addr + range cannot overflow */
+		if (XE_IOCTL_ERR(xe, range > vm->size) ||
+		    XE_IOCTL_ERR(xe, addr > vm->size - range)) {
+			err = -EINVAL;
+			goto put_engine;
+		}
+
+		if (bind_ops[i].gt_mask) {
+			u64 valid_gts = BIT(xe->info.tile_count) - 1;
+
+			if (XE_IOCTL_ERR(xe, bind_ops[i].gt_mask &
+					 ~valid_gts)) {
+				err = -EINVAL;
+				goto put_engine;
+			}
+		}
+	}
+
+	bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL);
+	if (!bos) {
+		err = -ENOMEM;
+		goto put_engine;
+	}
+
+	vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL);
+	if (!vmas) {
+		err = -ENOMEM;
+		goto put_engine;
+	}
+
+	/* Look up the BO of every op that names one; others stay NULL */
+	for (i = 0; i < args->num_binds; ++i) {
+		struct drm_gem_object *gem_obj;
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 obj = bind_ops[i].obj;
+		u64 obj_offset = bind_ops[i].obj_offset;
+
+		if (!obj)
+			continue;
+
+		gem_obj = drm_gem_object_lookup(file, obj);
+		if (XE_IOCTL_ERR(xe, !gem_obj)) {
+			err = -ENOENT;
+			goto put_obj;
+		}
+		bos[i] = gem_to_xe_bo(gem_obj);
+
+		if (XE_IOCTL_ERR(xe, range > bos[i]->size) ||
+		    XE_IOCTL_ERR(xe, obj_offset >
+				 bos[i]->size - range)) {
+			err = -EINVAL;
+			goto put_obj;
+		}
+
+		if (bos[i]->flags & XE_BO_INTERNAL_64K) {
+			if (XE_IOCTL_ERR(xe, obj_offset &
+					 XE_64K_PAGE_MASK) ||
+			    XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) ||
+			    XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) {
+				err = -EINVAL;
+				goto put_obj;
+			}
+		}
+	}
+
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto put_obj;
+		}
+	}
+
+	/* num_syncs counts parsed entries, i.e. those needing cleanup */
+	syncs_user = u64_to_user_ptr(args->syncs);
+	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
+					  &syncs_user[num_syncs], false,
+					  xe_vm_no_dma_fences(vm));
+		if (err)
+			goto free_syncs;
+	}
+
+	err = down_write_killable(&vm->lock);
+	if (err)
+		goto free_syncs;
+
+	/* Do some error checking first to make the unwind easier */
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 op = bind_ops[i].op;
+
+		err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
+		if (err)
+			goto release_vm_lock;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 op = bind_ops[i].op;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u64 gt_mask = bind_ops[i].gt_mask;
+		u32 region = bind_ops[i].region;
+
+		vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset,
+						   addr, range, op, gt_mask,
+						   region);
+		if (IS_ERR(vmas[i])) {
+			err = PTR_ERR(vmas[i]);
+			vmas[i] = NULL;
+			goto destroy_vmas;
+		}
+	}
+
+	/* j counts submitted ops; the cleanup loops below start at j */
+	for (j = 0; j < args->num_binds; ++j) {
+		struct xe_sync_entry *__syncs;
+		u32 __num_syncs = 0;
+		bool first_or_last = j == 0 || j == args->num_binds - 1;
+
+		if (args->num_binds == 1) {
+			__num_syncs = num_syncs;
+			__syncs = syncs;
+		} else if (first_or_last && num_syncs) {
+			bool first = j == 0;
+
+			__syncs = kmalloc(sizeof(*__syncs) * num_syncs,
+					  GFP_KERNEL);
+			if (!__syncs) {
+				err = -ENOMEM;
+				break;
+			}
+
+			/* in-syncs on first bind, out-syncs on last bind */
+			for (i = 0; i < num_syncs; ++i) {
+				bool signal = syncs[i].flags &
+					DRM_XE_SYNC_SIGNAL;
+
+				if ((first && !signal) || (!first && signal))
+					__syncs[__num_syncs++] = syncs[i];
+			}
+		} else {
+			__num_syncs = 0;
+			__syncs = NULL;
+		}
+
+		if (async) {
+			bool last = j == args->num_binds - 1;
+
+			/*
+			 * Each pass of async worker drops the ref, take a ref
+			 * here, 1 set of refs taken above
+			 */
+			if (!last) {
+				if (e)
+					xe_engine_get(e);
+				xe_vm_get(vm);
+			}
+
+			err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j],
+						  bind_ops + j, __syncs,
+						  __num_syncs);
+			if (err && !last) {
+				if (e)
+					xe_engine_put(e);
+				xe_vm_put(vm);
+			}
+			if (err)
+				break;
+		} else {
+			XE_BUG_ON(j != 0);	/* Not supported */
+			err = vm_bind_ioctl(vm, vmas[j], e, bos[j],
+					    bind_ops + j, __syncs,
+					    __num_syncs, NULL);
+			break;	/* Needed so cleanup loops work */
+		}
+	}
+
+	/* Most of cleanup owned by the async bind worker */
+	if (async && !err) {
+		up_write(&vm->lock);
+		if (args->num_binds > 1)
+			kfree(syncs);
+		goto free_objs;
+	}
+
+destroy_vmas:
+	for (i = j; err && i < args->num_binds; ++i) {
+		u32 op = bind_ops[i].op;
+		struct xe_vma *vma, *next;
+
+		/* The lookup loop stopped here; later slots are empty too */
+		if (!vmas[i])
+			break;
+
+		/* Walk the list hanging off vmas[i], the head of this op */
+		list_for_each_entry_safe(vma, next, &vmas[i]->unbind_link,
+					 unbind_link) {
+			list_del_init(&vma->unbind_link);
+			if (!vma->destroyed) {
+				prep_vma_destroy(vm, vma);
+				xe_vma_destroy_unlocked(vma);
+			}
+		}
+
+		/* VMAs created for a map op were never bound, drop them */
+		switch (VM_BIND_OP(op)) {
+		case XE_VM_BIND_OP_MAP:
+			prep_vma_destroy(vm, vmas[i]);
+			xe_vma_destroy_unlocked(vmas[i]);
+			break;
+		case XE_VM_BIND_OP_MAP_USERPTR:
+			prep_vma_destroy(vm, vmas[i]);
+			xe_vma_destroy_unlocked(vmas[i]);
+			break;
+		}
+	}
+release_vm_lock:
+	up_write(&vm->lock);
+free_syncs:
+	while (num_syncs--) {
+		if (async && j &&
+		    !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL))
+			continue;	/* Still in async worker */
+		xe_sync_entry_cleanup(&syncs[num_syncs]);
+	}
+
+	kfree(syncs);
+put_obj:
+	for (i = j; i < args->num_binds; ++i)
+		xe_bo_put(bos[i]);
+put_engine:
+	if (e)
+		xe_engine_put(e);
+put_vm:
+	xe_vm_put(vm);
+free_objs:
+	kfree(bos);
+	kfree(vmas);
+	if (args->num_binds > 1)
+		kfree(bind_ops);
+	return err;
+}
+
+/*
+ * XXX: The TTM execbuf helpers are used for now; calling into the dma-resv
+ * code directly is a likely optimization, and this should probably be an
+ * inline function as well.
+ *
+ * Reserves the buffer object returned by xe_vm_ttm_bo() under @ww, asking
+ * for @num_resv shared slots. Returns what ttm_eu_reserve_buffers() returns.
+ */
+int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+	       int num_resv, bool intr)
+{
+	LIST_HEAD(dups);
+	LIST_HEAD(objs);
+	struct ttm_validate_buffer tv_vm;
+
+	XE_BUG_ON(!ww);
+
+	/* A one-entry validation list holding just the VM's buffer object */
+	tv_vm.bo = xe_vm_ttm_bo(vm);
+	tv_vm.num_shared = num_resv;
+	list_add_tail(&tv_vm.head, &objs);
+
+	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
+}
+
+/*
+ * Counterpart of xe_vm_lock(): drop the lock on vm->resv and then finish the
+ * ww acquire context @ww.
+ */
+void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
+{
+	dma_resv_unlock(&vm->resv);
+	ww_acquire_fini(ww);
+}
+
+/**
+ * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * @vma: VMA to invalidate
+ *
+ * Walks the list of page-table leaves, zeroing the entries owned by this VMA,
+ * then invalidates the TLBs on every GT where entries were zapped and blocks
+ * until those TLB invalidations are complete. Only valid for VMs in fault
+ * mode.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_invalidate_vma(struct xe_vma *vma)
+{
+	struct xe_device *xe = vma->vm->xe;
+	struct xe_gt *gt;
+	u32 gt_needs_invalidate = 0;	/* bitmask of GT ids to wait on */
+	int seqno[XE_MAX_GT];		/* only valid where the bit is set */
+	u8 id;
+	int ret;
+
+	XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm));
+	trace_xe_vma_usm_invalidate(vma);
+
+	/* Check that we don't race with page-table updates */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		if (xe_vma_is_userptr(vma)) {
+			WARN_ON_ONCE(!mmu_interval_check_retry
+				     (&vma->userptr.notifier,
+				      vma->userptr.notifier_seq));
+			WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv,
+							     DMA_RESV_USAGE_BOOKKEEP));
+
+		} else {
+			xe_bo_assert_held(vma->bo);
+		}
+	}
+
+	/* First pass: zap PTEs and issue the invalidations without waiting */
+	for_each_gt(gt, xe, id) {
+		if (xe_pt_zap_ptes(gt, vma)) {
+			gt_needs_invalidate |= BIT(id);
+			xe_device_wmb(xe);
+			seqno[id] = xe_gt_tlb_invalidation(gt);
+			if (seqno[id] < 0)
+				return seqno[id];
+		}
+	}
+
+	/* Second pass: wait for every invalidation issued above */
+	for_each_gt(gt, xe, id) {
+		if (gt_needs_invalidate & BIT(id)) {
+			ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	/* Record the VMA as invalidated on all GTs in its mask */
+	vma->usm.gt_invalidated = vma->gt_mask;
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
+/*
+ * Dump the page-table root address for GT @gt_id and one line per VMA of @vm
+ * to printer @p. vm->lock is only try-locked for read; if that fails a
+ * message is printed instead of the dump. Always returns 0.
+ */
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+{
+	struct rb_node *node;
+	bool is_lmem;
+	uint64_t addr;
+
+	if (!down_read_trylock(&vm->lock)) {
+		/* Terminate the line like every other message printed here */
+		drm_printf(p, " Failed to acquire VM lock to dump capture\n");
+		return 0;
+	}
+	if (vm->pt_root[gt_id]) {
+		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_lmem);
+		drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_lmem ? "LMEM" : "SYS");
+	}
+
+	for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
+		struct xe_vma *vma = to_xe_vma(node);
+		bool is_userptr = xe_vma_is_userptr(vma);
+
+		/* Address at offset 0: from the sg table or from the BO */
+		if (is_userptr) {
+			struct xe_res_cursor cur;
+
+			xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur);
+			addr = xe_res_dma(&cur);
+		} else {
+			addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_lmem);
+		}
+		/* is_lmem is only read for BO-backed VMAs, where it was set */
+		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
+			   vma->start, vma->end, vma->end - vma->start + 1ull,
+			   addr, is_userptr ? "USR" : is_lmem ? "VRAM" : "SYS");
+	}
+	up_read(&vm->lock);
+
+	return 0;
+}
+#else
+/* Stub used when CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE is disabled */
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+{
+	return 0;
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
new file mode 100644
index 000000000000..3468ed9d0528
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_VM_H_
+#define _XE_VM_H_
+
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_printer;
+struct drm_file;
+
+struct ttm_buffer_object;
+struct ttm_validate_buffer;
+
+struct xe_engine;
+struct xe_file;
+struct xe_sync_entry;
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
+void xe_vm_free(struct kref *ref);
+
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id);
+int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node);
+
+/* Take a reference on @vm (vm->refcount) and return @vm for chaining */
+static inline struct xe_vm *xe_vm_get(struct xe_vm *vm)
+{
+	kref_get(&vm->refcount);
+	return vm;
+}
+
+/* Drop a reference on @vm; xe_vm_free() is the kref release callback */
+static inline void xe_vm_put(struct xe_vm *vm)
+{
+	kref_put(&vm->refcount, xe_vm_free);
+}
+
+int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+	       int num_resv, bool intr);
+
+void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww);
+
+/* A VM is reported as closed when its size is zero */
+static inline bool xe_vm_is_closed(struct xe_vm *vm)
+{
+	/* Only guaranteed not to change when vm->resv is held */
+	return !vm->size;
+}
+
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma);
+
+#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
+
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt);
+
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file);
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file);
+
+void xe_vm_close_and_put(struct xe_vm *vm);
+
+/* True when @vm has XE_VM_FLAG_COMPUTE_MODE set in its flags */
+static inline bool xe_vm_in_compute_mode(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_COMPUTE_MODE;
+}
+
+/* True when @vm has XE_VM_FLAG_FAULT_MODE set in its flags */
+static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_FAULT_MODE;
+}
+
+/* True when @vm is in either compute mode or fault mode */
+static inline bool xe_vm_no_dma_fences(struct xe_vm *vm)
+{
+	return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm);
+}
+
+int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e);
+
+int xe_vm_userptr_pin(struct xe_vm *vm);
+
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
+
+int xe_vm_userptr_check_repin(struct xe_vm *vm);
+
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+
+int xe_vm_invalidate_vma(struct xe_vma *vma);
+
+int xe_vm_async_fence_wait_start(struct dma_fence *fence);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+
+struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm);
+
+/* A VMA with no backing BO is treated as a userptr VMA */
+static inline bool xe_vma_is_userptr(struct xe_vma *vma)
+{
+	return !vma->bo;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_vma *vma);
+
+int xe_vma_userptr_check_repin(struct xe_vma *vma);
+
+/*
+ * XE_ONSTACK_TV is used to size the tv_onstack array that is input
+ * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv().
+ */
+#define XE_ONSTACK_TV 20
+int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+			struct ttm_validate_buffer *tv_onstack,
+			struct ttm_validate_buffer **tv,
+			struct list_head *objs,
+			bool intr,
+			unsigned int num_shared);
+
+void xe_vm_unlock_dma_resv(struct xe_vm *vm,
+			   struct ttm_validate_buffer *tv_onstack,
+			   struct ttm_validate_buffer *tv,
+			   struct ww_acquire_ctx *ww,
+			   struct list_head *objs);
+
+void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
+			     enum dma_resv_usage usage);
+
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
+
+/*
+ * vm_dbg() is drm_dbg() when CONFIG_DRM_XE_DEBUG_VM is enabled. Otherwise it
+ * is an empty inline stub, annotated with __printf() so the format string and
+ * arguments are still type checked.
+ */
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define vm_dbg drm_dbg
+#else
+__printf(2, 3)
+static inline void vm_dbg(const struct drm_device *dev,
+			  const char *format, ...)
+{ /* noop */ }
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
new file mode 100644
index 000000000000..5b6216964c45
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -0,0 +1,555 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_DOC_H_
+#define _XE_VM_DOC_H_
+
+/**
+ * DOC: XE VM (user address space)
+ *
+ * VM creation
+ * ===========
+ *
+ * Allocate a physical page for root of the page table structure, create default
+ * bind engine, and return a handle to the user.
+ *
+ * Scratch page
+ * ------------
+ *
+ * If the VM is created with the flag DRM_XE_VM_CREATE_SCRATCH_PAGE, the entire
+ * page table structure defaults to pointing at a blank page allocated by the
+ * VM. Invalid memory accesses then read / write this page rather than fault.
+ *
+ * VM bind (create GPU mapping for a BO or userptr)
+ * ================================================
+ *
+ * Creates GPU mappings for a BO or userptr within a VM. VM binds use the same
+ * in / out fence interface (struct drm_xe_sync) as execs which allows users to
+ * think of binds and execs as more or less the same operation.
+ *
+ * Operations
+ * ----------
+ *
+ * XE_VM_BIND_OP_MAP		- Create mapping for a BO
+ * XE_VM_BIND_OP_UNMAP		- Destroy mapping for a BO / userptr
+ * XE_VM_BIND_OP_MAP_USERPTR	- Create mapping for userptr
+ *
+ * Implementation details
+ * ~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * All bind operations are implemented via a hybrid approach of using the CPU
+ * and GPU to modify page tables. If a new physical page is allocated in the
+ * page table structure we populate that page via the CPU and insert that new
+ * page into the existing page table structure via a GPU job. Also any existing
+ * pages in the page table structure that need to be modified also are updated
+ * via the GPU job. As the root physical page is prealloced on VM creation our
+ * GPU job will always have at least 1 update. The in / out fences are passed to
+ * this job so again this is conceptually the same as an exec.
+ *
+ * Very simple example of few binds on an empty VM with 48 bits of address space
+ * and the resulting operations:
+ *
+ * .. code-block::
+ *
+ *	bind BO0 0x0-0x1000
+ *	alloc page level 3a, program PTE[0] to BO0 phys address (CPU)
+ *	alloc page level 2, program PDE[0] page level 3a phys address (CPU)
+ *	alloc page level 1, program PDE[0] page level 2 phys address (CPU)
+ *	update root PDE[0] to page level 1 phys address (GPU)
+ *
+ *	bind BO1 0x201000-0x202000
+ *	alloc page level 3b, program PTE[1] to BO1 phys address (CPU)
+ *	update page level 2 PDE[1] to page level 3b phys address (GPU)
+ *
+ *	bind BO2 0x1ff000-0x201000
+ *	update page level 3a PTE[511] to BO2 phys address (GPU)
+ *	update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU)
+ *
+ * GPU bypass
+ * ~~~~~~~~~~
+ *
+ * In the above example the steps using the GPU can be converted to CPU if the
+ * bind can be done immediately (all in-fences satisfied, VM dma-resv kernel
+ * slot is idle).
+ *
+ * Address space
+ * -------------
+ *
+ * Depending on platform either 48 or 57 bits of address space is supported.
+ *
+ * Page sizes
+ * ----------
+ *
+ * The minimum page size is either 4k or 64k depending on platform and memory
+ * placement (sysmem vs. VRAM). We enforce that binds must be aligned to the
+ * minimum page size.
+ *
+ * Larger pages (2M or 1GB) can be used for BOs in VRAM if the BO physical
+ * address is aligned to the larger page size and the VA is aligned to the
+ * larger page size. Larger pages for userptrs / BOs in sysmem should be
+ * possible but are not yet implemented.
+ *
+ * Sync error handling mode
+ * ------------------------
+ *
+ * In both modes during the bind IOCTL the user input is validated. In sync
+ * error handling mode the newly bound BO is validated (potentially moved back
+ * to a region of memory where it can be used), page tables are updated by the
+ * CPU and the job to do the GPU binds is created in the IOCTL itself. This step
+ * can fail due to memory pressure. The user can recover by freeing memory and
+ * trying this operation again.
+ *
+ * Async error handling mode
+ * -------------------------
+ *
+ * In async error handling the step of validating the BO, updating page tables,
+ * and generating a job are deferred to an async worker. As this step can now
+ * fail after the IOCTL has reported success we need an error handling flow
+ * from which the user can recover.
+ *
+ * The solution is for a user to register a user address with the VM which the
+ * VM uses to report errors to. The ufence wait interface can be used to wait on
+ * a VM going into an error state. Once an error is reported the VM's async
+ * worker is paused. While the VM's async worker is paused, sync
+ * XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the
+ * user believes the error state is fixed, the async worker can be resumed via
+ * XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted, the
+ * first operation processed is the operation that caused the original error.
+ *
+ * Bind queues / engines
+ * ---------------------
+ *
+ * Think of the case where we have two bind operations A + B and are submitted
+ * in that order. A has in fences while B has none. If using a single bind
+ * queue, B is now blocked on A's in fences even though it is ready to run. This
+ * example is a real use case for VK sparse binding. We work around this
+ * limitation by implementing bind engines.
+ *
+ * In the bind IOCTL the user can optionally pass in an engine ID which must map
+ * to an engine which is of the special class DRM_XE_ENGINE_CLASS_VM_BIND.
+ * Underneath this is really a virtual engine that can run on any of the copy
+ * hardware engines. The job(s) created each IOCTL are inserted into this
+ * engine's ring. In the example above if A and B have different bind engines B
+ * is free to pass A. If the engine ID field is omitted, the default bind queue
+ * for the VM is used.
+ *
+ * TODO: Explain race in issue 41 and how we solve it
+ *
+ * Array of bind operations
+ * ------------------------
+ *
+ * The uAPI allows multiple bind operations to be passed in via a user array,
+ * of struct drm_xe_vm_bind_op, in a single VM bind IOCTL. This interface
+ * matches the VK sparse binding API. The implementation is rather simple, parse
+ * the array into a list of operations, pass the in fences to the first operation,
+ * and pass the out fences to the last operation. The ordered nature of a bind
+ * engine makes this possible.
+ *
+ * Munmap semantics for unbinds
+ * ----------------------------
+ *
+ * Munmap allows things like:
+ *
+ * .. code-block::
+ *
+ *	0x0000-0x2000 and 0x3000-0x5000 have mappings
+ *	Munmap 0x1000-0x4000, results in mappings 0x0000-0x1000 and 0x4000-0x5000
+ *
+ * To support this semantic in the above example we decompose the above example
+ * into 4 operations:
+ *
+ * .. code-block::
+ *
+ *	unbind 0x0000-0x2000
+ *	unbind 0x3000-0x5000
+ *	rebind 0x0000-0x1000
+ *	rebind 0x4000-0x5000
+ *
+ * Why not just do a partial unbind of 0x1000-0x2000 and 0x3000-0x4000? This
+ * falls apart when using large pages at the edges and the unbind forces us to
+ * use a smaller page size. For simplicity we always issue a set of unbinds
+ * unmapping anything in the range and at most 2 rebinds on the edges.
+ *
+ * Similar to an array of binds, in fences are passed to the first operation and
+ * out fences are signaled on the last operation.
+ *
+ * In this example there is a window of time where 0x0000-0x1000 and
+ * 0x4000-0x5000 are invalid but the user didn't ask for these addresses to be
+ * removed from the mapping. To work around this we treat any munmap style
+ * unbinds which require a rebind as kernel operations (BO eviction or userptr
+ * invalidation). The first operation waits on the VM's
+ * DMA_RESV_USAGE_PREEMPT_FENCE slots (waits for all pending jobs on VM to
+ * complete / triggers preempt fences) and the last operation is installed in
+ * the VM's DMA_RESV_USAGE_KERNEL slot (blocks future jobs / resume compute mode
+ * VM). The caveat is all dma-resv slots must be updated atomically with respect
+ * to execs and compute mode rebind worker. To accomplish this, hold the
+ * vm->lock in write mode from the first operation until the last.
+ *
+ * Deferred binds in fault mode
+ * ----------------------------
+ *
+ * If a VM is in fault mode (TODO: link to fault mode), new bind operations that
+ * create mappings are by default deferred to the page fault handler (first
+ * use). This behavior can be overridden by setting the flag
+ * XE_VM_BIND_FLAG_IMMEDIATE which indicates that the mapping should be created
+ * immediately.
+ *
+ * User pointer
+ * ============
+ *
+ * User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which the
+ * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO
+ * was created and then a binding was created. We bypass creating a dummy BO in
+ * XE and simply create a binding directly from the userptr.
+ *
+ * Invalidation
+ * ------------
+ *
+ * Since this is core kernel managed memory the kernel can move this memory
+ * whenever it wants. We register an invalidation MMU notifier to alert XE when
+ * a user pointer is about to move. The invalidation notifier needs to block
+ * until all pending users (jobs or compute mode engines) of the userptr are
+ * idle to ensure no faults. This is done by waiting on all VM dma-resv slots.
+ *
+ * Rebinds
+ * -------
+ *
+ * Either the next exec (non-compute) or rebind worker (compute mode) will
+ * rebind the userptr. The invalidation MMU notifier kicks the rebind worker
+ * after the VM dma-resv wait if the VM is in compute mode.
+ *
+ * Compute mode
+ * ============
+ *
+ * A VM in compute mode enables long running workloads and ultra low latency
+ * submission (ULLS). ULLS is implemented via a continuously running batch +
+ * semaphores. This enables the user to insert jump to new batch commands
+ * into the continuously running batch. In both cases these batches exceed the
+ * time a dma fence is allowed to exist for before signaling, as such dma fences
+ * are not used when a VM is in compute mode. User fences (TODO: link user fence
+ * doc) are used instead to signal operation's completion.
+ *
+ * Preempt fences
+ * --------------
+ *
+ * If the kernel decides to move memory around (either userptr invalidate, BO
+ * eviction, or munmap style unbind which results in a rebind) and a batch is
+ * running on an engine, that batch can fault or cause a memory corruption as
+ * page tables for the moved memory are no longer valid. To work around this we
+ * introduce the concept of preempt fences. When sw signaling is enabled on a
+ * preempt fence it tells the submission backend to kick that engine off the
+ * hardware and the preempt fence signals when the engine is off the hardware.
+ * Once all preempt fences are signaled for a VM the kernel can safely move the
+ * memory and kick the rebind worker which resumes all the engines execution.
+ *
+ * A preempt fence, for every engine using the VM, is installed in the VM's
+ * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every
+ * engine using the VM, is also installed into the same dma-resv slot of every
+ * external BO mapped in the VM.
+ *
+ * Rebind worker
+ * -------------
+ *
+ * The rebind worker is very similar to an exec. It is responsible for rebinding
+ * evicted BOs or userptrs, waiting on those operations, installing new preempt
+ * fences, and finally resuming executing of engines in the VM.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ *	<----------------------------------------------------------------------|
+ *	Check if VM is closed, if so bail out                                  |
+ *	Lock VM global lock in read mode                                       |
+ *	Pin userptrs (also finds userptr invalidated since last rebind worker) |
+ *	Lock VM dma-resv and external BOs dma-resv                             |
+ *	Validate BOs that have been evicted                                    |
+ *	Wait on and allocate new preempt fences for every engine using the VM  |
+ *	Rebind invalidated userptrs + evicted BOs                              |
+ *	Wait on last rebind fence                                              |
+ *	Wait VM's DMA_RESV_USAGE_KERNEL dma-resv slot                          |
+ *	Install preempt fences and issue resume for every engine using the VM  |
+ *	Check if any userptrs invalidated since pin                            |
+ *		Squash resume for all engines                                  |
+ *		Unlock all                                                     |
+ *		Wait all VM's dma-resv slots                                   |
+ *		Retry ----------------------------------------------------------
+ *	Release all engines waiting to resume
+ *	Unlock all
+ *
+ * Timeslicing
+ * -----------
+ *
+ * In order to prevent an engine from continuously being kicked off the hardware
+ * and making no forward progress, an engine has a period of time it is allowed
+ * to run after resume before it can be kicked off again. This effectively gives
+ * each engine a timeslice.
+ *
+ * Handling multiple GTs
+ * =====================
+ *
+ * If a GT has slower access to some regions and the page table structure is in
+ * the slow region, the performance on that GT could adversely be affected. To
+ * work around this we allow a VM's page tables to be shadowed in multiple GTs.
+ * When VM is created, a default bind engine and PT table structure are created
+ * on each GT.
+ *
+ * Binds can optionally pass in a mask of GTs where a mapping should be created,
+ * if this mask is zero then default to all the GTs where the VM has page
+ * tables.
+ *
+ * The implementation for this breaks down into a bunch of for_each_gt loops in
+ * various places plus exporting a composite fence for multi-GT binds to the
+ * user.
+ *
+ * Fault mode (unified shared memory)
+ * ==================================
+ *
+ * A VM in fault mode can be enabled on devices that support page faults. If
+ * page faults are enabled, using dma fences can potentially induce a deadlock:
+ * A pending page fault can hold up the GPU work which holds up the dma fence
+ * signaling, and memory allocation is usually required to resolve a page
+ * fault, but memory allocation is not allowed to gate dma fence signaling. As
+ * such, dma fences are not allowed when VM is in fault mode. Because dma-fences
+ * are not allowed, long running workloads and ULLS are enabled on a faulting
+ * VM.
+ *
+ * Deferred VM binds
+ * -----------------
+ *
+ * By default, on a faulting VM binds just allocate the VMA and the actual
+ * updating of the page tables is deferred to the page fault handler. This
+ * behavior can be overridden by setting the flag XE_VM_BIND_FLAG_IMMEDIATE in
+ * the VM bind which will then do the bind immediately.
+ *
+ * Page fault handler
+ * ------------------
+ *
+ * Page faults are received in the G2H worker under the CT lock which is in the
+ * path of dma fences (no memory allocations are allowed, faults require memory
+ * allocations) thus we cannot process faults under the CT lock. Another issue
+ * is faults issue TLB invalidations which require G2H credits and we cannot
+ * allocate G2H credits in the G2H handlers without deadlocking. Lastly, we do
+ * not want the CT lock to be an outer lock of the VM global lock (VM global
+ * lock is required for fault processing).
+ *
+ * To work around the above issue with processing faults in the G2H worker, we
+ * sink faults to a buffer which is large enough to sink all possible faults on
+ * the GT (1 per hardware engine) and kick a worker to process the faults. Since
+ * the page faults G2H are already received in a worker, kicking another worker
+ * adds more latency to a critical performance path. We add a fast path in the
+ * G2H irq handler which looks at first G2H and if it is a page fault we sink
+ * the fault to the buffer and kick the worker to process the fault. TLB
+ * invalidation responses are also in the critical path so these can also be
+ * processed in this fast path.
+ *
+ * Multiple buffers and workers are used and hashed over based on the ASID so
+ * faults from different VMs can be processed in parallel.
+ *
+ * The page fault handler itself is rather simple, flow is below.
+ *
+ * .. code-block::
+ *
+ *	Lookup VM from ASID in page fault G2H
+ *	Lock VM global lock in read mode
+ *	Lookup VMA from address in page fault G2H
+ *	Check if VMA is valid, if not bail
+ *	Check if VMA's BO has backing store, if not allocate
+ *	<----------------------------------------------------------------------|
+ *	If userptr, pin pages                                                  |
+ *	Lock VM & BO dma-resv locks                                            |
+ *	If atomic fault, migrate to VRAM, else validate BO location            |
+ *	Issue rebind                                                           |
+ *	Wait on rebind to complete                                             |
+ *	Check if userptr invalidated since pin                                 |
+ *		Drop VM & BO dma-resv locks                                    |
+ *		Retry ----------------------------------------------------------
+ *	Unlock all
+ *	Issue blocking TLB invalidation
+ *	Send page fault response to GuC
+ *
+ * Access counters
+ * ---------------
+ *
+ * Access counters can be configured to trigger a G2H indicating the device is
+ * accessing VMAs in system memory frequently as hint to migrate those VMAs to
+ * VRAM.
+ *
+ * Same as the page fault handler, access counters G2H cannot be processed in
+ * the G2H worker under the CT lock. Again we use a buffer to sink access
+ * counter G2H. Unlike page faults there is no upper bound so if the buffer is
+ * full we simply drop the G2H. Access counters are a best case optimization and
+ * it is safe to drop these unlike page faults.
+ *
+ * The access counter handler itself is rather simple, flow is below.
+ *
+ * .. code-block::
+ *
+ *	Lookup VM from ASID in access counter G2H
+ *	Lock VM global lock in read mode
+ *	Lookup VMA from address in access counter G2H
+ *	If userptr, bail nothing to do
+ *	Lock VM & BO dma-resv locks
+ *	Issue migration to VRAM
+ *	Unlock all
+ *
+ * Notice no rebind is issued in the access counter handler as the rebind will
+ * be issued on next page fault.
+ *
+ * Caveats with eviction / user pointer invalidation
+ * -------------------------------------------------
+ *
+ * In the case of eviction and user pointer invalidation on a faulting VM, there
+ * is no need to issue a rebind rather we just need to blow away the page tables
+ * for the VMAs and the page fault handler will rebind the VMAs when they fault.
+ * The caveat is to update / read the page table structure the VM global lock is
+ * needed. In both the case of eviction and user pointer invalidation locks are
+ * held which make acquiring the VM global lock impossible. To work around this
+ * every VMA maintains a list of leaf page table entries which should be written
+ * to zero to blow away the VMA's page tables. After writing zero to these
+ * entries a blocking TLB invalidate is issued. At this point it is safe for the
+ * kernel to move the VMA's memory around. This is a necessary lockless
+ * algorithm and is safe as leafs cannot be changed while either an eviction or
+ * userptr invalidation is occurring.
+ *
+ * Locking
+ * =======
+ *
+ * VM locking protects all of the core data paths (bind operations, execs,
+ * evictions, and compute mode rebind worker) in XE.
+ *
+ * Locks
+ * -----
+ *
+ * VM global lock (vm->lock) - rw semaphore lock. Outer most lock which protects
+ * the list of userptrs mapped in the VM, the list of engines using this VM, and
+ * the array of external BOs mapped in the VM. Adding or removing any of the
+ * aforementioned state from the VM should acquire this lock in write mode. The
+ * VM bind path also acquires this lock in write mode while the exec / compute
+ * mode rebind worker acquire this lock in read mode.
+ *
+ * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv
+ * slots which is shared with any private BO in the VM. Expected to be acquired
+ * during VM binds, execs, and compute mode rebind worker. This lock is also
+ * held when private BOs are being evicted.
+ *
+ * external BO dma-resv lock (bo->ttm.base.resv->lock) - WW lock. Protects
+ * external BO dma-resv slots. Expected to be acquired during VM binds (in
+ * addition to the VM dma-resv lock). All external BO dma-locks within a VM are
+ * expected to be acquired (in addition to the VM dma-resv lock) during execs
+ * and the compute mode rebind worker. This lock is also held when an external
+ * BO is being evicted.
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. An exec and bind operation with the same VM can't be executing at the same
+ * time (vm->lock).
+ *
+ * 2. A compute mode rebind worker and bind operation with the same VM can't be
+ * executing at the same time (vm->lock).
+ *
+ * 3. We can't add / remove userptrs or external BOs to a VM while an exec with
+ * the same VM is executing (vm->lock).
+ *
+ * 4. We can't add / remove userptrs, external BOs, or engines to a VM while a
+ * compute mode rebind worker with the same VM is executing (vm->lock).
+ *
+ * 5. Evictions within a VM can't happen while an exec with the same VM is
+ * executing (dma-resv locks).
+ *
+ * 6. Evictions within a VM can't happen while a compute mode rebind worker
+ * with the same VM is executing (dma-resv locks).
+ *
+ * dma-resv usage
+ * ==============
+ *
+ * As previously stated to enforce the ordering of kernel ops (eviction, userptr
+ * invalidation, munmap style unbinds which result in a rebind), rebinds during
+ * execs, execs, and resumes in the rebind worker we use both the VMs and
+ * external BOs dma-resv slots. Let's try to make this as clear as possible.
+ *
+ * Slot installation
+ * -----------------
+ *
+ * 1. Jobs from kernel ops install themselves into the DMA_RESV_USAGE_KERNEL
+ * slot of either an external BO or VM (depends on if kernel op is operating on
+ * an external or private BO)
+ *
+ * 2. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_BOOKKEEP slot of the VM
+ *
+ * 3. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_WRITE slot of all external BOs in the VM
+ *
+ * 4. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the VM
+ *
+ * 5. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the external BO (if the bind is to an external BO, in addition to #4)
+ *
+ * 6. Every engine using a compute mode VM has a preempt fence installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of the VM
+ *
+ * 7. Every engine using a compute mode VM has a preempt fence installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of all the external BOs in the VM
+ *
+ * Slot waiting
+ * ------------
+ *
+ * 1. The execution of all jobs from kernel ops shall wait on all slots
+ * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
+ * kernel op is operating on external or private BO)
+ *
+ * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall
+ * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
+ * (depends on if the rebind is operating on an external or private BO)
+ *
+ * 3. In non-compute mode, the execution of all jobs from execs shall wait on
+ * the last rebind job
+ *
+ * 4. In compute mode, the execution of all jobs from rebinds in the rebind
+ * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
+ * or VM (depends on if rebind is operating on external or private BO)
+ *
+ * 5. In compute mode, resumes in rebind worker shall wait on last rebind fence
+ *
+ * 6. In compute mode, resumes in rebind worker shall wait on the
+ * DMA_RESV_USAGE_KERNEL slot of the VM
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. New jobs from kernel ops are blocked behind any existing jobs from
+ * non-compute mode execs
+ *
+ * 2. New jobs from non-compute mode execs are blocked behind any existing jobs
+ * from kernel ops and rebinds
+ *
+ * 3. New jobs from kernel ops are blocked behind all preempt fences signaling in
+ * compute mode
+ *
+ * 4. Compute mode engine resumes are blocked behind any existing jobs from
+ * kernel ops and rebinds
+ *
+ * Future work
+ * ===========
+ *
+ * Support large pages for sysmem and userptr.
+ *
+ * Update page faults to handle BOs at page level granularity (e.g. part of BO
+ * could be in system memory while another part could be in VRAM).
+ *
+ * Page fault handler likely can be optimized a bit more (e.g. Rebinds always
+ * wait on the dma-resv kernel slots of VM or BO, technically we only have to
+ * wait on the BO moving. If using a job to do the rebind, we could not block in
+ * the page fault handler rather attach a callback to fence of the rebind job to
+ * signal page fault complete. Our handling of short circuiting for atomic
+ * faults for bound VMAs could be better. etc...). We can tune all of this once
+ * we have benchmarks / performance numbers from workloads up and running.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
new file mode 100644
index 000000000000..4498aa2fbd47
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/xe_drm.h>
+#include <drm/ttm/ttm_tt.h>
+#include <linux/nospec.h>
+
+#include "xe_bo.h"
+#include "xe_vm.h"
+#include "xe_vm_madvise.h"
+
+/* Set the preferred memory class of the BO behind each of @vmas to @value. */
+static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm,
+				       struct xe_vma **vmas, int num_vmas,
+				       u64 value)
+{
+	int n, ret;
+
+	if (XE_IOCTL_ERR(xe, value > XE_MEM_REGION_CLASS_VRAM))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, value == XE_MEM_REGION_CLASS_VRAM &&
+			 !xe->info.is_dgfx))
+		return -EINVAL;
+
+	for (n = 0; n < num_vmas; n++) {
+		struct xe_bo *bo = vmas[n]->bo;
+		struct ww_acquire_ctx ctx;
+
+		ret = xe_bo_lock(bo, &ctx, 0, true);
+		if (ret)
+			return ret;
+		/* Property and placement are both updated under the BO lock. */
+		bo->props.preferred_mem_class = value;
+		xe_bo_placement_for_flags(xe, bo, bo->flags);
+		xe_bo_unlock(bo, &ctx);
+	}
+
+	return 0;
+}
+
+/* Set the preferred GT of the BO behind each of @vmas to @value. */
+static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm,
+				struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	int n, ret;
+
+	/* NOTE(review): value == tile_count passes this check - confirm. */
+	if (XE_IOCTL_ERR(xe, value > xe->info.tile_count))
+		return -EINVAL;
+
+	for (n = 0; n < num_vmas; n++) {
+		struct xe_bo *bo = vmas[n]->bo;
+		struct ww_acquire_ctx ctx;
+
+		ret = xe_bo_lock(bo, &ctx, 0, true);
+		if (ret)
+			return ret;
+		bo->props.preferred_gt = value;
+		xe_bo_placement_for_flags(xe, bo, bo->flags);
+		xe_bo_unlock(bo, &ctx);
+	}
+
+	return 0;
+}
+
+/* @value packs the GT id (upper 32 bits) and memory class (lower 32 bits). */
+static int madvise_preferred_mem_class_gt(struct xe_device *xe,
+					  struct xe_vm *vm,
+					  struct xe_vma **vmas, int num_vmas,
+					  u64 value)
+{
+	u32 mem_class = lower_32_bits(value);
+	u32 gt_id = upper_32_bits(value);
+	int n, ret;
+
+	if (XE_IOCTL_ERR(xe, mem_class > XE_MEM_REGION_CLASS_VRAM))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, mem_class == XE_MEM_REGION_CLASS_VRAM &&
+			 !xe->info.is_dgfx))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, gt_id > xe->info.tile_count))
+		return -EINVAL;
+
+	for (n = 0; n < num_vmas; n++) {
+		struct xe_bo *bo = vmas[n]->bo;
+		struct ww_acquire_ctx ctx;
+
+		ret = xe_bo_lock(bo, &ctx, 0, true);
+		if (ret)
+			return ret;
+		/* Both preferences are stored before the placement call. */
+		bo->props.preferred_mem_class = mem_class;
+		bo->props.preferred_gt = gt_id;
+		xe_bo_placement_for_flags(xe, bo, bo->flags);
+		xe_bo_unlock(bo, &ctx);
+	}
+
+	return 0;
+}
+
+/* Set props.cpu_atomic on each BO; BOs without the SYSTEM flag are refused. */
+static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm,
+			      struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	int n, ret;
+
+	for (n = 0; n < num_vmas; n++) {
+		struct xe_bo *bo = vmas[n]->bo;
+		struct ww_acquire_ctx ctx;
+
+		if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT)))
+			return -EINVAL;
+
+		ret = xe_bo_lock(bo, &ctx, 0, true);
+		if (ret)
+			return ret;
+		bo->props.cpu_atomic = !!value;
+
+		/*
+		 * From now on the CPU may only access the BO in system memory.
+		 * Dropping the CPU page table entries is enough: the next CPU
+		 * access will trigger the migration.
+		 */
+		if (bo->props.cpu_atomic)
+			ttm_bo_unmap_virtual(&bo->ttm);
+		xe_bo_unlock(bo, &ctx);
+	}
+
+	return 0;
+}
+
+/* Set props.device_atomic on each BO; BOs without a VRAM flag are refused. */
+static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm,
+				 struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	int n, ret;
+
+	for (n = 0; n < num_vmas; n++) {
+		struct xe_bo *bo = vmas[n]->bo;
+		struct ww_acquire_ctx ctx;
+
+		if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) &&
+				 !(bo->flags & XE_BO_CREATE_VRAM1_BIT)))
+			return -EINVAL;
+
+		ret = xe_bo_lock(bo, &ctx, 0, true);
+		if (ret)
+			return ret;
+		bo->props.device_atomic = !!value;
+		xe_bo_unlock(bo, &ctx);
+	}
+
+	return 0;
+}
+
+/* Set the TTM priority of each BO in @vmas; HIGH requires CAP_SYS_NICE. */
+static int madvise_priority(struct xe_device *xe, struct xe_vm *vm,
+			    struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	int n, ret;
+
+	if (XE_IOCTL_ERR(xe, value > DRM_XE_VMA_PRIORITY_HIGH))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, value == DRM_XE_VMA_PRIORITY_HIGH &&
+			 !capable(CAP_SYS_NICE)))
+		return -EPERM;
+
+	for (n = 0; n < num_vmas; n++) {
+		struct xe_bo *bo = vmas[n]->bo;
+		struct ww_acquire_ctx ctx;
+
+		ret = xe_bo_lock(bo, &ctx, 0, true);
+		if (ret)
+			return ret;
+		/* Move the BO to the LRU tail after changing its priority. */
+		bo->ttm.priority = value;
+		ttm_bo_move_to_lru_tail(&bo->ttm);
+		xe_bo_unlock(bo, &ctx);
+	}
+
+	return 0;
+}
+
+static int madvise_pin(struct xe_device *xe, struct xe_vm *vm,
+		       struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	XE_WARN_ON("NIY");	/* presumably "not implemented yet"; args unused */
+	return 0;		/* NOTE(review): reports success without acting */
+}
+
+typedef int (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
+			    struct xe_vma **vmas, int num_vmas, u64 value);
+/* Property handlers, indexed by the DRM_XE_VM_MADVISE_* property value. */
+static const madvise_func madvise_funcs[] = {
+	[DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS] = madvise_preferred_mem_class,
+	[DRM_XE_VM_MADVISE_PREFERRED_GT] = madvise_preferred_gt,
+	[DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT] =
+		madvise_preferred_mem_class_gt,
+	[DRM_XE_VM_MADVISE_CPU_ATOMIC] = madvise_cpu_atomic,
+	[DRM_XE_VM_MADVISE_DEVICE_ATOMIC] = madvise_device_atomic,
+	[DRM_XE_VM_MADVISE_PRIORITY] = madvise_priority,
+	[DRM_XE_VM_MADVISE_PIN] = madvise_pin,
+};
+
+static struct xe_vma *node_to_vma(const struct rb_node *node)
+{
+	BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
+	return (struct xe_vma *)node;	/* valid only with vm_node at offset 0 */
+}
+
+/*
+ * Append @vma to *@vmas (*@num_vmas of *@max_vmas slots used), doubling the
+ * array when it is full so that the store below never writes past its end.
+ * Returns 0, or -ENOMEM with *@vmas left untouched.
+ */
+static int append_vma(struct xe_vma ***vmas, int *num_vmas, int *max_vmas,
+		      struct xe_vma *vma)
+{
+	if (*num_vmas == *max_vmas) {
+		struct xe_vma **__vmas;
+
+		__vmas = krealloc_array(*vmas, *max_vmas * 2, sizeof(**vmas),
+					GFP_KERNEL);
+		if (!__vmas)
+			return -ENOMEM;
+		*vmas = __vmas;
+		*max_vmas *= 2;
+	}
+
+	(*vmas)[(*num_vmas)++] = vma;
+	return 0;
+}
+
+/*
+ * Collect the BO-backed VMAs of @vm that overlap [@addr, @addr + @range - 1];
+ * userptr VMAs are skipped. Must be called with vm->lock held, and *@num_vmas
+ * must be 0 on entry; it is advanced by one for every VMA stored.
+ * Returns a kmalloc'ed array (possibly without entries) that the caller must
+ * kfree(), or NULL if memory could not be allocated.
+ */
+static struct xe_vma **
+get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
+{
+	struct xe_vma **vmas;
+	struct xe_vma *vma, lookup;
+	int max_vmas = 8;
+	struct rb_node *node;
+
+	lockdep_assert_held(&vm->lock);
+
+	vmas = kmalloc_array(max_vmas, sizeof(*vmas), GFP_KERNEL);
+	if (!vmas)
+		return NULL;
+
+	lookup.start = addr;
+	lookup.end = addr + range - 1;
+
+	vma = xe_vm_find_overlapping_vma(vm, &lookup);
+	if (!vma)
+		return vmas;
+
+	if (!xe_vma_is_userptr(vma) &&
+	    append_vma(&vmas, num_vmas, &max_vmas, vma))
+		goto err_free;
+
+	/* Walk right, then left, while nodes still match the lookup range. */
+	for (node = rb_next(&vma->vm_node);
+	     node && !xe_vma_cmp_vma_cb(&lookup, node); node = rb_next(node))
+		if (!xe_vma_is_userptr(node_to_vma(node)) &&
+		    append_vma(&vmas, num_vmas, &max_vmas, node_to_vma(node)))
+			goto err_free;
+
+	for (node = rb_prev(&vma->vm_node);
+	     node && !xe_vma_cmp_vma_cb(&lookup, node); node = rb_prev(node))
+		if (!xe_vma_is_userptr(node_to_vma(node)) &&
+		    append_vma(&vmas, num_vmas, &max_vmas, node_to_vma(node)))
+			goto err_free;
+
+	return vmas;
+
+err_free:
+	kfree(vmas);	/* the caller only sees NULL, so free the array here */
+	return NULL;
+}
+
+/*
+ * VM madvise ioctl: apply args->property / args->value to the BO-backed VMAs
+ * of a fault-mode VM that overlap the given range. Returns 0 or -errno.
+ */
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_madvise *args = data;
+	struct xe_vm *vm;
+	struct xe_vma **vmas = NULL;
+	int num_vmas = 0, err = 0, idx;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+	/* property == ARRAY_SIZE would be clamped to handler 0 further down */
+	if (XE_IOCTL_ERR(xe, args->property >= ARRAY_SIZE(madvise_funcs)))
+		return -EINVAL;
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_ERR(xe, !vm))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
+		err = -ENOENT;
+		goto put_vm;
+	}
+
+	if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) {
+		err = -EINVAL;
+		goto put_vm;
+	}
+
+	down_read(&vm->lock);
+
+	vmas = get_vmas(vm, &num_vmas, args->addr, args->range);
+	if (XE_IOCTL_ERR(xe, !vmas)) {
+		err = -ENOMEM;
+		goto unlock_vm;
+	}
+
+	if (XE_IOCTL_ERR(xe, !num_vmas)) {
+		err = -EINVAL;
+		goto unlock_vm;
+	}
+
+	idx = array_index_nospec(args->property, ARRAY_SIZE(madvise_funcs));
+	err = madvise_funcs[idx](xe, vm, vmas, num_vmas, args->value);
+
+unlock_vm:
+	up_read(&vm->lock);
+put_vm:
+	xe_vm_put(vm);
+	kfree(vmas);
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h
new file mode 100644
index 000000000000..eecd33acd248
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_VM_MADVISE_H_
+#define _XE_VM_MADVISE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
new file mode 100644
index 000000000000..2a3b911ab358
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_TYPES_H_
+#define _XE_VM_TYPES_H_
+
+#include <linux/dma-resv.h>
+#include <linux/kref.h>
+#include <linux/mmu_notifier.h>
+#include <linux/scatterlist.h>
+
+#include "xe_device_types.h"
+#include "xe_pt_types.h"
+
+struct xe_bo;
+struct xe_vm;
+
+struct xe_vma {
+	struct rb_node vm_node;	/* rb-tree linkage (presumably into vm->vmas) */
+	/** @vm: VM which this VMA belongs to */
+	struct xe_vm *vm;
+
+	/**
+	 * @start: start address of this VMA within its address domain, end -
+	 * start + 1 == VMA size
+	 */
+	u64 start;
+	/** @end: end address of this VMA within its address domain */
+	u64 end;
+	/** @pte_flags: pte flags for this VMA */
+	u32 pte_flags;
+
+	/** @bo: BO if not a userptr, must be NULL if userptr */
+	struct xe_bo *bo;
+	/** @bo_offset: offset into BO if not a userptr, unused for userptr */
+	u64 bo_offset;
+
+	/** @gt_mask: GT mask of where to create binding for this VMA */
+	u64 gt_mask;
+
+	/**
+	 * @gt_present: GT mask of where bindings are present for this VMA.
+	 * protected by vm->lock, vm->resv and for userptrs,
+	 * vm->userptr.notifier_lock for writing. Needs either for reading,
+	 * but if reading is done under the vm->lock only, it needs to be held
+	 * in write mode.
+	 */
+	u64 gt_present;
+
+	/**
+	 * @destroyed: VMA is destroyed, in the sense that it shouldn't be
+	 * subject to rebind anymore. This field must be written under
+	 * the vm lock in write mode and the userptr.notifier_lock in
+	 * either mode. Read under the vm lock or the userptr.notifier_lock in
+	 * write mode.
+	 */
+	bool destroyed;
+
+	/**
+	 * @first_munmap_rebind: VMA is first in a sequence of ops that triggers
+	 * a rebind (munmap style VM unbinds). This indicates the operation
+	 * using this VMA must wait on all dma-resv slots (wait for pending jobs
+	 * / trigger preempt fences).
+	 */
+	bool first_munmap_rebind;
+
+	/**
+	 * @last_munmap_rebind: VMA is last in a sequence of ops that triggers
+	 * a rebind (munmap style VM unbinds). This indicates the operation
+	 * using this VMA must install itself into kernel dma-resv slot (blocks
+	 * future jobs) and kick the rebind work in compute mode.
+	 */
+	bool last_munmap_rebind;
+
+	/** @use_atomic_access_pte_bit: Set atomic access bit in PTE */
+	bool use_atomic_access_pte_bit;
+
+	union {
+		/** @bo_link: link into BO if not a userptr */
+		struct list_head bo_link;
+		/** @userptr_link: link into VM repin list if userptr */
+		struct list_head userptr_link;
+	};
+
+	/**
+	 * @rebind_link: link into VM if this VMA needs rebinding, and
+	 * if it's a bo (not userptr) needs validation after a possible
+	 * eviction. Protected by the vm's resv lock.
+	 */
+	struct list_head rebind_link;
+
+	/**
+	 * @unbind_link: link or list head if an unbind of multiple VMAs, in
+	 * single unbind op, is being done.
+	 */
+	struct list_head unbind_link;
+
+	/** @destroy_cb: callback to destroy VMA when unbind job is done */
+	struct dma_fence_cb destroy_cb;
+
+	/** @destroy_work: worker to destroy this BO */
+	struct work_struct destroy_work;
+
+	/** @userptr: user pointer state */
+	struct {
+		/** @ptr: user pointer */
+		uintptr_t ptr;
+		/** @invalidate_link: Link for the vm::userptr.invalidated list */
+		struct list_head invalidate_link;
+		/**
+		 * @notifier: MMU notifier for user pointer (invalidation call back)
+		 */
+		struct mmu_interval_notifier notifier;
+		/** @sgt: storage for a scatter gather table */
+		struct sg_table sgt;
+		/** @sg: allocated scatter gather table */
+		struct sg_table *sg;
+		/** @notifier_seq: notifier sequence number */
+		unsigned long notifier_seq;
+		/**
+		 * @initial_bind: user pointer has been bound at least once.
+		 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
+		 * read: vm->userptr.notifier_lock in write mode or vm->resv held.
+		 */
+		bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+		u32 divisor;
+#endif
+	} userptr;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @gt_invalidated: VMA has been invalidated */
+		u64 gt_invalidated;
+	} usm;
+
+	struct {
+		struct list_head rebind_link;
+	} notifier;
+
+	struct {
+		/**
+		 * @extobj.link: Link into vm's external object list.
+		 * protected by the vm lock.
+		 */
+		struct list_head link;
+	} extobj;
+};
+
+struct xe_device;
+
+#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
+
+struct xe_vm {
+	struct xe_device *xe;
+
+	struct kref refcount;
+
+	/* engine used for (un)binding vma's */
+	struct xe_engine *eng[XE_MAX_GT];
+
+	/** Protects @rebind_list and the page-table structures */
+	struct dma_resv resv;
+
+	u64 size;
+	struct rb_root vmas;
+
+	struct xe_pt *pt_root[XE_MAX_GT];
+	struct xe_bo *scratch_bo[XE_MAX_GT];
+	struct xe_pt *scratch_pt[XE_MAX_GT][XE_VM_MAX_LEVEL];
+
+	/** @flags: flags for this VM, statically set up at creation time */
+#define XE_VM_FLAGS_64K			BIT(0)
+#define XE_VM_FLAG_COMPUTE_MODE		BIT(1)
+#define XE_VM_FLAG_ASYNC_BIND_OPS	BIT(2)
+#define XE_VM_FLAG_MIGRATION		BIT(3)
+#define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
+#define XE_VM_FLAG_FAULT_MODE		BIT(5)
+#define XE_VM_FLAG_GT_ID(flags)		(((flags) >> 6) & 0x3)
+#define XE_VM_FLAG_SET_GT_ID(gt)	((gt)->info.id << 6)
+	unsigned long flags;
+
+	/** @composite_fence_ctx: context composite fence */
+	u64 composite_fence_ctx;
+	/** @composite_fence_seqno: seqno for composite fence */
+	u32 composite_fence_seqno;
+
+	/**
+	 * @lock: outer most lock, protects objects of anything attached to this
+	 * VM
+	 */
+	struct rw_semaphore lock;
+
+	/**
+	 * @rebind_list: list of VMAs that need rebinding, and if they are
+	 * bos (not userptr), need validation after a possible eviction. The
+	 * list is protected by @resv.
+	 */
+	struct list_head rebind_list;
+
+	/** @rebind_fence: rebind fence from execbuf */
+	struct dma_fence *rebind_fence;
+
+	/**
+	 * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
+	 * from an irq context can be last put and the destroy needs to be able
+	 * to sleep.
+	 */
+	struct work_struct destroy_work;
+
+	/** @extobj: bookkeeping for external objects. Protected by the vm lock */
+	struct {
+		/** @entries: number of external BOs attached to this VM */
+		u32 entries;
+		/** @list: list of vmas with external bos attached */
+		struct list_head list;
+	} extobj;
+
+	/** @async_ops: async VM operations (bind / unbinds) */
+	struct {
+		/** @pending: list of pending async VM ops */
+		struct list_head pending;
+		/** @work: worker to execute async VM ops */
+		struct work_struct work;
+		/** @lock: protects list of pending async VM ops and fences */
+		spinlock_t lock;
+		/** @error_capture: error capture state */
+		struct {
+			/** @mm: user MM */
+			struct mm_struct *mm;
+			/**
+			 * @addr: user pointer to copy error capture state to
+			 */
+			u64 addr;
+			/** @wq: user fence wait queue for VM errors */
+			wait_queue_head_t wq;
+		} error_capture;
+		/** @fence: fence state */
+		struct {
+			/** @context: context of async fence */
+			u64 context;
+			/** @seqno: seqno of async fence */
+			u32 seqno;
+		} fence;
+		/** @error: error state for async VM ops */
+		int error;
+		/**
+		 * @munmap_rebind_inflight: an munmap style VM bind is in the
+		 * middle of a set of ops which requires a rebind at the end.
+		 */
+		bool munmap_rebind_inflight;
+	} async_ops;
+
+	/** @userptr: user pointer state */
+	struct {
+		/**
+		 * @userptr.repin_list: list of VMAs which are user pointers,
+		 * and needs repinning. Protected by @lock.
+		 */
+		struct list_head repin_list;
+		/**
+		 * @notifier_lock: protects notifier in write mode and
+		 * submission in read mode.
+		 */
+		struct rw_semaphore notifier_lock;
+		/**
+		 * @userptr.invalidated_lock: Protects the
+		 * @userptr.invalidated list.
+		 */
+		spinlock_t invalidated_lock;
+		/**
+		 * @userptr.invalidated: List of invalidated userptrs, not yet
+		 * picked up for revalidation.
+		 * Protected from access with the
+		 * @invalidated_lock. Removing items from the list
+		 * additionally requires @lock in write mode, and adding
+		 * items to the list requires the @userptr.notifier_lock in
+		 * write mode.
+		 */
+		struct list_head invalidated;
+	} userptr;
+
+	/** @preempt: preempt state */
+	struct {
+		/**
+		 * @min_run_period_ms: The minimum run period before preempting
+		 * an engine again
+		 */
+		s64 min_run_period_ms;
+		/** @engines: list of engines attached to this VM */
+		struct list_head engines;
+		/** @num_engines: number of user engines attached to this VM */
+		int num_engines;
+		/**
+		 * @rebind_work: worker to rebind invalidated userptrs / evicted
+		 * BOs
+		 */
+		struct work_struct rebind_work;
+	} preempt;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @asid: address space ID, unique to each VM */
+		u32 asid;
+		/**
+		 * @last_fault_vma: Last fault VMA, used for fast lookup when we
+		 * get a flood of faults to the same VMA
+		 */
+		struct xe_vma *last_fault_vma;
+	} usm;
+
+	/**
+	 * @notifier: Lists and locks for temporary usage within notifiers where
+	 * we either can't grab the vm lock or the vm resv.
+	 */
+	struct {
+		/** @notifier.list_lock: lock protecting @rebind_list */
+		spinlock_t list_lock;
+		/**
+		 * @notifier.rebind_list: list of vmas that we want to put on the
+		 * main @rebind_list. This list is protected for writing by both
+		 * notifier.list_lock, and the resv of the bo the vma points to,
+		 * and for reading by the notifier.list_lock only.
+		 */
+		struct list_head rebind_list;
+	} notifier;
+
+	/** @error_capture: allow to track errors */
+	struct {
+		/** @capture_once: capture only one error per VM */
+		bool capture_once;
+	} error_capture;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
new file mode 100644
index 000000000000..b56141ba7145
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wa.h"
+
+#include <linux/compiler_types.h>
+
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+#include "xe_step.h"
+
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+#include "i915_reg.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * Hardware workarounds are register programming documented to be executed in
+ * the driver that fall outside of the normal programming sequences for a
+ * platform. There are some basic categories of workarounds, depending on
+ * how/when they are applied:
+ *
+ * - LRC workarounds: workarounds that touch registers that are
+ *   saved/restored to/from the HW context image. The list is emitted (via Load
+ *   Register Immediate commands) once when initializing the device and saved in
+ *   the default context. That default context is then used on every context
+ *   creation to have a "primed golden context", i.e. a context image that
+ *   already contains the changes needed to all the registers.
+ *
+ *   TODO: Although these workarounds are maintained here, they are not
+ *   currently being applied.
+ *
+ * - Engine workarounds: the list of these WAs is applied whenever the specific
+ *   engine is reset. It's also possible that a set of engine classes share a
+ *   common power domain and they are reset together. This happens on some
+ *   platforms with render and compute engines. In this case (at least) one of
+ *   them needs to keep the workaround programming: the approach taken in the
+ *   driver is to tie those workarounds to the first compute/render engine that
+ *   is registered.  When executing with GuC submission, engine resets are
+ *   outside of kernel driver control, hence the list of registers involved is
+ *   written once, on engine initialization, and then passed to GuC, that
+ *   saves/restores their values before/after the reset takes place. See
+ *   ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ *
+ * - GT workarounds: the list of these WAs is applied whenever these registers
+ *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
+ *
+ * - Register whitelist: some workarounds need to be implemented in userspace,
+ *   but need to touch privileged registers. The whitelist in the kernel
+ *   instructs the hardware to allow the access to happen. From the kernel side,
+ *   this is just a special case of a MMIO workaround (as we write the list of
+ *   these to-be-whitelisted registers to some special HW registers).
+ *
+ * - Workaround batchbuffers: buffers that get executed automatically by the
+ *   hardware on every HW context restore. These buffers are created and
+ *   programmed in the default context so the hardware always goes through those
+ *   programming sequences when switching contexts. The support for workaround
+ *   batchbuffers is enabled by these hardware mechanisms:
+ *
+ *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
+ *      context, pointing the hardware to jump to that location when that offset
+ *      is reached in the context restore. Workaround batchbuffer in the driver
+ *      currently uses this mechanism for all platforms.
+ *
+ *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
+ *      pointing the hardware to a buffer to continue executing after the
+ *      engine registers are restored in a context restore sequence. This is
+ *      currently not used in the driver.
+ *
+ * - Other:  There are WAs that, due to their nature, cannot be applied from a
+ *   central place. Those are peppered around the rest of the code, as needed.
+ *   Workarounds related to the display IP are the main example.
+ *
+ * .. [1] Technically, some registers are powercontext saved & restored, so they
+ *    survive a suspend/resume. In practice, writing them again is not too
+ *    costly and simplifies things, so it's the approach taken in the driver.
+ *
+ * .. note::
+ *    Hardware workarounds in xe work the same way as in i915, with the
+ *    difference of how they are maintained in the code. In xe it uses the
+ *    xe_rtp infrastructure so the workarounds can be kept in tables, following
+ *    a more declarative approach rather than procedural.
+ */
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x)	_XE_RTP_REG(x)
+#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+
+static bool match_14011060649(const struct xe_gt *gt,
+			      const struct xe_hw_engine *hwe)
+{
+	return !(hwe->instance % 2);	/* true for even engine instances */
+}
+
+static const struct xe_rtp_entry gt_was[] = {	/* used by xe_wa_process_gt() */
+	{ XE_RTP_NAME("14011060649"),
+	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
+		       ENGINE_CLASS(VIDEO_DECODE),
+		       FUNC(match_14011060649)),
+	  XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS,
+		     XE_RTP_FLAG(FOREACH_ENGINE))
+	},
+	{ XE_RTP_NAME("16010515920"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
+		       STEP(A0, B0),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS,
+		     XE_RTP_FLAG(FOREACH_ENGINE))
+	},
+	{ XE_RTP_NAME("22010523718"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+	  XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011006942"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+	  XE_RTP_SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14010948348"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011037102"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011371254"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011431319/0"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLCGCTL9440,
+		     GAMTLBOACS_CLKGATE_DIS |
+		     GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
+		     GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
+		     GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
+		     GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
+		     GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
+		     GAMTLBBLT_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011431319/1"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLCGCTL9444,
+		     GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
+		     GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
+		     GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
+		     GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
+		     GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
+		     GAMTLBMERT_CLKGATE_DIS |
+		     GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
+		     GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14010569222"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011028019"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14014830051"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_CLR(SARB_CHICKEN1, COMP_CKN_IN)
+	},
+	{ XE_RTP_NAME("14015795083"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)
+	},
+	{ XE_RTP_NAME("14011059788"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)
+	},
+	{ XE_RTP_NAME("1409420604"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("1408615072"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL)
+	},
+	{}	/* empty last entry; presumably the xe_rtp_process() sentinel */
+};
+
+static const struct xe_rtp_entry engine_was[] = { /* xe_wa_process_engine() */
+	{ XE_RTP_NAME("14015227452"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1606931601"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE)
+	},
+	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("18019627453"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1409804808"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
+		       ENGINE_CLASS(RENDER),
+		       IS_INTEGRATED),
+	  XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("14010229206, 1409085225"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
+		       ENGINE_CLASS(RENDER),
+		       IS_INTEGRATED),
+	  XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
+		     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
+		     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1406941453"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{}	/* empty last entry; presumably the xe_rtp_process() sentinel */
+};
+
+static const struct xe_rtp_entry lrc_was[] = {	/* used by xe_wa_process_lrc() */
+	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3,
+		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+			   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
+			   XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("16011163337"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  /* read verification is ignored due to 1608008084. */
+	  XE_RTP_FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK,
+					FF_MODE2_GS_TIMER_224)
+	},
+	{ XE_RTP_NAME("1409044764"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3,
+		     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("22010493298"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_SET(HIZ_CHICKEN,
+		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{}	/* empty last entry; presumably the xe_rtp_process() sentinel */
+};
+
+/**
+ * xe_wa_process_gt - process GT workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Run xe_rtp_process() over the gt_was table with no engine (NULL), saving in
+ * @gt->reg_sr the workarounds that need to be applied at the GT level.
+ */
+void xe_wa_process_gt(struct xe_gt *gt)
+{
+	xe_rtp_process(gt_was, &gt->reg_sr, gt, NULL);
+}
+
+/**
+ * xe_wa_process_engine - process engine workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Run xe_rtp_process() over the engine_was table for @hwe and its GT, saving
+ * in @hwe->reg_sr the workarounds that need to be applied at the engine level
+ * that match this engine.
+ */
+void xe_wa_process_engine(struct xe_hw_engine *hwe)
+{
+	xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe);
+}
+
+/**
+ * xe_wa_process_lrc - process context workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Run xe_rtp_process() over the lrc_was table for @hwe and its GT, saving in
+ * @hwe->reg_lrc the workarounds to be applied on context restore: workarounds
+ * touching registers that are part of the HW context image.
+ */
+void xe_wa_process_lrc(struct xe_hw_engine *hwe)
+{
+	xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe);
+}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
new file mode 100644
index 000000000000..cd2307d58795
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WA_
+#define _XE_WA_
+
+struct xe_gt;
+struct xe_hw_engine;
+
+void xe_wa_process_gt(struct xe_gt *gt);
+void xe_wa_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_process_lrc(struct xe_hw_engine *hwe);
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
new file mode 100644
index 000000000000..8a8d814a0e7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_vm.h"
+
+/* Returns 0 if the masked user value passes @op, 1 if not, -EFAULT on fault */
+static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
+{
+	u64 cur, want = value & mask;
+	bool passed;
+
+	if (copy_from_user(&cur, u64_to_user_ptr(addr), sizeof(cur)))
+		return -EFAULT;
+	cur &= mask;
+
+	switch (op) {
+	case DRM_XE_UFENCE_WAIT_EQ:
+		passed = cur == want;
+		break;
+	case DRM_XE_UFENCE_WAIT_NEQ:
+		passed = cur != want;
+		break;
+	case DRM_XE_UFENCE_WAIT_GT:
+		passed = cur > want;
+		break;
+	case DRM_XE_UFENCE_WAIT_GTE:
+		passed = cur >= want;
+		break;
+	case DRM_XE_UFENCE_WAIT_LT:
+		passed = cur < want;
+		break;
+	case DRM_XE_UFENCE_WAIT_LTE:
+		passed = cur <= want;
+		break;
+	default:
+		XE_BUG_ON("Not possible");
+	}
+
+	return !passed;
+}
+
+static const enum xe_engine_class user_to_xe_engine_class[] = { /* by uAPI class */
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+/* Validate @num_engines entries of @eci: 0 if every engine exists, else -EINVAL */
+int check_hw_engines(struct xe_device *xe,
+		     struct drm_xe_engine_class_instance *eci, int num_engines)
+{
+	int i;
+
+	for (i = 0; i < num_engines; ++i) {
+		/* engine_class comes from userspace: bound it before indexing */
+		if (eci[i].engine_class >= ARRAY_SIZE(user_to_xe_engine_class) ||
+		    eci[i].gt_id >= xe->info.tile_count)
+			return -EINVAL;
+
+		if (!xe_gt_hw_engine(xe_device_get_gt(xe, eci[i].gt_id),
+				     user_to_xe_engine_class[eci[i].engine_class],
+				     eci[i].engine_instance, true))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define VALID_FLAGS	(DRM_XE_UFENCE_WAIT_SOFT_OP | \
+			 DRM_XE_UFENCE_WAIT_ABSTIME | \
+			 DRM_XE_UFENCE_WAIT_VM_ERROR)
+#define MAX_OP		DRM_XE_UFENCE_WAIT_LTE
+
+/*
+ * Wait until the u64 at args->addr (or at the VM's error capture address)
+ * passes args->op against args->value/mask. Returns 0, -ETIME or -errno.
+ */
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
+	struct drm_xe_wait_user_fence *args = data;
+	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
+	struct drm_xe_engine_class_instance __user *user_eci =
+		u64_to_user_ptr(args->instances);
+	wait_queue_head_t *wq = &xe->ufence_wq;
+	struct xe_vm *vm = NULL;
+	u64 addr = args->addr;
+	int err;
+	bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP ||
+		args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR;
+	unsigned long timeout = args->timeout;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+	if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS))
+		return -EINVAL;
+	if (XE_IOCTL_ERR(xe, args->op > MAX_OP))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, no_engines &&
+			 (args->num_engines || args->instances)))
+		return -EINVAL;
+
+	/* eci[] is on the stack: num_engines must fit before it is filled */
+	if (XE_IOCTL_ERR(xe, !no_engines && (!args->num_engines ||
+			 args->num_engines > ARRAY_SIZE(eci))))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) &&
+			 addr & 0x7))
+		return -EINVAL;
+
+	if (!no_engines) {
+		err = copy_from_user(eci, user_eci,
+				     sizeof(*eci) * args->num_engines);
+		if (XE_IOCTL_ERR(xe, err))
+			return -EFAULT;
+
+		if (XE_IOCTL_ERR(xe, check_hw_engines(xe, eci,
+						      args->num_engines)))
+			return -EINVAL;
+	}
+
+	/* Checked before the VM lookup so this return cannot leak a VM ref */
+	if (XE_IOCTL_ERR(xe, timeout > MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) {
+		if (XE_IOCTL_ERR(xe, args->vm_id >> 32))
+			return -EINVAL;
+
+		vm = xe_vm_lookup(to_xe_file(file), args->vm_id);
+		if (XE_IOCTL_ERR(xe, !vm))
+			return -ENOENT;
+
+		if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) {
+			xe_vm_put(vm);
+			return -ENOTSUPP;
+		}
+
+		addr = vm->async_ops.error_capture.addr;
+		wq = &vm->async_ops.error_capture.wq;
+	}
+
+	/*
+	 * FIXME: Very simple implementation at the moment, single wait queue
+	 * for everything. Could be optimized to have a wait queue for every
+	 * hardware engine. Open coding as 'do_compare' can sleep which doesn't
+	 * work with the wait_event_* macros.
+	 */
+	add_wait_queue(wq, &w_wait);
+	for (;;) {
+		if (vm && xe_vm_is_closed(vm)) {
+			err = -ENODEV;
+			break;
+		}
+		err = do_compare(addr, args->value, args->mask, args->op);
+		if (err <= 0)
+			break;
+
+		if (signal_pending(current)) {
+			err = -ERESTARTSYS;
+			break;
+		}
+
+		if (!timeout) {
+			err = -ETIME;
+			break;
+		}
+
+		timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout);
+	}
+	remove_wait_queue(wq, &w_wait);
+	if (vm)
+		xe_vm_put(vm);
+	if (XE_IOCTL_ERR(xe, err < 0))
+		return err;
+	else if (XE_IOCTL_ERR(xe, !timeout))
+		return -ETIME;
+
+	/*
+	 * Again very simple, return the time in jiffies that has passed, may
+	 * need more precision
+	 */
+	if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)
+		args->timeout = args->timeout - timeout;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h
new file mode 100644
index 000000000000..0e268978f9e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WAIT_USER_FENCE_H_
+#define _XE_WAIT_USER_FENCE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
new file mode 100644
index 000000000000..e4a8d4a1899e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_reg.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+
+#include "i915_utils.h"
+
+/**
+ * DOC: Write Once Protected Content Memory (WOPCM) Layout
+ *
+ * The layout of the WOPCM will be fixed after writing to GuC WOPCM size and
+ * offset registers whose values are calculated and determined by HuC/GuC
+ * firmware size and set of hardware requirements/restrictions as shown below:
+ *
+ * ::
+ *
+ *    +=========> +====================+ <== WOPCM Top
+ *    ^           |  HW contexts RSVD  |
+ *    |     +===> +====================+ <== GuC WOPCM Top
+ *    |     ^     |                    |
+ *    |     |     |                    |
+ *    |     |     |                    |
+ *    |    GuC    |                    |
+ *    |   WOPCM   |                    |
+ *    |    Size   +--------------------+
+ *  WOPCM   |     |    GuC FW RSVD     |
+ *    |     |     +--------------------+
+ *    |     |     |   GuC Stack RSVD   |
+ *    |     |     +------------------- +
+ *    |     v     |   GuC WOPCM RSVD   |
+ *    |     +===> +====================+ <== GuC WOPCM base
+ *    |           |     WOPCM RSVD     |
+ *    |           +------------------- + <== HuC Firmware Top
+ *    v           |      HuC FW        |
+ *    +=========> +====================+ <== WOPCM Base
+ *
+ * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top.
+ * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6
+ * context).
+ */
+
+/* Assumed WOPCM sizes per platform family; none are probed from hardware yet. */
+#define DGFX_WOPCM_SIZE			SZ_4M	/* FIXME: Larger size required
+						   for 2 tile PVC, do a proper
+						   probe sooner or later */
+#define MTL_WOPCM_SIZE			SZ_4M	/* FIXME: Larger size required
+						   for MTL, do a proper probe
+						   sooner or later */
+#define GEN11_WOPCM_SIZE		SZ_2M
+/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
+#define WOPCM_RESERVED_SIZE		SZ_16K
+
+/* 16KB reserved at the beginning of GuC WOPCM. */
+#define GUC_WOPCM_RESERVED		SZ_16K
+/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */
+#define GUC_WOPCM_STACK_RESERVED	SZ_8K
+
+/* GuC WOPCM Offset value needs to be aligned to 16KB. */
+#define GUC_WOPCM_OFFSET_ALIGNMENT	(1UL << GUC_WOPCM_OFFSET_SHIFT)
+
+/* 36KB WOPCM reserved at the end of WOPCM on GEN11. */
+#define GEN11_WOPCM_HW_CTX_RESERVED	(SZ_32K + SZ_4K)
+
+static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm)
+{
+	return container_of(wopcm, struct xe_gt, uc.wopcm);
+}
+
+static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm)
+{
+	return gt_to_xe(wopcm_to_gt(wopcm));
+}
+
+static u32 context_reserved_size(void)
+{
+	return GEN11_WOPCM_HW_CTX_RESERVED;
+}
+
+static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
+			   u32 guc_wopcm_base, u32 guc_wopcm_size,
+			   u32 guc_fw_size, u32 huc_fw_size)
+{
+	/* Room left for GuC once the HW context area at the top is excluded. */
+	const u32 usable = wopcm_size - context_reserved_size();
+	/* GuC region must hold the firmware plus its reserved and stack areas. */
+	const u32 guc_need = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED;
+	/* Everything below the GuC base must hold HuC plus the reserved gap. */
+	const u32 huc_need = huc_fw_size + WOPCM_RESERVED_SIZE;
+
+	if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, usable))) {
+		drm_err(&xe->drm, "WOPCM: invalid GuC region layout: %uK + %uK > %uK\n",
+			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K,
+			usable / SZ_1K);
+		return false;
+	}
+
+	if (unlikely(guc_wopcm_size < guc_need)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_GUC),
+			guc_wopcm_size / SZ_1K, guc_need / SZ_1K);
+		return false;
+	}
+
+	if (unlikely(guc_wopcm_base < huc_need)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_HUC),
+			guc_wopcm_base / SZ_1K, huc_need / SZ_1K);
+		return false;
+	}
+
+	return true;
+}
+
+static bool __wopcm_regs_locked(struct xe_gt *gt,
+				u32 *guc_wopcm_base, u32 *guc_wopcm_size)
+{
+	u32 offset_val = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg);
+	u32 size_val = xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg);
+
+	/* Outputs stay untouched unless both the LOCKED and VALID bits are set. */
+	if ((size_val & GUC_WOPCM_SIZE_LOCKED) && (offset_val & GUC_WOPCM_OFFSET_VALID)) {
+		*guc_wopcm_base = offset_val & GUC_WOPCM_OFFSET_MASK;
+		*guc_wopcm_size = size_val & GUC_WOPCM_SIZE_MASK;
+		return true;
+	}
+	return false;
+}
+
+static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
+			     struct xe_wopcm *wopcm)
+{
+	u32 base = wopcm->guc.base;
+	u32 size = wopcm->guc.size;
+	u32 huc_agent = xe_uc_fw_is_disabled(&gt->uc.huc.fw) ? 0 :
+		HUC_LOADING_AGENT_GUC;
+	u32 mask;
+	int err;
+
+	XE_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK));
+	XE_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK);
+	XE_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
+	XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
+	/* Size first; last arg is presumably the expected read-back (LOCKED set). */
+	mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
+	err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE.reg, size, mask,
+					 size | GUC_WOPCM_SIZE_LOCKED);
+	if (err)
+		goto err_out;
+	/* Then the offset, with the HuC loading agent bit unless HuC is disabled. */
+	mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
+	err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET.reg,
+					 base | huc_agent, mask,
+					 base | huc_agent |
+					 GUC_WOPCM_OFFSET_VALID);
+	if (err)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
+		   DMA_GUC_WOPCM_OFFSET.reg,
+		   xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg));
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
+		   GUC_WOPCM_SIZE.reg,
+		   xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg));
+
+	return err;
+}
+
+u32 xe_wopcm_size(struct xe_device *xe)
+{
+	if (IS_DGFX(xe))
+		return DGFX_WOPCM_SIZE;
+	return xe->info.platform == XE_METEORLAKE ? MTL_WOPCM_SIZE : GEN11_WOPCM_SIZE;
+}
+
+/**
+ * xe_wopcm_init() - Initialize the WOPCM structure.
+ * @wopcm: pointer to xe_wopcm.
+ *
+ * Partition WOPCM between HuC and GuC based on firmware sizes, or adopt the
+ * layout already locked in hardware; validate it, then program the registers.
+ *
+ * Return: 0 on success, -EINVAL if the GuC firmware upload size is zero,
+ * -E2BIG if the layout check fails, else the register programming error.
+ */
+int xe_wopcm_init(struct xe_wopcm *wopcm)
+{
+	struct xe_device *xe = wopcm_to_xe(wopcm);
+	struct xe_gt *gt = wopcm_to_gt(wopcm);
+	u32 guc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.guc.fw);
+	u32 huc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.huc.fw);
+	u32 ctx_rsvd = context_reserved_size();
+	u32 guc_wopcm_base;
+	u32 guc_wopcm_size;
+	bool locked;
+	int ret = 0;
+
+	if (!guc_fw_size)
+		return -EINVAL;
+
+	wopcm->size = xe_wopcm_size(xe);
+	drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K);
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	XE_BUG_ON(guc_fw_size >= wopcm->size);
+	XE_BUG_ON(huc_fw_size >= wopcm->size);
+	XE_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
+
+	locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size);
+	if (locked) {
+		drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
+			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+		goto check;
+	}
+
+	/*
+	 * Aligned value of guc_wopcm_base will determine available WOPCM space
+	 * for HuC firmware and mandatory reserved area.
+	 */
+	guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE;
+	guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT);
+
+	/*
+	 * Need to clamp guc_wopcm_base now to make sure the following math is
+	 * correct. Formal check of whole WOPCM layout will be done below.
+	 */
+	guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd);
+
+	/* The aligned remainder of usable WOPCM space can be assigned to GuC. */
+	guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base;
+	guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
+
+	drm_dbg(&xe->drm, "Calculated GuC WOPCM [%uK, %uK)\n",
+		guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+
+check:
+	if (__check_layout(xe, wopcm->size, guc_wopcm_base, guc_wopcm_size,
+			   guc_fw_size, huc_fw_size)) {
+		wopcm->guc.base = guc_wopcm_base;
+		wopcm->guc.size = guc_wopcm_size;
+		XE_BUG_ON(!wopcm->guc.base);
+		XE_BUG_ON(!wopcm->guc.size);
+	} else {
+		drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n");
+		return -E2BIG;
+	}
+
+	if (!locked)
+		ret = __wopcm_init_regs(xe, gt, wopcm);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_wopcm.h b/drivers/gpu/drm/xe/xe_wopcm.h
new file mode 100644
index 000000000000..0197a282460b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_H_
+#define _XE_WOPCM_H_
+
+#include "xe_wopcm_types.h"
+
+struct xe_device;
+
+int xe_wopcm_init(struct xe_wopcm *wopcm);
+u32 xe_wopcm_size(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h
new file mode 100644
index 000000000000..486d850c4084
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_TYPES_H_
+#define _XE_WOPCM_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_wopcm - Overall WOPCM info and WOPCM regions.
+ */
+struct xe_wopcm {
+	/** @size: Size of overall WOPCM */
+	u32 size;
+	/** @guc: GuC WOPCM Region info */
+	struct {
+		/** @guc.base: GuC WOPCM base which is offset from WOPCM base */
+		u32 base;
+		/** @guc.size: Size of the GuC WOPCM region */
+		u32 size;
+	} guc;
+};
+
+#endif
diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
new file mode 100644
index 000000000000..e539594ed939
--- /dev/null
+++ b/include/drm/xe_pciids.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCIIDS_H_
+#define _XE_PCIIDS_H_
+
+/*
+ * Lists below can be turned into initializers for a struct pci_device_id
+ * by defining INTEL_VGA_DEVICE:
+ *
+ * #define INTEL_VGA_DEVICE(id, info) { \
+ *	0x8086, id,			\
+ *	~0, ~0,				\
+ *	0x030000, 0xff0000,		\
+ *	(unsigned long) info }
+ *
+ * And then calling like:
+ *
+ * XE_TGL_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__)
+ *
+ * To turn them into something else, just provide a different macro passed as
+ * first argument.
+ */
+
+/* TGL */
+#define XE_TGL_GT1_IDS(MACRO__, ...)		\
+	MACRO__(0x9A60, ## __VA_ARGS__),	\
+	MACRO__(0x9A68, ## __VA_ARGS__),	\
+	MACRO__(0x9A70, ## __VA_ARGS__)
+
+#define XE_TGL_GT2_IDS(MACRO__, ...)		\
+	MACRO__(0x9A40, ## __VA_ARGS__),	\
+	MACRO__(0x9A49, ## __VA_ARGS__),	\
+	MACRO__(0x9A59, ## __VA_ARGS__),	\
+	MACRO__(0x9A78, ## __VA_ARGS__),	\
+	MACRO__(0x9AC0, ## __VA_ARGS__),	\
+	MACRO__(0x9AC9, ## __VA_ARGS__),	\
+	MACRO__(0x9AD9, ## __VA_ARGS__),	\
+	MACRO__(0x9AF8, ## __VA_ARGS__)
+
+#define XE_TGL_IDS(MACRO__, ...)		\
+	XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+/* RKL */
+#define XE_RKL_IDS(MACRO__, ...)		\
+	MACRO__(0x4C80, ## __VA_ARGS__),	\
+	MACRO__(0x4C8A, ## __VA_ARGS__),	\
+	MACRO__(0x4C8B, ## __VA_ARGS__),	\
+	MACRO__(0x4C8C, ## __VA_ARGS__),	\
+	MACRO__(0x4C90, ## __VA_ARGS__),	\
+	MACRO__(0x4C9A, ## __VA_ARGS__)
+
+/* DG1 */
+#define XE_DG1_IDS(MACRO__, ...)		\
+	MACRO__(0x4905, ## __VA_ARGS__),	\
+	MACRO__(0x4906, ## __VA_ARGS__),	\
+	MACRO__(0x4907, ## __VA_ARGS__),	\
+	MACRO__(0x4908, ## __VA_ARGS__),	\
+	MACRO__(0x4909, ## __VA_ARGS__)
+
+/* ADL-S */
+#define XE_ADLS_IDS(MACRO__, ...)		\
+	MACRO__(0x4680, ## __VA_ARGS__),	\
+	MACRO__(0x4682, ## __VA_ARGS__),	\
+	MACRO__(0x4688, ## __VA_ARGS__),	\
+	MACRO__(0x468A, ## __VA_ARGS__),	\
+	MACRO__(0x4690, ## __VA_ARGS__),	\
+	MACRO__(0x4692, ## __VA_ARGS__),	\
+	MACRO__(0x4693, ## __VA_ARGS__)
+
+/* ADL-P */
+#define XE_ADLP_IDS(MACRO__, ...)		\
+	MACRO__(0x46A0, ## __VA_ARGS__),	\
+	MACRO__(0x46A1, ## __VA_ARGS__),	\
+	MACRO__(0x46A2, ## __VA_ARGS__),	\
+	MACRO__(0x46A3, ## __VA_ARGS__),	\
+	MACRO__(0x46A6, ## __VA_ARGS__),	\
+	MACRO__(0x46A8, ## __VA_ARGS__),	\
+	MACRO__(0x46AA, ## __VA_ARGS__),	\
+	MACRO__(0x462A, ## __VA_ARGS__),	\
+	MACRO__(0x4626, ## __VA_ARGS__),	\
+	MACRO__(0x4628, ## __VA_ARGS__),	\
+	MACRO__(0x46B0, ## __VA_ARGS__),	\
+	MACRO__(0x46B1, ## __VA_ARGS__),	\
+	MACRO__(0x46B2, ## __VA_ARGS__),	\
+	MACRO__(0x46B3, ## __VA_ARGS__),	\
+	MACRO__(0x46C0, ## __VA_ARGS__),	\
+	MACRO__(0x46C1, ## __VA_ARGS__),	\
+	MACRO__(0x46C2, ## __VA_ARGS__),	\
+	MACRO__(0x46C3, ## __VA_ARGS__)
+
+/* ADL-N */
+#define XE_ADLN_IDS(MACRO__, ...)		\
+	MACRO__(0x46D0, ## __VA_ARGS__),	\
+	MACRO__(0x46D1, ## __VA_ARGS__),	\
+	MACRO__(0x46D2, ## __VA_ARGS__)
+
+/* RPL-S */
+#define XE_RPLS_IDS(MACRO__, ...)		\
+	MACRO__(0xA780, ## __VA_ARGS__),	\
+	MACRO__(0xA781, ## __VA_ARGS__),	\
+	MACRO__(0xA782, ## __VA_ARGS__),	\
+	MACRO__(0xA783, ## __VA_ARGS__),	\
+	MACRO__(0xA788, ## __VA_ARGS__),	\
+	MACRO__(0xA789, ## __VA_ARGS__),	\
+	MACRO__(0xA78A, ## __VA_ARGS__),	\
+	MACRO__(0xA78B, ## __VA_ARGS__)
+
+/* RPL-U */
+#define XE_RPLU_IDS(MACRO__, ...)		\
+	MACRO__(0xA721, ## __VA_ARGS__),	\
+	MACRO__(0xA7A1, ## __VA_ARGS__),	\
+	MACRO__(0xA7A9, ## __VA_ARGS__)
+
+/* RPL-P */
+#define XE_RPLP_IDS(MACRO__, ...)		\
+	MACRO__(0xA720, ## __VA_ARGS__),	\
+	MACRO__(0xA7A0, ## __VA_ARGS__),	\
+	MACRO__(0xA7A8, ## __VA_ARGS__)
+
+/* DG2 */
+#define XE_DG2_G10_IDS(MACRO__, ...)		\
+	MACRO__(0x5690, ## __VA_ARGS__),	\
+	MACRO__(0x5691, ## __VA_ARGS__),	\
+	MACRO__(0x5692, ## __VA_ARGS__),	\
+	MACRO__(0x56A0, ## __VA_ARGS__),	\
+	MACRO__(0x56A1, ## __VA_ARGS__),	\
+	MACRO__(0x56A2, ## __VA_ARGS__)
+
+#define XE_DG2_G11_IDS(MACRO__, ...)		\
+	MACRO__(0x5693, ## __VA_ARGS__),	\
+	MACRO__(0x5694, ## __VA_ARGS__),	\
+	MACRO__(0x5695, ## __VA_ARGS__),	\
+	MACRO__(0x5698, ## __VA_ARGS__),	\
+	MACRO__(0x56A5, ## __VA_ARGS__),	\
+	MACRO__(0x56A6, ## __VA_ARGS__),	\
+	MACRO__(0x56B0, ## __VA_ARGS__),	\
+	MACRO__(0x56B1, ## __VA_ARGS__)
+
+#define XE_DG2_G12_IDS(MACRO__, ...)		\
+	MACRO__(0x5696, ## __VA_ARGS__),	\
+	MACRO__(0x5697, ## __VA_ARGS__),	\
+	MACRO__(0x56A3, ## __VA_ARGS__),	\
+	MACRO__(0x56A4, ## __VA_ARGS__),	\
+	MACRO__(0x56B2, ## __VA_ARGS__),	\
+	MACRO__(0x56B3, ## __VA_ARGS__)
+
+#define XE_DG2_IDS(MACRO__, ...)		\
+	XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__)
+
+#define XE_ATS_M150_IDS(MACRO__, ...)		\
+	MACRO__(0x56C0, ## __VA_ARGS__)
+
+#define XE_ATS_M75_IDS(MACRO__, ...)		\
+	MACRO__(0x56C1, ## __VA_ARGS__)
+
+#define XE_ATS_M_IDS(MACRO__, ...)		\
+	XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
+
+/* MTL */
+#define XE_MTL_M_IDS(MACRO__, ...)		\
+	MACRO__(0x7D40, ## __VA_ARGS__),	\
+	MACRO__(0x7D43, ## __VA_ARGS__),	\
+	MACRO__(0x7DC0, ## __VA_ARGS__)
+
+#define XE_MTL_P_IDS(MACRO__, ...)		\
+	MACRO__(0x7D45, ## __VA_ARGS__),	\
+	MACRO__(0x7D47, ## __VA_ARGS__),	\
+	MACRO__(0x7D50, ## __VA_ARGS__),	\
+	MACRO__(0x7D55, ## __VA_ARGS__),	\
+	MACRO__(0x7DC5, ## __VA_ARGS__),	\
+	MACRO__(0x7DD0, ## __VA_ARGS__),	\
+	MACRO__(0x7DD5, ## __VA_ARGS__)
+
+#define XE_MTL_S_IDS(MACRO__, ...)		\
+	MACRO__(0x7D60, ## __VA_ARGS__),	\
+	MACRO__(0x7DE0, ## __VA_ARGS__)
+
+#define XE_ARL_IDS(MACRO__, ...)		\
+	MACRO__(0x7D66, ## __VA_ARGS__),	\
+	MACRO__(0x7D76, ## __VA_ARGS__)
+
+#define XE_MTL_IDS(MACRO__, ...)		\
+	XE_MTL_M_IDS(MACRO__, ## __VA_ARGS__),	\
+	XE_MTL_P_IDS(MACRO__, ## __VA_ARGS__),	\
+	XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__),	\
+	XE_ARL_IDS(MACRO__, ## __VA_ARGS__)
+
+#endif
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
new file mode 100644
index 000000000000..f64b1c785fad
--- /dev/null
+++ b/include/uapi/drm/xe_drm.h
@@ -0,0 +1,787 @@
+/*
+ * Copyright 2021 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _UAPI_XE_DRM_H_
+#define _UAPI_XE_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Please note that modifications to all structs defined here are
+ * subject to backwards-compatibility constraints.
+ */
+
+/**
+ * struct xe_user_extension - Base class for defining a chain of extensions
+ *
+ * Many interfaces need to grow over time. In most cases we can simply
+ * extend the struct and have userspace pass in more data. Another option,
+ * as demonstrated by Vulkan's approach to providing extensions for forward
+ * and backward compatibility, is to use a list of optional structs to
+ * provide those extra details.
+ *
+ * The key advantage to using an extension chain is that it allows us to
+ * redefine the interface more easily than an ever growing struct of
+ * increasing complexity, and for large parts of that interface to be
+ * entirely optional. The downside is more pointer chasing; chasing across
+ * the __user boundary with pointers encapsulated inside u64.
+ *
+ * Example chaining:
+ *
+ * .. code-block:: C
+ *
+ *	struct xe_user_extension ext3 {
+ *		.next_extension = 0, // end
+ *		.name = ...,
+ *	};
+ *	struct xe_user_extension ext2 {
+ *		.next_extension = (uintptr_t)&ext3,
+ *		.name = ...,
+ *	};
+ *	struct xe_user_extension ext1 {
+ *		.next_extension = (uintptr_t)&ext2,
+ *		.name = ...,
+ *	};
+ *
+ * Typically the struct xe_user_extension would be embedded in some uAPI
+ * struct, and in this case we would feed it the head of the chain (i.e. ext1),
+ * which would then apply all of the above extensions.
+ *
+ */
+struct xe_user_extension {
+	/**
+	 * @next_extension:
+	 *
+	 * Pointer to the next struct xe_user_extension, or zero if the end.
+	 */
+	__u64 next_extension;
+	/**
+	 * @name: Name of the extension.
+	 *
+	 * Note that the name here is just some integer.
+	 *
+	 * Also note that the name space for this is not global for the whole
+	 * driver, but rather its scope/meaning is limited to the specific piece
+	 * of uAPI which has embedded the struct xe_user_extension.
+	 */
+	__u32 name;
+	/**
+	 * @pad: MBZ
+	 *
+	 * All undefined bits must be zero.
+	 */
+	__u32 pad;
+};
+
+/*
+ * xe specific ioctls.
+ *
+ * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie
+ * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset
+ * against DRM_COMMAND_BASE and should be between [0x0, 0x60).
+ */
+#define DRM_XE_DEVICE_QUERY		0x00
+#define DRM_XE_GEM_CREATE		0x01
+#define DRM_XE_GEM_MMAP_OFFSET		0x02
+#define DRM_XE_VM_CREATE		0x03
+#define DRM_XE_VM_DESTROY		0x04
+#define DRM_XE_VM_BIND			0x05
+#define DRM_XE_ENGINE_CREATE		0x06
+#define DRM_XE_ENGINE_DESTROY		0x07
+#define DRM_XE_EXEC			0x08
+#define DRM_XE_MMIO			0x09
+#define DRM_XE_ENGINE_SET_PROPERTY	0x0a
+#define DRM_XE_WAIT_USER_FENCE		0x0b
+#define DRM_XE_VM_MADVISE		0x0c
+
+/* Must be kept compact -- no holes */
+#define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
+#define DRM_IOCTL_XE_GEM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
+#define DRM_IOCTL_XE_GEM_MMAP_OFFSET		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
+#define DRM_IOCTL_XE_VM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
+#define DRM_IOCTL_XE_VM_DESTROY			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
+#define DRM_IOCTL_XE_VM_BIND			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
+#define DRM_IOCTL_XE_ENGINE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create)
+#define DRM_IOCTL_XE_ENGINE_DESTROY		DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy)
+#define DRM_IOCTL_XE_EXEC			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
+#define DRM_IOCTL_XE_MMIO			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio)
+#define DRM_IOCTL_XE_ENGINE_SET_PROPERTY	DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, struct drm_xe_engine_set_property)
+#define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+#define DRM_IOCTL_XE_VM_MADVISE			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
+
+struct drm_xe_engine_class_instance {
+	__u16 engine_class;
+
+#define DRM_XE_ENGINE_CLASS_RENDER		0
+#define DRM_XE_ENGINE_CLASS_COPY		1
+#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
+#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
+#define DRM_XE_ENGINE_CLASS_COMPUTE		4
+	/*
+	 * Kernel only class (not actual hardware engine class). Used for
+	 * creating ordered queues of VM bind operations.
+	 */
+#define DRM_XE_ENGINE_CLASS_VM_BIND		5
+
+	__u16 engine_instance;
+	__u16 gt_id;
+};
+
+#define XE_MEM_REGION_CLASS_SYSMEM	0
+#define XE_MEM_REGION_CLASS_VRAM	1
+
+struct drm_xe_query_mem_usage {
+	__u32 num_regions;
+	__u32 pad;
+
+	struct drm_xe_query_mem_region {
+		__u16 mem_class;
+		__u16 instance;	/* unique ID even among different classes */
+		__u32 pad;
+		__u32 min_page_size;
+		__u32 max_page_size;
+		__u64 total_size;
+		__u64 used;
+		__u64 reserved[8];
+	} regions[];
+};
+
+struct drm_xe_query_config {
+	__u32 num_params;
+	__u32 pad;
+#define XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
+#define XE_QUERY_CONFIG_FLAGS			1
+	#define XE_QUERY_CONFIG_FLAGS_HAS_VRAM		(0x1 << 0)
+	#define XE_QUERY_CONFIG_FLAGS_USE_GUC		(0x1 << 1)
+#define XE_QUERY_CONFIG_MIN_ALIGNEMENT		2
+#define XE_QUERY_CONFIG_VA_BITS			3
+#define XE_QUERY_CONFIG_GT_COUNT		4
+#define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
+#define XE_QUERY_CONFIG_NUM_PARAM		(XE_QUERY_CONFIG_MEM_REGION_COUNT + 1)
+	__u64 info[];
+};
+
+struct drm_xe_query_gts {
+	__u32 num_gt;
+	__u32 pad;
+
+	/*
+	 * TODO: Perhaps info about every mem region relative to this GT? e.g.
+	 * bandwidth between this GT and remote region?
+	 */
+
+	struct drm_xe_query_gt {
+#define XE_QUERY_GT_TYPE_MAIN		0
+#define XE_QUERY_GT_TYPE_REMOTE		1
+#define XE_QUERY_GT_TYPE_MEDIA		2
+		__u16 type;
+		__u16 instance;
+		__u32 clock_freq;
+		__u64 features;
+		__u64 native_mem_regions;	/* bit mask of instances from drm_xe_query_mem_usage */
+		__u64 slow_mem_regions;		/* bit mask of instances from drm_xe_query_mem_usage */
+		__u64 inaccessible_mem_regions;	/* bit mask of instances from drm_xe_query_mem_usage */
+		__u64 reserved[8];
+	} gts[];
+};
+
+struct drm_xe_query_topology_mask {
+	/** @gt_id: GT ID the mask is associated with */
+	__u16 gt_id;
+
+	/** @type: type of mask */
+	__u16 type;
+#define XE_TOPO_DSS_GEOMETRY	(1 << 0)
+#define XE_TOPO_DSS_COMPUTE	(1 << 1)
+#define XE_TOPO_EU_PER_DSS	(1 << 2)
+
+	/** @num_bytes: number of bytes in requested mask */
+	__u32 num_bytes;
+
+	/** @mask: little-endian mask of @num_bytes */
+	__u8 mask[];
+};
+
+struct drm_xe_device_query {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @query: The type of data to query */
+	__u32 query;
+
+#define DRM_XE_DEVICE_QUERY_ENGINES	0
+#define DRM_XE_DEVICE_QUERY_MEM_USAGE	1
+#define DRM_XE_DEVICE_QUERY_CONFIG	2
+#define DRM_XE_DEVICE_QUERY_GTS		3
+#define DRM_XE_DEVICE_QUERY_HWCONFIG	4
+#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY	5
+
+	/** @size: Size of the queried data */
+	__u32 size;
+
+	/** @data: Queried data is placed here */
+	__u64 data;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_gem_create {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/**
+	 * @size: Requested size for the object
+	 *
+	 * The (page-aligned) allocated size for the object will be returned.
+	 */
+	__u64 size;
+
+	/**
+	 * @flags: Flags, currently a mask of memory instances of where BO can
+	 * be placed
+	 */
+#define XE_GEM_CREATE_FLAG_DEFER_BACKING	(0x1 << 24)
+#define XE_GEM_CREATE_FLAG_SCANOUT		(0x1 << 25)
+	__u32 flags;
+
+	/**
+	 * @vm_id: Attached VM, if any
+	 *
+	 * If a VM is specified, this BO must:
+	 *
+	 *  1. Only ever be bound to that VM.
+	 *
+	 *  2. Cannot be exported as a PRIME fd.
+	 */
+	__u32 vm_id;
+
+	/**
+	 * @handle: Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_gem_mmap_offset {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @handle: Handle for the object being mapped. */
+	__u32 handle;
+
+	/** @flags: Must be zero */
+	__u32 flags;
+
+	/** @offset: The fake offset to use for subsequent mmap call */
+	__u64 offset;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture
+ */
+struct drm_xe_vm_bind_op_error_capture {
+	/** @error: errno that occurred */
+	__s32 error;
+	/** @op: operation that encountered an error */
+	__u32 op;
+	/** @addr: address of bind op */
+	__u64 addr;
+	/** @size: size of bind */
+	__u64 size;
+};
+
+/** struct drm_xe_ext_vm_set_property - VM set property extension */
+struct drm_xe_ext_vm_set_property {
+	/** @base: base user extension */
+	struct xe_user_extension base;
+
+	/** @property: property to set */
+#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS		0
+	__u32 property;
+
+	/** @value: property value */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_create {
+	/** @extensions: Pointer to the first extension struct, if any */
+#define XE_VM_EXTENSION_SET_PROPERTY	0
+	__u64 extensions;
+
+	/** @flags: Flags */
+	__u32 flags;
+
+#define DRM_XE_VM_CREATE_SCRATCH_PAGE	(0x1 << 0)
+#define DRM_XE_VM_CREATE_COMPUTE_MODE	(0x1 << 1)
+#define DRM_XE_VM_CREATE_ASYNC_BIND_OPS	(0x1 << 2)
+#define DRM_XE_VM_CREATE_FAULT_MODE	(0x1 << 3)
+
+	/** @vm_id: Returned VM ID */
+	__u32 vm_id;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_destroy {
+	/** @vm_id: VM ID */
+	__u32 vm_id;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_bind_op {
+	/**
+	 * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP
+	 */
+	__u32 obj;
+
+	union {
+		/**
+		 * @obj_offset: Offset into the object, MBZ for CLEAR_RANGE,
+		 * ignored for unbind
+		 */
+		__u64 obj_offset;
+		/** @userptr: user pointer to bind on */
+		__u64 userptr;
+	};
+
+	/**
+	 * @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL
+	 */
+	__u64 range;
+
+	/** @addr: Address to operate on, MBZ for UNMAP_ALL */
+	__u64 addr;
+
+	/**
+	 * @gt_mask: Mask for which GTs to create binds for, 0 == All GTs,
+	 * only applies to creating new VMAs
+	 */
+	__u64 gt_mask;
+
+	/** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
+	__u32 op;
+
+	/** @region: Memory region to prefetch VMA to, instance not a mask */
+	__u32 region;
+
+#define XE_VM_BIND_OP_MAP		0x0
+#define XE_VM_BIND_OP_UNMAP		0x1
+#define XE_VM_BIND_OP_MAP_USERPTR	0x2
+#define XE_VM_BIND_OP_RESTART		0x3
+#define XE_VM_BIND_OP_UNMAP_ALL		0x4
+#define XE_VM_BIND_OP_PREFETCH		0x5
+
+#define XE_VM_BIND_FLAG_READONLY	(0x1 << 16)
+	/*
+	 * Bind op completions are always async, hence the support for out
+	 * sync. This flag indicates the allocation of the memory for new page
+	 * tables and the job to program the page tables is asynchronous
+	 * relative to the IOCTL. That part of a bind operation can fail under
+	 * memory pressure, the job in practice can't fail unless the system is
+	 * totally shot.
+	 *
+	 * If this flag is clear and the IOCTL doesn't return an error, in
+	 * practice the bind op is good and will complete.
+	 *
+	 * If this flag is set and the IOCTL doesn't return an error, the bind op
+	 * can still fail and recovery is needed. If configured, the bind op that
+	 * caused the error will be captured in drm_xe_vm_bind_op_error_capture.
+	 * Once the user sees the error (via a ufence +
+	 * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory
+	 * via non-async unbinds, and then restart all queued async bind ops via
+	 * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the
+	 * VM.
+	 *
+	 * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is
+	 * configured in the VM and must be set if the VM is configured with
+	 * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state.
+	 */
+#define XE_VM_BIND_FLAG_ASYNC		(0x1 << 17)
+	/*
+	 * Valid on a faulting VM only, do the MAP operation immediately rather
+	 * than deferring the MAP to the page fault handler.
+	 */
+#define XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 18)
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_bind {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @vm_id: The ID of the VM to bind to */
+	__u32 vm_id;
+
+	/**
+	 * @engine_id: engine_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
+	 * and engine must have same vm_id. If zero, the default VM bind engine
+	 * is used.
+	 */
+	__u32 engine_id;
+
+	/** @num_binds: number of binds in this IOCTL */
+	__u32 num_binds;
+
+	union {
+		/** @bind: used if num_binds == 1 */
+		struct drm_xe_vm_bind_op bind;
+		/**
+		 * @vector_of_binds: userptr to array of struct
+		 * drm_xe_vm_bind_op if num_binds > 1
+		 */
+		__u64 vector_of_binds;
+	};
+
+	/** @num_syncs: amount of syncs to wait on */
+	__u32 num_syncs;
+
+	/** @syncs: pointer to struct drm_xe_sync array */
+	__u64 syncs;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/** struct drm_xe_ext_engine_set_property - engine set property extension */
+struct drm_xe_ext_engine_set_property {
+	/** @base: base user extension */
+	struct xe_user_extension base;
+
+	/** @property: property to set */
+	__u32 property;
+
+	/** @value: property value */
+	__u64 value;
+};
+
+/**
+ * struct drm_xe_engine_set_property - engine set property
+ *
+ * Same namespace for extensions as drm_xe_engine_create
+ */
+struct drm_xe_engine_set_property {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @engine_id: Engine ID */
+	__u32 engine_id;
+
+	/** @property: property to set, one of the XE_ENGINE_PROPERTY_* below */
+#define XE_ENGINE_PROPERTY_PRIORITY			0
+#define XE_ENGINE_PROPERTY_TIMESLICE			1
+#define XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT		2
+	/*
+	 * Long running or ULLS engine mode. DMA fences are not allowed in this
+	 * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE; serves
+	 * as a sanity check that the UMD knows what it is doing. Can only be
+	 * set at engine create time.
+	 */
+#define XE_ENGINE_PROPERTY_COMPUTE_MODE			3
+#define XE_ENGINE_PROPERTY_PERSISTENCE			4
+#define XE_ENGINE_PROPERTY_JOB_TIMEOUT			5
+#define XE_ENGINE_PROPERTY_ACC_TRIGGER			6
+#define XE_ENGINE_PROPERTY_ACC_NOTIFY			7
+#define XE_ENGINE_PROPERTY_ACC_GRANULARITY		8
+	__u32 property;
+
+	/** @value: value for the property selected by @property */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_engine_create {
+	/** @extensions: Pointer to the first extension struct, if any */
+#define XE_ENGINE_EXTENSION_SET_PROPERTY               0
+	__u64 extensions;
+
+	/** @width: submission width (number of BBs per exec) for this engine */
+	__u16 width;
+
+	/** @num_placements: number of valid placements for this engine */
+	__u16 num_placements;
+
+	/** @vm_id: VM to use for this engine */
+	__u32 vm_id;
+
+	/** @flags: MBZ */
+	__u32 flags;
+
+	/** @engine_id: Returned engine ID */
+	__u32 engine_id;
+
+	/**
+	 * @instances: user pointer to a 2-d array of struct
+	 * drm_xe_engine_class_instance, length = width (i) * num_placements (j),
+	 * index = j + i * width
+	 * NOTE(review): that index only covers 0..length-1 when width is 1 or
+	 * equals num_placements; confirm the intended formula with the driver.
+	 */
+	__u64 instances;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_engine_destroy {
+	/** @engine_id: Engine ID */
+	__u32 engine_id;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_sync {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+	/** @flags: presumably takes the DRM_XE_SYNC_* values below - confirm */
+	__u32 flags;
+
+#define DRM_XE_SYNC_SYNCOBJ		0x0
+#define DRM_XE_SYNC_TIMELINE_SYNCOBJ	0x1
+#define DRM_XE_SYNC_DMA_BUF		0x2
+#define DRM_XE_SYNC_USER_FENCE		0x3
+#define DRM_XE_SYNC_SIGNAL		0x10
+	/* NOTE(review): no explicit pad between @flags and the 64-bit union */
+	union {
+		__u32 handle;
+		/**
+		 * @addr: Address of user fence. When sync passed in via exec
+		 * IOCTL this is a GPU address in the VM. When sync passed in
+		 * via VM bind IOCTL this is a user pointer. In either case, it
+		 * is the user's responsibility that this address is present
+		 * and mapped when the user fence is signalled. Must be qword
+		 * aligned.
+		 */
+		__u64 addr;
+	};
+	/** @timeline_value: presumably used with TIMELINE_SYNCOBJ - confirm */
+	__u64 timeline_value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_exec {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @engine_id: Engine ID for the batch buffer */
+	__u32 engine_id;
+
+	/** @num_syncs: Number of struct drm_xe_sync entries in the @syncs array */
+	__u32 num_syncs;
+
+	/** @syncs: Pointer to struct drm_xe_sync array. */
+	__u64 syncs;
+
+	/**
+	 * @address: address of batch buffer if num_batch_buffer == 1 or an
+	 * array of batch buffer addresses
+	 */
+	__u64 address;
+
+	/**
+	 * @num_batch_buffer: number of batch buffers in this exec, must match
+	 * the width of the engine
+	 */
+	__u16 num_batch_buffer;
+	/* NOTE(review): no explicit pad between the __u16 above and @reserved */
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_mmio {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+	/** @addr: presumably the MMIO offset to access - TODO confirm */
+	__u32 addr;
+	/** @flags: presumably the DRM_XE_MMIO_* values below - TODO confirm */
+	__u32 flags;
+
+#define DRM_XE_MMIO_8BIT	0x0
+#define DRM_XE_MMIO_16BIT	0x1
+#define DRM_XE_MMIO_32BIT	0x2
+#define DRM_XE_MMIO_64BIT	0x3
+#define DRM_XE_MMIO_BITS_MASK	0x3
+#define DRM_XE_MMIO_READ	0x4
+#define DRM_XE_MMIO_WRITE	0x8
+	/** @value: presumably the data written and/or read back - TODO confirm */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_wait_user_fence - wait user fence
+ *
+ * Wait on user fence, XE will wake up on every HW engine interrupt in the
+ * instances list and check if user fence is complete:
+ * (*addr & MASK) OP (VALUE & MASK)
+ *
+ * Returns to user on user fence completion or timeout.
+ */
+struct drm_xe_wait_user_fence {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+	union {
+		/**
+		 * @addr: user pointer address to wait on, must be qword aligned
+		 */
+		__u64 addr;
+		/**
+		 * @vm_id: The ID of the VM which encountered an error, used with
+		 * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear.
+		 */
+		__u64 vm_id;
+	};
+	/** @op: wait operation (type of comparison), DRM_XE_UFENCE_WAIT_* op */
+#define DRM_XE_UFENCE_WAIT_EQ	0
+#define DRM_XE_UFENCE_WAIT_NEQ	1
+#define DRM_XE_UFENCE_WAIT_GT	2
+#define DRM_XE_UFENCE_WAIT_GTE	3
+#define DRM_XE_UFENCE_WAIT_LT	4
+#define DRM_XE_UFENCE_WAIT_LTE	5
+	__u16 op;
+	/** @flags: wait flags, bitmask of the DRM_XE_UFENCE_WAIT_* bits below */
+#define DRM_XE_UFENCE_WAIT_SOFT_OP	(1 << 0)	/* e.g. Wait on VM bind */
+#define DRM_XE_UFENCE_WAIT_ABSTIME	(1 << 1)
+#define DRM_XE_UFENCE_WAIT_VM_ERROR	(1 << 2)
+	__u16 flags;
+	/** @value: compare value (NOTE(review): no explicit pad after @flags) */
+	__u64 value;
+	/** @mask: comparison mask, applied to both *addr and @value */
+#define DRM_XE_UFENCE_WAIT_U8		0xffu
+#define DRM_XE_UFENCE_WAIT_U16		0xffffu
+#define DRM_XE_UFENCE_WAIT_U32		0xffffffffu
+#define DRM_XE_UFENCE_WAIT_U64		0xffffffffffffffffu
+	__u64 mask;
+	/** @timeout: how long to wait before bailing, value in jiffies */
+	__s64 timeout;
+	/**
+	 * @num_engines: number of engine instances to wait on, must be zero
+	 * when DRM_XE_UFENCE_WAIT_SOFT_OP set
+	 */
+	__u64 num_engines;
+	/**
+	 * @instances: user pointer to array of drm_xe_engine_class_instance to
+	 * wait on, must be NULL when DRM_XE_UFENCE_WAIT_SOFT_OP set
+	 */
+	__u64 instances;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_madvise {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @vm_id: The ID of the VM in which the VMA exists */
+	__u32 vm_id;
+	/* NOTE(review): no explicit pad between @vm_id and the __u64 @range */
+	/** @range: Number of bytes in the VMA */
+	__u64 range;
+
+	/** @addr: Address of the VMA to operate on */
+	__u64 addr;
+
+	/*
+	 * Setting the preferred location will trigger a migrate of the VMA
+	 * backing store to new location if the backing store is already
+	 * allocated.
+	 */
+#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS	0
+#define DRM_XE_VM_MADVISE_PREFERRED_GT		1
+	/*
+	 * In this case lower 32 bits are mem class, upper 32 are GT.
+	 * Combination provides a single IOCTL plus migrate VMA to preferred
+	 * location.
+	 */
+#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT	2
+	/*
+	 * The CPU will do atomic memory operations to this VMA. Must be set on
+	 * some devices for atomics to behave correctly.
+	 */
+#define DRM_XE_VM_MADVISE_CPU_ATOMIC		3
+	/*
+	 * The device will do atomic memory operations to this VMA. Must be set
+	 * on some devices for atomics to behave correctly.
+	 */
+#define DRM_XE_VM_MADVISE_DEVICE_ATOMIC		4
+	/*
+	 * Priority WRT eviction (moving from preferred memory location due
+	 * to memory pressure). The lower the priority, the more likely to be
+	 * evicted.
+	 */
+#define DRM_XE_VM_MADVISE_PRIORITY		5
+#define		DRM_XE_VMA_PRIORITY_LOW		0
+#define		DRM_XE_VMA_PRIORITY_NORMAL	1	/* Default */
+#define		DRM_XE_VMA_PRIORITY_HIGH	2	/* Must be elevated user */
+	/* Pin the VMA in memory, must be elevated user */
+#define DRM_XE_VM_MADVISE_PIN			6
+
+	/** @property: property to set, one of the DRM_XE_VM_MADVISE_* above */
+	__u32 property;
+	/* NOTE(review): no explicit pad between @property and the __u64 @value */
+	/** @value: property value */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _UAPI_XE_DRM_H_ */