Merge tag 'drm-next-2024-01-10' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "This contains two major new drivers:

   - imagination is the first driver for Imagination Technologies
     devices; it only covers very specific devices, but there is hope to
     grow it

   - xe is a reboot of the i915 GPU side (it shares the display code),
     using a more upstream-focused development model and trying to
     maximise code sharing. It's not enabled for any hardware by
     default, and will hopefully get switched on for Intel's Lunar Lake.

  This also drops a bunch of the old UMS ioctls; UMS has been dead long
  enough.

  amdgpu has a bunch of new color management code that is being used in
  the Steam Deck.

  amdgpu also has a new ACPI WBRF interaction to help avoid radio
  interference.

  Otherwise it's the usual lots of changes in lots of places.

  Detailed summary:

  new drivers:
   - imagination - new driver for Imagination Technologies GPU
   - xe - new driver for Intel GPUs using core drm concepts

  core:
   - add CLOSE_FB ioctl (see the sketch after this list)
   - remove old UMS ioctls
   - increase max objects to accommodate AMD color mgmt
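
  A minimal userspace sketch of what the new ioctl enables, assuming
  current uAPI headers that expose DRM_IOCTL_MODE_CLOSEFB: unlike RMFB,
  CLOSE_FB releases the framebuffer handle without disabling the
  planes/CRTCs still scanning it out, which helps flicker-free handover
  between KMS clients.

      #include <stdint.h>
      #include <sys/ioctl.h>
      #include <drm/drm.h>

      /* Drop our handle to fb_id but leave the display pipe running. */
      static int close_fb(int drm_fd, uint32_t fb_id)
      {
              struct drm_mode_closefb closefb = {
                      .fb_id = fb_id, /* handle from ADDFB/ADDFB2 */
              };

              return ioctl(drm_fd, DRM_IOCTL_MODE_CLOSEFB, &closefb);
      }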

  encoder:
   - create per-encoder debugfs directory

  edid:
   - split out drm_eld
   - SAD helpers
   - drop edid_firmware module parameter

  format-helper:
   - cache format conversion buffers

  sched:
   - move from kthread to workqueue
   - rename some internals
   - implement dynamic job-flow control (see the sketch after this list)
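
  A hedged sketch of the reworked scheduler API (parameter names and
  order are assumptions to verify against include/drm/gpu_scheduler.h;
  the my_* names are hypothetical): jobs now carry "credits", the
  scheduler only pulls jobs while the ring's credit limit allows, and
  an optional callback lets drivers re-evaluate a job's cost right
  before it runs.

      #include <drm/gpu_scheduler.h>

      /* Optional: rescale a job's cost just before it is run. */
      static u32 my_update_job_credits(struct drm_sched_job *job)
      {
              return job->credits;
      }

      static const struct drm_sched_backend_ops my_ops = {
              /* .run_job, .timedout_job, .free_job as before... */
              .update_job_credits = my_update_job_credits, /* new */
      };

      static int my_ring_setup(struct drm_gpu_scheduler *sched,
                               u32 ring_credits, struct device *dev)
      {
              /* NULL submit_wq: the scheduler, now workqueue-based
               * instead of kthread-based, allocates its own. */
              return drm_sched_init(sched, &my_ops, NULL,
                                    DRM_SCHED_PRIORITY_COUNT,
                                    ring_credits, 0, HZ, NULL, NULL,
                                    "my-ring", dev);
      }

      /* Each job declares how many credits it consumes up front. */
      static int my_job_init(struct drm_sched_job *job,
                             struct drm_sched_entity *entity,
                             u32 credits, void *owner)
      {
              return drm_sched_job_init(job, entity, credits, owner);
      }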

  gpuvm:
   - provide more features to handle GEM objects

  client:
   - don't acquire module reference

  displayport:
   - add MST path property documentation

  fdinfo:
   - alignment fix

  dma-buf:
   - add fence timestamp helper
   - add fence deadline support (see the sketch after this list)
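
  A small in-kernel sketch of both additions (the helper names here are
  hypothetical; the dma_fence calls are the real API): the deadline
  hint lets a fence's signaler boost clocks to meet, say, a
  compositor's next vblank, and the timestamp helper returns when a
  fence signaled.

      #include <linux/dma-fence.h>
      #include <linux/ktime.h>

      /* Ask the signaler to try to finish within budget_us from now. */
      static void hint_fence_deadline(struct dma_fence *fence, u64 budget_us)
      {
              dma_fence_set_deadline(fence,
                                     ktime_add_us(ktime_get(), budget_us));
      }

      /* Signaling time of a signaled fence, or "now" if still pending. */
      static ktime_t fence_when(struct dma_fence *fence)
      {
              return dma_fence_timestamp(fence);
      }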

  bridge:
   - transparent aux-bridge for DP/USB-C
   - lt8912b: add suspend/resume support and power regulator support

  panel:
   - edp: AUO B116XTN02, BOE NT116WHM-N21,836X2, NV116WHM-N49
   - chromebook panel support
   - elida-kd35t133: rework pm
   - powkiddy RK2023 panel
   - himax-hx8394: drop prepare/unprepare and shutdown logic
   - BOE BP101WX1-100, Powkiddy X55, Ampire AM8001280G
   - Evervision VGG644804, SDC ATNA45AF01
   - nv3052c: register docs, init sequence fixes, Fascontek FS035VG158
   - st7701: Anbernic RG-ARC support
   - r63353 panel controller
   - Ilitek ILI9805 panel controller
   - AUO G156HAN04.0

  simplefb:
   - support memory regions
   - support power domains

  amdgpu:
   - add new 64-bit sequence number infrastructure
   - add AMD specific color management
   - ACPI WBRF support for RF interference handling
   - GPUVM updates
   - RAS updates
   - DCN 3.5 updates
   - Rework PCIe link speed handling
   - Document GPU reset types
   - DMUB fixes
   - eDP fixes
   - NBIO 7.9/7.11 updates
   - SubVP updates
   - XGMI PCIe state dumping for aqua vanjaram
   - GFX11 golden register updates
   - enable tunnelling on high-priority compute

  amdkfd:
   - Migrate TLB flushing logic to amdgpu
   - Trap handler fixes
   - Fix restore workers handling on suspend/resume
   - Fix possible memory leak in pqm_uninit()
   - support import/export of dma-bufs using GEM handles

  radeon:
   - fix possible overflows in command buffer checking
   - check for errors in ring_lock

  i915:
   - reorg display code for reuse in xe driver
   - fdinfo memory stats printing
   - DP MST bandwidth mgmt improvements
   - DP panel replay enabling
   - MTL C20 phy state verification
   - MTL DP DSC fractional bpp support
   - Audio fastset support
   - use dma_fence interfaces instead of i915_sw_fence
   - Separate gem and display code
   - AUX register macro refactoring
   - Separate display module/device parameters
   - Move display capabilities debugfs under display
   - Makefile cleanups
   - Register cleanups
   - Move display lock inits under display/
   - VLV/CHV DPIO PHY register and interface refactoring
   - DSI VBT sequence refactoring
   - C10/C20 PHY PLL hardware readout
   - DPLL code cleanups
   - Cleanup PXP plane protection checks
   - Improve display debug msgs
   - PSR selective fetch fixes/improvements
   - DP MST fixes
   - Xe2LPD FBC restrictions removed
   - DGFX uses direct VBT pin mapping
   - more MTL WAs
   - fix MTL eDP bug
   - eliminate use of kmap_atomic

  habanalabs:
   - sysfs entry to identify a device minor id with debugfs path
   - sysfs entry to expose device module id
   - add signed device info retrieval through INFO ioctl
   - add Gaudi2C device support
   - pcie reset prepare/done hooks

  msm:
   - Add support for SDM670, SM8650
   - Handle the CFG interconnect to fix obscure hangs/timeouts
   - Kconfig fix for QMP dependency
   - use managed allocators
   - DPU: SDM670, SM8650 support
   - DPU: Enable SmartDMA on SM8350 and SM8450
   - DP: enable runtime PM support
   - GPU: add metadata UAPI
   - GPU: move devcoredumps to GPU device
   - GPU: convert to drm_exec

  ivpu:
   - update FW API
   - new debugfs file
   - a new NOP job submission test mode
   - improve suspend/resume
   - PM improvements
   - MMU PT optimizations
   - firmware profile frequency support
   - support for uncached buffers
   - switch to gem shmem helpers
   - replace kthread with threaded irqs

  rockchip:
   - rk3066_hdmi: convert to atomic
   - vop2: support nv20 and nv30
   - rk3588 support

  mediatek:
   - use devm_platform_ioremap_resource
   - stop using iommu_present
   - MT8188 VDOSYS1 display support

  panfrost:
   - PM improvements
   - improve interrupt handling at poweroff

  qaic:
   - allow running with a single MSI
   - support host/device time sync
   - switch to persistent DRM devices

  exynos:
   - fix potential error pointer dereference
   - fix wrong error checking
   - add missing call to drm_atomic_helper_shutdown

  omapdrm:
   - dma-fence lockdep annotation fix

  tidss:
   - dma-fence lockdep annotation fix
   - support for AM62A7

  v3d:
   - BCM2712 (Raspberry Pi 5) support
   - fdinfo + gputop support
   - uapi for CPU job handling

  virtio-gpu:
   - add context debug name"

* tag 'drm-next-2024-01-10' of git://anongit.freedesktop.org/drm/drm: (2340 commits)
  drm/amd/display: Allow z8/z10 from driver
  drm/amd/display: fix bandwidth validation failure on DCN 2.1
  drm/amdgpu: apply the RV2 system aperture fix to RN/CZN as well
  drm/amd/display: Move fixpt_from_s3132 to amdgpu_dm
  drm/amd/display: Fix recent checkpatch errors in amdgpu_dm
  Revert "drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole"
  drm/amd/display: avoid stringop-overflow warnings for dp_decide_lane_settings()
  drm/amd/display: Fix power_helpers.c codestyle
  drm/amd/display: Fix hdcp_log.h codestyle
  drm/amd/display: Fix hdcp2_execution.c codestyle
  drm/amd/display: Fix hdcp_psp.h codestyle
  drm/amd/display: Fix freesync.c codestyle
  drm/amd/display: Fix hdcp_psp.c codestyle
  drm/amd/display: Fix hdcp1_execution.c codestyle
  drm/amd/pm/smu7: fix a memleak in smu7_hwmgr_backend_init
  drm/amdkfd: Fix iterator used outside loop in 'kfd_add_peer_prop()'
  drm/amdgpu: Drop 'fence' check in 'to_amdgpu_amdkfd_fence()'
  drm/amdkfd: Confirm list is non-empty before utilizing list_first_entry in kfd_topology.c
  drm/amdgpu: Fix '*fw' from request_firmware() not released in 'amdgpu_ucode_request()'
  drm/amdgpu: Fix variable 'mca_funcs' dereferenced before NULL check in 'amdgpu_mca_smu_get_mca_entry()'
  ...
Linus Torvalds 2024-01-12 11:32:19 -08:00
commit cf65598d59
1612 changed files with 132686 additions and 24975 deletions

@ -1,4 +1,4 @@
What: /sys/kernel/debug/accel/<n>/addr
What: /sys/kernel/debug/accel/<parent_device>/addr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -8,34 +8,34 @@ Description: Sets the device address to be used for read or write through
only when the IOMMU is disabled.
The acceptable value is a string that starts with "0x"
What: /sys/kernel/debug/accel/<n>/clk_gate
What: /sys/kernel/debug/accel/<parent_device>/clk_gate
Date: May 2020
KernelVersion: 5.8
Contact: ogabbay@kernel.org
Description: This setting is now deprecated as clock gating is handled solely by the f/w
What: /sys/kernel/debug/accel/<n>/command_buffers
What: /sys/kernel/debug/accel/<parent_device>/command_buffers
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently allocated
command buffers
What: /sys/kernel/debug/accel/<n>/command_submission
What: /sys/kernel/debug/accel/<parent_device>/command_submission
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently active
command submissions
What: /sys/kernel/debug/accel/<n>/command_submission_jobs
What: /sys/kernel/debug/accel/<parent_device>/command_submission_jobs
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with detailed information about each JOB (CB) of
each active command submission
What: /sys/kernel/debug/accel/<n>/data32
What: /sys/kernel/debug/accel/<parent_device>/data32
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -50,7 +50,7 @@ Description: Allows the root user to read or write directly through the
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/accel/<n>/data64
What: /sys/kernel/debug/accel/<parent_device>/data64
Date: Jan 2020
KernelVersion: 5.6
Contact: ogabbay@kernel.org
@ -65,7 +65,7 @@ Description: Allows the root user to read or write 64 bit data directly
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/accel/<n>/data_dma
What: /sys/kernel/debug/accel/<parent_device>/data_dma
Date: Apr 2021
KernelVersion: 5.13
Contact: ogabbay@kernel.org
@ -83,7 +83,7 @@ Description: Allows the root user to read from the device's internal
workloads.
Only supported on GAUDI at this stage.
What: /sys/kernel/debug/accel/<n>/device
What: /sys/kernel/debug/accel/<parent_device>/device
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -91,14 +91,14 @@ Description: Enables the root user to set the device to specific state.
Valid values are "disable", "enable", "suspend", "resume".
User can read this property to see the valid values
What: /sys/kernel/debug/accel/<n>/device_release_watchdog_timeout
What: /sys/kernel/debug/accel/<parent_device>/device_release_watchdog_timeout
Date: Oct 2022
KernelVersion: 6.2
Contact: ttayar@habana.ai
Description: The watchdog timeout value in seconds for a device release upon
certain error cases, after which the device is reset.
What: /sys/kernel/debug/accel/<n>/dma_size
What: /sys/kernel/debug/accel/<parent_device>/dma_size
Date: Apr 2021
KernelVersion: 5.13
Contact: ogabbay@kernel.org
@ -108,7 +108,7 @@ Description: Specify the size of the DMA transaction when using DMA to read
When the write is finished, the user can read the "data_dma"
blob
What: /sys/kernel/debug/accel/<n>/dump_razwi_events
What: /sys/kernel/debug/accel/<parent_device>/dump_razwi_events
Date: Aug 2022
KernelVersion: 5.20
Contact: fkassabri@habana.ai
@ -117,7 +117,7 @@ Description: Dumps all razwi events to dmesg if exist.
the routine will clear the status register.
Usage: cat dump_razwi_events
What: /sys/kernel/debug/accel/<n>/dump_security_violations
What: /sys/kernel/debug/accel/<parent_device>/dump_security_violations
Date: Jan 2021
KernelVersion: 5.12
Contact: ogabbay@kernel.org
@ -125,14 +125,14 @@ Description: Dumps all security violations to dmesg. This will also ack
all security violations, meaning those violations will not be
dumped next time user calls this API
What: /sys/kernel/debug/accel/<n>/engines
What: /sys/kernel/debug/accel/<parent_device>/engines
Date: Jul 2019
KernelVersion: 5.3
Contact: ogabbay@kernel.org
Description: Displays the status registers values of the device engines and
their derived idle status
What: /sys/kernel/debug/accel/<n>/i2c_addr
What: /sys/kernel/debug/accel/<parent_device>/i2c_addr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -140,7 +140,7 @@ Description: Sets I2C device address for I2C transaction that is generated
by the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/accel/<n>/i2c_bus
What: /sys/kernel/debug/accel/<parent_device>/i2c_bus
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -148,7 +148,7 @@ Description: Sets I2C bus address for I2C transaction that is generated by
the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/accel/<n>/i2c_data
What: /sys/kernel/debug/accel/<parent_device>/i2c_data
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -157,7 +157,7 @@ Description: Triggers an I2C transaction that is generated by the device's
reading from the file generates a read transaction, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/accel/<n>/i2c_len
What: /sys/kernel/debug/accel/<parent_device>/i2c_len
Date: Dec 2021
KernelVersion: 5.17
Contact: obitton@habana.ai
@ -165,7 +165,7 @@ Description: Sets I2C length in bytes for I2C transaction that is generated b
the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/accel/<n>/i2c_reg
What: /sys/kernel/debug/accel/<parent_device>/i2c_reg
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -173,35 +173,35 @@ Description: Sets I2C register id for I2C transaction that is generated by
the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/accel/<n>/led0
What: /sys/kernel/debug/accel/<parent_device>/led0
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the state of the first S/W led on the device, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/accel/<n>/led1
What: /sys/kernel/debug/accel/<parent_device>/led1
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the state of the second S/W led on the device, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/accel/<n>/led2
What: /sys/kernel/debug/accel/<parent_device>/led2
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the state of the third S/W led on the device, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/accel/<n>/memory_scrub
What: /sys/kernel/debug/accel/<parent_device>/memory_scrub
Date: May 2022
KernelVersion: 5.19
Contact: dhirschfeld@habana.ai
Description: Allows the root user to scrub the dram memory. The scrubbing
value can be set using the debugfs file memory_scrub_val.
What: /sys/kernel/debug/accel/<n>/memory_scrub_val
What: /sys/kernel/debug/accel/<parent_device>/memory_scrub_val
Date: May 2022
KernelVersion: 5.19
Contact: dhirschfeld@habana.ai
@ -209,7 +209,7 @@ Description: The value to which the dram will be set to when the user
scrubs the dram using 'memory_scrub' debugfs file and
the scrubbing value when using module param 'memory_scrub'
What: /sys/kernel/debug/accel/<n>/mmu
What: /sys/kernel/debug/accel/<parent_device>/mmu
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -219,7 +219,7 @@ Description: Displays the hop values and physical address for a given ASID
e.g. to display info about VA 0x1000 for ASID 1 you need to do:
echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu
What: /sys/kernel/debug/accel/<n>/mmu_error
What: /sys/kernel/debug/accel/<parent_device>/mmu_error
Date: Mar 2021
KernelVersion: 5.12
Contact: fkassabri@habana.ai
@ -229,7 +229,7 @@ Description: Check and display page fault or access violation mmu errors for
echo "0x200" > /sys/kernel/debug/accel/0/mmu_error
cat /sys/kernel/debug/accel/0/mmu_error
What: /sys/kernel/debug/accel/<n>/monitor_dump
What: /sys/kernel/debug/accel/<parent_device>/monitor_dump
Date: Mar 2022
KernelVersion: 5.19
Contact: osharabi@habana.ai
@ -243,7 +243,7 @@ Description: Allows the root user to dump monitors status from the device's
This interface doesn't support concurrency in the same device.
Only supported on GAUDI.
What: /sys/kernel/debug/accel/<n>/monitor_dump_trig
What: /sys/kernel/debug/accel/<parent_device>/monitor_dump_trig
Date: Mar 2022
KernelVersion: 5.19
Contact: osharabi@habana.ai
@ -253,14 +253,14 @@ Description: Triggers dump of monitor data. The value to trigger the operatio
When the write is finished, the user can read the "monitor_dump"
blob
What: /sys/kernel/debug/accel/<n>/set_power_state
What: /sys/kernel/debug/accel/<parent_device>/set_power_state
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the PCI power state. Valid values are "1" for D0 and "2"
for D3Hot
What: /sys/kernel/debug/accel/<n>/skip_reset_on_timeout
What: /sys/kernel/debug/accel/<parent_device>/skip_reset_on_timeout
Date: Jun 2021
KernelVersion: 5.13
Contact: ynudelman@habana.ai
@ -268,7 +268,7 @@ Description: Sets the skip reset on timeout option for the device. Value of
"0" means device will be reset in case some CS has timed out,
otherwise it will not be reset.
What: /sys/kernel/debug/accel/<n>/state_dump
What: /sys/kernel/debug/accel/<parent_device>/state_dump
Date: Oct 2021
KernelVersion: 5.15
Contact: ynudelman@habana.ai
@ -279,7 +279,7 @@ Description: Gets the state dump occurring on a CS timeout or failure.
Writing an integer X discards X state dumps, so that the
next read would return X+1-st newest state dump.
What: /sys/kernel/debug/accel/<n>/stop_on_err
What: /sys/kernel/debug/accel/<parent_device>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6
Contact: ogabbay@kernel.org
@ -287,13 +287,13 @@ Description: Sets the stop-on_error option for the device engines. Value of
"0" is for disable, otherwise enable.
Relevant only for GOYA and GAUDI.
What: /sys/kernel/debug/accel/<n>/timeout_locked
What: /sys/kernel/debug/accel/<parent_device>/timeout_locked
Date: Sep 2021
KernelVersion: 5.16
Contact: obitton@habana.ai
Description: Sets the command submission timeout value in seconds.
What: /sys/kernel/debug/accel/<n>/userptr
What: /sys/kernel/debug/accel/<parent_device>/userptr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -301,7 +301,7 @@ Description: Displays a list with information about the current user
pointers (user virtual addresses) that are pinned and mapped
to DMA addresses
What: /sys/kernel/debug/accel/<n>/userptr_lookup
What: /sys/kernel/debug/accel/<parent_device>/userptr_lookup
Date: Oct 2021
KernelVersion: 5.15
Contact: ogabbay@kernel.org
@ -309,7 +309,7 @@ Description: Allows to search for specific user pointers (user virtual
addresses) that are pinned and mapped to DMA addresses, and see
their resolution to the specific dma address.
What: /sys/kernel/debug/accel/<n>/vm
What: /sys/kernel/debug/accel/<parent_device>/vm
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org


@ -149,6 +149,18 @@ Contact: ogabbay@kernel.org
Description: Displays the current clock frequency, in Hz, of the MME compute
engine. This property is valid only for the Goya ASIC family
What: /sys/class/accel/accel<n>/device/module_id
Date: Nov 2023
KernelVersion: not yet upstreamed
Contact: ogabbay@kernel.org
Description: Displays the device's module id
What: /sys/class/accel/accel<n>/device/parent_device
Date: Nov 2023
KernelVersion: 6.8
Contact: ttayar@habana.ai
Description: Displays the name of the parent device of the accel device
What: /sys/class/accel/accel<n>/device/pci_addr
Date: Jan 2019
KernelVersion: 5.1


@ -0,0 +1,70 @@
What: /sys/devices/.../hwmon/hwmon<i>/power1_max
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
Description: RW. Card reactive sustained (PL1) power limit in microwatts.
The power controller will throttle the operating frequency
if the power averaged over a window (typically seconds)
exceeds this limit. A read value of 0 means that the PL1
power limit is disabled, writing 0 disables the
limit. Writing values > 0 and <= TDP will enable the power limit.
Only supported for particular Intel xe graphics platforms.
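
A short illustrative sketch (the hwmon index is a system-specific
assumption): the attribute takes microwatts, so capping the card at a
25 W sustained limit means writing 25000000, and writing 0 disables
the PL1 limit entirely.

    #include <stdio.h>

    int main(void)
    {
            /* hwmon2 is machine-specific; match the xe entry by name. */
            FILE *f = fopen("/sys/class/hwmon/hwmon2/power1_max", "w");

            if (!f)
                    return 1;
            fprintf(f, "%d\n", 25000000); /* 25 W in microwatts */
            return fclose(f);
    }
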
What: /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
Description: RO. Card default power limit (default TDP setting).
Only supported for particular Intel xe graphics platforms.
What: /sys/devices/.../hwmon/hwmon<i>/power1_crit
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
Description: RW. Card reactive critical (I1) power limit in microwatts.
Card reactive critical (I1) power limit in microwatts is exposed
for client products. The power controller will throttle the
operating frequency if the power averaged over a window exceeds
this limit.
Only supported for particular Intel xe graphics platforms.
What: /sys/devices/.../hwmon/hwmon<i>/curr1_crit
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
Description: RW. Card reactive critical (I1) power limit in milliamperes.
Card reactive critical (I1) power limit in milliamperes is
exposed for server products. The power controller will throttle
the operating frequency if the power averaged over a window
exceeds this limit.
What: /sys/devices/.../hwmon/hwmon<i>/in0_input
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
Description: RO. Current Voltage in millivolt.
Only supported for particular Intel xe graphics platforms.
What: /sys/devices/.../hwmon/hwmon<i>/energy1_input
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
Description: RO. Energy input of device in microjoules.
Only supported for particular Intel xe graphics platforms.
What: /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
Date: October 2023
KernelVersion: 6.6
Contact: intel-xe@lists.freedesktop.org
Description: RW. Sustained power limit interval (Tau in PL1/Tau) in
milliseconds over which sustained power is averaged.
Only supported for particular Intel xe graphics platforms.


@ -36,8 +36,9 @@ AIC100 DID (0xa100).
AIC100 does not implement FLR (function level reset).
AIC100 implements MSI but does not implement MSI-X. AIC100 requires 17 MSIs to
operate (1 for MHI, 16 for the DMA Bridge).
AIC100 implements MSI but does not implement MSI-X. AIC100 prefers 17 MSIs to
operate (1 for MHI, 16 for the DMA Bridge). Falling back to 1 MSI is possible in
scenarios where reserving 32 MSIs isn't feasible.
As a PCIe device, AIC100 utilizes BARs to provide host interfaces to the device
hardware. AIC100 provides 3, 64-bit BARs.
@ -220,10 +221,14 @@ of the defined channels, and their uses.
+----------------+---------+----------+----------------------------------------+
| QAIC_DEBUG | 18 & 19 | AMSS | Not used. |
+----------------+---------+----------+----------------------------------------+
| QAIC_TIMESYNC | 20 & 21 | SBL/AMSS | Used to synchronize timestamps in the |
| QAIC_TIMESYNC | 20 & 21 | SBL | Used to synchronize timestamps in the |
| | | | device side logs with the host time |
| | | | source. |
+----------------+---------+----------+----------------------------------------+
| QAIC_TIMESYNC | 22 & 23 | AMSS | Used to periodically synchronize |
| _PERIODIC | | | timestamps in the device side logs with|
| | | | the host time source. |
+----------------+---------+----------+----------------------------------------+
DMA Bridge
==========


@ -10,6 +10,9 @@ accelerator products.
Interrupts
==========
IRQ Storm Mitigation
--------------------
While the AIC100 DMA Bridge hardware implements an IRQ storm mitigation
mechanism, it is still possible for an IRQ storm to occur. A storm can happen
if the workload is particularly quick, and the host is responsive. If the host
@ -35,6 +38,26 @@ generates 100k IRQs per second (per /proc/interrupts) is reduced to roughly 64
IRQs over 5 minutes while keeping the host system stable, and having the same
workload throughput performance (within run to run noise variation).
Single MSI Mode
---------------
MultiMSI is not well supported on all systems; virtualized ones even less so
(circa 2023). Between hypervisors masking the PCIe MSI capability structure and
the large memory requirements for vIOMMUs (required for supporting MultiMSI),
it is useful to be able to fall back to a single MSI when needed.
To support this fallback, we allow the case where only one MSI is able to be
allocated, and share that one MSI between MHI and the DBCs. The device detects
when only one MSI has been configured and directs the interrupts for the DBCs
to the interrupt normally used for MHI. Unfortunately this means that the
interrupt handlers for every DBC and MHI wake up for every interrupt that
arrives; however, the DBC threaded irq handlers are only started when there is
work to be done (MHI will always start its threaded handler).
If the DBC is configured to force MSI interrupts, this can circumvent the
software IRQ storm mitigation mentioned above. Since the MSI is shared it is
never disabled, allowing each new entry to the FIFO to trigger a new interrupt.
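
A hedged sketch of this fallback (the function name is hypothetical;
pci_alloc_irq_vectors() is the standard kernel API): request the full
vector count first, then degrade to one shared MSI if the platform
cannot provide it.

    #include <linux/pci.h>

    static int qaic_request_irqs(struct pci_dev *pdev)
    {
            /* Prefer the full complement of vectors... */
            int nvec = pci_alloc_irq_vectors(pdev, 32, 32, PCI_IRQ_MSI);

            /* ...else single MSI mode: MHI and the DBCs share one. */
            if (nvec < 0)
                    nvec = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);

            return nvec;
    }
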
Neural Network Control (NNC) Protocol
=====================================
@ -70,8 +93,15 @@ commands (does not impact QAIC).
uAPI
====
QAIC creates an accel device per physical PCIe device. This accel device exists
for as long as the PCIe device is known to Linux.
The PCIe device may not be in a state to accept requests from userspace at
all times. QAIC will trigger KOBJ_ONLINE/OFFLINE uevents to advertise when the
device can accept requests (ONLINE) and when the device is no longer accepting
requests (OFFLINE) because of a reset or other state transition.
QAIC defines a number of driver specific IOCTLs as part of the userspace API.
This section describes those APIs.
DRM_IOCTL_QAIC_MANAGE
This IOCTL allows userspace to send a NNC request to the QSM. The call will
@ -178,3 +208,8 @@ overrides this for that call. Default is 5000 (5 seconds).
Sets the polling interval in microseconds (us) when datapath polling is active.
Takes effect at the next polling interval. Default is 100 (100 us).
**timesync_delay_ms (unsigned int)**
Sets the time interval in milliseconds (ms) between two consecutive timesync
operations. Default is 1000 (1000 ms).


@ -153,6 +153,8 @@ NOTE: Some pages, such as DAX pages, cannot be pinned with longterm pins. That's
because DAX pages do not have a separate page cache, and so "pinning" implies
locking down file system blocks, which is not (yet) supported in that way.
.. _mmu-notifier-registration-case:
CASE 3: MMU notifier registration, with or without page faulting hardware
-------------------------------------------------------------------------
Device drivers can pin pages via get_user_pages*(), and register for mmu


@ -55,6 +55,27 @@ properties:
- port@0
- port@1
vcchdmipll-supply:
description: A 1.8V supply that powers the HDMI PLL.
vcchdmitx-supply:
description: A 1.8V supply that powers the HDMI TX part.
vcclvdspll-supply:
description: A 1.8V supply that powers the LVDS PLL.
vcclvdstx-supply:
description: A 1.8V supply that powers the LVDS TX part.
vccmipirx-supply:
description: A 1.8V supply that powers the MIPI RX part.
vccsysclk-supply:
description: A 1.8V supply that powers the SYSCLK.
vdd-supply:
description: A 1.8V supply that powers the digital part.
required:
- compatible
- reg


@ -26,8 +26,10 @@ properties:
- qcom,sc8280xp-edp
- qcom,sdm845-dp
- qcom,sm8350-dp
- qcom,sm8650-dp
- items:
- enum:
- qcom,sm8150-dp
- qcom,sm8250-dp
- qcom,sm8450-dp
- qcom,sm8550-dp


@ -25,6 +25,7 @@ properties:
- qcom,sc7180-dsi-ctrl
- qcom,sc7280-dsi-ctrl
- qcom,sdm660-dsi-ctrl
- qcom,sdm670-dsi-ctrl
- qcom,sdm845-dsi-ctrl
- qcom,sm6115-dsi-ctrl
- qcom,sm6125-dsi-ctrl
@ -35,6 +36,7 @@ properties:
- qcom,sm8350-dsi-ctrl
- qcom,sm8450-dsi-ctrl
- qcom,sm8550-dsi-ctrl
- qcom,sm8650-dsi-ctrl
- const: qcom,mdss-dsi-ctrl
- enum:
- qcom,dsi-ctrl-6g-qcm2290
@ -333,6 +335,7 @@ allOf:
- qcom,sm8350-dsi-ctrl
- qcom,sm8450-dsi-ctrl
- qcom,sm8550-dsi-ctrl
- qcom,sm8650-dsi-ctrl
then:
properties:
clocks:


@ -22,6 +22,7 @@ properties:
- qcom,sm8350-dsi-phy-5nm
- qcom,sm8450-dsi-phy-5nm
- qcom,sm8550-dsi-phy-4nm
- qcom,sm8650-dsi-phy-4nm
reg:
items:


@ -61,17 +61,27 @@ properties:
ranges: true
# This is not a perfect description, but it's impossible to discern and match
# the entries like we do with interconnect-names
interconnects:
minItems: 1
items:
- description: Interconnect path from mdp0 (or a single mdp) port to the data bus
- description: Interconnect path from mdp1 port to the data bus
- description: Interconnect path from CPU to the reg bus
interconnect-names:
minItems: 1
items:
- const: mdp0-mem
- const: mdp1-mem
oneOf:
- minItems: 1
items:
- const: mdp0-mem
- const: cpu-cfg
- minItems: 2
items:
- const: mdp0-mem
- const: mdp1-mem
- const: cpu-cfg
resets:
items:


@ -36,10 +36,14 @@ properties:
maxItems: 2
interconnects:
maxItems: 1
items:
- description: Interconnect path from mdp0 port to the data bus
- description: Interconnect path from CPU to the reg bus
interconnect-names:
maxItems: 1
items:
- const: mdp0-mem
- const: cpu-cfg
patternProperties:
"^display-controller@[0-9a-f]+$":
@ -56,7 +60,9 @@ patternProperties:
properties:
compatible:
const: qcom,dsi-ctrl-6g-qcm2290
items:
- const: qcom,qcm2290-dsi-ctrl
- const: qcom,mdss-dsi-ctrl
"^phy@[0-9a-f]+$":
type: object
@ -96,8 +102,10 @@ examples:
interrupt-controller;
#interrupt-cells = <1>;
interconnects = <&mmrt_virt MASTER_MDP0 &bimc SLAVE_EBI1>;
interconnect-names = "mdp0-mem";
interconnects = <&mmrt_virt MASTER_MDP0 &bimc SLAVE_EBI1>,
<&bimc MASTER_APPSS_PROC &config_noc SLAVE_DISPLAY_CFG>;
interconnect-names = "mdp0-mem",
"cpu-cfg";
iommus = <&apps_smmu 0x420 0x2>,
<&apps_smmu 0x421 0x0>;
@ -136,7 +144,8 @@ examples:
};
dsi@5e94000 {
compatible = "qcom,dsi-ctrl-6g-qcm2290";
compatible = "qcom,qcm2290-dsi-ctrl",
"qcom,mdss-dsi-ctrl";
reg = <0x05e94000 0x400>;
reg-names = "dsi_ctrl";


@ -36,10 +36,14 @@ properties:
maxItems: 1
interconnects:
maxItems: 1
items:
- description: Interconnect path from mdp0 port to the data bus
- description: Interconnect path from CPU to the reg bus
interconnect-names:
maxItems: 1
items:
- const: mdp0-mem
- const: cpu-cfg
patternProperties:
"^display-controller@[0-9a-f]+$":
@ -106,8 +110,10 @@ examples:
interrupt-controller;
#interrupt-cells = <1>;
interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>;
interconnect-names = "mdp0-mem";
interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>,
<&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_DISPLAY_CFG>;
interconnect-names = "mdp0-mem",
"cpu-cfg";
iommus = <&apps_smmu 0x800 0x2>;
ranges;


@ -36,10 +36,14 @@ properties:
maxItems: 1
interconnects:
maxItems: 1
items:
- description: Interconnect path from mdp0 port to the data bus
- description: Interconnect path from CPU to the reg bus
interconnect-names:
maxItems: 1
items:
- const: mdp0-mem
- const: cpu-cfg
patternProperties:
"^display-controller@[0-9a-f]+$":
@ -118,8 +122,10 @@ examples:
interrupt-controller;
#interrupt-cells = <1>;
interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>;
interconnect-names = "mdp0-mem";
interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>,
<&gem_noc MASTER_APPSS_PROC &cnoc2 SLAVE_DISPLAY_CFG>;
interconnect-names = "mdp0-mem",
"cpu-cfg";
iommus = <&apps_smmu 0x900 0x402>;
ranges;


@ -0,0 +1,292 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/msm/qcom,sdm670-mdss.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm SDM670 Display MDSS
maintainers:
- Richard Acayan <mailingradian@gmail.com>
description:
SDM670 MSM Mobile Display Subsystem (MDSS), which encapsulates sub-blocks
like DPU display controller, DSI and DP interfaces etc.
$ref: /schemas/display/msm/mdss-common.yaml#
properties:
compatible:
const: qcom,sdm670-mdss
clocks:
items:
- description: Display AHB clock from gcc
- description: Display core clock
clock-names:
items:
- const: iface
- const: core
iommus:
maxItems: 2
interconnects:
maxItems: 2
interconnect-names:
maxItems: 2
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sdm670-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sdm670-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
contains:
const: qcom,sdm670-dsi-ctrl
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-10nm
required:
- compatible
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/qcom,dispcc-sdm845.h>
#include <dt-bindings/clock/qcom,gcc-sdm845.h>
#include <dt-bindings/clock/qcom,rpmh.h>
#include <dt-bindings/interconnect/qcom,sdm670-rpmh.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/qcom-rpmpd.h>
display-subsystem@ae00000 {
compatible = "qcom,sdm670-mdss";
reg = <0x0ae00000 0x1000>;
reg-names = "mdss";
power-domains = <&dispcc MDSS_GDSC>;
clocks = <&gcc GCC_DISP_AHB_CLK>,
<&dispcc DISP_CC_MDSS_MDP_CLK>;
clock-names = "iface", "core";
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
interrupt-controller;
#interrupt-cells = <1>;
interconnects = <&mmss_noc MASTER_MDP_PORT0 0 &mem_noc SLAVE_EBI_CH0 0>,
<&mmss_noc MASTER_MDP_PORT1 0 &mem_noc SLAVE_EBI_CH0 0>;
interconnect-names = "mdp0-mem", "mdp1-mem";
iommus = <&apps_smmu 0x880 0x8>,
<&apps_smmu 0xc80 0x8>;
#address-cells = <1>;
#size-cells = <1>;
ranges;
display-controller@ae01000 {
compatible = "qcom,sdm670-dpu";
reg = <0x0ae01000 0x8f000>,
<0x0aeb0000 0x2008>;
reg-names = "mdp", "vbif";
clocks = <&gcc GCC_DISP_AXI_CLK>,
<&dispcc DISP_CC_MDSS_AHB_CLK>,
<&dispcc DISP_CC_MDSS_AXI_CLK>,
<&dispcc DISP_CC_MDSS_MDP_CLK>,
<&dispcc DISP_CC_MDSS_VSYNC_CLK>;
clock-names = "gcc-bus", "iface", "bus", "core", "vsync";
interrupt-parent = <&mdss>;
interrupts = <0>;
power-domains = <&rpmhpd SDM670_CX>;
operating-points-v2 = <&mdp_opp_table>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dpu_intf1_out: endpoint {
remote-endpoint = <&mdss_dsi0_in>;
};
};
port@1 {
reg = <1>;
dpu_intf2_out: endpoint {
remote-endpoint = <&mdss_dsi1_in>;
};
};
};
};
dsi@ae94000 {
compatible = "qcom,sdm670-dsi-ctrl", "qcom,mdss-dsi-ctrl";
reg = <0x0ae94000 0x400>;
reg-names = "dsi_ctrl";
interrupt-parent = <&mdss>;
interrupts = <4>;
clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK>,
<&dispcc DISP_CC_MDSS_BYTE0_INTF_CLK>,
<&dispcc DISP_CC_MDSS_PCLK0_CLK>,
<&dispcc DISP_CC_MDSS_ESC0_CLK>,
<&dispcc DISP_CC_MDSS_AHB_CLK>,
<&dispcc DISP_CC_MDSS_AXI_CLK>;
clock-names = "byte",
"byte_intf",
"pixel",
"core",
"iface",
"bus";
assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK_SRC>,
<&dispcc DISP_CC_MDSS_PCLK0_CLK_SRC>;
assigned-clock-parents = <&mdss_dsi0_phy 0>, <&mdss_dsi0_phy 1>;
operating-points-v2 = <&dsi_opp_table>;
power-domains = <&rpmhpd SDM670_CX>;
phys = <&mdss_dsi0_phy>;
phy-names = "dsi";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
mdss_dsi0_in: endpoint {
remote-endpoint = <&dpu_intf1_out>;
};
};
port@1 {
reg = <1>;
mdss_dsi0_out: endpoint {
};
};
};
};
mdss_dsi0_phy: phy@ae94400 {
compatible = "qcom,dsi-phy-10nm";
reg = <0x0ae94400 0x200>,
<0x0ae94600 0x280>,
<0x0ae94a00 0x1e0>;
reg-names = "dsi_phy",
"dsi_phy_lane",
"dsi_pll";
#clock-cells = <1>;
#phy-cells = <0>;
clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
<&rpmhcc RPMH_CXO_CLK>;
clock-names = "iface", "ref";
vdds-supply = <&vreg_dsi_phy>;
};
dsi@ae96000 {
compatible = "qcom,sdm670-dsi-ctrl", "qcom,mdss-dsi-ctrl";
reg = <0x0ae96000 0x400>;
reg-names = "dsi_ctrl";
interrupt-parent = <&mdss>;
interrupts = <5>;
clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK>,
<&dispcc DISP_CC_MDSS_BYTE1_INTF_CLK>,
<&dispcc DISP_CC_MDSS_PCLK1_CLK>,
<&dispcc DISP_CC_MDSS_ESC1_CLK>,
<&dispcc DISP_CC_MDSS_AHB_CLK>,
<&dispcc DISP_CC_MDSS_AXI_CLK>;
clock-names = "byte",
"byte_intf",
"pixel",
"core",
"iface",
"bus";
assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK_SRC>,
<&dispcc DISP_CC_MDSS_PCLK1_CLK_SRC>;
assigned-clock-parents = <&dsi1_phy 0>, <&dsi1_phy 1>;
operating-points-v2 = <&dsi_opp_table>;
power-domains = <&rpmhpd SDM670_CX>;
phys = <&dsi1_phy>;
phy-names = "dsi";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
mdss_dsi1_in: endpoint {
remote-endpoint = <&dpu_intf2_out>;
};
};
port@1 {
reg = <1>;
mdss_dsi1_out: endpoint {
};
};
};
};
mdss_dsi1_phy: phy@ae96400 {
compatible = "qcom,dsi-phy-10nm";
reg = <0x0ae96400 0x200>,
<0x0ae96600 0x280>,
<0x0ae96a00 0x10e>;
reg-names = "dsi_phy",
"dsi_phy_lane",
"dsi_pll";
#clock-cells = <1>;
#phy-cells = <0>;
clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
<&rpmhcc RPMH_CXO_CLK>;
clock-names = "iface", "ref";
vdds-supply = <&vreg_dsi_phy>;
};
};
...


@ -13,7 +13,9 @@ $ref: /schemas/display/msm/dpu-common.yaml#
properties:
compatible:
const: qcom,sdm845-dpu
enum:
- qcom,sdm670-dpu
- qcom,sdm845-dpu
reg:
items:


@ -29,6 +29,16 @@ properties:
iommus:
maxItems: 2
interconnects:
items:
- description: Interconnect path from mdp0 port to the data bus
- description: Interconnect path from CPU to the reg bus
interconnect-names:
items:
- const: mdp0-mem
- const: cpu-cfg
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object


@ -35,10 +35,14 @@ properties:
maxItems: 1
interconnects:
maxItems: 2
items:
- description: Interconnect path from mdp0 port to the data bus
- description: Interconnect path from CPU to the reg bus
interconnect-names:
maxItems: 2
items:
- const: mdp0-mem
- const: cpu-cfg
patternProperties:
"^display-controller@[0-9a-f]+$":


@ -35,10 +35,14 @@ properties:
maxItems: 1
interconnects:
maxItems: 2
items:
- description: Interconnect path from mdp0 port to the data bus
- description: Interconnect path from CPU to the reg bus
interconnect-names:
maxItems: 2
items:
- const: mdp0-mem
- const: cpu-cfg
patternProperties:
"^display-controller@[0-9a-f]+$":


@ -35,10 +35,14 @@ properties:
maxItems: 1
interconnects:
maxItems: 2
items:
- description: Interconnect path from mdp0 port to the data bus
- description: Interconnect path from CPU to the reg bus
interconnect-names:
maxItems: 2
items:
- const: mdp0-mem
- const: cpu-cfg
patternProperties:
"^display-controller@[0-9a-f]+$":


@ -69,7 +69,7 @@ patternProperties:
properties:
compatible:
const: qcom,dsi-phy-7nm
const: qcom,dsi-phy-7nm-8150
unevaluatedProperties: false
@ -247,7 +247,7 @@ examples:
};
dsi0_phy: phy@ae94400 {
compatible = "qcom,dsi-phy-7nm";
compatible = "qcom,dsi-phy-7nm-8150";
reg = <0x0ae94400 0x200>,
<0x0ae94600 0x280>,
<0x0ae94900 0x260>;
@ -318,7 +318,7 @@ examples:
};
dsi1_phy: phy@ae96400 {
compatible = "qcom,dsi-phy-7nm";
compatible = "qcom,dsi-phy-7nm-8150";
reg = <0x0ae96400 0x200>,
<0x0ae96600 0x280>,
<0x0ae96900 0x260>;


@ -52,6 +52,16 @@ patternProperties:
compatible:
const: qcom,sm8250-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
- const: qcom,sm8250-dp
- const: qcom,sm8350-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true


@ -30,10 +30,10 @@ properties:
maxItems: 1
interconnects:
maxItems: 2
maxItems: 3
interconnect-names:
maxItems: 2
maxItems: 3
patternProperties:
"^display-controller@[0-9a-f]+$":
@ -91,9 +91,12 @@ examples:
reg = <0x0ae00000 0x1000>;
reg-names = "mdss";
interconnects = <&mmss_noc MASTER_MDP_DISP 0 &mc_virt SLAVE_EBI1_DISP 0>,
<&mmss_noc MASTER_MDP_DISP 0 &mc_virt SLAVE_EBI1_DISP 0>;
interconnect-names = "mdp0-mem", "mdp1-mem";
interconnects = <&mmss_noc MASTER_MDP_DISP &mc_virt SLAVE_EBI1_DISP>,
<&mmss_noc MASTER_MDP_DISP &mc_virt SLAVE_EBI1_DISP>,
<&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_DISPLAY_CFG>;
interconnect-names = "mdp0-mem",
"mdp1-mem",
"cpu-cfg";
resets = <&dispcc DISP_CC_MDSS_CORE_BCR>;


@ -0,0 +1,127 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/msm/qcom,sm8650-dpu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm SM8650 Display DPU
maintainers:
- Neil Armstrong <neil.armstrong@linaro.org>
$ref: /schemas/display/msm/dpu-common.yaml#
properties:
compatible:
const: qcom,sm8650-dpu
reg:
items:
- description: Address offset and size for mdp register set
- description: Address offset and size for vbif register set
reg-names:
items:
- const: mdp
- const: vbif
clocks:
items:
- description: Display hf axi
- description: Display MDSS ahb
- description: Display lut
- description: Display core
- description: Display vsync
clock-names:
items:
- const: nrt_bus
- const: iface
- const: lut
- const: core
- const: vsync
required:
- compatible
- reg
- reg-names
- clocks
- clock-names
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/qcom,rpmhpd.h>
display-controller@ae01000 {
compatible = "qcom,sm8650-dpu";
reg = <0x0ae01000 0x8f000>,
<0x0aeb0000 0x2008>;
reg-names = "mdp", "vbif";
clocks = <&gcc_axi_clk>,
<&dispcc_ahb_clk>,
<&dispcc_mdp_lut_clk>,
<&dispcc_mdp_clk>,
<&dispcc_vsync_clk>;
clock-names = "nrt_bus",
"iface",
"lut",
"core",
"vsync";
assigned-clocks = <&dispcc_vsync_clk>;
assigned-clock-rates = <19200000>;
operating-points-v2 = <&mdp_opp_table>;
power-domains = <&rpmhpd RPMHPD_MMCX>;
interrupt-parent = <&mdss>;
interrupts = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dpu_intf1_out: endpoint {
remote-endpoint = <&dsi0_in>;
};
};
port@1 {
reg = <1>;
dpu_intf2_out: endpoint {
remote-endpoint = <&dsi1_in>;
};
};
};
mdp_opp_table: opp-table {
compatible = "operating-points-v2";
opp-200000000 {
opp-hz = /bits/ 64 <200000000>;
required-opps = <&rpmhpd_opp_low_svs>;
};
opp-325000000 {
opp-hz = /bits/ 64 <325000000>;
required-opps = <&rpmhpd_opp_svs>;
};
opp-375000000 {
opp-hz = /bits/ 64 <375000000>;
required-opps = <&rpmhpd_opp_svs_l1>;
};
opp-514000000 {
opp-hz = /bits/ 64 <514000000>;
required-opps = <&rpmhpd_opp_nom>;
};
};
};
...


@ -0,0 +1,328 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/msm/qcom,sm8650-mdss.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm SM8650 Display MDSS
maintainers:
- Neil Armstrong <neil.armstrong@linaro.org>
description:
SM8650 MSM Mobile Display Subsystem (MDSS), which encapsulates sub-blocks like
DPU display controller, DSI and DP interfaces etc.
$ref: /schemas/display/msm/mdss-common.yaml#
properties:
compatible:
const: qcom,sm8650-mdss
clocks:
items:
- description: Display AHB
- description: Display hf AXI
- description: Display core
iommus:
maxItems: 1
interconnects:
maxItems: 2
interconnect-names:
maxItems: 2
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
properties:
compatible:
const: qcom,sm8650-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
properties:
compatible:
const: qcom,sm8650-dp
"^dsi@[0-9a-f]+$":
type: object
properties:
compatible:
items:
- const: qcom,sm8650-dsi-ctrl
- const: qcom,mdss-dsi-ctrl
"^phy@[0-9a-f]+$":
type: object
properties:
compatible:
const: qcom,sm8650-dsi-phy-4nm
required:
- compatible
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/qcom,rpmh.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/qcom,rpmhpd.h>
display-subsystem@ae00000 {
compatible = "qcom,sm8650-mdss";
reg = <0x0ae00000 0x1000>;
reg-names = "mdss";
resets = <&dispcc_core_bcr>;
power-domains = <&dispcc_gdsc>;
clocks = <&gcc_ahb_clk>,
<&gcc_axi_clk>,
<&dispcc_mdp_clk>;
clock-names = "bus", "nrt_bus", "core";
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
interrupt-controller;
#interrupt-cells = <1>;
iommus = <&apps_smmu 0x1c00 0x2>;
#address-cells = <1>;
#size-cells = <1>;
ranges;
display-controller@ae01000 {
compatible = "qcom,sm8650-dpu";
reg = <0x0ae01000 0x8f000>,
<0x0aeb0000 0x2008>;
reg-names = "mdp", "vbif";
clocks = <&gcc_axi_clk>,
<&dispcc_ahb_clk>,
<&dispcc_mdp_lut_clk>,
<&dispcc_mdp_clk>,
<&dispcc_mdp_vsync_clk>;
clock-names = "nrt_bus",
"iface",
"lut",
"core",
"vsync";
assigned-clocks = <&dispcc_mdp_vsync_clk>;
assigned-clock-rates = <19200000>;
operating-points-v2 = <&mdp_opp_table>;
power-domains = <&rpmhpd RPMHPD_MMCX>;
interrupt-parent = <&mdss>;
interrupts = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dpu_intf1_out: endpoint {
remote-endpoint = <&dsi0_in>;
};
};
port@1 {
reg = <1>;
dpu_intf2_out: endpoint {
remote-endpoint = <&dsi1_in>;
};
};
};
mdp_opp_table: opp-table {
compatible = "operating-points-v2";
opp-200000000 {
opp-hz = /bits/ 64 <200000000>;
required-opps = <&rpmhpd_opp_low_svs>;
};
opp-325000000 {
opp-hz = /bits/ 64 <325000000>;
required-opps = <&rpmhpd_opp_svs>;
};
opp-375000000 {
opp-hz = /bits/ 64 <375000000>;
required-opps = <&rpmhpd_opp_svs_l1>;
};
opp-514000000 {
opp-hz = /bits/ 64 <514000000>;
required-opps = <&rpmhpd_opp_nom>;
};
};
};
dsi@ae94000 {
compatible = "qcom,sm8650-dsi-ctrl", "qcom,mdss-dsi-ctrl";
reg = <0x0ae94000 0x400>;
reg-names = "dsi_ctrl";
interrupt-parent = <&mdss>;
interrupts = <4>;
clocks = <&dispc_byte_clk>,
<&dispcc_intf_clk>,
<&dispcc_pclk>,
<&dispcc_esc_clk>,
<&dispcc_ahb_clk>,
<&gcc_bus_clk>;
clock-names = "byte",
"byte_intf",
"pixel",
"core",
"iface",
"bus";
assigned-clocks = <&dispcc_byte_clk>,
<&dispcc_pclk>;
assigned-clock-parents = <&dsi0_phy 0>, <&dsi0_phy 1>;
operating-points-v2 = <&dsi_opp_table>;
power-domains = <&rpmhpd RPMHPD_MMCX>;
phys = <&dsi0_phy>;
phy-names = "dsi";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dsi0_in: endpoint {
remote-endpoint = <&dpu_intf1_out>;
};
};
port@1 {
reg = <1>;
dsi0_out: endpoint {
};
};
};
dsi_opp_table: opp-table {
compatible = "operating-points-v2";
opp-187500000 {
opp-hz = /bits/ 64 <187500000>;
required-opps = <&rpmhpd_opp_low_svs>;
};
opp-300000000 {
opp-hz = /bits/ 64 <300000000>;
required-opps = <&rpmhpd_opp_svs>;
};
opp-358000000 {
opp-hz = /bits/ 64 <358000000>;
required-opps = <&rpmhpd_opp_svs_l1>;
};
};
};
dsi0_phy: phy@ae94400 {
compatible = "qcom,sm8650-dsi-phy-4nm";
reg = <0x0ae95000 0x200>,
<0x0ae95200 0x280>,
<0x0ae95500 0x400>;
reg-names = "dsi_phy",
"dsi_phy_lane",
"dsi_pll";
#clock-cells = <1>;
#phy-cells = <0>;
clocks = <&dispcc_iface_clk>,
<&rpmhcc_ref_clk>;
clock-names = "iface", "ref";
};
dsi@ae96000 {
compatible = "qcom,sm8650-dsi-ctrl", "qcom,mdss-dsi-ctrl";
reg = <0x0ae96000 0x400>;
reg-names = "dsi_ctrl";
interrupt-parent = <&mdss>;
interrupts = <5>;
clocks = <&dispc_byte_clk>,
<&dispcc_intf_clk>,
<&dispcc_pclk>,
<&dispcc_esc_clk>,
<&dispcc_ahb_clk>,
<&gcc_bus_clk>;
clock-names = "byte",
"byte_intf",
"pixel",
"core",
"iface",
"bus";
assigned-clocks = <&dispcc_byte_clk>,
<&dispcc_pclk>;
assigned-clock-parents = <&dsi1_phy 0>, <&dsi1_phy 1>;
operating-points-v2 = <&dsi_opp_table>;
power-domains = <&rpmhpd RPMHPD_MMCX>;
phys = <&dsi1_phy>;
phy-names = "dsi";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dsi1_in: endpoint {
remote-endpoint = <&dpu_intf2_out>;
};
};
port@1 {
reg = <1>;
dsi1_out: endpoint {
};
};
};
};
dsi1_phy: phy@ae96400 {
compatible = "qcom,sm8650-dsi-phy-4nm";
reg = <0x0ae97000 0x200>,
<0x0ae97200 0x280>,
<0x0ae97500 0x400>;
reg-names = "dsi_phy",
"dsi_phy_lane",
"dsi_pll";
#clock-cells = <1>;
#phy-cells = <0>;
clocks = <&dispcc_iface_clk>,
<&rpmhcc_ref_clk>;
clock-names = "iface", "ref";
};
};
...


@ -0,0 +1,56 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/fascontek,fs035vg158.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Fascontek FS035VG158 3.5" (640x480 pixels) 24-bit IPS LCD panel
maintainers:
- John Watts <contact@jookia.org>
allOf:
- $ref: panel-common.yaml#
- $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
const: fascontek,fs035vg158
spi-3wire: true
required:
- compatible
- reg
- port
- power-supply
- reset-gpios
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
spi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "fascontek,fs035vg158";
reg = <0>;
spi-3wire;
spi-max-frequency = <3125000>;
reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>;
backlight = <&backlight>;
power-supply = <&vcc>;
port {
panel_input: endpoint {
remote-endpoint = <&panel_output>;
};
};
};
};


@ -23,6 +23,7 @@ properties:
items:
- enum:
- hannstar,hsd060bhw4
- powkiddy,x55-panel
- const: himax,hx8394
reg: true
@ -31,6 +32,8 @@ properties:
backlight: true
rotation: true
port: true
vcc-supply:


@ -0,0 +1,62 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/ilitek,ili9805.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Ilitek ILI9805 based MIPI-DSI panels
maintainers:
- Michael Trimarchi <michael@amarulasolutions.com>
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
items:
- enum:
- giantplus,gpm1790a0
- tianma,tm041xdhg01
- const: ilitek,ili9805
avdd-supply: true
dvdd-supply: true
reg: true
required:
- compatible
- avdd-supply
- dvdd-supply
- reg
- reset-gpios
- port
- backlight
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
dsi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "giantplus,gpm1790a0", "ilitek,ili9805";
reg = <0>;
avdd-supply = <&avdd_display>;
dvdd-supply = <&dvdd_display>;
reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
backlight = <&backlight>;
port {
panel_in: endpoint {
remote-endpoint = <&mipi_dsi_out>;
};
};
};
};
...


@ -16,6 +16,7 @@ properties:
compatible:
items:
- enum:
- ampire,am8001280g
- bananapi,lhr050h41
- feixin,k101-im2byl02
- tdo,tl050hdv35


@ -18,16 +18,12 @@ properties:
compatible:
const: leadtek,ltk035c5444t
backlight: true
port: true
power-supply: true
reg: true
reset-gpios: true
spi-3wire: true
required:
- compatible
- reg
- port
- power-supply
- reset-gpios


@ -21,7 +21,7 @@ properties:
- enum:
- anbernic,rg351v-panel
- anbernic,rg353p-panel
- anbernic,rg353v-panel
- powkiddy,rk2023-panel
- const: newvision,nv3051d
reg: true


@ -33,6 +33,8 @@ properties:
# AU Optronics Corporation 13.3" FHD (1920x1080) TFT LCD panel
- auo,g133han01
# AU Optronics Corporation 15.6" FHD (1920x1080) TFT LCD panel
- auo,g156han04
# AU Optronics Corporation 18.5" FHD (1920x1080) TFT LCD panel
- auo,g185han01
# AU Optronics Corporation 19.0" (1280x1024) TFT LCD panel


@ -73,6 +73,8 @@ properties:
- auo,t215hvn01
# Shanghai AVIC Optoelectronics 7" 1024x600 color TFT-LCD panel
- avic,tm070ddh03
# BOE BP101WX1-100 10.1" WXGA (1280x800) LVDS panel
- boe,bp101wx1-100
# BOE EV121WXM-N10-1850 12.1" WXGA (1280x800) TFT LCD panel
- boe,ev121wxm-n10-1850
# BOE HV070WSA-100 7.01" WSVGA TFT LCD panel
@ -144,6 +146,8 @@ properties:
- edt,etmv570g2dhu
# E Ink VB3300-KCA
- eink,vb3300-kca
# Evervision Electronics Co. Ltd. VGG644804 5.7" VGA TFT LCD Panel
- evervision,vgg644804
# Evervision Electronics Co. Ltd. VGG804821 5.0" WVGA TFT LCD Panel
- evervision,vgg804821
# Foxlink Group 5" WVGA TFT LCD panel


@ -27,6 +27,7 @@ properties:
compatible:
items:
- enum:
- anbernic,rg-arc-panel
- densitron,dmt028vghmcmi-1a
- elida,kd50t048a
- techstar,ts8550b


@ -8,8 +8,8 @@ title: Rockchip SoC display controller (VOP2)
description:
VOP2 (Video Output Processor v2) is the display controller for the Rockchip
series of SoCs which transfers the image data from a video memory
buffer to an external LCD interface.
series of SoCs which transfers the image data from a video memory buffer to
an external LCD interface.
maintainers:
- Sandy Huang <hjc@rock-chips.com>
@ -20,6 +20,7 @@ properties:
enum:
- rockchip,rk3566-vop
- rockchip,rk3568-vop
- rockchip,rk3588-vop
reg:
items:
@ -27,8 +28,8 @@ properties:
Must contain one entry corresponding to the base address and length
of the register space.
- description:
Can optionally contain a second entry corresponding to
the CRTC gamma LUT address.
Can optionally contain a second entry corresponding to the CRTC gamma
LUT address.
reg-names:
items:
@ -41,45 +42,63 @@ properties:
The VOP interrupt is shared by several interrupt sources, such as
frame start (VSYNC), line flag and other status interrupts.
# See compatible-specific constraints below.
clocks:
minItems: 5
items:
- description: Clock for ddr buffer transfer.
- description: Clock for the ahb bus to R/W the phy regs.
- description: Clock for ddr buffer transfer via axi.
- description: Clock for the ahb bus to R/W the regs.
- description: Pixel clock for video port 0.
- description: Pixel clock for video port 1.
- description: Pixel clock for video port 2.
- description: Pixel clock for video port 3.
- description: Peripheral(vop grf/dsi) clock.
clock-names:
minItems: 5
items:
- const: aclk
- const: hclk
- const: dclk_vp0
- const: dclk_vp1
- const: dclk_vp2
- const: dclk_vp3
- const: pclk_vop
rockchip,grf:
$ref: /schemas/types.yaml#/definitions/phandle
description:
Phandle to GRF regs used for misc control
Phandle to GRF regs, used to control the polarity of dclk/hsync/vsync of DPI
and to query the VOP memory BISR enable status, etc.
rockchip,vo1-grf:
$ref: /schemas/types.yaml#/definitions/phandle
description:
Phandle to VO GRF regs, used to control the polarity of dclk/hsync/vsync of
HDMI on rk3588.
rockchip,vop-grf:
$ref: /schemas/types.yaml#/definitions/phandle
description:
Phandle to VOP GRF regs, used to control the data path between VOP and HDMI/eDP.
rockchip,pmu:
$ref: /schemas/types.yaml#/definitions/phandle
description:
Phandle to PMU GRF, used to query the VOP memory BISR status on rk3588.
ports:
$ref: /schemas/graph.yaml#/properties/ports
properties:
port@0:
patternProperties:
"^port@[0-3]$":
$ref: /schemas/graph.yaml#/properties/port
description:
Output endpoint of VP0
description: Output endpoint of VP0/1/2/3.
port@1:
$ref: /schemas/graph.yaml#/properties/port
description:
Output endpoint of VP1
required:
- port@0
port@2:
$ref: /schemas/graph.yaml#/properties/port
description:
Output endpoint of VP2
unevaluatedProperties: false
iommus:
maxItems: 1
@ -96,6 +115,49 @@ required:
- clock-names
- ports
allOf:
- if:
properties:
compatible:
contains:
const: rockchip,rk3588-vop
then:
properties:
clocks:
minItems: 7
clock-names:
minItems: 7
ports:
required:
- port@0
- port@1
- port@2
- port@3
required:
- rockchip,grf
- rockchip,vo1-grf
- rockchip,vop-grf
- rockchip,pmu
else:
properties:
rockchip,vo1-grf: false
rockchip,vop-grf: false
rockchip,pmu: false
clocks:
maxItems: 5
clock-names:
maxItems: 5
ports:
required:
- port@0
- port@1
- port@2
additionalProperties: false
examples:


@ -23,6 +23,7 @@ properties:
compatible:
enum:
- ti,am625-dss
- ti,am62a7-dss
- ti,am65x-dss
reg:
@ -87,6 +88,7 @@ properties:
For AM65x DSS, the OLDI output port node from video port 1.
For AM625 DSS, the internal DPI output port node from video
port 1.
For AM62A7 DSS, the port is tied off inside the SoC.
port@1:
$ref: /schemas/graph.yaml#/properties/port
@ -108,6 +110,18 @@ properties:
Input memory (from main memory to dispc) bandwidth limit in
bytes per second
allOf:
- if:
properties:
compatible:
contains:
const: ti,am62a7-dss
then:
properties:
ports:
properties:
port@0: false
required:
- compatible
- reg


@ -29,6 +29,7 @@ properties:
- allwinner,sun50i-a64-mali
- rockchip,rk3036-mali
- rockchip,rk3066-mali
- rockchip,rk3128-mali
- rockchip,rk3188-mali
- rockchip,rk3228-mali
- samsung,exynos4210-mali


@ -17,6 +17,7 @@ properties:
compatible:
enum:
- brcm,2711-v3d
- brcm,2712-v3d
- brcm,7268-v3d
- brcm,7278-v3d


@ -0,0 +1,73 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
# Copyright (c) 2023 Imagination Technologies Ltd.
%YAML 1.2
---
$id: http://devicetree.org/schemas/gpu/img,powervr.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Imagination Technologies PowerVR and IMG GPU

maintainers:
  - Frank Binns <frank.binns@imgtec.com>

properties:
  compatible:
    items:
      - enum:
          - ti,am62-gpu
      - const: img,img-axe # IMG AXE GPU model/revision is fully discoverable

  reg:
    maxItems: 1

  clocks:
    minItems: 1
    maxItems: 3

  clock-names:
    items:
      - const: core
      - const: mem
      - const: sys
    minItems: 1

  interrupts:
    maxItems: 1

  power-domains:
    maxItems: 1

required:
  - compatible
  - reg
  - clocks
  - clock-names
  - interrupts

additionalProperties: false

allOf:
  - if:
      properties:
        compatible:
          contains:
            const: ti,am62-gpu
    then:
      properties:
        clocks:
          maxItems: 1

examples:
  - |
    #include <dt-bindings/interrupt-controller/irq.h>
    #include <dt-bindings/interrupt-controller/arm-gic.h>
    #include <dt-bindings/soc/ti,sci_pm_domain.h>

    gpu@fd00000 {
        compatible = "ti,am62-gpu", "img,img-axe";
        reg = <0x0fd00000 0x20000>;
        clocks = <&k3_clks 187 0>;
        clock-names = "core";
        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
        power-domains = <&k3_pds 187 TI_SCI_PD_EXCLUSIVE>;
    };


@ -478,6 +478,8 @@ patternProperties:
description: Fairphone B.V.
"^faraday,.*":
description: Faraday Technology Corporation
"^fascontek,.*":
description: Fascontek
"^fastrax,.*":
description: Fastrax Oy
"^fcs,.*":


@ -7,6 +7,7 @@ SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0
Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1
Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x45 series (FL1), / Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x45 series (FL1), Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11



@ -75,3 +75,44 @@ change in real-time by using something like::
When reporting a bug related to DC, consider attaching this log before and
after you reproduce the bug.
DMUB Firmware Debug
===================
Sometimes, dmesg logs aren't enough. This is especially true if a feature is
implemented primarily in DMUB firmware. In such cases, all we see in dmesg when
an issue arises is some generic timeout error. So, to get more relevant
information, we can trace DMUB commands by enabling the relevant bits in
`amdgpu_dm_dmub_trace_mask`.
Currently, we support the tracing of the following groups:
Trace Groups
------------
.. csv-table::
:header-rows: 1
:widths: 1, 1
:file: ./trace-groups-table.csv
**Note: Not all ASICs support all of the listed trace groups**
So, to enable PSR tracing, which per the table above means setting the PSR
(0x20) and PSR STATE (0x8000) bits, i.e. mask 0x8020, you can use the
following command::
# echo 0x8020 > /sys/kernel/debug/dri/0/amdgpu_dm_dmub_trace_mask
Then, you need to enable logging trace events to the buffer, which you can do
using the following::
# echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
Lastly, after you are able to reproduce the issue you are trying to debug,
you can disable tracing and read the trace log by using the following::
# echo 0 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
# cat /sys/kernel/debug/dri/0/amdgpu_dm_dmub_tracebuffer
When reporting bugs related to features such as PSR and ABM, consider
enabling the relevant bits in the mask before reproducing the issue, and
attach the log obtained from the trace buffer to the bug report.


@ -0,0 +1,29 @@
Name, Mask Value
INFO, 0x1
IRQ SVC, 0x2
VBIOS, 0x4
REGISTER, 0x8
PHY DBG, 0x10
PSR, 0x20
AUX, 0x40
SMU, 0x80
MALL, 0x100
ABM, 0x200
ALPM, 0x400
TIMER, 0x800
HW LOCK MGR, 0x1000
INBOX1, 0x2000
PHY SEQ, 0x4000
PSR STATE, 0x8000
ZSTATE, 0x10000
TRANSMITTER CTL, 0x20000
PANEL CNTL, 0x40000
FAMS, 0x80000
DPIA, 0x100000
SUBVP, 0x200000
INBOX0, 0x400000
SDP, 0x4000000
REPLAY, 0x8000000
REPLAY RESIDENCY, 0x20000000
CURSOR INFO, 0x80000000
IPS, 0x100000000


@ -69,14 +69,15 @@ the result. They will still be run.
Each new flake entry must be associated with a link to the email reporting the
bug to the author of the affected driver, the board name or Device Tree name of
the board, the first kernel version affected, and an approximation of the
failure rate.
the board, the first kernel version affected, the IGT version used for tests,
and an approximation of the failure rate.
They should be provided under the following format::
# Bug Report: $LORE_OR_PATCHWORK_URL
# Board Name: broken-board.dtb
# Version: 6.6-rc1
# Linux Version: 6.6-rc1
# IGT Version: 1.28-gd2af13d9f
# Failure Rate: 100
flaky-test


@ -17,3 +17,8 @@ VM_BIND / EXEC uAPI
:doc: Overview
.. kernel-doc:: include/uapi/drm/nouveau_drm.h
drm/xe uAPI
===========
.. kernel-doc:: include/uapi/drm/xe_drm.h


@ -3,9 +3,11 @@ GPU Driver Documentation
========================
.. toctree::
:maxdepth: 3
amdgpu/index
i915
imagination/index
mcde
meson
pl111
@ -16,6 +18,7 @@ GPU Driver Documentation
vkms
bridge/dw-hdmi
xen-front
xe/index
afbc
komeda-kms
panfrost


@ -363,6 +363,12 @@ EDID Helper Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_edid.c
:export:
.. kernel-doc:: include/drm/drm_eld.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_eld.c
:export:
SCDC Helper Functions Reference
===============================


@ -548,6 +548,8 @@ Plane Composition Properties
.. kernel-doc:: drivers/gpu/drm/drm_blend.c
:doc: overview
.. _damage_tracking_properties:
Damage Tracking Properties
--------------------------
@ -579,6 +581,12 @@ Variable Refresh Properties
.. kernel-doc:: drivers/gpu/drm/drm_connector.c
:doc: Variable refresh properties
Cursor Hotspot Properties
---------------------------
.. kernel-doc:: drivers/gpu/drm/drm_plane.c
:doc: hotspot properties
Existing KMS Properties
-----------------------


@ -466,6 +466,8 @@ DRM MM Range Allocator Function References
.. kernel-doc:: drivers/gpu/drm/drm_mm.c
:export:
.. _drm_gpuvm:
DRM GPUVM
=========
@ -481,6 +483,8 @@ Split and Merge
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Split and Merge
.. _drm_gpuvm_locking:
Locking
-------
@ -552,6 +556,12 @@ Overview
.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Overview
Flow Control
------------
.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Flow Control
Scheduler Function References
-----------------------------


@ -0,0 +1,582 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
===============
VM_BIND locking
===============
This document attempts to describe what's needed to get VM_BIND locking right,
including the userptr mmu_notifier locking. It also discusses some
optimizations to get rid of the looping through of all userptr mappings and
external / shared object mappings that is needed in the simplest
implementation. In addition, there is a section describing the VM_BIND locking
required for implementing recoverable pagefaults.
The DRM GPUVM set of helpers
============================
There is a set of helpers for drivers implementing VM_BIND, and this
set of helpers implements much, but not all, of the locking described
in this document. In particular, it is currently lacking a userptr
implementation. This document does not intend to describe the DRM GPUVM
implementation in detail, but it is covered in :ref:`its own
documentation <drm_gpuvm>`. It is highly recommended for any driver
implementing VM_BIND to use the DRM GPUVM helpers and to extend them if
common functionality is missing.
Nomenclature
============
* ``gpu_vm``: Abstraction of a virtual GPU address space with
meta-data. Typically one per client (DRM file-private), or one per
execution context.
* ``gpu_vma``: Abstraction of a GPU address range within a gpu_vm with
associated meta-data. The backing storage of a gpu_vma can either be
a GEM object or anonymous or page-cache pages mapped also into the CPU
address space for the process.
* ``gpu_vm_bo``: Abstracts the association of a GEM object and
a VM. The GEM object maintains a list of gpu_vm_bos, where each gpu_vm_bo
maintains a list of gpu_vmas.
* ``userptr gpu_vma or just userptr``: A gpu_vma, whose backing store
is anonymous or page-cache pages as described above.
* ``revalidating``: Revalidating a gpu_vma means making the latest version
of the backing store resident and making sure the gpu_vma's
page-table entries point to that backing store.
* ``dma_fence``: A struct dma_fence that is similar to a struct completion
and which tracks GPU activity. When the GPU activity is finished,
the dma_fence signals. Please refer to the ``DMA Fences`` section of
the :doc:`dma-buf doc </driver-api/dma-buf>`.
* ``dma_resv``: A struct dma_resv (a.k.a reservation object) that is used
to track GPU activity in the form of multiple dma_fences on a
gpu_vm or a GEM object. The dma_resv contains an array / list
of dma_fences and a lock that needs to be held when adding
additional dma_fences to the dma_resv. The lock is of a type that
allows deadlock-safe locking of multiple dma_resvs in arbitrary
order. Please refer to the ``Reservation Objects`` section of the
:doc:`dma-buf doc </driver-api/dma-buf>`.
* ``exec function``: An exec function is a function that revalidates all
affected gpu_vmas, submits a GPU command batch and registers the
dma_fence representing the GPU command's activity with all affected
dma_resvs. For completeness, although not covered by this document,
it's worth mentioning that an exec function may also be the
revalidation worker that is used by some drivers in compute /
long-running mode.
* ``local object``: A GEM object which is only mapped within a
single VM. Local GEM objects share the gpu_vm's dma_resv.
* ``external object``: a.k.a shared object: A GEM object which may be shared
by multiple gpu_vms and whose backing storage may be shared with
other drivers.
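As a rough sketch of how these objects relate to each other, consider the
following pseudo-code. The struct layouts and field names are illustrative
only and do not correspond to the actual DRM GPUVM structures:

.. code-block:: C

   /* Illustrative only; not the real DRM GPUVM structures. */
   struct gpu_vm {
           struct dma_resv resv;          /* Shared by all local GEM objects. */
           struct rw_semaphore lock;      /* Protects the gpu_vma structures. */
           struct list_head userptr_list; /* The gpu_vm's userptr gpu_vmas. */
   };

   struct gpu_vm_bo {
           struct drm_gem_object *obj;    /* Refcounted GEM object pointer. */
           struct gpu_vm *vm;             /* Refcounted gpu_vm pointer. */
           struct list_head vma_list;     /* gpu_vmas of this object / VM pair. */
   };

   struct gpu_vma {
           struct gpu_vm_bo *vm_bo;       /* Refcounted; NULL for userptr gpu_vmas. */
           u64 start, end;                /* The mapped GPU address range. */
   };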
Locks and locking order
=======================
One of the benefits of VM_BIND is that local GEM objects share the gpu_vm's
dma_resv object and hence the dma_resv lock. So, even with a huge
number of local GEM objects, only one lock is needed to make the exec
sequence atomic.
The following locks and locking orders are used:
* The ``gpu_vm->lock`` (optionally an rwsem). Protects the gpu_vm's
data structure keeping track of gpu_vmas. It can also protect the
gpu_vm's list of userptr gpu_vmas. With a CPU mm analogy this would
correspond to the mmap_lock. An rwsem allows several readers to walk
the VM tree concurrently, but the benefit of that concurrency most
likely varies from driver to driver.
* The ``userptr_seqlock``. This lock is taken in read mode for each
userptr gpu_vma on the gpu_vm's userptr list, and in write mode during mmu
notifier invalidation. This is not a real seqlock but described in
``mm/mmu_notifier.c`` as a "Collision-retry read-side/write-side
'lock' a lot like a seqcount. However this allows multiple
write-sides to hold it at once...". The read side critical section
is enclosed by ``mmu_interval_read_begin() /
mmu_interval_read_retry()`` with ``mmu_interval_read_begin()``
sleeping if the write side is held.
The write side is held by the core mm while calling mmu interval
invalidation notifiers.
* The ``gpu_vm->resv`` lock. Protects the gpu_vm's list of gpu_vmas needing
rebinding, as well as the residency state of all the gpu_vm's local
GEM objects.
Furthermore, it typically protects the gpu_vm's list of evicted and
external GEM objects.
* The ``gpu_vm->userptr_notifier_lock``. This is an rwsem that is
taken in read mode during exec and write mode during a mmu notifier
invalidation. The userptr notifier lock is per gpu_vm.
* The ``gem_object->gpuva_lock``. This lock protects the GEM object's
list of gpu_vm_bos. This is usually the same lock as the GEM
object's dma_resv, but some drivers protect this list differently;
see below.
* The ``gpu_vm list spinlocks``. Some implementations need them to
update the gpu_vm evicted- and external-object lists. For those
implementations, the spinlocks are grabbed when the lists are
manipulated. However, to avoid locking order violations
with the dma_resv locks, a special scheme is needed when iterating
over the lists.
.. _gpu_vma lifetime:
Protection and lifetime of gpu_vm_bos and gpu_vmas
==================================================
The GEM object's list of gpu_vm_bos and the gpu_vm_bo's list of gpu_vmas
are protected by the ``gem_object->gpuva_lock``, which is typically the
same as the GEM object's dma_resv, but if the driver
needs to access these lists from within a dma_fence signalling
critical section, it can instead choose to protect it with a
separate lock, which can be locked from within the dma_fence signalling
critical section. Such drivers then need to pay additional attention
to what locks need to be taken from within the loop when iterating
over the gpu_vm_bo and gpu_vma lists to avoid locking-order violations.
The DRM GPUVM set of helpers provides lockdep asserts that this lock is
held in relevant situations and also provides a means of making itself
aware of which lock is actually used: :c:func:`drm_gem_gpuva_set_lock`.
Each gpu_vm_bo holds a reference counted pointer to the underlying GEM
object, and each gpu_vma holds a reference counted pointer to the
gpu_vm_bo. When iterating over the GEM object's list of gpu_vm_bos and
over the gpu_vm_bo's list of gpu_vmas, the ``gem_object->gpuva_lock`` must
not be dropped, otherwise, gpu_vmas attached to a gpu_vm_bo may
disappear without notice since those are not reference-counted. A
driver may implement its own scheme to allow this at the expense of
additional complexity, but this is outside the scope of this document.
In the DRM GPUVM implementation, each gpu_vm_bo and each gpu_vma
holds a reference count on the gpu_vm itself. Due to this, and to avoid circular
reference counting, cleanup of the gpu_vm's gpu_vmas must not be done from the
gpu_vm's destructor. Drivers typically implement a gpu_vm close
function for this cleanup. The gpu_vm close function will abort gpu
execution using this VM, unmap all gpu_vmas and release page-table memory.
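A minimal sketch of such a close function, with purely illustrative helper
names, might look like:

.. code-block:: C

   void gpu_vm_close(struct gpu_vm *gpu_vm)
   {
           /* Stop GPU execution using this VM. */
           abort_gpu_execution(gpu_vm);
           /* Unmap all gpu_vmas and release page-table memory. */
           unmap_all_gpu_vmas(gpu_vm);
           free_page_table_memory(gpu_vm);
   }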
Revalidation and eviction of local objects
==========================================
Note that in all the code examples given below we use simplified
pseudo-code. In particular, the dma_resv deadlock avoidance algorithm
as well as reserving memory for dma_resv fences is left out.
Revalidation
____________
With VM_BIND, all local objects need to be resident when the gpu is
executing using the gpu_vm, and the objects need to have valid
gpu_vmas set up pointing to them. Typically, each gpu command buffer
submission is therefore preceded with a re-validation section:
.. code-block:: C
   dma_resv_lock(gpu_vm->resv);

   // Validation section starts here.
   for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
           validate_gem_bo(&gpu_vm_bo->gem_bo);

           // The following list iteration needs the Gem object's
           // dma_resv to be held (it protects the gpu_vm_bo's list of
           // gpu_vmas, but since local gem objects share the gpu_vm's
           // dma_resv, it is already held at this point.)
           for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
                   move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
   }

   for_each_gpu_vma_on_rebind_list(&gpu_vm->rebind_list, &gpu_vma) {
           rebind_gpu_vma(&gpu_vma);
           remove_gpu_vma_from_rebind_list(&gpu_vma);
   }

   // Validation section ends here, and job submission starts.
   add_dependencies(&gpu_job, &gpu_vm->resv);
   job_dma_fence = gpu_submit(&gpu_job);
   add_dma_fence(job_dma_fence, &gpu_vm->resv);
   dma_resv_unlock(gpu_vm->resv);
The reason for having a separate gpu_vm rebind list is that there
might be userptr gpu_vmas that are not mapping a buffer object that
also need rebinding.
Eviction
________
Eviction of one of these local objects will then look similar to the
following:
.. code-block:: C
   obj = get_object_from_lru();

   dma_resv_lock(obj->resv);
   for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo)
           add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);

   add_dependencies(&eviction_job, &obj->resv);
   job_dma_fence = gpu_submit(&eviction_job);
   add_dma_fence(job_dma_fence, &obj->resv);

   dma_resv_unlock(obj->resv);
   put_object(obj);
Note that since the object is local to the gpu_vm, it will share the gpu_vm's
dma_resv lock such that ``obj->resv == gpu_vm->resv``.
The gpu_vm_bos marked for eviction are put on the gpu_vm's evict list,
which is protected by ``gpu_vm->resv``. During eviction all local
objects have their dma_resv locked and, due to the above equality, also
the gpu_vm's dma_resv protecting the gpu_vm's evict list is locked.
With VM_BIND, gpu_vmas don't need to be unbound before eviction,
since the driver must ensure that the eviction blit or copy will wait
for GPU idle or depend on all previous GPU activity. Furthermore, any
subsequent attempt by the GPU to access freed memory through the
gpu_vma will be preceded by a new exec function, with a revalidation
section which will make sure all gpu_vmas are rebound. The eviction
code holding the object's dma_resv while revalidating will ensure a
new exec function may not race with the eviction.
A driver can be implemented in such a way that, on each exec function,
only a subset of vmas are selected for rebind. In this case, all vmas that are
*not* selected for rebind must be unbound before the exec
function workload is submitted.
Locking with external buffer objects
====================================
Since external buffer objects may be shared by multiple gpu_vms they
can't share their reservation object with a single gpu_vm. Instead
they need to have a reservation object of their own. The external
objects bound to a gpu_vm using one or many gpu_vmas are therefore put on a
per-gpu_vm list which is protected by the gpu_vm's dma_resv lock or
one of the :ref:`gpu_vm list spinlocks <Spinlock iteration>`. Once
the gpu_vm's reservation object is locked, it is safe to traverse the
external object list and lock the dma_resvs of all external
objects. However, if instead a list spinlock is used, a more elaborate
iteration scheme needs to be used.
At eviction time, the gpu_vm_bos of *all* the gpu_vms an external
object is bound to need to be put on their gpu_vm's evict list.
However, when evicting an external object, the dma_resvs of the
gpu_vms the object is bound to are typically not held. Only
the object's private dma_resv can be guaranteed to be held. If there
is a ww_acquire context at hand at eviction time we could grab those
dma_resvs but that could cause expensive ww_mutex rollbacks. A simple
option is to just mark the gpu_vm_bos of the evicted gem object with
an ``evicted`` bool that is inspected before the next time the
corresponding gpu_vm evicted list needs to be traversed, for example when
traversing the list of external objects and locking them. At that time,
both the gpu_vm's dma_resv and the object's dma_resv are held, and the
gpu_vm_bo marked evicted can then be added to the gpu_vm's list of
evicted gpu_vm_bos. The ``evicted`` bool is formally protected by the
object's dma_resv.
The exec function becomes
.. code-block:: C
   dma_resv_lock(gpu_vm->resv);

   // External object list is protected by the gpu_vm->resv lock.
   for_each_gpu_vm_bo_on_extobj_list(gpu_vm, &gpu_vm_bo) {
           dma_resv_lock(gpu_vm_bo.gem_obj->resv);
           if (gpu_vm_bo_marked_evicted(&gpu_vm_bo))
                   add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
   }

   for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
           validate_gem_bo(&gpu_vm_bo->gem_bo);

           for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
                   move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
   }

   for_each_gpu_vma_on_rebind_list(&gpu_vm->rebind_list, &gpu_vma) {
           rebind_gpu_vma(&gpu_vma);
           remove_gpu_vma_from_rebind_list(&gpu_vma);
   }

   add_dependencies(&gpu_job, &gpu_vm->resv);
   job_dma_fence = gpu_submit(&gpu_job);
   add_dma_fence(job_dma_fence, &gpu_vm->resv);

   for_each_external_obj(gpu_vm, &obj)
           add_dma_fence(job_dma_fence, &obj->resv);

   dma_resv_unlock_all_resv_locks();
And the corresponding shared-object aware eviction would look like:
.. code-block:: C
   obj = get_object_from_lru();

   dma_resv_lock(obj->resv);
   for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo)
           if (object_is_vm_local(obj))
                   add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
           else
                   mark_gpu_vm_bo_evicted(&gpu_vm_bo);

   add_dependencies(&eviction_job, &obj->resv);
   job_dma_fence = gpu_submit(&eviction_job);
   add_dma_fence(job_dma_fence, &obj->resv);

   dma_resv_unlock(obj->resv);
   put_object(obj);
.. _Spinlock iteration:
Accessing the gpu_vm's lists without the dma_resv lock held
===========================================================
Some drivers will hold the gpu_vm's dma_resv lock when accessing the
gpu_vm's evict list and external objects lists. However, there are
drivers that need to access these lists without the dma_resv lock
held, for example due to asynchronous state updates from within the
dma_fence signalling critical path. In such cases, a spinlock can be
used to protect manipulation of the lists. However, since higher level
sleeping locks need to be taken for each list item while iterating
over the lists, the items already iterated over need to be
temporarily moved to a private list and the spinlock released
while processing each item:
.. code-block:: C

   struct list_head still_in_list;

   INIT_LIST_HEAD(&still_in_list);

   spin_lock(&gpu_vm->list_lock);
   do {
           struct list_head *entry = list_first_entry_or_null(&gpu_vm->list, head);

           if (!entry)
                   break;

           // Move the entry to a private list and take a reference so
           // that it stays alive while the spinlock is dropped.
           list_move_tail(&entry->head, &still_in_list);
           list_entry_get_unless_zero(entry);
           spin_unlock(&gpu_vm->list_lock);

           process(entry);

           spin_lock(&gpu_vm->list_lock);
           list_entry_put(entry);
   } while (true);

   // Return the items iterated over to the original list.
   list_splice_tail(&still_in_list, &gpu_vm->list);
   spin_unlock(&gpu_vm->list_lock);
Due to the additional locking and atomic operations, drivers that *can*
avoid accessing the gpu_vm's lists outside of the dma_resv lock might want
to avoid this iteration scheme as well, particularly if the driver
anticipates a large number of list items. For lists where the
anticipated number of list items is small, where list iteration doesn't
happen very often or where there is a significant additional cost
associated with each iteration, the atomic operation overhead
associated with this type of iteration is most likely negligible. Note that
if this scheme is used, it is necessary to make sure this list
iteration is protected by an outer-level lock or semaphore, since list
items are temporarily pulled off the list while iterating. It is
also worth mentioning that the local list ``still_in_list`` should
be considered protected by the ``gpu_vm->list_lock``, and it is
thus possible that items can be removed from the local list
concurrently with list iteration.
Please refer to the :ref:`DRM GPUVM locking section
<drm_gpuvm_locking>` and its internal
:c:func:`get_next_vm_bo_from_list` function.
userptr gpu_vmas
================
A userptr gpu_vma is a gpu_vma that, instead of mapping a buffer object to a
GPU virtual address range, directly maps a CPU mm range of anonymous-
or file page-cache pages.
A very simple approach would be to just pin the pages using
pin_user_pages() at bind time and unpin them at unbind time, but this
creates a Denial-Of-Service vector since a single user-space process
would be able to pin down all of system memory, which is not
desirable. (For special use-cases and assuming proper accounting pinning might
still be a desirable feature, though). What we need to do in the
general case is to obtain a reference to the desired pages, make sure
we are notified using a MMU notifier just before the CPU mm unmaps the
pages, dirty them if they are not mapped read-only to the GPU, and
then drop the reference.
When we are notified by the MMU notifier that CPU mm is about to drop the
pages, we need to stop GPU access to the pages by waiting for VM idle
in the MMU notifier and make sure that before the next time the GPU
tries to access whatever is now present in the CPU mm range, we unmap
the old pages from the GPU page tables and repeat the process of
obtaining new page references. (See the :ref:`notifier example
<Invalidation example>` below). Note that when the core mm decides to
launder pages, we get such an unmap MMU notification and can mark the
pages dirty again before the next GPU access. We also get similar MMU
notifications for NUMA accounting which the GPU driver doesn't really
need to care about, but so far it has proven difficult to exclude
certain notifications.
Using a MMU notifier for device DMA (and other methods) is described in
:ref:`the pin_user_pages() documentation <mmu-notifier-registration-case>`.
Now, the method of obtaining struct page references using
get_user_pages() unfortunately can't be used under a dma_resv lock
since that would violate the locking order of the dma_resv lock vs the
mmap_lock that is grabbed when resolving a CPU pagefault. This means
the gpu_vm's list of userptr gpu_vmas needs to be protected by an
outer lock, which in our example below is the ``gpu_vm->lock``.
The MMU interval seqlock for a userptr gpu_vma is used in the following
way:
.. code-block:: C
   // Exclusive locking mode here is strictly needed only if there are
   // invalidated userptr gpu_vmas present, to avoid concurrent userptr
   // revalidations of the same userptr gpu_vma.
   down_write(&gpu_vm->lock);

   retry:

   // Note: mmu_interval_read_begin() blocks until there is no
   // invalidation notifier running anymore.
   seq = mmu_interval_read_begin(&gpu_vma->userptr_interval);
   if (seq != gpu_vma->saved_seq) {
           obtain_new_page_pointers(&gpu_vma);
           dma_resv_lock(&gpu_vm->resv);
           add_gpu_vma_to_revalidate_list(&gpu_vma, &gpu_vm);
           dma_resv_unlock(&gpu_vm->resv);
           gpu_vma->saved_seq = seq;
   }

   // The usual revalidation goes here.

   // Final userptr sequence validation may not happen before the
   // submission dma_fence is added to the gpu_vm's resv, from the POV
   // of the MMU invalidation notifier. Hence the
   // userptr_notifier_lock that will make them appear atomic.

   add_dependencies(&gpu_job, &gpu_vm->resv);
   down_read(&gpu_vm->userptr_notifier_lock);
   if (mmu_interval_read_retry(&gpu_vma->userptr_interval, gpu_vma->saved_seq)) {
           up_read(&gpu_vm->userptr_notifier_lock);
           goto retry;
   }

   job_dma_fence = gpu_submit(&gpu_job);

   add_dma_fence(job_dma_fence, &gpu_vm->resv);

   for_each_external_obj(gpu_vm, &obj)
           add_dma_fence(job_dma_fence, &obj->resv);

   dma_resv_unlock_all_resv_locks();
   up_read(&gpu_vm->userptr_notifier_lock);
   up_write(&gpu_vm->lock);
The code between ``mmu_interval_read_begin()`` and the
``mmu_interval_read_retry()`` marks the read side critical section of
what we call the ``userptr_seqlock``. In reality, the gpu_vm's userptr
gpu_vma list is looped through, and the check is done for *all* of its
userptr gpu_vmas, although we only show a single one here.
The userptr gpu_vma MMU invalidation notifier might be called from
reclaim context and, again, to avoid locking order violations, we can't
take any dma_resv lock nor the gpu_vm->lock from within it.
.. _Invalidation example:
.. code-block:: C
   bool gpu_vma_userptr_invalidate(userptr_interval, cur_seq)
   {
           // Make sure the exec function either sees the new sequence
           // and backs off or we wait for the dma-fence:

           down_write(&gpu_vm->userptr_notifier_lock);
           mmu_interval_set_seq(userptr_interval, cur_seq);
           up_write(&gpu_vm->userptr_notifier_lock);

           // At this point, the exec function can't succeed in
           // submitting a new job, because cur_seq is an invalid
           // sequence number and will always cause a retry. When all
           // invalidation callbacks have finished, the mmu notifier core
           // will flip the sequence number to a valid one. However,
           // we need to stop gpu access to the old pages here.

           dma_resv_wait_timeout(&gpu_vm->resv, DMA_RESV_USAGE_BOOKKEEP,
                                 false, MAX_SCHEDULE_TIMEOUT);
           return true;
   }
When this invalidation notifier returns, the GPU can no longer be
accessing the old pages of the userptr gpu_vma and needs to redo the
page-binding before a new GPU submission can succeed.
Efficient userptr gpu_vma exec_function iteration
_________________________________________________
If the gpu_vm's list of userptr gpu_vmas becomes large, it's
inefficient to iterate through the complete lists of userptrs on each
exec function to check whether each userptr gpu_vma's saved
sequence number is stale. A solution to this is to put all
*invalidated* userptr gpu_vmas on a separate gpu_vm list and
only check the gpu_vmas present on this list on each exec
function. This list will then lend itself very well to the spinlock
locking scheme that is
:ref:`described in the spinlock iteration section <Spinlock iteration>`, since
in the mmu notifier, where we add the invalidated gpu_vmas to the
list, it's not possible to take any outer locks like the
``gpu_vm->lock`` or the ``gpu_vm->resv`` lock. Note that the
``gpu_vm->lock`` still needs to be taken while iterating to ensure the list is
complete, as also mentioned in that section.
If using an invalidated userptr list like this, the retry check in the
exec function trivially becomes a check for the invalidated list being empty.
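Assuming the invalidated gpu_vmas are kept on a hypothetical
``gpu_vm->invalidated_userptr_list``, that retry check could be sketched as:

.. code-block:: C

   // Sketch only: replaces looping over all userptr gpu_vmas with
   // mmu_interval_read_retry() in the exec function.
   down_read(&gpu_vm->userptr_notifier_lock);
   if (!list_empty(&gpu_vm->invalidated_userptr_list)) {
           up_read(&gpu_vm->userptr_notifier_lock);
           goto retry;
   }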
Locking at bind and unbind time
===============================
At bind time, assuming a GEM object backed gpu_vma, each
gpu_vma needs to be associated with a gpu_vm_bo and that
gpu_vm_bo in turn needs to be added to the GEM object's
gpu_vm_bo list, and possibly to the gpu_vm's external object
list. This is referred to as *linking* the gpu_vma, and typically
requires that the ``gpu_vm->lock`` and the ``gem_object->gpuva_lock``
are held. When unlinking a gpu_vma the same locks should be held,
which ensures that when iterating over ``gpu_vmas``, either under
the ``gpu_vm->resv`` or the GEM object's dma_resv, the gpu_vmas
stay alive as long as the lock under which we iterate is not released. For
userptr gpu_vmas it's similarly required that during vma destroy, the
outer ``gpu_vm->lock`` is held, since otherwise when iterating over
the invalidated userptr list as described in the previous section,
there is nothing keeping those userptr gpu_vmas alive.
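A sketch of the linking step described at the beginning of this section,
using illustrative helper names rather than actual DRM GPUVM functions,
might look like:

.. code-block:: C

   // Link a GEM object backed gpu_vma (sketch).
   down_write(&gpu_vm->lock);
   dma_resv_lock(obj->resv);      // The gem_object->gpuva_lock in the common case.

   gpu_vm_bo = get_or_create_gpu_vm_bo(gpu_vm, obj);
   link_gpu_vma(&gpu_vma, gpu_vm_bo);
   if (is_external_object(obj))
           add_to_external_object_list(gpu_vm, gpu_vm_bo);

   dma_resv_unlock(obj->resv);
   up_write(&gpu_vm->lock);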
Locking for recoverable page-fault page-table updates
=====================================================
There are two important things we need to ensure with locking for
recoverable page-faults:
* At the time we return pages back to the system / allocator for
reuse, there should be no remaining GPU mappings and any GPU TLB
must have been flushed.
* The unmapping and mapping of a gpu_vma must not race.
Since the unmapping (or zapping) of GPU ptes is typically taking place
where it is hard or even impossible to take any outer-level locks, we
must either introduce a new lock that is held at both mapping and
unmapping time, or look at the locks we do hold at unmapping time and
make sure that they are held also at mapping time. For userptr
gpu_vmas, the ``userptr_seqlock`` is held in write mode in the mmu
invalidation notifier where zapping happens. Hence, if the
``userptr_seqlock`` as well as the ``gpu_vm->userptr_notifier_lock``
is held in read mode during mapping, it will not race with the
zapping. For GEM object backed gpu_vmas, zapping will take place under
the GEM object's dma_resv, and ensuring that the dma_resv is also held
when populating the page-tables for any gpu_vma pointing to the GEM
object will similarly ensure we are race-free.
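For the GEM object backed case, that mapping-side locking can be sketched as
follows, with an illustrative page-table population helper:

.. code-block:: C

   // Race-free against zapping, which also runs under the GEM
   // object's dma_resv.
   dma_resv_lock(obj->resv);
   populate_page_table_entries(&gpu_vma);
   dma_resv_unlock(obj->resv);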
If any part of the mapping is performed asynchronously
under a dma-fence with these locks released, the zapping will need to
wait for that dma-fence to signal under the relevant lock before
starting to modify the page-table.
Since modifying the
page-table structure in a way that frees up page-table memory
might also require outer level locks, the zapping of GPU ptes
typically focuses only on zeroing page-table or page-directory entries
and flushing TLB, whereas freeing of page-table memory is deferred to
unbind or rebind time.


@ -0,0 +1,13 @@
=======================================
drm/imagination PowerVR Graphics Driver
=======================================
.. kernel-doc:: drivers/gpu/drm/imagination/pvr_drv.c
:doc: PowerVR (Series 6 and later) and IMG Graphics Driver
Contents
========
.. toctree::
:maxdepth: 2
uapi


@ -0,0 +1,171 @@
====
UAPI
====
The sources associated with this section can be found in ``pvr_drm.h``.
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR UAPI
OBJECT ARRAYS
=============
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_obj_array
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: DRM_PVR_OBJ_ARRAY
IOCTLS
======
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL interface
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: PVR_IOCTL
DEV_QUERY
---------
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL DEV_QUERY interface
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_dev_query
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_dev_query_args
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_dev_query_gpu_info
drm_pvr_dev_query_runtime_info
drm_pvr_dev_query_hwrt_info
drm_pvr_dev_query_quirks
drm_pvr_dev_query_enhancements
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_heap_id
drm_pvr_heap
drm_pvr_dev_query_heap_info
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_static_data_area_usage
drm_pvr_static_data_area
drm_pvr_dev_query_static_data_areas
CREATE_BO
---------
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL CREATE_BO interface
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_create_bo_args
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: Flags for CREATE_BO
GET_BO_MMAP_OFFSET
------------------
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL GET_BO_MMAP_OFFSET interface
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_get_bo_mmap_offset_args
CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT
----------------------------------------
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT interfaces
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_create_vm_context_args
drm_pvr_ioctl_destroy_vm_context_args
VM_MAP and VM_UNMAP
-------------------
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL VM_MAP and VM_UNMAP interfaces
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_vm_map_args
drm_pvr_ioctl_vm_unmap_args
CREATE_CONTEXT and DESTROY_CONTEXT
----------------------------------
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL CREATE_CONTEXT and DESTROY_CONTEXT interfaces
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_create_context_args
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ctx_priority
drm_pvr_ctx_type
drm_pvr_static_render_context_state
drm_pvr_static_render_context_state_format
drm_pvr_reset_framework
drm_pvr_reset_framework_format
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_destroy_context_args
CREATE_FREE_LIST and DESTROY_FREE_LIST
--------------------------------------
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL CREATE_FREE_LIST and DESTROY_FREE_LIST interfaces
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_create_free_list_args
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_destroy_free_list_args
CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET
--------------------------------------------
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET interfaces
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_create_hwrt_dataset_args
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_create_hwrt_geom_data_args
drm_pvr_create_hwrt_rt_data_args
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_destroy_hwrt_dataset_args
SUBMIT_JOBS
-----------
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: PowerVR IOCTL SUBMIT_JOBS interface
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: Flags for the drm_pvr_sync_op object.
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_ioctl_submit_jobs_args
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: Flags for SUBMIT_JOB ioctl geometry command.
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: Flags for SUBMIT_JOB ioctl fragment command.
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: Flags for SUBMIT_JOB ioctl compute command.
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:doc: Flags for SUBMIT_JOB ioctl transfer command.
.. kernel-doc:: include/uapi/drm/pvr_drm.h
:identifiers: drm_pvr_sync_op
drm_pvr_job_type
drm_pvr_hwrt_data_ref
drm_pvr_job
Internal notes
==============
.. kernel-doc:: drivers/gpu/drm/imagination/pvr_device.h
:doc: IOCTL validation helpers
.. kernel-doc:: drivers/gpu/drm/imagination/pvr_device.h
:identifiers: PVR_STATIC_ASSERT_64BIT_ALIGNED PVR_IOCTL_UNION_PADDING_CHECK
pvr_ioctl_union_padding_check


@ -7,3 +7,4 @@ Misc DRM driver uAPI- and feature implementation guidelines
.. toctree::
drm-vm-bind-async
drm-vm-bind-locking


@ -70,35 +70,42 @@ When the time comes for Xe, the protection will be lifted on Xe and kept in i915
Xe Pre-Merge Goals - Work-in-Progress
=======================================
Drm_scheduler
-------------
Xe primarily uses Firmware based scheduling (GuC FW). However, it will use
drm_scheduler as the scheduler frontend for userspace submission in order to
resolve syncobj and dma-buf implicit sync dependencies. However, drm_scheduler is
not yet prepared to handle the 1-to-1 relationship between drm_gpu_scheduler and
drm_sched_entity.
Display integration with i915
-----------------------------
In order to share the display code with the i915 driver so that there is maximum
reuse, the i915/display/ code is built twice, once for i915.ko and then for
xe.ko. Currently, the i915/display code in Xe tree is polluted with many 'ifdefs'
depending on the build target. The goal is to refactor both Xe and i915/display
code simultaneously in order to get a clean result before they land upstream, so
that display can already be part of the initial pull request towards drm-next.
Deeper changes to drm_scheduler should *not* be required to get Xe accepted, but
some consensus needs to be reached between Xe and other community drivers that
could also benefit from this work, for coupling FW based/assisted submission such
as ARM's new Mali GPU driver, and others.
However, display code should not gate the acceptance of Xe in upstream. Xe
patches will be refactored in a way that display code can be removed, if needed,
from the first pull request of Xe towards drm-next. The expectation is that when
both drivers are part of the drm-tip, the introduction of cleaner patches will be
easier and faster.
As a key measurable result, the patch series introducing Xe itself shall not
depend on any other patch touching drm_scheduler itself that was not yet merged
through drm-misc. This, by itself, already includes reaching an agreement on a
uniform 1-to-1 relationship implementation / usage across drivers.
Xe uAPI high level overview
=============================
ASYNC VM_BIND
-------------
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
Xe merged, it is mandatory to have a consensus with other drivers and Mesa.
It needs to be clear how to handle async VM_BIND and interactions with userspace
memory fences. Ideally with helper support so people don't get it wrong in all
possible ways.
.. warning:: To be done in follow-up patches after/when/where the main consensus on the various items is individually reached.
As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
various flavors, error handling and sample API suggestions are documented in
:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
Xe Pre-Merge Goals - Completed
================================
Drm_exec
--------
Helper to make dma_resv locking for a big number of buffers is getting removed in
the drm_exec series proposed in https://patchwork.freedesktop.org/patch/524376/
If that happens, Xe needs to change and incorporate the changes in the driver.
The goal is to engage with the Community to understand if the best approach is to
move that to the drivers that are using it or if we should keep the helpers in
place waiting for Xe to get merged.
This item ties into the GPUVA, VM_BIND, and even long-running compute support.
As a key measurable result, we need to have a community consensus documented in
this document and the Xe driver prepared for the changes, if necessary.
Userptr integration and vm_bind
-------------------------------
@ -123,10 +130,45 @@ Documentation should include:
* O(1) complexity under VM_BIND.
The document is now included in the drm documentation :doc:`here </gpu/drm-vm-bind-async>`.
Some parts of userptr like mmu_notifiers should become GPUVA or DRM helpers when
the second driver supporting VM_BIND+userptr appears. Details to be defined when
the time comes.
The DRM GPUVM helpers do not yet include the userptr parts, but discussions
about implementing them are ongoing.
ASYNC VM_BIND
-------------
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
Xe merged, it is mandatory to have a consensus with other drivers and Mesa.
It needs to be clear how to handle async VM_BIND and interactions with userspace
memory fences. Ideally with helper support so people don't get it wrong in all
possible ways.
As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
various flavors, error handling and sample API suggestions are documented in
:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
Drm_scheduler
-------------
Xe primarily uses Firmware based scheduling (GuC FW). However, it will use
drm_scheduler as the scheduler frontend for userspace submission in order to
resolve syncobj and dma-buf implicit sync dependencies. However, drm_scheduler is
not yet prepared to handle the 1-to-1 relationship between drm_gpu_scheduler and
drm_sched_entity.
Deeper changes to drm_scheduler should *not* be required to get Xe accepted, but
some consensus needs to be reached between Xe and other community drivers that
could also benefit from this work, for coupling FW based/assisted submission such
as ARM's new Mali GPU driver, and others.
As a key measurable result, the patch series introducing Xe itself shall not
depend on any other patch touching drm_scheduler itself that was not yet merged
through drm-misc. This, by itself, already includes reaching an agreement on a
uniform 1-to-1 relationship implementation / usage across drivers.
Long running compute: minimal data structure/scaffolding
--------------------------------------------------------
The generic scheduler code needs to include the handling of endless compute
@ -139,46 +181,6 @@ this minimal drm/scheduler work, if needed, merged to drm-misc in a way that any
drm driver, including Xe, could re-use and add their own individual needs on top
in a next stage. However, this should not block the initial merge.
This is a non-blocker item, since shipping the driver without long-running
compute support enabled is not a showstopper.
Display integration with i915
-----------------------------
In order to share the display code with the i915 driver so that there is maximum
reuse, the i915/display/ code is built twice, once for i915.ko and then for
xe.ko. Currently, the i915/display code in Xe tree is polluted with many 'ifdefs'
depending on the build target. The goal is to refactor both Xe and i915/display
code simultaneously in order to get a clean result before they land upstream, so
that display can already be part of the initial pull request towards drm-next.
However, display code should not gate the acceptance of Xe in upstream. Xe
patches will be refactored in a way that display code can be removed, if needed,
from the first pull request of Xe towards drm-next. The expectation is that when
both drivers are part of the drm-tip, the introduction of cleaner patches will be
easier and faster.
Drm_exec
--------
Helper to make dma_resv locking for a big number of buffers is getting removed in
the drm_exec series proposed in https://patchwork.freedesktop.org/patch/524376/
If that happens, Xe needs to change and incorporate the changes in the driver.
The goal is to engage with the Community to understand if the best approach is to
move that to the drivers that are using it or if we should keep the helpers in
place waiting for Xe to get merged.
This item ties into the GPUVA, VM_BIND, and even long-running compute support.
As a key measurable result, we need to have a community consensus documented in
this document and the Xe driver prepared for the changes, if necessary.
Xe uAPI high level overview
=============================
.. warning:: To be done in follow-up patches after/when/where the main consensus on the various items is individually reached.
Xe Pre-Merge Goals - Completed
================================
Dev_coredump
------------


@ -337,8 +337,8 @@ connector register/unregister fixes
Level: Intermediate
Remove load/unload callbacks from all non-DRIVER_LEGACY drivers
---------------------------------------------------------------
Remove load/unload callbacks
----------------------------
The load/unload callbacks in struct &drm_driver are very much midlayers, plus
for historical reasons they get the ordering wrong (and we can't fix that)
@ -347,8 +347,7 @@ between setting up the &drm_driver structure and calling drm_dev_register().
- Rework drivers to no longer use the load/unload callbacks, directly coding the
load/unload sequence into the driver's probe function.
- Once all non-DRIVER_LEGACY drivers are converted, disallow the load/unload
callbacks for all modern drivers.
- Once all drivers are converted, remove the load/unload callbacks.
Contact: Daniel Vetter
@ -621,6 +620,23 @@ Contact: Javier Martinez Canillas <javierm@redhat.com>
Level: Intermediate
Clean up and document former selftests suites
---------------------------------------------
Some KUnit test suites (drm_buddy, drm_cmdline_parser, drm_damage_helper,
drm_format, drm_framebuffer, drm_dp_mst_helper, drm_mm, drm_plane_helper and
drm_rect) are former selftests suites that were converted over when KUnit
was first introduced.
These suites were fairly undocumented and written with different goals than
unit tests usually have. Trying to identify what each test in these suites
actually tests for, whether that makes sense for a unit test, and either
removing it if it doesn't or documenting it if it does would be of great help.
Contact: Maxime Ripard <mripard@kernel.org>
Level: Intermediate
Enable trinity for DRM
----------------------
@ -765,6 +781,29 @@ Contact: Hans de Goede
Level: Advanced
Buffer age or other damage accumulation algorithm for buffer damage
===================================================================
Drivers that do per-buffer uploads need buffer damage handling (rather than
frame damage like drivers that do per-plane or per-CRTC uploads), but there is
no support to get the buffer age or any other damage accumulation algorithm.
For this reason, the damage helpers just fall back to a full plane update if the
framebuffer attached to a plane has changed since the last page-flip. Drivers
set &drm_plane_state.ignore_damage_clips to true as indication to
drm_atomic_helper_damage_iter_init() and drm_atomic_helper_damage_iter_next()
helpers that the damage clips should be ignored.
This should be improved to get damage tracking properly working on drivers that
do per-buffer uploads.
More information about damage tracking and references to learning materials can
be found in :ref:`damage_tracking_properties`.
Contact: Javier Martinez Canillas <javierm@redhat.com>
Level: Advanced
Outside DRM
===========


@ -0,0 +1,25 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
=======================
drm/xe Intel GFX Driver
=======================
The drm/xe driver supports some future GFX cards with rendering, display,
compute and media. Support for currently available platforms like TGL, ADL,
DG2, etc. is provided to prototype the driver.
.. toctree::
:titlesonly:
xe_mm
xe_map
xe_migrate
xe_cs
xe_pm
xe_pcode
xe_gt_mcr
xe_wa
xe_rtp
xe_firmware
xe_tile
xe_debugging


@ -0,0 +1,8 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
==================
Command submission
==================
.. kernel-doc:: drivers/gpu/drm/xe/xe_exec.c
:doc: Execbuf (User GPU command submission)


@ -0,0 +1,7 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
=========
Debugging
=========
.. kernel-doc:: drivers/gpu/drm/xe/xe_assert.h


@ -0,0 +1,37 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
========
Firmware
========
Firmware Layout
===============
.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
:doc: CSS-based Firmware Layout
.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
:doc: GSC-based Firmware Layout
Write Once Protected Content Memory (WOPCM) Layout
==================================================
.. kernel-doc:: drivers/gpu/drm/xe/xe_wopcm.c
:doc: Write Once Protected Content Memory (WOPCM) Layout
GuC CTB Blob
============
.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_ct.c
:doc: GuC CTB Blob
GuC Power Conservation (PC)
===========================
.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c
:doc: GuC Power Conservation (PC)
Internal API
============
TODO


@ -0,0 +1,13 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
==============================================
GT Multicast/Replicated (MCR) Register Support
==============================================
.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_mcr.c
:doc: GT Multicast/Replicated (MCR) Register Support
Internal API
============
TODO


@ -0,0 +1,8 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
=========
Map Layer
=========
.. kernel-doc:: drivers/gpu/drm/xe/xe_map.h
:doc: Map layer


@ -0,0 +1,8 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
=============
Migrate Layer
=============
.. kernel-doc:: drivers/gpu/drm/xe/xe_migrate_doc.h
:doc: Migrate Layer


@ -0,0 +1,14 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
=================
Memory Management
=================
.. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h
:doc: Buffer Objects (BO)
Pagetable building
==================
.. kernel-doc:: drivers/gpu/drm/xe/xe_pt.c
:doc: Pagetable building

View File

@ -0,0 +1,14 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
=====
Pcode
=====
.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
:doc: PCODE
Internal API
============
.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
:internal:

View File

@ -0,0 +1,14 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
========================
Runtime Power Management
========================
.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
:doc: Xe Power Management
Internal API
============
.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
:internal:

View File

@ -0,0 +1,20 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
=========================
Register Table Processing
=========================
.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
:doc: Register Table Processing
Internal API
============
.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp_types.h
:internal:
.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.h
:internal:
.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
:internal:

View File

@ -0,0 +1,14 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
==================
Multi-tile Devices
==================
.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c
:doc: Multi-tile Design
Internal API
============
.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c
:internal:

View File

@ -0,0 +1,14 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
====================
Hardware workarounds
====================
.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
:doc: Hardware workarounds
Internal API
============
.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
:internal:

View File

@ -6484,8 +6484,7 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/sun4i/sun8i*
DRM DRIVER FOR ARM PL111 CLCD
M: Emma Anholt <emma@anholt.net>
S: Supported
S: Orphan
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/pl111/
@ -6600,8 +6599,7 @@ F: Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml
F: drivers/gpu/drm/panel/panel-himax-hx8394.c
DRM DRIVER FOR HX8357D PANELS
M: Emma Anholt <emma@anholt.net>
S: Maintained
S: Orphan
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/himax,hx8357d.txt
F: drivers/gpu/drm/tiny/hx8357d.c
@ -6628,6 +6626,12 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/ilitek,ili9486.yaml
F: drivers/gpu/drm/tiny/ili9486.c
DRM DRIVER FOR ILITEK ILI9805 PANELS
M: Michael Trimarchi <michael@amarulasolutions.com>
S: Maintained
F: Documentation/devicetree/bindings/display/panel/ilitek,ili9805.yaml
F: drivers/gpu/drm/panel/panel-ilitek-ili9805.c
DRM DRIVER FOR JADARD JD9365DA-H3 MIPI-DSI LCD PANELS
M: Jagan Teki <jagan@edgeble.ai>
S: Maintained
@ -6856,6 +6860,12 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/ste,mcde.yaml
F: drivers/gpu/drm/mcde/
DRM DRIVER FOR SYNAPTICS R63353 PANELS
M: Michael Trimarchi <michael@amarulasolutions.com>
S: Maintained
F: Documentation/devicetree/bindings/display/panel/synaptics,r63353.yaml
F: drivers/gpu/drm/panel/panel-synaptics-r63353.c
DRM DRIVER FOR TI DLPC3433 MIPI DSI TO DMD BRIDGE
M: Jagan Teki <jagan@amarulasolutions.com>
S: Maintained
@ -7140,6 +7150,7 @@ F: include/linux/platform_data/shmob_drm.h
DRM DRIVERS FOR ROCKCHIP
M: Sandy Huang <hjc@rock-chips.com>
M: Heiko Stübner <heiko@sntech.de>
M: Andy Yan <andy.yan@rock-chips.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
@ -7194,8 +7205,8 @@ F: Documentation/devicetree/bindings/display/ti/
F: drivers/gpu/drm/omapdrm/
DRM DRIVERS FOR V3D
M: Emma Anholt <emma@anholt.net>
M: Melissa Wen <mwen@igalia.com>
M: Maíra Canal <mcanal@igalia.com>
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
@ -7203,7 +7214,6 @@ F: drivers/gpu/drm/v3d/
F: include/uapi/drm/v3d_drm.h
DRM DRIVERS FOR VC4
M: Emma Anholt <emma@anholt.net>
M: Maxime Ripard <mripard@kernel.org>
S: Supported
T: git git://github.com/anholt/linux
@ -10370,6 +10380,17 @@ IMGTEC IR DECODER DRIVER
S: Orphan
F: drivers/media/rc/img-ir/
IMGTEC POWERVR DRM DRIVER
M: Frank Binns <frank.binns@imgtec.com>
M: Donald Robson <donald.robson@imgtec.com>
M: Matt Coster <matt.coster@imgtec.com>
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/gpu/img,powervr.yaml
F: Documentation/gpu/imagination/
F: drivers/gpu/drm/imagination/
F: include/uapi/drm/pvr_drm.h
IMON SOUNDGRAPH USB IR RECEIVER
M: Sean Young <sean@mess.org>
L: linux-media@vger.kernel.org
@ -10609,16 +10630,26 @@ L: linux-kernel@vger.kernel.org
S: Supported
F: arch/x86/include/asm/intel-family.h
INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
INTEL DRM DISPLAY FOR XE AND I915 DRIVERS
M: Jani Nikula <jani.nikula@linux.intel.com>
M: Rodrigo Vivi <rodrigo.vivi@intel.com>
L: intel-gfx@lists.freedesktop.org
L: intel-xe@lists.freedesktop.org
S: Supported
F: drivers/gpu/drm/i915/display/
F: drivers/gpu/drm/xe/display/
F: drivers/gpu/drm/xe/compat-i915-headers
INTEL DRM I915 DRIVER (Meteor Lake, DG2 and older excluding Poulsbo, Moorestown and derivative)
M: Jani Nikula <jani.nikula@linux.intel.com>
M: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
M: Rodrigo Vivi <rodrigo.vivi@intel.com>
M: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
L: intel-gfx@lists.freedesktop.org
S: Supported
W: https://01.org/linuxgraphics/
W: https://drm.pages.freedesktop.org/intel-docs/
Q: http://patchwork.freedesktop.org/project/intel-gfx/
B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
B: https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html
C: irc://irc.oftc.net/intel-gfx
T: git git://anongit.freedesktop.org/drm-intel
F: Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@ -10628,6 +10659,23 @@ F: drivers/gpu/drm/i915/
F: include/drm/i915*
F: include/uapi/drm/i915_drm.h
INTEL DRM XE DRIVER (Lunar Lake and newer)
M: Lucas De Marchi <lucas.demarchi@intel.com>
M: Oded Gabbay <ogabbay@kernel.org>
M: Thomas Hellström <thomas.hellstrom@linux.intel.com>
L: intel-xe@lists.freedesktop.org
S: Supported
W: https://drm.pages.freedesktop.org/intel-docs/
Q: http://patchwork.freedesktop.org/project/intel-xe/
B: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues
C: irc://irc.oftc.net/xe
T: git https://gitlab.freedesktop.org/drm/xe/kernel.git
F: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
F: Documentation/gpu/xe/
F: drivers/gpu/drm/xe/
F: include/drm/xe*
F: include/uapi/drm/xe_drm.h
INTEL ETHERNET DRIVERS
M: Jesse Brandeburg <jesse.brandeburg@intel.com>
M: Tony Nguyen <anthony.l.nguyen@intel.com>

View File

@ -11,6 +11,7 @@
#include <linux/idr.h>
#include <drm/drm_accel.h>
#include <drm/drm_auth.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>

View File

@ -853,6 +853,9 @@ static int device_early_init(struct hl_device *hdev)
gaudi2_set_asic_funcs(hdev);
strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name));
break;
case ASIC_GAUDI2C:
gaudi2_set_asic_funcs(hdev);
strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name));
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
@ -1041,18 +1044,21 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
return (vendor_id == PCI_VENDOR_ID_HABANALABS);
}
static void hl_device_eq_heartbeat(struct hl_device *hdev)
static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
{
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
struct asic_fixed_properties *prop = &hdev->asic_prop;
if (!prop->cpucp_info.eq_health_check_supported)
return;
return 0;
if (hdev->eq_heartbeat_received)
if (hdev->eq_heartbeat_received) {
hdev->eq_heartbeat_received = false;
else
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
} else {
dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
return -EIO;
}
return 0;
}
static void hl_device_heartbeat(struct work_struct *work)
@ -1069,10 +1075,9 @@ static void hl_device_heartbeat(struct work_struct *work)
/*
* For the EQ health check, verify that the driver received the heartbeat EQ event
* in order to validate that the EQ is working.
* Reschedule only if the EQ is healthy and we managed to send the next heartbeat.
*/
hl_device_eq_heartbeat(hdev);
if (!hdev->asic_funcs->send_heartbeat(hdev))
if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev)))
goto reschedule;
if (hl_device_operational(hdev, NULL))
@ -2035,7 +2040,7 @@ device_reset:
if (ctx)
hl_ctx_put(ctx);
return hl_device_reset(hdev, flags);
return hl_device_reset(hdev, flags | HL_DRV_RESET_HARD);
}
static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)

View File

@ -646,39 +646,27 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
return rc;
}
static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
u32 sts_val)
static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, u32 sts_val)
{
bool err_exists = false;
if (!(err_val & CPU_BOOT_ERR0_ENABLED))
return false;
if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
dev_err(hdev->dev,
"Device boot error - DRAM initialization failed\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
dev_err(hdev->dev, "Device boot error - DRAM initialization failed\n");
if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
dev_err(hdev->dev,
"Device boot error - Thermal Sensor initialization failed\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
dev_err(hdev->dev, "Device boot error - Thermal Sensor initialization failed\n");
if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
if (hdev->bmc_enable) {
dev_err(hdev->dev,
"Device boot error - Skipped waiting for BMC\n");
err_exists = true;
dev_err(hdev->dev, "Device boot error - Skipped waiting for BMC\n");
} else {
dev_info(hdev->dev,
"Device boot message - Skipped waiting for BMC\n");
dev_info(hdev->dev, "Device boot message - Skipped waiting for BMC\n");
/* This is an info so we don't want it to disable the
* device
*/
@ -686,48 +674,29 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
}
}
if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
dev_err(hdev->dev,
"Device boot error - Serdes data from BMC not available\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
dev_err(hdev->dev, "Device boot error - Serdes data from BMC not available\n");
if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
dev_err(hdev->dev,
"Device boot error - NIC F/W initialization failed\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
dev_err(hdev->dev, "Device boot error - NIC F/W initialization failed\n");
if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
dev_err(hdev->dev,
"Device boot warning - security not ready\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
dev_err(hdev->dev, "Device boot warning - security not ready\n");
if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
dev_err(hdev->dev, "Device boot error - security failure\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
dev_err(hdev->dev, "Device boot error - eFuse failure\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) {
if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL)
dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
dev_err(hdev->dev, "Device boot error - PLL failure\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL)
dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
/* Ignore this bit, don't prevent driver loading */
@ -735,52 +704,32 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
err_val &= ~CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL;
}
if (err_val & CPU_BOOT_ERR0_BINNING_FAIL) {
if (err_val & CPU_BOOT_ERR0_BINNING_FAIL)
dev_err(hdev->dev, "Device boot error - binning failure\n");
err_exists = true;
}
if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
dev_err(hdev->dev, "Device boot warning - Skipped DRAM initialization\n");
if (err_val & CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL)
dev_err(hdev->dev, "Device boot error - ARC memory scrub failed\n");
/* All warnings should go here in order not to reach the unknown error validation */
if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
dev_err(hdev->dev, "Device boot error - EEPROM failure detected\n");
err_exists = true;
}
/* All warnings should go here in order not to reach the unknown error validation */
if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
dev_warn(hdev->dev,
"Device boot warning - Skipped DRAM initialization\n");
/* This is a warning so we don't want it to disable the
* device
*/
err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
}
if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL)
dev_warn(hdev->dev, "Device boot warning - Failed to load preboot primary image\n");
if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) {
dev_warn(hdev->dev,
"Device boot warning - Failed to load preboot primary image\n");
/* This is a warning so we don't want it to disable the
* device as we have a secondary preboot image
*/
err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL;
}
if (err_val & CPU_BOOT_ERR0_TPM_FAIL)
dev_warn(hdev->dev, "Device boot warning - TPM failure\n");
if (err_val & CPU_BOOT_ERR0_TPM_FAIL) {
dev_warn(hdev->dev,
"Device boot warning - TPM failure\n");
/* This is a warning so we don't want it to disable the
* device
*/
err_val &= ~CPU_BOOT_ERR0_TPM_FAIL;
}
if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
dev_err(hdev->dev,
"Device boot error - unknown ERR0 error 0x%08x\n", err_val);
if (err_val & CPU_BOOT_ERR_FATAL_MASK)
err_exists = true;
}
/* return error only if it's in the predefined mask */
if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
@ -3295,6 +3244,14 @@ int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_in
HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
}
int hl_fw_get_dev_info_signed(struct hl_device *hdev,
struct cpucp_dev_info_signed *dev_info_signed, u32 nonce)
{
return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_INFO_SIGNED_GET, dev_info_signed,
sizeof(struct cpucp_dev_info_signed), nonce,
HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
}
int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode,
dma_addr_t buff, u32 *size)
{

View File

@ -1262,6 +1262,7 @@ struct hl_dec {
* @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
* @ASIC_GAUDI2: Gaudi2 device.
* @ASIC_GAUDI2B: Gaudi2B device.
* @ASIC_GAUDI2C: Gaudi2C device.
*/
enum hl_asic_type {
ASIC_INVALID,
@ -1270,6 +1271,7 @@ enum hl_asic_type {
ASIC_GAUDI_SEC,
ASIC_GAUDI2,
ASIC_GAUDI2B,
ASIC_GAUDI2C,
};
struct hl_cs_parser;
@ -3519,6 +3521,9 @@ struct hl_device {
u8 heartbeat;
};
/* Retrieve the PCI device name in case of a PCI device, or the dev name in the simulator */
#define HL_DEV_NAME(hdev) \
((hdev)->pdev ? dev_name(&(hdev)->pdev->dev) : "NA-DEVICE")
/**
* struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
@ -3594,6 +3599,14 @@ static inline bool hl_is_fw_sw_ver_below(struct hl_device *hdev, u32 fw_sw_major
return false;
}
static inline bool hl_is_fw_sw_ver_equal_or_greater(struct hl_device *hdev, u32 fw_sw_major,
u32 fw_sw_minor)
{
return (hdev->fw_sw_major_ver > fw_sw_major ||
(hdev->fw_sw_major_ver == fw_sw_major &&
hdev->fw_sw_minor_ver >= fw_sw_minor));
}
/*
* Kernel module functions that can be accessed by entire module
*/
@ -3954,6 +3967,8 @@ long hl_fw_get_max_power(struct hl_device *hdev);
void hl_fw_set_max_power(struct hl_device *hdev);
int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
u32 nonce);
int hl_fw_get_dev_info_signed(struct hl_device *hdev,
struct cpucp_dev_info_signed *dev_info_signed, u32 nonce);
int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);

View File

@ -141,6 +141,9 @@ static enum hl_asic_type get_asic_type(struct hl_device *hdev)
case REV_ID_B:
asic_type = ASIC_GAUDI2B;
break;
case REV_ID_C:
asic_type = ASIC_GAUDI2C;
break;
default:
break;
}
@ -670,6 +673,38 @@ static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
return PCI_ERS_RESULT_RECOVERED;
}
static void hl_pci_reset_prepare(struct pci_dev *pdev)
{
struct hl_device *hdev;
hdev = pci_get_drvdata(pdev);
if (!hdev)
return;
hdev->disabled = true;
}
static void hl_pci_reset_done(struct pci_dev *pdev)
{
struct hl_device *hdev;
u32 flags;
hdev = pci_get_drvdata(pdev);
if (!hdev)
return;
/*
* Schedule a thread to trigger hard reset.
* This handler is needed for rare cases where the driver is up
* and an FLR occurs. This is valid only when working with no VM, so FW handles the FLR
* and resets the device. FW goes back to the preboot stage, so the driver needs to
* perform a hard reset in order to load the FW fit again.
*/
flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW;
hl_device_reset(hdev, flags);
}
static const struct dev_pm_ops hl_pm_ops = {
.suspend = hl_pmops_suspend,
.resume = hl_pmops_resume,
@ -679,6 +714,8 @@ static const struct pci_error_handlers hl_pci_err_handler = {
.error_detected = hl_pci_err_detected,
.slot_reset = hl_pci_err_slot_reset,
.resume = hl_pci_err_resume,
.reset_prepare = hl_pci_reset_prepare,
.reset_done = hl_pci_reset_done,
};
static struct pci_driver hl_pci_driver = {

View File

@ -19,6 +19,9 @@
#include <asm/msr.h>
/* make sure there is space for all the signed info */
static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ);
static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
@ -685,7 +688,7 @@ static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if (!sec_attest_info)
return -ENOMEM;
info = kmalloc(sizeof(*info), GFP_KERNEL);
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
rc = -ENOMEM;
goto free_sec_attest_info;
@ -719,6 +722,53 @@ free_sec_attest_info:
return rc;
}
static int dev_info_signed(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
struct cpucp_dev_info_signed *dev_info_signed;
struct hl_info_signed *info;
u32 max_size = args->return_size;
int rc;
if ((!max_size) || (!out))
return -EINVAL;
dev_info_signed = kzalloc(sizeof(*dev_info_signed), GFP_KERNEL);
if (!dev_info_signed)
return -ENOMEM;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
rc = -ENOMEM;
goto free_dev_info_signed;
}
rc = hl_fw_get_dev_info_signed(hpriv->hdev,
dev_info_signed, args->sec_attest_nonce);
if (rc)
goto free_info;
info->nonce = le32_to_cpu(dev_info_signed->nonce);
info->info_sig_len = dev_info_signed->info_sig_len;
info->pub_data_len = le16_to_cpu(dev_info_signed->pub_data_len);
info->certificate_len = le16_to_cpu(dev_info_signed->certificate_len);
info->dev_info_len = sizeof(struct cpucp_info);
memcpy(&info->info_sig, &dev_info_signed->info_sig, sizeof(info->info_sig));
memcpy(&info->public_data, &dev_info_signed->public_data, sizeof(info->public_data));
memcpy(&info->certificate, &dev_info_signed->certificate, sizeof(info->certificate));
memcpy(&info->dev_info, &dev_info_signed->info, info->dev_info_len);
rc = copy_to_user(out, info, min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
free_info:
kfree(info);
free_dev_info_signed:
kfree(dev_info_signed);
return rc;
}
static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
int rc;
@ -1089,6 +1139,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_FW_GENERIC_REQ:
return send_fw_generic_request(hdev, args);
case HL_INFO_DEV_SIGNED:
return dev_info_signed(hpriv, args);
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -EINVAL;

View File

@ -578,10 +578,6 @@ int hl_get_temperature(struct hl_device *hdev,
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
dev_dbg(hdev->dev, "get temp, ctl 0x%x, sensor %d, type %d\n",
pkt.ctl, pkt.sensor_index, pkt.type);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);

View File

@ -955,8 +955,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
(i + 1) == phys_pg_pack->npages);
if (rc) {
dev_err(hdev->dev,
"map failed for handle %u, npages: %llu, mapped: %llu",
phys_pg_pack->handle, phys_pg_pack->npages,
"map failed (%d) for handle %u, npages: %llu, mapped: %llu\n",
rc, phys_pg_pack->handle, phys_pg_pack->npages,
mapped_pg_cnt);
goto err;
}
@ -1186,7 +1186,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device
rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
if (rc) {
dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
dev_err(hdev->dev, "mapping page pack failed (%d) for handle %u\n",
rc, handle);
mutex_unlock(&hdev->mmu_lock);
goto map_err;
}

View File

@ -596,6 +596,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
break;
case ASIC_GAUDI2:
case ASIC_GAUDI2B:
case ASIC_GAUDI2C:
/* MMUs in Gaudi2 are always host resident */
hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
break;

View File

@ -8,6 +8,7 @@
#include "habanalabs.h"
#include <linux/pci.h>
#include <linux/types.h>
static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
{
@ -80,12 +81,27 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c
{
struct hl_device *hdev = dev_get_drvdata(dev);
struct cpucp_info *cpucp_info;
u32 infineon_second_stage_version;
u32 infineon_second_stage_first_instance;
u32 infineon_second_stage_second_instance;
u32 infineon_second_stage_third_instance;
u32 mask = 0xff;
cpucp_info = &hdev->asic_prop.cpucp_info;
infineon_second_stage_version = le32_to_cpu(cpucp_info->infineon_second_stage_version);
infineon_second_stage_first_instance = infineon_second_stage_version & mask;
infineon_second_stage_second_instance =
(infineon_second_stage_version >> 8) & mask;
infineon_second_stage_third_instance =
(infineon_second_stage_version >> 16) & mask;
if (cpucp_info->infineon_second_stage_version)
return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version),
le32_to_cpu(cpucp_info->infineon_second_stage_version));
return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n",
le32_to_cpu(cpucp_info->infineon_version),
infineon_second_stage_first_instance,
infineon_second_stage_second_instance,
infineon_second_stage_third_instance);
else
return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}
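For illustration with a made-up value: a packed infineon_second_stage_version of
0x00030201 decodes (low byte first) to instances 0x01, 0x02 and 0x03, so the
attribute now reads like "0x12 0x01:0x02:0x03" rather than the old two-value
form.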
@ -251,6 +267,9 @@ static ssize_t device_type_show(struct device *dev,
case ASIC_GAUDI2B:
str = "GAUDI2B";
break;
case ASIC_GAUDI2C:
str = "GAUDI2C";
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
hdev->asic_type);
@ -383,6 +402,21 @@ static ssize_t security_enabled_show(struct device *dev,
return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled);
}
static ssize_t module_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%u\n", le32_to_cpu(hdev->asic_prop.cpucp_info.card_location));
}
static ssize_t parent_device_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%s\n", HL_DEV_NAME(hdev));
}
static DEVICE_ATTR_RO(armcp_kernel_ver);
static DEVICE_ATTR_RO(armcp_ver);
static DEVICE_ATTR_RO(cpld_ver);
@ -402,6 +436,8 @@ static DEVICE_ATTR_RO(thermal_ver);
static DEVICE_ATTR_RO(uboot_ver);
static DEVICE_ATTR_RO(fw_os_ver);
static DEVICE_ATTR_RO(security_enabled);
static DEVICE_ATTR_RO(module_id);
static DEVICE_ATTR_RO(parent_device);
static struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)},
@ -427,6 +463,8 @@ static struct attribute *hl_dev_attrs[] = {
&dev_attr_uboot_ver.attr,
&dev_attr_fw_os_ver.attr,
&dev_attr_security_enabled.attr,
&dev_attr_module_id.attr,
&dev_attr_parent_device.attr,
NULL,
};

View File

@ -7858,39 +7858,44 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
return !!ecc_data->is_critical;
}
static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
{
u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
u64 cq_ptr, arc_cq_ptr, cp_current_inst;
struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
u64 cq_ptr, cp_current_inst;
u32 lo, hi, cq_size, cp_sts;
bool is_arc_cq;
lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET);
hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET);
cq_ptr = ((u64) hi) << 32 | lo;
cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET);
cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET);
hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET);
arc_cq_ptr = ((u64) hi) << 32 | lo;
arc_cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET);
if (is_arc_cq) {
lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
cq_ptr = ((u64) hi) << 32 | lo;
cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
} else {
lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
cq_ptr = ((u64) hi) << 32 | lo;
cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
}
lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
cp_current_inst = ((u64) hi) << 32 | lo;
dev_info(hdev->dev,
"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
"LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
if (arc_cq_ptr) {
hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
} else {
hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
}
hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
if (undef_opcode->write_enable) {
memset(undef_opcode, 0, sizeof(*undef_opcode));
undef_opcode->timestamp = ktime_get();
undef_opcode->cq_addr = cq_ptr;
undef_opcode->cq_size = cq_size;
undef_opcode->engine_id = engine_id;
undef_opcode->stream_id = QMAN_STREAMS;
undef_opcode->write_enable = 0;
}
}
@ -7929,21 +7934,12 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
error_count++;
}
if (i == QMAN_STREAMS && error_count) {
/* check for undefined opcode */
if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK &&
hdev->captured_err_info.undef_opcode.write_enable) {
memset(&hdev->captured_err_info.undef_opcode, 0,
sizeof(hdev->captured_err_info.undef_opcode));
hdev->captured_err_info.undef_opcode.write_enable = false;
hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
hdev->captured_err_info.undef_opcode.engine_id =
gaudi2_queue_id_to_engine_id[qid_base];
*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
}
handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
/* Check for undefined opcode error in lower QM */
if ((i == QMAN_STREAMS) &&
(glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
handle_lower_qman_data_on_err(hdev, qman_base,
gaudi2_queue_id_to_engine_id[qid_base]);
*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
}
}
@ -10007,6 +10003,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13))
is_critical = true;
break;
case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:

View File

@ -242,14 +242,15 @@
#define QM_FENCE2_OFFSET (mmPDMA0_QM_CP_FENCE2_RDATA_0 - mmPDMA0_QM_BASE)
#define QM_SEI_STATUS_OFFSET (mmPDMA0_QM_SEI_STATUS - mmPDMA0_QM_BASE)
#define QM_CQ_PTR_LO_4_OFFSET (mmPDMA0_QM_CQ_PTR_LO_4 - mmPDMA0_QM_BASE)
#define QM_CQ_PTR_HI_4_OFFSET (mmPDMA0_QM_CQ_PTR_HI_4 - mmPDMA0_QM_BASE)
#define QM_CQ_TSIZE_4_OFFSET (mmPDMA0_QM_CQ_TSIZE_4 - mmPDMA0_QM_BASE)
#define QM_CQ_TSIZE_STS_4_OFFSET (mmPDMA0_QM_CQ_TSIZE_STS_4 - mmPDMA0_QM_BASE)
#define QM_CQ_PTR_LO_STS_4_OFFSET (mmPDMA0_QM_CQ_PTR_LO_STS_4 - mmPDMA0_QM_BASE)
#define QM_CQ_PTR_HI_STS_4_OFFSET (mmPDMA0_QM_CQ_PTR_HI_STS_4 - mmPDMA0_QM_BASE)
#define QM_ARC_CQ_PTR_LO_OFFSET (mmPDMA0_QM_ARC_CQ_PTR_LO - mmPDMA0_QM_BASE)
#define QM_ARC_CQ_PTR_HI_OFFSET (mmPDMA0_QM_ARC_CQ_PTR_HI - mmPDMA0_QM_BASE)
#define QM_ARC_CQ_TSIZE_OFFSET (mmPDMA0_QM_ARC_CQ_TSIZE - mmPDMA0_QM_BASE)
#define QM_ARC_CQ_TSIZE_STS_OFFSET (mmPDMA0_QM_ARC_CQ_TSIZE_STS - mmPDMA0_QM_BASE)
#define QM_ARC_CQ_PTR_LO_STS_OFFSET (mmPDMA0_QM_ARC_CQ_PTR_LO_STS - mmPDMA0_QM_BASE)
#define QM_ARC_CQ_PTR_HI_STS_OFFSET (mmPDMA0_QM_ARC_CQ_PTR_HI_STS - mmPDMA0_QM_BASE)
#define QM_CP_STS_4_OFFSET (mmPDMA0_QM_CP_STS_4 - mmPDMA0_QM_BASE)
#define QM_CP_CURRENT_INST_LO_4_OFFSET (mmPDMA0_QM_CP_CURRENT_INST_LO_4 - mmPDMA0_QM_BASE)
#define QM_CP_CURRENT_INST_HI_4_OFFSET (mmPDMA0_QM_CP_CURRENT_INST_HI_4 - mmPDMA0_QM_BASE)

View File

@ -25,6 +25,7 @@ enum hl_revision_id {
REV_ID_INVALID = 0x00,
REV_ID_A = 0x01,
REV_ID_B = 0x02,
REV_ID_C = 0x03
};
#endif /* INCLUDE_PCI_GENERAL_H_ */

View File

@ -1,16 +1,17 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_ACCEL_IVPU
tristate "Intel VPU for Meteor Lake and newer"
tristate "Intel NPU (Neural Processing Unit)"
depends on DRM_ACCEL
depends on X86_64 && !UML
depends on PCI && PCI_MSI
select FW_LOADER
select SHMEM
select DRM_GEM_SHMEM_HELPER
select GENERIC_ALLOCATOR
help
Choose this option if you have a system that has a 14th generation Intel CPU
or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated
inference accelerator for Computer Vision and Deep Learning applications.
Choose this option if you have a system with a 14th generation
Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU)
is a CPU-integrated inference accelerator for Computer Vision
and Deep Learning applications.
If "M" is selected, the module will be called intel_vpu.

View File

@ -14,6 +14,7 @@
#include "ivpu_fw.h"
#include "ivpu_fw_log.h"
#include "ivpu_gem.h"
#include "ivpu_hw.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
@ -115,6 +116,31 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = {
{"reset_pending", reset_pending_show, 0},
};
static ssize_t
dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
struct ivpu_device *vdev = file->private_data;
struct ivpu_fw_info *fw = vdev->fw;
u32 dvfs_mode;
int ret;
ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode);
if (ret < 0)
return ret;
fw->dvfs_mode = dvfs_mode;
ivpu_pm_schedule_recovery(vdev);
return size;
}
static const struct file_operations dvfs_mode_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.write = dvfs_mode_fops_write,
};
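As a minimal user-space sketch (the debugfs path is illustrative and depends on
the accel device node), flipping the mode and triggering the scheduled recovery
could look like:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical path; the actual location depends on the accel minor. */
	int fd = open("/sys/kernel/debug/accel/0000:00:0b.0/dvfs_mode", O_WRONLY);

	if (fd < 0) {
		perror("open dvfs_mode");
		return 1;
	}
	/* The handler parses via kstrtou32_from_user() with base 0, so plain
	 * decimal or 0x-prefixed values work; the write schedules a recovery
	 * that reboots the firmware with the new DVFS mode in its boot params.
	 */
	if (write(fd, "2", 1) != 1)
		perror("write dvfs_mode");
	close(fd);
	return 0;
}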
static int fw_log_show(struct seq_file *s, void *v)
{
struct ivpu_device *vdev = s->private;
@ -151,6 +177,30 @@ static const struct file_operations fw_log_fops = {
.release = single_release,
};
static ssize_t
fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf,
size_t size, loff_t *pos)
{
struct ivpu_device *vdev = file->private_data;
bool enable;
int ret;
ret = kstrtobool_from_user(user_buf, size, &enable);
if (ret < 0)
return ret;
ivpu_hw_profiling_freq_drive(vdev, enable);
ivpu_pm_schedule_recovery(vdev);
return size;
}
static const struct file_operations fw_profiling_freq_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.write = fw_profiling_freq_fops_write,
};
static ssize_t
fw_trace_destination_mask_fops_write(struct file *file, const char __user *user_buf,
size_t size, loff_t *pos)
@ -280,6 +330,9 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
&ivpu_force_recovery_fops);
debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev,
&dvfs_mode_fops);
debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
&fw_log_fops);
debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
@ -291,4 +344,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
&ivpu_reset_engine_fops);
if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX)
debugfs_create_file("fw_profiling_freq_drive", 0200,
debugfs_root, vdev, &fw_profiling_freq_fops);
}

View File

@ -31,8 +31,6 @@
__stringify(DRM_IVPU_DRIVER_MINOR) "."
#endif
static const struct drm_driver driver;
static struct lock_class_key submitted_jobs_xa_lock_class_key;
int ivpu_dbg_mask;
@ -41,7 +39,7 @@ MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
int ivpu_test_mode;
module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw");
MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
u8 ivpu_pll_min_ratio;
module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
@ -93,8 +91,8 @@ static void file_priv_release(struct kref *ref)
ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
ivpu_cmdq_release_all(file_priv);
ivpu_bo_remove_all_bos_from_context(&file_priv->ctx);
ivpu_jsm_context_release(vdev, file_priv->ctx.id);
ivpu_bo_remove_all_bos_from_context(vdev, &file_priv->ctx);
ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
mutex_destroy(&file_priv->lock);
@ -317,16 +315,14 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
unsigned long timeout;
int ret;
if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST)
if (ivpu_test_mode & IVPU_TEST_MODE_FW_TEST)
return 0;
ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG);
ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG, NULL);
timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot);
while (1) {
ret = ivpu_ipc_irq_handler(vdev);
if (ret)
break;
ivpu_ipc_irq_handler(vdev, NULL);
ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0);
if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout))
break;
@ -362,7 +358,7 @@ int ivpu_boot(struct ivpu_device *vdev)
int ret;
/* Update boot params located at first 4KB of FW memory */
ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr);
ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
ret = ivpu_hw_boot_fw(vdev);
if (ret) {
@ -414,7 +410,9 @@ static const struct drm_driver driver = {
.open = ivpu_open,
.postclose = ivpu_postclose,
.gem_prime_import = ivpu_gem_prime_import,
.gem_create_object = ivpu_gem_create_object,
.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
@ -427,6 +425,13 @@ static const struct drm_driver driver = {
.minor = DRM_IVPU_DRIVER_MINOR,
};
static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
{
struct ivpu_device *vdev = arg;
return ivpu_ipc_irq_thread_handler(vdev);
}
static int ivpu_irq_init(struct ivpu_device *vdev)
{
struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
@ -440,8 +445,8 @@ static int ivpu_irq_init(struct ivpu_device *vdev)
vdev->irq = pci_irq_vector(pdev, 0);
ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
if (ret)
ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
@ -533,6 +538,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
INIT_LIST_HEAD(&vdev->bo_list);
ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
if (ret)
goto err_xa_destroy;
ret = ivpu_pci_init(vdev);
if (ret)
@ -550,7 +560,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret)
goto err_xa_destroy;
goto err_power_down;
ret = ivpu_mmu_global_context_init(vdev);
if (ret)
@ -574,20 +584,15 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
ivpu_pm_init(vdev);
ret = ivpu_job_done_thread_init(vdev);
ret = ivpu_boot(vdev);
if (ret)
goto err_ipc_fini;
ret = ivpu_boot(vdev);
if (ret)
goto err_job_done_thread_fini;
ivpu_job_done_consumer_init(vdev);
ivpu_pm_enable(vdev);
return 0;
err_job_done_thread_fini:
ivpu_job_done_thread_fini(vdev);
err_ipc_fini:
ivpu_ipc_fini(vdev);
err_fw_fini:
@ -612,7 +617,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
ivpu_shutdown(vdev);
if (IVPU_WA(d3hot_after_power_off))
pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
ivpu_job_done_thread_fini(vdev);
ivpu_job_done_consumer_fini(vdev);
ivpu_pm_cancel_recovery(vdev);
ivpu_ipc_fini(vdev);

View File

@ -17,9 +17,10 @@
#include <uapi/drm/ivpu_accel.h>
#include "ivpu_mmu_context.h"
#include "ivpu_ipc.h"
#define DRIVER_NAME "intel_vpu"
#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)"
#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
#define DRIVER_DATE "20230117"
#define PCI_DEVICE_ID_MTL 0x7d1d
@ -88,6 +89,7 @@ struct ivpu_wa_table {
bool d3hot_after_power_off;
bool interrupt_clear_with_0;
bool disable_clock_relinquish;
bool disable_d0i3_msg;
};
struct ivpu_hw_info;
@ -115,8 +117,11 @@ struct ivpu_device {
struct xarray context_xa;
struct xa_limit context_xa_limit;
struct mutex bo_list_lock; /* Protects bo_list */
struct list_head bo_list;
struct xarray submitted_jobs_xa;
struct task_struct *job_done_thread;
struct ivpu_ipc_consumer job_done_consumer;
atomic64_t unique_id_counter;
@ -126,6 +131,7 @@ struct ivpu_device {
int tdr;
int reschedule_suspend;
int autosuspend;
int d0i3_entry_msg;
} timeout;
};
@ -148,9 +154,11 @@ extern u8 ivpu_pll_min_ratio;
extern u8 ivpu_pll_max_ratio;
extern bool ivpu_disable_mmu_cont_pages;
#define IVPU_TEST_MODE_DISABLED 0
#define IVPU_TEST_MODE_FW_TEST 1
#define IVPU_TEST_MODE_NULL_HW 2
#define IVPU_TEST_MODE_FW_TEST BIT(0)
#define IVPU_TEST_MODE_NULL_HW BIT(1)
#define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2)
#define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4)
#define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5)
extern int ivpu_test_mode;
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);

View File

@ -33,12 +33,17 @@
#define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31)
#define IVPU_FW_CHECK_API(vdev, fw_hdr, name, min_major) \
/* Check if FW API is compatible with the driver */
#define IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, name, min_major) \
ivpu_fw_check_api(vdev, fw_hdr, #name, \
VPU_##name##_API_VER_INDEX, \
VPU_##name##_API_VER_MAJOR, \
VPU_##name##_API_VER_MINOR, min_major)
/* Check if the API version is lower than the given version */
#define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \
ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor)
static char *ivpu_firmware;
module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
@ -105,6 +110,19 @@ ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw
return 0;
}
static bool
ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr,
const char *str, int index, u16 major, u16 minor)
{
u16 fw_major = (u16)(fw_hdr->api_version[index] >> 16);
u16 fw_minor = (u16)(fw_hdr->api_version[index]);
if (fw_major < major || (fw_major == major && fw_minor < minor))
return true;
return false;
}
static int ivpu_fw_parse(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
@ -164,9 +182,9 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
ivpu_info(vdev, "Firmware: %s, version: %s", fw->name,
(const char *)fw_hdr + VPU_FW_HEADER_SIZE);
if (IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT, 3))
if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3))
return -EINVAL;
if (IVPU_FW_CHECK_API(vdev, fw_hdr, JSM, 3))
if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, JSM, 3))
return -EINVAL;
fw->runtime_addr = runtime_addr;
@ -182,6 +200,8 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING;
fw->trace_hw_component_mask = -1;
fw->dvfs_mode = 0;
ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n",
fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
@ -195,6 +215,24 @@ static void ivpu_fw_release(struct ivpu_device *vdev)
release_firmware(vdev->fw->file);
}
/* Initialize workarounds that depend on FW version */
static void
ivpu_fw_init_wa(struct ivpu_device *vdev)
{
const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data;
if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) ||
(ivpu_hw_gen(vdev) > IVPU_HW_37XX) ||
(ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE))
vdev->wa.disable_d0i3_msg = true;
/* Force enable the feature for testing purposes */
if (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_ENABLE)
vdev->wa.disable_d0i3_msg = false;
IVPU_PRINT_WA(disable_d0i3_msg);
}
static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
@ -248,7 +286,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
if (fw->shave_nn_size) {
fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start,
fw->shave_nn_size, DRM_IVPU_BO_UNCACHED);
fw->shave_nn_size, DRM_IVPU_BO_WC);
if (!fw->mem_shave_nn) {
ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
ret = -ENOMEM;
@ -297,6 +335,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
if (ret)
goto err_fw_release;
ivpu_fw_init_wa(vdev);
ret = ivpu_fw_mem_init(vdev);
if (ret)
goto err_fw_release;
@ -422,14 +462,31 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->punit_telemetry_sram_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n",
boot_params->vpu_telemetry_enable);
ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n",
boot_params->dvfs_mode);
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n",
boot_params->d0i3_delayed_entry);
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
boot_params->d0i3_residency_time_us);
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
boot_params->d0i3_entry_vpu_ts);
}
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
{
struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx;
/* In case of warm boot we only have to reset the entrypoint addr */
/* In case of warm boot only update variable params */
if (!ivpu_fw_is_cold_boot(vdev)) {
boot_params->d0i3_residency_time_us =
ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts);
boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts;
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
boot_params->d0i3_residency_time_us);
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
boot_params->d0i3_entry_vpu_ts);
boot_params->save_restore_ret_address = 0;
vdev->pm->is_warmboot = true;
wmb(); /* Flush WC buffers after writing save_restore_ret_address */
@ -442,6 +499,13 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev);
/*
* This param is a debug firmware feature. It switches the default clock
* to a higher-resolution one for fine-grained and more accurate firmware
* task profiling.
*/
boot_params->perf_clk_frequency = ivpu_hw_profiling_freq_get(vdev);
/*
* Uncached region of VPU address space, covers IPC buffers, job queues
* and log buffers, programmable to L2$ Uncached by VPU MTRR
@ -493,6 +557,11 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
boot_params->dvfs_mode = vdev->fw->dvfs_mode;
if (!IVPU_WA(disable_d0i3_msg))
boot_params->d0i3_delayed_entry = 1;
boot_params->d0i3_residency_time_us = 0;
boot_params->d0i3_entry_vpu_ts = 0;
wmb(); /* Flush WC buffers after writing bootparams */

View File

@ -27,6 +27,7 @@ struct ivpu_fw_info {
u32 trace_level;
u32 trace_destination_mask;
u64 trace_hw_component_mask;
u32 dvfs_mode;
};
int ivpu_fw_init(struct ivpu_device *vdev);

View File

@ -20,215 +20,18 @@
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
MODULE_IMPORT_NS(DMA_BUF);
static const struct drm_gem_object_funcs ivpu_gem_funcs;
static struct lock_class_key prime_bo_lock_class_key;
static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
{
/* Pages are managed by the underlying dma-buf */
return 0;
}
static void prime_free_pages_locked(struct ivpu_bo *bo)
{
/* Pages are managed by the underlying dma-buf */
}
static int prime_map_pages_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
struct sg_table *sgt;
sgt = dma_buf_map_attachment_unlocked(bo->base.import_attach, DMA_BIDIRECTIONAL);
if (IS_ERR(sgt)) {
ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt));
return PTR_ERR(sgt);
}
bo->sgt = sgt;
return 0;
}
static void prime_unmap_pages_locked(struct ivpu_bo *bo)
{
dma_buf_unmap_attachment_unlocked(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL);
bo->sgt = NULL;
}
static const struct ivpu_bo_ops prime_ops = {
.type = IVPU_BO_TYPE_PRIME,
.name = "prime",
.alloc_pages = prime_alloc_pages_locked,
.free_pages = prime_free_pages_locked,
.map_pages = prime_map_pages_locked,
.unmap_pages = prime_unmap_pages_locked,
};
static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
{
int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
struct page **pages;
pages = drm_gem_get_pages(&bo->base);
if (IS_ERR(pages))
return PTR_ERR(pages);
if (bo->flags & DRM_IVPU_BO_WC)
set_pages_array_wc(pages, npages);
else if (bo->flags & DRM_IVPU_BO_UNCACHED)
set_pages_array_uc(pages, npages);
bo->pages = pages;
return 0;
}
static void shmem_free_pages_locked(struct ivpu_bo *bo)
{
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
drm_gem_put_pages(&bo->base, bo->pages, true, false);
bo->pages = NULL;
}
static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
{
int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
struct sg_table *sgt;
int ret;
sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages);
if (IS_ERR(sgt)) {
ivpu_err(vdev, "Failed to allocate sgtable\n");
return PTR_ERR(sgt);
}
ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
if (ret) {
ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
goto err_free_sgt;
}
bo->sgt = sgt;
return 0;
err_free_sgt:
kfree(sgt);
return ret;
}
static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0);
sg_free_table(bo->sgt);
kfree(bo->sgt);
bo->sgt = NULL;
}
static const struct ivpu_bo_ops shmem_ops = {
.type = IVPU_BO_TYPE_SHMEM,
.name = "shmem",
.alloc_pages = shmem_alloc_pages_locked,
.free_pages = shmem_free_pages_locked,
.map_pages = ivpu_bo_map_pages_locked,
.unmap_pages = ivpu_bo_unmap_pages_locked,
};
static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
{
unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
struct page **pages;
int ret;
pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL);
if (!pages)
return -ENOMEM;
for (i = 0; i < npages; i++) {
pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
if (!pages[i]) {
ret = -ENOMEM;
goto err_free_pages;
}
cond_resched();
}
bo->pages = pages;
return 0;
err_free_pages:
while (i--)
put_page(pages[i]);
kvfree(pages);
return ret;
}
static void internal_free_pages_locked(struct ivpu_bo *bo)
{
unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
for (i = 0; i < npages; i++)
put_page(bo->pages[i]);
kvfree(bo->pages);
bo->pages = NULL;
}
static const struct ivpu_bo_ops internal_ops = {
.type = IVPU_BO_TYPE_INTERNAL,
.name = "internal",
.alloc_pages = internal_alloc_pages_locked,
.free_pages = internal_free_pages_locked,
.map_pages = ivpu_bo_map_pages_locked,
.unmap_pages = ivpu_bo_unmap_pages_locked,
};
static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
int ret;
lockdep_assert_held(&bo->lock);
drm_WARN_ON(&vdev->drm, bo->sgt);
ret = bo->ops->alloc_pages(bo);
if (ret) {
ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret);
return ret;
}
ret = bo->ops->map_pages(bo);
if (ret) {
ivpu_err(vdev, "Failed to map pages for BO: %d", ret);
goto err_free_pages;
}
return ret;
err_free_pages:
bo->ops->free_pages(bo);
return ret;
}
static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo)
{
mutex_lock(&bo->lock);
WARN_ON(!bo->sgt);
bo->ops->unmap_pages(bo);
WARN_ON(bo->sgt);
bo->ops->free_pages(bo);
WARN_ON(bo->pages);
mutex_unlock(&bo->lock);
if (bo->ctx)
ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u ctx %d vpu_addr 0x%llx mmu_mapped %d\n",
action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
bo->handle, bo->ctx->id, bo->vpu_addr, bo->mmu_mapped);
else
ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u (not added to context)\n",
action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
bo->handle);
}
/*
@ -245,21 +48,24 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
mutex_lock(&bo->lock);
if (!bo->vpu_addr) {
ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n",
bo->ctx->id, bo->handle);
ivpu_dbg_bo(vdev, bo, "pin");
if (!bo->ctx) {
ivpu_err(vdev, "vpu_addr not allocated for BO %d\n", bo->handle);
ret = -EINVAL;
goto unlock;
}
if (!bo->sgt) {
ret = ivpu_bo_alloc_and_map_pages_locked(bo);
if (ret)
goto unlock;
}
if (!bo->mmu_mapped) {
ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt,
struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
if (IS_ERR(sgt)) {
ret = PTR_ERR(sgt);
ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
goto unlock;
}
ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
ivpu_bo_is_snooped(bo));
if (ret) {
ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
@ -281,248 +87,213 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
int ret;
if (!range) {
if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
range = &vdev->hw->ranges.shave;
else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
range = &vdev->hw->ranges.dma;
else
range = &vdev->hw->ranges.user;
}
mutex_lock(&bo->lock);
mutex_lock(&ctx->lock);
ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
if (!ret) {
bo->ctx = ctx;
bo->vpu_addr = bo->mm_node.start;
list_add_tail(&bo->ctx_node, &ctx->bo_list);
} else {
ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
}
mutex_unlock(&ctx->lock);
ivpu_dbg_bo(vdev, bo, "alloc");
mutex_unlock(&bo->lock);
return ret;
}
static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo)
static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
struct ivpu_mmu_context *ctx = bo->ctx;
ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
lockdep_assert_held(&bo->lock);
mutex_lock(&bo->lock);
ivpu_dbg_bo(vdev, bo, "unbind");
/* TODO: dma_unmap */
if (bo->mmu_mapped) {
drm_WARN_ON(&vdev->drm, !bo->sgt);
ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt);
drm_WARN_ON(&vdev->drm, !bo->ctx);
drm_WARN_ON(&vdev->drm, !bo->vpu_addr);
drm_WARN_ON(&vdev->drm, !bo->base.sgt);
ivpu_mmu_context_unmap_sgt(vdev, bo->ctx, bo->vpu_addr, bo->base.sgt);
bo->mmu_mapped = false;
}
mutex_lock(&ctx->lock);
list_del(&bo->ctx_node);
bo->vpu_addr = 0;
bo->ctx = NULL;
ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node);
mutex_unlock(&ctx->lock);
if (bo->ctx) {
ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node);
bo->vpu_addr = 0;
bo->ctx = NULL;
}
}
static void ivpu_bo_unbind(struct ivpu_bo *bo)
{
mutex_lock(&bo->lock);
ivpu_bo_unbind_locked(bo);
mutex_unlock(&bo->lock);
}
void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
struct ivpu_bo *bo, *tmp;
struct ivpu_bo *bo;
list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node)
ivpu_bo_free_vpu_addr(bo);
if (drm_WARN_ON(&vdev->drm, !ctx))
return;
mutex_lock(&vdev->bo_list_lock);
list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
mutex_lock(&bo->lock);
if (bo->ctx == ctx)
ivpu_bo_unbind_locked(bo);
mutex_unlock(&bo->lock);
}
mutex_unlock(&vdev->bo_list_lock);
}
struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size)
{
struct ivpu_bo *bo;
if (size == 0 || !PAGE_ALIGNED(size))
return ERR_PTR(-EINVAL);
bo = kzalloc(sizeof(*bo), GFP_KERNEL);
if (!bo)
return ERR_PTR(-ENOMEM);
bo->base.base.funcs = &ivpu_gem_funcs;
bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */
INIT_LIST_HEAD(&bo->bo_list_node);
mutex_init(&bo->lock);
return &bo->base.base;
}
static struct ivpu_bo *
-ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
-	      u64 size, u32 flags, const struct ivpu_bo_ops *ops,
-	      const struct ivpu_addr_range *range, u64 user_ptr)
+ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
{
+	struct drm_gem_shmem_object *shmem;
	struct ivpu_bo *bo;
-	int ret = 0;

-	if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size)))
-		return ERR_PTR(-EINVAL);

	switch (flags & DRM_IVPU_BO_CACHE_MASK) {
	case DRM_IVPU_BO_CACHED:
	case DRM_IVPU_BO_UNCACHED:
	case DRM_IVPU_BO_WC:
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

-	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
-	if (!bo)
-		return ERR_PTR(-ENOMEM);
+	shmem = drm_gem_shmem_create(&vdev->drm, size);
+	if (IS_ERR(shmem))
+		return ERR_CAST(shmem);

-	mutex_init(&bo->lock);
-	bo->base.funcs = &ivpu_gem_funcs;
+	bo = to_ivpu_bo(&shmem->base);
+	bo->base.map_wc = flags & DRM_IVPU_BO_WC;
	bo->flags = flags;
-	bo->ops = ops;
-	bo->user_ptr = user_ptr;

-	if (ops->type == IVPU_BO_TYPE_SHMEM)
-		ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
-	else
-		drm_gem_private_object_init(&vdev->drm, &bo->base, size);
+	mutex_lock(&vdev->bo_list_lock);
+	list_add_tail(&bo->bo_list_node, &vdev->bo_list);
+	mutex_unlock(&vdev->bo_list_lock);

-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize drm object\n");
-		goto err_free;
-	}
-
-	if (flags & DRM_IVPU_BO_MAPPABLE) {
-		ret = drm_gem_create_mmap_offset(&bo->base);
-		if (ret) {
-			ivpu_err(vdev, "Failed to allocate mmap offset\n");
-			goto err_release;
-		}
-	}
-
-	if (mmu_context) {
-		ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range);
-		if (ret) {
-			ivpu_err(vdev, "Failed to add BO to context: %d\n", ret);
-			goto err_release;
-		}
-	}
+	ivpu_dbg(vdev, BO, "create: vpu_addr 0x%llx size %zu flags 0x%x\n",
+		 bo->vpu_addr, bo->base.base.size, flags);

	return bo;
}

-err_release:
-	drm_gem_object_release(&bo->base);
-err_free:
-	kfree(bo);
-	return ERR_PTR(ret);
static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_bo *bo = to_ivpu_bo(obj);
struct ivpu_addr_range *range;
if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
range = &vdev->hw->ranges.shave;
else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
range = &vdev->hw->ranges.dma;
else
range = &vdev->hw->ranges.user;
return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range);
}
static void ivpu_bo_free(struct drm_gem_object *obj)
{
+	struct ivpu_device *vdev = to_ivpu_device(obj->dev);
	struct ivpu_bo *bo = to_ivpu_bo(obj);
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);

-	if (bo->ctx)
-		ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
-			 bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
-	else
-		ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n",
-			 (bool)bo->sgt, bo->mmu_mapped);
+	mutex_lock(&vdev->bo_list_lock);
+	list_del(&bo->bo_list_node);
+	mutex_unlock(&vdev->bo_list_lock);

	drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));

-	vunmap(bo->kvaddr);
-
-	if (bo->ctx)
-		ivpu_bo_free_vpu_addr(bo);
-
-	if (bo->sgt)
-		ivpu_bo_unmap_and_free_pages(bo);
-
-	if (bo->base.import_attach)
-		drm_prime_gem_destroy(&bo->base, bo->sgt);
-
-	drm_gem_object_release(&bo->base);
+	ivpu_dbg_bo(vdev, bo, "free");
+	ivpu_bo_unbind(bo);
	mutex_destroy(&bo->lock);
-	kfree(bo);
+
+	drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
+	drm_gem_shmem_free(&bo->base);
}
static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
struct ivpu_bo *bo = to_ivpu_bo(obj);
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name);
if (obj->import_attach) {
/* Drop the reference drm_gem_mmap_obj() acquired.*/
drm_gem_object_put(obj);
vma->vm_private_data = NULL;
return dma_buf_mmap(obj->dma_buf, vma, 0);
}
vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND);
vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags));
return 0;
}
static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj)
{
struct ivpu_bo *bo = to_ivpu_bo(obj);
loff_t npages = obj->size >> PAGE_SHIFT;
int ret = 0;
mutex_lock(&bo->lock);
if (!bo->sgt)
ret = ivpu_bo_alloc_and_map_pages_locked(bo);
mutex_unlock(&bo->lock);
if (ret)
return ERR_PTR(ret);
return drm_prime_pages_to_sg(obj->dev, bo->pages, npages);
}
static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct ivpu_bo *bo = to_ivpu_bo(obj);
loff_t npages = obj->size >> PAGE_SHIFT;
pgoff_t page_offset;
struct page *page;
vm_fault_t ret;
int err;
mutex_lock(&bo->lock);
if (!bo->sgt) {
err = ivpu_bo_alloc_and_map_pages_locked(bo);
if (err) {
ret = vmf_error(err);
goto unlock;
}
}
/* We don't use vmf->pgoff since that has the fake offset */
page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
if (page_offset >= npages) {
ret = VM_FAULT_SIGBUS;
} else {
page = bo->pages[page_offset];
ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
}
unlock:
mutex_unlock(&bo->lock);
return ret;
}
static const struct vm_operations_struct ivpu_vm_ops = {
.fault = ivpu_vm_fault,
.open = drm_gem_vm_open,
	.close = drm_gem_vm_close,
};

static const struct dma_buf_ops ivpu_bo_dmabuf_ops = {
.cache_sgt_mapping = true,
.attach = drm_gem_map_attach,
.detach = drm_gem_map_detach,
.map_dma_buf = drm_gem_map_dma_buf,
.unmap_dma_buf = drm_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
};
static struct dma_buf *ivpu_bo_export(struct drm_gem_object *obj, int flags)
{
struct drm_device *dev = obj->dev;
struct dma_buf_export_info exp_info = {
.exp_name = KBUILD_MODNAME,
.owner = dev->driver->fops->owner,
.ops = &ivpu_bo_dmabuf_ops,
.size = obj->size,
.flags = flags,
.priv = obj,
.resv = obj->resv,
};
void *sgt;
/*
* Make sure that pages are allocated and dma-mapped before exporting the bo.
* DMA-mapping is required if the bo will be imported to the same device.
*/
sgt = drm_gem_shmem_get_pages_sgt(to_drm_gem_shmem_obj(obj));
if (IS_ERR(sgt))
return sgt;
return drm_gem_dmabuf_export(dev, &exp_info);
}
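ivpu_bo_export() exists so the pages are allocated and dma-mapped *before* the dma-buf is handed out, which matters when the buffer is re-imported into the same device. A userspace round-trip that exercises this path might look like the following (illustrative; standard libdrm PRIME wrappers, error handling trimmed):

#include <stdint.h>
#include <xf86drm.h>

/* Export @handle as a dma-buf fd, then import it back into the same
 * device; the re-import hits the sgt that ivpu_bo_export() prepared.
 */
static int prime_round_trip(int drm_fd, uint32_t handle)
{
	uint32_t reimported;
	int dmabuf_fd;

	if (drmPrimeHandleToFD(drm_fd, handle, DRM_CLOEXEC, &dmabuf_fd))
		return -1;

	return drmPrimeFDToHandle(drm_fd, dmabuf_fd, &reimported);
}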
static const struct drm_gem_object_funcs ivpu_gem_funcs = {
	.free = ivpu_bo_free,
-	.mmap = ivpu_bo_mmap,
-	.vm_ops = &ivpu_vm_ops,
-	.get_sg_table = ivpu_bo_get_sg_table,
	.open = ivpu_bo_open,
+	.export = ivpu_bo_export,
+	.print_info = drm_gem_shmem_object_print_info,
+	.pin = drm_gem_shmem_object_pin,
+	.unpin = drm_gem_shmem_object_unpin,
+	.get_sg_table = drm_gem_shmem_object_get_sg_table,
+	.vmap = drm_gem_shmem_object_vmap,
+	.vunmap = drm_gem_shmem_object_vunmap,
+	.mmap = drm_gem_shmem_object_mmap,
+	.vm_ops = &drm_gem_shmem_vm_ops,
};
-int
-ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
@@ -537,23 +308,20 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (size == 0)
return -EINVAL;
-	bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0);
+	bo = ivpu_bo_create(vdev, size, args->flags);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
bo, file_priv->ctx.id, args->size, args->flags);
return PTR_ERR(bo);
}
-	ret = drm_gem_handle_create(file, &bo->base, &bo->handle);
+	ret = drm_gem_handle_create(file, &bo->base.base, &bo->handle);
if (!ret) {
args->vpu_addr = bo->vpu_addr;
args->handle = bo->handle;
}
-	drm_gem_object_put(&bo->base);
-
-	ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
-		 file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags);
+	drm_gem_object_put(&bo->base.base);
return ret;
}
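With the shmem conversion, the create ioctl only allocates the buffer; the VPU address is bound when drm_gem_handle_create() invokes the gem open hook (ivpu_bo_open() above), so args->vpu_addr is already valid on return. Userspace usage is unchanged; a sketch against the uapi header (field names per include/uapi/drm/ivpu_accel.h — double-check against your copy):

#include <string.h>
#include <sys/ioctl.h>
#include <drm/ivpu_accel.h>

static int create_mappable_bo(int fd, uint64_t size, uint32_t *handle)
{
	struct drm_ivpu_bo_create args;

	memset(&args, 0, sizeof(args));
	args.size = size;			/* must be page aligned */
	args.flags = DRM_IVPU_BO_MAPPABLE;	/* allow CPU mmap */

	if (ioctl(fd, DRM_IOCTL_IVPU_BO_CREATE, &args))
		return -1;

	*handle = args.handle;
	/* args.vpu_addr now holds the device virtual address */
	return 0;
}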
@@ -563,8 +331,8 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
{
	const struct ivpu_addr_range *range;
	struct ivpu_addr_range fixed_range;
+	struct iosys_map map;
	struct ivpu_bo *bo;
-	pgprot_t prot;
int ret;
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
@@ -578,81 +346,42 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
		range = &vdev->hw->ranges.global;
	}

-	bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
+	bo = ivpu_bo_create(vdev, size, flags);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
bo, vpu_addr, size, flags);
return NULL;
}
ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range);
if (ret)
goto err_put;
ret = ivpu_bo_pin(bo);
if (ret)
goto err_put;
-	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	if (bo->flags & DRM_IVPU_BO_WC)
-		set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-	else if (bo->flags & DRM_IVPU_BO_UNCACHED)
-		set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
-	bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot);
-	if (!bo->kvaddr) {
-		ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
+	ret = drm_gem_shmem_vmap(&bo->base, &map);
+	if (ret)
		goto err_put;
-	}
-
-	ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
-		 bo->vpu_addr, ivpu_bo_size(bo), flags);
return bo;
err_put:
-	drm_gem_object_put(&bo->base);
+	drm_gem_object_put(&bo->base.base);
return NULL;
}
void ivpu_bo_free_internal(struct ivpu_bo *bo)
{
-	drm_gem_object_put(&bo->base);
-}
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);

-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf)
-{
-	struct ivpu_device *vdev = to_ivpu_device(dev);
-	struct dma_buf_attachment *attach;
-	struct ivpu_bo *bo;
-
-	attach = dma_buf_attach(buf, dev->dev);
-	if (IS_ERR(attach))
-		return ERR_CAST(attach);
-
-	get_dma_buf(buf);
-
-	bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0);
-	if (IS_ERR(bo)) {
-		ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size);
-		goto err_detach;
-	}
-
-	lockdep_set_class(&bo->lock, &prime_bo_lock_class_key);
-
-	bo->base.import_attach = attach;
-
-	return &bo->base;
-
-err_detach:
-	dma_buf_detach(buf, attach);
-	dma_buf_put(buf);
-	return ERR_CAST(bo);
+	drm_gem_shmem_vunmap(&bo->base, &map);
+	drm_gem_object_put(&bo->base.base);
}
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
-	struct ivpu_file_priv *file_priv = file->driver_priv;
-	struct ivpu_device *vdev = to_ivpu_device(dev);
struct drm_ivpu_bo_info *args = data;
struct drm_gem_object *obj;
struct ivpu_bo *bo;
@@ -665,21 +394,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file
	bo = to_ivpu_bo(obj);

	mutex_lock(&bo->lock);
-	if (!bo->ctx) {
-		ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL);
-		if (ret) {
-			ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret);
-			goto unlock;
-		}
-	}
args->flags = bo->flags;
args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
args->vpu_addr = bo->vpu_addr;
args->size = obj->size;
-unlock:
mutex_unlock(&bo->lock);
drm_gem_object_put(obj);
return ret;
}
@@ -714,41 +434,41 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
{
	unsigned long dma_refcount = 0;

-	if (bo->base.dma_buf && bo->base.dma_buf->file)
-		dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
+	mutex_lock(&bo->lock);

-	drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
-		   bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo),
-		   kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
+	if (bo->base.base.dma_buf && bo->base.base.dma_buf->file)
+		dma_refcount = atomic_long_read(&bo->base.base.dma_buf->file->f_count);

+	drm_printf(p, "%-3u %-6d 0x%-12llx %-10lu 0x%-8x %-4u %-8lu",
+		   bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.base.size,
+		   bo->flags, kref_read(&bo->base.base.refcount), dma_refcount);

+	if (bo->base.base.import_attach)
+		drm_printf(p, " imported");

+	if (bo->base.pages)
+		drm_printf(p, " has_pages");

+	if (bo->mmu_mapped)
+		drm_printf(p, " mmu_mapped");

+	drm_printf(p, "\n");

+	mutex_unlock(&bo->lock);
}
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
{
struct ivpu_device *vdev = to_ivpu_device(dev);
-	struct ivpu_file_priv *file_priv;
-	unsigned long ctx_id;
	struct ivpu_bo *bo;

-	drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
-		   "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
+	drm_printf(p, "%-3s %-6s %-14s %-10s %-10s %-4s %-8s %s\n",
+		   "ctx", "handle", "vpu_addr", "size", "flags", "refs", "dma_refs", "attribs");

-	mutex_lock(&vdev->gctx.lock);
-	list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node)
+	mutex_lock(&vdev->bo_list_lock);
+	list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
		ivpu_bo_print_info(bo, p);
-	mutex_unlock(&vdev->gctx.lock);
-
-	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
-		file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
-		if (!file_priv)
-			continue;
-
-		mutex_lock(&file_priv->ctx.lock);
-		list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node)
-			ivpu_bo_print_info(bo, p);
-		mutex_unlock(&file_priv->ctx.lock);
-
-		ivpu_file_priv_put(&file_priv);
-	}
+	mutex_unlock(&vdev->bo_list_lock);
}
void ivpu_bo_list_print(struct drm_device *dev)

diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h

@@ -6,84 +6,52 @@
#define __IVPU_GEM_H__

#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_mm.h>

-struct dma_buf;
-struct ivpu_bo_ops;
struct ivpu_file_priv;

struct ivpu_bo {
-	struct drm_gem_object base;
-	const struct ivpu_bo_ops *ops;
+	struct drm_gem_shmem_object base;
	struct ivpu_mmu_context *ctx;
-	struct list_head ctx_node;
+	struct list_head bo_list_node;
	struct drm_mm_node mm_node;

-	struct mutex lock; /* Protects: pages, sgt, mmu_mapped */
-	struct sg_table *sgt;
-	struct page **pages;
-	bool mmu_mapped;
-	void *kvaddr;
+	struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
	u64 vpu_addr;
	u32 handle;
	u32 flags;
-	uintptr_t user_ptr;
-	u32 job_status;
-};
-
-enum ivpu_bo_type {
-	IVPU_BO_TYPE_SHMEM = 1,
-	IVPU_BO_TYPE_INTERNAL,
-	IVPU_BO_TYPE_PRIME,
-};
-
-struct ivpu_bo_ops {
-	enum ivpu_bo_type type;
-	const char *name;
-	int (*alloc_pages)(struct ivpu_bo *bo);
-	void (*free_pages)(struct ivpu_bo *bo);
-	int (*map_pages)(struct ivpu_bo *bo);
-	void (*unmap_pages)(struct ivpu_bo *bo);
+	u32 job_status; /* Valid only for command buffer */
+	bool mmu_mapped;
};
int ivpu_bo_pin(struct ivpu_bo *bo);
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx);
-void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
-void ivpu_bo_list_print(struct drm_device *dev);
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);

-struct ivpu_bo *
-ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
+struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
+struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
void ivpu_bo_free_internal(struct ivpu_bo *bo);
-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
-void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo);

int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);

+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
+void ivpu_bo_list_print(struct drm_device *dev);
static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj)
{
-	return container_of(obj, struct ivpu_bo, base);
+	return container_of(obj, struct ivpu_bo, base.base);
}
static inline void *ivpu_bo_vaddr(struct ivpu_bo *bo)
{
-	return bo->kvaddr;
+	return bo->base.vaddr;
}
static inline size_t ivpu_bo_size(struct ivpu_bo *bo)
{
-	return bo->base.size;
-}
-
-static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset)
-{
-	if (offset > ivpu_bo_size(bo) || !bo->pages)
-		return NULL;
-
-	return bo->pages[offset / PAGE_SIZE];
+	return bo->base.base.size;
}
static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
@@ -96,20 +64,9 @@ static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
	return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
}

-static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot)
-{
-	if (bo->flags & DRM_IVPU_BO_WC)
-		return pgprot_writecombine(prot);
-
-	if (bo->flags & DRM_IVPU_BO_UNCACHED)
-		return pgprot_noncached(prot);
-
-	return prot;
-}
static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
{
-	return to_ivpu_device(bo->base.dev);
+	return to_ivpu_device(bo->base.base.dev);
}
static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)

diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h

@@ -15,8 +15,11 @@ struct ivpu_hw_ops {
int (*power_down)(struct ivpu_device *vdev);
int (*reset)(struct ivpu_device *vdev);
bool (*is_idle)(struct ivpu_device *vdev);
int (*wait_for_idle)(struct ivpu_device *vdev);
void (*wdt_disable)(struct ivpu_device *vdev);
void (*diagnose_failure)(struct ivpu_device *vdev);
u32 (*profiling_freq_get)(struct ivpu_device *vdev);
void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
@@ -58,6 +61,8 @@ struct ivpu_hw_info {
u32 sku;
u16 config;
int dma_bits;
ktime_t d0i3_entry_host_ts;
u64 d0i3_entry_vpu_ts;
};
extern const struct ivpu_hw_ops ivpu_hw_37xx_ops;
@@ -85,6 +90,11 @@ static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev)
return vdev->hw->ops->is_idle(vdev);
};
static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev)
{
return vdev->hw->ops->wait_for_idle(vdev);
};
static inline int ivpu_hw_power_down(struct ivpu_device *vdev)
{
ivpu_dbg(vdev, PM, "HW power down\n");
@@ -104,6 +114,16 @@ static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev)
vdev->hw->ops->wdt_disable(vdev);
};
static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->profiling_freq_get(vdev);
};
static inline void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
{
return vdev->hw->ops->profiling_freq_drive(vdev, enable);
};
/* Register indirect accesses */
static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
{

diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c

@@ -29,6 +29,7 @@
#define PLL_REF_CLK_FREQ (50 * 1000000)
#define PLL_SIMULATION_FREQ (10 * 1000000)
#define PLL_PROF_CLK_FREQ (38400 * 1000)
#define PLL_DEFAULT_EPP_VALUE 0x80
#define TIM_SAFE_ENABLE 0xf1d0dead
@@ -37,7 +38,7 @@
#define TIMEOUT_US (150 * USEC_PER_MSEC)
#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
-#define IDLE_TIMEOUT_US (500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define ICB_0_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
(REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
@@ -96,6 +97,7 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
vdev->timeout.tdr = 2000;
vdev->timeout.reschedule_suspend = 10;
vdev->timeout.autosuspend = 10;
vdev->timeout.d0i3_entry_msg = 5;
}
static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
@@ -722,10 +724,23 @@ static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev)
REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val);
}
static int ivpu_hw_37xx_wait_for_idle(struct ivpu_device *vdev)
{
return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
}
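REGB_POLL_FLD() is a driver-local macro; conceptually the new wait_for_idle op is a bounded poll of the buttress VPU_STATUS IDLE field, used by the D0i3 entry path (see ivpu_jsm_pwr_d0i3_enter() at the end of this diff) so power-down only proceeds once the VPU reports idle. A roughly equivalent generic-iopoll form (a sketch, not the macro's actual expansion):

#include <linux/iopoll.h>

static int wait_for_idle_sketch(void __iomem *vpu_status_reg, u32 idle_mask)
{
	u32 val;

	/* poll every 10 us, fail with -ETIMEDOUT after IDLE_TIMEOUT_US (5 ms) */
	return readl_poll_timeout(vpu_status_reg, val,
				  (val & idle_mask) == idle_mask,
				  10, 5 * USEC_PER_MSEC);
}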
static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
{
vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT);
}
static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
{
int ret = 0;
ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev);
if (!ivpu_hw_37xx_is_idle(vdev))
ivpu_warn(vdev, "VPU not idle during power down\n");
@@ -760,6 +775,16 @@ static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev)
REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
}
static u32 ivpu_hw_37xx_profiling_freq_get(struct ivpu_device *vdev)
{
return PLL_PROF_CLK_FREQ;
}
static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
{
/* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
}
static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
{
u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
@@ -871,17 +896,20 @@ static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
}

/* Handler for IRQs from VPU core (irqV) */
-static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
{
	u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;

+	if (!status)
+		return false;

	REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status);

	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
		ivpu_mmu_irq_evtq_handler(vdev);

	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
-		ivpu_ipc_irq_handler(vdev);
+		ivpu_ipc_irq_handler(vdev, wake_thread);
if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -898,17 +926,17 @@ static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
		ivpu_hw_37xx_irq_noc_firewall_handler(vdev);

-	return status;
+	return true;
}
/* Handler for IRQs from Buttress core (irqB) */
static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
{
u32 status = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
bool schedule_recovery = false;
if (status == 0)
return 0;
if (!status)
return false;
if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
@@ -944,23 +972,27 @@ static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
	if (schedule_recovery)
		ivpu_pm_schedule_recovery(vdev);

-	return status;
+	return true;
}
static irqreturn_t ivpu_hw_37xx_irq_handler(int irq, void *ptr)
{
struct ivpu_device *vdev = ptr;
-	u32 ret_irqv, ret_irqb;
+	bool irqv_handled, irqb_handled, wake_thread = false;

	REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);

-	ret_irqv = ivpu_hw_37xx_irqv_handler(vdev, irq);
-	ret_irqb = ivpu_hw_37xx_irqb_handler(vdev, irq);
+	irqv_handled = ivpu_hw_37xx_irqv_handler(vdev, irq, &wake_thread);
+	irqb_handled = ivpu_hw_37xx_irqb_handler(vdev, irq);

	/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
	REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);

-	return IRQ_RETVAL(ret_irqb | ret_irqv);
+	if (wake_thread)
+		return IRQ_WAKE_THREAD;
+	if (irqv_handled || irqb_handled)
+		return IRQ_HANDLED;
+	return IRQ_NONE;
}
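The hard IRQ handler now only acknowledges hardware and returns IRQ_WAKE_THREAD when IPC callback messages are pending; the callbacks themselves run in the IRQ thread (ivpu_ipc_irq_thread_handler(), later in this diff), replacing the old kthread. The registration is outside this diff, but the standard threaded-IRQ pattern it implies looks like this (names illustrative):

#include <linux/interrupt.h>

static int register_vpu_irq_sketch(struct ivpu_device *vdev, int irq)
{
	/*
	 * Hard half: vdev->hw->ops->irq_handler (the function above).
	 * Thread half: drains the IPC callback list via
	 * ivpu_ipc_irq_thread_handler(). IRQF_NO_AUTOEN keeps the line
	 * disabled until the device is actually powered up.
	 */
	return devm_request_threaded_irq(vdev->drm.dev, irq,
					 vdev->hw->ops->irq_handler,
					 ivpu_irq_thread_handler,
					 IRQF_NO_AUTOEN, "intel_vpu", vdev);
}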
static void ivpu_hw_37xx_diagnose_failure(struct ivpu_device *vdev)
@@ -997,11 +1029,14 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
.info_init = ivpu_hw_37xx_info_init,
.power_up = ivpu_hw_37xx_power_up,
.is_idle = ivpu_hw_37xx_is_idle,
.wait_for_idle = ivpu_hw_37xx_wait_for_idle,
.power_down = ivpu_hw_37xx_power_down,
.reset = ivpu_hw_37xx_reset,
.boot_fw = ivpu_hw_37xx_boot_fw,
.wdt_disable = ivpu_hw_37xx_wdt_disable,
.diagnose_failure = ivpu_hw_37xx_diagnose_failure,
.profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
.reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,

diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h

@@ -240,6 +240,8 @@
#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG 0x06021008u
#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9)
#define VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT 0x06029000u
#define VPU_37XX_CPU_SS_DOORBELL_0 0x06300000u
#define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0)

diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c

@@ -39,6 +39,7 @@
#define TIMEOUT_US (150 * USEC_PER_MSEC)
#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define WEIGHTS_DEFAULT 0xf711f711u
#define WEIGHTS_ATS_DEFAULT 0x0000f711u
@@ -139,18 +140,21 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
vdev->timeout.tdr = 2000000;
vdev->timeout.reschedule_suspend = 1000;
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = 500;
} else if (ivpu_is_simics(vdev)) {
vdev->timeout.boot = 50;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 10000;
vdev->timeout.reschedule_suspend = 10;
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = 100;
} else {
vdev->timeout.boot = 1000;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 2000;
vdev->timeout.reschedule_suspend = 10;
vdev->timeout.autosuspend = 10;
vdev->timeout.d0i3_entry_msg = 5;
}
}
@@ -824,12 +828,6 @@ static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev)
{
	int ret;

-	ret = ivpu_hw_40xx_reset(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to reset HW: %d\n", ret);
-		return ret;
-	}
ret = ivpu_hw_40xx_d0i3_disable(vdev);
if (ret)
ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
@@ -898,10 +896,23 @@ static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev)
REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val);
}
static int ivpu_hw_40xx_wait_for_idle(struct ivpu_device *vdev)
{
return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
}
static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
{
vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT);
}
static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev)
{
int ret = 0;
ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev);
if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_reset(vdev))
ivpu_warn(vdev, "Failed to reset the VPU\n");
@@ -933,6 +944,19 @@ static void ivpu_hw_40xx_wdt_disable(struct ivpu_device *vdev)
REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val);
}
static u32 ivpu_hw_40xx_profiling_freq_get(struct ivpu_device *vdev)
{
return vdev->hw->pll.profiling_freq;
}
static void ivpu_hw_40xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
{
if (enable)
vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH;
else
vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
}
/* Register indirect accesses */
static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
{
@@ -1023,13 +1047,12 @@ static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
}

/* Handler for IRQs from VPU core (irqV) */
-static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
{
	u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
-	irqreturn_t ret = IRQ_NONE;

	if (!status)
-		return IRQ_NONE;
+		return false;

	REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status);
@@ -1037,7 +1060,7 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
		ivpu_mmu_irq_evtq_handler(vdev);

	if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
-		ret |= ivpu_ipc_irq_handler(vdev);
+		ivpu_ipc_irq_handler(vdev, wake_thread);
if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -1054,17 +1077,17 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
	if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
		ivpu_hw_40xx_irq_noc_firewall_handler(vdev);

-	return ret;
+	return true;
}
/* Handler for IRQs from Buttress core (irqB) */
-static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
{
	bool schedule_recovery = false;
	u32 status = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;

-	if (status == 0)
-		return IRQ_NONE;
+	if (!status)
+		return false;
if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
@@ -1116,26 +1139,27 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
	if (schedule_recovery)
		ivpu_pm_schedule_recovery(vdev);

-	return IRQ_HANDLED;
+	return true;
}
static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr)
{
+	bool irqv_handled, irqb_handled, wake_thread = false;
	struct ivpu_device *vdev = ptr;
-	irqreturn_t ret = IRQ_NONE;

	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);

-	ret |= ivpu_hw_40xx_irqv_handler(vdev, irq);
-	ret |= ivpu_hw_40xx_irqb_handler(vdev, irq);
+	irqv_handled = ivpu_hw_40xx_irqv_handler(vdev, irq, &wake_thread);
+	irqb_handled = ivpu_hw_40xx_irqb_handler(vdev, irq);

	/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);

-	if (ret & IRQ_WAKE_THREAD)
+	if (wake_thread)
		return IRQ_WAKE_THREAD;
-	return ret;
+	if (irqv_handled || irqb_handled)
+		return IRQ_HANDLED;
+	return IRQ_NONE;
}
static void ivpu_hw_40xx_diagnose_failure(struct ivpu_device *vdev)
@@ -1185,11 +1209,14 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
.info_init = ivpu_hw_40xx_info_init,
.power_up = ivpu_hw_40xx_power_up,
.is_idle = ivpu_hw_40xx_is_idle,
.wait_for_idle = ivpu_hw_40xx_wait_for_idle,
.power_down = ivpu_hw_40xx_power_down,
.reset = ivpu_hw_40xx_reset,
.boot_fw = ivpu_hw_40xx_boot_fw,
.wdt_disable = ivpu_hw_40xx_wdt_disable,
.diagnose_failure = ivpu_hw_40xx_diagnose_failure,
.profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
.reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,

diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c

@@ -5,7 +5,7 @@
#include <linux/genalloc.h>
#include <linux/highmem.h>
-#include <linux/kthread.h>
+#include <linux/pm_runtime.h>
#include <linux/wait.h>
#include "ivpu_drv.h"
@@ -17,19 +17,12 @@
#include "ivpu_pm.h"

#define IPC_MAX_RX_MSG 128
-#define IS_KTHREAD() (get_current()->flags & PF_KTHREAD)

struct ivpu_ipc_tx_buf {
	struct ivpu_ipc_hdr ipc;
	struct vpu_jsm_msg jsm;
};

-struct ivpu_ipc_rx_msg {
-	struct list_head link;
-	struct ivpu_ipc_hdr *ipc_hdr;
-	struct vpu_jsm_msg *jsm_msg;
-};
static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c,
struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr)
{
@@ -139,8 +132,49 @@ static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr)
	ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr);
}

-void
-ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel)
+static void
+ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+		    struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg;
lockdep_assert_held(&ipc->cons_lock);
lockdep_assert_irqs_disabled();
rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
if (!rx_msg) {
ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
return;
}
atomic_inc(&ipc->rx_msg_count);
rx_msg->ipc_hdr = ipc_hdr;
rx_msg->jsm_msg = jsm_msg;
rx_msg->callback = cons->rx_callback;
if (rx_msg->callback) {
list_add_tail(&rx_msg->link, &ipc->cb_msg_list);
} else {
spin_lock(&cons->rx_lock);
list_add_tail(&rx_msg->link, &cons->rx_msg_list);
spin_unlock(&cons->rx_lock);
wake_up(&cons->rx_msg_wq);
}
}
static void
ivpu_ipc_rx_msg_del(struct ivpu_device *vdev, struct ivpu_ipc_rx_msg *rx_msg)
{
list_del(&rx_msg->link);
ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
atomic_dec(&vdev->ipc->rx_msg_count);
kfree(rx_msg);
}
void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
u32 channel, ivpu_ipc_rx_callback_t rx_callback)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
@@ -148,13 +182,15 @@ ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
	cons->channel = channel;
	cons->tx_vpu_addr = 0;
	cons->request_id = 0;
-	spin_lock_init(&cons->rx_msg_lock);
+	cons->aborted = false;
+	cons->rx_callback = rx_callback;
+	spin_lock_init(&cons->rx_lock);
INIT_LIST_HEAD(&cons->rx_msg_list);
init_waitqueue_head(&cons->rx_msg_wq);
-	spin_lock_irq(&ipc->cons_list_lock);
+	spin_lock_irq(&ipc->cons_lock);
	list_add_tail(&cons->link, &ipc->cons_list);
-	spin_unlock_irq(&ipc->cons_list_lock);
+	spin_unlock_irq(&ipc->cons_lock);
}
void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons)
@@ -162,18 +198,14 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c
	struct ivpu_ipc_info *ipc = vdev->ipc;
	struct ivpu_ipc_rx_msg *rx_msg, *r;

-	spin_lock_irq(&ipc->cons_list_lock);
+	spin_lock_irq(&ipc->cons_lock);
	list_del(&cons->link);
-	spin_unlock_irq(&ipc->cons_list_lock);
+	spin_unlock_irq(&ipc->cons_lock);
-	spin_lock_irq(&cons->rx_msg_lock);
-	list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) {
-		list_del(&rx_msg->link);
-		ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
-		atomic_dec(&ipc->rx_msg_count);
-		kfree(rx_msg);
-	}
-	spin_unlock_irq(&cons->rx_msg_lock);
+	spin_lock_irq(&cons->rx_lock);
+	list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+		ivpu_ipc_rx_msg_del(vdev, rx_msg);
+	spin_unlock_irq(&cons->rx_lock);
ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr);
}
@@ -202,52 +234,61 @@ unlock:
return ret;
}
static bool ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
{
bool ret;
spin_lock_irq(&cons->rx_lock);
ret = !list_empty(&cons->rx_msg_list) || cons->aborted;
spin_unlock_irq(&cons->rx_lock);
return ret;
}
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_hdr *ipc_buf,
-		     struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms)
+		     struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms)
{
-	struct ivpu_ipc_info *ipc = vdev->ipc;
	struct ivpu_ipc_rx_msg *rx_msg;
	int wait_ret, ret = 0;

-	wait_ret = wait_event_timeout(cons->rx_msg_wq,
-				      (IS_KTHREAD() && kthread_should_stop()) ||
-				      !list_empty(&cons->rx_msg_list),
-				      msecs_to_jiffies(timeout_ms));
+	if (drm_WARN_ONCE(&vdev->drm, cons->rx_callback, "Consumer works only in async mode\n"))
+		return -EINVAL;

-	if (IS_KTHREAD() && kthread_should_stop())
-		return -EINTR;
+	wait_ret = wait_event_timeout(cons->rx_msg_wq,
+				      ivpu_ipc_rx_need_wakeup(cons),
+				      msecs_to_jiffies(timeout_ms));
if (wait_ret == 0)
return -ETIMEDOUT;
-	spin_lock_irq(&cons->rx_msg_lock);
+	spin_lock_irq(&cons->rx_lock);
+	if (cons->aborted) {
+		spin_unlock_irq(&cons->rx_lock);
+		return -ECANCELED;
+	}
rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
if (!rx_msg) {
-		spin_unlock_irq(&cons->rx_msg_lock);
+		spin_unlock_irq(&cons->rx_lock);
return -EAGAIN;
}
-	list_del(&rx_msg->link);
-	spin_unlock_irq(&cons->rx_msg_lock);
if (ipc_buf)
memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf));
if (rx_msg->jsm_msg) {
-		u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload));
+		u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg));
if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
ret = -EBADMSG;
}
-		if (ipc_payload)
-			memcpy(ipc_payload, rx_msg->jsm_msg, size);
+		if (jsm_msg)
+			memcpy(jsm_msg, rx_msg->jsm_msg, size);
}
-	ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
-	atomic_dec(&ipc->rx_msg_count);
-	kfree(rx_msg);
+	ivpu_ipc_rx_msg_del(vdev, rx_msg);
+	spin_unlock_irq(&cons->rx_lock);
return ret;
}
@@ -260,7 +301,7 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
	struct ivpu_ipc_consumer cons;
	int ret;

-	ivpu_ipc_consumer_add(vdev, &cons, channel);
+	ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
ret = ivpu_ipc_send(vdev, &cons, req);
if (ret) {
@@ -285,23 +326,19 @@ consumer_del:
	return ret;
}

-int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
-			  enum vpu_ipc_msg_type expected_resp_type,
-			  struct vpu_jsm_msg *resp, u32 channel,
-			  unsigned long timeout_ms)
+int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+				 enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+				 u32 channel, unsigned long timeout_ms)
{
struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
struct vpu_jsm_msg hb_resp;
int ret, hb_ret;
-	ret = ivpu_rpm_get(vdev);
-	if (ret < 0)
-		return ret;
+	drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));

-	ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp,
-					     channel, timeout_ms);
+	ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms);
if (ret != -ETIMEDOUT)
-		goto rpm_put;
+		return ret;
hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
@@ -311,7 +348,21 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
		ivpu_pm_schedule_recovery(vdev);
	}

-rpm_put:
return ret;
}
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
u32 channel, unsigned long timeout_ms)
{
int ret;
ret = ivpu_rpm_get(vdev);
if (ret < 0)
return ret;
ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms);
ivpu_rpm_put(vdev);
return ret;
}
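The split gives two entry points: ivpu_ipc_send_receive() takes a runtime-PM reference for the duration of the transaction, while the _active variant asserts the device is already powered and is meant for paths that must not call ivpu_rpm_get() (notably D0i3 entry during suspend). A typical caller of the plain variant, mirroring ivpu_jsm_context_release() later in this diff (the payload field name is assumed from the JSM API header):

static int ssid_release_example(struct ivpu_device *vdev, u32 host_ssid)
{
	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SSID_RELEASE };
	struct vpu_jsm_msg resp;

	req.payload.ssid_release.host_ssid = host_ssid;

	/* Wakes the device if needed and holds it awake until the reply. */
	return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE,
				     &resp, VPU_IPC_CHAN_ASYNC_CMD,
				     vdev->timeout.jsm);
}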
@@ -329,35 +380,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons
	return false;
}

-static void
-ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
-		  struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
-{
-	struct ivpu_ipc_info *ipc = vdev->ipc;
-	struct ivpu_ipc_rx_msg *rx_msg;
-	unsigned long flags;
-
-	lockdep_assert_held(&ipc->cons_list_lock);
-
-	rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
-	if (!rx_msg) {
-		ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
-		return;
-	}
-
-	atomic_inc(&ipc->rx_msg_count);
-
-	rx_msg->ipc_hdr = ipc_hdr;
-	rx_msg->jsm_msg = jsm_msg;
-
-	spin_lock_irqsave(&cons->rx_msg_lock, flags);
-	list_add_tail(&rx_msg->link, &cons->rx_msg_list);
-	spin_unlock_irqrestore(&cons->rx_msg_lock, flags);
-
-	wake_up(&cons->rx_msg_wq);
-}
-
-int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_consumer *cons;
@@ -375,7 +398,7 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
	vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
	if (vpu_addr == REG_IO_ERROR) {
		ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n");
-		return -EIO;
+		return;
}
ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr);
@@ -405,15 +428,15 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
	}

	dispatched = false;
-	spin_lock_irqsave(&ipc->cons_list_lock, flags);
+	spin_lock_irqsave(&ipc->cons_lock, flags);
	list_for_each_entry(cons, &ipc->cons_list, link) {
		if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) {
-			ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg);
+			ivpu_ipc_rx_msg_add(vdev, cons, ipc_hdr, jsm_msg);
			dispatched = true;
			break;
		}
	}
-	spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+	spin_unlock_irqrestore(&ipc->cons_lock, flags);
if (!dispatched) {
ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr);
@@ -421,7 +444,28 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
		}
	}

-	return 0;
+	if (wake_thread)
+		*wake_thread = !list_empty(&ipc->cb_msg_list);
}
irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg, *r;
struct list_head cb_msg_list;
INIT_LIST_HEAD(&cb_msg_list);
spin_lock_irq(&ipc->cons_lock);
list_splice_tail_init(&ipc->cb_msg_list, &cb_msg_list);
spin_unlock_irq(&ipc->cons_lock);
list_for_each_entry_safe(rx_msg, r, &cb_msg_list, link) {
rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
ivpu_ipc_rx_msg_del(vdev, rx_msg);
}
return IRQ_HANDLED;
}
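The thread handler uses the classic splice-and-drain idiom: pending callback messages are moved to a stack-local list under cons_lock, then processed with the lock dropped, keeping spinlock hold times minimal. The idiom in isolation:

#include <linux/list.h>
#include <linux/spinlock.h>

static void drain_cb_list_sketch(struct ivpu_ipc_info *ipc,
				 void (*process)(struct ivpu_ipc_rx_msg *msg))
{
	struct ivpu_ipc_rx_msg *msg, *next;
	LIST_HEAD(tmp);

	spin_lock_irq(&ipc->cons_lock);
	list_splice_tail_init(&ipc->cb_msg_list, &tmp);	/* source list is now empty */
	spin_unlock_irq(&ipc->cons_lock);

	/* tmp is private to this thread: safe to walk unlocked */
	list_for_each_entry_safe(msg, next, &tmp, link)
		process(msg);
}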
int ivpu_ipc_init(struct ivpu_device *vdev)
@@ -456,10 +500,10 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
		goto err_free_rx;
	}

+	spin_lock_init(&ipc->cons_lock);
	INIT_LIST_HEAD(&ipc->cons_list);
-	spin_lock_init(&ipc->cons_list_lock);
+	INIT_LIST_HEAD(&ipc->cb_msg_list);
drmm_mutex_init(&vdev->drm, &ipc->lock);
ivpu_ipc_reset(vdev);
return 0;
@@ -472,6 +516,13 @@ err_free_tx:
void ivpu_ipc_fini(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
drm_WARN_ON(&vdev->drm, ipc->on);
drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list));
drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
ivpu_ipc_mem_fini(vdev);
}
@@ -488,16 +539,27 @@ void ivpu_ipc_disable(struct ivpu_device *vdev)
{
	struct ivpu_ipc_info *ipc = vdev->ipc;
	struct ivpu_ipc_consumer *cons, *c;
-	unsigned long flags;
+	struct ivpu_ipc_rx_msg *rx_msg, *r;

+	drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));

	mutex_lock(&ipc->lock);
	ipc->on = false;
	mutex_unlock(&ipc->lock);

-	spin_lock_irqsave(&ipc->cons_list_lock, flags);
-	list_for_each_entry_safe(cons, c, &ipc->cons_list, link)
+	spin_lock_irq(&ipc->cons_lock);
+	list_for_each_entry_safe(cons, c, &ipc->cons_list, link) {
+		spin_lock(&cons->rx_lock);
+		if (!cons->rx_callback)
+			cons->aborted = true;
+		list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+			ivpu_ipc_rx_msg_del(vdev, rx_msg);
+		spin_unlock(&cons->rx_lock);
		wake_up(&cons->rx_msg_wq);
-	spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+	}
+	spin_unlock_irq(&ipc->cons_lock);
+
+	drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
}
void ivpu_ipc_reset(struct ivpu_device *vdev)
@@ -505,6 +567,7 @@ void ivpu_ipc_reset(struct ivpu_device *vdev)
struct ivpu_ipc_info *ipc = vdev->ipc;
mutex_lock(&ipc->lock);
drm_WARN_ON(&vdev->drm, ipc->on);
memset(ivpu_bo_vaddr(ipc->mem_tx), 0, ivpu_bo_size(ipc->mem_tx));
memset(ivpu_bo_vaddr(ipc->mem_rx), 0, ivpu_bo_size(ipc->mem_rx));

diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h

@@ -42,13 +42,26 @@ struct ivpu_ipc_hdr {
u8 status;
} __packed __aligned(IVPU_IPC_ALIGNMENT);
typedef void (*ivpu_ipc_rx_callback_t)(struct ivpu_device *vdev,
struct ivpu_ipc_hdr *ipc_hdr,
struct vpu_jsm_msg *jsm_msg);
struct ivpu_ipc_rx_msg {
struct list_head link;
struct ivpu_ipc_hdr *ipc_hdr;
struct vpu_jsm_msg *jsm_msg;
ivpu_ipc_rx_callback_t callback;
};
struct ivpu_ipc_consumer {
struct list_head link;
u32 channel;
u32 tx_vpu_addr;
u32 request_id;
+	bool aborted;
+	ivpu_ipc_rx_callback_t rx_callback;
-	spinlock_t rx_msg_lock; /* Protects rx_msg_list */
+	spinlock_t rx_lock; /* Protects rx_msg_list and aborted */
struct list_head rx_msg_list;
wait_queue_head_t rx_msg_wq;
};
@@ -60,8 +73,9 @@ struct ivpu_ipc_info {
	atomic_t rx_msg_count;

-	spinlock_t cons_list_lock; /* Protects cons_list */
+	spinlock_t cons_lock; /* Protects cons_list and cb_msg_list */
	struct list_head cons_list;
+	struct list_head cb_msg_list;
atomic_t request_id;
struct mutex lock; /* Lock on status */
@@ -75,19 +89,22 @@ void ivpu_ipc_enable(struct ivpu_device *vdev);
void ivpu_ipc_disable(struct ivpu_device *vdev);
void ivpu_ipc_reset(struct ivpu_device *vdev);

-int ivpu_ipc_irq_handler(struct ivpu_device *vdev);
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread);
+irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev);
void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
-			   u32 channel);
+			   u32 channel, ivpu_ipc_rx_callback_t callback);
void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons);
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
-		     struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload,
+		     struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg,
unsigned long timeout_ms);
int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
u32 channel, unsigned long timeout_ms);
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
-			  enum vpu_ipc_msg_type expected_resp_type,
-			  struct vpu_jsm_msg *resp, u32 channel,
-			  unsigned long timeout_ms);
+			  enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+			  u32 channel, unsigned long timeout_ms);
#endif /* __IVPU_IPC_H__ */

diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c

@@ -7,7 +7,6 @@
#include <linux/bitfield.h>
#include <linux/highmem.h>
-#include <linux/kthread.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <uapi/drm/ivpu_accel.h>
@@ -24,10 +23,6 @@
#define JOB_ID_CONTEXT_MASK GENMASK(31, 8)
#define JOB_MAX_BUFFER_COUNT 65535

-static unsigned int ivpu_tdr_timeout_ms;
-module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644);
-MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
{
ivpu_hw_reg_db_set(vdev, cmdq->db_id);
@@ -196,6 +191,8 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
entry->batch_buf_addr = job->cmd_buf_vpu_addr;
entry->job_id = job->job_id;
entry->flags = 0;
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
wmb(); /* Ensure that tail is updated after filling entry */
header->tail = next_entry;
wmb(); /* Flush WC buffer for jobq header */
@@ -264,7 +261,7 @@ static void job_release(struct kref *ref)
	for (i = 0; i < job->bo_count; i++)
		if (job->bos[i])
-			drm_gem_object_put(&job->bos[i]->base);
+			drm_gem_object_put(&job->bos[i]->base.base);
dma_fence_put(job->done_fence);
ivpu_file_priv_put(&job->file_priv);
@@ -340,23 +337,12 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
ivpu_stop_job_timeout_detection(vdev);
job_put(job);
return 0;
}
static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg)
{
struct vpu_ipc_msg_payload_job_done *payload;
struct vpu_jsm_msg *job_ret_msg = msg;
int ret;
payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload;
ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
if (ret)
ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
}
void ivpu_jobs_abort_all(struct ivpu_device *vdev)
{
struct ivpu_job *job;
@@ -398,11 +384,13 @@ static int ivpu_direct_job_submission(struct ivpu_job *job)
if (ret)
goto err_xa_erase;
ivpu_start_job_timeout_detection(vdev);
ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n",
job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
job->engine_idx, cmdq->jobq->header.tail);
-	if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) {
+	if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW) {
ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
cmdq->jobq->header.head = cmdq->jobq->header.tail;
wmb(); /* Flush WC buffer for jobq header */
@@ -448,7 +436,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
	}

	bo = job->bos[CMD_BUF_IDX];
-	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) {
+	if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) {
ivpu_warn(vdev, "Buffer is already in use\n");
return -EBUSY;
}
@@ -468,7 +456,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
	}

	for (i = 0; i < buf_count; i++) {
-		ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
+		ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1);
if (ret) {
ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
goto unlock_reservations;
@@ -477,7 +465,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
	for (i = 0; i < buf_count; i++) {
		usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP;
-		dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, usage);
+		dma_resv_add_fence(job->bos[i]->base.base.resv, job->done_fence, usage);
}
unlock_reservations:
@@ -562,61 +550,36 @@ free_handles:
	return ret;
}

-static int ivpu_job_done_thread(void *arg)
+static void
+ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
+		       struct vpu_jsm_msg *jsm_msg)
{
-	struct ivpu_device *vdev = (struct ivpu_device *)arg;
-	struct ivpu_ipc_consumer cons;
-	struct vpu_jsm_msg jsm_msg;
-	bool jobs_submitted;
-	unsigned int timeout;
+	struct vpu_ipc_msg_payload_job_done *payload;
	int ret;

-	ivpu_dbg(vdev, JOB, "Started %s\n", __func__);
-
-	ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
-
-	while (!kthread_should_stop()) {
-		timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
-		jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
-		ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
-		if (!ret) {
-			ivpu_job_done_message(vdev, &jsm_msg);
-		} else if (ret == -ETIMEDOUT) {
-			if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) {
-				ivpu_err(vdev, "TDR detected, timeout %d ms", timeout);
-				ivpu_hw_diagnose_failure(vdev);
-				ivpu_pm_schedule_recovery(vdev);
-			}
-		}
-	}
+	if (!jsm_msg) {
+		ivpu_err(vdev, "IPC message has no JSM payload\n");
+		return;
+	}

-	ivpu_ipc_consumer_del(vdev, &cons);
-	ivpu_jobs_abort_all(vdev);
-	ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__);
-	return 0;
-}
-
-int ivpu_job_done_thread_init(struct ivpu_device *vdev)
-{
-	struct task_struct *thread;
-
-	thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread");
-	if (IS_ERR(thread)) {
-		ivpu_err(vdev, "Failed to start job completion thread\n");
-		return -EIO;
+	if (jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
+		ivpu_err(vdev, "Invalid JSM message result: %d\n", jsm_msg->result);
+		return;
	}

-	get_task_struct(thread);
-	wake_up_process(thread);
-
-	vdev->job_done_thread = thread;
-
-	return 0;
+	payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
+	ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
+	if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
+		ivpu_start_job_timeout_detection(vdev);
}

-void ivpu_job_done_thread_fini(struct ivpu_device *vdev)
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
{
-	kthread_stop_put(vdev->job_done_thread);
+	ivpu_ipc_consumer_add(vdev, &vdev->job_done_consumer,
+			      VPU_IPC_CHAN_JOB_RET, ivpu_job_done_callback);
}
+
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
+{
+	ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
+}
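Job completion thus moves from a dedicated kthread (whose ivpu_ipc_receive() timeout doubled as hang detection) to an IPC callback run from the IRQ thread; hang detection becomes explicit via ivpu_start_job_timeout_detection()/ivpu_stop_job_timeout_detection(), which live in code not shown in this diff. A hypothetical sketch of the delayed-work detector those calls imply (member and function names invented for illustration; the real implementation may differ):

/* Hypothetical: assumes a struct delayed_work job_timeout_work in the
 * device; the actual field lives elsewhere in the driver.
 */
static void job_timeout_work_sketch(struct work_struct *work)
{
	struct ivpu_device *vdev =
		container_of(work, struct ivpu_device, job_timeout_work.work);

	ivpu_err(vdev, "TDR detected, timeout %d ms", vdev->timeout.tdr);
	ivpu_hw_diagnose_failure(vdev);
	ivpu_pm_schedule_recovery(vdev);
}

/* start: (re)arm on submit/completion while jobs remain pending
 *   mod_delayed_work(system_wq, &vdev->job_timeout_work,
 *                    msecs_to_jiffies(vdev->timeout.tdr));
 * stop: when the last job retires
 *   cancel_delayed_work(&vdev->job_timeout_work);
 */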

diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h

@@ -59,8 +59,8 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv);
void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);

-int ivpu_job_done_thread_init(struct ivpu_device *vdev);
-void ivpu_job_done_thread_fini(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);

diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c

@@ -4,6 +4,7 @@
*/
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
@@ -36,6 +37,17 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_REGISTER_DB);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_ENGINE_RESUME);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL);
IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE);
@@ -65,6 +77,12 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL_RSP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER);
IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE_DONE);
}
#undef IVPU_CASE_TO_STR
@@ -243,3 +261,23 @@ int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid)
return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
}
int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_PWR_D0I3_ENTER };
struct vpu_jsm_msg resp;
int ret;
if (IVPU_WA(disable_d0i3_msg))
return 0;
req.payload.pwr_d0i3_enter.send_response = 1;
ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE,
&resp, VPU_IPC_CHAN_GEN_CMD,
vdev->timeout.d0i3_entry_msg);
if (ret)
return ret;
return ivpu_hw_wait_for_idle(vdev);
}
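This ties the series together: on the suspend path the driver is expected to send the D0i3 entry message over the general command channel with the _active variant (runtime PM is already transitioning, so no rpm_get), then poll the IDLE bit through the new wait_for_idle op before cutting power. A sketch of the implied ordering (the actual call site is not part of this diff):

static int shutdown_sketch(struct ivpu_device *vdev)
{
	int ret;

	ret = ivpu_jsm_pwr_d0i3_enter(vdev);	/* message + wait_for_idle */
	if (ret)
		ivpu_warn(vdev, "Failed to enter D0i3: %d\n", ret);

	return ivpu_hw_power_down(vdev);	/* records D0i3 entry timestamps */
}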

diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h

@@ -22,4 +22,5 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati
int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask,
u64 trace_hw_component_mask);
int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid);
int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev);
#endif

[Diff truncated: some changed files are not shown because too many files changed in this diff.]