drm/radeon: Use direct mapping for fast fb access on RS690

This patch allows the CPU to map the stolen vram segment
directly rather than going through the PCI BAR.  This
significantly improves performance for certain workloads with
a properly patched ddx.

Use radeon.fastfb=1 to enable it (disabled by default).
Currently only supported on RS690, but support for RS780/880
and newer APUs may be added eventually.

Signed-off-by: Samuel Li <samuel.li@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Samuel Li 2013-04-08 17:25:47 -04:00 committed by Alex Deucher
parent 7c1c7c18fc
commit a0a53aa8c7
7 changed files with 43 additions and 2 deletions

View file

@ -95,6 +95,7 @@ extern int radeon_hw_i2c;
extern int radeon_pcie_gen2;
extern int radeon_msi;
extern int radeon_lockup_timeout;
extern int radeon_fastfb;
/*
* Copy from radeon_drv.h so we don't have to include both and have conflicting
@ -1616,6 +1617,7 @@ struct radeon_device {
bool suspend;
bool need_dma32;
bool accel_working;
bool fastfb_working; /* IGP feature*/
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
const struct firmware *me_fw; /* all family ME firmware */
const struct firmware *pfp_fw; /* r6/700 PFP firmware */

View file

@ -71,9 +71,10 @@
* 2.28.0 - r600-eg: Add MEM_WRITE packet support
* 2.29.0 - R500 FP16 color clear registers
* 2.30.0 - fix for FMASK texturing
* 2.31.0 - Add fastfb support for rs690
*/
#define KMS_DRIVER_MAJOR 2
#define KMS_DRIVER_MINOR 30
#define KMS_DRIVER_MINOR 31
#define KMS_DRIVER_PATCHLEVEL 0
int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
int radeon_driver_unload_kms(struct drm_device *dev);
@ -160,6 +161,7 @@ int radeon_hw_i2c = 0;
int radeon_pcie_gen2 = -1;
int radeon_msi = -1;
int radeon_lockup_timeout = 10000;
int radeon_fastfb = 0;
MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
module_param_named(no_wb, radeon_no_wb, int, 0444);
@ -212,6 +214,9 @@ module_param_named(msi, radeon_msi, int, 0444);
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (defaul 10000 = 10 seconds, 0 = disable)");
module_param_named(lockup_timeout, radeon_lockup_timeout, int, 0444);
MODULE_PARM_DESC(fastfb, "Direct FB access for IGP chips (0 = disable, 1 = enable)");
module_param_named(fastfb, radeon_fastfb, int, 0444);
static struct pci_device_id pciidlist[] = {
radeon_PCI_IDS
};

View file

@ -376,6 +376,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
else
return -EINVAL;
break;
case RADEON_INFO_FASTFB_WORKING:
value = rdev->fastfb_working;
break;
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->request);
return -EINVAL;

View file

@ -321,8 +321,10 @@ void radeon_bo_force_delete(struct radeon_device *rdev)
int radeon_bo_init(struct radeon_device *rdev)
{
/* Add an MTRR for the VRAM */
rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
if (!rdev->fastfb_working) {
rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
MTRR_TYPE_WRCOMB, 1);
}
DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
rdev->mc.mc_vram_size >> 20,
(unsigned long long)rdev->mc.aper_size >> 20);

View file

@ -148,6 +148,8 @@ void rs690_pm_info(struct radeon_device *rdev)
static void rs690_mc_init(struct radeon_device *rdev)
{
u64 base;
uint32_t h_addr, l_addr;
unsigned long long k8_addr;
rs400_gart_adjust_size(rdev);
rdev->mc.vram_is_ddr = true;
@ -160,6 +162,27 @@ static void rs690_mc_init(struct radeon_device *rdev)
base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
base = G_000100_MC_FB_START(base) << 16;
rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
/* Use K8 direct mapping for fast fb access. */
rdev->fastfb_working = false;
h_addr = G_00005F_K8_ADDR_EXT(RREG32_MC(R_00005F_MC_MISC_UMA_CNTL));
l_addr = RREG32_MC(R_00001E_K8_FB_LOCATION);
k8_addr = ((unsigned long long)h_addr) << 32 | l_addr;
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
if (k8_addr + rdev->mc.visible_vram_size < 0x100000000ULL)
#endif
{
/* FastFB shall be used with UMA memory. Here it is simply disabled when sideport
* memory is present.
*/
if (rdev->mc.igp_sideport_enabled == false && radeon_fastfb == 1) {
DRM_INFO("Direct mapping: aper base at 0x%llx, replaced by direct mapping base 0x%llx.\n",
(unsigned long long)rdev->mc.aper_base, k8_addr);
rdev->mc.aper_base = (resource_size_t)k8_addr;
rdev->fastfb_working = true;
}
}
rs690_pm_info(rdev);
radeon_vram_location(rdev, &rdev->mc, base);
rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;

View file

@ -29,6 +29,9 @@
#define __RS690D_H__
/* Registers */
#define R_00001E_K8_FB_LOCATION 0x00001E
#define R_00005F_MC_MISC_UMA_CNTL 0x00005F
#define G_00005F_K8_ADDR_EXT(x) (((x) >> 0) & 0xFF)
#define R_000078_MC_INDEX 0x000078
#define S_000078_MC_IND_ADDR(x) (((x) & 0x1FF) << 0)
#define G_000078_MC_IND_ADDR(x) (((x) >> 0) & 0x1FF)

View file

@ -972,6 +972,9 @@ struct drm_radeon_cs {
#define RADEON_INFO_MAX_SE 0x12
/* max SH per SE */
#define RADEON_INFO_MAX_SH_PER_SE 0x13
/* fast fb access is enabled */
#define RADEON_INFO_FASTFB_WORKING 0x14
struct drm_radeon_info {
uint32_t request;