drm/amdkfd: change system memory overcommit limit

This change improves the system memory limit by:
1. replacing the userptr limit with a total system memory limit that
   counts both TTM memory usage and userptr usage;
2. counting the TTM bookkeeping size (acc_size) against the limits for
   all BOs.
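
Previously the kernel/TTM limit (3/8 of system RAM) and the userptr
limit (3/4 of system RAM) were enforced independently, so together they
could overcommit system memory. Now the combined TTM + userptr usage is
capped at 3/4 of RAM, with TTM-backed memory further capped at 3/8.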

Signed-off-by: Eric Huang <JinHuiEric.Huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 5d240da93e
parent 0f04e53858
Author:    Eric Huang, 2018-09-05 11:46:14 -04:00
Committer: Alex Deucher
1 file changed, 58 insertions(+), 41 deletions(-)

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -46,9 +46,9 @@
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
-	uint64_t max_userptr_mem_limit;
+	uint64_t max_ttm_mem_limit;
 	int64_t system_mem_used;
-	int64_t userptr_mem_used;
+	int64_t ttm_mem_used;
 	spinlock_t mem_limit_lock;
 } kfd_mem_limit;
 
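
Note that this is more than a rename: ttm_mem_used tracks all
TTM-allocated memory (GTT BOs plus every BO's acc_size bookkeeping
overhead), while system_mem_used now covers TTM and userptr memory
together rather than kernel memory alone.
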
@@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
 }
 
 /* Set memory usage limits. Current, limits are
- *  System (kernel) memory - 3/8th System RAM
- *  Userptr memory - 3/4th System RAM
+ *  System (TTM + userptr) memory - 3/4th System RAM
+ *  TTM memory - 3/8th System RAM
  */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 {
@@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 	mem *= si.mem_unit;
 
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
-	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
-	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+	kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
+	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
 		(kfd_mem_limit.max_system_mem_limit >> 20),
-		(kfd_mem_limit.max_userptr_mem_limit >> 20));
+		(kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
 static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain)
+		uint64_t size, u32 domain, bool sg)
 {
-	size_t acc_size;
+	size_t acc_size, system_mem_needed, ttm_mem_needed;
 	int ret = 0;
 
 	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
 				       sizeof(struct amdgpu_bo));
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
+
 	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
-			kfd_mem_limit.max_system_mem_limit) {
-			ret = -ENOMEM;
-			goto err_no_mem;
-		}
-		kfd_mem_limit.system_mem_used += (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
-		if ((kfd_mem_limit.system_mem_used + acc_size >
-			kfd_mem_limit.max_system_mem_limit) ||
-			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
-			kfd_mem_limit.max_userptr_mem_limit)) {
-			ret = -ENOMEM;
-			goto err_no_mem;
-		}
-		kfd_mem_limit.system_mem_used += acc_size;
-		kfd_mem_limit.userptr_mem_used += size;
+		/* TTM GTT memory */
+		system_mem_needed = acc_size + size;
+		ttm_mem_needed = acc_size + size;
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+		/* Userptr */
+		system_mem_needed = acc_size + size;
+		ttm_mem_needed = acc_size;
+	} else {
+		/* VRAM and SG */
+		system_mem_needed = acc_size;
+		ttm_mem_needed = acc_size;
 	}
 
-err_no_mem:
+	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
+		kfd_mem_limit.max_system_mem_limit) ||
+		(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+		kfd_mem_limit.max_ttm_mem_limit))
+		ret = -ENOMEM;
+	else {
+		kfd_mem_limit.system_mem_used += system_mem_needed;
+		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+	}
+
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 	return ret;
 }
 
 static void unreserve_system_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain)
+		uint64_t size, u32 domain, bool sg)
 {
 	size_t acc_size;
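
Summarizing the reservation rules above, each allocation type is now
charged as follows (acc_size is the TTM bookkeeping overhead returned
by ttm_bo_dma_acc_size()):

  alloc type                  system_mem_needed    ttm_mem_needed
  GTT                         acc_size + size      acc_size + size
  userptr (CPU domain, !sg)   acc_size + size      acc_size
  VRAM / SG                   acc_size             acc_size

Userptr pages occupy system memory but are not allocated through TTM,
so only their bookkeeping overhead counts against the TTM limit; VRAM
and SG BOs consume no system memory beyond that overhead.
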
@@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+		kfd_mem_limit.system_mem_used -= (acc_size + size);
+		kfd_mem_limit.ttm_mem_used -= acc_size;
+	} else {
 		kfd_mem_limit.system_mem_used -= acc_size;
-		kfd_mem_limit.userptr_mem_used -= size;
+		kfd_mem_limit.ttm_mem_used -= acc_size;
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
-	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-		  "kfd userptr memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+		  "kfd TTM memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 
 	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
-		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
-		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+		kfd_mem_limit.system_mem_used -=
+			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
 	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -=
 			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+		kfd_mem_limit.ttm_mem_used -=
+			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+	} else {
+		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
-	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-		  "kfd userptr memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+		  "kfd TTM memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
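
Both release paths mirror the reservation table above, subtracting
exactly what each domain reserved; an imbalance now trips the TTM
counter's WARN_ONCE instead of the old userptr check.
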
@@ -1219,10 +1235,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
+						     alloc_domain, false);
 	if (ret) {
 		pr_debug("Insufficient system memory\n");
-		goto err_reserve_system_mem;
+		goto err_reserve_limit;
 	}
 
 	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1270,10 +1287,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 allocate_init_user_pages_failed:
 	amdgpu_bo_unref(&bo);
 	/* Don't unreserve system mem limit twice */
-	goto err_reserve_system_mem;
+	goto err_reserve_limit;
 err_bo_create:
-	unreserve_system_mem_limit(adev, size, alloc_domain);
-err_reserve_system_mem:
+	unreserve_system_mem_limit(adev, size, alloc_domain, false);
+err_reserve_limit:
 	mutex_destroy(&(*mem)->lock);
 	kfree(*mem);
 	return ret;
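
As a sanity check on the shift arithmetic, here is a minimal
standalone sketch (ordinary userspace C, not kernel code; the names
merely mirror kfd_mem_limit) that reproduces the limits
amdgpu_amdkfd_gpuvm_init_mem_limits() computes, assuming a 16 GiB
machine:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* pretend si.totalram * si.mem_unit came out to 16 GiB */
	uint64_t mem = 16ULL << 30;

	/* 3/4 of RAM: 1/2 + 1/4 */
	uint64_t max_system_mem_limit = (mem >> 1) + (mem >> 2);
	/* 3/8 of RAM: 1/2 - 1/8 */
	uint64_t max_ttm_mem_limit = (mem >> 1) - (mem >> 3);

	/* prints: Kernel memory limit 12288M, TTM limit 6144M */
	printf("Kernel memory limit %lluM, TTM limit %lluM\n",
	       (unsigned long long)(max_system_mem_limit >> 20),
	       (unsigned long long)(max_ttm_mem_limit >> 20));
	return 0;
}

Charging acc_size for every BO means even VRAM allocations now reserve
a small amount of system memory, matching what TTM actually consumes
for its bookkeeping structures.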