drm/amdkfd: change system memory overcommit limit
It improves the system memory limit by: 1. replacing the userptr limit with a total system memory limit that counts TTM memory usage and userptr usage. 2. counting acc size for all BOs. Signed-off-by: Eric Huang <JinHuiEric.Huang@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
0f04e53858
commit
5d240da93e
|
@ -46,9 +46,9 @@
|
||||||
/* Impose limit on how much memory KFD can use */
|
/* Impose limit on how much memory KFD can use */
|
||||||
static struct {
|
static struct {
|
||||||
uint64_t max_system_mem_limit;
|
uint64_t max_system_mem_limit;
|
||||||
uint64_t max_userptr_mem_limit;
|
uint64_t max_ttm_mem_limit;
|
||||||
int64_t system_mem_used;
|
int64_t system_mem_used;
|
||||||
int64_t userptr_mem_used;
|
int64_t ttm_mem_used;
|
||||||
spinlock_t mem_limit_lock;
|
spinlock_t mem_limit_lock;
|
||||||
} kfd_mem_limit;
|
} kfd_mem_limit;
|
||||||
|
|
||||||
|
@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set memory usage limits. Currently, limits are
|
/* Set memory usage limits. Currently, limits are
|
||||||
* System (kernel) memory - 3/8th System RAM
|
* System (TTM + userptr) memory - 3/4th System RAM
|
||||||
* Userptr memory - 3/4th System RAM
|
* TTM memory - 3/8th System RAM
|
||||||
*/
|
*/
|
||||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
|
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
|
||||||
{
|
{
|
||||||
|
@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
|
||||||
mem *= si.mem_unit;
|
mem *= si.mem_unit;
|
||||||
|
|
||||||
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
|
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
|
||||||
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
|
kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
|
||||||
kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
|
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
|
||||||
pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
|
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
|
||||||
(kfd_mem_limit.max_system_mem_limit >> 20),
|
(kfd_mem_limit.max_system_mem_limit >> 20),
|
||||||
(kfd_mem_limit.max_userptr_mem_limit >> 20));
|
(kfd_mem_limit.max_ttm_mem_limit >> 20));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
|
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
|
||||||
uint64_t size, u32 domain)
|
uint64_t size, u32 domain, bool sg)
|
||||||
{
|
{
|
||||||
size_t acc_size;
|
size_t acc_size, system_mem_needed, ttm_mem_needed;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
|
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
|
||||||
sizeof(struct amdgpu_bo));
|
sizeof(struct amdgpu_bo));
|
||||||
|
|
||||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||||
|
|
||||||
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
|
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
|
||||||
if (kfd_mem_limit.system_mem_used + (acc_size + size) >
|
/* TTM GTT memory */
|
||||||
kfd_mem_limit.max_system_mem_limit) {
|
system_mem_needed = acc_size + size;
|
||||||
ret = -ENOMEM;
|
ttm_mem_needed = acc_size + size;
|
||||||
goto err_no_mem;
|
} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
|
||||||
}
|
/* Userptr */
|
||||||
kfd_mem_limit.system_mem_used += (acc_size + size);
|
system_mem_needed = acc_size + size;
|
||||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
|
ttm_mem_needed = acc_size;
|
||||||
if ((kfd_mem_limit.system_mem_used + acc_size >
|
} else {
|
||||||
kfd_mem_limit.max_system_mem_limit) ||
|
/* VRAM and SG */
|
||||||
(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
|
system_mem_needed = acc_size;
|
||||||
kfd_mem_limit.max_userptr_mem_limit)) {
|
ttm_mem_needed = acc_size;
|
||||||
ret = -ENOMEM;
|
|
||||||
goto err_no_mem;
|
|
||||||
}
|
|
||||||
kfd_mem_limit.system_mem_used += acc_size;
|
|
||||||
kfd_mem_limit.userptr_mem_used += size;
|
|
||||||
}
|
}
|
||||||
err_no_mem:
|
|
||||||
|
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
|
||||||
|
kfd_mem_limit.max_system_mem_limit) ||
|
||||||
|
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
|
||||||
|
kfd_mem_limit.max_ttm_mem_limit))
|
||||||
|
ret = -ENOMEM;
|
||||||
|
else {
|
||||||
|
kfd_mem_limit.system_mem_used += system_mem_needed;
|
||||||
|
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
|
||||||
|
}
|
||||||
|
|
||||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void unreserve_system_mem_limit(struct amdgpu_device *adev,
|
static void unreserve_system_mem_limit(struct amdgpu_device *adev,
|
||||||
uint64_t size, u32 domain)
|
uint64_t size, u32 domain, bool sg)
|
||||||
{
|
{
|
||||||
size_t acc_size;
|
size_t acc_size;
|
||||||
|
|
||||||
|
@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
|
||||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||||
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
|
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
|
||||||
kfd_mem_limit.system_mem_used -= (acc_size + size);
|
kfd_mem_limit.system_mem_used -= (acc_size + size);
|
||||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
|
kfd_mem_limit.ttm_mem_used -= (acc_size + size);
|
||||||
|
} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
|
||||||
|
kfd_mem_limit.system_mem_used -= (acc_size + size);
|
||||||
|
kfd_mem_limit.ttm_mem_used -= acc_size;
|
||||||
|
} else {
|
||||||
kfd_mem_limit.system_mem_used -= acc_size;
|
kfd_mem_limit.system_mem_used -= acc_size;
|
||||||
kfd_mem_limit.userptr_mem_used -= size;
|
kfd_mem_limit.ttm_mem_used -= acc_size;
|
||||||
}
|
}
|
||||||
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
||||||
"kfd system memory accounting unbalanced");
|
"kfd system memory accounting unbalanced");
|
||||||
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
|
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
|
||||||
"kfd userptr memory accounting unbalanced");
|
"kfd TTM memory accounting unbalanced");
|
||||||
|
|
||||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||||
}
|
}
|
||||||
|
@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
|
||||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||||
|
|
||||||
if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
|
if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
|
||||||
kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
|
kfd_mem_limit.system_mem_used -=
|
||||||
kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
|
(bo->tbo.acc_size + amdgpu_bo_size(bo));
|
||||||
|
kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
|
||||||
} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
|
} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
|
||||||
kfd_mem_limit.system_mem_used -=
|
kfd_mem_limit.system_mem_used -=
|
||||||
(bo->tbo.acc_size + amdgpu_bo_size(bo));
|
(bo->tbo.acc_size + amdgpu_bo_size(bo));
|
||||||
|
kfd_mem_limit.ttm_mem_used -=
|
||||||
|
(bo->tbo.acc_size + amdgpu_bo_size(bo));
|
||||||
|
} else {
|
||||||
|
kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
|
||||||
|
kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
|
||||||
}
|
}
|
||||||
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
||||||
"kfd system memory accounting unbalanced");
|
"kfd system memory accounting unbalanced");
|
||||||
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
|
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
|
||||||
"kfd userptr memory accounting unbalanced");
|
"kfd TTM memory accounting unbalanced");
|
||||||
|
|
||||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||||
}
|
}
|
||||||
|
@ -1219,10 +1235,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||||
|
|
||||||
amdgpu_sync_create(&(*mem)->sync);
|
amdgpu_sync_create(&(*mem)->sync);
|
||||||
|
|
||||||
ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
|
ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
|
||||||
|
alloc_domain, false);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
pr_debug("Insufficient system memory\n");
|
pr_debug("Insufficient system memory\n");
|
||||||
goto err_reserve_system_mem;
|
goto err_reserve_limit;
|
||||||
}
|
}
|
||||||
|
|
||||||
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
|
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
|
||||||
|
@ -1270,10 +1287,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||||
allocate_init_user_pages_failed:
|
allocate_init_user_pages_failed:
|
||||||
amdgpu_bo_unref(&bo);
|
amdgpu_bo_unref(&bo);
|
||||||
/* Don't unreserve system mem limit twice */
|
/* Don't unreserve system mem limit twice */
|
||||||
goto err_reserve_system_mem;
|
goto err_reserve_limit;
|
||||||
err_bo_create:
|
err_bo_create:
|
||||||
unreserve_system_mem_limit(adev, size, alloc_domain);
|
unreserve_system_mem_limit(adev, size, alloc_domain, false);
|
||||||
err_reserve_system_mem:
|
err_reserve_limit:
|
||||||
mutex_destroy(&(*mem)->lock);
|
mutex_destroy(&(*mem)->lock);
|
||||||
kfree(*mem);
|
kfree(*mem);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
Loading…
Reference in New Issue