drm/amdgpu: Fix RAS function interface

The correctable and uncorrectable error counts
are both calculated at each invocation of this
function. Therefore, it is highly inefficient to
return just one of them based on a Boolean
input. If the caller wants both, it has to call
the function twice and the work is done twice.
(And this work is O(n^3) on Vega20.)

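Fix this "interface" to simply return what it has
calculated--both values. Let the caller choose
what it wants to record, inspect, or use. The
values are returned through pointer arguments;
either pointer may be NULL if the caller does not
need that count.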

Cc: Alexander Deucher <Alexander.Deucher@amd.com>
Cc: John Clements <john.clements@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Luben Tuikov 2021-05-18 21:07:17 -04:00 committed by Alex Deucher
parent 2871e10199
commit a46751fbcd
2 changed files with 18 additions and 10 deletions

View file

@ -1043,29 +1043,36 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
} }
/* get the total error counts on all IPs */ /* get the total error counts on all IPs */
unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce) unsigned long *ce_count,
unsigned long *ue_count)
{ {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj; struct ras_manager *obj;
struct ras_err_data data = {0, 0}; unsigned long ce, ue;
if (!adev->ras_enabled || !con) if (!adev->ras_enabled || !con)
return 0; return;
ce = 0;
ue = 0;
list_for_each_entry(obj, &con->head, node) { list_for_each_entry(obj, &con->head, node) {
struct ras_query_if info = { struct ras_query_if info = {
.head = obj->head, .head = obj->head,
}; };
if (amdgpu_ras_query_error_status(adev, &info)) if (amdgpu_ras_query_error_status(adev, &info))
return 0; return;
data.ce_count += info.ce_count; ce += info.ce_count;
data.ue_count += info.ue_count; ue += info.ue_count;
} }
return is_ce ? data.ce_count : data.ue_count; if (ce_count)
*ce_count = ce;
if (ue_count)
*ue_count = ue;
} }
/* query/inject/cure end */ /* query/inject/cure end */

View file

@ -485,8 +485,9 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
void amdgpu_ras_resume(struct amdgpu_device *adev); void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev);
unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce); unsigned long *ce_count,
unsigned long *ue_count);
/* error handling functions */ /* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,