drm/amdgpu: Rework mca ras sw_init

To align with other IP blocks

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Hawking Zhang 2023-03-15 08:59:04 +08:00 committed by Alex Deucher
parent 22e3d9343b
commit 7f544c5488
6 changed files with 103 additions and 54 deletions

View file

@ -466,6 +466,19 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
if (r)
return r;
/* mca.x ras block */
r = amdgpu_mca_mp0_ras_sw_init(adev);
if (r)
return r;
r = amdgpu_mca_mp1_ras_sw_init(adev);
if (r)
return r;
r = amdgpu_mca_mpio_ras_sw_init(adev);
if (r)
return r;
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras = &xgmi_ras;
amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);

View file

@ -70,3 +70,75 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
amdgpu_mca_reset_error_count(adev, mc_status_addr);
}
/*
 * amdgpu_mca_mp0_ras_sw_init - software init of the mca.mp0 RAS block
 * @adev: amdgpu device
 *
 * Does nothing when no mp0 RAS block has been attached to adev->mca.
 * Otherwise registers the block with the RAS core and then fills in the
 * common RAS interface fields (name, block, type) and publishes the
 * interface through adev->mca.mp0.ras_if.
 *
 * Returns 0 on success or when there is no mp0 RAS block, otherwise the
 * error code returned by amdgpu_ras_register_ras_block().
 */
int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_mca_ras_block *mp0_ras = adev->mca.mp0.ras;
	int ret;

	/* No mp0 RAS block was set up for this device: nothing to register. */
	if (!mp0_ras)
		return 0;

	ret = amdgpu_ras_register_ras_block(adev, &mp0_ras->ras_block);
	if (ret) {
		dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n");
		return ret;
	}

	/* Common interface fields are only populated after registration succeeds. */
	adev->mca.mp0.ras_if = &mp0_ras->ras_block.ras_comm;
	mp0_ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
	mp0_ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
	strcpy(mp0_ras->ras_block.ras_comm.name, "mca.mp0");

	return 0;
}
/*
 * amdgpu_mca_mp1_ras_sw_init - software init of the mca.mp1 RAS block
 * @adev: amdgpu device
 *
 * Does nothing when no mp1 RAS block has been attached to adev->mca.
 * Otherwise registers the block with the RAS core and then fills in the
 * common RAS interface fields (name, block, type) and publishes the
 * interface through adev->mca.mp1.ras_if.
 *
 * Returns 0 on success or when there is no mp1 RAS block, otherwise the
 * error code returned by amdgpu_ras_register_ras_block().
 */
int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_mca_ras_block *mp1_ras = adev->mca.mp1.ras;
	int ret;

	/* No mp1 RAS block was set up for this device: nothing to register. */
	if (!mp1_ras)
		return 0;

	ret = amdgpu_ras_register_ras_block(adev, &mp1_ras->ras_block);
	if (ret) {
		dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n");
		return ret;
	}

	/* Common interface fields are only populated after registration succeeds. */
	adev->mca.mp1.ras_if = &mp1_ras->ras_block.ras_comm;
	mp1_ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
	mp1_ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
	strcpy(mp1_ras->ras_block.ras_comm.name, "mca.mp1");

	return 0;
}
/*
 * amdgpu_mca_mpio_ras_sw_init - software init of the mca.mpio RAS block
 * @adev: amdgpu device
 *
 * Does nothing when no mpio RAS block has been attached to adev->mca.
 * Otherwise registers the block with the RAS core and then fills in the
 * common RAS interface fields (name, block, type) and publishes the
 * interface through adev->mca.mpio.ras_if.
 *
 * Returns 0 on success or when there is no mpio RAS block, otherwise the
 * error code returned by amdgpu_ras_register_ras_block().
 */
int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_mca_ras_block *mpio_ras = adev->mca.mpio.ras;
	int ret;

	/* No mpio RAS block was set up for this device: nothing to register. */
	if (!mpio_ras)
		return 0;

	ret = amdgpu_ras_register_ras_block(adev, &mpio_ras->ras_block);
	if (ret) {
		dev_err(adev->dev, "Failed to register mca.mpio ras block!\n");
		return ret;
	}

	/* Common interface fields are only populated after registration succeeds. */
	adev->mca.mpio.ras_if = &mpio_ras->ras_block.ras_comm;
	mpio_ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
	mpio_ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
	strcpy(mpio_ras->ras_block.ras_comm.name, "mca.mpio");

	return 0;
}

View file

@ -30,12 +30,7 @@ struct amdgpu_mca_ras {
struct amdgpu_mca_ras_block *ras;
};
struct amdgpu_mca_funcs {
void (*init)(struct amdgpu_device *adev);
};
struct amdgpu_mca {
const struct amdgpu_mca_funcs *funcs;
struct amdgpu_mca_ras mp0;
struct amdgpu_mca_ras mp1;
struct amdgpu_mca_ras mpio;
@ -55,5 +50,7 @@ void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
uint64_t mc_status_addr,
void *ras_error_status);
int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
#endif

View file

@ -1363,13 +1363,18 @@ static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
adev->hdp.ras = &hdp_v4_0_ras;
}
static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev)
{
struct amdgpu_mca *mca = &adev->mca;
/* is UMC the right IP to check for MCA? Maybe DF? */
switch (adev->ip_versions[UMC_HWIP][0]) {
case IP_VERSION(6, 7, 0):
if (!adev->gmc.xgmi.connected_to_cpu)
adev->mca.funcs = &mca_v3_0_funcs;
if (!adev->gmc.xgmi.connected_to_cpu) {
mca->mp0.ras = &mca_v3_0_mp0_ras;
mca->mp1.ras = &mca_v3_0_mp1_ras;
mca->mpio.ras = &mca_v3_0_mpio_ras;
}
break;
default:
break;
@ -1398,7 +1403,7 @@ static int gmc_v9_0_early_init(void *handle)
gmc_v9_0_set_mmhub_ras_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
gmc_v9_0_set_hdp_ras_funcs(adev);
gmc_v9_0_set_mca_funcs(adev);
gmc_v9_0_set_mca_ras_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
@ -1611,8 +1616,6 @@ static int gmc_v9_0_sw_init(void *handle)
adev->gfxhub.funcs->init(adev);
adev->mmhub.funcs->init(adev);
if (adev->mca.funcs)
adev->mca.funcs->init(adev);
spin_lock_init(&adev->gmc.invalidate_lock);

View file

@ -51,19 +51,13 @@ static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj,
return -EINVAL;
}
const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = {
static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = {
.query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
.query_ras_error_address = NULL,
};
struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
.ras_block = {
.ras_comm = {
.block = AMDGPU_RAS_BLOCK__MCA,
.sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP0,
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
.name = "mp0",
},
.hw_ops = &mca_v3_0_mp0_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
},
@ -77,19 +71,13 @@ static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev,
ras_error_status);
}
const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
.query_ras_error_address = NULL,
};
struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
.ras_block = {
.ras_comm = {
.block = AMDGPU_RAS_BLOCK__MCA,
.sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP1,
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
.name = "mp1",
},
.hw_ops = &mca_v3_0_mp1_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
},
@ -103,40 +91,14 @@ static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev,
ras_error_status);
}
const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
static const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
.query_ras_error_address = NULL,
};
struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
.ras_block = {
.ras_comm = {
.block = AMDGPU_RAS_BLOCK__MCA,
.sub_block_index = AMDGPU_RAS_MCA_BLOCK__MPIO,
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
.name = "mpio",
},
.hw_ops = &mca_v3_0_mpio_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
},
};
static void mca_v3_0_init(struct amdgpu_device *adev)
{
struct amdgpu_mca *mca = &adev->mca;
mca->mp0.ras = &mca_v3_0_mp0_ras;
mca->mp1.ras = &mca_v3_0_mp1_ras;
mca->mpio.ras = &mca_v3_0_mpio_ras;
amdgpu_ras_register_ras_block(adev, &mca->mp0.ras->ras_block);
amdgpu_ras_register_ras_block(adev, &mca->mp1.ras->ras_block);
amdgpu_ras_register_ras_block(adev, &mca->mpio.ras->ras_block);
mca->mp0.ras_if = &mca->mp0.ras->ras_block.ras_comm;
mca->mp1.ras_if = &mca->mp1.ras->ras_block.ras_comm;
mca->mpio.ras_if = &mca->mpio.ras->ras_block.ras_comm;
}
const struct amdgpu_mca_funcs mca_v3_0_funcs = {
.init = mca_v3_0_init,
};

View file

@ -21,6 +21,8 @@
#ifndef __MCA_V3_0_H__
#define __MCA_V3_0_H__
extern const struct amdgpu_mca_funcs mca_v3_0_funcs;
extern struct amdgpu_mca_ras_block mca_v3_0_mp0_ras;
extern struct amdgpu_mca_ras_block mca_v3_0_mp1_ras;
extern struct amdgpu_mca_ras_block mca_v3_0_mpio_ras;
#endif