drm/amdgpu: add ras error count after each query (v2) v1: increase ras ce/ue error count v2: log the number of correctable and uncorrectable errors Signed-off-by: Tao Zhou <[email protected]> Signed-off-by: Hawking Zhang <[email protected]> Reviewed-by: Dennis Li <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3d39d62..a613428 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -601,9 +601,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, default: break; } + + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + info->ue_count = obj->err_data.ue_count; info->ce_count = obj->err_data.ce_count; + if (err_data.ce_count) + dev_info(adev->dev, "%ld correctable errors detected in %s block\n", + obj->err_data.ce_count, ras_block_str(info->head.block)); + if (err_data.ue_count) + dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", + obj->err_data.ue_count, ras_block_str(info->head.block)); + return 0; }