drm/amdgpu: support ce interrupt in ras module

correctable error can also trigger interrupt in some ras blocks

Signed-off-by: Tao Zhou <[email protected]>
Reviewed-by: Hawking Zhang <[email protected]>
Reviewed-by: Alex Deucher <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2a48786..d4c0847 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1049,12 +1049,12 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
 			 * the error.
 			 */
 			if (ret == AMDGPU_RAS_UE) {
+				/* these counts could be left as 0 if
+				 * some blocks do not count error number
+				 */
 				obj->err_data.ue_count += err_data.ue_count;
+				obj->err_data.ce_count += err_data.ce_count;
 			}
-			/* Might need get ce count by register, but not all IP
-			 * saves ce count, some IP just use one bit or two bits
-			 * to indicate ce happened.
-			 */
 		}
 	}
 }
@@ -1551,6 +1551,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	if (amdgpu_ras_fs_init(adev))
 		goto fs_out;
 
+	/* ras init for each ras block */
+	if (adev->umc.funcs->ras_init)
+		adev->umc.funcs->ras_init(adev);
+
 	DRM_INFO("RAS INFO: ras initialized successfully, "
 			"hardware ability[%x] ras_mask[%x]\n",
 			con->hw_supported, con->supported);