drm/amdgpu: added support to get mGPU DRAM base

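Resolve an issue with RAS error injection in mGPU (XGMI) configurations:
when more than one physical node is present, offset the injection address
by the DRAM base address of the local node.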

Reviewed-by: Hawking Zhang <[email protected]>
Signed-off-by: John Clements <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
index 61a26c1..057f6ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -52,6 +52,9 @@ struct amdgpu_df_funcs {
 	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
 	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
 			 uint32_t ficadl_val, uint32_t ficadh_val);
+	uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev,
+				       uint32_t df_inst);
+	uint32_t (*get_df_inst_id)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_df {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 766be7f1..cef94e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -742,6 +742,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
 	return 0;
 }
 
+/*
+ * Add the DRAM base of this device's DF instance to addr. addr is returned
+ * unchanged when the DF callbacks needed for the lookup are not provided.
+ */
+static uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev,
+					   uint64_t addr)
+{
+	const struct amdgpu_df_funcs *df = adev->df.funcs;
+
+	if (!df || !df->get_df_inst_id || !df->get_dram_base_addr)
+		return addr;
+	return addr + df->get_dram_base_addr(adev, df->get_df_inst_id(adev));
+}
+
 /* wrapper of psp_ras_trigger_error */
 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 		struct ras_inject_if *info)
@@ -759,6 +773,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 	if (!obj)
 		return -EINVAL;
 
+	/* Calculate XGMI relative offset */
+	if (adev->gmc.xgmi.num_physical_nodes > 1) {
+		block_info.address = get_xgmi_relative_phy_addr(adev,
+								block_info.address);
+	}
+
 	switch (info->head.block) {
 	case AMDGPU_RAS_BLOCK__GFX:
 		if (adev->gfx.funcs->ras_error_inject)
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index f513265..5a1bd8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -27,6 +27,9 @@
 #include "df/df_3_6_offset.h"
 #include "df/df_3_6_sh_mask.h"
 
+#define DF_3_6_SMN_REG_INST_DIST        0x8 /* register offset per DF instance */
+#define DF_3_6_INST_CNT                 8   /* DF instances scanned for a match */
+
 static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
 				       16, 32, 0, 0, 0, 2, 4, 8};
 
@@ -683,6 +686,58 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
 	}
 }
 
+/*
+ * Return the DRAM base address held in DramBaseAddress0 for @df_inst, or
+ * 0 (with a warning) when the register's AddrRngVal field is clear.
+ */
+static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev,
+					   uint32_t df_inst)
+{
+	uint32_t reg_val;
+
+	reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 +
+			      df_inst * DF_3_6_SMN_REG_INST_DIST);
+
+	if (!REG_GET_FIELD(reg_val, DF_CS_UMC_AON0_DramBaseAddress0,
+			   AddrRngVal)) {
+		DRM_WARN("address range not valid\n");
+		return 0;
+	}
+
+	/* Widen to 64 bits before shifting so upper address bits survive. */
+	return (uint64_t)REG_GET_FIELD(reg_val, DF_CS_UMC_AON0_DramBaseAddress0,
+				       DramBaseAddr) << 28;
+}
+
+/*
+ * Find the DF instance whose DramLimitAddress0 DstFabricID matches this
+ * device's XGMI physical node id. Returns the instance index, or 0 (with a
+ * warning) when none of the DF_3_6_INST_CNT instances match.
+ */
+static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev)
+{
+	uint32_t limit_reg;
+	uint32_t node_id;
+	uint32_t inst;
+
+	/* Walk through DF dst nodes to find current XGMI node */
+	for (inst = 0; inst < DF_3_6_INST_CNT; inst++) {
+		limit_reg = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 +
+					inst * DF_3_6_SMN_REG_INST_DIST);
+		node_id = REG_GET_FIELD(limit_reg,
+					DF_CS_UMC_AON0_DramLimitAddress0,
+					DstFabricID);
+
+		/* TODO: establish reason dest fabric id is offset by 7 */
+		if (adev->gmc.xgmi.physical_node_id == (node_id >> 7))
+			return inst;
+	}
+
+	/* NOTE: 0 is also a valid index; callers cannot detect this failure. */
+	DRM_WARN("cant match df dst id with gpu node\n");
+	return 0;
+}
+
 const struct amdgpu_df_funcs df_v3_6_funcs = {
 	.sw_init = df_v3_6_sw_init,
 	.sw_fini = df_v3_6_sw_fini,
@@ -696,5 +751,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
 	.pmc_stop = df_v3_6_pmc_stop,
 	.pmc_get_count = df_v3_6_pmc_get_count,
 	.get_fica = df_v3_6_get_fica,
-	.set_fica = df_v3_6_set_fica
+	.set_fica = df_v3_6_set_fica,
+	.get_dram_base_addr = df_v3_6_get_dram_base_addr,
+	.get_df_inst_id = df_v3_6_get_df_inst_id
 };