drm/amd/powerplay: expose current hotspot and memory temperatures V2
Two new hwmon interfaces(temp2_input and temp3_input) are added.
They are supported on SOC15 dGPUs only.
- V2: correct thermal sensor output
Signed-off-by: Evan Quan <[email protected]>
Reviewed-by: Alex Deucher <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 7093b4e..00ca8ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1382,6 +1382,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
+ int channel = to_sensor_dev_attr(attr)->index;
int r, temp, size = sizeof(temp);
/* Can't get temperature when the card is off */
@@ -1389,11 +1390,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
- /* get the temperature */
- r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
- (void *)&temp, &size);
- if (r)
- return r;
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ switch (channel) {
+ case PP_TEMP_JUNCTION:
+ /* get current junction temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ case PP_TEMP_EDGE:
+ /* get current edge temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ case PP_TEMP_MEM:
+ /* get current memory temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ }
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
@@ -2041,7 +2063,8 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
* hwmon interfaces for GPU temperature:
*
- * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
+ * - temp2_input and temp3_input are supported on SOC15 dGPUs only
*
* - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
* - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
@@ -2098,13 +2121,15 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
*/
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
@@ -2134,8 +2159,10 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp2_input.dev_attr.attr,
&sensor_dev_attr_temp2_crit.dev_attr.attr,
&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp3_input.dev_attr.attr,
&sensor_dev_attr_temp3_crit.dev_attr.attr,
&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
&sensor_dev_attr_temp1_emergency.dev_attr.attr,
@@ -2272,7 +2299,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
- attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
+ attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_input.dev_attr.attr))
return 0;
return effective_mode;
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 2b579ba..a8bf8e9 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -111,6 +111,9 @@ enum amd_pp_sensors {
AMDGPU_PP_SENSOR_GPU_LOAD,
AMDGPU_PP_SENSOR_GFX_MCLK,
AMDGPU_PP_SENSOR_GPU_TEMP,
+ AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP,
+ AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
+ AMDGPU_PP_SENSOR_MEM_TEMP,
AMDGPU_PP_SENSOR_VCE_POWER,
AMDGPU_PP_SENSOR_UVD_POWER,
AMDGPU_PP_SENSOR_GPU_POWER,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 1422bc4..d5d0db4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -3785,6 +3785,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr);
*size = 4;
break;
+ case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot);
+ *((uint32_t *)value) = smum_get_argument(hwmgr) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_TEMP:
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM);
+ *((uint32_t *)value) = smum_get_argument(hwmgr) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
case AMDGPU_PP_SENSOR_UVD_POWER:
*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
*size = 4;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index aeeeaa7..8d2865b7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -1338,6 +1338,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
void *value, int *size)
{
struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
+ SmuMetrics_t metrics_table;
int ret = 0;
switch (idx) {
@@ -1360,6 +1361,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr);
*size = 4;
break;
+ case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+ ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
+
+ *((uint32_t *)value) = metrics_table.TemperatureHotspot *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_TEMP:
+ ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
+
+ *((uint32_t *)value) = metrics_table.TemperatureHBM *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
case AMDGPU_PP_SENSOR_UVD_POWER:
*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
*size = 4;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 3a9629c..91e26f8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -2138,10 +2138,28 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
if (!ret)
*size = 4;
break;
- case AMDGPU_PP_SENSOR_GPU_TEMP:
+ case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
*((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr);
*size = 4;
break;
+ case AMDGPU_PP_SENSOR_EDGE_TEMP:
+ ret = vega20_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
+
+ *((uint32_t *)value) = metrics_table.TemperatureEdge *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_TEMP:
+ ret = vega20_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
+
+ *((uint32_t *)value) = metrics_table.TemperatureHBM *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
case AMDGPU_PP_SENSOR_UVD_POWER:
*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
*size = 4;