1/* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 */ 23 24#define SWSMU_CODE_LAYER_L2 25 26#include <linux/firmware.h> 27#include "amdgpu.h" 28#include "amdgpu_smu.h" 29#include "atomfirmware.h" 30#include "amdgpu_atomfirmware.h" 31#include "amdgpu_atombios.h" 32#include "smu_v13_0_6_pmfw.h" 33#include "smu13_driver_if_v13_0_6.h" 34#include "smu_v13_0_6_ppsmc.h" 35#include "soc15_common.h" 36#include "atom.h" 37#include "power_state.h" 38#include "smu_v13_0.h" 39#include "smu_v13_0_6_ppt.h" 40#include "nbio/nbio_7_4_offset.h" 41#include "nbio/nbio_7_4_sh_mask.h" 42#include "thm/thm_11_0_2_offset.h" 43#include "thm/thm_11_0_2_sh_mask.h" 44#include "amdgpu_xgmi.h" 45#include <linux/pci.h> 46#include "amdgpu_ras.h" 47#include "amdgpu_mca.h" 48#include "amdgpu_aca.h" 49#include "smu_cmn.h" 50#include "mp/mp_13_0_6_offset.h" 51#include "mp/mp_13_0_6_sh_mask.h" 52#include "umc_v12_0.h" 53 54#undef MP1_Public 55#undef smnMP1_FIRMWARE_FLAGS 56 57/* TODO: Check final register offsets */ 58#define MP1_Public 0x03b00000 59#define smnMP1_FIRMWARE_FLAGS 0x3010028 60/* 61 * DO NOT use these for err/warn/info/debug messages. 62 * Use dev_err, dev_warn, dev_info and dev_dbg instead. 63 * They are more MGPU friendly. 
 */
#undef pr_err
#undef pr_warn
#undef pr_info
#undef pr_debug

MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin");

/* Recover the amdgpu_device that embeds a given pm.smu_i2c adapter. */
#define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c))

/* Mark a common SMU feature bit as valid and map it to the ASIC bit. */
#define SMU_13_0_6_FEA_MAP(smu_feature, smu_13_0_6_feature) \
	[smu_feature] = { 1, (smu_13_0_6_feature) }

#define FEATURE_MASK(feature) (1ULL << feature)
/* Union of all feature bits that gate DPM on some clock domain. */
#define SMC_DPM_FEATURE \
	(FEATURE_MASK(FEATURE_DATA_CALCULATION) | \
	 FEATURE_MASK(FEATURE_DPM_GFXCLK) | FEATURE_MASK(FEATURE_DPM_UCLK) | \
	 FEATURE_MASK(FEATURE_DPM_SOCCLK) | FEATURE_MASK(FEATURE_DPM_FCLK) | \
	 FEATURE_MASK(FEATURE_DPM_LCLK) | FEATURE_MASK(FEATURE_DPM_XGMI) | \
	 FEATURE_MASK(FEATURE_DPM_VCN))

/* possible frequency drift (1 MHz) tolerated when comparing levels */
#define EPSILON 1

#define smnPCIE_ESM_CTRL 0x93D0
#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1a340288
#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK 0x00000070L
#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT 0x4
#define MAX_LINK_WIDTH 6

#define smnPCIE_LC_SPEED_CNTL 0x1a340290
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xE0
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5
#define LINK_SPEED_MAX 4

#define SMU_13_0_6_DSCLK_THRESHOLD 140

/* Designated-initializer helper: index MCA bank IPID table by IP id. */
#define MCA_BANK_IPID(_ip, _hwid, _type) \
	[AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, }

/* Hardware ID / MCA type pair identifying an MCA bank for a given IP. */
struct mca_bank_ipid {
	enum amdgpu_mca_ip ip;
	uint16_t hwid;
	uint16_t mcatype;
};

/*
 * Per-RAS-block MCA description: the RAS block/IP it belongs to, the
 * error codes it recognises, and callbacks to count errors in and
 * validate a dumped MCA bank entry.
 */
struct mca_ras_info {
	enum amdgpu_ras_block blkid;
	enum amdgpu_mca_ip ip;
	int *err_code_array;
	int err_code_count;
	int (*get_err_count)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
			     enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count);
	bool (*bank_is_valid)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
			      enum amdgpu_mca_error_type type, struct mca_bank_entry *entry);
};

/* P2S table ids: ASCII "P2SA" (APU variant) and "P2SX" (non-APU variant). */
#define P2S_TABLE_ID_A 0x50325341
#define P2S_TABLE_ID_X 0x50325358

// clang-format off
/*
 * Driver message -> PPSMC message index map. Third field marks messages
 * that remain valid when running as an SR-IOV VF.
 */
static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
	MSG_MAP(TestMessage,			PPSMC_MSG_TestMessage,			0),
	MSG_MAP(GetSmuVersion,			PPSMC_MSG_GetSmuVersion,		1),
	MSG_MAP(GetDriverIfVersion,		PPSMC_MSG_GetDriverIfVersion,		1),
	MSG_MAP(EnableAllSmuFeatures,		PPSMC_MSG_EnableAllSmuFeatures,		0),
	MSG_MAP(DisableAllSmuFeatures,		PPSMC_MSG_DisableAllSmuFeatures,	0),
	MSG_MAP(RequestI2cTransaction,		PPSMC_MSG_RequestI2cTransaction,	0),
	MSG_MAP(GetMetricsTable,		PPSMC_MSG_GetMetricsTable,		1),
	MSG_MAP(GetMetricsVersion,		PPSMC_MSG_GetMetricsVersion,		1),
	MSG_MAP(GetEnabledSmuFeaturesHigh,	PPSMC_MSG_GetEnabledSmuFeaturesHigh,	1),
	MSG_MAP(GetEnabledSmuFeaturesLow,	PPSMC_MSG_GetEnabledSmuFeaturesLow,	1),
	MSG_MAP(SetDriverDramAddrHigh,		PPSMC_MSG_SetDriverDramAddrHigh,	1),
	MSG_MAP(SetDriverDramAddrLow,		PPSMC_MSG_SetDriverDramAddrLow,		1),
	MSG_MAP(SetToolsDramAddrHigh,		PPSMC_MSG_SetToolsDramAddrHigh,		0),
	MSG_MAP(SetToolsDramAddrLow,		PPSMC_MSG_SetToolsDramAddrLow,		0),
	MSG_MAP(SetSoftMinByFreq,		PPSMC_MSG_SetSoftMinByFreq,		0),
	MSG_MAP(SetSoftMaxByFreq,		PPSMC_MSG_SetSoftMaxByFreq,		0),
	MSG_MAP(GetMinDpmFreq,			PPSMC_MSG_GetMinDpmFreq,		1),
	MSG_MAP(GetMaxDpmFreq,			PPSMC_MSG_GetMaxDpmFreq,		1),
	MSG_MAP(GetDpmFreqByIndex,		PPSMC_MSG_GetDpmFreqByIndex,		1),
	MSG_MAP(SetPptLimit,			PPSMC_MSG_SetPptLimit,			0),
	MSG_MAP(GetPptLimit,			PPSMC_MSG_GetPptLimit,			1),
	MSG_MAP(GfxDeviceDriverReset,		PPSMC_MSG_GfxDriverReset,		0),
	MSG_MAP(DramLogSetDramAddrHigh,		PPSMC_MSG_DramLogSetDramAddrHigh,	0),
	MSG_MAP(DramLogSetDramAddrLow,		PPSMC_MSG_DramLogSetDramAddrLow,	0),
	MSG_MAP(DramLogSetDramSize,		PPSMC_MSG_DramLogSetDramSize,		0),
	MSG_MAP(GetDebugData,			PPSMC_MSG_GetDebugData,			0),
	MSG_MAP(SetNumBadHbmPagesRetired,	PPSMC_MSG_SetNumBadHbmPagesRetired,	0),
	MSG_MAP(DFCstateControl,		PPSMC_MSG_DFCstateControl,		0),
	MSG_MAP(GetGmiPwrDnHyst,		PPSMC_MSG_GetGmiPwrDnHyst,		0),
	MSG_MAP(SetGmiPwrDnHyst,		PPSMC_MSG_SetGmiPwrDnHyst,		0),
	MSG_MAP(GmiPwrDnControl,		PPSMC_MSG_GmiPwrDnControl,		0),
	MSG_MAP(EnterGfxoff,			PPSMC_MSG_EnterGfxoff,			0),
	MSG_MAP(ExitGfxoff,			PPSMC_MSG_ExitGfxoff,			0),
	MSG_MAP(EnableDeterminism,		PPSMC_MSG_EnableDeterminism,		0),
	MSG_MAP(DisableDeterminism,		PPSMC_MSG_DisableDeterminism,		0),
	MSG_MAP(GfxDriverResetRecovery,		PPSMC_MSG_GfxDriverResetRecovery,	0),
	MSG_MAP(GetMinGfxclkFrequency,		PPSMC_MSG_GetMinGfxDpmFreq,		1),
	MSG_MAP(GetMaxGfxclkFrequency,		PPSMC_MSG_GetMaxGfxDpmFreq,		1),
	MSG_MAP(SetSoftMinGfxclk,		PPSMC_MSG_SetSoftMinGfxClk,		1),
	MSG_MAP(SetSoftMaxGfxClk,		PPSMC_MSG_SetSoftMaxGfxClk,		1),
	MSG_MAP(PrepareMp1ForUnload,		PPSMC_MSG_PrepareForDriverUnload,	0),
	MSG_MAP(GetCTFLimit,			PPSMC_MSG_GetCTFLimit,			0),
	MSG_MAP(GetThermalLimit,		PPSMC_MSG_ReadThrottlerLimit,		0),
	MSG_MAP(ClearMcaOnRead,			PPSMC_MSG_ClearMcaOnRead,		0),
	MSG_MAP(QueryValidMcaCount,		PPSMC_MSG_QueryValidMcaCount,		0),
	MSG_MAP(QueryValidMcaCeCount,		PPSMC_MSG_QueryValidMcaCeCount,		0),
	MSG_MAP(McaBankDumpDW,			PPSMC_MSG_McaBankDumpDW,		0),
	MSG_MAP(McaBankCeDumpDW,		PPSMC_MSG_McaBankCeDumpDW,		0),
	MSG_MAP(SelectPLPDMode,			PPSMC_MSG_SelectPLPDMode,		0),
	MSG_MAP(RmaDueToBadPageThreshold,	PPSMC_MSG_RmaDueToBadPageThreshold,	0),
};

// clang-format on
/* Driver clock type -> PPCLK index. Note MCLK aliases to PPCLK_UCLK. */
static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
	CLK_MAP(SOCCLK, PPCLK_SOCCLK),
	CLK_MAP(FCLK, PPCLK_FCLK),
	CLK_MAP(UCLK, PPCLK_UCLK),
	CLK_MAP(MCLK, PPCLK_UCLK),
	CLK_MAP(DCLK, PPCLK_DCLK),
	CLK_MAP(VCLK, PPCLK_VCLK),
	CLK_MAP(LCLK, PPCLK_LCLK),
};

/*
 * Common feature bit -> ASIC feature bit. Several driver-side bits
 * (DPM_VCLK/DPM_DCLK/VCN_DPM) all map to the single FEATURE_DPM_VCN bit.
 */
static const struct cmn2asic_mapping smu_v13_0_6_feature_mask_map[SMU_FEATURE_COUNT] = {
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DATA_CALCULATIONS_BIT, FEATURE_DATA_CALCULATION),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_GFXCLK_BIT, FEATURE_DPM_GFXCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_UCLK_BIT, FEATURE_DPM_UCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_SOCCLK_BIT, FEATURE_DPM_SOCCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_FCLK_BIT, FEATURE_DPM_FCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_LCLK_BIT, FEATURE_DPM_LCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_VCLK_BIT, FEATURE_DPM_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_DCLK_BIT, FEATURE_DPM_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_XGMI_BIT, FEATURE_DPM_XGMI),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_GFXCLK_BIT, FEATURE_DS_GFXCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_SOCCLK_BIT, FEATURE_DS_SOCCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_LCLK_BIT, FEATURE_DS_LCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_FCLK_BIT, FEATURE_DS_FCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_VCN_DPM_BIT, FEATURE_DPM_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_PPT_BIT, FEATURE_PPT),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_TDC_BIT, FEATURE_TDC),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_APCC_DFLL_BIT, FEATURE_APCC_DFLL),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_MP1_CG_BIT, FEATURE_SMU_CG),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_GFXOFF_BIT, FEATURE_GFXOFF),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_FW_CTF_BIT, FEATURE_FW_CTF),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_THERMAL_BIT, FEATURE_THERMAL),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT, FEATURE_XGMI_PER_LINK_PWR_DOWN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DF_CSTATE_BIT, FEATURE_DF_CSTATE),
};

/* Driver-table slots used on this ASIC. */
#define TABLE_PMSTATUSLOG 0
#define TABLE_SMU_METRICS 1
#define TABLE_I2C_COMMANDS 2
#define TABLE_COUNT 3

static const struct cmn2asic_mapping smu_v13_0_6_table_map[SMU_TABLE_COUNT] = {
	TAB_MAP(PMSTATUSLOG),
	TAB_MAP(SMU_METRICS),
	TAB_MAP(I2C_COMMANDS),
};

/* PMFW throttler status bit -> generic SMU throttler bit. */
static const uint8_t smu_v13_0_6_throttler_map[] = {
	[THROTTLER_PPT_BIT]		= (SMU_THROTTLER_PPT0_BIT),
	[THROTTLER_THERMAL_SOCKET_BIT]	= (SMU_THROTTLER_TEMP_GPU_BIT),
	[THROTTLER_THERMAL_HBM_BIT]	= (SMU_THROTTLER_TEMP_MEM_BIT),
	[THROTTLER_THERMAL_VR_BIT]	= (SMU_THROTTLER_TEMP_VR_GFX_BIT),
	[THROTTLER_PROCHOT_BIT]		= (SMU_THROTTLER_PROCHOT_GFX_BIT),
};

/*
 * One-time values cached from the PMFW metrics table. Valid only after
 * Init has been set by smu_v13_0_6_setup_driver_pptable().
 */
struct PPTable_t {
	uint32_t MaxSocketPowerLimit;
	uint32_t MaxGfxclkFrequency;
	uint32_t MinGfxclkFrequency;
	uint32_t FclkFrequencyTable[4];
	uint32_t UclkFrequencyTable[4];
	uint32_t SocclkFrequencyTable[4];
	uint32_t VclkFrequencyTable[4];
	uint32_t DclkFrequencyTable[4];
	uint32_t LclkFrequencyTable[4];
	uint32_t MaxLclkDpmRange;
	uint32_t MinLclkDpmRange;
	uint64_t PublicSerialNumber_AID;
	bool Init;
};

/* PMFW reports many values in Q10 fixed point; round to nearest integer. */
#define SMUQ10_TO_UINT(x) ((x) >> 10)
#define SMUQ10_FRAC(x) ((x) & 0x3ff)
#define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200))
/*
 * The metrics table layout differs between APU (MetricsTableA_t) and
 * dGPU (MetricsTableX_t); pick the right view based on the device flags.
 * Requires locals 'adev', 'metrics_a' and 'metrics_x' in scope.
 */
#define GET_METRIC_FIELD(field) ((adev->flags & AMD_IS_APU) ?\
		(metrics_a->field) : (metrics_x->field))

/* Ties a clock type to its DPM feature bit, dpm table and frequency table. */
struct smu_v13_0_6_dpm_map {
	enum smu_clk_type clk_type;
	uint32_t feature_num;
	struct smu_13_0_dpm_table *dpm_table;
	uint32_t *freq_table;
};

/*
 * Request the SMU firmware image and, from its v2.1 header, locate the
 * P2S table matching this device (APU vs non-APU id). The table, if
 * found, is registered as the AMDGPU_UCODE_ID_P2S_TABLE ucode entry.
 * Returns 0 on success (also when no matching table exists) or the
 * firmware request error; the firmware is released on failure.
 */
static int smu_v13_0_6_init_microcode(struct smu_context *smu)
{
	const struct smc_firmware_header_v2_1 *v2_1;
	const struct common_firmware_header *hdr;
	struct amdgpu_firmware_info *ucode = NULL;
	struct smc_soft_pptable_entry *entries;
	struct amdgpu_device *adev = smu->adev;
	uint32_t p2s_table_id = P2S_TABLE_ID_A;
	int ret = 0, i, p2stable_count;
	char ucode_prefix[15];
	char fw_name[30];

	/* No need to load P2S tables in IOV mode */
	if (amdgpu_sriov_vf(adev))
		return 0;

	if (!(adev->flags & AMD_IS_APU))
		p2s_table_id = P2S_TABLE_ID_X;

	amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix,
				       sizeof(ucode_prefix));

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);

	ret = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name);
	if (ret)
		goto out;

	hdr = (const struct common_firmware_header *)adev->pm.fw->data;
	amdgpu_ucode_print_smc_hdr(hdr);

	/* SMU v13.0.6 binary file doesn't carry pptables, instead the entries
	 * are used to carry p2s tables.
	 */
	v2_1 = (const struct smc_firmware_header_v2_1 *)adev->pm.fw->data;
	entries = (struct smc_soft_pptable_entry
			   *)((uint8_t *)v2_1 +
			      le32_to_cpu(v2_1->pptable_entry_offset));
	p2stable_count = le32_to_cpu(v2_1->pptable_count);
	for (i = 0; i < p2stable_count; i++) {
		if (le32_to_cpu(entries[i].id) == p2s_table_id) {
			smu->pptable_firmware.data =
				((uint8_t *)v2_1 +
				 le32_to_cpu(entries[i].ppt_offset_bytes));
			smu->pptable_firmware.size =
				le32_to_cpu(entries[i].ppt_size_bytes);
			break;
		}
	}

	if (smu->pptable_firmware.data && smu->pptable_firmware.size) {
		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE];
		ucode->ucode_id = AMDGPU_UCODE_ID_P2S_TABLE;
		ucode->fw = &smu->pptable_firmware;
		adev->firmware.fw_size += ALIGN(ucode->fw->size, PAGE_SIZE);
	}

	return 0;
out:
	amdgpu_ucode_release(&adev->pm.fw);

	return ret;
}

/*
 * Describe the driver tables (tool log, metrics, I2C) and allocate the
 * CPU-side shadow buffers for metrics, gpu_metrics and the driver
 * PPTable. Earlier allocations are freed if a later one fails.
 */
static int smu_v13_0_6_tables_init(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	struct smu_table *tables = smu_table->tables;
	struct amdgpu_device *adev = smu->adev;

	/* The tool/status log table is only exposed on non-APU parts. */
	if (!(adev->flags & AMD_IS_APU))
		SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE,
			       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);

	/* Size for the larger of the APU/dGPU metrics layouts. */
	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS,
		       max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)),
		       PAGE_SIZE,
		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);

	SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t),
		       PAGE_SIZE,
		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);

	smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableX_t),
					       sizeof(MetricsTableA_t)), GFP_KERNEL);
	if (!smu_table->metrics_table)
		return -ENOMEM;
	smu_table->metrics_time = 0;

	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_5);
	smu_table->gpu_metrics_table =
		kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
	if (!smu_table->gpu_metrics_table) {
		kfree(smu_table->metrics_table);
		return -ENOMEM;
	}

	smu_table->driver_pptable =
		kzalloc(sizeof(struct PPTable_t), GFP_KERNEL);
	if (!smu_table->driver_pptable) {
		kfree(smu_table->metrics_table);
		kfree(smu_table->gpu_metrics_table);
		return -ENOMEM;
	}

	return 0;
}

/* Allocate the zero-initialised smu_13_0 DPM context. */
static int smu_v13_0_6_allocate_dpm_context(struct smu_context *smu)
{
	struct smu_dpm_context *smu_dpm = &smu->smu_dpm;

	smu_dpm->dpm_context =
		kzalloc(sizeof(struct smu_13_0_dpm_context), GFP_KERNEL);
	if (!smu_dpm->dpm_context)
		return -ENOMEM;
	smu_dpm->dpm_context_size = sizeof(struct smu_13_0_dpm_context);

	return 0;
}

/* Initialise driver tables, then the DPM context. */
static int smu_v13_0_6_init_smc_tables(struct smu_context *smu)
{
	int ret = 0;

	ret = smu_v13_0_6_tables_init(smu);
	if (ret)
		return ret;

	ret = smu_v13_0_6_allocate_dpm_context(smu);

	return ret;
}

/*
 * Report all features as allowed; the (firmware-side) pptable decides
 * what actually gets enabled. 'num' is in 32-bit words, max 2 (64 bits).
 */
static int smu_v13_0_6_get_allowed_feature_mask(struct smu_context *smu,
						uint32_t *feature_mask,
						uint32_t num)
{
	if (num > 2)
		return -EINVAL;

	/* pptable will handle the features to enable */
	memset(feature_mask, 0xFF, sizeof(uint32_t) * num);

	return 0;
}

/*
 * Refresh the cached metrics table from PMFW and optionally copy it to
 * 'metrics_table'. The cache is reused unless 'bypass_cache' is set,
 * the cache has never been filled, or it is older than 1 ms.
 */
static int smu_v13_0_6_get_metrics_table(struct smu_context *smu,
					 void *metrics_table, bool bypass_cache)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size;
	struct smu_table *table = &smu_table->driver_table;
	int ret;

	if (bypass_cache || !smu_table->metrics_time ||
	    time_after(jiffies,
		       smu_table->metrics_time + msecs_to_jiffies(1))) {
		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsTable, NULL);
		if (ret) {
			dev_info(smu->adev->dev,
				 "Failed to export SMU metrics table!\n");
			return ret;
		}

		/* PMFW wrote the table to VRAM; invalidate HDP before reading. */
		amdgpu_asic_invalidate_hdp(smu->adev, NULL);
		memcpy(smu_table->metrics_table, table->cpu_addr, table_size);

		smu_table->metrics_time = jiffies;
	}

	if (metrics_table)
		memcpy(metrics_table, smu_table->metrics_table, table_size);

	return 0;
}

/*
 * Fill 'metrics' with an amdgpu_pm_metrics blob: a common header
 * followed by a fresh (cache-bypassed) copy of the raw PMFW metrics
 * table. Returns the total size written, -EINVAL on bad arguments or
 * -EOVERFLOW if 'max_size' cannot hold header + table.
 */
static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu,
					  void *metrics, size_t max_size)
{
	struct smu_table_context *smu_tbl_ctxt = &smu->smu_table;
	uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version;
	uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size;
	struct amdgpu_pm_metrics *pm_metrics = metrics;
	uint32_t pmfw_version;
	int ret;

	if (!pm_metrics || !max_size)
		return -EINVAL;

	if (max_size < (table_size + sizeof(pm_metrics->common_header)))
		return -EOVERFLOW;

	/* Don't use cached metrics data */
	ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true);
	if (ret)
		return ret;

	smu_cmn_get_smc_version(smu, NULL, &pmfw_version);

	memset(&pm_metrics->common_header, 0,
	       sizeof(pm_metrics->common_header));
	pm_metrics->common_header.mp1_ip_discovery_version =
		IP_VERSION(13, 0, 6);
	pm_metrics->common_header.pmfw_version = pmfw_version;
	pm_metrics->common_header.pmmetrics_version = table_version;
	pm_metrics->common_header.structure_size =
		sizeof(pm_metrics->common_header) + table_size;

	return pm_metrics->common_header.structure_size;
}

/*
 * Populate the driver PPTable once from the metrics table: power limit,
 * gfxclk min/max, the per-domain frequency tables and the AID0 serial
 * number. Polls (up to ~100ms) until PMFW reports a non-zero
 * AccumulationCounter, i.e. the metrics are actually valid.
 */
static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table;
	MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	struct amdgpu_device *adev = smu->adev;
	int ret, i, retry = 100;
	uint32_t table_version;

	/* Store one-time values in driver PPTable */
	if (!pptable->Init) {
		while (--retry) {
			ret = smu_v13_0_6_get_metrics_table(smu, NULL, true);
			if (ret)
				return ret;

			/* Ensure that metrics have been updated */
			if (GET_METRIC_FIELD(AccumulationCounter))
				break;

			usleep_range(1000, 1100);
		}

		if (!retry)
			return -ETIME;

		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion,
					   &table_version);
		if (ret)
			return ret;
		smu_table->tables[SMU_TABLE_SMU_METRICS].version =
			table_version;

		pptable->MaxSocketPowerLimit =
			SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit));
		pptable->MaxGfxclkFrequency =
			SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency));
		pptable->MinGfxclkFrequency =
			SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency));

		for (i = 0; i < 4; ++i) {
			pptable->FclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable)[i]);
			pptable->UclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable)[i]);
			pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND(
				GET_METRIC_FIELD(SocclkFrequencyTable)[i]);
			pptable->VclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable)[i]);
			pptable->DclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable)[i]);
			pptable->LclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable)[i]);
		}

		/* use AID0 serial number by default */
		pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID)[0];

		pptable->Init = true;
	}

	return 0;
}

/*
 * Query the ultimate (hardware) min/max frequency for a clock domain.
 * If DPM is disabled for the domain, both bounds fall back to the
 * lowest known frequency from the driver PPTable (0 if uninitialised).
 * Gfxclk uses dedicated messages; other clocks pass the clk index in
 * the upper 16 bits of the message parameter.
 */
static int smu_v13_0_6_get_dpm_ultimate_freq(struct smu_context *smu,
					     enum smu_clk_type clk_type,
					     uint32_t *min, uint32_t *max)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	uint32_t clock_limit = 0, param;
	int ret = 0, clk_id = 0;

	if (!smu_cmn_clk_dpm_is_enabled(smu, clk_type)) {
		/* DPM disabled: report the lowest PPTable frequency for both bounds. */
		switch (clk_type) {
		case SMU_MCLK:
		case SMU_UCLK:
			if (pptable->Init)
				clock_limit = pptable->UclkFrequencyTable[0];
			break;
		case SMU_GFXCLK:
		case SMU_SCLK:
			if (pptable->Init)
				clock_limit = pptable->MinGfxclkFrequency;
			break;
		case SMU_SOCCLK:
			if (pptable->Init)
				clock_limit = pptable->SocclkFrequencyTable[0];
			break;
		case SMU_FCLK:
			if (pptable->Init)
				clock_limit = pptable->FclkFrequencyTable[0];
			break;
		case SMU_VCLK:
			if (pptable->Init)
				clock_limit = pptable->VclkFrequencyTable[0];
			break;
		case SMU_DCLK:
			if (pptable->Init)
				clock_limit = pptable->DclkFrequencyTable[0];
			break;
		default:
			break;
		}

		if (min)
			*min = clock_limit;

		if (max)
			*max = clock_limit;

		return 0;
	}

	/* Non-gfx clocks carry their ASIC clk index in bits 31:16 of the param. */
	if (!(clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)) {
		clk_id = smu_cmn_to_asic_specific_index(
			smu, CMN2ASIC_MAPPING_CLK, clk_type);
		if (clk_id < 0) {
			ret = -EINVAL;
			goto failed;
		}
		param = (clk_id & 0xffff) << 16;
	}

	if (max) {
		if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)
			ret = smu_cmn_send_smc_msg(
				smu, SMU_MSG_GetMaxGfxclkFrequency, max);
		else
			ret = smu_cmn_send_smc_msg_with_param(
				smu, SMU_MSG_GetMaxDpmFreq, param, max);
		if (ret)
			goto failed;
	}

	if (min) {
		if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)
			ret = smu_cmn_send_smc_msg(
				smu, SMU_MSG_GetMinGfxclkFrequency, min);
		else
			ret = smu_cmn_send_smc_msg_with_param(
				smu, SMU_MSG_GetMinDpmFreq, param, min);
	}

failed:
	return ret;
}

/*
 * Get the number of DPM levels for a clock domain. PMFW returns the
 * highest level index for query 0xff, hence the +1 on success.
 */
static int smu_v13_0_6_get_dpm_level_count(struct smu_context *smu,
					   enum smu_clk_type clk_type,
					   uint32_t *levels)
{
	int ret;

	ret = smu_v13_0_get_dpm_freq_by_index(smu, clk_type, 0xff, levels);
	if (!ret)
		++(*levels);

	return ret;
}

/*
 * Build the default DPM tables: gfxclk gets a 2-level (min/max)
 * fine-grained table when its DPM feature is enabled, otherwise a
 * single level at MinGfxclkFrequency; the remaining domains are filled
 * from the PPTable frequency tables with the level count reported by
 * PMFW (1 level when the domain's DPM feature is disabled).
 */
static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	struct smu_table_context *smu_table = &smu->smu_table;
	struct smu_13_0_dpm_table *dpm_table = NULL;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	uint32_t gfxclkmin, gfxclkmax, levels;
	int ret = 0, i, j;
	struct smu_v13_0_6_dpm_map dpm_map[] = {
		{ SMU_SOCCLK, SMU_FEATURE_DPM_SOCCLK_BIT,
		  &dpm_context->dpm_tables.soc_table,
		  pptable->SocclkFrequencyTable },
		{ SMU_UCLK, SMU_FEATURE_DPM_UCLK_BIT,
		  &dpm_context->dpm_tables.uclk_table,
		  pptable->UclkFrequencyTable },
		{ SMU_FCLK, SMU_FEATURE_DPM_FCLK_BIT,
		  &dpm_context->dpm_tables.fclk_table,
		  pptable->FclkFrequencyTable },
		{ SMU_VCLK, SMU_FEATURE_DPM_VCLK_BIT,
		  &dpm_context->dpm_tables.vclk_table,
		  pptable->VclkFrequencyTable },
		{ SMU_DCLK, SMU_FEATURE_DPM_DCLK_BIT,
		  &dpm_context->dpm_tables.dclk_table,
		  pptable->DclkFrequencyTable },
	};

	smu_v13_0_6_setup_driver_pptable(smu);

	/* gfxclk dpm table setup */
	dpm_table = &dpm_context->dpm_tables.gfx_table;
	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) {
		/* In the case of gfxclk, only fine-grained dpm is honored.
		 * Get min/max values from FW.
		 */
		ret = smu_v13_0_6_get_dpm_ultimate_freq(smu, SMU_GFXCLK,
							&gfxclkmin, &gfxclkmax);
		if (ret)
			return ret;

		dpm_table->count = 2;
		dpm_table->dpm_levels[0].value = gfxclkmin;
		dpm_table->dpm_levels[0].enabled = true;
		dpm_table->dpm_levels[1].value = gfxclkmax;
		dpm_table->dpm_levels[1].enabled = true;
		dpm_table->min = dpm_table->dpm_levels[0].value;
		dpm_table->max = dpm_table->dpm_levels[1].value;
	} else {
		/* Gfx DPM disabled: pin a single level at the minimum frequency. */
		dpm_table->count = 1;
		dpm_table->dpm_levels[0].value = pptable->MinGfxclkFrequency;
		dpm_table->dpm_levels[0].enabled = true;
		dpm_table->min = dpm_table->dpm_levels[0].value;
		dpm_table->max = dpm_table->dpm_levels[0].value;
	}

	/* Remaining domains: level count from FW, values from the PPTable. */
	for (j = 0; j < ARRAY_SIZE(dpm_map); j++) {
		dpm_table = dpm_map[j].dpm_table;
		levels = 1;
		if (smu_cmn_feature_is_enabled(smu, dpm_map[j].feature_num)) {
			ret = smu_v13_0_6_get_dpm_level_count(
				smu, dpm_map[j].clk_type, &levels);
			if (ret)
				return ret;
		}
		dpm_table->count = levels;
		for (i = 0; i < dpm_table->count; ++i) {
			dpm_table->dpm_levels[i].value =
				dpm_map[j].freq_table[i];
			dpm_table->dpm_levels[i].enabled = true;
		}
		dpm_table->min = dpm_table->dpm_levels[0].value;
		dpm_table->max = dpm_table->dpm_levels[levels - 1].value;
	}

	return 0;
}

/*
 * No soft pptable exists for this ASIC; only record that there is no
 * thermal controller.
 */
static int smu_v13_0_6_setup_pptable(struct smu_context *smu)
{
	struct smu_table_context *table_context = &smu->smu_table;

	/* TODO: PPTable is not available.
	 * 1) Find an alternate way to get 'PPTable values' here.
	 * 2) Check if there is SW CTF
	 */
	table_context->thermal_controller_type = 0;

	return 0;
}

/*
 * Check MP1 firmware state via the firmware-flags register; 0 when the
 * interrupts-enabled flag is set (firmware up), -EIO otherwise.
 */
static int smu_v13_0_6_check_fw_status(struct smu_context *smu)
{
	struct amdgpu_device *adev = smu->adev;
	uint32_t mp1_fw_flags;

	mp1_fw_flags =
		RREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff));

	if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
	    MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT)
		return 0;

	return -EIO;
}

/*
 * Derive the UMD pstate (min/peak/standard) clocks for gfx, memory and
 * soc from the default DPM tables. The "standard" level uses the
 * SMU_13_0_6_UMD_PSTATE_*_LEVEL entries when each table is deep enough,
 * otherwise it falls back to the minimum.
 */
static int smu_v13_0_6_populate_umd_state_clk(struct smu_context *smu)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	struct smu_13_0_dpm_table *gfx_table =
		&dpm_context->dpm_tables.gfx_table;
	struct smu_13_0_dpm_table *mem_table =
		&dpm_context->dpm_tables.uclk_table;
	struct smu_13_0_dpm_table *soc_table =
		&dpm_context->dpm_tables.soc_table;
	struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;

	pstate_table->gfxclk_pstate.min = gfx_table->min;
	pstate_table->gfxclk_pstate.peak = gfx_table->max;
	pstate_table->gfxclk_pstate.curr.min = gfx_table->min;
	pstate_table->gfxclk_pstate.curr.max = gfx_table->max;

	pstate_table->uclk_pstate.min = mem_table->min;
	pstate_table->uclk_pstate.peak = mem_table->max;
	pstate_table->uclk_pstate.curr.min = mem_table->min;
	pstate_table->uclk_pstate.curr.max = mem_table->max;

	pstate_table->socclk_pstate.min = soc_table->min;
	pstate_table->socclk_pstate.peak = soc_table->max;
	pstate_table->socclk_pstate.curr.min = soc_table->min;
	pstate_table->socclk_pstate.curr.max = soc_table->max;

	if (gfx_table->count > SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL &&
	    mem_table->count > SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL &&
	    soc_table->count > SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL) {
		pstate_table->gfxclk_pstate.standard =
			gfx_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL].value;
pstate_table->uclk_pstate.standard = 778 mem_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL].value; 779 pstate_table->socclk_pstate.standard = 780 soc_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL].value; 781 } else { 782 pstate_table->gfxclk_pstate.standard = 783 pstate_table->gfxclk_pstate.min; 784 pstate_table->uclk_pstate.standard = 785 pstate_table->uclk_pstate.min; 786 pstate_table->socclk_pstate.standard = 787 pstate_table->socclk_pstate.min; 788 } 789 790 return 0; 791} 792 793static int smu_v13_0_6_get_clk_table(struct smu_context *smu, 794 struct pp_clock_levels_with_latency *clocks, 795 struct smu_13_0_dpm_table *dpm_table) 796{ 797 int i, count; 798 799 count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 800 dpm_table->count; 801 clocks->num_levels = count; 802 803 for (i = 0; i < count; i++) { 804 clocks->data[i].clocks_in_khz = 805 dpm_table->dpm_levels[i].value * 1000; 806 clocks->data[i].latency_in_us = 0; 807 } 808 809 return 0; 810} 811 812static int smu_v13_0_6_freqs_in_same_level(int32_t frequency1, 813 int32_t frequency2) 814{ 815 return (abs(frequency1 - frequency2) <= EPSILON); 816} 817 818static uint32_t smu_v13_0_6_get_throttler_status(struct smu_context *smu) 819{ 820 struct smu_power_context *smu_power = &smu->smu_power; 821 struct smu_13_0_power_context *power_context = smu_power->power_context; 822 uint32_t throttler_status = 0; 823 824 throttler_status = atomic_read(&power_context->throttle_status); 825 dev_dbg(smu->adev->dev, "SMU Throttler status: %u", throttler_status); 826 827 return throttler_status; 828} 829 830static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, 831 MetricsMember_t member, 832 uint32_t *value) 833{ 834 struct smu_table_context *smu_table = &smu->smu_table; 835 MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; 836 MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; 837 struct amdgpu_device *adev = smu->adev; 838 int ret = 0; 839 
	int xcc_id;

	/* Refresh the cached metrics table (no forced re-fetch) */
	ret = smu_v13_0_6_get_metrics_table(smu, NULL, false);
	if (ret)
		return ret;

	/* For clocks with multiple instances, only report the first one */
	switch (member) {
	case METRICS_CURR_GFXCLK:
	case METRICS_AVERAGE_GFXCLK:
		/* Per-XCC GfxclkFrequency is only reported by PMFW >= 0x552F00 (85.47.0) */
		if (smu->smc_fw_version >= 0x552F00) {
			xcc_id = GET_INST(GC, 0);
			*value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]);
		} else {
			*value = 0;
		}
		break;
	case METRICS_CURR_SOCCLK:
	case METRICS_AVERAGE_SOCCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[0]);
		break;
	case METRICS_CURR_UCLK:
	case METRICS_AVERAGE_UCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency));
		break;
	case METRICS_CURR_VCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[0]);
		break;
	case METRICS_CURR_DCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[0]);
		break;
	case METRICS_CURR_FCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency));
		break;
	case METRICS_AVERAGE_GFXACTIVITY:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy));
		break;
	case METRICS_AVERAGE_MEMACTIVITY:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization));
		break;
	case METRICS_CURR_SOCKETPOWER:
		/* SocketPower is returned with an extra 8-bit left shift */
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)) << 8;
		break;
	case METRICS_TEMPERATURE_HOTSPOT:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)) *
			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	case METRICS_TEMPERATURE_MEM:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)) *
			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	/* This is the max of all VRs and not just SOC VR.
	 * No need to define another data type for the same.
	 */
	case METRICS_TEMPERATURE_VRSOC:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)) *
			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	default:
		/* Unknown metric: report a sentinel rather than stale data */
		*value = UINT_MAX;
		break;
	}

	return ret;
}

/* Map a clock type to its "current frequency" metric and read it. */
static int smu_v13_0_6_get_current_clk_freq_by_table(struct smu_context *smu,
						     enum smu_clk_type clk_type,
						     uint32_t *value)
{
	MetricsMember_t member_type;

	if (!value)
		return -EINVAL;

	switch (clk_type) {
	case SMU_GFXCLK:
		member_type = METRICS_CURR_GFXCLK;
		break;
	case SMU_UCLK:
		member_type = METRICS_CURR_UCLK;
		break;
	case SMU_SOCCLK:
		member_type = METRICS_CURR_SOCCLK;
		break;
	case SMU_VCLK:
		member_type = METRICS_CURR_VCLK;
		break;
	case SMU_DCLK:
		member_type = METRICS_CURR_DCLK;
		break;
	case SMU_FCLK:
		member_type = METRICS_CURR_FCLK;
		break;
	default:
		return -EINVAL;
	}

	return smu_v13_0_6_get_smu_metrics_data(smu, member_type, value);
}

/*
 * Emit the DPM levels of @single_dpm_table into the sysfs buffer,
 * marking the level nearest to @curr_clk with '*'. Returns the updated
 * buffer size or a negative error code.
 */
static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size,
				  struct smu_13_0_dpm_table *single_dpm_table,
				  uint32_t curr_clk, const char *clk_name)
{
	struct pp_clock_levels_with_latency clocks;
	int i, ret, level = -1;
	uint32_t clk1, clk2;

	ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table);
	if (ret) {
		dev_err(smu->adev->dev, "Attempt to get %s clk levels failed!",
			clk_name);
		return ret;
	}

	if (!clocks.num_levels)
		return -EINVAL;

	if (curr_clk < SMU_13_0_6_DSCLK_THRESHOLD) {
		/* Deep-sleep clock: print it above the DPM level list */
		size = sysfs_emit_at(buf, size, "S: %uMhz *\n", curr_clk);
		for (i = 0; i < clocks.num_levels; i++)
			size += sysfs_emit_at(buf, size, "%d: %uMhz\n", i,
					      clocks.data[i].clocks_in_khz /
						      1000);

	} else {
		if ((clocks.num_levels == 1) ||
		    (curr_clk < (clocks.data[0].clocks_in_khz / 1000)))
			level = 0;
		for (i = 0; i < clocks.num_levels; i++) {
			clk1 =
clocks.data[i].clocks_in_khz / 1000; 971 972 if (i < (clocks.num_levels - 1)) 973 clk2 = clocks.data[i + 1].clocks_in_khz / 1000; 974 975 if (curr_clk == clk1) { 976 level = i; 977 } else if (curr_clk >= clk1 && curr_clk < clk2) { 978 level = (curr_clk - clk1) <= (clk2 - curr_clk) ? 979 i : 980 i + 1; 981 } 982 983 size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, 984 clk1, (level == i) ? "*" : ""); 985 } 986 } 987 988 return size; 989} 990 991static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, 992 enum smu_clk_type type, char *buf) 993{ 994 int now, size = 0; 995 int ret = 0; 996 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 997 struct smu_13_0_dpm_table *single_dpm_table; 998 struct smu_dpm_context *smu_dpm = &smu->smu_dpm; 999 struct smu_13_0_dpm_context *dpm_context = NULL; 1000 uint32_t min_clk, max_clk; 1001 1002 smu_cmn_get_sysfs_buf(&buf, &size); 1003 1004 if (amdgpu_ras_intr_triggered()) { 1005 size += sysfs_emit_at(buf, size, "unavailable\n"); 1006 return size; 1007 } 1008 1009 dpm_context = smu_dpm->dpm_context; 1010 1011 switch (type) { 1012 case SMU_OD_SCLK: 1013 size += sysfs_emit_at(buf, size, "%s:\n", "GFXCLK"); 1014 fallthrough; 1015 case SMU_SCLK: 1016 ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_GFXCLK, 1017 &now); 1018 if (ret) { 1019 dev_err(smu->adev->dev, 1020 "Attempt to get current gfx clk Failed!"); 1021 return ret; 1022 } 1023 1024 min_clk = pstate_table->gfxclk_pstate.curr.min; 1025 max_clk = pstate_table->gfxclk_pstate.curr.max; 1026 1027 if (now < SMU_13_0_6_DSCLK_THRESHOLD) { 1028 size += sysfs_emit_at(buf, size, "S: %uMhz *\n", 1029 now); 1030 size += sysfs_emit_at(buf, size, "0: %uMhz\n", 1031 min_clk); 1032 size += sysfs_emit_at(buf, size, "1: %uMhz\n", 1033 max_clk); 1034 1035 } else if (!smu_v13_0_6_freqs_in_same_level(now, min_clk) && 1036 !smu_v13_0_6_freqs_in_same_level(now, max_clk)) { 1037 size += sysfs_emit_at(buf, size, "0: %uMhz\n", 1038 min_clk); 1039 size += 
			       sysfs_emit_at(buf, size, "1: %uMhz *\n",
					     now);
			size += sysfs_emit_at(buf, size, "2: %uMhz\n",
					      max_clk);
		} else {
			size += sysfs_emit_at(buf, size, "0: %uMhz %s\n",
					      min_clk,
					      smu_v13_0_6_freqs_in_same_level(now, min_clk) ? "*" : "");
			size += sysfs_emit_at(buf, size, "1: %uMhz %s\n",
					      max_clk,
					      smu_v13_0_6_freqs_in_same_level(now, max_clk) ? "*" : "");
		}

		break;

	case SMU_OD_MCLK:
		size += sysfs_emit_at(buf, size, "%s:\n", "MCLK");
		fallthrough;
	case SMU_MCLK:
		ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_UCLK,
								&now);
		if (ret) {
			dev_err(smu->adev->dev,
				"Attempt to get current mclk Failed!");
			return ret;
		}

		single_dpm_table = &(dpm_context->dpm_tables.uclk_table);

		return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
					      now, "mclk");

	case SMU_SOCCLK:
		ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_SOCCLK,
								&now);
		if (ret) {
			dev_err(smu->adev->dev,
				"Attempt to get current socclk Failed!");
			return ret;
		}

		single_dpm_table = &(dpm_context->dpm_tables.soc_table);

		return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
					      now, "socclk");

	case SMU_FCLK:
		ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_FCLK,
								&now);
		if (ret) {
			dev_err(smu->adev->dev,
				"Attempt to get current fclk Failed!");
			return ret;
		}

		single_dpm_table = &(dpm_context->dpm_tables.fclk_table);

		return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
					      now, "fclk");

	case SMU_VCLK:
		ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_VCLK,
								&now);
		if (ret) {
			dev_err(smu->adev->dev,
				"Attempt to get current vclk Failed!");
			return ret;
		}

		single_dpm_table = &(dpm_context->dpm_tables.vclk_table);

		return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
					      now, "vclk");

	case SMU_DCLK:
		ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_DCLK,
								&now);
		if (ret) {
			dev_err(smu->adev->dev,
				"Attempt to get current dclk Failed!");
			return ret;
		}

		single_dpm_table = &(dpm_context->dpm_tables.dclk_table);

		return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
					      now, "dclk");

	default:
		break;
	}

	return size;
}

/*
 * Send soft min or max (@max selects which) for the clocks selected in
 * @feature_mask to the SMU, using DPM level index @level. Frequencies
 * are truncated to the 16-bit message payload.
 */
static int smu_v13_0_6_upload_dpm_level(struct smu_context *smu, bool max,
					uint32_t feature_mask, uint32_t level)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	uint32_t freq;
	int ret = 0;

	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT) &&
	    (feature_mask & FEATURE_MASK(FEATURE_DPM_GFXCLK))) {
		freq = dpm_context->dpm_tables.gfx_table.dpm_levels[level].value;
		ret = smu_cmn_send_smc_msg_with_param(
			smu,
			(max ? SMU_MSG_SetSoftMaxGfxClk :
			       SMU_MSG_SetSoftMinGfxclk),
			freq & 0xffff, NULL);
		if (ret) {
			dev_err(smu->adev->dev,
				"Failed to set soft %s gfxclk !\n",
				max ? "max" : "min");
			return ret;
		}
	}

	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) &&
	    (feature_mask & FEATURE_MASK(FEATURE_DPM_UCLK))) {
		freq = dpm_context->dpm_tables.uclk_table.dpm_levels[level]
			       .value;
		/* Clock id goes in the upper 16 bits of the parameter */
		ret = smu_cmn_send_smc_msg_with_param(
			smu,
			(max ? SMU_MSG_SetSoftMaxByFreq :
			       SMU_MSG_SetSoftMinByFreq),
			(PPCLK_UCLK << 16) | (freq & 0xffff), NULL);
		if (ret) {
			dev_err(smu->adev->dev,
				"Failed to set soft %s memclk !\n",
				max ?
"max" : "min"); 1170 return ret; 1171 } 1172 } 1173 1174 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT) && 1175 (feature_mask & FEATURE_MASK(FEATURE_DPM_SOCCLK))) { 1176 freq = dpm_context->dpm_tables.soc_table.dpm_levels[level].value; 1177 ret = smu_cmn_send_smc_msg_with_param( 1178 smu, 1179 (max ? SMU_MSG_SetSoftMaxByFreq : 1180 SMU_MSG_SetSoftMinByFreq), 1181 (PPCLK_SOCCLK << 16) | (freq & 0xffff), NULL); 1182 if (ret) { 1183 dev_err(smu->adev->dev, 1184 "Failed to set soft %s socclk !\n", 1185 max ? "max" : "min"); 1186 return ret; 1187 } 1188 } 1189 1190 return ret; 1191} 1192 1193static int smu_v13_0_6_force_clk_levels(struct smu_context *smu, 1194 enum smu_clk_type type, uint32_t mask) 1195{ 1196 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1197 struct smu_13_0_dpm_table *single_dpm_table = NULL; 1198 uint32_t soft_min_level, soft_max_level; 1199 int ret = 0; 1200 1201 soft_min_level = mask ? (ffs(mask) - 1) : 0; 1202 soft_max_level = mask ? 
			 (fls(mask) - 1) : 0;

	switch (type) {
	case SMU_SCLK:
		single_dpm_table = &(dpm_context->dpm_tables.gfx_table);
		if (soft_max_level >= single_dpm_table->count) {
			dev_err(smu->adev->dev,
				"Clock level specified %d is over max allowed %d\n",
				soft_max_level, single_dpm_table->count - 1);
			ret = -EINVAL;
			break;
		}

		/* Apply soft min first, then soft max */
		ret = smu_v13_0_6_upload_dpm_level(
			smu, false, FEATURE_MASK(FEATURE_DPM_GFXCLK),
			soft_min_level);
		if (ret) {
			dev_err(smu->adev->dev,
				"Failed to upload boot level to lowest!\n");
			break;
		}

		ret = smu_v13_0_6_upload_dpm_level(
			smu, true, FEATURE_MASK(FEATURE_DPM_GFXCLK),
			soft_max_level);
		if (ret)
			dev_err(smu->adev->dev,
				"Failed to upload dpm max level to highest!\n");

		break;

	case SMU_MCLK:
	case SMU_SOCCLK:
	case SMU_FCLK:
		/*
		 * Should not arrive here since smu_13_0_6 does not
		 * support mclk/socclk/fclk softmin/softmax settings
		 */
		ret = -EINVAL;
		break;

	default:
		break;
	}

	return ret;
}

/* Read GPU or memory busy percentage from the metrics table. */
static int smu_v13_0_6_get_current_activity_percent(struct smu_context *smu,
						    enum amd_pp_sensors sensor,
						    uint32_t *value)
{
	int ret = 0;

	if (!value)
		return -EINVAL;

	switch (sensor) {
	case AMDGPU_PP_SENSOR_GPU_LOAD:
		ret = smu_v13_0_6_get_smu_metrics_data(
			smu, METRICS_AVERAGE_GFXACTIVITY, value);
		break;
	case AMDGPU_PP_SENSOR_MEM_LOAD:
		ret = smu_v13_0_6_get_smu_metrics_data(
			smu, METRICS_AVERAGE_MEMACTIVITY, value);
		break;
	default:
		dev_err(smu->adev->dev,
			"Invalid sensor for retrieving clock activity\n");
		return -EINVAL;
	}

	return ret;
}

/* Read hotspot or HBM temperature from the metrics table. */
static int smu_v13_0_6_thermal_get_temperature(struct smu_context *smu,
					       enum amd_pp_sensors sensor,
					       uint32_t *value)
{
	int ret = 0;

	if (!value)
		return -EINVAL;

	switch (sensor) {
	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
		ret = smu_v13_0_6_get_smu_metrics_data(
			smu, METRICS_TEMPERATURE_HOTSPOT, value);
		break;
	case AMDGPU_PP_SENSOR_MEM_TEMP:
		ret = smu_v13_0_6_get_smu_metrics_data(
			smu, METRICS_TEMPERATURE_MEM, value);
		break;
	default:
		dev_err(smu->adev->dev, "Invalid sensor for retrieving temp\n");
		return -EINVAL;
	}

	return ret;
}

/*
 * Generic sensor-read entry point. Writes the value into @data and the
 * value size (always 4 bytes here) into @size.
 */
static int smu_v13_0_6_read_sensor(struct smu_context *smu,
				   enum amd_pp_sensors sensor, void *data,
				   uint32_t *size)
{
	int ret = 0;

	/* Sensors are not read after a fatal RAS interrupt */
	if (amdgpu_ras_intr_triggered())
		return 0;

	if (!data || !size)
		return -EINVAL;

	switch (sensor) {
	case AMDGPU_PP_SENSOR_MEM_LOAD:
	case AMDGPU_PP_SENSOR_GPU_LOAD:
		ret = smu_v13_0_6_get_current_activity_percent(smu, sensor,
							       (uint32_t *)data);
		*size = 4;
		break;
	case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
		ret = smu_v13_0_6_get_smu_metrics_data(smu,
						       METRICS_CURR_SOCKETPOWER,
						       (uint32_t *)data);
		*size = 4;
		break;
	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
	case AMDGPU_PP_SENSOR_MEM_TEMP:
		ret = smu_v13_0_6_thermal_get_temperature(smu, sensor,
							  (uint32_t *)data);
		*size = 4;
		break;
	case AMDGPU_PP_SENSOR_GFX_MCLK:
		ret = smu_v13_0_6_get_current_clk_freq_by_table(
			smu, SMU_UCLK, (uint32_t *)data);
		/* the output clock frequency in 10K unit */
		*(uint32_t *)data *= 100;
		*size = 4;
		break;
	case AMDGPU_PP_SENSOR_GFX_SCLK:
		ret = smu_v13_0_6_get_current_clk_freq_by_table(
			smu, SMU_GFXCLK, (uint32_t *)data);
		/* MHz -> 10 kHz units, same as GFX_MCLK above */
		*(uint32_t *)data *= 100;
		*size = 4;
		break;
	case AMDGPU_PP_SENSOR_VDDGFX:
		ret = smu_v13_0_get_gfx_vdd(smu, (uint32_t *)data);
		*size = 4;
		break;
	case AMDGPU_PP_SENSOR_GPU_AVG_POWER:
	default:
		ret = -EOPNOTSUPP;
		break;
	}

	return ret;
}

static int
smu_v13_0_6_get_power_limit(struct smu_context *smu,
			    uint32_t *current_power_limit,
			    uint32_t *default_power_limit,
			    uint32_t *max_power_limit,
			    uint32_t *min_power_limit)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	uint32_t power_limit = 0;
	int ret;

	/* Current PPT limit is queried from the firmware */
	ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetPptLimit, &power_limit);

	if (ret) {
		dev_err(smu->adev->dev, "Couldn't get PPT limit");
		return -EINVAL;
	}

	if (current_power_limit)
		*current_power_limit = power_limit;
	/* The "default" reported here is the currently applied FW limit */
	if (default_power_limit)
		*default_power_limit = power_limit;

	if (max_power_limit) {
		*max_power_limit = pptable->MaxSocketPowerLimit;
	}

	if (min_power_limit)
		*min_power_limit = 0;
	return 0;
}

/* Thin wrapper over the common smu_v13_0 power-limit setter. */
static int smu_v13_0_6_set_power_limit(struct smu_context *smu,
				       enum smu_ppt_limit_type limit_type,
				       uint32_t limit)
{
	return smu_v13_0_set_power_limit(smu, limit_type, limit);
}

/*
 * MP1 (SMCToHost) interrupt handler: ACK the interrupt and dispatch on
 * the context id carried in src_data[0].
 */
static int smu_v13_0_6_irq_process(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	struct smu_power_context *smu_power = &smu->smu_power;
	struct smu_13_0_power_context *power_context = smu_power->power_context;
	uint32_t client_id = entry->client_id;
	uint32_t ctxid = entry->src_data[0];
	uint32_t src_id = entry->src_id;
	uint32_t data;

	if (client_id == SOC15_IH_CLIENTID_MP1) {
		if (src_id == IH_INTERRUPT_ID_TO_DRIVER) {
			/* ACK SMUToHost interrupt */
			data = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL);
			data = REG_SET_FIELD(data, MP1_SMN_IH_SW_INT_CTRL, INT_ACK, 1);
			WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, data);
			/*
			 * ctxid is used to distinguish different events for SMCToHost
			 *
interrupt. 1421 */ 1422 switch (ctxid) { 1423 case IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING: 1424 /* 1425 * Increment the throttle interrupt counter 1426 */ 1427 atomic64_inc(&smu->throttle_int_counter); 1428 1429 if (!atomic_read(&adev->throttling_logging_enabled)) 1430 return 0; 1431 1432 /* This uses the new method which fixes the 1433 * incorrect throttling status reporting 1434 * through metrics table. For older FWs, 1435 * it will be ignored. 1436 */ 1437 if (__ratelimit(&adev->throttling_logging_rs)) { 1438 atomic_set( 1439 &power_context->throttle_status, 1440 entry->src_data[1]); 1441 schedule_work(&smu->throttling_logging_work); 1442 } 1443 break; 1444 default: 1445 dev_dbg(adev->dev, "Unhandled context id %d from client:%d!\n", 1446 ctxid, client_id); 1447 break; 1448 } 1449 } 1450 } 1451 1452 return 0; 1453} 1454 1455static int smu_v13_0_6_set_irq_state(struct amdgpu_device *adev, 1456 struct amdgpu_irq_src *source, 1457 unsigned tyep, 1458 enum amdgpu_interrupt_state state) 1459{ 1460 uint32_t val = 0; 1461 1462 switch (state) { 1463 case AMDGPU_IRQ_STATE_DISABLE: 1464 /* For MP1 SW irqs */ 1465 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL); 1466 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 1); 1467 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val); 1468 1469 break; 1470 case AMDGPU_IRQ_STATE_ENABLE: 1471 /* For MP1 SW irqs */ 1472 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT); 1473 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, ID, 0xFE); 1474 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, VALID, 0); 1475 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT, val); 1476 1477 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL); 1478 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 0); 1479 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val); 1480 1481 break; 1482 default: 1483 break; 1484 } 1485 1486 return 0; 1487} 1488 1489static const struct amdgpu_irq_src_funcs smu_v13_0_6_irq_funcs = { 1490 .set = 
	       smu_v13_0_6_set_irq_state,
	.process = smu_v13_0_6_irq_process,
};

/*
 * Register the MP1 SMCToHost interrupt handler. Skipped on SR-IOV
 * virtual functions.
 */
static int smu_v13_0_6_register_irq_handler(struct smu_context *smu)
{
	struct amdgpu_device *adev = smu->adev;
	struct amdgpu_irq_src *irq_src = &smu->irq_source;
	int ret = 0;

	if (amdgpu_sriov_vf(adev))
		return 0;

	irq_src->num_types = 1;
	irq_src->funcs = &smu_v13_0_6_irq_funcs;

	ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_MP1,
				IH_INTERRUPT_ID_TO_DRIVER,
				irq_src);
	if (ret)
		return ret;

	return ret;
}

/* Tell PMFW the driver is going away; best effort, skipped during reset. */
static int smu_v13_0_6_notify_unload(struct smu_context *smu)
{
	if (amdgpu_in_reset(smu->adev))
		return 0;

	dev_dbg(smu->adev->dev, "Notify PMFW about driver unload");
	/* Ignore return, just intimate FW that driver is not going to be there */
	smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);

	return 0;
}

/*
 * Configure MCA clear-on-read behaviour for RAS debug mode:
 * debug enabled -> parameter 0; otherwise set the UE/CE-poll clear
 * masks. Silently succeeds on firmware without the message.
 */
static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable)
{
	/* NOTE: this ClearMcaOnRead message is only supported for smu version 85.72.0 or higher */
	if (smu->smc_fw_version < 0x554800)
		return 0;

	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead,
					       enable ?
					       0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK,
					       NULL);
}

/*
 * Enable/disable SMU features. Disable only notifies FW of driver
 * unload; enable skips the common path on APUs. No-op under SR-IOV.
 */
static int smu_v13_0_6_system_features_control(struct smu_context *smu,
					       bool enable)
{
	struct amdgpu_device *adev = smu->adev;
	int ret = 0;

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (enable) {
		if (!(adev->flags & AMD_IS_APU))
			ret = smu_v13_0_system_features_control(smu, enable);
	} else {
		/* Notify FW that the device is no longer driver managed */
		smu_v13_0_6_notify_unload(smu);
	}

	return ret;
}

/* Apply soft min/max GFXCLK limits (MHz, truncated to 16-bit payload). */
static int smu_v13_0_6_set_gfx_soft_freq_limited_range(struct smu_context *smu,
						       uint32_t min,
						       uint32_t max)
{
	int ret;

	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk,
					      max & 0xffff, NULL);
	if (ret)
		return ret;

	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinGfxclk,
					      min & 0xffff, NULL);

	return ret;
}

/*
 * Switch the DPM forced level. AUTO restores default gfx/uclk ranges;
 * MANUAL and PERF_DETERMINISM leave clocks as-is (set separately via
 * set_soft_freq_limited_range); other levels are rejected.
 */
static int smu_v13_0_6_set_performance_level(struct smu_context *smu,
					     enum amd_dpm_forced_level level)
{
	struct smu_dpm_context *smu_dpm = &(smu->smu_dpm);
	struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
	struct smu_13_0_dpm_table *gfx_table =
		&dpm_context->dpm_tables.gfx_table;
	struct smu_13_0_dpm_table *uclk_table =
		&dpm_context->dpm_tables.uclk_table;
	struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
	int ret;

	/* Disable determinism if switching to another mode */
	if ((smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) &&
	    (level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)) {
		smu_cmn_send_smc_msg(smu, SMU_MSG_DisableDeterminism, NULL);
		pstate_table->gfxclk_pstate.curr.max = gfx_table->max;
	}

	switch (level) {
	case AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM:
		return 0;

	case AMD_DPM_FORCED_LEVEL_AUTO:
		if ((gfx_table->min !=
		     pstate_table->gfxclk_pstate.curr.min) ||
		    (gfx_table->max != pstate_table->gfxclk_pstate.curr.max)) {
			/* Restore the full default GFXCLK range */
			ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(
				smu, gfx_table->min, gfx_table->max);
			if (ret)
				return ret;

			pstate_table->gfxclk_pstate.curr.min = gfx_table->min;
			pstate_table->gfxclk_pstate.curr.max = gfx_table->max;
		}

		if (uclk_table->max != pstate_table->uclk_pstate.curr.max) {
			/* Min UCLK is not expected to be changed */
			ret = smu_v13_0_set_soft_freq_limited_range(
				smu, SMU_UCLK, 0, uclk_table->max);
			if (ret)
				return ret;
			pstate_table->uclk_pstate.curr.max = uclk_table->max;
		}
		/* Drop any custom UCLK max carried over from OD edits */
		pstate_table->uclk_pstate.custom.max = 0;

		return 0;
	case AMD_DPM_FORCED_LEVEL_MANUAL:
		return 0;
	default:
		break;
	}

	return -EINVAL;
}

/*
 * Set soft frequency limits. MANUAL mode applies gfxclk min/max or a
 * UCLK max cap; PERF_DETERMINISM restores defaults then enables
 * determinism at the requested max GFX clock.
 */
static int smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu,
						   enum smu_clk_type clk_type,
						   uint32_t min, uint32_t max)
{
	struct smu_dpm_context *smu_dpm = &(smu->smu_dpm);
	struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
	struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
	struct amdgpu_device *adev = smu->adev;
	uint32_t min_clk;
	uint32_t max_clk;
	int ret = 0;

	/* Only gfxclk and uclk limits are supported on this IP */
	if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK &&
	    clk_type != SMU_UCLK)
		return -EINVAL;

	if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) &&
	    (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM))
		return -EINVAL;

	if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) {
		if (min >= max) {
			dev_err(smu->adev->dev,
				"Minimum clk should be less than the maximum allowed clock\n");
			return -EINVAL;
		}

		if (clk_type == SMU_GFXCLK) {
			/* Skip the FW round trip if nothing changes */
			if ((min == pstate_table->gfxclk_pstate.curr.min) &&
			    (max == pstate_table->gfxclk_pstate.curr.max))
				return 0;

			ret =
			      smu_v13_0_6_set_gfx_soft_freq_limited_range(
				smu, min, max);
			if (!ret) {
				pstate_table->gfxclk_pstate.curr.min = min;
				pstate_table->gfxclk_pstate.curr.max = max;
			}
		}

		if (clk_type == SMU_UCLK) {
			if (max == pstate_table->uclk_pstate.curr.max)
				return 0;
			/* Only max clock limiting is allowed for UCLK */
			ret = smu_v13_0_set_soft_freq_limited_range(
				smu, SMU_UCLK, 0, max);
			if (!ret)
				pstate_table->uclk_pstate.curr.max = max;
		}

		return ret;
	}

	if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
		/* Determinism target must lie within the default GFX range */
		if (!max || (max < dpm_context->dpm_tables.gfx_table.min) ||
		    (max > dpm_context->dpm_tables.gfx_table.max)) {
			dev_warn(
				adev->dev,
				"Invalid max frequency %d MHz specified for determinism\n",
				max);
			return -EINVAL;
		}

		/* Restore default min/max clocks and enable determinism */
		min_clk = dpm_context->dpm_tables.gfx_table.min;
		max_clk = dpm_context->dpm_tables.gfx_table.max;
		ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(smu, min_clk,
								  max_clk);
		if (!ret) {
			/* Give PMFW time to settle before enabling determinism */
			usleep_range(500, 1000);
			ret = smu_cmn_send_smc_msg_with_param(
				smu, SMU_MSG_EnableDeterminism, max, NULL);
			if (ret) {
				dev_err(adev->dev,
					"Failed to enable determinism at GFX clock %d MHz\n",
					max);
			} else {
				pstate_table->gfxclk_pstate.curr.min = min_clk;
				pstate_table->gfxclk_pstate.curr.max = max;
			}
		}
	}

	return ret;
}

/*
 * Handle pp_od_clk_voltage edit commands: stage custom gfx/uclk
 * limits, restore defaults, or commit staged values to the firmware.
 */
static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu,
					  enum PP_OD_DPM_TABLE_COMMAND type,
					  long input[], uint32_t size)
{
	struct smu_dpm_context *smu_dpm = &(smu->smu_dpm);
	struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
	struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
	uint32_t min_clk;
	uint32_t max_clk;
	int ret = 0;

	/* Only allowed in manual or
determinism mode */
	if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) &&
	    (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM))
		return -EINVAL;

	switch (type) {
	case PP_OD_EDIT_SCLK_VDDC_TABLE:
		if (size != 2) {
			dev_err(smu->adev->dev,
				"Input parameter number not correct\n");
			return -EINVAL;
		}

		/* input[0]: 0 = set min, 1 = set max; input[1]: MHz value */
		if (input[0] == 0) {
			if (input[1] < dpm_context->dpm_tables.gfx_table.min) {
				dev_warn(
					smu->adev->dev,
					"Minimum GFX clk (%ld) MHz specified is less than the minimum allowed (%d) MHz\n",
					input[1],
					dpm_context->dpm_tables.gfx_table.min);
				pstate_table->gfxclk_pstate.custom.min =
					pstate_table->gfxclk_pstate.curr.min;
				return -EINVAL;
			}

			pstate_table->gfxclk_pstate.custom.min = input[1];
		} else if (input[0] == 1) {
			if (input[1] > dpm_context->dpm_tables.gfx_table.max) {
				dev_warn(
					smu->adev->dev,
					"Maximum GFX clk (%ld) MHz specified is greater than the maximum allowed (%d) MHz\n",
					input[1],
					dpm_context->dpm_tables.gfx_table.max);
				pstate_table->gfxclk_pstate.custom.max =
					pstate_table->gfxclk_pstate.curr.max;
				return -EINVAL;
			}

			pstate_table->gfxclk_pstate.custom.max = input[1];
		} else {
			return -EINVAL;
		}
		break;
	case PP_OD_EDIT_MCLK_VDDC_TABLE:
		if (size != 2) {
			dev_err(smu->adev->dev,
				"Input parameter number not correct\n");
			return -EINVAL;
		}

		if (!smu_cmn_feature_is_enabled(smu,
						SMU_FEATURE_DPM_UCLK_BIT)) {
			dev_warn(smu->adev->dev,
				 "UCLK_LIMITS setting not supported!\n");
			return -EOPNOTSUPP;
		}

		/* Only the UCLK max may be edited, not the min */
		if (input[0] == 0) {
			dev_info(smu->adev->dev,
				 "Setting min UCLK level is not supported");
			return -EINVAL;
		} else if (input[0] == 1) {
			if (input[1] > dpm_context->dpm_tables.uclk_table.max) {
				dev_warn(
					smu->adev->dev,
					"Maximum UCLK (%ld) MHz specified is greater than the maximum allowed (%d) MHz\n",
					input[1],
					dpm_context->dpm_tables.uclk_table.max);
				pstate_table->uclk_pstate.custom.max =
					pstate_table->uclk_pstate.curr.max;
				return -EINVAL;
			}

			pstate_table->uclk_pstate.custom.max = input[1];
		}
		break;

	case PP_OD_RESTORE_DEFAULT_TABLE:
		if (size != 0) {
			dev_err(smu->adev->dev,
				"Input parameter number not correct\n");
			return -EINVAL;
		} else {
			/* Use the default frequencies for manual and determinism mode */
			min_clk = dpm_context->dpm_tables.gfx_table.min;
			max_clk = dpm_context->dpm_tables.gfx_table.max;

			ret = smu_v13_0_6_set_soft_freq_limited_range(
				smu, SMU_GFXCLK, min_clk, max_clk);

			if (ret)
				return ret;

			min_clk = dpm_context->dpm_tables.uclk_table.min;
			max_clk = dpm_context->dpm_tables.uclk_table.max;
			ret = smu_v13_0_6_set_soft_freq_limited_range(
				smu, SMU_UCLK, min_clk, max_clk);
			if (ret)
				return ret;
			pstate_table->uclk_pstate.custom.max = 0;
		}
		break;
	case PP_OD_COMMIT_DPM_TABLE:
		if (size != 0) {
			dev_err(smu->adev->dev,
				"Input parameter number not correct\n");
			return -EINVAL;
		} else {
			/* Fall back to current limits when no custom value is staged */
			if (!pstate_table->gfxclk_pstate.custom.min)
				pstate_table->gfxclk_pstate.custom.min =
					pstate_table->gfxclk_pstate.curr.min;

			if (!pstate_table->gfxclk_pstate.custom.max)
				pstate_table->gfxclk_pstate.custom.max =
					pstate_table->gfxclk_pstate.curr.max;

			min_clk = pstate_table->gfxclk_pstate.custom.min;
			max_clk = pstate_table->gfxclk_pstate.custom.max;

			ret = smu_v13_0_6_set_soft_freq_limited_range(
				smu, SMU_GFXCLK, min_clk, max_clk);

			if (ret)
				return ret;

			if (!pstate_table->uclk_pstate.custom.max)
				return 0;

			min_clk = pstate_table->uclk_pstate.curr.min;
			max_clk = pstate_table->uclk_pstate.custom.max;
			return
			       smu_v13_0_6_set_soft_freq_limited_range(
				smu, SMU_UCLK, min_clk, max_clk);
		}
		break;
	default:
		return -ENOSYS;
	}

	return ret;
}

/*
 * Read the enabled-feature mask. Firmware older than 0x552F00 may fail
 * the query with -EIO; treat that as "no features enabled".
 */
static int smu_v13_0_6_get_enabled_mask(struct smu_context *smu,
					uint64_t *feature_mask)
{
	int ret;

	ret = smu_cmn_get_enabled_mask(smu, feature_mask);

	if (ret == -EIO && smu->smc_fw_version < 0x552F00) {
		*feature_mask = 0;
		ret = 0;
	}

	return ret;
}

/* DPM is considered running if any SMC DPM feature bit is set. */
static bool smu_v13_0_6_is_dpm_running(struct smu_context *smu)
{
	int ret;
	uint64_t feature_enabled;

	ret = smu_v13_0_6_get_enabled_mask(smu, &feature_enabled);

	if (ret)
		return false;

	return !!(feature_enabled & SMC_DPM_FEATURE);
}

/*
 * Copy an I2C request into the driver table and ask the SMU to execute
 * it. Called with adev->pm.mutex held (see smu_v13_0_6_i2c_xfer).
 */
static int smu_v13_0_6_request_i2c_xfer(struct smu_context *smu,
					void *table_data)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	struct smu_table *table = &smu_table->driver_table;
	struct amdgpu_device *adev = smu->adev;
	uint32_t table_size;
	int ret = 0;

	if (!table_data)
		return -EINVAL;

	table_size = smu_table->tables[SMU_TABLE_I2C_COMMANDS].size;

	memcpy(table->cpu_addr, table_data, table_size);
	/* Flush hdp cache */
	amdgpu_asic_flush_hdp(adev, NULL);
	ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RequestI2cTransaction,
				   NULL);

	return ret;
}

/*
 * i2c_algorithm.master_xfer implementation: flatten the i2c_msg array
 * into one SwI2cRequest_t, execute it via the SMU, then copy read data
 * back into the caller's buffers.
 */
static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap,
				struct i2c_msg *msg, int num_msgs)
{
	struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c_adap);
	struct amdgpu_device *adev = smu_i2c->adev;
	struct smu_context *smu = adev->powerplay.pp_handle;
	struct smu_table_context *smu_table = &smu->smu_table;
	struct smu_table *table = &smu_table->driver_table;
	SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr;
	int i, j, r, c;
	u16 dir;

	if (!adev->pm.dpm_enabled)
		return -EBUSY;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	req->I2CcontrollerPort = smu_i2c->port;
	req->I2CSpeed = I2C_SPEED_FAST_400K;
	req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */
	dir = msg[0].flags & I2C_M_RD;

	/* Flatten all messages into one SMU command stream */
	for (c = i = 0; i < num_msgs; i++) {
		for (j = 0; j < msg[i].len; j++, c++) {
			SwI2cCmd_t *cmd = &req->SwI2cCmds[c];

			if (!(msg[i].flags & I2C_M_RD)) {
				/* write */
				cmd->CmdConfig |= CMDCONFIG_READWRITE_MASK;
				cmd->ReadWriteData = msg[i].buf[j];
			}

			if ((dir ^ msg[i].flags) & I2C_M_RD) {
				/* The direction changes.
				 */
				dir = msg[i].flags & I2C_M_RD;
				cmd->CmdConfig |= CMDCONFIG_RESTART_MASK;
			}

			req->NumCmds++;

			/*
			 * Insert STOP if we are at the last byte of either last
			 * message for the transaction or the client explicitly
			 * requires a STOP at this particular message.
			 */
			if ((j == msg[i].len - 1) &&
			    ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) {
				cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK;
				cmd->CmdConfig |= CMDCONFIG_STOP_MASK;
			}
		}
	}
	mutex_lock(&adev->pm.mutex);
	r = smu_v13_0_6_request_i2c_xfer(smu, req);
	if (r)
		goto fail;

	/* Copy bytes read by the SMU back into the caller's buffers */
	for (c = i = 0; i < num_msgs; i++) {
		if (!(msg[i].flags & I2C_M_RD)) {
			c += msg[i].len;
			continue;
		}
		for (j = 0; j < msg[i].len; j++, c++) {
			SwI2cCmd_t *cmd = &res->SwI2cCmds[c];

			msg[i].buf[j] = cmd->ReadWriteData;
		}
	}
	r = num_msgs;
fail:
	mutex_unlock(&adev->pm.mutex);
	kfree(req);
	return r;
}

/* Advertise plain I2C plus SMBus emulation. */
static u32 smu_v13_0_6_i2c_func(struct i2c_adapter *adap)
{
	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
}

static const struct i2c_algorithm smu_v13_0_6_i2c_algo = {
	.master_xfer = smu_v13_0_6_i2c_xfer,
	.functionality = smu_v13_0_6_i2c_func,
};

static
const struct i2c_adapter_quirks smu_v13_0_6_i2c_control_quirks = { 2008 .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN, 2009 .max_read_len = MAX_SW_I2C_COMMANDS, 2010 .max_write_len = MAX_SW_I2C_COMMANDS, 2011 .max_comb_1st_msg_len = 2, 2012 .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2, 2013}; 2014 2015static int smu_v13_0_6_i2c_control_init(struct smu_context *smu) 2016{ 2017 struct amdgpu_device *adev = smu->adev; 2018 int res, i; 2019 2020 for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { 2021 struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; 2022 struct i2c_adapter *control = &smu_i2c->adapter; 2023 2024 smu_i2c->adev = adev; 2025 smu_i2c->port = i; 2026 mutex_init(&smu_i2c->mutex); 2027 control->owner = THIS_MODULE; 2028 control->dev.parent = &adev->pdev->dev; 2029 control->algo = &smu_v13_0_6_i2c_algo; 2030 snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); 2031 control->quirks = &smu_v13_0_6_i2c_control_quirks; 2032 i2c_set_adapdata(control, smu_i2c); 2033 2034 res = i2c_add_adapter(control); 2035 if (res) { 2036 DRM_ERROR("Failed to register hw i2c, err: %d\n", res); 2037 goto Out_err; 2038 } 2039 } 2040 2041 adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; 2042 adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; 2043 2044 return 0; 2045Out_err: 2046 for ( ; i >= 0; i--) { 2047 struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; 2048 struct i2c_adapter *control = &smu_i2c->adapter; 2049 2050 i2c_del_adapter(control); 2051 } 2052 return res; 2053} 2054 2055static void smu_v13_0_6_i2c_control_fini(struct smu_context *smu) 2056{ 2057 struct amdgpu_device *adev = smu->adev; 2058 int i; 2059 2060 for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { 2061 struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; 2062 struct i2c_adapter *control = &smu_i2c->adapter; 2063 2064 i2c_del_adapter(control); 2065 } 2066 adev->pm.ras_eeprom_i2c_bus = NULL; 2067 adev->pm.fru_eeprom_i2c_bus = NULL; 2068} 2069 2070static 
void smu_v13_0_6_get_unique_id(struct smu_context *smu) 2071{ 2072 struct amdgpu_device *adev = smu->adev; 2073 struct smu_table_context *smu_table = &smu->smu_table; 2074 struct PPTable_t *pptable = 2075 (struct PPTable_t *)smu_table->driver_pptable; 2076 2077 adev->unique_id = pptable->PublicSerialNumber_AID; 2078} 2079 2080static bool smu_v13_0_6_is_baco_supported(struct smu_context *smu) 2081{ 2082 /* smu_13_0_6 does not support baco */ 2083 2084 return false; 2085} 2086 2087static const char *const throttling_logging_label[] = { 2088 [THROTTLER_PROCHOT_BIT] = "Prochot", 2089 [THROTTLER_PPT_BIT] = "PPT", 2090 [THROTTLER_THERMAL_SOCKET_BIT] = "SOC", 2091 [THROTTLER_THERMAL_VR_BIT] = "VR", 2092 [THROTTLER_THERMAL_HBM_BIT] = "HBM" 2093}; 2094 2095static void smu_v13_0_6_log_thermal_throttling_event(struct smu_context *smu) 2096{ 2097 int throttler_idx, throttling_events = 0, buf_idx = 0; 2098 struct amdgpu_device *adev = smu->adev; 2099 uint32_t throttler_status; 2100 char log_buf[256]; 2101 2102 throttler_status = smu_v13_0_6_get_throttler_status(smu); 2103 if (!throttler_status) 2104 return; 2105 2106 memset(log_buf, 0, sizeof(log_buf)); 2107 for (throttler_idx = 0; 2108 throttler_idx < ARRAY_SIZE(throttling_logging_label); 2109 throttler_idx++) { 2110 if (throttler_status & (1U << throttler_idx)) { 2111 throttling_events++; 2112 buf_idx += snprintf( 2113 log_buf + buf_idx, sizeof(log_buf) - buf_idx, 2114 "%s%s", throttling_events > 1 ? " and " : "", 2115 throttling_logging_label[throttler_idx]); 2116 if (buf_idx >= sizeof(log_buf)) { 2117 dev_err(adev->dev, "buffer overflow!\n"); 2118 log_buf[sizeof(log_buf) - 1] = '\0'; 2119 break; 2120 } 2121 } 2122 } 2123 2124 dev_warn(adev->dev, 2125 "WARN: GPU is throttled, expect performance decrease. 
%s.\n", 2126 log_buf); 2127 kgd2kfd_smi_event_throttle( 2128 smu->adev->kfd.dev, 2129 smu_cmn_get_indep_throttler_status(throttler_status, 2130 smu_v13_0_6_throttler_map)); 2131} 2132 2133static int 2134smu_v13_0_6_get_current_pcie_link_width_level(struct smu_context *smu) 2135{ 2136 struct amdgpu_device *adev = smu->adev; 2137 2138 return REG_GET_FIELD(RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL), 2139 PCIE_LC_LINK_WIDTH_CNTL, LC_LINK_WIDTH_RD); 2140} 2141 2142static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) 2143{ 2144 struct amdgpu_device *adev = smu->adev; 2145 uint32_t speed_level; 2146 uint32_t esm_ctrl; 2147 2148 /* TODO: confirm this on real target */ 2149 esm_ctrl = RREG32_PCIE(smnPCIE_ESM_CTRL); 2150 if ((esm_ctrl >> 15) & 0x1) 2151 return (((esm_ctrl >> 8) & 0x7F) + 128); 2152 2153 speed_level = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & 2154 PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) 2155 >> PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; 2156 if (speed_level > LINK_SPEED_MAX) 2157 speed_level = 0; 2158 2159 return pcie_gen_to_speed(speed_level + 1); 2160} 2161 2162static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) 2163{ 2164 struct smu_table_context *smu_table = &smu->smu_table; 2165 struct gpu_metrics_v1_5 *gpu_metrics = 2166 (struct gpu_metrics_v1_5 *)smu_table->gpu_metrics_table; 2167 struct amdgpu_device *adev = smu->adev; 2168 int ret = 0, xcc_id, inst, i, j; 2169 MetricsTableX_t *metrics_x; 2170 MetricsTableA_t *metrics_a; 2171 u16 link_width_level; 2172 2173 metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL); 2174 ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true); 2175 if (ret) { 2176 kfree(metrics_x); 2177 return ret; 2178 } 2179 2180 metrics_a = (MetricsTableA_t *)metrics_x; 2181 2182 smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 5); 2183 2184 gpu_metrics->temperature_hotspot = 2185 SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)); 2186 /* 
Individual HBM stack temperature is not reported */ 2187 gpu_metrics->temperature_mem = 2188 SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)); 2189 /* Reports max temperature of all voltage rails */ 2190 gpu_metrics->temperature_vrsoc = 2191 SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)); 2192 2193 gpu_metrics->average_gfx_activity = 2194 SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); 2195 gpu_metrics->average_umc_activity = 2196 SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); 2197 2198 gpu_metrics->curr_socket_power = 2199 SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)); 2200 /* Energy counter reported in 15.259uJ (2^-16) units */ 2201 gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc); 2202 2203 for (i = 0; i < MAX_GFX_CLKS; i++) { 2204 xcc_id = GET_INST(GC, i); 2205 if (xcc_id >= 0) 2206 gpu_metrics->current_gfxclk[i] = 2207 SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); 2208 2209 if (i < MAX_CLKS) { 2210 gpu_metrics->current_socclk[i] = 2211 SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[i]); 2212 inst = GET_INST(VCN, i); 2213 if (inst >= 0) { 2214 gpu_metrics->current_vclk0[i] = 2215 SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[inst]); 2216 gpu_metrics->current_dclk0[i] = 2217 SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[inst]); 2218 } 2219 } 2220 } 2221 2222 gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); 2223 2224 /* Throttle status is not reported through metrics now */ 2225 gpu_metrics->throttle_status = 0; 2226 2227 /* Clock Lock Status. 
Each bit corresponds to each GFXCLK instance */ 2228 gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> GET_INST(GC, 0); 2229 2230 if (!(adev->flags & AMD_IS_APU)) { 2231 if (!amdgpu_sriov_vf(adev)) { 2232 link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); 2233 if (link_width_level > MAX_LINK_WIDTH) 2234 link_width_level = 0; 2235 2236 gpu_metrics->pcie_link_width = 2237 DECODE_LANE_WIDTH(link_width_level); 2238 gpu_metrics->pcie_link_speed = 2239 smu_v13_0_6_get_current_pcie_link_speed(smu); 2240 } 2241 gpu_metrics->pcie_bandwidth_acc = 2242 SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]); 2243 gpu_metrics->pcie_bandwidth_inst = 2244 SMUQ10_ROUND(metrics_x->PcieBandwidth[0]); 2245 gpu_metrics->pcie_l0_to_recov_count_acc = 2246 metrics_x->PCIeL0ToRecoveryCountAcc; 2247 gpu_metrics->pcie_replay_count_acc = 2248 metrics_x->PCIenReplayAAcc; 2249 gpu_metrics->pcie_replay_rover_count_acc = 2250 metrics_x->PCIenReplayARolloverCountAcc; 2251 gpu_metrics->pcie_nak_sent_count_acc = 2252 metrics_x->PCIeNAKSentCountAcc; 2253 gpu_metrics->pcie_nak_rcvd_count_acc = 2254 metrics_x->PCIeNAKReceivedCountAcc; 2255 } 2256 2257 gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); 2258 2259 gpu_metrics->gfx_activity_acc = 2260 SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc)); 2261 gpu_metrics->mem_activity_acc = 2262 SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc)); 2263 2264 for (i = 0; i < NUM_XGMI_LINKS; i++) { 2265 gpu_metrics->xgmi_read_data_acc[i] = 2266 SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc)[i]); 2267 gpu_metrics->xgmi_write_data_acc[i] = 2268 SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]); 2269 } 2270 2271 for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 2272 inst = GET_INST(JPEG, i); 2273 for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { 2274 gpu_metrics->jpeg_activity[(i * adev->jpeg.num_jpeg_rings) + j] = 2275 SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy) 2276 [(inst * adev->jpeg.num_jpeg_rings) + j]); 
2277 } 2278 } 2279 2280 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 2281 inst = GET_INST(VCN, i); 2282 gpu_metrics->vcn_activity[i] = 2283 SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy)[inst]); 2284 } 2285 2286 gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth)); 2287 gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate)); 2288 2289 gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp); 2290 2291 *table = (void *)gpu_metrics; 2292 kfree(metrics_x); 2293 2294 return sizeof(*gpu_metrics); 2295} 2296 2297static void smu_v13_0_6_restore_pci_config(struct smu_context *smu) 2298{ 2299 struct amdgpu_device *adev = smu->adev; 2300 int i; 2301 2302 for (i = 0; i < 16; i++) 2303 pci_write_config_dword(adev->pdev, i * 4, 2304 adev->pdev->saved_config_space[i]); 2305 pci_restore_msi_state(adev->pdev); 2306} 2307 2308static int smu_v13_0_6_mode2_reset(struct smu_context *smu) 2309{ 2310 int ret = 0, index; 2311 struct amdgpu_device *adev = smu->adev; 2312 int timeout = 10; 2313 2314 index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, 2315 SMU_MSG_GfxDeviceDriverReset); 2316 2317 mutex_lock(&smu->message_lock); 2318 2319 ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index, 2320 SMU_RESET_MODE_2); 2321 2322 /* Reset takes a bit longer, wait for 200ms. */ 2323 msleep(200); 2324 2325 dev_dbg(smu->adev->dev, "restore config space...\n"); 2326 /* Restore the config space saved during init */ 2327 amdgpu_device_load_pci_state(adev->pdev); 2328 2329 /* Certain platforms have switches which assign virtual BAR values to 2330 * devices. OS uses the virtual BAR values and device behind the switch 2331 * is assgined another BAR value. When device's config space registers 2332 * are queried, switch returns the virtual BAR values. 
	 * When mode-2 reset
	 * is performed, switch is unaware of it, and will continue to return
	 * the same virtual values to the OS. This affects
	 * pci_restore_config_space() API as it doesn't write the value saved if
	 * the current value read from config space is the same as what is
	 * saved. As a workaround, make sure the config space is restored
	 * always.
	 */
	if (!(adev->flags & AMD_IS_APU))
		smu_v13_0_6_restore_pci_config(smu);

	dev_dbg(smu->adev->dev, "wait for reset ack\n");
	do {
		ret = smu_cmn_wait_for_response(smu);
		/* Wait a bit more time for getting ACK */
		if (ret == -ETIME) {
			--timeout;
			usleep_range(500, 1000);
			continue;
		}

		if (ret)
			goto out;

	} while (ret == -ETIME && timeout);

out:
	mutex_unlock(&smu->message_lock);

	if (ret)
		dev_err(adev->dev, "failed to send mode2 reset, error code %d",
			ret);

	return ret;
}

/*
 * Populate SOC/HBM emergency (CTF) and critical (throttle) temperature
 * limits by querying PMFW.  Values are converted to millidegrees via
 * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES.  No-op under SR-IOV or on
 * firmware older than 85.69 that lacks the GetCtfLimit message.
 */
static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu,
						     struct smu_temperature_range *range)
{
	struct amdgpu_device *adev = smu->adev;
	u32 aid_temp, xcd_temp, max_temp;
	u32 ccd_temp = 0;
	int ret;

	if (amdgpu_sriov_vf(smu->adev))
		return 0;

	if (!range)
		return -EINVAL;

	/* Check smu version, GetCtfLimit message only supported for smu version 85.69 or higher */
	if (smu->smc_fw_version < 0x554500)
		return 0;

	/* Get SOC Max operating temperature */
	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
					      PPSMC_AID_THM_TYPE, &aid_temp);
	if (ret)
		goto failed;
	/* CCD limit only exists on APU parts */
	if (adev->flags & AMD_IS_APU) {
		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
						      PPSMC_CCD_THM_TYPE, &ccd_temp);
		if (ret)
			goto failed;
	}
	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
					      PPSMC_XCD_THM_TYPE, &xcd_temp);
	if (ret)
		goto failed;
	range->hotspot_emergency_max = max3(aid_temp, xcd_temp, ccd_temp) *
				       SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;

	/* Get HBM Max operating temperature */
	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
					      PPSMC_HBM_THM_TYPE, &max_temp);
	if (ret)
		goto failed;
	range->mem_emergency_max =
		max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;

	/* Get SOC thermal throttle limit */
	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetThermalLimit,
					      PPSMC_THROTTLING_LIMIT_TYPE_SOCKET,
					      &max_temp);
	if (ret)
		goto failed;
	range->hotspot_crit_max =
		max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;

	/* Get HBM thermal throttle limit */
	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetThermalLimit,
					      PPSMC_THROTTLING_LIMIT_TYPE_HBM,
					      &max_temp);
	if (ret)
		goto failed;

	range->mem_crit_max = max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;

failed:
	return ret;
}

/*
 * Request a mode-1 (whole-GPU) reset through PMFW.  Bit 16 of the message
 * argument flags a RAS fatal error (local or anywhere in the XGMI hive)
 * so firmware can handle the reset accordingly.
 */
static int smu_v13_0_6_mode1_reset(struct smu_context *smu)
{
	struct amdgpu_device *adev = smu->adev;
	struct amdgpu_hive_info *hive = NULL;
	u32 hive_ras_recovery = 0;
	struct amdgpu_ras *ras;
	u32 fatal_err, param;
	int ret = 0;

	hive = amdgpu_get_xgmi_hive(adev);
	ras = amdgpu_ras_get_context(adev);
	fatal_err = 0;
	param = SMU_RESET_MODE_1;

	if (hive) {
		hive_ras_recovery = atomic_read(&hive->ras_recovery);
		amdgpu_put_xgmi_hive(hive);
	}

	/* fatal error triggered by ras, PMFW supports the flag */
	if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
		fatal_err = 1;

	param |= (fatal_err << 16);
	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
					      param, NULL);

	if (!ret)
		msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);

	return ret;
}

static bool
smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu)
{
	/* Mode-1 reset is always supported on this IP */
	return true;
}

static bool smu_v13_0_6_is_mode2_reset_supported(struct smu_context *smu)
{
	/* Mode-2 reset is always supported on this IP */
	return true;
}

/* Tell PMFW how many bad HBM pages have been retired. */
static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu,
						 uint32_t size)
{
	int ret = 0;

	/* message SMU to update the bad page number on SMUBUS */
	ret = smu_cmn_send_smc_msg_with_param(
		smu, SMU_MSG_SetNumBadHbmPagesRetired, size, NULL);
	if (ret)
		dev_err(smu->adev->dev,
			"[%s] failed to message SMU to update HBM bad pages number\n",
			__func__);

	return ret;
}

/* Notify PMFW that the bad-page threshold was crossed (RMA condition). */
static int smu_v13_0_6_send_rma_reason(struct smu_context *smu)
{
	struct amdgpu_device *adev = smu->adev;
	int ret;

	/* NOTE: the message is only valid on dGPU with pmfw 85.90.0 and above */
	if ((adev->flags & AMD_IS_APU) || smu->smc_fw_version < 0x00555a00)
		return 0;

	ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RmaDueToBadPageThreshold, NULL);
	if (ret)
		dev_err(smu->adev->dev,
			"[%s] failed to send BadPageThreshold event to SMU\n",
			__func__);

	return ret;
}

/* amdgpu_mca_smu_funcs::mca_set_debug_mode thunk onto the smu context. */
static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
{
	struct smu_context *smu = adev->powerplay.pp_handle;

	return smu_v13_0_6_mca_set_debug_mode(smu, enable);
}

/* Ask firmware how many valid MCA banks exist for the given error type. */
static int smu_v13_0_6_get_valid_mca_count(struct smu_context *smu, enum amdgpu_mca_error_type type, uint32_t *count)
{
	uint32_t msg;
	int ret;

	if (!count)
		return -EINVAL;

	switch (type) {
	case AMDGPU_MCA_ERROR_TYPE_UE:
		msg = SMU_MSG_QueryValidMcaCount;
		break;
	case AMDGPU_MCA_ERROR_TYPE_CE:
		msg = SMU_MSG_QueryValidMcaCeCount;
		break;
	default:
		return -EINVAL;
	}

	ret = smu_cmn_send_smc_msg(smu, msg, count);
	if (ret) {
		*count = 0;
		return ret;
	}

	return 0;
}

/* Dump one dword of an MCA bank: param = (bank index << 16) | dword offset. */
static int __smu_v13_0_6_mca_dump_bank(struct smu_context *smu, enum amdgpu_mca_error_type type,
				       int idx, int offset, uint32_t *val)
{
	uint32_t msg, param;

	switch (type) {
	case AMDGPU_MCA_ERROR_TYPE_UE:
		msg = SMU_MSG_McaBankDumpDW;
		break;
	case AMDGPU_MCA_ERROR_TYPE_CE:
		msg = SMU_MSG_McaBankCeDumpDW;
		break;
	default:
		return -EINVAL;
	}

	/* offset is dword-aligned (low two bits masked off) */
	param = ((idx & 0xffff) << 16) | (offset & 0xfffc);

	return smu_cmn_send_smc_msg_with_param(smu, msg, param, val);
}

/* Dump 'count' consecutive dwords of an MCA bank starting at 'offset'. */
static int smu_v13_0_6_mca_dump_bank(struct smu_context *smu, enum amdgpu_mca_error_type type,
				     int idx, int offset, uint32_t *val, int count)
{
	int ret, i;

	if (!val)
		return -EINVAL;

	for (i = 0; i < count; i++) {
		ret = __smu_v13_0_6_mca_dump_bank(smu, type, idx, offset + (i << 2), &val[i]);
		if (ret)
			return ret;
	}

	return 0;
}

/* HardwareID / McaType pairs used to map a bank's IPID register to an IP. */
static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT] = {
	MCA_BANK_IPID(UMC, 0x96, 0x0),
	MCA_BANK_IPID(SMU, 0x01, 0x1),
	MCA_BANK_IPID(MP5, 0x01, 0x2),
	MCA_BANK_IPID(PCS_XGMI, 0x50, 0x0),
};

/* Decode hwid/mcatype and AID/socket ids from a bank's IPID register. */
static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info)
{
	u64 ipid = entry->regs[MCA_REG_IDX_IPID];
	u32 instidhi, instid;

	/* NOTE: All MCA IPID register share the same format,
	 * so the driver can share the MCMP1 register header file.
	 * */

	info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
	info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);

	/*
	 * Unified DieID Format: SAASS. A:AID, S:Socket.
	 * Unified DieID[4] = InstanceId[0]
	 * Unified DieID[0:3] = InstanceIdHi[0:3]
	 */
	instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi);
	instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo);
	info->aid = ((instidhi >> 2) & 0x03);
	info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03);
}

/* Read one 64-bit MCA register as two dumped dwords (low, then high). */
static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
			     int idx, int reg_idx, uint64_t *val)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	uint32_t data[2] = {0, 0};
	int ret;

	if (!val || reg_idx >= MCA_REG_IDX_COUNT)
		return -EINVAL;

	ret = smu_v13_0_6_mca_dump_bank(smu, type, idx, reg_idx * 8, data, ARRAY_SIZE(data));
	if (ret)
		return ret;

	*val = (uint64_t)data[1] << 32 | data[0];

	dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n",
		type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val);

	return 0;
}

/* Populate a full mca_bank_entry (all registers + decoded info) for bank idx. */
static int mca_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
			     int idx, struct mca_bank_entry *entry)
{
	int i, ret;

	/* NOTE: populated all mca register by default */
	for (i = 0; i < ARRAY_SIZE(entry->regs); i++) {
		ret = mca_bank_read_reg(adev, type, idx, i, &entry->regs[i]);
		if (ret)
			return ret;
	}

	entry->idx = idx;
	entry->type = type;

	mca_bank_entry_info_decode(entry, &entry->info);

	return 0;
}

/* Map an IPID register value to an AMDGPU_MCA_IP_* index via the IPID table. */
static int mca_decode_ipid_to_hwip(uint64_t val)
{
	const struct mca_bank_ipid *ipid;
	uint16_t hwid, mcatype;
	int i;

	hwid = REG_GET_FIELD(val, MCMP1_IPIDT0, HardwareID);
	mcatype = REG_GET_FIELD(val, MCMP1_IPIDT0, McaType);

	for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) {
		ipid = &smu_v13_0_6_mca_ipid_table[i];

		if (!ipid->hwid)
			continue;

		if
(ipid->hwid == hwid && ipid->mcatype == mcatype) 2672 return i; 2673 } 2674 2675 return AMDGPU_MCA_IP_UNKNOW; 2676} 2677 2678static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 2679 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) 2680{ 2681 uint64_t status0; 2682 uint32_t ext_error_code; 2683 uint32_t odecc_err_cnt; 2684 2685 status0 = entry->regs[MCA_REG_IDX_STATUS]; 2686 ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status0); 2687 odecc_err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]); 2688 2689 if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { 2690 *count = 0; 2691 return 0; 2692 } 2693 2694 if (umc_v12_0_is_deferred_error(adev, status0) || 2695 umc_v12_0_is_uncorrectable_error(adev, status0) || 2696 umc_v12_0_is_correctable_error(adev, status0)) 2697 *count = (ext_error_code == 0) ? odecc_err_cnt : 1; 2698 2699 return 0; 2700} 2701 2702static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 2703 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, 2704 uint32_t *count) 2705{ 2706 u32 ext_error_code; 2707 u32 err_cnt; 2708 2709 ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]); 2710 err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]); 2711 2712 if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0) 2713 *count = err_cnt; 2714 else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6) 2715 *count = err_cnt; 2716 2717 return 0; 2718} 2719 2720static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, 2721 uint32_t errcode) 2722{ 2723 int i; 2724 2725 if (!mca_ras->err_code_count || !mca_ras->err_code_array) 2726 return true; 2727 2728 for (i = 0; i < mca_ras->err_code_count; i++) { 2729 if (errcode == mca_ras->err_code_array[i]) 2730 return true; 2731 } 2732 2733 return false; 2734} 2735 2736static int 
mca_gfx_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 2737 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) 2738{ 2739 uint64_t status0, misc0; 2740 2741 status0 = entry->regs[MCA_REG_IDX_STATUS]; 2742 if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { 2743 *count = 0; 2744 return 0; 2745 } 2746 2747 if (type == AMDGPU_MCA_ERROR_TYPE_UE && 2748 REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && 2749 REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { 2750 *count = 1; 2751 return 0; 2752 } else { 2753 misc0 = entry->regs[MCA_REG_IDX_MISC0]; 2754 *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); 2755 } 2756 2757 return 0; 2758} 2759 2760static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 2761 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) 2762{ 2763 uint64_t status0, misc0; 2764 2765 status0 = entry->regs[MCA_REG_IDX_STATUS]; 2766 if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { 2767 *count = 0; 2768 return 0; 2769 } 2770 2771 if (type == AMDGPU_MCA_ERROR_TYPE_UE && 2772 REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && 2773 REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { 2774 if (count) 2775 *count = 1; 2776 return 0; 2777 } 2778 2779 misc0 = entry->regs[MCA_REG_IDX_MISC0]; 2780 *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); 2781 2782 return 0; 2783} 2784 2785static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 2786 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) 2787{ 2788 uint32_t instlo; 2789 2790 instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); 2791 instlo &= GENMASK(31, 1); 2792 switch (instlo) { 2793 case 0x36430400: /* SMNAID XCD 0 */ 2794 case 0x38430400: /* SMNAID XCD 1 */ 2795 case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */ 2796 return true; 2797 default: 2798 return 
false; 2799 } 2800 2801 return false; 2802}; 2803 2804static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 2805 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) 2806{ 2807 struct smu_context *smu = adev->powerplay.pp_handle; 2808 uint32_t errcode, instlo; 2809 2810 instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); 2811 instlo &= GENMASK(31, 1); 2812 if (instlo != 0x03b30400) 2813 return false; 2814 2815 if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) { 2816 errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]); 2817 errcode &= 0xff; 2818 } else { 2819 errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); 2820 } 2821 2822 return mca_smu_check_error_code(adev, mca_ras, errcode); 2823} 2824 2825static int sdma_err_codes[] = { CODE_SDMA0, CODE_SDMA1, CODE_SDMA2, CODE_SDMA3 }; 2826static int mmhub_err_codes[] = { 2827 CODE_DAGB0, CODE_DAGB0 + 1, CODE_DAGB0 + 2, CODE_DAGB0 + 3, CODE_DAGB0 + 4, /* DAGB0-4 */ 2828 CODE_EA0, CODE_EA0 + 1, CODE_EA0 + 2, CODE_EA0 + 3, CODE_EA0 + 4, /* MMEA0-4*/ 2829 CODE_VML2, CODE_VML2_WALKER, CODE_MMCANE, 2830}; 2831 2832static const struct mca_ras_info mca_ras_table[] = { 2833 { 2834 .blkid = AMDGPU_RAS_BLOCK__UMC, 2835 .ip = AMDGPU_MCA_IP_UMC, 2836 .get_err_count = mca_umc_mca_get_err_count, 2837 }, { 2838 .blkid = AMDGPU_RAS_BLOCK__GFX, 2839 .ip = AMDGPU_MCA_IP_SMU, 2840 .get_err_count = mca_gfx_mca_get_err_count, 2841 .bank_is_valid = mca_gfx_smu_bank_is_valid, 2842 }, { 2843 .blkid = AMDGPU_RAS_BLOCK__SDMA, 2844 .ip = AMDGPU_MCA_IP_SMU, 2845 .err_code_array = sdma_err_codes, 2846 .err_code_count = ARRAY_SIZE(sdma_err_codes), 2847 .get_err_count = mca_smu_mca_get_err_count, 2848 .bank_is_valid = mca_smu_bank_is_valid, 2849 }, { 2850 .blkid = AMDGPU_RAS_BLOCK__MMHUB, 2851 .ip = AMDGPU_MCA_IP_SMU, 2852 .err_code_array = mmhub_err_codes, 2853 .err_code_count = 
				  ARRAY_SIZE(mmhub_err_codes),
		.get_err_count = mca_smu_mca_get_err_count,
		.bank_is_valid = mca_smu_bank_is_valid,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__XGMI_WAFL,
		.ip = AMDGPU_MCA_IP_PCS_XGMI,
		.get_err_count = mca_pcs_xgmi_mca_get_err_count,
	},
};

/* Find the mca_ras_table row for a RAS block, or NULL if unsupported. */
static const struct mca_ras_info *mca_get_mca_ras_info(struct amdgpu_device *adev, enum amdgpu_ras_block blkid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(mca_ras_table); i++) {
		if (mca_ras_table[i].blkid == blkid)
			return &mca_ras_table[i];
	}

	return NULL;
}

/* Validate the error type, then query firmware for the valid bank count. */
static int mca_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	int ret;

	switch (type) {
	case AMDGPU_MCA_ERROR_TYPE_UE:
	case AMDGPU_MCA_ERROR_TYPE_CE:
		ret = smu_v13_0_6_get_valid_mca_count(smu, type, count);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/* A bank belongs to 'mca_ras' when its IPID maps to that IP and the
 * block-specific validator (if any) accepts it.
 */
static bool mca_bank_is_valid(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
			      enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
{
	if (mca_decode_ipid_to_hwip(entry->regs[MCA_REG_IDX_IPID]) != mca_ras->ip)
		return false;

	if (mca_ras->bank_is_valid)
		return mca_ras->bank_is_valid(mca_ras, adev, type, entry);

	return true;
}

/* Collect all valid banks (optionally filtered by 'mca_ras') into mca_set. */
static int __mca_smu_get_ras_mca_set(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
				     enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set)
{
	struct mca_bank_entry entry;
	uint32_t mca_cnt;
	int i, ret;

	ret = mca_get_valid_mca_count(adev, type, &mca_cnt);
	if (ret)
		return ret;

	/* if valid mca bank count is 0, the driver can return 0 directly */
	if (!mca_cnt)
		return 0;

	for (i = 0; i < mca_cnt; i++) {
		memset(&entry, 0, sizeof(entry));
		ret = mca_get_mca_entry(adev, type, i, &entry);
		if (ret)
			return ret;

		if (mca_ras && !mca_bank_is_valid(adev, mca_ras, type, &entry))
			continue;

		ret = amdgpu_mca_bank_set_add_entry(mca_set, &entry);
		if (ret)
			return ret;
	}

	return 0;
}

/* amdgpu_mca_smu_funcs::mca_get_ras_mca_set: resolve the block then delegate. */
static int mca_smu_get_ras_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
				   enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set)
{
	const struct mca_ras_info *mca_ras = NULL;

	if (!mca_set)
		return -EINVAL;

	/* AMDGPU_RAS_BLOCK_COUNT means "all blocks": no per-block filter */
	if (blk != AMDGPU_RAS_BLOCK_COUNT) {
		mca_ras = mca_get_mca_ras_info(adev, blk);
		if (!mca_ras)
			return -EOPNOTSUPP;
	}

	return __mca_smu_get_ras_mca_set(adev, mca_ras, type, mca_set);
}

/* Parse one bank entry into an error count for the given RAS block. */
static int mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
					 struct mca_bank_entry *entry, uint32_t *count)
{
	const struct mca_ras_info *mca_ras;

	if (!entry || !count)
		return -EINVAL;

	mca_ras = mca_get_mca_ras_info(adev, blk);
	if (!mca_ras)
		return -EOPNOTSUPP;

	if (!mca_bank_is_valid(adev, mca_ras, type, entry)) {
		*count = 0;
		return 0;
	}

	return mca_ras->get_err_count(mca_ras, adev, type, entry, count);
}

/* Thin amdgpu_mca_smu_funcs wrapper. */
static int mca_smu_get_mca_entry(struct amdgpu_device *adev,
				 enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry)
{
	return mca_get_mca_entry(adev, type, idx, entry);
}

/* Thin amdgpu_mca_smu_funcs wrapper. */
static int mca_smu_get_valid_mca_count(struct amdgpu_device *adev,
				       enum amdgpu_mca_error_type type, uint32_t *count)
{
	return mca_get_valid_mca_count(adev, type, count);
}

static const struct amdgpu_mca_smu_funcs smu_v13_0_6_mca_smu_funcs = {
	.max_ue_count = 12,
	.max_ce_count = 12,
	.mca_set_debug_mode = mca_smu_set_debug_mode,
	.mca_get_ras_mca_set = mca_smu_get_ras_mca_set,
	.mca_parse_mca_error_count = mca_smu_parse_mca_error_count,
	.mca_get_mca_entry = mca_smu_get_mca_entry,
	.mca_get_valid_mca_count = mca_smu_get_valid_mca_count,
};

/* aca_smu_funcs::set_debug_mode thunk (shares the MCA debug-mode toggle). */
static int aca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
{
	struct smu_context *smu = adev->powerplay.pp_handle;

	return smu_v13_0_6_mca_set_debug_mode(smu, enable);
}

/* Ask firmware how many valid ACA banks exist for the given error type. */
static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_error_type type, u32 *count)
{
	uint32_t msg;
	int ret;

	if (!count)
		return -EINVAL;

	switch (type) {
	case ACA_ERROR_TYPE_UE:
		msg = SMU_MSG_QueryValidMcaCount;
		break;
	case ACA_ERROR_TYPE_CE:
		msg = SMU_MSG_QueryValidMcaCeCount;
		break;
	default:
		return -EINVAL;
	}

	ret = smu_cmn_send_smc_msg(smu, msg, count);
	if (ret) {
		*count = 0;
		return ret;
	}

	return 0;
}

/* aca_smu_funcs::get_valid_aca_count: validate the type, then query firmware. */
static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev,
				       enum aca_error_type type, u32 *count)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	int ret;

	switch (type) {
	case ACA_ERROR_TYPE_UE:
	case ACA_ERROR_TYPE_CE:
		ret = smu_v13_0_6_get_valid_aca_count(smu, type, count);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/* Dump one dword of an ACA bank: param = (bank index << 16) | dword offset. */
static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_type type,
				       int idx, int offset, u32 *val)
{
	uint32_t msg, param;

	switch (type) {
	case ACA_ERROR_TYPE_UE:
		msg = SMU_MSG_McaBankDumpDW;
		break;
	case ACA_ERROR_TYPE_CE:
		msg = SMU_MSG_McaBankCeDumpDW;
		break;
	default:
		return -EINVAL;
	}

	param = ((idx & 0xffff) << 16) | (offset & 0xfffc);

	return smu_cmn_send_smc_msg_with_param(smu, msg, param, (uint32_t *)val);
}

static int
smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_type type, 3072 int idx, int offset, u32 *val, int count) 3073{ 3074 int ret, i; 3075 3076 if (!val) 3077 return -EINVAL; 3078 3079 for (i = 0; i < count; i++) { 3080 ret = __smu_v13_0_6_aca_bank_dump(smu, type, idx, offset + (i << 2), &val[i]); 3081 if (ret) 3082 return ret; 3083 } 3084 3085 return 0; 3086} 3087 3088static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_error_type type, 3089 int idx, int reg_idx, u64 *val) 3090{ 3091 struct smu_context *smu = adev->powerplay.pp_handle; 3092 u32 data[2] = {0, 0}; 3093 int ret; 3094 3095 if (!val || reg_idx >= ACA_REG_IDX_COUNT) 3096 return -EINVAL; 3097 3098 ret = smu_v13_0_6_aca_bank_dump(smu, type, idx, reg_idx * 8, data, ARRAY_SIZE(data)); 3099 if (ret) 3100 return ret; 3101 3102 *val = (u64)data[1] << 32 | data[0]; 3103 3104 dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n", 3105 type == ACA_ERROR_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val); 3106 3107 return 0; 3108} 3109 3110static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev, 3111 enum aca_error_type type, int idx, struct aca_bank *bank) 3112{ 3113 int i, ret, count; 3114 3115 count = min_t(int, 16, ARRAY_SIZE(bank->regs)); 3116 for (i = 0; i < count; i++) { 3117 ret = aca_bank_read_reg(adev, type, idx, i, &bank->regs[i]); 3118 if (ret) 3119 return ret; 3120 } 3121 3122 return 0; 3123} 3124 3125static const struct aca_smu_funcs smu_v13_0_6_aca_smu_funcs = { 3126 .max_ue_bank_count = 12, 3127 .max_ce_bank_count = 12, 3128 .set_debug_mode = aca_smu_set_debug_mode, 3129 .get_valid_aca_count = aca_smu_get_valid_aca_count, 3130 .get_valid_aca_bank = aca_smu_get_valid_aca_bank, 3131}; 3132 3133static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, 3134 enum pp_xgmi_plpd_mode mode) 3135{ 3136 struct amdgpu_device *adev = smu->adev; 3137 int ret, param; 3138 3139 switch (mode) { 3140 case XGMI_PLPD_DEFAULT: 3141 param = 
PPSMC_PLPD_MODE_DEFAULT; 3142 break; 3143 case XGMI_PLPD_OPTIMIZED: 3144 param = PPSMC_PLPD_MODE_OPTIMIZED; 3145 break; 3146 case XGMI_PLPD_DISALLOW: 3147 param = 0; 3148 break; 3149 default: 3150 return -EINVAL; 3151 } 3152 3153 if (mode == XGMI_PLPD_DISALLOW) 3154 ret = smu_cmn_send_smc_msg_with_param(smu, 3155 SMU_MSG_GmiPwrDnControl, 3156 param, NULL); 3157 else 3158 /* change xgmi per-link power down policy */ 3159 ret = smu_cmn_send_smc_msg_with_param(smu, 3160 SMU_MSG_SelectPLPDMode, 3161 param, NULL); 3162 3163 if (ret) 3164 dev_err(adev->dev, 3165 "select xgmi per-link power down policy %d failed\n", 3166 mode); 3167 3168 return ret; 3169} 3170 3171static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { 3172 /* init dpm */ 3173 .get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask, 3174 /* dpm/clk tables */ 3175 .set_default_dpm_table = smu_v13_0_6_set_default_dpm_table, 3176 .populate_umd_state_clk = smu_v13_0_6_populate_umd_state_clk, 3177 .print_clk_levels = smu_v13_0_6_print_clk_levels, 3178 .force_clk_levels = smu_v13_0_6_force_clk_levels, 3179 .read_sensor = smu_v13_0_6_read_sensor, 3180 .set_performance_level = smu_v13_0_6_set_performance_level, 3181 .get_power_limit = smu_v13_0_6_get_power_limit, 3182 .is_dpm_running = smu_v13_0_6_is_dpm_running, 3183 .get_unique_id = smu_v13_0_6_get_unique_id, 3184 .init_microcode = smu_v13_0_6_init_microcode, 3185 .fini_microcode = smu_v13_0_fini_microcode, 3186 .init_smc_tables = smu_v13_0_6_init_smc_tables, 3187 .fini_smc_tables = smu_v13_0_fini_smc_tables, 3188 .init_power = smu_v13_0_init_power, 3189 .fini_power = smu_v13_0_fini_power, 3190 .check_fw_status = smu_v13_0_6_check_fw_status, 3191 /* pptable related */ 3192 .check_fw_version = smu_v13_0_check_fw_version, 3193 .set_driver_table_location = smu_v13_0_set_driver_table_location, 3194 .set_tool_table_location = smu_v13_0_set_tool_table_location, 3195 .notify_memory_pool_location = smu_v13_0_notify_memory_pool_location, 3196 
.system_features_control = smu_v13_0_6_system_features_control, 3197 .send_smc_msg_with_param = smu_cmn_send_smc_msg_with_param, 3198 .send_smc_msg = smu_cmn_send_smc_msg, 3199 .get_enabled_mask = smu_v13_0_6_get_enabled_mask, 3200 .feature_is_enabled = smu_cmn_feature_is_enabled, 3201 .set_power_limit = smu_v13_0_6_set_power_limit, 3202 .set_xgmi_pstate = smu_v13_0_set_xgmi_pstate, 3203 .register_irq_handler = smu_v13_0_6_register_irq_handler, 3204 .enable_thermal_alert = smu_v13_0_enable_thermal_alert, 3205 .disable_thermal_alert = smu_v13_0_disable_thermal_alert, 3206 .setup_pptable = smu_v13_0_6_setup_pptable, 3207 .baco_is_support = smu_v13_0_6_is_baco_supported, 3208 .get_dpm_ultimate_freq = smu_v13_0_6_get_dpm_ultimate_freq, 3209 .set_soft_freq_limited_range = smu_v13_0_6_set_soft_freq_limited_range, 3210 .od_edit_dpm_table = smu_v13_0_6_usr_edit_dpm_table, 3211 .select_xgmi_plpd_policy = smu_v13_0_6_select_xgmi_plpd_policy, 3212 .log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event, 3213 .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, 3214 .get_gpu_metrics = smu_v13_0_6_get_gpu_metrics, 3215 .get_pm_metrics = smu_v13_0_6_get_pm_metrics, 3216 .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, 3217 .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, 3218 .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported, 3219 .mode1_reset = smu_v13_0_6_mode1_reset, 3220 .mode2_reset = smu_v13_0_6_mode2_reset, 3221 .wait_for_event = smu_v13_0_wait_for_event, 3222 .i2c_init = smu_v13_0_6_i2c_control_init, 3223 .i2c_fini = smu_v13_0_6_i2c_control_fini, 3224 .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, 3225 .send_rma_reason = smu_v13_0_6_send_rma_reason, 3226}; 3227 3228void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) 3229{ 3230 smu->ppt_funcs = &smu_v13_0_6_ppt_funcs; 3231 smu->message_map = smu_v13_0_6_message_map; 3232 smu->clock_map = smu_v13_0_6_clk_map; 3233 smu->feature_map 
= smu_v13_0_6_feature_mask_map; 3234 smu->table_map = smu_v13_0_6_table_map; 3235 smu->smc_driver_if_version = SMU13_0_6_DRIVER_IF_VERSION; 3236 smu_v13_0_set_smu_mailbox_registers(smu); 3237 amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs); 3238 amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs); 3239} 3240