1/* $NetBSD: amdgpu_discovery.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $ */ 2 3/* 4 * Copyright 2018 Advanced Micro Devices, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 */ 25 26#include <sys/cdefs.h> 27__KERNEL_RCSID(0, "$NetBSD: amdgpu_discovery.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $"); 28 29#include "amdgpu.h" 30#include "amdgpu_discovery.h" 31#include "soc15_common.h" 32#include "soc15_hw_ip.h" 33#include "nbio/nbio_2_3_offset.h" 34#include "discovery.h" 35 36#define mmRCC_CONFIG_MEMSIZE 0xde3 37#define mmMM_INDEX 0x0 38#define mmMM_INDEX_HI 0x6 39#define mmMM_DATA 0x1 40#define HW_ID_MAX 300 41 42const char *hw_id_names[HW_ID_MAX] = { 43 [MP1_HWID] = "MP1", 44 [MP2_HWID] = "MP2", 45 [THM_HWID] = "THM", 46 [SMUIO_HWID] = "SMUIO", 47 [FUSE_HWID] = "FUSE", 48 [CLKA_HWID] = "CLKA", 49 [PWR_HWID] = "PWR", 50 [GC_HWID] = "GC", 51 [UVD_HWID] = "UVD", 52 [AUDIO_AZ_HWID] = "AUDIO_AZ", 53 [ACP_HWID] = "ACP", 54 [DCI_HWID] = "DCI", 55 [DMU_HWID] = "DMU", 56 [DCO_HWID] = "DCO", 57 [DIO_HWID] = "DIO", 58 [XDMA_HWID] = "XDMA", 59 [DCEAZ_HWID] = "DCEAZ", 60 [DAZ_HWID] = "DAZ", 61 [SDPMUX_HWID] = "SDPMUX", 62 [NTB_HWID] = "NTB", 63 [IOHC_HWID] = "IOHC", 64 [L2IMU_HWID] = "L2IMU", 65 [VCE_HWID] = "VCE", 66 [MMHUB_HWID] = "MMHUB", 67 [ATHUB_HWID] = "ATHUB", 68 [DBGU_NBIO_HWID] = "DBGU_NBIO", 69 [DFX_HWID] = "DFX", 70 [DBGU0_HWID] = "DBGU0", 71 [DBGU1_HWID] = "DBGU1", 72 [OSSSYS_HWID] = "OSSSYS", 73 [HDP_HWID] = "HDP", 74 [SDMA0_HWID] = "SDMA0", 75 [SDMA1_HWID] = "SDMA1", 76 [ISP_HWID] = "ISP", 77 [DBGU_IO_HWID] = "DBGU_IO", 78 [DF_HWID] = "DF", 79 [CLKB_HWID] = "CLKB", 80 [FCH_HWID] = "FCH", 81 [DFX_DAP_HWID] = "DFX_DAP", 82 [L1IMU_PCIE_HWID] = "L1IMU_PCIE", 83 [L1IMU_NBIF_HWID] = "L1IMU_NBIF", 84 [L1IMU_IOAGR_HWID] = "L1IMU_IOAGR", 85 [L1IMU3_HWID] = "L1IMU3", 86 [L1IMU4_HWID] = "L1IMU4", 87 [L1IMU5_HWID] = "L1IMU5", 88 [L1IMU6_HWID] = "L1IMU6", 89 [L1IMU7_HWID] = "L1IMU7", 90 [L1IMU8_HWID] = "L1IMU8", 91 [L1IMU9_HWID] = "L1IMU9", 92 [L1IMU10_HWID] = "L1IMU10", 93 [L1IMU11_HWID] = "L1IMU11", 94 [L1IMU12_HWID] = "L1IMU12", 95 [L1IMU13_HWID] = "L1IMU13", 96 [L1IMU14_HWID] = "L1IMU14", 97 [L1IMU15_HWID] = "L1IMU15", 98 [WAFLC_HWID] = "WAFLC", 99 [FCH_USB_PD_HWID] = "FCH_USB_PD", 100 [PCIE_HWID] = "PCIE", 101 [PCS_HWID] = "PCS", 102 [DDCL_HWID] = "DDCL", 103 [SST_HWID] = "SST", 104 [IOAGR_HWID] = "IOAGR", 105 [NBIF_HWID] = "NBIF", 106 [IOAPIC_HWID] = "IOAPIC", 107 [SYSTEMHUB_HWID] = "SYSTEMHUB", 108 [NTBCCP_HWID] = "NTBCCP", 109 [UMC_HWID] = "UMC", 110 [SATA_HWID] = "SATA", 111 [USB_HWID] = "USB", 112 [CCXSEC_HWID] = "CCXSEC", 113 [XGMI_HWID] = "XGMI", 114 [XGBE_HWID] = "XGBE", 115 [MP0_HWID] = "MP0", 116}; 117 118static int hw_id_map[MAX_HWIP] = { 119 [GC_HWIP] = GC_HWID, 120 [HDP_HWIP] = HDP_HWID, 121 [SDMA0_HWIP] = SDMA0_HWID, 122 [SDMA1_HWIP] = SDMA1_HWID, 123 [MMHUB_HWIP] = MMHUB_HWID, 124 [ATHUB_HWIP] = ATHUB_HWID, 125 [NBIO_HWIP] = NBIF_HWID, 126 [MP0_HWIP] = MP0_HWID, 127 [MP1_HWIP] = MP1_HWID, 128 [UVD_HWIP] = UVD_HWID, 129 [VCE_HWIP] = VCE_HWID, 130 [DF_HWIP] = DF_HWID, 131 [DCE_HWIP] = DMU_HWID, 132 [OSSSYS_HWIP] = OSSSYS_HWID, 133 [SMUIO_HWIP] = SMUIO_HWID, 134 [PWR_HWIP] = PWR_HWID, 135 [NBIF_HWIP] = NBIF_HWID, 136 [THM_HWIP] = THM_HWID, 137 [CLK_HWIP] = CLKA_HWID, 138}; 139 140static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) 141{ 142 uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; 143 uint64_t pos = vram_size - DISCOVERY_TMR_SIZE; 144 145 amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false); 146 return 0; 147} 148 149static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size) 150{ 151 uint16_t checksum = 0; 152 int i; 153 154 for (i = 0; i < size; i++) 155 checksum += data[i]; 156 157 return checksum; 158} 159 160static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size, 161 uint16_t expected) 162{ 163 return !!(amdgpu_discovery_calculate_checksum(data, size) == expected); 164} 165 166int amdgpu_discovery_init(struct amdgpu_device *adev) 167{ 168 struct table_info *info; 169 struct binary_header *bhdr; 170 struct ip_discovery_header *ihdr; 171 struct gpu_info_header *ghdr; 172 uint16_t offset; 173 uint16_t size; 174 uint16_t checksum; 175 int r; 176 177 adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL); 178 if (!adev->discovery) 179 return -ENOMEM; 180 181 r = amdgpu_discovery_read_binary(adev, adev->discovery); 182 if (r) { 183 DRM_ERROR("failed to read ip discovery binary\n"); 184 goto out; 185 } 186 187 bhdr = (struct binary_header *)adev->discovery; 188 189 if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) { 190 DRM_ERROR("invalid ip discovery binary signature\n"); 191 r = -EINVAL; 192 goto out; 193 } 194 195 offset = offsetof(struct binary_header, binary_checksum) + 196 sizeof(bhdr->binary_checksum); 197 size = bhdr->binary_size - offset; 198 checksum = bhdr->binary_checksum; 199 200 if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, 201 size, checksum)) { 202 DRM_ERROR("invalid ip discovery binary checksum\n"); 203 r = -EINVAL; 204 goto out; 205 } 206 207 info = &bhdr->table_list[IP_DISCOVERY]; 208 offset = le16_to_cpu(info->offset); 209 checksum = le16_to_cpu(info->checksum); 210 ihdr = (struct ip_discovery_header *)(adev->discovery + offset); 211 212 if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { 213 DRM_ERROR("invalid ip discovery data table signature\n"); 214 r = -EINVAL; 215 goto out; 216 } 217 218 if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, 219 ihdr->size, checksum)) { 220 DRM_ERROR("invalid ip discovery data table checksum\n"); 221 r = -EINVAL; 222 goto out; 223 } 224 225 info = &bhdr->table_list[GC]; 226 offset = le16_to_cpu(info->offset); 227 checksum = le16_to_cpu(info->checksum); 228 ghdr = (struct gpu_info_header *)(adev->discovery + offset); 229 230 if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, 231 ghdr->size, checksum)) { 232 DRM_ERROR("invalid gc data table checksum\n"); 233 r = -EINVAL; 234 goto out; 235 } 236 237 return 0; 238 239out: 240 kfree(adev->discovery); 241 adev->discovery = NULL; 242 243 return r; 244} 245 246void amdgpu_discovery_fini(struct amdgpu_device *adev) 247{ 248 kfree(adev->discovery); 249 adev->discovery = NULL; 250} 251 252int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) 253{ 254 struct binary_header *bhdr; 255 struct ip_discovery_header *ihdr; 256 struct die_header *dhdr; 257 struct ip *ip; 258 uint16_t die_offset; 259 uint16_t ip_offset; 260 uint16_t num_dies; 261 uint16_t num_ips; 262 uint8_t num_base_address; 263 int hw_ip; 264 int i, j, k; 265 266 if (!adev->discovery) { 267 DRM_ERROR("ip discovery uninitialized\n"); 268 return -EINVAL; 269 } 270 271 bhdr = (struct binary_header *)adev->discovery; 272 ihdr = (struct ip_discovery_header *)(adev->discovery + 273 le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); 274 num_dies = le16_to_cpu(ihdr->num_dies); 275 276 DRM_DEBUG("number of dies: %d\n", num_dies); 277 278 for (i = 0; i < num_dies; i++) { 279 die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); 280 dhdr = (struct die_header *)(adev->discovery + die_offset); 281 num_ips = le16_to_cpu(dhdr->num_ips); 282 ip_offset = die_offset + sizeof(*dhdr); 283 284 if (le16_to_cpu(dhdr->die_id) != i) { 285 DRM_ERROR("invalid die id %d, expected %d\n", 286 le16_to_cpu(dhdr->die_id), i); 287 return -EINVAL; 288 } 289 290 DRM_DEBUG("number of hardware IPs on die%d: %d\n", 291 le16_to_cpu(dhdr->die_id), num_ips); 292 293 for (j = 0; j < num_ips; j++) { 294 ip = (struct ip *)(adev->discovery + ip_offset); 295 num_base_address = ip->num_base_address; 296 297 DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n", 298 hw_id_names[le16_to_cpu(ip->hw_id)], 299 le16_to_cpu(ip->hw_id), 300 ip->number_instance, 301 ip->major, ip->minor, 302 ip->revision); 303 304 for (k = 0; k < num_base_address; k++) { 305 /* 306 * convert the endianness of base addresses in place, 307 * so that we don't need to convert them when accessing adev->reg_offset. 308 */ 309 ip->base_address[k] = le32_to_cpu(ip->base_address[k]); 310 DRM_DEBUG("\t0x%08x\n", ip->base_address[k]); 311 } 312 313 for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) { 314 if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) { 315 DRM_INFO("set register base offset for %s\n", 316 hw_id_names[le16_to_cpu(ip->hw_id)]); 317 adev->reg_offset[hw_ip][ip->number_instance] = 318 ip->base_address; 319 } 320 321 } 322 323 ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); 324 } 325 } 326 327 return 0; 328} 329 330int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, 331 int *major, int *minor, int *revision) 332{ 333 struct binary_header *bhdr; 334 struct ip_discovery_header *ihdr; 335 struct die_header *dhdr; 336 struct ip *ip; 337 uint16_t die_offset; 338 uint16_t ip_offset; 339 uint16_t num_dies; 340 uint16_t num_ips; 341 int i, j; 342 343 if (!adev->discovery) { 344 DRM_ERROR("ip discovery uninitialized\n"); 345 return -EINVAL; 346 } 347 348 bhdr = (struct binary_header *)adev->discovery; 349 ihdr = (struct ip_discovery_header *)(adev->discovery + 350 le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); 351 num_dies = le16_to_cpu(ihdr->num_dies); 352 353 for (i = 0; i < num_dies; i++) { 354 die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); 355 dhdr = (struct die_header *)(adev->discovery + die_offset); 356 num_ips = le16_to_cpu(dhdr->num_ips); 357 ip_offset = die_offset + sizeof(*dhdr); 358 359 for (j = 0; j < num_ips; j++) { 360 ip = (struct ip *)(adev->discovery + ip_offset); 361 362 if (le16_to_cpu(ip->hw_id) == hw_id) { 363 if (major) 364 *major = ip->major; 365 if (minor) 366 *minor = ip->minor; 367 if (revision) 368 *revision = ip->revision; 369 return 0; 370 } 371 ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); 372 } 373 } 374 375 return -EINVAL; 376} 377 378int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) 379{ 380 struct binary_header *bhdr; 381 struct gc_info_v1_0 *gc_info; 382 383 if (!adev->discovery) { 384 DRM_ERROR("ip discovery uninitialized\n"); 385 return -EINVAL; 386 } 387 388 bhdr = (struct binary_header *)adev->discovery; 389 gc_info = (struct gc_info_v1_0 *)(adev->discovery + 390 le16_to_cpu(bhdr->table_list[GC].offset)); 391 392 adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); 393 adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + 394 le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); 395 adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); 396 adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); 397 adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); 398 adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); 399 adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); 400 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); 401 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); 402 adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); 403 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); 404 adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); 405 adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); 406 adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); 407 adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / 408 le32_to_cpu(gc_info->gc_num_sa_per_se); 409 adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); 410 411 return 0; 412} 413