1/*	$NetBSD: amdgpu_discovery.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $	*/
2
3/*
4 * Copyright 2018 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25
26#include <sys/cdefs.h>
27__KERNEL_RCSID(0, "$NetBSD: amdgpu_discovery.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $");
28
29#include "amdgpu.h"
30#include "amdgpu_discovery.h"
31#include "soc15_common.h"
32#include "soc15_hw_ip.h"
33#include "nbio/nbio_2_3_offset.h"
34#include "discovery.h"
35
/*
 * Register read by amdgpu_discovery_read_binary() to obtain the VRAM size;
 * the value read is scaled by 2^20 there (presumably it is in MiB - confirm).
 */
#define mmRCC_CONFIG_MEMSIZE	0xde3
/*
 * MM index/data register constants.
 * NOTE(review): not referenced anywhere in this file - confirm whether they
 * are still needed.
 */
#define mmMM_INDEX		0x0
#define mmMM_INDEX_HI		0x6
#define mmMM_DATA		0x1
/* Number of slots in the hw_id_names[] lookup table below. */
#define HW_ID_MAX		300
41
/*
 * Printable names for hardware IDs, indexed by the *_HWID constants.
 *
 * The table has HW_ID_MAX slots; any slot without a designated initializer
 * below is NULL.  amdgpu_discovery_reg_base_init() uses it to label the IP
 * entries it logs.
 */
const char *hw_id_names[HW_ID_MAX] = {
	[MP1_HWID]		= "MP1",
	[MP2_HWID]		= "MP2",
	[THM_HWID]		= "THM",
	[SMUIO_HWID]		= "SMUIO",
	[FUSE_HWID]		= "FUSE",
	[CLKA_HWID]		= "CLKA",
	[PWR_HWID]		= "PWR",
	[GC_HWID]		= "GC",
	[UVD_HWID]		= "UVD",
	[AUDIO_AZ_HWID]		= "AUDIO_AZ",
	[ACP_HWID]		= "ACP",
	[DCI_HWID]		= "DCI",
	[DMU_HWID]		= "DMU",
	[DCO_HWID]		= "DCO",
	[DIO_HWID]		= "DIO",
	[XDMA_HWID]		= "XDMA",
	[DCEAZ_HWID]		= "DCEAZ",
	[DAZ_HWID]		= "DAZ",
	[SDPMUX_HWID]		= "SDPMUX",
	[NTB_HWID]		= "NTB",
	[IOHC_HWID]		= "IOHC",
	[L2IMU_HWID]		= "L2IMU",
	[VCE_HWID]		= "VCE",
	[MMHUB_HWID]		= "MMHUB",
	[ATHUB_HWID]		= "ATHUB",
	[DBGU_NBIO_HWID]	= "DBGU_NBIO",
	[DFX_HWID]		= "DFX",
	[DBGU0_HWID]		= "DBGU0",
	[DBGU1_HWID]		= "DBGU1",
	[OSSSYS_HWID]		= "OSSSYS",
	[HDP_HWID]		= "HDP",
	[SDMA0_HWID]		= "SDMA0",
	[SDMA1_HWID]		= "SDMA1",
	[ISP_HWID]		= "ISP",
	[DBGU_IO_HWID]		= "DBGU_IO",
	[DF_HWID]		= "DF",
	[CLKB_HWID]		= "CLKB",
	[FCH_HWID]		= "FCH",
	[DFX_DAP_HWID]		= "DFX_DAP",
	[L1IMU_PCIE_HWID]	= "L1IMU_PCIE",
	[L1IMU_NBIF_HWID]	= "L1IMU_NBIF",
	[L1IMU_IOAGR_HWID]	= "L1IMU_IOAGR",
	[L1IMU3_HWID]		= "L1IMU3",
	[L1IMU4_HWID]		= "L1IMU4",
	[L1IMU5_HWID]		= "L1IMU5",
	[L1IMU6_HWID]		= "L1IMU6",
	[L1IMU7_HWID]		= "L1IMU7",
	[L1IMU8_HWID]		= "L1IMU8",
	[L1IMU9_HWID]		= "L1IMU9",
	[L1IMU10_HWID]		= "L1IMU10",
	[L1IMU11_HWID]		= "L1IMU11",
	[L1IMU12_HWID]		= "L1IMU12",
	[L1IMU13_HWID]		= "L1IMU13",
	[L1IMU14_HWID]		= "L1IMU14",
	[L1IMU15_HWID]		= "L1IMU15",
	[WAFLC_HWID]		= "WAFLC",
	[FCH_USB_PD_HWID]	= "FCH_USB_PD",
	[PCIE_HWID]		= "PCIE",
	[PCS_HWID]		= "PCS",
	[DDCL_HWID]		= "DDCL",
	[SST_HWID]		= "SST",
	[IOAGR_HWID]		= "IOAGR",
	[NBIF_HWID]		= "NBIF",
	[IOAPIC_HWID]		= "IOAPIC",
	[SYSTEMHUB_HWID]	= "SYSTEMHUB",
	[NTBCCP_HWID]		= "NTBCCP",
	[UMC_HWID]		= "UMC",
	[SATA_HWID]		= "SATA",
	[USB_HWID]		= "USB",
	[CCXSEC_HWID]		= "CCXSEC",
	[XGMI_HWID]		= "XGMI",
	[XGBE_HWID]		= "XGBE",
	[MP0_HWID]		= "MP0",
};
117
/*
 * Maps a driver-side *_HWIP index to the hardware ID (*_HWID) that
 * amdgpu_discovery_reg_base_init() matches against each discovery entry.
 *
 * Slots without a designated initializer are 0.  Note that NBIO_HWIP and
 * NBIF_HWIP both map to NBIF_HWID, DCE_HWIP maps to DMU_HWID and CLK_HWIP
 * maps to CLKA_HWID.
 */
static int hw_id_map[MAX_HWIP] = {
	[GC_HWIP]	= GC_HWID,
	[HDP_HWIP]	= HDP_HWID,
	[SDMA0_HWIP]	= SDMA0_HWID,
	[SDMA1_HWIP]	= SDMA1_HWID,
	[MMHUB_HWIP]	= MMHUB_HWID,
	[ATHUB_HWIP]	= ATHUB_HWID,
	[NBIO_HWIP]	= NBIF_HWID,
	[MP0_HWIP]	= MP0_HWID,
	[MP1_HWIP]	= MP1_HWID,
	[UVD_HWIP]	= UVD_HWID,
	[VCE_HWIP]	= VCE_HWID,
	[DF_HWIP]	= DF_HWID,
	[DCE_HWIP]	= DMU_HWID,
	[OSSSYS_HWIP]	= OSSSYS_HWID,
	[SMUIO_HWIP]	= SMUIO_HWID,
	[PWR_HWIP]	= PWR_HWID,
	[NBIF_HWIP]	= NBIF_HWID,
	[THM_HWIP]	= THM_HWID,
	[CLK_HWIP]	= CLKA_HWID,
};
139
140static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
141{
142	uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
143	uint64_t pos = vram_size - DISCOVERY_TMR_SIZE;
144
145	amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false);
146	return 0;
147}
148
/*
 * Compute the byte-sum checksum of a buffer.
 *
 * @data: buffer to sum (not modified)
 * @size: number of bytes to sum
 *
 * Returns the sum of all @size bytes truncated to 16 bits (i.e. modulo
 * 65536); 0 for an empty buffer.
 */
static uint16_t amdgpu_discovery_calculate_checksum(const uint8_t *data, uint32_t size)
{
	uint16_t checksum = 0;
	uint32_t i;	/* same width as @size: no signed/unsigned mismatch */

	for (i = 0; i < size; i++)
		checksum += data[i];

	return checksum;
}
159
/*
 * Check a buffer against an expected checksum.
 *
 * @data:     buffer to check
 * @size:     number of bytes covered by the checksum
 * @expected: checksum value the buffer is supposed to have
 *
 * Returns true when amdgpu_discovery_calculate_checksum() over the buffer
 * equals @expected, false otherwise.
 */
static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
						    uint16_t expected)
{
	const uint16_t actual = amdgpu_discovery_calculate_checksum(data, size);

	return actual == expected;
}
165
166int amdgpu_discovery_init(struct amdgpu_device *adev)
167{
168	struct table_info *info;
169	struct binary_header *bhdr;
170	struct ip_discovery_header *ihdr;
171	struct gpu_info_header *ghdr;
172	uint16_t offset;
173	uint16_t size;
174	uint16_t checksum;
175	int r;
176
177	adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
178	if (!adev->discovery)
179		return -ENOMEM;
180
181	r = amdgpu_discovery_read_binary(adev, adev->discovery);
182	if (r) {
183		DRM_ERROR("failed to read ip discovery binary\n");
184		goto out;
185	}
186
187	bhdr = (struct binary_header *)adev->discovery;
188
189	if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
190		DRM_ERROR("invalid ip discovery binary signature\n");
191		r = -EINVAL;
192		goto out;
193	}
194
195	offset = offsetof(struct binary_header, binary_checksum) +
196		sizeof(bhdr->binary_checksum);
197	size = bhdr->binary_size - offset;
198	checksum = bhdr->binary_checksum;
199
200	if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
201					      size, checksum)) {
202		DRM_ERROR("invalid ip discovery binary checksum\n");
203		r = -EINVAL;
204		goto out;
205	}
206
207	info = &bhdr->table_list[IP_DISCOVERY];
208	offset = le16_to_cpu(info->offset);
209	checksum = le16_to_cpu(info->checksum);
210	ihdr = (struct ip_discovery_header *)(adev->discovery + offset);
211
212	if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
213		DRM_ERROR("invalid ip discovery data table signature\n");
214		r = -EINVAL;
215		goto out;
216	}
217
218	if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
219					      ihdr->size, checksum)) {
220		DRM_ERROR("invalid ip discovery data table checksum\n");
221		r = -EINVAL;
222		goto out;
223	}
224
225	info = &bhdr->table_list[GC];
226	offset = le16_to_cpu(info->offset);
227	checksum = le16_to_cpu(info->checksum);
228	ghdr = (struct gpu_info_header *)(adev->discovery + offset);
229
230	if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
231				              ghdr->size, checksum)) {
232		DRM_ERROR("invalid gc data table checksum\n");
233		r = -EINVAL;
234		goto out;
235	}
236
237	return 0;
238
239out:
240	kfree(adev->discovery);
241	adev->discovery = NULL;
242
243	return r;
244}
245
246void amdgpu_discovery_fini(struct amdgpu_device *adev)
247{
248	kfree(adev->discovery);
249	adev->discovery = NULL;
250}
251
252int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
253{
254	struct binary_header *bhdr;
255	struct ip_discovery_header *ihdr;
256	struct die_header *dhdr;
257	struct ip *ip;
258	uint16_t die_offset;
259	uint16_t ip_offset;
260	uint16_t num_dies;
261	uint16_t num_ips;
262	uint8_t num_base_address;
263	int hw_ip;
264	int i, j, k;
265
266	if (!adev->discovery) {
267		DRM_ERROR("ip discovery uninitialized\n");
268		return -EINVAL;
269	}
270
271	bhdr = (struct binary_header *)adev->discovery;
272	ihdr = (struct ip_discovery_header *)(adev->discovery +
273			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
274	num_dies = le16_to_cpu(ihdr->num_dies);
275
276	DRM_DEBUG("number of dies: %d\n", num_dies);
277
278	for (i = 0; i < num_dies; i++) {
279		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
280		dhdr = (struct die_header *)(adev->discovery + die_offset);
281		num_ips = le16_to_cpu(dhdr->num_ips);
282		ip_offset = die_offset + sizeof(*dhdr);
283
284		if (le16_to_cpu(dhdr->die_id) != i) {
285			DRM_ERROR("invalid die id %d, expected %d\n",
286					le16_to_cpu(dhdr->die_id), i);
287			return -EINVAL;
288		}
289
290		DRM_DEBUG("number of hardware IPs on die%d: %d\n",
291				le16_to_cpu(dhdr->die_id), num_ips);
292
293		for (j = 0; j < num_ips; j++) {
294			ip = (struct ip *)(adev->discovery + ip_offset);
295			num_base_address = ip->num_base_address;
296
297			DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
298				  hw_id_names[le16_to_cpu(ip->hw_id)],
299				  le16_to_cpu(ip->hw_id),
300				  ip->number_instance,
301				  ip->major, ip->minor,
302				  ip->revision);
303
304			for (k = 0; k < num_base_address; k++) {
305				/*
306				 * convert the endianness of base addresses in place,
307				 * so that we don't need to convert them when accessing adev->reg_offset.
308				 */
309				ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
310				DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
311			}
312
313			for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
314				if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
315					DRM_INFO("set register base offset for %s\n",
316							hw_id_names[le16_to_cpu(ip->hw_id)]);
317					adev->reg_offset[hw_ip][ip->number_instance] =
318						ip->base_address;
319				}
320
321			}
322
323			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
324		}
325	}
326
327	return 0;
328}
329
330int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
331				    int *major, int *minor, int *revision)
332{
333	struct binary_header *bhdr;
334	struct ip_discovery_header *ihdr;
335	struct die_header *dhdr;
336	struct ip *ip;
337	uint16_t die_offset;
338	uint16_t ip_offset;
339	uint16_t num_dies;
340	uint16_t num_ips;
341	int i, j;
342
343	if (!adev->discovery) {
344		DRM_ERROR("ip discovery uninitialized\n");
345		return -EINVAL;
346	}
347
348	bhdr = (struct binary_header *)adev->discovery;
349	ihdr = (struct ip_discovery_header *)(adev->discovery +
350			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
351	num_dies = le16_to_cpu(ihdr->num_dies);
352
353	for (i = 0; i < num_dies; i++) {
354		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
355		dhdr = (struct die_header *)(adev->discovery + die_offset);
356		num_ips = le16_to_cpu(dhdr->num_ips);
357		ip_offset = die_offset + sizeof(*dhdr);
358
359		for (j = 0; j < num_ips; j++) {
360			ip = (struct ip *)(adev->discovery + ip_offset);
361
362			if (le16_to_cpu(ip->hw_id) == hw_id) {
363				if (major)
364					*major = ip->major;
365				if (minor)
366					*minor = ip->minor;
367				if (revision)
368					*revision = ip->revision;
369				return 0;
370			}
371			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
372		}
373	}
374
375	return -EINVAL;
376}
377
378int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
379{
380	struct binary_header *bhdr;
381	struct gc_info_v1_0 *gc_info;
382
383	if (!adev->discovery) {
384		DRM_ERROR("ip discovery uninitialized\n");
385		return -EINVAL;
386	}
387
388	bhdr = (struct binary_header *)adev->discovery;
389	gc_info = (struct gc_info_v1_0 *)(adev->discovery +
390			le16_to_cpu(bhdr->table_list[GC].offset));
391
392	adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
393	adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
394					      le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
395	adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
396	adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
397	adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
398	adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
399	adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
400	adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
401	adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
402	adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
403	adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
404	adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
405	adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
406	adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
407	adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
408					 le32_to_cpu(gc_info->gc_num_sa_per_se);
409	adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
410
411	return 0;
412}
413