1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3
4#include <linux/ascii85.h>
5#include "msm_gem.h"
6#include "a6xx_gpu.h"
7#include "a6xx_gmu.h"
8#include "a6xx_gpu_state.h"
9#include "a6xx_gmu.xml.h"
10
11/* Ignore diagnostics about register tables that we aren't using yet. We don't
12 * want to modify these headers too much from their original source.
13 */
14#pragma GCC diagnostic push
15#pragma GCC diagnostic ignored "-Wunused-variable"
16
17#include "adreno_gen7_0_0_snapshot.h"
18#include "adreno_gen7_2_0_snapshot.h"
19
20#pragma GCC diagnostic pop
21
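/*
 * One captured object: @handle points at the static descriptor that produced
 * it (register list, cluster, debugbus block, ...) and @data holds the dumped
 * values.
 */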
22struct a6xx_gpu_state_obj {
23	const void *handle;
24	u32 *data;
25};
26
27struct a6xx_gpu_state {
28	struct msm_gpu_state base;
29
30	struct a6xx_gpu_state_obj *gmu_registers;
31	int nr_gmu_registers;
32
33	struct a6xx_gpu_state_obj *registers;
34	int nr_registers;
35
36	struct a6xx_gpu_state_obj *shaders;
37	int nr_shaders;
38
39	struct a6xx_gpu_state_obj *clusters;
40	int nr_clusters;
41
42	struct a6xx_gpu_state_obj *dbgahb_clusters;
43	int nr_dbgahb_clusters;
44
45	struct a6xx_gpu_state_obj *indexed_regs;
46	int nr_indexed_regs;
47
48	struct a6xx_gpu_state_obj *debugbus;
49	int nr_debugbus;
50
51	struct a6xx_gpu_state_obj *vbif_debugbus;
52
53	struct a6xx_gpu_state_obj *cx_debugbus;
54	int nr_cx_debugbus;
55
56	struct msm_gpu_state_bo *gmu_log;
57	struct msm_gpu_state_bo *gmu_hfi;
58	struct msm_gpu_state_bo *gmu_debug;
59
60	s32 hfi_queue_history[2][HFI_HISTORY_SZ];
61
62	struct list_head objs;
63
64	bool gpu_initialized;
65};
66
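/*
 * Helpers for building a script for the CP crashdumper. Each entry is a pair
 * of 64-bit words: the first holds the value to write (or the target iova for
 * a read) and the second packs the register offset into the upper bits along
 * with the dword count; CRASHDUMP_WRITE additionally sets bit 21 to turn the
 * entry into a single register write. A pair of zeros terminates the script.
 */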
67static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
68{
69	in[0] = val;
70	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
71
72	return 2;
73}
74
75static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
76{
77	in[0] = target;
78	in[1] = (((u64) reg) << 44 | dwords);
79
80	return 2;
81}
82
83static inline int CRASHDUMP_FINI(u64 *in)
84{
85	in[0] = 0;
86	in[1] = 0;
87
88	return 2;
89}
90
91struct a6xx_crashdumper {
92	void *ptr;
93	struct drm_gem_object *bo;
94	u64 iova;
95};
96
97struct a6xx_state_memobj {
98	struct list_head node;
99	unsigned long long data[];
100};
101
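/*
 * Allocate a zeroed buffer for captured data and track it on the state's
 * object list so a6xx_gpu_state_destroy() can free everything in one pass.
 */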
102static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
103{
104	struct a6xx_state_memobj *obj =
105		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
106
107	if (!obj)
108		return NULL;
109
110	list_add_tail(&obj->node, &a6xx_state->objs);
111	return &obj->data;
112}
113
114static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
115		size_t size)
116{
117	void *dst = state_kcalloc(a6xx_state, 1, size);
118
119	if (dst)
120		memcpy(dst, src, size);
121	return dst;
122}
123
124/*
125 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
126 * the rest for the data
127 */
128#define A6XX_CD_DATA_OFFSET 8192
129#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
130
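/* Allocate and map the scratch buffer (script + data) for the crashdumper */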
131static int a6xx_crashdumper_init(struct msm_gpu *gpu,
132		struct a6xx_crashdumper *dumper)
133{
134	dumper->ptr = msm_gem_kernel_new(gpu->dev,
135		SZ_1M, MSM_BO_WC, gpu->aspace,
136		&dumper->bo, &dumper->iova);
137
138	if (!IS_ERR(dumper->ptr))
139		msm_gem_object_set_name(dumper->bo, "crashdump");
140
141	return PTR_ERR_OR_ZERO(dumper->ptr);
142}
143
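/*
 * Point the CP at the crashdump script, kick off the dump and poll for
 * completion. Bails out early if SPTPRAC isn't powered up.
 */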
144static int a6xx_crashdumper_run(struct msm_gpu *gpu,
145		struct a6xx_crashdumper *dumper)
146{
147	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
148	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
149	u32 val;
150	int ret;
151
152	if (IS_ERR_OR_NULL(dumper->ptr))
153		return -EINVAL;
154
155	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
156		return -EINVAL;
157
158	/* Make sure all pending memory writes are posted */
159	wmb();
160
161	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);
162
163	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
164
165	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
166		val & 0x02, 100, 10000);
167
168	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
169
170	return ret;
171}
172
173/* read a value from the GX debug bus */
174static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
175		u32 *data)
176{
177	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
178		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
179
180	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
181	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
182	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
183	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
184
185	/* Wait 1 us to make sure the data is flowing */
186	udelay(1);
187
188	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
189	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
190
191	return 2;
192}
193
194#define cxdbg_write(ptr, offset, val) \
195	msm_writel((val), (ptr) + ((offset) << 2))
196
197#define cxdbg_read(ptr, offset) \
198	msm_readl((ptr) + ((offset) << 2))
199
200/* read a value from the CX debug bus */
201static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
202		u32 *data)
203{
204	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
205		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
206
207	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
208	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
209	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
210	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
211
212	/* Wait 1 us to make sure the data is flowing */
213	udelay(1);
214
215	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
216	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
217
218	return 2;
219}
220
221/* Read a chunk of data from the VBIF debug bus */
222static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
223		u32 reg, int count, u32 *data)
224{
225	int i;
226
227	gpu_write(gpu, ctrl0, reg);
228
229	for (i = 0; i < count; i++) {
230		gpu_write(gpu, ctrl1, i);
231		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
232	}
233
234	return count;
235}
236
237#define AXI_ARB_BLOCKS 2
238#define XIN_AXI_BLOCKS 5
239#define XIN_CORE_BLOCKS 4
240
241#define VBIF_DEBUGBUS_BLOCK_SIZE \
242	((16 * AXI_ARB_BLOCKS) + \
243	 (18 * XIN_AXI_BLOCKS) + \
244	 (12 * XIN_CORE_BLOCKS))
245
246static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
247		struct a6xx_gpu_state *a6xx_state,
248		struct a6xx_gpu_state_obj *obj)
249{
250	u32 clk, *ptr;
251	int i;
252
253	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
254		sizeof(u32));
255	if (!obj->data)
256		return;
257
258	obj->handle = NULL;
259
260	/* Get the current clock setting */
261	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
262
263	/* Force on the bus so we can read it */
264	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
265		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
266
267	/* We will read from BUS2 first, so disable BUS1 */
268	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
269
270	/* Enable the VBIF bus for reading */
271	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
272
273	ptr = obj->data;
274
275	for (i = 0; i < AXI_ARB_BLOCKS; i++)
276		ptr += vbif_debugbus_read(gpu,
277			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
278			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
279			1 << (i + 16), 16, ptr);
280
281	for (i = 0; i < XIN_AXI_BLOCKS; i++)
282		ptr += vbif_debugbus_read(gpu,
283			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
284			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
285			1 << i, 18, ptr);
286
287	/* Stop BUS2 so we can turn on BUS1 */
288	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
289
290	for (i = 0; i < XIN_CORE_BLOCKS; i++)
291		ptr += vbif_debugbus_read(gpu,
292			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
293			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
294			1 << i, 12, ptr);
295
296	/* Restore the VBIF clock setting */
297	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
298}
299
300static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
301		struct a6xx_gpu_state *a6xx_state,
302		const struct a6xx_debugbus_block *block,
303		struct a6xx_gpu_state_obj *obj)
304{
305	int i;
306	u32 *ptr;
307
308	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
309	if (!obj->data)
310		return;
311
312	obj->handle = block;
313
314	for (ptr = obj->data, i = 0; i < block->count; i++)
315		ptr += debugbus_read(gpu, block->id, i, ptr);
316}
317
318static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
319		struct a6xx_gpu_state *a6xx_state,
320		const struct a6xx_debugbus_block *block,
321		struct a6xx_gpu_state_obj *obj)
322{
323	int i;
324	u32 *ptr;
325
326	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
327	if (!obj->data)
328		return;
329
330	obj->handle = block;
331
332	for (ptr = obj->data, i = 0; i < block->count; i++)
333		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
334}
335
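/* Capture all of the GX debugbus blocks that exist on this generation */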
336static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
337		struct a6xx_gpu_state *a6xx_state)
338{
339	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
340		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
341
342	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
343		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
344
345	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
346			sizeof(*a6xx_state->debugbus));
347
348	if (a6xx_state->debugbus) {
349		int i;
350
351		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
352			a6xx_get_debugbus_block(gpu,
353				a6xx_state,
354				&a6xx_debugbus_blocks[i],
355				&a6xx_state->debugbus[i]);
356
357		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
358
		/*
		 * GBIF has the same debugbus interface as the other GPU
		 * blocks, so fall back to the default path when the GPU uses
		 * GBIF. Note that GBIF uses exactly the same block ID as VBIF.
		 */
364		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
365			a6xx_get_debugbus_block(gpu, a6xx_state,
366				&a6xx_gbif_debugbus_block,
367				&a6xx_state->debugbus[i]);
368
369			a6xx_state->nr_debugbus += 1;
370		}
371
		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
			int nr = a6xx_state->nr_debugbus;

			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
				a6xx_get_debugbus_block(gpu,
					a6xx_state,
					&a650_debugbus_blocks[i],
					&a6xx_state->debugbus[nr + i]);

			a6xx_state->nr_debugbus += ARRAY_SIZE(a650_debugbus_blocks);
		}
380	}
381}
382
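/* Capture the per-variant A7xx debugbus blocks plus the GBIF debugbus blocks */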
383static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
384		struct a6xx_gpu_state *a6xx_state)
385{
386	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
387	int debugbus_blocks_count, total_debugbus_blocks;
388	const u32 *debugbus_blocks;
389	int i;
390
391	if (adreno_is_a730(adreno_gpu)) {
392		debugbus_blocks = gen7_0_0_debugbus_blocks;
393		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
394	} else {
395		BUG_ON(!adreno_is_a740_family(adreno_gpu));
396		debugbus_blocks = gen7_2_0_debugbus_blocks;
397		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
398	}
399
400	total_debugbus_blocks = debugbus_blocks_count +
401		ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
402
403	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
404			sizeof(*a6xx_state->debugbus));
405
406	if (a6xx_state->debugbus) {
407		for (i = 0; i < debugbus_blocks_count; i++) {
408			a6xx_get_debugbus_block(gpu,
409				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
410				&a6xx_state->debugbus[i]);
411		}
412
413		for (i = 0; i < ARRAY_SIZE(a7xx_gbif_debugbus_blocks); i++) {
414			a6xx_get_debugbus_block(gpu,
415				a6xx_state, &a7xx_gbif_debugbus_blocks[i],
416				&a6xx_state->debugbus[i + debugbus_blocks_count]);
417		}
418	}
420}
421
422static void a6xx_get_debugbus(struct msm_gpu *gpu,
423		struct a6xx_gpu_state *a6xx_state)
424{
425	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
426	struct resource *res;
427	void __iomem *cxdbg = NULL;
428
429	/* Set up the GX debug bus */
430
431	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
432		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
433
434	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
435		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
436
437	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
438	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
439	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
440	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
441
442	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
443	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
444
445	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
446	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
447	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
448	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
449
	/*
	 * Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
453	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
454			"cx_dbgc");
455
456	if (res)
457		cxdbg = ioremap(res->start, resource_size(res));
458
459	if (cxdbg) {
460		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
461			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
462
463		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
464			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
465
466		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
467		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
468		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
469		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
470
471		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
472			0x76543210);
473		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
474			0xFEDCBA98);
475
476		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
477		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
478		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
479		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
480	}
481
	if (adreno_is_a7xx(adreno_gpu))
		a7xx_get_debugbus_blocks(gpu, a6xx_state);
	else
		a6xx_get_debugbus_blocks(gpu, a6xx_state);
487
	/* Dump the VBIF debugbus on applicable targets */
489	if (!a6xx_has_gbif(adreno_gpu)) {
490		a6xx_state->vbif_debugbus =
491			state_kcalloc(a6xx_state, 1,
492					sizeof(*a6xx_state->vbif_debugbus));
493
494		if (a6xx_state->vbif_debugbus)
495			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
496					a6xx_state->vbif_debugbus);
497	}
498
499	if (cxdbg) {
500		unsigned nr_cx_debugbus_blocks;
501		const struct a6xx_debugbus_block *cx_debugbus_blocks;
502
503		if (adreno_is_a7xx(adreno_gpu)) {
504			BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
505			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
506			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
507		} else {
508			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
509			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
510		}
511
512		a6xx_state->cx_debugbus =
513			state_kcalloc(a6xx_state,
514			nr_cx_debugbus_blocks,
515			sizeof(*a6xx_state->cx_debugbus));
516
517		if (a6xx_state->cx_debugbus) {
518			int i;
519
520			for (i = 0; i < nr_cx_debugbus_blocks; i++)
521				a6xx_get_cx_debugbus_block(cxdbg,
522					a6xx_state,
523					&cx_debugbus_blocks[i],
524					&a6xx_state->cx_debugbus[i]);
525
526			a6xx_state->nr_cx_debugbus =
527				nr_cx_debugbus_blocks;
528		}
529
530		iounmap(cxdbg);
531	}
532}
533
534#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
535
536/* Read a data cluster from behind the AHB aperture */
537static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
538		struct a6xx_gpu_state *a6xx_state,
539		const struct a6xx_dbgahb_cluster *dbgahb,
540		struct a6xx_gpu_state_obj *obj,
541		struct a6xx_crashdumper *dumper)
542{
543	u64 *in = dumper->ptr;
544	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
545	size_t datasize;
546	int i, regcount = 0;
547
548	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
549		int j;
550
551		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
552			(dbgahb->statetype + i * 2) << 8);
553
554		for (j = 0; j < dbgahb->count; j += 2) {
555			int count = RANGE(dbgahb->registers, j);
556			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
557				dbgahb->registers[j] - (dbgahb->base >> 2);
558
559			in += CRASHDUMP_READ(in, offset, count, out);
560
561			out += count * sizeof(u32);
562
563			if (i == 0)
564				regcount += count;
565		}
566	}
567
568	CRASHDUMP_FINI(in);
569
570	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
571
572	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
573		return;
574
575	if (a6xx_crashdumper_run(gpu, dumper))
576		return;
577
578	obj->handle = dbgahb;
579	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
580		datasize);
581}
582
583static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
584		struct a6xx_gpu_state *a6xx_state,
585		const struct gen7_sptp_cluster_registers *dbgahb,
586		struct a6xx_gpu_state_obj *obj,
587		struct a6xx_crashdumper *dumper)
588{
589	u64 *in = dumper->ptr;
590	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
591	size_t datasize;
592	int i, regcount = 0;
593
594	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
595		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
596		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
597		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));
598
599	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
600		int count = RANGE(dbgahb->regs, i);
601		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
602			dbgahb->regs[i] - dbgahb->regbase;
603
604		in += CRASHDUMP_READ(in, offset, count, out);
605
606		out += count * sizeof(u32);
607		regcount += count;
608	}
609
610	CRASHDUMP_FINI(in);
611
612	datasize = regcount * sizeof(u32);
613
614	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
615		return;
616
617	if (a6xx_crashdumper_run(gpu, dumper))
618		return;
619
620	obj->handle = dbgahb;
621	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
622		datasize);
623}
624
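/* Capture every HLSQ (dbgahb) cluster with the crashdumper */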
625static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
626		struct a6xx_gpu_state *a6xx_state,
627		struct a6xx_crashdumper *dumper)
628{
629	int i;
630
631	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
632		ARRAY_SIZE(a6xx_dbgahb_clusters),
633		sizeof(*a6xx_state->dbgahb_clusters));
634
635	if (!a6xx_state->dbgahb_clusters)
636		return;
637
638	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
639
640	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
641		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
642			&a6xx_dbgahb_clusters[i],
643			&a6xx_state->dbgahb_clusters[i], dumper);
644}
645
646static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
647		struct a6xx_gpu_state *a6xx_state,
648		struct a6xx_crashdumper *dumper)
649{
650	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
651	int i;
652	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
653	unsigned dbgahb_clusters_size;
654
655	if (adreno_is_a730(adreno_gpu)) {
656		dbgahb_clusters = gen7_0_0_sptp_clusters;
657		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
658	} else {
659		BUG_ON(!adreno_is_a740_family(adreno_gpu));
660		dbgahb_clusters = gen7_2_0_sptp_clusters;
661		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
662	}
663
664	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
665		dbgahb_clusters_size,
666		sizeof(*a6xx_state->dbgahb_clusters));
667
668	if (!a6xx_state->dbgahb_clusters)
669		return;
670
671	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;
672
673	for (i = 0; i < dbgahb_clusters_size; i++)
674		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
675			&dbgahb_clusters[i],
676			&a6xx_state->dbgahb_clusters[i], dumper);
677}
678
679/* Read a data cluster from the CP aperture with the crashdumper */
680static void a6xx_get_cluster(struct msm_gpu *gpu,
681		struct a6xx_gpu_state *a6xx_state,
682		const struct a6xx_cluster *cluster,
683		struct a6xx_gpu_state_obj *obj,
684		struct a6xx_crashdumper *dumper)
685{
686	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
687	u64 *in = dumper->ptr;
688	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
689	size_t datasize;
690	int i, regcount = 0;
691	u32 id = cluster->id;
692
	/* Skip registers that are not present on older generations */
694	if (!adreno_is_a660_family(adreno_gpu) &&
695			cluster->registers == a660_fe_cluster)
696		return;
697
698	if (adreno_is_a650_family(adreno_gpu) &&
699			cluster->registers == a6xx_ps_cluster)
700		id = CLUSTER_VPC_PS;
701
702	/* Some clusters need a selector register to be programmed too */
703	if (cluster->sel_reg)
704		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
705
706	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
707		int j;
708
709		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
710			(id << 8) | (i << 4) | i);
711
712		for (j = 0; j < cluster->count; j += 2) {
713			int count = RANGE(cluster->registers, j);
714
715			in += CRASHDUMP_READ(in, cluster->registers[j],
716				count, out);
717
718			out += count * sizeof(u32);
719
720			if (i == 0)
721				regcount += count;
722		}
723	}
724
725	CRASHDUMP_FINI(in);
726
727	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
728
729	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
730		return;
731
732	if (a6xx_crashdumper_run(gpu, dumper))
733		return;
734
735	obj->handle = cluster;
736	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
737		datasize);
738}
739
740static void a7xx_get_cluster(struct msm_gpu *gpu,
741		struct a6xx_gpu_state *a6xx_state,
742		const struct gen7_cluster_registers *cluster,
743		struct a6xx_gpu_state_obj *obj,
744		struct a6xx_crashdumper *dumper)
745{
746	u64 *in = dumper->ptr;
747	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
748	size_t datasize;
749	int i, regcount = 0;
750
751	/* Some clusters need a selector register to be programmed too */
752	if (cluster->sel)
753		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);
754
755	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
756		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
757		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
758		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));
759
760	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
761		int count = RANGE(cluster->regs, i);
762
763		in += CRASHDUMP_READ(in, cluster->regs[i],
764			count, out);
765
766		out += count * sizeof(u32);
767		regcount += count;
768	}
769
770	CRASHDUMP_FINI(in);
771
772	datasize = regcount * sizeof(u32);
773
774	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
775		return;
776
777	if (a6xx_crashdumper_run(gpu, dumper))
778		return;
779
780	obj->handle = cluster;
781	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
782		datasize);
783}
784
785static void a6xx_get_clusters(struct msm_gpu *gpu,
786		struct a6xx_gpu_state *a6xx_state,
787		struct a6xx_crashdumper *dumper)
788{
789	int i;
790
791	a6xx_state->clusters = state_kcalloc(a6xx_state,
792		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
793
794	if (!a6xx_state->clusters)
795		return;
796
797	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
798
799	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
800		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
801			&a6xx_state->clusters[i], dumper);
802}
803
804static void a7xx_get_clusters(struct msm_gpu *gpu,
805		struct a6xx_gpu_state *a6xx_state,
806		struct a6xx_crashdumper *dumper)
807{
808	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
809	int i;
810	const struct gen7_cluster_registers *clusters;
811	unsigned clusters_size;
812
813	if (adreno_is_a730(adreno_gpu)) {
814		clusters = gen7_0_0_clusters;
815		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
816	} else {
817		BUG_ON(!adreno_is_a740_family(adreno_gpu));
818		clusters = gen7_2_0_clusters;
819		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
820	}
821
822	a6xx_state->clusters = state_kcalloc(a6xx_state,
823		clusters_size, sizeof(*a6xx_state->clusters));
824
825	if (!a6xx_state->clusters)
826		return;
827
828	a6xx_state->nr_clusters = clusters_size;
829
830	for (i = 0; i < clusters_size; i++)
831		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
832			&a6xx_state->clusters[i], dumper);
833}
834
835/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
836static void a6xx_get_shader_block(struct msm_gpu *gpu,
837		struct a6xx_gpu_state *a6xx_state,
838		const struct a6xx_shader_block *block,
839		struct a6xx_gpu_state_obj *obj,
840		struct a6xx_crashdumper *dumper)
841{
842	u64 *in = dumper->ptr;
843	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
844	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
845	int i;
846
847	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
848		return;
849
850	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
851		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
852			(block->type << 8) | i);
853
854		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
855			block->size, out);
856
857		out += block->size * sizeof(u32);
858	}
859
860	CRASHDUMP_FINI(in);
861
862	if (a6xx_crashdumper_run(gpu, dumper))
863		return;
864
865	obj->handle = block;
866	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
867		datasize);
868}
869
870static void a7xx_get_shader_block(struct msm_gpu *gpu,
871		struct a6xx_gpu_state *a6xx_state,
872		const struct gen7_shader_block *block,
873		struct a6xx_gpu_state_obj *obj,
874		struct a6xx_crashdumper *dumper)
875{
876	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
877	u64 *in = dumper->ptr;
878	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
879	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
880	int i, j;
881
882	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
883		return;
884
	if (adreno_is_a730(adreno_gpu))
		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
888
889	for (i = 0; i < block->num_sps; i++) {
890		for (j = 0; j < block->num_usptps; j++) {
891			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
892				A7XX_SP_READ_SEL_LOCATION(block->location) |
893				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
894				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
895				A7XX_SP_READ_SEL_USPTP(j) |
896				A7XX_SP_READ_SEL_SPTP(i));
897
898			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
899				block->size, out);
900
901			out += block->size * sizeof(u32);
902		}
903	}
904
905	CRASHDUMP_FINI(in);
906
907	if (a6xx_crashdumper_run(gpu, dumper))
908		goto out;
909
910	obj->handle = block;
911	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
912		datasize);
913
914out:
	if (adreno_is_a730(adreno_gpu))
		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
918}
919
920static void a6xx_get_shaders(struct msm_gpu *gpu,
921		struct a6xx_gpu_state *a6xx_state,
922		struct a6xx_crashdumper *dumper)
923{
924	int i;
925
926	a6xx_state->shaders = state_kcalloc(a6xx_state,
927		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
928
929	if (!a6xx_state->shaders)
930		return;
931
932	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
933
934	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
935		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
936			&a6xx_state->shaders[i], dumper);
937}
938
939static void a7xx_get_shaders(struct msm_gpu *gpu,
940		struct a6xx_gpu_state *a6xx_state,
941		struct a6xx_crashdumper *dumper)
942{
943	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
944	const struct gen7_shader_block *shader_blocks;
945	unsigned num_shader_blocks;
946	int i;
947
948	if (adreno_is_a730(adreno_gpu)) {
949		shader_blocks = gen7_0_0_shader_blocks;
950		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
951	} else {
952		BUG_ON(!adreno_is_a740_family(adreno_gpu));
953		shader_blocks = gen7_2_0_shader_blocks;
954		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
955	}
956
957	a6xx_state->shaders = state_kcalloc(a6xx_state,
958		num_shader_blocks, sizeof(*a6xx_state->shaders));
959
960	if (!a6xx_state->shaders)
961		return;
962
963	a6xx_state->nr_shaders = num_shader_blocks;
964
965	for (i = 0; i < num_shader_blocks; i++)
966		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
967			&a6xx_state->shaders[i], dumper);
968}
969
970/* Read registers from behind the HLSQ aperture with the crashdumper */
971static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
972		struct a6xx_gpu_state *a6xx_state,
973		const struct a6xx_registers *regs,
974		struct a6xx_gpu_state_obj *obj,
975		struct a6xx_crashdumper *dumper)
977{
978	u64 *in = dumper->ptr;
979	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
980	int i, regcount = 0;
981
982	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
983
984	for (i = 0; i < regs->count; i += 2) {
985		u32 count = RANGE(regs->registers, i);
986		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
987			regs->registers[i] - (regs->val0 >> 2);
988
989		in += CRASHDUMP_READ(in, offset, count, out);
990
991		out += count * sizeof(u32);
992		regcount += count;
993	}
994
995	CRASHDUMP_FINI(in);
996
997	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
998		return;
999
1000	if (a6xx_crashdumper_run(gpu, dumper))
1001		return;
1002
1003	obj->handle = regs;
1004	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1005		regcount * sizeof(u32));
1006}
1007
1008/* Read a block of registers using the crashdumper */
1009static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
1010		struct a6xx_gpu_state *a6xx_state,
1011		const struct a6xx_registers *regs,
1012		struct a6xx_gpu_state_obj *obj,
1013		struct a6xx_crashdumper *dumper)
1015{
1016	u64 *in = dumper->ptr;
1017	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1018	int i, regcount = 0;
1019
1020	/* Skip unsupported registers on older generations */
1021	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1022			(regs->registers == a660_registers))
1023		return;
1024
1025	/* Some blocks might need to program a selector register first */
1026	if (regs->val0)
1027		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
1028
1029	for (i = 0; i < regs->count; i += 2) {
1030		u32 count = RANGE(regs->registers, i);
1031
1032		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
1033
1034		out += count * sizeof(u32);
1035		regcount += count;
1036	}
1037
1038	CRASHDUMP_FINI(in);
1039
1040	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1041		return;
1042
1043	if (a6xx_crashdumper_run(gpu, dumper))
1044		return;
1045
1046	obj->handle = regs;
1047	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1048		regcount * sizeof(u32));
1049}
1050
1051static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
1052		struct a6xx_gpu_state *a6xx_state,
1053		const struct gen7_reg_list *regs,
1054		struct a6xx_gpu_state_obj *obj,
1055		struct a6xx_crashdumper *dumper)
1057{
1058	u64 *in = dumper->ptr;
1059	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1060	int i, regcount = 0;
1061
1062	/* Some blocks might need to program a selector register first */
1063	if (regs->sel)
1064		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);
1065
1066	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
1067		u32 count = RANGE(regs->regs, i);
1068
1069		in += CRASHDUMP_READ(in, regs->regs[i], count, out);
1070
1071		out += count * sizeof(u32);
1072		regcount += count;
1073	}
1074
1075	CRASHDUMP_FINI(in);
1076
1077	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1078		return;
1079
1080	if (a6xx_crashdumper_run(gpu, dumper))
1081		return;
1082
1083	obj->handle = regs->regs;
1084	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1085		regcount * sizeof(u32));
1086}
1087
1089/* Read a block of registers via AHB */
1090static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1091		struct a6xx_gpu_state *a6xx_state,
1092		const struct a6xx_registers *regs,
1093		struct a6xx_gpu_state_obj *obj)
1094{
1095	int i, regcount = 0, index = 0;
1096
1097	/* Skip unsupported registers on older generations */
1098	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1099			(regs->registers == a660_registers))
1100		return;
1101
1102	for (i = 0; i < regs->count; i += 2)
1103		regcount += RANGE(regs->registers, i);
1104
1105	obj->handle = (const void *) regs;
1106	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1107	if (!obj->data)
1108		return;
1109
1110	for (i = 0; i < regs->count; i += 2) {
1111		u32 count = RANGE(regs->registers, i);
1112		int j;
1113
1114		for (j = 0; j < count; j++)
1115			obj->data[index++] = gpu_read(gpu,
1116				regs->registers[i] + j);
1117	}
1118}
1119
1120static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1121		struct a6xx_gpu_state *a6xx_state,
1122		const u32 *regs,
1123		struct a6xx_gpu_state_obj *obj)
1124{
1125	int i, regcount = 0, index = 0;
1126
1127	for (i = 0; regs[i] != UINT_MAX; i += 2)
1128		regcount += RANGE(regs, i);
1129
1130	obj->handle = (const void *) regs;
1131	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1132	if (!obj->data)
1133		return;
1134
1135	for (i = 0; regs[i] != UINT_MAX; i += 2) {
1136		u32 count = RANGE(regs, i);
1137		int j;
1138
1139		for (j = 0; j < count; j++)
1140			obj->data[index++] = gpu_read(gpu, regs[i] + j);
1141	}
1142}
1143
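/* Program the selector (if any) from the CPU, then read the list over AHB */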
1144static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
1145		struct a6xx_gpu_state *a6xx_state,
1146		const struct gen7_reg_list *regs,
1147		struct a6xx_gpu_state_obj *obj)
1148{
1149	if (regs->sel)
1150		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);
1151
1152	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
1153}
1154
1155/* Read a block of GMU registers */
1156static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
1157		struct a6xx_gpu_state *a6xx_state,
1158		const struct a6xx_registers *regs,
1159		struct a6xx_gpu_state_obj *obj,
1160		bool rscc)
1161{
1162	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1163	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1164	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1165	int i, regcount = 0, index = 0;
1166
1167	for (i = 0; i < regs->count; i += 2)
1168		regcount += RANGE(regs->registers, i);
1169
1170	obj->handle = (const void *) regs;
1171	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1172	if (!obj->data)
1173		return;
1174
1175	for (i = 0; i < regs->count; i += 2) {
1176		u32 count = RANGE(regs->registers, i);
1177		int j;
1178
1179		for (j = 0; j < count; j++) {
1180			u32 offset = regs->registers[i] + j;
1181			u32 val;
1182
1183			if (rscc)
1184				val = gmu_read_rscc(gmu, offset);
1185			else
1186				val = gmu_read(gmu, offset);
1187
1188			obj->data[index++] = val;
1189		}
1190	}
1191}
1192
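/*
 * Capture the GMU register blocks: the CX GMU registers and the RSCC
 * registers are always read; the GX-side block is only read when GX is
 * powered up, after dropping the AHB fence to ALLOW.
 */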
1193static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
1194		struct a6xx_gpu_state *a6xx_state)
1195{
1196	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1197	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1198
1199	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
1200		3, sizeof(*a6xx_state->gmu_registers));
1201
1202	if (!a6xx_state->gmu_registers)
1203		return;
1204
1205	a6xx_state->nr_gmu_registers = 3;
1206
1207	/* Get the CX GMU registers from AHB */
1208	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
1209		&a6xx_state->gmu_registers[0], false);
1210	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
1211		&a6xx_state->gmu_registers[1], true);
1212
1213	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1214		return;
1215
1216	/* Set the fence to ALLOW mode so we can access the registers */
1217	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1218
1219	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
1220		&a6xx_state->gmu_registers[2], false);
1221}
1222
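/* Copy the contents of a GMU buffer object into the state snapshot */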
1223static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
1224		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
1225{
1226	struct msm_gpu_state_bo *snapshot;
1227
1228	if (!bo->size)
1229		return NULL;
1230
1231	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
1232	if (!snapshot)
1233		return NULL;
1234
1235	snapshot->iova = bo->iova;
1236	snapshot->size = bo->size;
1237	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
1238	if (!snapshot->data)
1239		return NULL;
1240
1241	memcpy(snapshot->data, bo->virt, bo->size);
1242
1243	return snapshot;
1244}
1245
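/*
 * Copy each HFI queue's message history into the snapshot, rotated so that
 * the oldest entry comes first.
 */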
1246static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
1247					  struct a6xx_gpu_state *a6xx_state)
1248{
1249	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1250	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1251	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1252	unsigned i, j;
1253
1254	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
1255
1256	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
1257		struct a6xx_hfi_queue *queue = &gmu->queues[i];
1258		for (j = 0; j < HFI_HISTORY_SZ; j++) {
1259			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
1260			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
1261		}
1262	}
1263}
1264
1265#define A6XX_REGLIST_SIZE        1
1266#define A6XX_GBIF_REGLIST_SIZE   1
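/*
 * Capture the general GPU registers: the AHB and VBIF/GBIF lists are always
 * read from the CPU, while the remaining lists go through the crashdumper
 * when it is available (i.e. when the SMMU is not stalled).
 */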
1267static void a6xx_get_registers(struct msm_gpu *gpu,
1268		struct a6xx_gpu_state *a6xx_state,
1269		struct a6xx_crashdumper *dumper)
1270{
1271	int i, count = A6XX_REGLIST_SIZE +
1272		ARRAY_SIZE(a6xx_reglist) +
1273		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
1274	int index = 0;
1275	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1276
1277	a6xx_state->registers = state_kcalloc(a6xx_state,
1278		count, sizeof(*a6xx_state->registers));
1279
1280	if (!a6xx_state->registers)
1281		return;
1282
1283	a6xx_state->nr_registers = count;
1284
1285	a6xx_get_ahb_gpu_registers(gpu,
1286		a6xx_state, &a6xx_ahb_reglist,
1287		&a6xx_state->registers[index++]);
1288
1289	if (a6xx_has_gbif(adreno_gpu))
1290		a6xx_get_ahb_gpu_registers(gpu,
1291				a6xx_state, &a6xx_gbif_reglist,
1292				&a6xx_state->registers[index++]);
1293	else
1294		a6xx_get_ahb_gpu_registers(gpu,
1295				a6xx_state, &a6xx_vbif_reglist,
1296				&a6xx_state->registers[index++]);
1297	if (!dumper) {
1298		/*
1299		 * We can't use the crashdumper when the SMMU is stalled,
1300		 * because the GPU has no memory access until we resume
1301		 * translation (but we don't want to do that until after
1302		 * we have captured as much useful GPU state as possible).
1303		 * So instead collect registers via the CPU:
1304		 */
1305		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1306			a6xx_get_ahb_gpu_registers(gpu,
1307				a6xx_state, &a6xx_reglist[i],
1308				&a6xx_state->registers[index++]);
1309		return;
1310	}
1311
1312	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1313		a6xx_get_crashdumper_registers(gpu,
1314			a6xx_state, &a6xx_reglist[i],
1315			&a6xx_state->registers[index++],
1316			dumper);
1317
1318	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
1319		a6xx_get_crashdumper_hlsq_registers(gpu,
1320			a6xx_state, &a6xx_hlsq_reglist[i],
1321			&a6xx_state->registers[index++],
1322			dumper);
1323}
1324
1325#define A7XX_PRE_CRASHDUMPER_SIZE    1
1326#define A7XX_POST_CRASHDUMPER_SIZE   1
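/*
 * A7xx variant: read the pre-crashdumper list from the CPU first, then either
 * walk the full reglist with the crashdumper or, without it, fall back to
 * reading just the first region over AHB.
 */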
1327static void a7xx_get_registers(struct msm_gpu *gpu,
1328		struct a6xx_gpu_state *a6xx_state,
1329		struct a6xx_crashdumper *dumper)
1330{
1331	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1332	int i, count;
1333	int index = 0;
1334	const u32 *pre_crashdumper_regs;
1335	const struct gen7_reg_list *reglist;
1336
1337	if (adreno_is_a730(adreno_gpu)) {
1338		reglist = gen7_0_0_reg_list;
1339		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1340	} else {
1341		BUG_ON(!adreno_is_a740_family(adreno_gpu));
1342		reglist = gen7_2_0_reg_list;
1343		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1344	}
1345
1346	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;
1347
	/*
	 * The downstream reglist contains registers in other memory regions
	 * (cx_misc/cx_mem and cx_dbgc); reading those from the CPU would
	 * require plumbing through their offsets and mapping them. For now
	 * only read the first region, which is the main one.
	 */
1353	if (dumper) {
1354		for (i = 0; reglist[i].regs; i++)
1355			count++;
1356	} else {
1357		count++;
1358	}
1359
1360	a6xx_state->registers = state_kcalloc(a6xx_state,
1361		count, sizeof(*a6xx_state->registers));
1362
1363	if (!a6xx_state->registers)
1364		return;
1365
1366	a6xx_state->nr_registers = count;
1367
1368	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
1369		&a6xx_state->registers[index++]);
1370
1371	if (!dumper) {
1372		a7xx_get_ahb_gpu_reglist(gpu,
1373			a6xx_state, &reglist[0],
1374			&a6xx_state->registers[index++]);
1375		return;
1376	}
1377
1378	for (i = 0; reglist[i].regs; i++)
1379		a7xx_get_crashdumper_registers(gpu,
1380			a6xx_state, &reglist[i],
1381			&a6xx_state->registers[index++],
1382			dumper);
1383}
1384
1385static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
1386		struct a6xx_gpu_state *a6xx_state)
1387{
1388	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1389	const u32 *regs;
1390
1391	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
1392	regs = gen7_0_0_post_crashdumper_registers;
1393
1394	a7xx_get_ahb_gpu_registers(gpu,
1395		a6xx_state, regs,
1396		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
1397}
1398
1399static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
1400{
	/*
	 * The value at CP_ROQ_THRESHOLDS_2[16:31] is in 4dword units.
	 * Convert it to dwords.
	 */
1402	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
1403}
1404
1405static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1406{
	/*
	 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units, but that
	 * register is not directly accessible from the APSS on A7xx. Program
	 * SQE_UCODE_DBG_ADDR with offset 0x70d3 and read the value back
	 * through SQE_UCODE_DBG_DATA instead.
	 */
1412	gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1413
1414	return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1415}
1416
1417/* Read a block of data from an indexed register pair */
1418static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1419		struct a6xx_gpu_state *a6xx_state,
1420		struct a6xx_indexed_registers *indexed,
1421		struct a6xx_gpu_state_obj *obj)
1422{
1423	int i;
1424
1425	obj->handle = (const void *) indexed;
1426	if (indexed->count_fn)
1427		indexed->count = indexed->count_fn(gpu);
1428
1429	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
1430	if (!obj->data)
1431		return;
1432
1433	/* All the indexed banks start at address 0 */
1434	gpu_write(gpu, indexed->addr, 0);
1435
1436	/* Read the data - each read increments the internal address by 1 */
1437	for (i = 0; i < indexed->count; i++)
1438		obj->data[i] = gpu_read(gpu, indexed->data);
1439}
1440
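/*
 * Capture the indexed register banks plus the CP mempool, stabilizing the
 * mempool (or setting the CHICKEN_DBG bit on the a650 family) while it is
 * read.
 */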
1441static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1442		struct a6xx_gpu_state *a6xx_state)
1443{
1444	u32 mempool_size;
1445	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1446	int i;
1447
1448	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
1449		sizeof(*a6xx_state->indexed_regs));
1450	if (!a6xx_state->indexed_regs)
1451		return;
1452
1453	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
1454		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
1455			&a6xx_state->indexed_regs[i]);
1456
1457	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1458		u32 val;
1459
1460		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1461		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1462
1463		/* Get the contents of the CP mempool */
1464		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1465			&a6xx_state->indexed_regs[i]);
1466
1467		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1468		a6xx_state->nr_indexed_regs = count;
1469		return;
1470	}
1471
1472	/* Set the CP mempool size to 0 to stabilize it while dumping */
1473	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1474	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1475
1476	/* Get the contents of the CP mempool */
1477	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1478		&a6xx_state->indexed_regs[i]);
1479
1480	/*
1481	 * Offset 0x2000 in the mempool is the size - copy the saved size over
1482	 * so the data is consistent
1483	 */
1484	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1485
	/* Restore the size in the hardware */
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

	a6xx_state->nr_indexed_regs = count;
}
1489
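/*
 * A7xx variant: capture the common indexed banks, then the CP_BV mempools
 * with the CHICKEN_DBG dump bits set around the reads.
 */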
1490static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1491		struct a6xx_gpu_state *a6xx_state)
1492{
1493	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1494	int i, indexed_count, mempool_count;
1495
1496	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
1497	indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1498	mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1499
1500	a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
1501					indexed_count + mempool_count,
1502					sizeof(*a6xx_state->indexed_regs));
1503	if (!a6xx_state->indexed_regs)
1504		return;
1505
1506	a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1507
1508	/* First read the common regs */
1509	for (i = 0; i < indexed_count; i++)
1510		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_indexed_reglist[i],
1511			&a6xx_state->indexed_regs[i]);
1512
1513	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1514	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1515
1516	/* Get the contents of the CP_BV mempool */
1517	for (i = 0; i < mempool_count; i++)
1518		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
1519			&a6xx_state->indexed_regs[indexed_count + i]);
1520
1521	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1522	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1524}
1525
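/*
 * Top level snapshot entry point: gather the generic adreno state, the GMU
 * state, the registers, shaders, clusters and (optionally) the debugbus into
 * a freshly allocated a6xx_gpu_state.
 */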
1526struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1527{
1528	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1529	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1530	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1531	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1532		GFP_KERNEL);
1533	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1534			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1535
1536	if (!a6xx_state)
1537		return ERR_PTR(-ENOMEM);
1538
1539	INIT_LIST_HEAD(&a6xx_state->objs);
1540
1541	/* Get the generic state from the adreno core */
1542	adreno_gpu_state_get(gpu, &a6xx_state->base);
1543
1544	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1545		a6xx_get_gmu_registers(gpu, a6xx_state);
1546
1547		a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1548		a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1549		a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1550
1551		a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1552	}
1553
	/* If GX isn't on, the rest of the data isn't going to be accessible */
1555	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1556		return &a6xx_state->base;
1557
1558	/* Get the banks of indexed registers */
1559	if (adreno_is_a7xx(adreno_gpu))
1560		a7xx_get_indexed_registers(gpu, a6xx_state);
1561	else
1562		a6xx_get_indexed_registers(gpu, a6xx_state);
1563
1564	/*
1565	 * Try to initialize the crashdumper, if we are not dumping state
1566	 * with the SMMU stalled.  The crashdumper needs memory access to
1567	 * write out GPU state, so we need to skip this when the SMMU is
1568	 * stalled in response to an iova fault
1569	 */
1570	if (!stalled && !gpu->needs_hw_init &&
1571	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1572		dumper = &_dumper;
1573	}
1574
1575	if (adreno_is_a7xx(adreno_gpu)) {
1576		a7xx_get_registers(gpu, a6xx_state, dumper);
1577
1578		if (dumper) {
1579			a7xx_get_shaders(gpu, a6xx_state, dumper);
1580			a7xx_get_clusters(gpu, a6xx_state, dumper);
1581			a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1582
1583			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1584		}
1585
1586		a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1587	} else {
1588		a6xx_get_registers(gpu, a6xx_state, dumper);
1589
1590		if (dumper) {
1591			a6xx_get_shaders(gpu, a6xx_state, dumper);
1592			a6xx_get_clusters(gpu, a6xx_state, dumper);
1593			a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1594
1595			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1596		}
1597	}
1598
1599	if (snapshot_debugbus)
1600		a6xx_get_debugbus(gpu, a6xx_state);
1601
1602	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1603
	return &a6xx_state->base;
1605}
1606
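/* Free the snapshot, including every buffer tracked on its object list */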
1607static void a6xx_gpu_state_destroy(struct kref *kref)
1608{
1609	struct a6xx_state_memobj *obj, *tmp;
1610	struct msm_gpu_state *state = container_of(kref,
1611			struct msm_gpu_state, ref);
1612	struct a6xx_gpu_state *a6xx_state = container_of(state,
1613			struct a6xx_gpu_state, base);
1614
1615	if (a6xx_state->gmu_log)
1616		kvfree(a6xx_state->gmu_log->data);
1617
1618	if (a6xx_state->gmu_hfi)
1619		kvfree(a6xx_state->gmu_hfi->data);
1620
1621	if (a6xx_state->gmu_debug)
1622		kvfree(a6xx_state->gmu_debug->data);
1623
1624	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1625		list_del(&obj->node);
1626		kvfree(obj);
1627	}
1628
1629	adreno_gpu_state_destroy(state);
1630	kfree(a6xx_state);
1631}
1632
1633int a6xx_gpu_state_put(struct msm_gpu_state *state)
1634{
1635	if (IS_ERR_OR_NULL(state))
1636		return 1;
1637
1638	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1639}
1640
1641static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1642		struct drm_printer *p)
1643{
1644	int i, index = 0;
1645
1646	if (!data)
1647		return;
1648
1649	for (i = 0; i < count; i += 2) {
1650		u32 count = RANGE(registers, i);
1651		u32 offset = registers[i];
1652		int j;
1653
1654		for (j = 0; j < count; index++, offset++, j++) {
1655			if (data[index] == 0xdeafbead)
1656				continue;
1657
1658			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1659				offset << 2, data[index]);
1660		}
1661	}
1662}
1663
1664static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
1665		struct drm_printer *p, unsigned indent)
1666{
1667	int i, index = 0;
1668
1669	for (i = 0; registers[i] != UINT_MAX; i += 2) {
1670		u32 count = RANGE(registers, i);
1671		u32 offset = registers[i];
1672		int j;
1673
1674		for (j = 0; j < count; index++, offset++, j++) {
1675			int k;
1676
1677			if (data[index] == 0xdeafbead)
1678				continue;
1679
1680			for (k = 0; k < indent; k++)
1681				drm_printf(p, "  ");
1682			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
1683				offset << 2, data[index]);
1684		}
1685	}
1686}
1687
1688static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
1689{
1690	a7xx_show_registers_indented(registers, data, p, 1);
1691}
1692
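/*
 * Trim trailing zero dwords and print the remaining data as an ascii85
 * encoded block.
 */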
1693static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1694{
1695	char out[ASCII85_BUFSZ];
1696	long i, l, datalen = 0;
1697
1698	for (i = 0; i < len >> 2; i++) {
1699		if (data[i])
1700			datalen = (i + 1) << 2;
1701	}
1702
1703	if (datalen == 0)
1704		return;
1705
1706	drm_puts(p, "    data: !!ascii85 |\n");
1707	drm_puts(p, "      ");
1708
1710	l = ascii85_encode_len(datalen);
1711
1712	for (i = 0; i < l; i++)
1713		drm_puts(p, ascii85_encode(data[i], out));
1714
1715	drm_puts(p, "\n");
1716}
1717
1718static void print_name(struct drm_printer *p, const char *fmt, const char *name)
1719{
1720	drm_puts(p, fmt);
1721	drm_puts(p, name);
1722	drm_puts(p, "\n");
1723}
1724
1725static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1726		struct drm_printer *p)
1727{
1728	const struct a6xx_shader_block *block = obj->handle;
1729	int i;
1730
1731	if (!obj->handle)
1732		return;
1733
1734	print_name(p, "  - type: ", block->name);
1735
1736	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1737		drm_printf(p, "    - bank: %d\n", i);
1738		drm_printf(p, "      size: %d\n", block->size);
1739
1740		if (!obj->data)
1741			continue;
1742
1743		print_ascii85(p, block->size << 2,
1744			obj->data + (block->size * i));
1745	}
1746}
1747
1748static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
1749		struct drm_printer *p)
1750{
1751	const struct gen7_shader_block *block = obj->handle;
1752	int i, j;
1753	u32 *data = obj->data;
1754
1755	if (!obj->handle)
1756		return;
1757
1758	print_name(p, "  - type: ", a7xx_statetype_names[block->statetype]);
1759	print_name(p, "    - pipe: ", a7xx_pipe_names[block->pipeid]);
1760
1761	for (i = 0; i < block->num_sps; i++) {
1762		drm_printf(p, "      - sp: %d\n", i);
1763
1764		for (j = 0; j < block->num_usptps; j++) {
1765			drm_printf(p, "        - usptp: %d\n", j);
1766			drm_printf(p, "          size: %d\n", block->size);
1767
1768			if (!obj->data)
1769				continue;
1770
1771			print_ascii85(p, block->size << 2, data);
1772
1773			data += block->size;
1774		}
1775	}
1776}
1777
1778static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1779		struct drm_printer *p)
1780{
1781	int ctx, index = 0;
1782
1783	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1784		int j;
1785
1786		drm_printf(p, "    - context: %d\n", ctx);
1787
1788		for (j = 0; j < size; j += 2) {
1789			u32 count = RANGE(registers, j);
1790			u32 offset = registers[j];
1791			int k;
1792
1793			for (k = 0; k < count; index++, offset++, k++) {
1794				if (data[index] == 0xdeafbead)
1795					continue;
1796
1797				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1798					offset << 2, data[index]);
1799			}
1800		}
1801	}
1802}
1803
1804static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1805		struct drm_printer *p)
1806{
1807	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1808
1809	if (dbgahb) {
1810		print_name(p, "  - cluster-name: ", dbgahb->name);
1811		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1812			obj->data, p);
1813	}
1814}
1815
1816static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1817		struct drm_printer *p)
1818{
1819	const struct a6xx_cluster *cluster = obj->handle;
1820
1821	if (cluster) {
1822		print_name(p, "  - cluster-name: ", cluster->name);
1823		a6xx_show_cluster_data(cluster->registers, cluster->count,
1824			obj->data, p);
1825	}
1826}
1827
1828static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1829		struct drm_printer *p)
1830{
1831	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;
1832
1833	if (dbgahb) {
1834		print_name(p, "  - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
1835		print_name(p, "    - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
1836		drm_printf(p, "      - context: %d\n", dbgahb->context_id);
1837		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
1838	}
1839}
1840
1841static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1842		struct drm_printer *p)
1843{
1844	const struct gen7_cluster_registers *cluster = obj->handle;
1845
1846	if (cluster) {
1847		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
1848
1849		print_name(p, "  - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
1850		print_name(p, "    - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
1851		drm_printf(p, "      - context: %d\n", context);
1852		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
1853	}
1854}
1855
1856static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1857		struct drm_printer *p)
1858{
1859	const struct a6xx_indexed_registers *indexed = obj->handle;
1860
1861	if (!indexed)
1862		return;
1863
1864	print_name(p, "  - regs-name: ", indexed->name);
1865	drm_printf(p, "    dwords: %d\n", indexed->count);
1866
1867	print_ascii85(p, indexed->count << 2, obj->data);
1868}
1869
1870static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1871		u32 *data, struct drm_printer *p)
1872{
1873	if (block) {
1874		print_name(p, "  - debugbus-block: ", block->name);
1875
1876		/*
1877		 * count for regular debugbus data is in quadwords,
1878		 * but print the size in dwords for consistency
1879		 */
1880		drm_printf(p, "    count: %d\n", block->count << 1);
1881
1882		print_ascii85(p, block->count << 3, data);
1883	}
1884}
1885
1886static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1887		struct drm_printer *p)
1888{
1889	int i;
1890
1891	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1892		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1893
1894		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1895	}
1896
1897	if (a6xx_state->vbif_debugbus) {
1898		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1899
1900		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1901		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1902
1903		/* vbif debugbus data is in dwords.  Confusing, huh? */
1904		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1905	}
1906
1907	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1908		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1909
1910		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1911	}
1912}
1913
1914void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1915		struct drm_printer *p)
1916{
1917	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1918	struct a6xx_gpu_state *a6xx_state = container_of(state,
1919			struct a6xx_gpu_state, base);
1920	int i;
1921
1922	if (IS_ERR_OR_NULL(state))
1923		return;
1924
1925	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1926
1927	adreno_show(gpu, state, p);
1928
1929	drm_puts(p, "gmu-log:\n");
1930	if (a6xx_state->gmu_log) {
1931		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1932
1933		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1934		drm_printf(p, "    size: %zu\n", gmu_log->size);
1935		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1936				&gmu_log->encoded);
1937	}
1938
1939	drm_puts(p, "gmu-hfi:\n");
1940	if (a6xx_state->gmu_hfi) {
1941		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1942		unsigned i, j;
1943
1944		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1945		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1946		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1947			drm_printf(p, "    queue-history[%u]:", i);
1948			for (j = 0; j < HFI_HISTORY_SZ; j++) {
1949				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1950			}
1951			drm_printf(p, "\n");
1952		}
1953		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1954				&gmu_hfi->encoded);
1955	}
1956
1957	drm_puts(p, "gmu-debug:\n");
1958	if (a6xx_state->gmu_debug) {
1959		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1960
1961		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1962		drm_printf(p, "    size: %zu\n", gmu_debug->size);
1963		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1964				&gmu_debug->encoded);
1965	}
1966
1967	drm_puts(p, "registers:\n");
1968	for (i = 0; i < a6xx_state->nr_registers; i++) {
1969		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1970
1971		if (!obj->handle)
1972			continue;
1973
1974		if (adreno_is_a7xx(adreno_gpu)) {
1975			a7xx_show_registers(obj->handle, obj->data, p);
1976		} else {
1977			const struct a6xx_registers *regs = obj->handle;
1978
1979			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1980		}
1981	}
1982
1983	drm_puts(p, "registers-gmu:\n");
1984	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1985		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1986		const struct a6xx_registers *regs = obj->handle;
1987
1988		if (!obj->handle)
1989			continue;
1990
1991		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1992	}
1993
1994	drm_puts(p, "indexed-registers:\n");
1995	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1996		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1997
1998	drm_puts(p, "shader-blocks:\n");
1999	for (i = 0; i < a6xx_state->nr_shaders; i++) {
2000		if (adreno_is_a7xx(adreno_gpu))
2001			a7xx_show_shader(&a6xx_state->shaders[i], p);
2002		else
2003			a6xx_show_shader(&a6xx_state->shaders[i], p);
2004	}
2005
2006	drm_puts(p, "clusters:\n");
2007	for (i = 0; i < a6xx_state->nr_clusters; i++) {
2008		if (adreno_is_a7xx(adreno_gpu))
2009			a7xx_show_cluster(&a6xx_state->clusters[i], p);
2010		else
2011			a6xx_show_cluster(&a6xx_state->clusters[i], p);
2012	}
2013
2014	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
2015		if (adreno_is_a7xx(adreno_gpu))
2016			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2017		else
2018			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2019	}
2020
2021	drm_puts(p, "debugbus:\n");
2022	a6xx_show_debugbus(a6xx_state, p);
2023}
2024