// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

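/*
 * Interrupt sources unmasked in a3xx_hw_init() (written to
 * RBBM_INT_0_MASK): the CP/RBBM error sources plus CACHE_FLUSH_TS,
 * which fires when the retire timestamp emitted at the end of
 * a3xx_submit() lands in memory.
 */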
#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

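	/*
	 * Stash the submit seqno in a CP scratch register; a3xx_recover()
	 * dumps CP_SCRATCH_REG0..7, which makes the last-submitted seqno
	 * visible after a hang.
	 */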
	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);
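	/*
	 * The CP writes submit->seqno to the fence slot in the ring's
	 * memptrs once the flush completes; the IRQ bit raises
	 * CACHE_FLUSH_TS so a3xx_irq() can retire completed submits via
	 * msm_gpu_retire().
	 */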

#if 0
	/* Dummy set-constant to trigger context rollover */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
	OUT_RING(ring, 0x00000000);
#endif

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
}

static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
	return a3xx_idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a305b(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x00181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x00181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000018);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000018);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating. This lets AXI run at a higher
		 * frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a305b(adreno_gpu) || adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem.hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
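	/* With the RPTR shadow disabled, a3xx_get_rptr() reads CP_RB_RPTR
	 * directly from the hardware register rather than from memory.
	 */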

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */
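	/* Both loaders below skip the first dword of the firmware image and
	 * stream the remaining words into the CP ME / PFP ucode RAM one
	 * register write at a time.
	 */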

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu) || adreno_is_a305b(adreno_gpu)) {
		/* NOTE: this (value taken from the downstream android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

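/*
 * Register ranges captured for debugfs/crash dumps (hooked up via
 * adreno_gpu->registers below): inclusive <start, end> pairs, terminated
 * by the ~0 sentinel.
 */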
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

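/*
 * Report a free-running RBBM perf counter as the busy-cycle count and the
 * current core clock rate as the sample rate (consumed by the devfreq code
 * to estimate GPU load).
 */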
static u64 a3xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
{
	u64 busy_cycles;

	busy_cycles = gpu_read64(gpu, REG_A3XX_RBBM_PERFCTR_RBBM_1_LO);
	*out_sample_rate = clk_get_rate(gpu->core_clk);

	return busy_cycles;
}

static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.set_param = adreno_set_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = a3xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_busy = a3xx_gpu_busy,
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_create_address_space,
		.get_rptr = a3xx_get_rptr,
	},
};

static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu) || adreno_is_a305b(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
					    adreno_gpu, &a3xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}