/* SPDX-License-Identifier: GPL-2.0 or MIT */
/* Copyright 2023 Collabora ltd. */

#ifndef __PANTHOR_MCU_H__
#define __PANTHOR_MCU_H__

#include <linux/types.h>

struct panthor_device;
struct panthor_kernel_bo;

#define MAX_CSGS				31
#define MAX_CS_PER_CSG				32

struct panthor_fw_ringbuf_input_iface {
	u64 insert;
	u64 extract;
};

struct panthor_fw_ringbuf_output_iface {
	u64 extract;
	u32 active;
};

struct panthor_fw_cs_control_iface {
#define CS_FEATURES_WORK_REGS(x)		(((x) & GENMASK(7, 0)) + 1)
#define CS_FEATURES_SCOREBOARDS(x)		(((x) & GENMASK(15, 8)) >> 8)
#define CS_FEATURES_COMPUTE			BIT(16)
#define CS_FEATURES_FRAGMENT			BIT(17)
#define CS_FEATURES_TILER			BIT(18)
	u32 features;
	u32 input_va;
	u32 output_va;
};

struct panthor_fw_cs_input_iface {
#define CS_STATE_MASK				GENMASK(2, 0)
#define CS_STATE_STOP				0
#define CS_STATE_START				1
#define CS_EXTRACT_EVENT			BIT(4)
#define CS_IDLE_SYNC_WAIT			BIT(8)
#define CS_IDLE_PROTM_PENDING			BIT(9)
#define CS_IDLE_EMPTY				BIT(10)
#define CS_IDLE_RESOURCE_REQ			BIT(11)
#define CS_TILER_OOM				BIT(26)
#define CS_PROTM_PENDING			BIT(27)
#define CS_FATAL				BIT(30)
#define CS_FAULT				BIT(31)
#define CS_REQ_MASK				(CS_STATE_MASK | \
						 CS_EXTRACT_EVENT | \
						 CS_IDLE_SYNC_WAIT | \
						 CS_IDLE_PROTM_PENDING | \
						 CS_IDLE_EMPTY | \
						 CS_IDLE_RESOURCE_REQ)
#define CS_EVT_MASK				(CS_TILER_OOM | \
						 CS_PROTM_PENDING | \
						 CS_FATAL | \
						 CS_FAULT)
	u32 req;

#define CS_CONFIG_PRIORITY(x)			((x) & GENMASK(3, 0))
#define CS_CONFIG_DOORBELL(x)			(((x) << 8) & GENMASK(15, 8))
	u32 config;
	u32 reserved1;
	u32 ack_irq_mask;
	u64 ringbuf_base;
	u32 ringbuf_size;
	u32 reserved2;
	u64 heap_start;
	u64 heap_end;
	u64 ringbuf_input;
	u64 ringbuf_output;
	u32 instr_config;
	u32 instrbuf_size;
	u64 instrbuf_base;
	u64 instrbuf_offset_ptr;
};

struct panthor_fw_cs_output_iface {
	u32 ack;
	u32 reserved1[15];
	u64 status_cmd_ptr;

#define CS_STATUS_WAIT_SB_MASK			GENMASK(15, 0)
#define CS_STATUS_WAIT_SB_SRC_MASK		GENMASK(19, 16)
#define CS_STATUS_WAIT_SB_SRC_NONE		(0 << 16)
#define CS_STATUS_WAIT_SB_SRC_WAIT		(8 << 16)
#define CS_STATUS_WAIT_SYNC_COND_LE		(0 << 24)
#define CS_STATUS_WAIT_SYNC_COND_GT		(1 << 24)
#define CS_STATUS_WAIT_SYNC_COND_MASK		GENMASK(27, 24)
#define CS_STATUS_WAIT_PROGRESS			BIT(28)
#define CS_STATUS_WAIT_PROTM			BIT(29)
#define CS_STATUS_WAIT_SYNC_64B			BIT(30)
#define CS_STATUS_WAIT_SYNC			BIT(31)
	u32 status_wait;
	u32 status_req_resource;
	u64 status_wait_sync_ptr;
	u32 status_wait_sync_value;
	u32 status_scoreboards;

#define CS_STATUS_BLOCKED_REASON_UNBLOCKED	0
#define CS_STATUS_BLOCKED_REASON_SB_WAIT	1
#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT	2
#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT	3
#define CS_STATUS_BLOCKED_REASON_DEFERRED	5
#define CS_STATUS_BLOCKED_REASON_RES		6
#define CS_STATUS_BLOCKED_REASON_FLUSH		7
#define CS_STATUS_BLOCKED_REASON_MASK		GENMASK(3, 0)
	u32 status_blocked_reason;
	u32 status_wait_sync_value_hi;
	u32 reserved2[6];

#define CS_EXCEPTION_TYPE(x)			((x) & GENMASK(7, 0))
#define CS_EXCEPTION_DATA(x)			(((x) >> 8) & GENMASK(23, 0))
	u32 fault;
	u32 fatal;
	u64 fault_info;
	u64 fatal_info;
	u32 reserved3[10];
	u32 heap_vt_start;
	u32 heap_vt_end;
	u32 reserved4;
	u32 heap_frag_end;
	u64 heap_address;
};

struct panthor_fw_csg_control_iface {
	u32 features;
	u32 input_va;
	u32 output_va;
	u32 suspend_size;
	u32 protm_suspend_size;
	u32 stream_num;
	u32 stream_stride;
};

struct panthor_fw_csg_input_iface {
#define CSG_STATE_MASK				GENMASK(2, 0)
#define CSG_STATE_TERMINATE			0
#define CSG_STATE_START				1
#define CSG_STATE_SUSPEND			2
#define CSG_STATE_RESUME			3
#define CSG_ENDPOINT_CONFIG			BIT(4)
#define CSG_STATUS_UPDATE			BIT(5)
#define CSG_SYNC_UPDATE				BIT(28)
#define CSG_IDLE				BIT(29)
#define CSG_DOORBELL				BIT(30)
#define CSG_PROGRESS_TIMER_EVENT		BIT(31)
#define CSG_REQ_MASK				(CSG_STATE_MASK | \
						 CSG_ENDPOINT_CONFIG | \
						 CSG_STATUS_UPDATE)
#define CSG_EVT_MASK				(CSG_SYNC_UPDATE | \
						 CSG_IDLE | \
						 CSG_PROGRESS_TIMER_EVENT)
	u32 req;
	u32 ack_irq_mask;

	u32 doorbell_req;
	u32 cs_irq_ack;
	u32 reserved1[4];
	u64 allow_compute;
	u64 allow_fragment;
	u32 allow_other;

#define CSG_EP_REQ_COMPUTE(x)			((x) & GENMASK(7, 0))
#define CSG_EP_REQ_FRAGMENT(x)			(((x) << 8) & GENMASK(15, 8))
#define CSG_EP_REQ_TILER(x)			(((x) << 16) & GENMASK(19, 16))
#define CSG_EP_REQ_EXCL_COMPUTE			BIT(20)
#define CSG_EP_REQ_EXCL_FRAGMENT		BIT(21)
#define CSG_EP_REQ_PRIORITY(x)			(((x) << 28) & GENMASK(31, 28))
#define CSG_EP_REQ_PRIORITY_MASK		GENMASK(31, 28)
	u32 endpoint_req;
	u32 reserved2[2];
	u64 suspend_buf;
	u64 protm_suspend_buf;
	u32 config;
	u32 iter_trace_config;
};

struct panthor_fw_csg_output_iface {
	u32 ack;
	u32 reserved1;
	u32 doorbell_ack;
	u32 cs_irq_req;
	u32 status_endpoint_current;
	u32 status_endpoint_req;

#define CSG_STATUS_STATE_IS_IDLE		BIT(0)
	u32 status_state;
	u32 resource_dep;
};

struct panthor_fw_global_control_iface {
	u32 version;
	u32 features;
	u32 input_va;
	u32 output_va;
	u32 group_num;
	u32 group_stride;
	u32 perfcnt_size;
	u32 instr_features;
};

struct panthor_fw_global_input_iface {
#define GLB_HALT				BIT(0)
#define GLB_CFG_PROGRESS_TIMER			BIT(1)
#define GLB_CFG_ALLOC_EN			BIT(2)
#define GLB_CFG_POWEROFF_TIMER			BIT(3)
#define GLB_PROTM_ENTER				BIT(4)
#define GLB_PERFCNT_EN				BIT(5)
#define GLB_PERFCNT_SAMPLE			BIT(6)
#define GLB_COUNTER_EN				BIT(7)
#define GLB_PING				BIT(8)
#define GLB_FWCFG_UPDATE			BIT(9)
#define GLB_IDLE_EN				BIT(10)
#define GLB_SLEEP				BIT(12)
#define GLB_INACTIVE_COMPUTE			BIT(20)
#define GLB_INACTIVE_FRAGMENT			BIT(21)
#define GLB_INACTIVE_TILER			BIT(22)
#define GLB_PROTM_EXIT				BIT(23)
#define GLB_PERFCNT_THRESHOLD			BIT(24)
#define GLB_PERFCNT_OVERFLOW			BIT(25)
#define GLB_IDLE				BIT(26)
#define GLB_DBG_CSF				BIT(30)
#define GLB_DBG_HOST				BIT(31)
#define GLB_REQ_MASK				GENMASK(10, 0)
#define GLB_EVT_MASK				GENMASK(26, 20)
	u32 req;
	u32 ack_irq_mask;
	u32 doorbell_req;
	u32 reserved1;
	u32 progress_timer;

#define GLB_TIMER_VAL(x)			((x) & GENMASK(30, 0))
#define GLB_TIMER_SOURCE_GPU_COUNTER		BIT(31)
	u32 poweroff_timer;
	u64 core_en_mask;
	u32 reserved2;
	u32 perfcnt_as;
	u64 perfcnt_base;
	u32 perfcnt_extract;
	u32 reserved3[3];
	u32 perfcnt_config;
	u32 perfcnt_csg_select;
	u32 perfcnt_fw_enable;
	u32 perfcnt_csg_enable;
	u32 perfcnt_csf_enable;
	u32 perfcnt_shader_enable;
	u32 perfcnt_tiler_enable;
	u32 perfcnt_mmu_l2_enable;
	u32 reserved4[8];
	u32 idle_timer;
};

enum panthor_fw_halt_status {
	PANTHOR_FW_HALT_OK = 0,
	PANTHOR_FW_HALT_ON_PANIC = 0x4e,
	PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f,
};

struct panthor_fw_global_output_iface {
	u32 ack;
	u32 reserved1;
	u32 doorbell_ack;
	u32 reserved2;
	u32 halt_status;
	u32 perfcnt_status;
	u32 perfcnt_insert;
};

/**
 * struct panthor_fw_cs_iface - Firmware command stream slot interface
 */
struct panthor_fw_cs_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Command stream slot control interface.
	 *
	 * Used to expose command stream slot properties.
	 *
	 * This interface is read-only.
	 */
	struct panthor_fw_cs_control_iface *control;

	/**
	 * @input: Command stream slot input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_cs_input_iface *input;

	/**
	 * @output: Command stream slot output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_cs_output_iface *output;
};

/**
 * struct panthor_fw_csg_iface - Firmware command stream group slot interface
 */
struct panthor_fw_csg_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Command stream group slot control interface.
	 *
	 * Used to expose command stream group slot properties.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_csg_control_iface *control;

	/**
	 * @input: Command stream group slot input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_csg_input_iface *input;

	/**
	 * @output: Command stream group slot output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_csg_output_iface *output;
};

/**
 * struct panthor_fw_global_iface - Firmware global interface
 */
struct panthor_fw_global_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_global_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler/FW management logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Global control interface.
	 *
	 * Used to expose global FW properties.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_global_control_iface *control;

	/**
	 * @input: Global input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_global_input_iface *input;

	/**
	 * @output: Global output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_global_output_iface *output;
};
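
/*
 * Illustrative sketch (not taken from the driver sources): the control
 * sections above only expose read-only FW properties, so a caller would,
 * for instance, query the number of CSG slots advertised by the FW like
 * this (assuming ptdev points to a fully probed panthor_device):
 *
 *	const struct panthor_fw_global_control_iface *glb_ctrl;
 *	u32 num_csg_slots;
 *
 *	glb_ctrl = panthor_fw_get_glb_iface(ptdev)->control;
 *	num_csg_slots = min_t(u32, glb_ctrl->group_num, MAX_CSGS);
 */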

/**
 * panthor_fw_toggle_reqs() - Toggle request bits to send an event to the FW
 * @__iface: The interface to operate on.
 * @__in_reg: Name of the register to update in the input section of the interface.
 * @__out_reg: Name of the register to take as a reference in the output section of the
 * interface.
 * @__mask: Mask to apply to the update.
 *
 * The Host -> FW event/message passing was designed to be lockless, with each side of
 * the channel having its own writeable section. Events are signaled as a difference
 * between the host and FW sides in the req/ack registers (when a bit differs, there's
 * an event pending; when they are the same, nothing needs attention).
 *
 * This helper allows one to update the req register based on the current value of the
 * ack register managed by the FW. Toggling a specific bit will flag an event. In order
 * for events to be re-evaluated, the interface doorbell needs to be rung.
 *
 * Concurrent accesses to the same req register are covered.
 *
 * Anything requiring atomic updates to multiple registers requires a dedicated lock.
 */
#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \
	do { \
		u32 __cur_val, __new_val, __out_val; \
		spin_lock(&(__iface)->lock); \
		__cur_val = READ_ONCE((__iface)->input->__in_reg); \
		__out_val = READ_ONCE((__iface)->output->__out_reg); \
		__new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \
		WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
		spin_unlock(&(__iface)->lock); \
	} while (0)
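
/*
 * Illustrative sketch, not lifted from the driver sources: flag a CSG status
 * update request and wait for the FW acknowledgment. The sequence assumed
 * here is toggle -> ring doorbell -> wait for ack; the doorbell helper is
 * assumed to take a mask of CSG slots, and the 100ms timeout is arbitrary.
 *
 *	struct panthor_fw_csg_iface *csg_iface;
 *	u32 acked;
 *	int ret;
 *
 *	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
 *	panthor_fw_toggle_reqs(csg_iface, req, ack, CSG_STATUS_UPDATE);
 *	panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
 *	ret = panthor_fw_csg_wait_acks(ptdev, csg_id, CSG_STATUS_UPDATE,
 *				       &acked, 100);
 */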

/**
 * panthor_fw_update_reqs() - Update bits to reflect a configuration change
 * @__iface: The interface to operate on.
 * @__in_reg: Name of the register to update in the input section of the interface.
 * @__val: Value to set.
 * @__mask: Mask to apply to the update.
 *
 * Some configuration values are passed through req registers that are also used to
 * send events to the FW. Because those req registers are also updated from the
 * interrupt handler, this special helper is needed to update the configuration part
 * without racing with the event bits.
 *
 * Concurrent accesses to the same req register are covered.
 *
 * Anything requiring atomic updates to multiple registers requires a dedicated lock.
 */
#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \
	do { \
		u32 __cur_val, __new_val; \
		spin_lock(&(__iface)->lock); \
		__cur_val = READ_ONCE((__iface)->input->__in_reg); \
		__new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \
		WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
		spin_unlock(&(__iface)->lock); \
	} while (0)
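
/*
 * Illustrative sketch, not lifted from the driver sources: program the CSG
 * state field, which shares the req register with the event bits handled
 * above, before the request is submitted to the FW.
 *
 *	struct panthor_fw_csg_iface *csg_iface;
 *
 *	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
 *	panthor_fw_update_reqs(csg_iface, req, CSG_STATE_START, CSG_STATE_MASK);
 */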

struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev);

struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot);

struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot);

int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask,
			     u32 *acked, u32 timeout_ms);

int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked,
			     u32 timeout_ms);

void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot);

struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
				 struct panthor_fw_ringbuf_input_iface **input,
				 const struct panthor_fw_ringbuf_output_iface **output,
				 u32 *input_fw_va, u32 *output_fw_va);
struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size);

struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev);

void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang);
int panthor_fw_post_reset(struct panthor_device *ptdev);

static inline void panthor_fw_suspend(struct panthor_device *ptdev)
{
	panthor_fw_pre_reset(ptdev, false);
}

static inline int panthor_fw_resume(struct panthor_device *ptdev)
{
	return panthor_fw_post_reset(ptdev);
}

int panthor_fw_init(struct panthor_device *ptdev);
void panthor_fw_unplug(struct panthor_device *ptdev);

#endif