// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#ifdef CONFIG_ARM_ARCH_TIMER
#include <asm/arch_timer.h>
#endif

#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/firmware.h>
#include <linux/iopoll.h>
#include <linux/iosys-map.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>

#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"

#define CSF_FW_NAME "mali_csffw.bin"

#define PING_INTERVAL_MS			12000
#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
#define IDLE_HYSTERESIS_US			800
#define PWROFF_HYSTERESIS_US			10000
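
/*
 * PROGRESS_TIMEOUT_CYCLES amounts to 5 seconds at a nominal
 * 500 * 1024 * 1024 Hz (~524 MHz) GPU clock. The progress timer is
 * programmed in units of 1 << PROGRESS_TIMEOUT_SCALE_SHIFT (1024) cycles,
 * hence the shift applied when filling glb_iface->input->progress_timer.
 */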

/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 */
struct panthor_fw_binary_hdr {
	/** @magic: Magic value to check binary validity. */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e

	/** @minor: Minor FW version. */
	u8 minor;

	/** @major: Major FW version. */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/** @size: FW binary size. */
	u32 size;
};

/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
};

#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
#define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)
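
/*
 * As decoded by the macros above, an entry header packs the entry type in
 * bits 7:0, the entry size in bytes (header included) in bits 15:8, and
 * the update/optional flags in bits 30 and 31.
 */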

#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD					BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR					BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX					BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE			(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED			(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT	(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT		(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK			GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT				BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED				BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO				BIT(31)

#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS			\
	(CSF_FW_BINARY_IFACE_ENTRY_RD_RD |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_WR |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_EX |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK |			\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_PROT |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)

/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags. */
	u32 flags;

	/** @va: MCU virtual range to map this binary section to. */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/** @data: Data to initialize the FW section with. */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};

/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary.
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. */
	const void *data;

	/** @size: FW binary size. */
	size_t size;

	/** @offset: Iterator offset. */
	size_t offset;
};

/**
 * struct panthor_fw_section - FW section
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/** @mem: Section memory. */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};

#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3
#define MAX_CSG_PRIO				0xf

#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)
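
/*
 * For instance, CSF_IFACE_VERSION(1, 1, 0) packs to 0x01010000, which is
 * the version panthor_get_instr_features() checks against before reading
 * instr_features.
 */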

#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
#define CSF_UNPRESERVED_REG_COUNT		4

/**
 * struct panthor_fw_iface - FW interfaces
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};

/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/** @shared_section: The section containing the FW interfaces. */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller first busy waits for the request to be
	 * acknowledged, and then falls back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/** @booted: True if the FW is booted. */
	bool booted;

	/**
	 * @fast_reset: True if the post_reset logic can proceed with a fast reset.
	 *
	 * A fast reset is just a reset where the driver doesn't reload the FW sections.
	 *
	 * Any time the firmware is properly suspended, a fast reset can take place.
	 * On the other hand, if the halt operation failed, the driver will reload
	 * all sections to make sure we start from a fresh state.
	 */
	bool fast_reset;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};

struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
{
	return ptdev->fw->vm;
}

/**
 * panthor_fw_get_glb_iface() - Get the global interface
 * @ptdev: Device.
 *
 * Return: The global interface.
 */
struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev)
{
	return &ptdev->fw->iface.global;
}

/**
 * panthor_fw_get_csg_iface() - Get a command stream group slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 *
 * Return: The command stream group slot interface.
 */
struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
		return NULL;

	return &ptdev->fw->iface.groups[csg_slot];
}

/**
 * panthor_fw_get_cs_iface() - Get a command stream slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 * @cs_slot: Index of the command stream slot.
 *
 * Return: The command stream slot interface.
 */
struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
		return NULL;

	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
}

/**
 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
 * @ptdev: Device.
 * @timeout_us: Timeout expressed in microseconds.
 *
 * The FW has two timer sources: the GPU counter or the arch-timer. We need
 * to express timeouts in terms of a number of cycles, and specify which
 * timer source should be used.
 *
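 * For example, assuming a 24 MHz arch timer, a 10000us timeout converts
 * to DIV_ROUND_UP_ULL(10000 * 24000000, 1000000 << 10) = 235, i.e. 235
 * units of 1024 timer cycles.
 *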
 * Return: A value suitable for timeout fields in the global interface.
 */
static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
{
	bool use_cycle_counter = false;
	u32 timer_rate = 0;
	u64 mod_cycles;

#ifdef CONFIG_ARM_ARCH_TIMER
	timer_rate = arch_timer_get_cntfrq();
#endif

	if (!timer_rate) {
		use_cycle_counter = true;
		timer_rate = clk_get_rate(ptdev->clks.core);
	}

	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
		/* We couldn't get a valid clock rate, let's just pick the
		 * maximum value so the FW still handles the core
		 * power on/off requests.
		 */
		return GLB_TIMER_VAL(~0) |
		       GLB_TIMER_SOURCE_GPU_COUNTER;
	}

	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
				      1000000ull << 10);
	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
		mod_cycles = GLB_TIMER_VAL(~0);

	return GLB_TIMER_VAL(mod_cycles) |
	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
}

static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
				       struct panthor_fw_binary_iter *iter,
				       void *out, size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware too small\n");
		return -EINVAL;
	}

	memcpy(out, iter->data + iter->offset, size);
	iter->offset = new_offset;
	return 0;
}

static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
					   struct panthor_fw_binary_iter *iter,
					   struct panthor_fw_binary_iter *sub_iter,
					   size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware entry too long\n");
		return -EINVAL;
	}

	sub_iter->offset = 0;
	sub_iter->data = iter->data + iter->offset;
	sub_iter->size = size;
	iter->offset = new_offset;
	return 0;
}

static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
					struct panthor_fw_section *section)
{
	bool was_mapped = !!section->mem->kmap;
	int ret;

	if (!section->data.size &&
	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
		return;

	ret = panthor_kernel_bo_vmap(section->mem);
	if (drm_WARN_ON(&ptdev->base, ret))
		return;

	memcpy(section->mem->kmap, section->data.buf, section->data.size);
	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
		memset(section->mem->kmap + section->data.size, 0,
		       panthor_kernel_bo_size(section->mem) - section->data.size);
	}

	if (!was_mapped)
		panthor_kernel_bo_vunmap(section->mem);
}

/**
 * panthor_fw_alloc_queue_iface_mem() - Allocate the ring-buffer interfaces.
 * @ptdev: Device.
 * @input: Pointer holding the input interface on success.
 * Should be ignored on failure.
 * @output: Pointer holding the output interface on success.
 * Should be ignored on failure.
 * @input_fw_va: Pointer holding the input interface FW VA on success.
 * Should be ignored on failure.
 * @output_fw_va: Pointer holding the output interface FW VA on success.
 * Should be ignored on failure.
 *
 * Allocates panthor_fw_ringbuf_{input,output}_iface interfaces. The input
 * interface is at offset 0, and the output interface at offset 4096.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
				 struct panthor_fw_ringbuf_input_iface **input,
				 const struct panthor_fw_ringbuf_output_iface **output,
				 u32 *input_fw_va, u32 *output_fw_va)
{
	struct panthor_kernel_bo *mem;
	int ret;

	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
				       DRM_PANTHOR_BO_NO_MMAP,
				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
				       PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(mem))
		return mem;

	ret = panthor_kernel_bo_vmap(mem);
	if (ret) {
		panthor_kernel_bo_destroy(mem);
		return ERR_PTR(ret);
	}

	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
	*input = mem->kmap;
	*output = mem->kmap + SZ_4K;
	*input_fw_va = panthor_kernel_bo_gpuva(mem);
	*output_fw_va = *input_fw_va + SZ_4K;

	return mem;
}

/**
 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
 * @ptdev: Device.
 * @size: Size of the suspend buffer.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
{
	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
					DRM_PANTHOR_BO_NO_MMAP,
					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					PANTHOR_VM_KERNEL_AUTO_VA);
}

static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
					 const struct firmware *fw,
					 struct panthor_fw_binary_iter *iter,
					 u32 ehdr)
{
	struct panthor_fw_binary_section_entry_hdr hdr;
	struct panthor_fw_section *section;
	u32 section_size;
	u32 name_len;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.data.end < hdr.data.start) {
		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
			hdr.data.end, hdr.data.start);
		return -EINVAL;
	}

	if (hdr.va.end < hdr.va.start) {
		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
			hdr.va.end, hdr.va.start);
		return -EINVAL;
	}

	if (hdr.data.end > fw->size) {
		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
			hdr.data.end, fw->size);
		return -EINVAL;
	}

	if ((hdr.va.start & ~PAGE_MASK) != 0 ||
	    (hdr.va.end & ~PAGE_MASK) != 0) {
		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
			hdr.va.start, hdr.va.end);
		return -EINVAL;
	}

	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
			hdr.flags);
		return -EINVAL;
	}

	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
		drm_warn(&ptdev->base,
			 "Firmware protected mode entry not supported, ignoring");
		return 0;
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
		drm_err(&ptdev->base,
			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
		return -EINVAL;
	}

	name_len = iter->size - iter->offset;

	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
	if (!section)
		return -ENOMEM;

	list_add_tail(&section->node, &ptdev->fw->sections);
	section->flags = hdr.flags;
	section->data.size = hdr.data.end - hdr.data.start;

	if (section->data.size > 0) {
		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);

		if (!data)
			return -ENOMEM;

		memcpy(data, fw->data + hdr.data.start, section->data.size);
		section->data.buf = data;
	}

	if (name_len > 0) {
		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);

		if (!name)
			return -ENOMEM;

		memcpy(name, iter->data + iter->offset, name_len);
		name[name_len] = '\0';
		section->name = name;
	}

	section_size = hdr.va.end - hdr.va.start;
	if (section_size) {
		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
		struct panthor_gem_object *bo;
		u32 vm_map_flags = 0;
		struct sg_table *sgt;
		u64 va = hdr.va.start;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;

		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
		 * non-cacheable for now. We might want to introduce a new
		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
		 * memory and is currently not used by our driver) for
		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take advantage
		 * of IO-coherent systems.
		 */
		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;

		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
							section_size,
							DRM_PANTHOR_BO_NO_MMAP,
							vm_map_flags, va);
		if (IS_ERR(section->mem))
			return PTR_ERR(section->mem);

		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
			return -EINVAL;

		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
			ret = panthor_kernel_bo_vmap(section->mem);
			if (ret)
				return ret;
		}

		panthor_fw_init_section_mem(ptdev, section);

		bo = to_panthor_bo(section->mem->obj);
		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
		ptdev->fw->shared_section = section;

	return 0;
}

static void
panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
{
	struct panthor_fw_section *section;

	list_for_each_entry(section, &ptdev->fw->sections, node) {
		struct sg_table *sgt;

		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
			continue;

		panthor_fw_init_section_mem(ptdev, section);
		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}
}

static int panthor_fw_load_entry(struct panthor_device *ptdev,
				 const struct firmware *fw,
				 struct panthor_fw_binary_iter *iter)
{
	struct panthor_fw_binary_iter eiter;
	u32 ehdr;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
	if (ret)
		return ret;

	if ((iter->offset % sizeof(u32)) ||
	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
		return -EINVAL;
	}

	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
		return -EINVAL;

	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);

	/* FIXME: handle those entry types? */
	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
		return 0;
	default:
		break;
	}

	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
		return 0;

	drm_err(&ptdev->base,
		"Unsupported non-optional entry type %u in firmware\n",
		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
	return -EINVAL;
}

static int panthor_fw_load(struct panthor_device *ptdev)
{
	const struct firmware *fw = NULL;
	struct panthor_fw_binary_iter iter = {};
	struct panthor_fw_binary_hdr hdr;
	char fw_path[128];
	int ret;

	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
		 CSF_FW_NAME);

	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
	if (ret) {
		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
			CSF_FW_NAME);
		return ret;
	}

	iter.data = fw->data;
	iter.size = fw->size;
	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
	if (ret)
		goto out;

	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Invalid firmware magic\n");
		goto out;
	}

	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
		goto out;
	}

	if (hdr.size > iter.size) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Firmware image is truncated\n");
		goto out;
	}

	iter.size = hdr.size;

	while (iter.offset < hdr.size) {
		ret = panthor_fw_load_entry(ptdev, fw, &iter);
		if (ret)
			goto out;
	}

	if (!ptdev->fw->shared_section) {
		drm_err(&ptdev->base, "Shared interface region not found\n");
		ret = -EINVAL;
		goto out;
	}

out:
	release_firmware(fw);
	return ret;
}

/**
 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
 * @ptdev: Device.
 * @mcu_va: MCU address.
 *
 * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
 */
static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
{
	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
	u64 shared_mem_end = shared_mem_start +
			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
		return NULL;

	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
}

static int panthor_init_cs_iface(struct panthor_device *ptdev,
				 unsigned int csg_idx, unsigned int cs_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
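	/*
	 * Control blocks live in the shared section: one block per CSG slot
	 * at CSF_GROUP_CONTROL_OFFSET + csg_idx * group_stride, then one
	 * block per CS slot at CSF_STREAM_CONTROL_OFFSET +
	 * cs_idx * stream_stride within the group block.
	 */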
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
			   (csg_idx * glb_iface->control->group_stride) +
			   CSF_STREAM_CONTROL_OFFSET +
			   (cs_idx * csg_iface->control->stream_stride);
	struct panthor_fw_cs_iface *first_cs_iface =
		panthor_fw_get_cs_iface(ptdev, 0, 0);

	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&cs_iface->lock);
	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);

	if (!cs_iface->input || !cs_iface->output) {
		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
		return -EINVAL;
	}

	if (cs_iface != first_cs_iface) {
		if (cs_iface->control->features != first_cs_iface->control->features) {
			drm_err(&ptdev->base, "Expecting identical CS slots");
			return -EINVAL;
		}
	} else {
		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);

		ptdev->csif_info.cs_reg_count = reg_count;
		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
	}

	return 0;
}

static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
			const struct panthor_fw_csg_control_iface *b)
{
	if (a->features != b->features)
		return false;
	if (a->suspend_size != b->suspend_size)
		return false;
	if (a->protm_suspend_size != b->protm_suspend_size)
		return false;
	if (a->stream_num != b->stream_num)
		return false;
	return true;
}

static int panthor_init_csg_iface(struct panthor_device *ptdev,
				  unsigned int csg_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
	unsigned int i;

	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&csg_iface->lock);
	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);

	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
		return -EINVAL;

	if (!csg_iface->input || !csg_iface->output) {
		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
		return -EINVAL;
	}

	if (csg_idx > 0) {
		struct panthor_fw_csg_iface *first_csg_iface =
			panthor_fw_get_csg_iface(ptdev, 0);

		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
			drm_err(&ptdev->base, "Expecting identical CSG slots");
			return -EINVAL;
		}
	}

	for (i = 0; i < csg_iface->control->stream_num; i++) {
		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);

		if (ret)
			return ret;
	}

	return 0;
}

static u32 panthor_get_instr_features(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
		return 0;

	return glb_iface->control->instr_features;
}

static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
	unsigned int i;

	if (!ptdev->fw->shared_section->mem->kmap)
		return -EINVAL;

	spin_lock_init(&glb_iface->lock);
	glb_iface->control = ptdev->fw->shared_section->mem->kmap;

	if (!glb_iface->control->version) {
		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
		return -EINVAL;
	}

	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
	if (!glb_iface->input || !glb_iface->output) {
		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
		return -EINVAL;
	}

	if (glb_iface->control->group_num > MAX_CSGS ||
	    glb_iface->control->group_num < MIN_CSGS) {
		drm_err(&ptdev->base, "Invalid number of control groups");
		return -EINVAL;
	}

	for (i = 0; i < glb_iface->control->group_num; i++) {
		int ret = panthor_init_csg_iface(ptdev, i);

		if (ret)
			return ret;
	}

	drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x",
		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
		 glb_iface->control->features,
		 panthor_get_instr_features(ptdev));
	return 0;
}

static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* Enable all cores. */
	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;

	/* Setup timers. */
	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);

	/* Enable interrupts we care about. */
	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
					 GLB_PING |
					 GLB_CFG_PROGRESS_TIMER |
					 GLB_CFG_POWEROFF_TIMER |
					 GLB_IDLE_EN |
					 GLB_IDLE;

	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
	panthor_fw_toggle_reqs(glb_iface, req, ack,
			       GLB_CFG_ALLOC_EN |
			       GLB_CFG_POWEROFF_TIMER |
			       GLB_CFG_PROGRESS_TIMER);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* Kick the watchdog. */
	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
			 msecs_to_jiffies(PING_INTERVAL_MS));
}

static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
{
	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
		ptdev->fw->booted = true;

	wake_up_all(&ptdev->fw->req_waitqueue);

	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
	if (!ptdev->fw->booted)
		return;

	panthor_sched_report_fw_events(ptdev, status);
}
PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);

static int panthor_fw_start(struct panthor_device *ptdev)
{
	bool timedout = false;

	ptdev->fw->booted = false;
	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);

	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
				ptdev->fw->booted,
				msecs_to_jiffies(1000))) {
		if (!ptdev->fw->booted &&
		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
			timedout = true;
	}

	if (timedout) {
		static const char * const status_str[] = {
			[MCU_STATUS_DISABLED] = "disabled",
			[MCU_STATUS_ENABLED] = "enabled",
			[MCU_STATUS_HALT] = "halt",
			[MCU_STATUS_FATAL] = "fatal",
		};
		u32 status = gpu_read(ptdev, MCU_STATUS);

		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
		return -ETIMEDOUT;
	}

	return 0;
}

static void panthor_fw_stop(struct panthor_device *ptdev)
{
	u32 status;

	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
	if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
			       status == MCU_STATUS_DISABLED, 10, 100000))
		drm_err(&ptdev->base, "Failed to stop MCU");
}

/**
 * panthor_fw_pre_reset() - Call before a reset.
 * @ptdev: Device.
 * @on_hang: true if the reset was triggered on a GPU hang.
 *
 * If the reset is not triggered on a hang, we try to gracefully halt the
 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
 */
void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
{
	/* Make sure we won't be woken up by a ping. */
	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	ptdev->fw->fast_reset = false;

	if (!on_hang) {
		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
		u32 status;

		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
		if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
					status == MCU_STATUS_HALT, 10, 100000) &&
		    glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
			ptdev->fw->fast_reset = true;
		} else {
			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
		}

		/* The FW detects 0 -> 1 transitions. Make sure we reset
		 * the HALT bit before the FW is rebooted.
		 */
		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
	}

	panthor_job_irq_suspend(&ptdev->fw->irq);
}

/**
 * panthor_fw_post_reset() - Call after a reset.
 * @ptdev: Device.
 *
 * Start the FW. If this is not a fast reset, all FW sections are reloaded to
 * make sure we can recover from a memory corruption.
 */
int panthor_fw_post_reset(struct panthor_device *ptdev)
{
	int ret;

	/* Make the MCU VM active. */
	ret = panthor_vm_active(ptdev->fw->vm);
	if (ret)
		return ret;

	/* If this is a fast reset, try to start the MCU without reloading
	 * the FW sections. If it fails, go for a full reset.
	 */
	if (ptdev->fw->fast_reset) {
		ret = panthor_fw_start(ptdev);
		if (!ret)
			goto out;

		/* Forcibly reset the MCU and force a slow reset, so we get a
		 * fresh boot on the next panthor_fw_start() call.
		 */
		panthor_fw_stop(ptdev);
		ptdev->fw->fast_reset = false;
		drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
	}

	/* Reload all sections, including RO ones. We're not supposed
	 * to end up here anyway, let's just assume the overhead of
	 * reloading everything is acceptable.
	 */
	panthor_reload_fw_sections(ptdev, true);

	ret = panthor_fw_start(ptdev);
	if (ret) {
		drm_err(&ptdev->base, "FW slow reset failed");
		return ret;
	}

out:
	/* We must re-initialize the global interface even on fast-reset. */
	panthor_fw_init_global_iface(ptdev);
	return 0;
}

/**
 * panthor_fw_unplug() - Called when the device is unplugged.
 * @ptdev: Device.
 *
 * This function must make sure all pending operations are flushed before
 * it releases device resources, thus preventing any interaction with
 * the HW.
 *
 * If there is still FW-related work running after this function returns,
 * it must use drm_dev_{enter,exit}() and skip any HW access when
 * drm_dev_enter() returns false.
 */
void panthor_fw_unplug(struct panthor_device *ptdev)
{
	struct panthor_fw_section *section;

	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	/* Make sure the IRQ handler cannot be called after that point. */
	if (ptdev->fw->irq.irq)
		panthor_job_irq_suspend(&ptdev->fw->irq);

	panthor_fw_stop(ptdev);

	list_for_each_entry(section, &ptdev->fw->sections, node)
		panthor_kernel_bo_destroy(section->mem);

	/* We intentionally don't call panthor_vm_idle() and let
	 * panthor_mmu_unplug() release the AS we acquired with
	 * panthor_vm_active() so we don't have to track the VM active/idle
	 * state to keep the active_refcnt balanced.
	 */
	panthor_vm_put(ptdev->fw->vm);
	ptdev->fw->vm = NULL;

	panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
}

/**
 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
 * @req_ptr: Pointer to the req register.
 * @ack_ptr: Pointer to the ack register.
 * @wq: Wait queue to use for the sleeping wait.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
				wait_queue_head_t *wq,
				u32 req_mask, u32 *acked,
				u32 timeout_ms)
{
	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
	int ret;

	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
	*acked = req_mask;
	ret = read_poll_timeout_atomic(READ_ONCE, ack,
				       (ack & req_mask) == req,
				       0, 10, 0,
				       *ack_ptr);
	if (!ret)
		return 0;

	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
			       msecs_to_jiffies(timeout_ms)))
		return 0;

	/* Check one last time, in case we were not woken up for some reason. */
	ack = READ_ONCE(*ack_ptr);
	if ((ack & req_mask) == req)
		return 0;

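	/* A bit is considered acked when req and ack agree on its value. */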
	*acked = ~(req ^ ack) & req_mask;
	return -ETIMEDOUT;
}

/**
 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
 * @ptdev: Device.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
			     u32 req_mask, u32 *acked,
			     u32 timeout_ms)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* GLB_HALT doesn't get acked through the FW interface. */
	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
		return -EINVAL;

	return panthor_fw_wait_acks(&glb_iface->input->req,
				    &glb_iface->output->ack,
				    &ptdev->fw->req_waitqueue,
				    req_mask, acked, timeout_ms);
}

/**
 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
 * @ptdev: Device.
 * @csg_slot: CSG slot ID.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
			     u32 req_mask, u32 *acked, u32 timeout_ms)
{
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
	int ret;

	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
		return -EINVAL;

	ret = panthor_fw_wait_acks(&csg_iface->input->req,
				   &csg_iface->output->ack,
				   &ptdev->fw->req_waitqueue,
				   req_mask, acked, timeout_ms);

	/*
	 * Check that all bits in the state field were updated; if any
	 * mismatched, clear the whole state field. This allows code to do
	 * (acked & CSG_STATE_MASK) and get the right value.
	 */
	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
		*acked &= ~CSG_STATE_MASK;

	return ret;
}

/**
 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
 * @ptdev: Device.
 * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
 *
 * This function toggles bits in doorbell_req and rings the global
 * doorbell. It doesn't require a user doorbell to be attached to
 * the group.
 */
void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
}

static void panthor_fw_ping_work(struct work_struct *work)
{
	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
	struct panthor_device *ptdev = fw->irq.ptdev;
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	u32 acked;
	int ret;

	if (panthor_device_reset_is_pending(ptdev))
		return;

	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
	if (ret) {
		panthor_device_schedule_reset(ptdev);
		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
	} else {
		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
				 msecs_to_jiffies(PING_INTERVAL_MS));
	}
}

/**
 * panthor_fw_init() - Initialize FW related data.
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_fw_init(struct panthor_device *ptdev)
{
	struct panthor_fw *fw;
	int ret, irq;

	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
	if (!fw)
		return -ENOMEM;

	ptdev->fw = fw;
	init_waitqueue_head(&fw->req_waitqueue);
	INIT_LIST_HEAD(&fw->sections);
	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);

	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
	if (irq <= 0)
		return -ENODEV;

	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
	if (ret) {
		drm_err(&ptdev->base, "failed to request job irq");
		return ret;
	}

	ret = panthor_gpu_l2_power_on(ptdev);
	if (ret)
		return ret;

	fw->vm = panthor_vm_create(ptdev, true,
				   0, SZ_4G,
				   CSF_MCU_SHARED_REGION_START,
				   CSF_MCU_SHARED_REGION_SIZE);
	if (IS_ERR(fw->vm)) {
		ret = PTR_ERR(fw->vm);
		fw->vm = NULL;
		goto err_unplug_fw;
	}

	ret = panthor_fw_load(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_vm_active(fw->vm);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_start(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_init_ifaces(ptdev);
	if (ret)
		goto err_unplug_fw;

	panthor_fw_init_global_iface(ptdev);
	return 0;

err_unplug_fw:
	panthor_fw_unplug(ptdev);
	return ret;
}

MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");