1// SPDX-License-Identifier: GPL-2.0 or MIT
2/* Copyright 2023 Collabora ltd. */
3
4#include <drm/drm_drv.h>
5#include <drm/drm_exec.h>
6#include <drm/drm_gem_shmem_helper.h>
7#include <drm/drm_managed.h>
8#include <drm/gpu_scheduler.h>
9#include <drm/panthor_drm.h>
10
11#include <linux/build_bug.h>
12#include <linux/clk.h>
13#include <linux/delay.h>
14#include <linux/dma-mapping.h>
15#include <linux/dma-resv.h>
16#include <linux/firmware.h>
17#include <linux/interrupt.h>
18#include <linux/io.h>
19#include <linux/iopoll.h>
20#include <linux/iosys-map.h>
21#include <linux/module.h>
22#include <linux/platform_device.h>
23#include <linux/pm_runtime.h>
24
25#include "panthor_devfreq.h"
26#include "panthor_device.h"
27#include "panthor_fw.h"
28#include "panthor_gem.h"
29#include "panthor_gpu.h"
30#include "panthor_heap.h"
31#include "panthor_mmu.h"
32#include "panthor_regs.h"
33#include "panthor_sched.h"
34
35/**
36 * DOC: Scheduler
37 *
 * Mali CSF hardware adopts a firmware-assisted scheduling model, where
 * the firmware takes care of scheduling aspects, to some extent.
 *
 * Scheduling happens at the scheduling-group level; each group contains
 * 1 to N queues (N is FW/hardware dependent, and exposed through the
 * firmware interface). Each queue is assigned a command stream ring
 * buffer, which serves as a way to get jobs submitted to the GPU, among
 * other things.
 *
 * The firmware can schedule a maximum of M groups (M is FW/hardware
 * dependent, and exposed through the firmware interface). Past this
 * maximum number of groups, the kernel must take care of rotating the
 * groups passed to the firmware so every group gets a chance to have
 * its queues scheduled for execution.
 *
 * The current implementation only supports kernel-mode queues. In other
 * words, userspace doesn't have access to the ring-buffer. Instead,
 * userspace passes indirect command stream buffers that are called from
 * the queue ring-buffer by the kernel using a pre-defined sequence of
 * command stream instructions, to ensure the userspace driver always
 * gets consistent results (cache maintenance, synchronization, ...).
60 *
 * We rely on the drm_gpu_scheduler framework to deal with job
 * dependencies and submission. Like any other driver dealing with a
 * FW scheduler, we use the 1:1 entity:scheduler mode, such that each
 * entity has its own job scheduler. When a job is ready to be executed
 * (all its dependencies are met), it is pushed to the appropriate
 * queue ring-buffer, and the group is scheduled for execution if it
 * wasn't already active.
 *
 * Kernel-side group scheduling is timeslice-based. When there are fewer
 * groups than slots, the periodic tick is disabled and we just let the
 * FW schedule the active groups. When there are more groups than slots,
 * we give each group a chance to execute for a given amount of time,
 * then re-evaluate and pick new groups to schedule. The group selection
 * algorithm is based on priority+round-robin.
 *
 * Even though user-mode queues are out of scope right now, the current
 * design takes them into account by avoiding any guess on the
 * group/queue state that would be based on information we wouldn't have
 * if userspace was in charge of the ring-buffer. That's also one of the
 * reasons we don't do 'cooperative' scheduling (encoding FW group slot
 * reservation as a dma_fence that would be returned from the
 * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as
 * a queue of waiters, ordered by job submission order). This approach
 * would work for kernel-mode queues, but would make user-mode queues a
 * lot more complicated to retrofit.
87 */
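
/*
 * A purely illustrative sketch of the priority+round-robin selection described
 * above. Names like 'picked' and 'free_slots' are placeholders; the real logic
 * lives in the tick work and in tick_ctx_pick_groups_from_list() below, which
 * also deals with VM/AS limits, idle groups and slot rotation.
 *
 *	for (int prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
 *	     prio >= 0 && free_slots; prio--) {
 *		struct panthor_group *group, *tmp;
 *
 *		list_for_each_entry_safe(group, tmp,
 *					 &sched->groups.runnable[prio], run_node) {
 *			if (!free_slots)
 *				break;
 *
 *			// Groups left on the runnable list keep their position
 *			// and are considered first on the next tick, which is
 *			// what gives us the round-robin behavior.
 *			list_move_tail(&group->run_node, &picked[prio]);
 *			free_slots--;
 *		}
 *	}
 */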
88
89#define JOB_TIMEOUT_MS				5000
90
91#define MIN_CS_PER_CSG				8
92
93#define MIN_CSGS				3
94#define MAX_CSG_PRIO				0xf
95
96struct panthor_group;
97
98/**
99 * struct panthor_csg_slot - Command stream group slot
100 *
101 * This represents a FW slot for a scheduling group.
102 */
103struct panthor_csg_slot {
104	/** @group: Scheduling group bound to this slot. */
105	struct panthor_group *group;
106
107	/** @priority: Group priority. */
108	u8 priority;
109
110	/**
111	 * @idle: True if the group bound to this slot is idle.
112	 *
	 * A group is idle when it has nothing waiting for execution on
	 * any of its queues, or when its queues are blocked waiting for
	 * something to happen (e.g. a synchronization object).
116	 */
117	bool idle;
118};
119
120/**
121 * enum panthor_csg_priority - Group priority
122 */
123enum panthor_csg_priority {
124	/** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */
125	PANTHOR_CSG_PRIORITY_LOW = 0,
126
127	/** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */
128	PANTHOR_CSG_PRIORITY_MEDIUM,
129
130	/** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */
131	PANTHOR_CSG_PRIORITY_HIGH,
132
133	/**
134	 * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group.
135	 *
136	 * Real-time priority allows one to preempt scheduling of other
137	 * non-real-time groups. When such a group becomes executable,
138	 * it will evict the group with the lowest non-rt priority if
139	 * there's no free group slot available.
140	 *
141	 * Currently not exposed to userspace.
142	 */
143	PANTHOR_CSG_PRIORITY_RT,
144
145	/** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */
146	PANTHOR_CSG_PRIORITY_COUNT,
147};
148
149/**
150 * struct panthor_scheduler - Object used to manage the scheduler
151 */
152struct panthor_scheduler {
153	/** @ptdev: Device. */
154	struct panthor_device *ptdev;
155
156	/**
157	 * @wq: Workqueue used by our internal scheduler logic and
158	 * drm_gpu_scheduler.
159	 *
	 * Used for the scheduler tick, group updates or other kinds of FW
161	 * event processing that can't be handled in the threaded interrupt
162	 * path. Also passed to the drm_gpu_scheduler instances embedded
163	 * in panthor_queue.
164	 */
165	struct workqueue_struct *wq;
166
167	/**
168	 * @heap_alloc_wq: Workqueue used to schedule tiler_oom works.
169	 *
170	 * We have a queue dedicated to heap chunk allocation works to avoid
171	 * blocking the rest of the scheduler if the allocation tries to
172	 * reclaim memory.
173	 */
174	struct workqueue_struct *heap_alloc_wq;
175
176	/** @tick_work: Work executed on a scheduling tick. */
177	struct delayed_work tick_work;
178
179	/**
180	 * @sync_upd_work: Work used to process synchronization object updates.
181	 *
182	 * We use this work to unblock queues/groups that were waiting on a
183	 * synchronization object.
184	 */
185	struct work_struct sync_upd_work;
186
187	/**
188	 * @fw_events_work: Work used to process FW events outside the interrupt path.
189	 *
190	 * Even if the interrupt is threaded, we need any event processing
	 * that requires taking the panthor_scheduler::lock to be processed
192	 * outside the interrupt path so we don't block the tick logic when
193	 * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the
194	 * event processing requires taking this lock, we just delegate all
195	 * FW event processing to the scheduler workqueue.
196	 */
197	struct work_struct fw_events_work;
198
199	/**
200	 * @fw_events: Bitmask encoding pending FW events.
201	 */
202	atomic_t fw_events;
203
204	/**
205	 * @resched_target: When the next tick should occur.
206	 *
207	 * Expressed in jiffies.
208	 */
209	u64 resched_target;
210
211	/**
212	 * @last_tick: When the last tick occurred.
213	 *
214	 * Expressed in jiffies.
215	 */
216	u64 last_tick;
217
218	/** @tick_period: Tick period in jiffies. */
219	u64 tick_period;
220
221	/**
222	 * @lock: Lock protecting access to all the scheduler fields.
223	 *
224	 * Should be taken in the tick work, the irq handler, and anywhere the @groups
225	 * fields are touched.
226	 */
227	struct mutex lock;
228
229	/** @groups: Various lists used to classify groups. */
230	struct {
231		/**
232		 * @runnable: Runnable group lists.
233		 *
234		 * When a group has queues that want to execute something,
235		 * its panthor_group::run_node should be inserted here.
236		 *
237		 * One list per-priority.
238		 */
239		struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT];
240
241		/**
242		 * @idle: Idle group lists.
243		 *
244		 * When all queues of a group are idle (either because they
245		 * have nothing to execute, or because they are blocked), the
246		 * panthor_group::run_node field should be inserted here.
247		 *
248		 * One list per-priority.
249		 */
250		struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT];
251
252		/**
253		 * @waiting: List of groups whose queues are blocked on a
254		 * synchronization object.
255		 *
256		 * Insert panthor_group::wait_node here when a group is waiting
257		 * for synchronization objects to be signaled.
258		 *
259		 * This list is evaluated in the @sync_upd_work work.
260		 */
261		struct list_head waiting;
262	} groups;
263
264	/**
265	 * @csg_slots: FW command stream group slots.
266	 */
267	struct panthor_csg_slot csg_slots[MAX_CSGS];
268
269	/** @csg_slot_count: Number of command stream group slots exposed by the FW. */
270	u32 csg_slot_count;
271
	/** @cs_slot_count: Number of command stream slots per group slot exposed by the FW. */
273	u32 cs_slot_count;
274
275	/** @as_slot_count: Number of address space slots supported by the MMU. */
276	u32 as_slot_count;
277
	/** @used_csg_slot_count: Number of command stream group slots currently used. */
279	u32 used_csg_slot_count;
280
281	/** @sb_slot_count: Number of scoreboard slots. */
282	u32 sb_slot_count;
283
284	/**
285	 * @might_have_idle_groups: True if an active group might have become idle.
286	 *
287	 * This will force a tick, so other runnable groups can be scheduled if one
288	 * or more active groups became idle.
289	 */
290	bool might_have_idle_groups;
291
292	/** @pm: Power management related fields. */
293	struct {
294		/** @has_ref: True if the scheduler owns a runtime PM reference. */
295		bool has_ref;
296	} pm;
297
298	/** @reset: Reset related fields. */
299	struct {
300		/** @lock: Lock protecting the other reset fields. */
301		struct mutex lock;
302
303		/**
304		 * @in_progress: True if a reset is in progress.
305		 *
306		 * Set to true in panthor_sched_pre_reset() and back to false in
307		 * panthor_sched_post_reset().
308		 */
309		atomic_t in_progress;
310
311		/**
312		 * @stopped_groups: List containing all groups that were stopped
313		 * before a reset.
314		 *
315		 * Insert panthor_group::run_node in the pre_reset path.
316		 */
317		struct list_head stopped_groups;
318	} reset;
319};
320
321/**
322 * struct panthor_syncobj_32b - 32-bit FW synchronization object
323 */
324struct panthor_syncobj_32b {
325	/** @seqno: Sequence number. */
326	u32 seqno;
327
328	/**
329	 * @status: Status.
330	 *
331	 * Not zero on failure.
332	 */
333	u32 status;
334};
335
336/**
337 * struct panthor_syncobj_64b - 64-bit FW synchronization object
338 */
339struct panthor_syncobj_64b {
340	/** @seqno: Sequence number. */
341	u64 seqno;
342
343	/**
344	 * @status: Status.
345	 *
346	 * Not zero on failure.
347	 */
348	u32 status;
349
350	/** @pad: MBZ. */
351	u32 pad;
352};
353
354/**
355 * struct panthor_queue - Execution queue
356 */
357struct panthor_queue {
358	/** @scheduler: DRM scheduler used for this queue. */
359	struct drm_gpu_scheduler scheduler;
360
361	/** @entity: DRM scheduling entity used for this queue. */
362	struct drm_sched_entity entity;
363
364	/**
365	 * @remaining_time: Time remaining before the job timeout expires.
366	 *
367	 * The job timeout is suspended when the queue is not scheduled by the
368	 * FW. Every time we suspend the timer, we need to save the remaining
369	 * time so we can restore it later on.
370	 */
371	unsigned long remaining_time;
372
373	/** @timeout_suspended: True if the job timeout was suspended. */
374	bool timeout_suspended;
375
376	/**
377	 * @doorbell_id: Doorbell assigned to this queue.
378	 *
	 * Right now, all queues of a group share the same doorbell, and the
	 * doorbell ID is set to the group slot + 1 when the group is assigned
	 * a slot. But we might decide to provide fine-grained doorbell
	 * assignment at some point, so we don't have to wake up all queues in
	 * a group every time one of them is updated.
384	 */
385	u8 doorbell_id;
386
387	/**
388	 * @priority: Priority of the queue inside the group.
389	 *
390	 * Must be less than 16 (Only 4 bits available).
391	 */
392	u8 priority;
393#define CSF_MAX_QUEUE_PRIO	GENMASK(3, 0)
394
395	/** @ringbuf: Command stream ring-buffer. */
396	struct panthor_kernel_bo *ringbuf;
397
398	/** @iface: Firmware interface. */
399	struct {
400		/** @mem: FW memory allocated for this interface. */
401		struct panthor_kernel_bo *mem;
402
403		/** @input: Input interface. */
404		struct panthor_fw_ringbuf_input_iface *input;
405
406		/** @output: Output interface. */
407		const struct panthor_fw_ringbuf_output_iface *output;
408
409		/** @input_fw_va: FW virtual address of the input interface buffer. */
410		u32 input_fw_va;
411
412		/** @output_fw_va: FW virtual address of the output interface buffer. */
413		u32 output_fw_va;
414	} iface;
415
416	/**
417	 * @syncwait: Stores information about the synchronization object this
418	 * queue is waiting on.
419	 */
420	struct {
421		/** @gpu_va: GPU address of the synchronization object. */
422		u64 gpu_va;
423
424		/** @ref: Reference value to compare against. */
425		u64 ref;
426
427		/** @gt: True if this is a greater-than test. */
428		bool gt;
429
430		/** @sync64: True if this is a 64-bit sync object. */
431		bool sync64;
432
		/** @obj: Buffer object holding the synchronization object. */
		struct drm_gem_object *obj;

		/** @offset: Offset of the synchronization object inside @obj. */
437		u64 offset;
438
439		/**
440		 * @kmap: Kernel mapping of the buffer object holding the
441		 * synchronization object.
442		 */
443		void *kmap;
444	} syncwait;
445
446	/** @fence_ctx: Fence context fields. */
447	struct {
448		/** @lock: Used to protect access to all fences allocated by this context. */
449		spinlock_t lock;
450
451		/**
452		 * @id: Fence context ID.
453		 *
454		 * Allocated with dma_fence_context_alloc().
455		 */
456		u64 id;
457
458		/** @seqno: Sequence number of the last initialized fence. */
459		atomic64_t seqno;
460
461		/**
462		 * @in_flight_jobs: List containing all in-flight jobs.
463		 *
		 * Used to keep track of in-flight jobs so we can signal
		 * panthor_job::done_fence when the synchronization object
		 * attached to the queue is signaled.
466		 */
467		struct list_head in_flight_jobs;
468	} fence_ctx;
469};
470
471/**
472 * enum panthor_group_state - Scheduling group state.
473 */
474enum panthor_group_state {
475	/** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */
476	PANTHOR_CS_GROUP_CREATED,
477
478	/** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */
479	PANTHOR_CS_GROUP_ACTIVE,
480
481	/**
482	 * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is
483	 * inactive/suspended right now.
484	 */
485	PANTHOR_CS_GROUP_SUSPENDED,
486
487	/**
488	 * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated.
489	 *
490	 * Can no longer be scheduled. The only allowed action is a destruction.
491	 */
492	PANTHOR_CS_GROUP_TERMINATED,
493
494	/**
	 * @PANTHOR_CS_GROUP_UNKNOWN_STATE: Group is in an unknown state.
496	 *
497	 * The FW returned an inconsistent state. The group is flagged unusable
498	 * and can no longer be scheduled. The only allowed action is a
499	 * destruction.
500	 *
501	 * When that happens, we also schedule a FW reset, to start from a fresh
502	 * state.
503	 */
504	PANTHOR_CS_GROUP_UNKNOWN_STATE,
505};
506
507/**
508 * struct panthor_group - Scheduling group object
509 */
510struct panthor_group {
511	/** @refcount: Reference count */
512	struct kref refcount;
513
514	/** @ptdev: Device. */
515	struct panthor_device *ptdev;
516
517	/** @vm: VM bound to the group. */
518	struct panthor_vm *vm;
519
520	/** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */
521	u64 compute_core_mask;
522
523	/** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */
524	u64 fragment_core_mask;
525
526	/** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */
527	u64 tiler_core_mask;
528
529	/** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
530	u8 max_compute_cores;
531
532	/** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */
533	u8 max_fragment_cores;
534
535	/** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
536	u8 max_tiler_cores;
537
	/** @priority: Group priority (see enum panthor_csg_priority). */
539	u8 priority;
540
541	/** @blocked_queues: Bitmask reflecting the blocked queues. */
542	u32 blocked_queues;
543
544	/** @idle_queues: Bitmask reflecting the idle queues. */
545	u32 idle_queues;
546
547	/** @fatal_lock: Lock used to protect access to fatal fields. */
548	spinlock_t fatal_lock;
549
550	/** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
551	u32 fatal_queues;
552
553	/** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
554	atomic_t tiler_oom;
555
556	/** @queue_count: Number of queues in this group. */
557	u32 queue_count;
558
559	/** @queues: Queues owned by this group. */
560	struct panthor_queue *queues[MAX_CS_PER_CSG];
561
562	/**
563	 * @csg_id: ID of the FW group slot.
564	 *
565	 * -1 when the group is not scheduled/active.
566	 */
567	int csg_id;
568
569	/**
570	 * @destroyed: True when the group has been destroyed.
571	 *
572	 * If a group is destroyed it becomes useless: no further jobs can be submitted
573	 * to its queues. We simply wait for all references to be dropped so we can
574	 * release the group object.
575	 */
576	bool destroyed;
577
578	/**
579	 * @timedout: True when a timeout occurred on any of the queues owned by
580	 * this group.
581	 *
582	 * Timeouts can be reported by drm_sched or by the FW. In any case, any
583	 * timeout situation is unrecoverable, and the group becomes useless.
584	 * We simply wait for all references to be dropped so we can release the
585	 * group object.
586	 */
587	bool timedout;
588
589	/**
590	 * @syncobjs: Pool of per-queue synchronization objects.
591	 *
592	 * One sync object per queue. The position of the sync object is
593	 * determined by the queue index.
594	 */
595	struct panthor_kernel_bo *syncobjs;
596
597	/** @state: Group state. */
598	enum panthor_group_state state;
599
600	/**
601	 * @suspend_buf: Suspend buffer.
602	 *
603	 * Stores the state of the group and its queues when a group is suspended.
	 * Used at resume time to restore the group to its previous state.
605	 *
606	 * The size of the suspend buffer is exposed through the FW interface.
607	 */
608	struct panthor_kernel_bo *suspend_buf;
609
610	/**
611	 * @protm_suspend_buf: Protection mode suspend buffer.
612	 *
613	 * Stores the state of the group and its queues when a group that's in
614	 * protection mode is suspended.
615	 *
	 * Used at resume time to restore the group to its previous state.
617	 *
618	 * The size of the protection mode suspend buffer is exposed through the
619	 * FW interface.
620	 */
621	struct panthor_kernel_bo *protm_suspend_buf;
622
623	/** @sync_upd_work: Work used to check/signal job fences. */
624	struct work_struct sync_upd_work;
625
626	/** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */
627	struct work_struct tiler_oom_work;
628
629	/** @term_work: Work used to finish the group termination procedure. */
630	struct work_struct term_work;
631
632	/**
633	 * @release_work: Work used to release group resources.
634	 *
635	 * We need to postpone the group release to avoid a deadlock when
636	 * the last ref is released in the tick work.
637	 */
638	struct work_struct release_work;
639
640	/**
	 * @run_node: Node used to insert the group in the
	 * panthor_scheduler::groups::{runnable,idle} and
	 * panthor_scheduler::reset::stopped_groups lists.
644	 */
645	struct list_head run_node;
646
647	/**
	 * @wait_node: Node used to insert the group in the
	 * panthor_scheduler::groups::waiting list.
650	 */
651	struct list_head wait_node;
652};
653
654/**
655 * group_queue_work() - Queue a group work
656 * @group: Group to queue the work for.
657 * @wname: Work name.
658 *
 * Grabs a ref and queues a work item to the scheduler workqueue. If
 * the work was already queued, we release the reference we grabbed.
661 *
662 * Work callbacks must release the reference we grabbed here.
663 */
664#define group_queue_work(group, wname) \
665	do { \
666		group_get(group); \
667		if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \
668			group_put(group); \
669	} while (0)
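
/*
 * Example: csg_slot_sync_update_locked() calls group_queue_work(group, sync_upd),
 * which grabs a group reference and queues &group->sync_upd_work on the
 * scheduler workqueue. The sync_upd work handler is then responsible for
 * dropping that reference with group_put() before returning.
 */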
670
671/**
672 * sched_queue_work() - Queue a scheduler work.
673 * @sched: Scheduler object.
674 * @wname: Work name.
675 *
676 * Conditionally queues a scheduler work if no reset is pending/in-progress.
677 */
678#define sched_queue_work(sched, wname) \
679	do { \
680		if (!atomic_read(&(sched)->reset.in_progress) && \
681		    !panthor_device_reset_is_pending((sched)->ptdev)) \
682			queue_work((sched)->wq, &(sched)->wname ## _work); \
683	} while (0)
684
685/**
686 * sched_queue_delayed_work() - Queue a scheduler delayed work.
687 * @sched: Scheduler object.
688 * @wname: Work name.
689 * @delay: Work delay in jiffies.
690 *
691 * Conditionally queues a scheduler delayed work if no reset is
692 * pending/in-progress.
693 */
694#define sched_queue_delayed_work(sched, wname, delay) \
695	do { \
		if (!atomic_read(&(sched)->reset.in_progress) && \
697		    !panthor_device_reset_is_pending((sched)->ptdev)) \
698			mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \
699	} while (0)
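
/*
 * Example: FW event handlers that want an immediate re-evaluation of the
 * group slots call sched_queue_delayed_work(sched, tick, 0), as done in
 * csg_slot_process_idle_event_locked(). The tick work itself presumably
 * re-arms the next tick with something like
 * sched_queue_delayed_work(sched, tick, sched->tick_period).
 */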
700
701/*
 * We currently set the maximum number of groups per file to an arbitrarily
 * low value. This can be raised if we ever need more.
704 */
705#define MAX_GROUPS_PER_POOL 128
706
707/**
708 * struct panthor_group_pool - Group pool
709 *
 * Each file gets assigned a group pool.
711 */
712struct panthor_group_pool {
713	/** @xa: Xarray used to manage group handles. */
714	struct xarray xa;
715};
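
/*
 * A minimal sketch of how a group handle could be allocated from the pool,
 * assuming the xarray was initialized with an allocating flag (e.g.
 * XA_FLAGS_ALLOC1). The actual allocation happens in the group creation path,
 * which is not part of this excerpt; 'gpool', 'group' and 'handle' are
 * placeholders.
 *
 *	u32 handle;
 *	int ret;
 *
 *	ret = xa_alloc(&gpool->xa, &handle, group,
 *		       XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *
 *	// 'handle' is what gets returned to userspace to identify the group.
 */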
716
717/**
 * struct panthor_job - Used to manage a GPU job
719 */
720struct panthor_job {
721	/** @base: Inherit from drm_sched_job. */
722	struct drm_sched_job base;
723
724	/** @refcount: Reference count. */
725	struct kref refcount;
726
727	/** @group: Group of the queue this job will be pushed to. */
728	struct panthor_group *group;
729
730	/** @queue_idx: Index of the queue inside @group. */
731	u32 queue_idx;
732
733	/** @call_info: Information about the userspace command stream call. */
734	struct {
735		/** @start: GPU address of the userspace command stream. */
736		u64 start;
737
738		/** @size: Size of the userspace command stream. */
739		u32 size;
740
741		/**
742		 * @latest_flush: Flush ID at the time the userspace command
743		 * stream was built.
744		 *
745		 * Needed for the flush reduction mechanism.
746		 */
747		u32 latest_flush;
748	} call_info;
749
	/** @ringbuf: Position of this job in the ring buffer. */
751	struct {
752		/** @start: Start offset. */
753		u64 start;
754
755		/** @end: End offset. */
756		u64 end;
757	} ringbuf;
758
759	/**
760	 * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs
761	 * list.
762	 */
763	struct list_head node;
764
765	/** @done_fence: Fence signaled when the job is finished or cancelled. */
766	struct dma_fence *done_fence;
767};
768
769static void
770panthor_queue_put_syncwait_obj(struct panthor_queue *queue)
771{
772	if (queue->syncwait.kmap) {
773		struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap);
774
775		drm_gem_vunmap_unlocked(queue->syncwait.obj, &map);
776		queue->syncwait.kmap = NULL;
777	}
778
779	drm_gem_object_put(queue->syncwait.obj);
780	queue->syncwait.obj = NULL;
781}
782
783static void *
784panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue)
785{
786	struct panthor_device *ptdev = group->ptdev;
787	struct panthor_gem_object *bo;
788	struct iosys_map map;
789	int ret;
790
791	if (queue->syncwait.kmap)
792		return queue->syncwait.kmap + queue->syncwait.offset;
793
794	bo = panthor_vm_get_bo_for_va(group->vm,
795				      queue->syncwait.gpu_va,
796				      &queue->syncwait.offset);
797	if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo)))
798		goto err_put_syncwait_obj;
799
800	queue->syncwait.obj = &bo->base.base;
801	ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map);
802	if (drm_WARN_ON(&ptdev->base, ret))
803		goto err_put_syncwait_obj;
804
805	queue->syncwait.kmap = map.vaddr;
806	if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
807		goto err_put_syncwait_obj;
808
809	return queue->syncwait.kmap + queue->syncwait.offset;
810
811err_put_syncwait_obj:
812	panthor_queue_put_syncwait_obj(queue);
813	return NULL;
814}
815
816static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue)
817{
818	if (IS_ERR_OR_NULL(queue))
819		return;
820
821	if (queue->entity.fence_context)
822		drm_sched_entity_destroy(&queue->entity);
823
824	if (queue->scheduler.ops)
825		drm_sched_fini(&queue->scheduler);
826
827	panthor_queue_put_syncwait_obj(queue);
828
829	panthor_kernel_bo_destroy(queue->ringbuf);
830	panthor_kernel_bo_destroy(queue->iface.mem);
831
832	kfree(queue);
833}
834
835static void group_release_work(struct work_struct *work)
836{
837	struct panthor_group *group = container_of(work,
838						   struct panthor_group,
839						   release_work);
840	u32 i;
841
842	for (i = 0; i < group->queue_count; i++)
843		group_free_queue(group, group->queues[i]);
844
845	panthor_kernel_bo_destroy(group->suspend_buf);
846	panthor_kernel_bo_destroy(group->protm_suspend_buf);
847	panthor_kernel_bo_destroy(group->syncobjs);
848
849	panthor_vm_put(group->vm);
850	kfree(group);
851}
852
853static void group_release(struct kref *kref)
854{
855	struct panthor_group *group = container_of(kref,
856						   struct panthor_group,
857						   refcount);
858	struct panthor_device *ptdev = group->ptdev;
859
860	drm_WARN_ON(&ptdev->base, group->csg_id >= 0);
861	drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node));
862	drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node));
863
864	queue_work(panthor_cleanup_wq, &group->release_work);
865}
866
867static void group_put(struct panthor_group *group)
868{
869	if (group)
870		kref_put(&group->refcount, group_release);
871}
872
873static struct panthor_group *
874group_get(struct panthor_group *group)
875{
876	if (group)
877		kref_get(&group->refcount);
878
879	return group;
880}
881
882/**
883 * group_bind_locked() - Bind a group to a group slot
884 * @group: Group.
885 * @csg_id: Slot.
886 *
887 * Return: 0 on success, a negative error code otherwise.
888 */
889static int
890group_bind_locked(struct panthor_group *group, u32 csg_id)
891{
892	struct panthor_device *ptdev = group->ptdev;
893	struct panthor_csg_slot *csg_slot;
894	int ret;
895
896	lockdep_assert_held(&ptdev->scheduler->lock);
897
898	if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS ||
899			ptdev->scheduler->csg_slots[csg_id].group))
900		return -EINVAL;
901
902	ret = panthor_vm_active(group->vm);
903	if (ret)
904		return ret;
905
906	csg_slot = &ptdev->scheduler->csg_slots[csg_id];
907	group_get(group);
908	group->csg_id = csg_id;
909
910	/* Dummy doorbell allocation: doorbell is assigned to the group and
911	 * all queues use the same doorbell.
912	 *
913	 * TODO: Implement LRU-based doorbell assignment, so the most often
914	 * updated queues get their own doorbell, thus avoiding useless checks
915	 * on queues belonging to the same group that are rarely updated.
916	 */
917	for (u32 i = 0; i < group->queue_count; i++)
918		group->queues[i]->doorbell_id = csg_id + 1;
919
920	csg_slot->group = group;
921
922	return 0;
923}
924
925/**
926 * group_unbind_locked() - Unbind a group from a slot.
927 * @group: Group to unbind.
928 *
929 * Return: 0 on success, a negative error code otherwise.
930 */
931static int
932group_unbind_locked(struct panthor_group *group)
933{
934	struct panthor_device *ptdev = group->ptdev;
935	struct panthor_csg_slot *slot;
936
937	lockdep_assert_held(&ptdev->scheduler->lock);
938
939	if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS))
940		return -EINVAL;
941
942	if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE))
943		return -EINVAL;
944
945	slot = &ptdev->scheduler->csg_slots[group->csg_id];
946	panthor_vm_idle(group->vm);
947	group->csg_id = -1;
948
949	/* Tiler OOM events will be re-issued next time the group is scheduled. */
950	atomic_set(&group->tiler_oom, 0);
951	cancel_work(&group->tiler_oom_work);
952
953	for (u32 i = 0; i < group->queue_count; i++)
954		group->queues[i]->doorbell_id = -1;
955
956	slot->group = NULL;
957
958	group_put(group);
959	return 0;
960}
961
962/**
963 * cs_slot_prog_locked() - Program a queue slot
964 * @ptdev: Device.
965 * @csg_id: Group slot ID.
966 * @cs_id: Queue slot ID.
967 *
968 * Program a queue slot with the queue information so things can start being
969 * executed on this queue.
970 *
971 * The group slot must have a group bound to it already (group_bind_locked()).
972 */
973static void
974cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
975{
976	struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id];
977	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
978
979	lockdep_assert_held(&ptdev->scheduler->lock);
980
981	queue->iface.input->extract = queue->iface.output->extract;
982	drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
983
984	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
985	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
986	cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
987	cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
988	cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
989				  CS_CONFIG_DOORBELL(queue->doorbell_id);
990	cs_iface->input->ack_irq_mask = ~0;
991	panthor_fw_update_reqs(cs_iface, req,
992			       CS_IDLE_SYNC_WAIT |
993			       CS_IDLE_EMPTY |
994			       CS_STATE_START |
995			       CS_EXTRACT_EVENT,
996			       CS_IDLE_SYNC_WAIT |
997			       CS_IDLE_EMPTY |
998			       CS_STATE_MASK |
999			       CS_EXTRACT_EVENT);
1000	if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
1001		drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
1002		queue->timeout_suspended = false;
1003	}
1004}
1005
1006/**
1007 * cs_slot_reset_locked() - Reset a queue slot
1008 * @ptdev: Device.
1009 * @csg_id: Group slot.
1010 * @cs_id: Queue slot.
1011 *
1012 * Change the queue slot state to STOP and suspend the queue timeout if
1013 * the queue is not blocked.
1014 *
1015 * The group slot must have a group bound to it (group_bind_locked()).
1016 */
1017static int
1018cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
1019{
1020	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1021	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
1022	struct panthor_queue *queue = group->queues[cs_id];
1023
1024	lockdep_assert_held(&ptdev->scheduler->lock);
1025
1026	panthor_fw_update_reqs(cs_iface, req,
1027			       CS_STATE_STOP,
1028			       CS_STATE_MASK);
1029
1030	/* If the queue is blocked, we want to keep the timeout running, so
1031	 * we can detect unbounded waits and kill the group when that happens.
1032	 */
1033	if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
1034		queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
1035		queue->timeout_suspended = true;
1036		WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
1037	}
1038
1039	return 0;
1040}
1041
1042/**
1043 * csg_slot_sync_priority_locked() - Synchronize the group slot priority
1044 * @ptdev: Device.
1045 * @csg_id: Group slot ID.
1046 *
 * Group slot priority updates happen asynchronously. When we receive a
 * %CSG_ENDPOINT_CONFIG acknowledgment, we know the update is effective, and
 * can reflect it in our panthor_csg_slot object.
1050 */
1051static void
1052csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
1053{
1054	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1055	struct panthor_fw_csg_iface *csg_iface;
1056
1057	lockdep_assert_held(&ptdev->scheduler->lock);
1058
1059	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1060	csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28;
1061}
1062
1063/**
 * cs_slot_sync_queue_state_locked() - Synchronize the queue slot state
1065 * @ptdev: Device.
1066 * @csg_id: Group slot.
1067 * @cs_id: Queue slot.
1068 *
1069 * Queue state is updated on group suspend or STATUS_UPDATE event.
1070 */
1071static void
1072cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
1073{
1074	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
1075	struct panthor_queue *queue = group->queues[cs_id];
1076	struct panthor_fw_cs_iface *cs_iface =
1077		panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id);
1078
1079	u32 status_wait_cond;
1080
1081	switch (cs_iface->output->status_blocked_reason) {
1082	case CS_STATUS_BLOCKED_REASON_UNBLOCKED:
1083		if (queue->iface.input->insert == queue->iface.output->extract &&
1084		    cs_iface->output->status_scoreboards == 0)
1085			group->idle_queues |= BIT(cs_id);
1086		break;
1087
1088	case CS_STATUS_BLOCKED_REASON_SYNC_WAIT:
1089		if (list_empty(&group->wait_node)) {
1090			list_move_tail(&group->wait_node,
1091				       &group->ptdev->scheduler->groups.waiting);
1092		}
1093		group->blocked_queues |= BIT(cs_id);
1094		queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr;
1095		queue->syncwait.ref = cs_iface->output->status_wait_sync_value;
1096		status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK;
1097		queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT;
1098		if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) {
1099			u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi;
1100
1101			queue->syncwait.sync64 = true;
1102			queue->syncwait.ref |= sync_val_hi << 32;
1103		} else {
1104			queue->syncwait.sync64 = false;
1105		}
1106		break;
1107
1108	default:
1109		/* Other reasons are not blocking. Consider the queue as runnable
1110		 * in those cases.
1111		 */
1112		break;
1113	}
1114}
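
/*
 * A minimal, illustrative sketch of how the syncwait information saved above
 * can later be consulted, once the synchronization object has been mapped with
 * panthor_queue_get_syncwait_obj(). Only the greater-than condition is shown;
 * the real unblocking logic lives in the sync_upd work and also handles the
 * other wait conditions reported by the FW.
 *
 *	static u64 queue_syncwait_value(struct panthor_queue *queue, void *syncobj)
 *	{
 *		if (queue->syncwait.sync64) {
 *			struct panthor_syncobj_64b *obj = syncobj;
 *
 *			return obj->seqno;
 *		} else {
 *			struct panthor_syncobj_32b *obj = syncobj;
 *
 *			return obj->seqno;
 *		}
 *	}
 *
 *	// With @gt set, the queue is unblocked once the object seqno becomes
 *	// strictly greater than the reference value:
 *	//	unblocked = queue_syncwait_value(queue, kmap) > queue->syncwait.ref;
 */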
1115
1116static void
1117csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id)
1118{
1119	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1120	struct panthor_group *group = csg_slot->group;
1121	u32 i;
1122
1123	lockdep_assert_held(&ptdev->scheduler->lock);
1124
1125	group->idle_queues = 0;
1126	group->blocked_queues = 0;
1127
1128	for (i = 0; i < group->queue_count; i++) {
1129		if (group->queues[i])
1130			cs_slot_sync_queue_state_locked(ptdev, csg_id, i);
1131	}
1132}
1133
1134static void
1135csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
1136{
1137	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1138	struct panthor_fw_csg_iface *csg_iface;
1139	struct panthor_group *group;
1140	enum panthor_group_state new_state, old_state;
1141	u32 csg_state;
1142
1143	lockdep_assert_held(&ptdev->scheduler->lock);
1144
1145	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1146	group = csg_slot->group;
1147
1148	if (!group)
1149		return;
1150
1151	old_state = group->state;
1152	csg_state = csg_iface->output->ack & CSG_STATE_MASK;
1153	switch (csg_state) {
1154	case CSG_STATE_START:
1155	case CSG_STATE_RESUME:
1156		new_state = PANTHOR_CS_GROUP_ACTIVE;
1157		break;
1158	case CSG_STATE_TERMINATE:
1159		new_state = PANTHOR_CS_GROUP_TERMINATED;
1160		break;
1161	case CSG_STATE_SUSPEND:
1162		new_state = PANTHOR_CS_GROUP_SUSPENDED;
1163		break;
1164	default:
1165		/* The unknown state might be caused by a FW state corruption,
1166		 * which means the group metadata can't be trusted anymore, and
1167		 * the SUSPEND operation might propagate the corruption to the
1168		 * suspend buffers. Flag the group state as unknown to make
1169		 * sure it's unusable after that point.
1170		 */
1171		drm_err(&ptdev->base, "Invalid state on CSG %d (state=%d)",
1172			csg_id, csg_state);
1173		new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE;
1174		break;
1175	}
1176
1177	if (old_state == new_state)
1178		return;
1179
1180	/* The unknown state might be caused by a FW issue, reset the FW to
1181	 * take a fresh start.
1182	 */
1183	if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE)
1184		panthor_device_schedule_reset(ptdev);
1185
1186	if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
1187		csg_slot_sync_queues_state_locked(ptdev, csg_id);
1188
1189	if (old_state == PANTHOR_CS_GROUP_ACTIVE) {
1190		u32 i;
1191
1192		/* Reset the queue slots so we start from a clean
1193		 * state when starting/resuming a new group on this
		 * CSG slot. No wait needed here, and no doorbell ring
		 * either, since the CS slot will only be re-used
1196		 * on the next CSG start operation.
1197		 */
1198		for (i = 0; i < group->queue_count; i++) {
1199			if (group->queues[i])
1200				cs_slot_reset_locked(ptdev, csg_id, i);
1201		}
1202	}
1203
1204	group->state = new_state;
1205}
1206
1207static int
1208csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
1209{
1210	struct panthor_fw_csg_iface *csg_iface;
1211	struct panthor_csg_slot *csg_slot;
1212	struct panthor_group *group;
1213	u32 queue_mask = 0, i;
1214
1215	lockdep_assert_held(&ptdev->scheduler->lock);
1216
1217	if (priority > MAX_CSG_PRIO)
1218		return -EINVAL;
1219
1220	if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS))
1221		return -EINVAL;
1222
1223	csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1224	group = csg_slot->group;
1225	if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE)
1226		return 0;
1227
1228	csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id);
1229
1230	for (i = 0; i < group->queue_count; i++) {
1231		if (group->queues[i]) {
1232			cs_slot_prog_locked(ptdev, csg_id, i);
1233			queue_mask |= BIT(i);
1234		}
1235	}
1236
1237	csg_iface->input->allow_compute = group->compute_core_mask;
1238	csg_iface->input->allow_fragment = group->fragment_core_mask;
1239	csg_iface->input->allow_other = group->tiler_core_mask;
1240	csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) |
1241					 CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) |
1242					 CSG_EP_REQ_TILER(group->max_tiler_cores) |
1243					 CSG_EP_REQ_PRIORITY(priority);
1244	csg_iface->input->config = panthor_vm_as(group->vm);
1245
1246	if (group->suspend_buf)
1247		csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf);
1248	else
1249		csg_iface->input->suspend_buf = 0;
1250
1251	if (group->protm_suspend_buf) {
1252		csg_iface->input->protm_suspend_buf =
1253			panthor_kernel_bo_gpuva(group->protm_suspend_buf);
1254	} else {
1255		csg_iface->input->protm_suspend_buf = 0;
1256	}
1257
1258	csg_iface->input->ack_irq_mask = ~0;
1259	panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask);
1260	return 0;
1261}
1262
1263static void
1264cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
1265				   u32 csg_id, u32 cs_id)
1266{
1267	struct panthor_scheduler *sched = ptdev->scheduler;
1268	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1269	struct panthor_group *group = csg_slot->group;
1270	struct panthor_fw_cs_iface *cs_iface;
1271	u32 fatal;
1272	u64 info;
1273
1274	lockdep_assert_held(&sched->lock);
1275
1276	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1277	fatal = cs_iface->output->fatal;
1278	info = cs_iface->output->fatal_info;
1279
1280	if (group)
1281		group->fatal_queues |= BIT(cs_id);
1282
1283	if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) {
1284		/* If this exception is unrecoverable, queue a reset, and make
1285		 * sure we stop scheduling groups until the reset has happened.
1286		 */
1287		panthor_device_schedule_reset(ptdev);
1288		cancel_delayed_work(&sched->tick_work);
1289	} else {
1290		sched_queue_delayed_work(sched, tick, 0);
1291	}
1292
1293	drm_warn(&ptdev->base,
1294		 "CSG slot %d CS slot: %d\n"
1295		 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
1296		 "CS_FATAL.EXCEPTION_DATA: 0x%x\n"
1297		 "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n",
1298		 csg_id, cs_id,
1299		 (unsigned int)CS_EXCEPTION_TYPE(fatal),
1300		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)),
1301		 (unsigned int)CS_EXCEPTION_DATA(fatal),
1302		 info);
1303}
1304
1305static void
1306cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
1307				   u32 csg_id, u32 cs_id)
1308{
1309	struct panthor_scheduler *sched = ptdev->scheduler;
1310	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1311	struct panthor_group *group = csg_slot->group;
1312	struct panthor_queue *queue = group && cs_id < group->queue_count ?
1313				      group->queues[cs_id] : NULL;
1314	struct panthor_fw_cs_iface *cs_iface;
1315	u32 fault;
1316	u64 info;
1317
1318	lockdep_assert_held(&sched->lock);
1319
1320	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1321	fault = cs_iface->output->fault;
1322	info = cs_iface->output->fault_info;
1323
1324	if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) {
1325		u64 cs_extract = queue->iface.output->extract;
1326		struct panthor_job *job;
1327
1328		spin_lock(&queue->fence_ctx.lock);
1329		list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) {
1330			if (cs_extract >= job->ringbuf.end)
1331				continue;
1332
1333			if (cs_extract < job->ringbuf.start)
1334				break;
1335
1336			dma_fence_set_error(job->done_fence, -EINVAL);
1337		}
1338		spin_unlock(&queue->fence_ctx.lock);
1339	}
1340
1341	drm_warn(&ptdev->base,
1342		 "CSG slot %d CS slot: %d\n"
1343		 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
1344		 "CS_FAULT.EXCEPTION_DATA: 0x%x\n"
1345		 "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n",
1346		 csg_id, cs_id,
1347		 (unsigned int)CS_EXCEPTION_TYPE(fault),
1348		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)),
1349		 (unsigned int)CS_EXCEPTION_DATA(fault),
1350		 info);
1351}
1352
1353static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
1354{
1355	struct panthor_device *ptdev = group->ptdev;
1356	struct panthor_scheduler *sched = ptdev->scheduler;
1357	u32 renderpasses_in_flight, pending_frag_count;
1358	struct panthor_heap_pool *heaps = NULL;
1359	u64 heap_address, new_chunk_va = 0;
1360	u32 vt_start, vt_end, frag_end;
1361	int ret, csg_id;
1362
1363	mutex_lock(&sched->lock);
1364	csg_id = group->csg_id;
1365	if (csg_id >= 0) {
1366		struct panthor_fw_cs_iface *cs_iface;
1367
1368		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1369		heaps = panthor_vm_get_heap_pool(group->vm, false);
1370		heap_address = cs_iface->output->heap_address;
1371		vt_start = cs_iface->output->heap_vt_start;
1372		vt_end = cs_iface->output->heap_vt_end;
1373		frag_end = cs_iface->output->heap_frag_end;
1374		renderpasses_in_flight = vt_start - frag_end;
1375		pending_frag_count = vt_end - frag_end;
1376	}
1377	mutex_unlock(&sched->lock);
1378
	/* If the group got scheduled out, we stop here. We will get a new
	 * tiler OOM event when it's scheduled again.
1381	 */
1382	if (unlikely(csg_id < 0))
1383		return 0;
1384
1385	if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) {
1386		ret = -EINVAL;
1387	} else {
1388		/* We do the allocation without holding the scheduler lock to avoid
1389		 * blocking the scheduling.
1390		 */
1391		ret = panthor_heap_grow(heaps, heap_address,
1392					renderpasses_in_flight,
1393					pending_frag_count, &new_chunk_va);
1394	}
1395
1396	/* If the heap context doesn't have memory for us, we want to let the
1397	 * FW try to reclaim memory by waiting for fragment jobs to land or by
1398	 * executing the tiler OOM exception handler, which is supposed to
1399	 * implement incremental rendering.
1400	 */
1401	if (ret && ret != -ENOMEM) {
1402		drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
1403		group->fatal_queues |= BIT(cs_id);
1404		sched_queue_delayed_work(sched, tick, 0);
1405		goto out_put_heap_pool;
1406	}
1407
1408	mutex_lock(&sched->lock);
1409	csg_id = group->csg_id;
1410	if (csg_id >= 0) {
1411		struct panthor_fw_csg_iface *csg_iface;
1412		struct panthor_fw_cs_iface *cs_iface;
1413
1414		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1415		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1416
1417		cs_iface->input->heap_start = new_chunk_va;
1418		cs_iface->input->heap_end = new_chunk_va;
1419		panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM);
1420		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id));
1421		panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
1422	}
1423	mutex_unlock(&sched->lock);
1424
	/* We allocated a chunk, but couldn't link it to the heap
1426	 * context because the group was scheduled out while we were
1427	 * allocating memory. We need to return this chunk to the heap.
1428	 */
1429	if (unlikely(csg_id < 0 && new_chunk_va))
1430		panthor_heap_return_chunk(heaps, heap_address, new_chunk_va);
1431
1432	ret = 0;
1433
1434out_put_heap_pool:
1435	panthor_heap_pool_put(heaps);
1436	return ret;
1437}
1438
1439static void group_tiler_oom_work(struct work_struct *work)
1440{
1441	struct panthor_group *group =
1442		container_of(work, struct panthor_group, tiler_oom_work);
1443	u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0);
1444
1445	while (tiler_oom) {
1446		u32 cs_id = ffs(tiler_oom) - 1;
1447
1448		group_process_tiler_oom(group, cs_id);
1449		tiler_oom &= ~BIT(cs_id);
1450	}
1451
1452	group_put(group);
1453}
1454
1455static void
1456cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev,
1457				       u32 csg_id, u32 cs_id)
1458{
1459	struct panthor_scheduler *sched = ptdev->scheduler;
1460	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1461	struct panthor_group *group = csg_slot->group;
1462
1463	lockdep_assert_held(&sched->lock);
1464
1465	if (drm_WARN_ON(&ptdev->base, !group))
1466		return;
1467
1468	atomic_or(BIT(cs_id), &group->tiler_oom);
1469
1470	/* We don't use group_queue_work() here because we want to queue the
1471	 * work item to the heap_alloc_wq.
1472	 */
1473	group_get(group);
1474	if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work))
1475		group_put(group);
1476}
1477
1478static bool cs_slot_process_irq_locked(struct panthor_device *ptdev,
1479				       u32 csg_id, u32 cs_id)
1480{
1481	struct panthor_fw_cs_iface *cs_iface;
1482	u32 req, ack, events;
1483
1484	lockdep_assert_held(&ptdev->scheduler->lock);
1485
1486	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1487	req = cs_iface->input->req;
1488	ack = cs_iface->output->ack;
1489	events = (req ^ ack) & CS_EVT_MASK;
1490
1491	if (events & CS_FATAL)
1492		cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id);
1493
1494	if (events & CS_FAULT)
1495		cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id);
1496
1497	if (events & CS_TILER_OOM)
1498		cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id);
1499
1500	/* We don't acknowledge the TILER_OOM event since its handling is
1501	 * deferred to a separate work.
1502	 */
1503	panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT);
1504
1505	return (events & (CS_FAULT | CS_TILER_OOM)) != 0;
1506}
1507
1508static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id)
1509{
1510	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1511	struct panthor_fw_csg_iface *csg_iface;
1512
1513	lockdep_assert_held(&ptdev->scheduler->lock);
1514
1515	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1516	csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE;
1517}
1518
1519static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id)
1520{
1521	struct panthor_scheduler *sched = ptdev->scheduler;
1522
1523	lockdep_assert_held(&sched->lock);
1524
1525	sched->might_have_idle_groups = true;
1526
1527	/* Schedule a tick so we can evict idle groups and schedule non-idle
1528	 * ones. This will also update runtime PM and devfreq busy/idle states,
1529	 * so the device can lower its frequency or get suspended.
1530	 */
1531	sched_queue_delayed_work(sched, tick, 0);
1532}
1533
1534static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
1535					u32 csg_id)
1536{
1537	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1538	struct panthor_group *group = csg_slot->group;
1539
1540	lockdep_assert_held(&ptdev->scheduler->lock);
1541
1542	if (group)
1543		group_queue_work(group, sync_upd);
1544
1545	sched_queue_work(ptdev->scheduler, sync_upd);
1546}
1547
1548static void
1549csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id)
1550{
1551	struct panthor_scheduler *sched = ptdev->scheduler;
1552	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1553	struct panthor_group *group = csg_slot->group;
1554
1555	lockdep_assert_held(&sched->lock);
1556
1557	drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id);
1558
1560	if (!drm_WARN_ON(&ptdev->base, !group))
1561		group->timedout = true;
1562
1563	sched_queue_delayed_work(sched, tick, 0);
1564}
1565
1566static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id)
1567{
1568	u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events;
1569	struct panthor_fw_csg_iface *csg_iface;
1570	u32 ring_cs_db_mask = 0;
1571
1572	lockdep_assert_held(&ptdev->scheduler->lock);
1573
1574	if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
1575		return;
1576
1577	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1578	req = READ_ONCE(csg_iface->input->req);
1579	ack = READ_ONCE(csg_iface->output->ack);
1580	cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req);
1581	cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack);
1582	csg_events = (req ^ ack) & CSG_EVT_MASK;
1583
1584	/* There may not be any pending CSG/CS interrupts to process */
1585	if (req == ack && cs_irq_req == cs_irq_ack)
1586		return;
1587
	/* Immediately set the IRQ_ACK bits to match the IRQ_REQ bits before
	 * examining the CS_ACK & CS_REQ bits. This ensures the host doesn't
	 * miss an interrupt for a CS when the firmware raises another
	 * interrupt for that CS while the host is still servicing the
	 * previous one.
1593	 */
1594	csg_iface->input->cs_irq_ack = cs_irq_req;
1595
1596	panthor_fw_update_reqs(csg_iface, req, ack,
1597			       CSG_SYNC_UPDATE |
1598			       CSG_IDLE |
1599			       CSG_PROGRESS_TIMER_EVENT);
1600
1601	if (csg_events & CSG_IDLE)
1602		csg_slot_process_idle_event_locked(ptdev, csg_id);
1603
1604	if (csg_events & CSG_PROGRESS_TIMER_EVENT)
1605		csg_slot_process_progress_timer_event_locked(ptdev, csg_id);
1606
1607	cs_irqs = cs_irq_req ^ cs_irq_ack;
1608	while (cs_irqs) {
1609		u32 cs_id = ffs(cs_irqs) - 1;
1610
1611		if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id))
1612			ring_cs_db_mask |= BIT(cs_id);
1613
1614		cs_irqs &= ~BIT(cs_id);
1615	}
1616
1617	if (csg_events & CSG_SYNC_UPDATE)
1618		csg_slot_sync_update_locked(ptdev, csg_id);
1619
1620	if (ring_cs_db_mask)
1621		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask);
1622
1623	panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
1624}
1625
1626static void sched_process_idle_event_locked(struct panthor_device *ptdev)
1627{
1628	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1629
1630	lockdep_assert_held(&ptdev->scheduler->lock);
1631
1632	/* Acknowledge the idle event and schedule a tick. */
1633	panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE);
1634	sched_queue_delayed_work(ptdev->scheduler, tick, 0);
1635}
1636
1637/**
1638 * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ
1639 * @ptdev: Device.
1640 */
1641static void sched_process_global_irq_locked(struct panthor_device *ptdev)
1642{
1643	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1644	u32 req, ack, evts;
1645
1646	lockdep_assert_held(&ptdev->scheduler->lock);
1647
1648	req = READ_ONCE(glb_iface->input->req);
1649	ack = READ_ONCE(glb_iface->output->ack);
1650	evts = (req ^ ack) & GLB_EVT_MASK;
1651
1652	if (evts & GLB_IDLE)
1653		sched_process_idle_event_locked(ptdev);
1654}
1655
1656static void process_fw_events_work(struct work_struct *work)
1657{
1658	struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
1659						      fw_events_work);
1660	u32 events = atomic_xchg(&sched->fw_events, 0);
1661	struct panthor_device *ptdev = sched->ptdev;
1662
1663	mutex_lock(&sched->lock);
1664
1665	if (events & JOB_INT_GLOBAL_IF) {
1666		sched_process_global_irq_locked(ptdev);
1667		events &= ~JOB_INT_GLOBAL_IF;
1668	}
1669
1670	while (events) {
1671		u32 csg_id = ffs(events) - 1;
1672
1673		sched_process_csg_irq_locked(ptdev, csg_id);
1674		events &= ~BIT(csg_id);
1675	}
1676
1677	mutex_unlock(&sched->lock);
1678}
1679
1680/**
 * panthor_sched_report_fw_events() - Report FW events to the scheduler.
 * @ptdev: Device.
 * @events: Bitmask of events to report (JOB_INT_GLOBAL_IF and/or per-CSG bits).
 */
1683void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
1684{
1685	if (!ptdev->scheduler)
1686		return;
1687
1688	atomic_or(events, &ptdev->scheduler->fw_events);
1689	sched_queue_work(ptdev->scheduler, fw_events);
1690}
1691
1692static const char *fence_get_driver_name(struct dma_fence *fence)
1693{
1694	return "panthor";
1695}
1696
1697static const char *queue_fence_get_timeline_name(struct dma_fence *fence)
1698{
1699	return "queue-fence";
1700}
1701
1702static const struct dma_fence_ops panthor_queue_fence_ops = {
1703	.get_driver_name = fence_get_driver_name,
1704	.get_timeline_name = queue_fence_get_timeline_name,
1705};
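
/*
 * A minimal sketch of how a job fence would be initialized against the
 * per-queue fence context described in struct panthor_queue. The actual fence
 * allocation happens in the job submission path, which is not part of this
 * excerpt.
 *
 *	dma_fence_init(fence, &panthor_queue_fence_ops,
 *		       &queue->fence_ctx.lock, queue->fence_ctx.id,
 *		       atomic64_inc_return(&queue->fence_ctx.seqno));
 */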
1706
1707struct panthor_csg_slots_upd_ctx {
1708	u32 update_mask;
1709	u32 timedout_mask;
1710	struct {
1711		u32 value;
1712		u32 mask;
1713	} requests[MAX_CSGS];
1714};
1715
1716static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx)
1717{
1718	memset(ctx, 0, sizeof(*ctx));
1719}
1720
1721static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev,
1722				    struct panthor_csg_slots_upd_ctx *ctx,
1723				    u32 csg_id, u32 value, u32 mask)
1724{
1725	if (drm_WARN_ON(&ptdev->base, !mask) ||
1726	    drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
1727		return;
1728
1729	ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask);
1730	ctx->requests[csg_id].mask |= mask;
1731	ctx->update_mask |= BIT(csg_id);
1732}
1733
1734static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev,
1735				     struct panthor_csg_slots_upd_ctx *ctx)
1736{
1737	struct panthor_scheduler *sched = ptdev->scheduler;
1738	u32 update_slots = ctx->update_mask;
1739
1740	lockdep_assert_held(&sched->lock);
1741
1742	if (!ctx->update_mask)
1743		return 0;
1744
1745	while (update_slots) {
1746		struct panthor_fw_csg_iface *csg_iface;
1747		u32 csg_id = ffs(update_slots) - 1;
1748
1749		update_slots &= ~BIT(csg_id);
1750		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1751		panthor_fw_update_reqs(csg_iface, req,
1752				       ctx->requests[csg_id].value,
1753				       ctx->requests[csg_id].mask);
1754	}
1755
1756	panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask);
1757
1758	update_slots = ctx->update_mask;
1759	while (update_slots) {
1760		struct panthor_fw_csg_iface *csg_iface;
1761		u32 csg_id = ffs(update_slots) - 1;
1762		u32 req_mask = ctx->requests[csg_id].mask, acked;
1763		int ret;
1764
1765		update_slots &= ~BIT(csg_id);
1766		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1767
1768		ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100);
1769
1770		if (acked & CSG_ENDPOINT_CONFIG)
1771			csg_slot_sync_priority_locked(ptdev, csg_id);
1772
1773		if (acked & CSG_STATE_MASK)
1774			csg_slot_sync_state_locked(ptdev, csg_id);
1775
1776		if (acked & CSG_STATUS_UPDATE) {
1777			csg_slot_sync_queues_state_locked(ptdev, csg_id);
1778			csg_slot_sync_idle_state_locked(ptdev, csg_id);
1779		}
1780
1781		if (ret && acked != req_mask &&
1782		    ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) {
1783			drm_err(&ptdev->base, "CSG %d update request timed out", csg_id);
1784			ctx->timedout_mask |= BIT(csg_id);
1785		}
1786	}
1787
1788	if (ctx->timedout_mask)
1789		return -ETIMEDOUT;
1790
1791	return 0;
1792}
1793
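/**
 * struct panthor_sched_tick_ctx - Context collected while evaluating a scheduler tick
 * @old_groups: Per-priority lists of groups currently bound to a CSG slot.
 * @groups: Per-priority lists of groups picked to run during the next timeslice.
 * @idle_group_count: Number of picked groups that are currently idle.
 * @group_count: Total number of picked groups.
 * @min_priority: Lowest priority among the picked groups.
 * @vms: VMs used by the picked groups, to track address space usage.
 * @as_count: Number of entries in @vms.
 * @immediate_tick: True if a new tick should be scheduled immediately.
 * @csg_upd_failed_mask: Bitmask of CSG slots whose update request timed out.
 */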
1794struct panthor_sched_tick_ctx {
1795	struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT];
1796	struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT];
1797	u32 idle_group_count;
1798	u32 group_count;
1799	enum panthor_csg_priority min_priority;
1800	struct panthor_vm *vms[MAX_CS_PER_CSG];
1801	u32 as_count;
1802	bool immediate_tick;
1803	u32 csg_upd_failed_mask;
1804};
1805
1806static bool
1807tick_ctx_is_full(const struct panthor_scheduler *sched,
1808		 const struct panthor_sched_tick_ctx *ctx)
1809{
1810	return ctx->group_count == sched->csg_slot_count;
1811}
1812
1813static bool
1814group_is_idle(struct panthor_group *group)
1815{
1816	struct panthor_device *ptdev = group->ptdev;
1817	u32 inactive_queues;
1818
1819	if (group->csg_id >= 0)
1820		return ptdev->scheduler->csg_slots[group->csg_id].idle;
1821
1822	inactive_queues = group->idle_queues | group->blocked_queues;
1823	return hweight32(inactive_queues) == group->queue_count;
1824}
1825
1826static bool
1827group_can_run(struct panthor_group *group)
1828{
1829	return group->state != PANTHOR_CS_GROUP_TERMINATED &&
1830	       group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
1831	       !group->destroyed && group->fatal_queues == 0 &&
1832	       !group->timedout;
1833}
1834
1835static void
1836tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
1837			       struct panthor_sched_tick_ctx *ctx,
1838			       struct list_head *queue,
1839			       bool skip_idle_groups,
1840			       bool owned_by_tick_ctx)
1841{
1842	struct panthor_group *group, *tmp;
1843
1844	if (tick_ctx_is_full(sched, ctx))
1845		return;
1846
1847	list_for_each_entry_safe(group, tmp, queue, run_node) {
1848		u32 i;
1849
1850		if (!group_can_run(group))
1851			continue;
1852
1853		if (skip_idle_groups && group_is_idle(group))
1854			continue;
1855
1856		for (i = 0; i < ctx->as_count; i++) {
1857			if (ctx->vms[i] == group->vm)
1858				break;
1859		}
1860
1861		if (i == ctx->as_count && ctx->as_count == sched->as_slot_count)
1862			continue;
1863
1864		if (!owned_by_tick_ctx)
1865			group_get(group);
1866
1867		list_move_tail(&group->run_node, &ctx->groups[group->priority]);
1868		ctx->group_count++;
1869		if (group_is_idle(group))
1870			ctx->idle_group_count++;
1871
1872		if (i == ctx->as_count)
1873			ctx->vms[ctx->as_count++] = group->vm;
1874
1875		if (ctx->min_priority > group->priority)
1876			ctx->min_priority = group->priority;
1877
1878		if (tick_ctx_is_full(sched, ctx))
1879			return;
1880	}
1881}
1882
1883static void
1884tick_ctx_insert_old_group(struct panthor_scheduler *sched,
1885			  struct panthor_sched_tick_ctx *ctx,
1886			  struct panthor_group *group,
1887			  bool full_tick)
1888{
1889	struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id];
1890	struct panthor_group *other_group;
1891
1892	if (!full_tick) {
1893		list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
1894		return;
1895	}
1896
1897	/* Rotate to make sure groups with lower CSG slot
1898	 * priorities have a chance to get a higher CSG slot
1899	 * priority next time they get picked. This priority
1900	 * has an impact on resource request ordering, so it's
1901	 * important to make sure we don't let one group starve
1902	 * all other groups with the same group priority.
1903	 */
1904	list_for_each_entry(other_group,
1905			    &ctx->old_groups[csg_slot->group->priority],
1906			    run_node) {
1907		struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id];
1908
1909		if (other_csg_slot->priority > csg_slot->priority) {
1910			list_add_tail(&csg_slot->group->run_node, &other_group->run_node);
1911			return;
1912		}
1913	}
1914
1915	list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
1916}
1917
1918static void
1919tick_ctx_init(struct panthor_scheduler *sched,
1920	      struct panthor_sched_tick_ctx *ctx,
1921	      bool full_tick)
1922{
1923	struct panthor_device *ptdev = sched->ptdev;
1924	struct panthor_csg_slots_upd_ctx upd_ctx;
1925	int ret;
1926	u32 i;
1927
1928	memset(ctx, 0, sizeof(*ctx));
1929	csgs_upd_ctx_init(&upd_ctx);
1930
1931	ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT;
1932	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
1933		INIT_LIST_HEAD(&ctx->groups[i]);
1934		INIT_LIST_HEAD(&ctx->old_groups[i]);
1935	}
1936
1937	for (i = 0; i < sched->csg_slot_count; i++) {
1938		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
1939		struct panthor_group *group = csg_slot->group;
1940		struct panthor_fw_csg_iface *csg_iface;
1941
1942		if (!group)
1943			continue;
1944
1945		csg_iface = panthor_fw_get_csg_iface(ptdev, i);
1946		group_get(group);
1947
1948		/* If there were unhandled faults on the VM, force processing of
1949		 * CSG IRQs, so we can flag the faulty queue.
1950		 */
1951		if (panthor_vm_has_unhandled_faults(group->vm)) {
1952			sched_process_csg_irq_locked(ptdev, i);
1953
1954			/* No fatal fault reported, flag all queues as faulty. */
1955			if (!group->fatal_queues)
1956				group->fatal_queues |= GENMASK(group->queue_count - 1, 0);
1957		}
1958
1959		tick_ctx_insert_old_group(sched, ctx, group, full_tick);
1960		csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
1961					csg_iface->output->ack ^ CSG_STATUS_UPDATE,
1962					CSG_STATUS_UPDATE);
1963	}
1964
1965	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
1966	if (ret) {
1967		panthor_device_schedule_reset(ptdev);
1968		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
1969	}
1970}
1971
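/* Each job occupies a fixed slot of NUM_INSTRS_PER_SLOT 64-bit instructions
 * in the queue ring buffer, i.e. 16 * sizeof(u64) == 128 bytes, which keeps
 * every slot cacheline-aligned (see the static_assert in queue_run_job()).
 */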
1972#define NUM_INSTRS_PER_SLOT		16
1973
1974static void
1975group_term_post_processing(struct panthor_group *group)
1976{
1977	struct panthor_job *job, *tmp;
1978	LIST_HEAD(faulty_jobs);
1979	bool cookie;
1980	u32 i = 0;
1981
1982	if (drm_WARN_ON(&group->ptdev->base, group_can_run(group)))
1983		return;
1984
1985	cookie = dma_fence_begin_signalling();
1986	for (i = 0; i < group->queue_count; i++) {
1987		struct panthor_queue *queue = group->queues[i];
1988		struct panthor_syncobj_64b *syncobj;
1989		int err;
1990
1991		if (group->fatal_queues & BIT(i))
1992			err = -EINVAL;
1993		else if (group->timedout)
1994			err = -ETIMEDOUT;
1995		else
1996			err = -ECANCELED;
1997
1998		if (!queue)
1999			continue;
2000
2001		spin_lock(&queue->fence_ctx.lock);
2002		list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
2003			list_move_tail(&job->node, &faulty_jobs);
2004			dma_fence_set_error(job->done_fence, err);
2005			dma_fence_signal_locked(job->done_fence);
2006		}
2007		spin_unlock(&queue->fence_ctx.lock);
2008
2009		/* Manually update the syncobj seqno to unblock waiters. */
2010		syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
2011		syncobj->status = ~0;
2012		syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
2013		sched_queue_work(group->ptdev->scheduler, sync_upd);
2014	}
2015	dma_fence_end_signalling(cookie);
2016
2017	list_for_each_entry_safe(job, tmp, &faulty_jobs, node) {
2018		list_del_init(&job->node);
2019		panthor_job_put(&job->base);
2020	}
2021}
2022
2023static void group_term_work(struct work_struct *work)
2024{
2025	struct panthor_group *group =
2026		container_of(work, struct panthor_group, term_work);
2027
2028	group_term_post_processing(group);
2029	group_put(group);
2030}
2031
2032static void
2033tick_ctx_cleanup(struct panthor_scheduler *sched,
2034		 struct panthor_sched_tick_ctx *ctx)
2035{
2036	struct panthor_group *group, *tmp;
2037	u32 i;
2038
2039	for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) {
2040		list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) {
2041			/* If everything went fine, we should only have groups
2042			 * to be terminated in the old_groups lists.
2043			 */
2044			drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask &&
2045				    group_can_run(group));
2046
2047			if (!group_can_run(group)) {
2048				list_del_init(&group->run_node);
2049				list_del_init(&group->wait_node);
2050				group_queue_work(group, term);
2051			} else if (group->csg_id >= 0) {
2052				list_del_init(&group->run_node);
2053			} else {
2054				list_move(&group->run_node,
2055					  group_is_idle(group) ?
2056					  &sched->groups.idle[group->priority] :
2057					  &sched->groups.runnable[group->priority]);
2058			}
2059			group_put(group);
2060		}
2061	}
2062
2063	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
2064		/* If everything went fine, the groups to schedule lists should
2065		 * be empty.
2066		 */
2067		drm_WARN_ON(&sched->ptdev->base,
2068			    !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i]));
2069
2070		list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) {
2071			if (group->csg_id >= 0) {
2072				list_del_init(&group->run_node);
2073			} else {
2074				list_move(&group->run_node,
2075					  group_is_idle(group) ?
2076					  &sched->groups.idle[group->priority] :
2077					  &sched->groups.runnable[group->priority]);
2078			}
2079			group_put(group);
2080		}
2081	}
2082}
2083
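/* Apply the decisions taken while building the tick context:
 * 1. ask the FW to suspend/terminate the evicted groups and adjust the
 *    priority of the groups keeping their slot,
 * 2. unbind the evicted groups and collect the freed CSG slots,
 * 3. bind and start the newly picked groups on the free slots,
 * 4. move evicted-but-runnable groups back to the runnable/idle lists.
 */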
2084static void
2085tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx)
2086{
2087	struct panthor_group *group, *tmp;
2088	struct panthor_device *ptdev = sched->ptdev;
2089	struct panthor_csg_slot *csg_slot;
2090	int prio, new_csg_prio = MAX_CSG_PRIO, i;
2091	u32 free_csg_slots = 0;
2092	struct panthor_csg_slots_upd_ctx upd_ctx;
2093	int ret;
2094
2095	csgs_upd_ctx_init(&upd_ctx);
2096
2097	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2098		/* Suspend or terminate evicted groups. */
2099		list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
2100			bool term = !group_can_run(group);
2101			int csg_id = group->csg_id;
2102
2103			if (drm_WARN_ON(&ptdev->base, csg_id < 0))
2104				continue;
2105
2106			csg_slot = &sched->csg_slots[csg_id];
2107			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2108						term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND,
2109						CSG_STATE_MASK);
2110		}
2111
2112		/* Update priorities on already running groups. */
2113		list_for_each_entry(group, &ctx->groups[prio], run_node) {
2114			struct panthor_fw_csg_iface *csg_iface;
2115			int csg_id = group->csg_id;
2116
2117			if (csg_id < 0) {
2118				new_csg_prio--;
2119				continue;
2120			}
2121
2122			csg_slot = &sched->csg_slots[csg_id];
2123			csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
2124			if (csg_slot->priority == new_csg_prio) {
2125				new_csg_prio--;
2126				continue;
2127			}
2128
2129			panthor_fw_update_reqs(csg_iface, endpoint_req,
2130					       CSG_EP_REQ_PRIORITY(new_csg_prio),
2131					       CSG_EP_REQ_PRIORITY_MASK);
2132			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2133						csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
2134						CSG_ENDPOINT_CONFIG);
2135			new_csg_prio--;
2136		}
2137	}
2138
2139	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2140	if (ret) {
2141		panthor_device_schedule_reset(ptdev);
2142		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
2143		return;
2144	}
2145
2146	/* Unbind evicted groups. */
2147	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2148		list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
2149			/* This group is gone. Process its pending interrupts now,
2150			 * so no stale state is left on the slot when a new group
2151			 * takes it.
2152			 */
2153			if (group->csg_id >= 0)
2154				sched_process_csg_irq_locked(ptdev, group->csg_id);
2155
2156			group_unbind_locked(group);
2157		}
2158	}
2159
2160	for (i = 0; i < sched->csg_slot_count; i++) {
2161		if (!sched->csg_slots[i].group)
2162			free_csg_slots |= BIT(i);
2163	}
2164
2165	csgs_upd_ctx_init(&upd_ctx);
2166	new_csg_prio = MAX_CSG_PRIO;
2167
2168	/* Start new groups. */
2169	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2170		list_for_each_entry(group, &ctx->groups[prio], run_node) {
2171			int csg_id = group->csg_id;
2172			struct panthor_fw_csg_iface *csg_iface;
2173
2174			if (csg_id >= 0) {
2175				new_csg_prio--;
2176				continue;
2177			}
2178
2179			csg_id = ffs(free_csg_slots) - 1;
2180			if (drm_WARN_ON(&ptdev->base, csg_id < 0))
2181				break;
2182
2183			csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
2184			csg_slot = &sched->csg_slots[csg_id];
2185			group_bind_locked(group, csg_id);
2186			csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--);
2187			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2188						group->state == PANTHOR_CS_GROUP_SUSPENDED ?
2189						CSG_STATE_RESUME : CSG_STATE_START,
2190						CSG_STATE_MASK);
2191			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2192						csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
2193						CSG_ENDPOINT_CONFIG);
2194			free_csg_slots &= ~BIT(csg_id);
2195		}
2196	}
2197
2198	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2199	if (ret) {
2200		panthor_device_schedule_reset(ptdev);
2201		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
2202		return;
2203	}
2204
2205	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2206		list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) {
2207			list_del_init(&group->run_node);
2208
2209			/* If the group has been destroyed while we were
2210			 * scheduling, ask for an immediate tick to
2211			 * re-evaluate as soon as possible and get rid of
2212			 * this dangling group.
2213			 */
2214			if (group->destroyed)
2215				ctx->immediate_tick = true;
2216			group_put(group);
2217		}
2218
2219		/* Return evicted groups to the idle or run queues. Groups
2220		 * that can no longer be run (because they've been destroyed
2221		 * or experienced an unrecoverable error) will be scheduled
2222		 * for destruction in tick_ctx_cleanup().
2223		 */
2224		list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) {
2225			if (!group_can_run(group))
2226				continue;
2227
2228			if (group_is_idle(group))
2229				list_move_tail(&group->run_node, &sched->groups.idle[prio]);
2230			else
2231				list_move_tail(&group->run_node, &sched->groups.runnable[prio]);
2232			group_put(group);
2233		}
2234	}
2235
2236	sched->used_csg_slot_count = ctx->group_count;
2237	sched->might_have_idle_groups = ctx->idle_group_count > 0;
2238}
2239
2240static u64
2241tick_ctx_update_resched_target(struct panthor_scheduler *sched,
2242			       const struct panthor_sched_tick_ctx *ctx)
2243{
2244	/* We had space left, no need to reschedule until some external event happens. */
2245	if (!tick_ctx_is_full(sched, ctx))
2246		goto no_tick;
2247
2248	/* If idle groups were scheduled, no need to wake up until some external
2249	 * event happens (group unblocked, new job submitted, ...).
2250	 */
2251	if (ctx->idle_group_count)
2252		goto no_tick;
2253
2254	if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT))
2255		goto no_tick;
2256
2257	/* If there are groups of the same priority waiting, we need to
2258	 * keep the scheduler ticking; otherwise, we'll just wait for
2259	 * new groups with higher priority to be queued.
2260	 */
2261	if (!list_empty(&sched->groups.runnable[ctx->min_priority])) {
2262		u64 resched_target = sched->last_tick + sched->tick_period;
2263
2264		if (time_before64(sched->resched_target, sched->last_tick) ||
2265		    time_before64(resched_target, sched->resched_target))
2266			sched->resched_target = resched_target;
2267
2268		return sched->resched_target - sched->last_tick;
2269	}
2270
2271no_tick:
2272	sched->resched_target = U64_MAX;
2273	return U64_MAX;
2274}
2275
2276static void tick_work(struct work_struct *work)
2277{
2278	struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
2279						      tick_work.work);
2280	struct panthor_device *ptdev = sched->ptdev;
2281	struct panthor_sched_tick_ctx ctx;
2282	u64 remaining_jiffies = 0, resched_delay;
2283	u64 now = get_jiffies_64();
2284	int prio, ret, cookie;
2285
2286	if (!drm_dev_enter(&ptdev->base, &cookie))
2287		return;
2288
2289	ret = pm_runtime_resume_and_get(ptdev->base.dev);
2290	if (drm_WARN_ON(&ptdev->base, ret))
2291		goto out_dev_exit;
2292
2293	if (time_before64(now, sched->resched_target))
2294		remaining_jiffies = sched->resched_target - now;
2295
2296	mutex_lock(&sched->lock);
2297	if (panthor_device_reset_is_pending(sched->ptdev))
2298		goto out_unlock;
2299
2300	tick_ctx_init(sched, &ctx, remaining_jiffies != 0);
2301	if (ctx.csg_upd_failed_mask)
2302		goto out_cleanup_ctx;
2303
2304	if (remaining_jiffies) {
2305		/* Scheduling forced in the middle of a tick. Only RT groups
2306		 * can preempt non-RT ones. Currently running RT groups can't be
2307		 * preempted.
2308		 */
2309		for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2310		     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2311		     prio--) {
2312			tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio],
2313						       true, true);
2314			if (prio == PANTHOR_CSG_PRIORITY_RT) {
2315				tick_ctx_pick_groups_from_list(sched, &ctx,
2316							       &sched->groups.runnable[prio],
2317							       true, false);
2318			}
2319		}
2320	}
2321
2322	/* First pick non-idle groups */
2323	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2324	     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2325	     prio--) {
2326		tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio],
2327					       true, false);
2328		tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true);
2329	}
2330
2331	/* If we have free CSG slots left, pick idle groups */
2332	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2333	     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2334	     prio--) {
2335		/* Check the old_groups lists first to avoid reprogramming the slots */
2336		tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true);
2337		tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio],
2338					       false, false);
2339	}
2340
2341	tick_ctx_apply(sched, &ctx);
2342	if (ctx.csg_upd_failed_mask)
2343		goto out_cleanup_ctx;
2344
2345	if (ctx.idle_group_count == ctx.group_count) {
2346		panthor_devfreq_record_idle(sched->ptdev);
2347		if (sched->pm.has_ref) {
2348			pm_runtime_put_autosuspend(ptdev->base.dev);
2349			sched->pm.has_ref = false;
2350		}
2351	} else {
2352		panthor_devfreq_record_busy(sched->ptdev);
2353		if (!sched->pm.has_ref) {
2354			pm_runtime_get(ptdev->base.dev);
2355			sched->pm.has_ref = true;
2356		}
2357	}
2358
2359	sched->last_tick = now;
2360	resched_delay = tick_ctx_update_resched_target(sched, &ctx);
2361	if (ctx.immediate_tick)
2362		resched_delay = 0;
2363
2364	if (resched_delay != U64_MAX)
2365		sched_queue_delayed_work(sched, tick, resched_delay);
2366
2367out_cleanup_ctx:
2368	tick_ctx_cleanup(sched, &ctx);
2369
2370out_unlock:
2371	mutex_unlock(&sched->lock);
2372	pm_runtime_mark_last_busy(ptdev->base.dev);
2373	pm_runtime_put_autosuspend(ptdev->base.dev);
2374
2375out_dev_exit:
2376	drm_dev_exit(cookie);
2377}
2378
2379static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx)
2380{
2381	struct panthor_queue *queue = group->queues[queue_idx];
2382	union {
2383		struct panthor_syncobj_64b sync64;
2384		struct panthor_syncobj_32b sync32;
2385	} *syncobj;
2386	bool result;
2387	u64 value;
2388
2389	syncobj = panthor_queue_get_syncwait_obj(group, queue);
2390	if (!syncobj)
2391		return -EINVAL;
2392
2393	value = queue->syncwait.sync64 ?
2394		syncobj->sync64.seqno :
2395		syncobj->sync32.seqno;
2396
2397	if (queue->syncwait.gt)
2398		result = value > queue->syncwait.ref;
2399	else
2400		result = value <= queue->syncwait.ref;
2401
2402	if (result)
2403		panthor_queue_put_syncwait_obj(queue);
2404
2405	return result;
2406}
2407
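/* Scheduler-level sync update: re-evaluate the SYNC_WAIT conditions of all
 * blocked queues in the waiting groups, unblock the queues whose condition
 * is now met, and move unbound groups back to the runnable lists (forcing
 * an immediate tick for RT groups).
 */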
2408static void sync_upd_work(struct work_struct *work)
2409{
2410	struct panthor_scheduler *sched = container_of(work,
2411						      struct panthor_scheduler,
2412						      sync_upd_work);
2413	struct panthor_group *group, *tmp;
2414	bool immediate_tick = false;
2415
2416	mutex_lock(&sched->lock);
2417	list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) {
2418		u32 tested_queues = group->blocked_queues;
2419		u32 unblocked_queues = 0;
2420
2421		while (tested_queues) {
2422			u32 cs_id = ffs(tested_queues) - 1;
2423			int ret;
2424
2425			ret = panthor_queue_eval_syncwait(group, cs_id);
2426			drm_WARN_ON(&group->ptdev->base, ret < 0);
2427			if (ret)
2428				unblocked_queues |= BIT(cs_id);
2429
2430			tested_queues &= ~BIT(cs_id);
2431		}
2432
2433		if (unblocked_queues) {
2434			group->blocked_queues &= ~unblocked_queues;
2435
2436			if (group->csg_id < 0) {
2437				list_move(&group->run_node,
2438					  &sched->groups.runnable[group->priority]);
2439				if (group->priority == PANTHOR_CSG_PRIORITY_RT)
2440					immediate_tick = true;
2441			}
2442		}
2443
2444		if (!group->blocked_queues)
2445			list_del_init(&group->wait_node);
2446	}
2447	mutex_unlock(&sched->lock);
2448
2449	if (immediate_tick)
2450		sched_queue_delayed_work(sched, tick, 0);
2451}
2452
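/* Called when new jobs land on one or more queues of @group. Clears the
 * idle state of those queues and decides whether the scheduler tick needs
 * to run immediately, at the next period, or not at all.
 */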
2453static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
2454{
2455	struct panthor_device *ptdev = group->ptdev;
2456	struct panthor_scheduler *sched = ptdev->scheduler;
2457	struct list_head *queue = &sched->groups.runnable[group->priority];
2458	u64 delay_jiffies = 0;
2459	bool was_idle;
2460	u64 now;
2461
2462	if (!group_can_run(group))
2463		return;
2464
2465	/* All updated queues are blocked, no need to wake up the scheduler. */
2466	if ((queue_mask & group->blocked_queues) == queue_mask)
2467		return;
2468
2469	was_idle = group_is_idle(group);
2470	group->idle_queues &= ~queue_mask;
2471
2472	/* Don't mess with the lists if we're in the middle of a reset. */
2473	if (atomic_read(&sched->reset.in_progress))
2474		return;
2475
2476	if (was_idle && !group_is_idle(group))
2477		list_move_tail(&group->run_node, queue);
2478
2479	/* RT groups are preemptive. */
2480	if (group->priority == PANTHOR_CSG_PRIORITY_RT) {
2481		sched_queue_delayed_work(sched, tick, 0);
2482		return;
2483	}
2484
2485	/* Some groups might be idle, force an immediate tick to
2486	 * re-evaluate.
2487	 */
2488	if (sched->might_have_idle_groups) {
2489		sched_queue_delayed_work(sched, tick, 0);
2490		return;
2491	}
2492
2493	/* Scheduler is ticking, nothing to do. */
2494	if (sched->resched_target != U64_MAX) {
2495		/* If there are free slots, force an immediate tick. */
2496		if (sched->used_csg_slot_count < sched->csg_slot_count)
2497			sched_queue_delayed_work(sched, tick, 0);
2498
2499		return;
2500	}
2501
2502	/* Scheduler tick was off, recalculate the resched_target based on the
2503	 * last tick event, and queue the scheduler work.
2504	 */
2505	now = get_jiffies_64();
2506	sched->resched_target = sched->last_tick + sched->tick_period;
2507	if (sched->used_csg_slot_count == sched->csg_slot_count &&
2508	    time_before64(now, sched->resched_target))
2509		delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX);
2510
2511	sched_queue_delayed_work(sched, tick, delay_jiffies);
2512}
2513
2514static void queue_stop(struct panthor_queue *queue,
2515		       struct panthor_job *bad_job)
2516{
2517	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
2518}
2519
2520static void queue_start(struct panthor_queue *queue)
2521{
2522	struct panthor_job *job;
2523
2524	/* Re-assign the parent fences. */
2525	list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
2526		job->base.s_fence->parent = dma_fence_get(job->done_fence);
2527
2528	drm_sched_start(&queue->scheduler, true);
2529}
2530
2531static void panthor_group_stop(struct panthor_group *group)
2532{
2533	struct panthor_scheduler *sched = group->ptdev->scheduler;
2534
2535	lockdep_assert_held(&sched->reset.lock);
2536
2537	for (u32 i = 0; i < group->queue_count; i++)
2538		queue_stop(group->queues[i], NULL);
2539
2540	group_get(group);
2541	list_move_tail(&group->run_node, &sched->reset.stopped_groups);
2542}
2543
2544static void panthor_group_start(struct panthor_group *group)
2545{
2546	struct panthor_scheduler *sched = group->ptdev->scheduler;
2547
2548	lockdep_assert_held(&group->ptdev->scheduler->reset.lock);
2549
2550	for (u32 i = 0; i < group->queue_count; i++)
2551		queue_start(group->queues[i]);
2552
2553	if (group_can_run(group)) {
2554		list_move_tail(&group->run_node,
2555			       group_is_idle(group) ?
2556			       &sched->groups.idle[group->priority] :
2557			       &sched->groups.runnable[group->priority]);
2558	} else {
2559		list_del_init(&group->run_node);
2560		list_del_init(&group->wait_node);
2561		group_queue_work(group, term);
2562	}
2563
2564	group_put(group);
2565}
2566
2567static void panthor_sched_immediate_tick(struct panthor_device *ptdev)
2568{
2569	struct panthor_scheduler *sched = ptdev->scheduler;
2570
2571	sched_queue_delayed_work(sched, tick, 0);
2572}
2573
2574/**
2575 * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler.
 * @ptdev: Device.
2576 */
2577void panthor_sched_report_mmu_fault(struct panthor_device *ptdev)
2578{
2579	/* Force a tick to immediately kill faulty groups. */
2580	if (ptdev->scheduler)
2581		panthor_sched_immediate_tick(ptdev);
2582}
2583
2584void panthor_sched_resume(struct panthor_device *ptdev)
2585{
2586	/* Force a tick to re-evaluate after a resume. */
2587	panthor_sched_immediate_tick(ptdev);
2588}
2589
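/**
 * panthor_sched_suspend() - Suspend the scheduler.
 * @ptdev: Device.
 *
 * Asks the FW to suspend (or terminate, if they can no longer run) all groups
 * currently bound to a CSG slot, escalates to termination if the suspend
 * request times out, flushes the L2/LSC caches so the suspend buffers are
 * up-to-date, and finally unbinds the groups.
 */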
2590void panthor_sched_suspend(struct panthor_device *ptdev)
2591{
2592	struct panthor_scheduler *sched = ptdev->scheduler;
2593	struct panthor_csg_slots_upd_ctx upd_ctx;
2594	struct panthor_group *group;
2595	u32 suspended_slots;
2596	u32 i;
2597
2598	mutex_lock(&sched->lock);
2599	csgs_upd_ctx_init(&upd_ctx);
2600	for (i = 0; i < sched->csg_slot_count; i++) {
2601		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
2602
2603		if (csg_slot->group) {
2604			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
2605						group_can_run(csg_slot->group) ?
2606						CSG_STATE_SUSPEND : CSG_STATE_TERMINATE,
2607						CSG_STATE_MASK);
2608		}
2609	}
2610
2611	suspended_slots = upd_ctx.update_mask;
2612
2613	csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2614	suspended_slots &= ~upd_ctx.timedout_mask;
2615
2616	if (upd_ctx.timedout_mask) {
2617		u32 slot_mask = upd_ctx.timedout_mask;
2618
2619		drm_err(&ptdev->base, "CSG suspend failed, escalating to termination");
2620		csgs_upd_ctx_init(&upd_ctx);
2621		while (slot_mask) {
2622			u32 csg_id = ffs(slot_mask) - 1;
2623
2624			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2625						CSG_STATE_TERMINATE,
2626						CSG_STATE_MASK);
2627			slot_mask &= ~BIT(csg_id);
2628		}
2629
2630		csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2631
2632		slot_mask = upd_ctx.timedout_mask;
2633		while (slot_mask) {
2634			u32 csg_id = ffs(slot_mask) - 1;
2635			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
2636
2637			/* The terminate request timed out, but the soft-reset will
2638			 * automatically terminate all active groups anyway, so let's
2639			 * force the state to terminated here.
2640			 */
2641			if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED)
2642				csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
2643			slot_mask &= ~BIT(csg_id);
2644		}
2645	}
2646
2647	/* Flush L2 and LSC caches to make sure suspend state is up-to-date.
2648	 * If the flush fails, flag all queues for termination.
2649	 */
2650	if (suspended_slots) {
2651		bool flush_caches_failed = false;
2652		u32 slot_mask = suspended_slots;
2653
2654		if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0))
2655			flush_caches_failed = true;
2656
2657		while (slot_mask) {
2658			u32 csg_id = ffs(slot_mask) - 1;
2659			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
2660
2661			if (flush_caches_failed)
2662				csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
2663			else
2664				csg_slot_sync_update_locked(ptdev, csg_id);
2665
2666			slot_mask &= ~BIT(csg_id);
2667		}
2668	}
2669
2670	for (i = 0; i < sched->csg_slot_count; i++) {
2671		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
2672
2673		group = csg_slot->group;
2674		if (!group)
2675			continue;
2676
2677		group_get(group);
2678
2679		if (group->csg_id >= 0)
2680			sched_process_csg_irq_locked(ptdev, group->csg_id);
2681
2682		group_unbind_locked(group);
2683
2684		drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node));
2685
2686		if (group_can_run(group)) {
2687			list_add(&group->run_node,
2688				 &sched->groups.idle[group->priority]);
2689		} else {
2690			/* We don't bother stopping the scheduler if the group is
2691			 * faulty; the group termination work will finish the job.
2692			 */
2693			list_del_init(&group->wait_node);
2694			group_queue_work(group, term);
2695		}
2696		group_put(group);
2697	}
2698	mutex_unlock(&sched->lock);
2699}
2700
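/**
 * panthor_sched_pre_reset() - Prepare the scheduler for a GPU reset.
 * @ptdev: Device.
 *
 * Cancels the scheduler works, suspends all active groups and stops the
 * drm_gpu_scheduler instances of every group that might still accept jobs,
 * so nothing new reaches the HW while the reset is in progress.
 */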
2701void panthor_sched_pre_reset(struct panthor_device *ptdev)
2702{
2703	struct panthor_scheduler *sched = ptdev->scheduler;
2704	struct panthor_group *group, *group_tmp;
2705	u32 i;
2706
2707	mutex_lock(&sched->reset.lock);
2708	atomic_set(&sched->reset.in_progress, true);
2709
2710	/* Cancel all scheduler works. Once this is done, these works can't be
2711	 * scheduled again until the reset operation is complete.
2712	 */
2713	cancel_work_sync(&sched->sync_upd_work);
2714	cancel_delayed_work_sync(&sched->tick_work);
2715
2716	panthor_sched_suspend(ptdev);
2717
2718	/* Stop all groups that might still accept jobs, so we don't get handed
2719	 * new jobs while we're resetting.
2720	 */
2721	for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) {
2722		/* All groups should be in the idle lists. */
2723		drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i]));
2724		list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node)
2725			panthor_group_stop(group);
2726	}
2727
2728	for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) {
2729		list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node)
2730			panthor_group_stop(group);
2731	}
2732
2733	mutex_unlock(&sched->reset.lock);
2734}
2735
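/**
 * panthor_sched_post_reset() - Resume scheduling after a GPU reset.
 * @ptdev: Device.
 * @reset_failed: True if the GPU reset didn't succeed.
 *
 * Restarts all groups stopped in panthor_sched_pre_reset(), marking them
 * terminated if the reset failed, then, if the reset succeeded, kicks the
 * scheduler tick and the sync update work.
 */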
2736void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
2737{
2738	struct panthor_scheduler *sched = ptdev->scheduler;
2739	struct panthor_group *group, *group_tmp;
2740
2741	mutex_lock(&sched->reset.lock);
2742
2743	list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) {
2744		/* Consider all previously running groups as terminated if the
2745		 * reset failed.
2746		 */
2747		if (reset_failed)
2748			group->state = PANTHOR_CS_GROUP_TERMINATED;
2749
2750		panthor_group_start(group);
2751	}
2752
2753	/* We're done resetting the GPU, clear the reset.in_progress bit so we can
2754	 * kick the scheduler.
2755	 */
2756	atomic_set(&sched->reset.in_progress, false);
2757	mutex_unlock(&sched->reset.lock);
2758
2759	/* No need to queue a tick and update syncs if the reset failed. */
2760	if (!reset_failed) {
2761		sched_queue_delayed_work(sched, tick, 0);
2762		sched_queue_work(sched, sync_upd);
2763	}
2764}
2765
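/* Group-level sync update: signal the done fence of every in-flight job whose
 * syncobj seqno has been reached, and release the corresponding job references.
 */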
2766static void group_sync_upd_work(struct work_struct *work)
2767{
2768	struct panthor_group *group =
2769		container_of(work, struct panthor_group, sync_upd_work);
2770	struct panthor_job *job, *job_tmp;
2771	LIST_HEAD(done_jobs);
2772	u32 queue_idx;
2773	bool cookie;
2774
2775	cookie = dma_fence_begin_signalling();
2776	for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
2777		struct panthor_queue *queue = group->queues[queue_idx];
2778		struct panthor_syncobj_64b *syncobj;
2779
2780		if (!queue)
2781			continue;
2782
2783		syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
2784
2785		spin_lock(&queue->fence_ctx.lock);
2786		list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
2787			if (!job->call_info.size)
2788				continue;
2789
2790			if (syncobj->seqno < job->done_fence->seqno)
2791				break;
2792
2793			list_move_tail(&job->node, &done_jobs);
2794			dma_fence_signal_locked(job->done_fence);
2795		}
2796		spin_unlock(&queue->fence_ctx.lock);
2797	}
2798	dma_fence_end_signalling(cookie);
2799
2800	list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
2801		list_del_init(&job->node);
2802		panthor_job_put(&job->base);
2803	}
2804
2805	group_put(group);
2806}
2807
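/* Pushes one job to the queue ring buffer. Each job is a fixed 16-instruction
 * slot that flushes caches, CALLs the user command stream, and then does a
 * SYNC_ADD64 on the queue syncobj so completion can be detected.
 *
 * The ring buffer size is a power of two, so the slot offset is simply
 * insert & (ringbuf_size - 1). E.g., with a 4 KiB ring buffer and
 * insert == 0x1080, the slot lands at offset 0x80 (the second 128-byte slot).
 */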
2808static struct dma_fence *
2809queue_run_job(struct drm_sched_job *sched_job)
2810{
2811	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
2812	struct panthor_group *group = job->group;
2813	struct panthor_queue *queue = group->queues[job->queue_idx];
2814	struct panthor_device *ptdev = group->ptdev;
2815	struct panthor_scheduler *sched = ptdev->scheduler;
2816	u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
2817	u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1);
2818	u64 addr_reg = ptdev->csif_info.cs_reg_count -
2819		       ptdev->csif_info.unpreserved_cs_reg_count;
2820	u64 val_reg = addr_reg + 2;
2821	u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
2822			job->queue_idx * sizeof(struct panthor_syncobj_64b);
2823	u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
2824	struct dma_fence *done_fence;
2825	int ret;
2826
2827	u64 call_instrs[NUM_INSTRS_PER_SLOT] = {
2828		/* MOV32 rX+2, cs.latest_flush */
2829		(2ull << 56) | (val_reg << 48) | job->call_info.latest_flush,
2830
2831		/* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
2832		(36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233,
2833
2834		/* MOV48 rX:rX+1, cs.start */
2835		(1ull << 56) | (addr_reg << 48) | job->call_info.start,
2836
2837		/* MOV32 rX+2, cs.size */
2838		(2ull << 56) | (val_reg << 48) | job->call_info.size,
2839
2840		/* WAIT(0) => waits for FLUSH_CACHE2 instruction */
2841		(3ull << 56) | (1 << 16),
2842
2843		/* CALL rX:rX+1, rX+2 */
2844		(32ull << 56) | (addr_reg << 40) | (val_reg << 32),
2845
2846		/* MOV48 rX:rX+1, sync_addr */
2847		(1ull << 56) | (addr_reg << 48) | sync_addr,
2848
2849		/* MOV48 rX+2, #1 */
2850		(1ull << 56) | (val_reg << 48) | 1,
2851
2852		/* WAIT(all) */
2853		(3ull << 56) | (waitall_mask << 16),
2854
2855		/* SYNC_ADD64.system_scope.propagate_err.nowait rX:rX+1, rX+2 */
2856		(51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1,
2857
2858		/* ERROR_BARRIER, so we can recover from faults at job
2859		 * boundaries.
2860		 */
2861		(47ull << 56),
2862	};
2863
2864	/* Each slot must be cacheline-aligned to please the prefetcher. */
2865	static_assert(sizeof(call_instrs) % 64 == 0,
2866		      "call_instrs is not aligned on a cacheline");
2867
2868	/* Stream size is zero, nothing to do => return a NULL fence and let
2869	 * drm_sched signal the parent.
2870	 */
2871	if (!job->call_info.size)
2872		return NULL;
2873
2874	ret = pm_runtime_resume_and_get(ptdev->base.dev);
2875	if (drm_WARN_ON(&ptdev->base, ret))
2876		return ERR_PTR(ret);
2877
2878	mutex_lock(&sched->lock);
2879	if (!group_can_run(group)) {
2880		done_fence = ERR_PTR(-ECANCELED);
2881		goto out_unlock;
2882	}
2883
2884	dma_fence_init(job->done_fence,
2885		       &panthor_queue_fence_ops,
2886		       &queue->fence_ctx.lock,
2887		       queue->fence_ctx.id,
2888		       atomic64_inc_return(&queue->fence_ctx.seqno));
2889
2890	memcpy(queue->ringbuf->kmap + ringbuf_insert,
2891	       call_instrs, sizeof(call_instrs));
2892
2893	panthor_job_get(&job->base);
2894	spin_lock(&queue->fence_ctx.lock);
2895	list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs);
2896	spin_unlock(&queue->fence_ctx.lock);
2897
2898	job->ringbuf.start = queue->iface.input->insert;
2899	job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs);
2900
2901	/* Make sure the ring buffer writes land before we update the INSERT
2902	 * pointer, so the FW never sees a stale command stream.
2903	 */
2904	wmb();
2905
2906	queue->iface.input->extract = queue->iface.output->extract;
2907	queue->iface.input->insert = job->ringbuf.end;
2908
2909	if (group->csg_id < 0) {
2910		/* If the queue is blocked, we want to keep the timeout running, so we
2911		 * can detect unbounded waits and kill the group when that happens.
2912		 * Otherwise, we suspend the timeout so the time we spend waiting for
2913		 * a CSG slot is not counted.
2914		 */
2915		if (!(group->blocked_queues & BIT(job->queue_idx)) &&
2916		    !queue->timeout_suspended) {
2917			queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
2918			queue->timeout_suspended = true;
2919		}
2920
2921		group_schedule_locked(group, BIT(job->queue_idx));
2922	} else {
2923		gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
2924		if (!sched->pm.has_ref &&
2925		    !(group->blocked_queues & BIT(job->queue_idx))) {
2926			pm_runtime_get(ptdev->base.dev);
2927			sched->pm.has_ref = true;
2928		}
2929	}
2930
2931	done_fence = dma_fence_get(job->done_fence);
2932
2933out_unlock:
2934	mutex_unlock(&sched->lock);
2935	pm_runtime_mark_last_busy(ptdev->base.dev);
2936	pm_runtime_put_autosuspend(ptdev->base.dev);
2937
2938	return done_fence;
2939}
2940
2941static enum drm_gpu_sched_stat
2942queue_timedout_job(struct drm_sched_job *sched_job)
2943{
2944	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
2945	struct panthor_group *group = job->group;
2946	struct panthor_device *ptdev = group->ptdev;
2947	struct panthor_scheduler *sched = ptdev->scheduler;
2948	struct panthor_queue *queue = group->queues[job->queue_idx];
2949
2950	drm_warn(&ptdev->base, "job timeout\n");
2951
2952	drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress));
2953
2954	queue_stop(queue, job);
2955
2956	mutex_lock(&sched->lock);
2957	group->timedout = true;
2958	if (group->csg_id >= 0) {
2959		sched_queue_delayed_work(ptdev->scheduler, tick, 0);
2960	} else {
2961		/* Remove from the run queues, so the scheduler can't
2962		 * pick the group on the next tick.
2963		 */
2964		list_del_init(&group->run_node);
2965		list_del_init(&group->wait_node);
2966
2967		group_queue_work(group, term);
2968	}
2969	mutex_unlock(&sched->lock);
2970
2971	queue_start(queue);
2972
2973	return DRM_GPU_SCHED_STAT_NOMINAL;
2974}
2975
2976static void queue_free_job(struct drm_sched_job *sched_job)
2977{
2978	drm_sched_job_cleanup(sched_job);
2979	panthor_job_put(sched_job);
2980}
2981
2982static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
2983	.run_job = queue_run_job,
2984	.timedout_job = queue_timedout_job,
2985	.free_job = queue_free_job,
2986};
2987
2988static struct panthor_queue *
2989group_create_queue(struct panthor_group *group,
2990		   const struct drm_panthor_queue_create *args)
2991{
2992	struct drm_gpu_scheduler *drm_sched;
2993	struct panthor_queue *queue;
2994	int ret;
2995
2996	if (args->pad[0] || args->pad[1] || args->pad[2])
2997		return ERR_PTR(-EINVAL);
2998
2999	if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K ||
3000	    !is_power_of_2(args->ringbuf_size))
3001		return ERR_PTR(-EINVAL);
3002
3003	if (args->priority > CSF_MAX_QUEUE_PRIO)
3004		return ERR_PTR(-EINVAL);
3005
3006	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
3007	if (!queue)
3008		return ERR_PTR(-ENOMEM);
3009
3010	queue->fence_ctx.id = dma_fence_context_alloc(1);
3011	spin_lock_init(&queue->fence_ctx.lock);
3012	INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
3013
3014	queue->priority = args->priority;
3015
3016	queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
3017						  args->ringbuf_size,
3018						  DRM_PANTHOR_BO_NO_MMAP,
3019						  DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
3020						  DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
3021						  PANTHOR_VM_KERNEL_AUTO_VA);
3022	if (IS_ERR(queue->ringbuf)) {
3023		ret = PTR_ERR(queue->ringbuf);
3024		goto err_free_queue;
3025	}
3026
3027	ret = panthor_kernel_bo_vmap(queue->ringbuf);
3028	if (ret)
3029		goto err_free_queue;
3030
3031	queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
3032							    &queue->iface.input,
3033							    &queue->iface.output,
3034							    &queue->iface.input_fw_va,
3035							    &queue->iface.output_fw_va);
3036	if (IS_ERR(queue->iface.mem)) {
3037		ret = PTR_ERR(queue->iface.mem);
3038		goto err_free_queue;
3039	}
3040
3041	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
3042			     group->ptdev->scheduler->wq, 1,
3043			     args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
3044			     0, msecs_to_jiffies(JOB_TIMEOUT_MS),
3045			     group->ptdev->reset.wq,
3046			     NULL, "panthor-queue", group->ptdev->base.dev);
3047	if (ret)
3048		goto err_free_queue;
3049
3050	drm_sched = &queue->scheduler;
3051	ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL);
	if (ret)
		goto err_free_queue;
3052
3053	return queue;
3054
3055err_free_queue:
3056	group_free_queue(group, queue);
3057	return ERR_PTR(ret);
3058}
3059
3060#define MAX_GROUPS_PER_POOL		128
3061
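/**
 * panthor_group_create() - Create a scheduling group.
 * @pfile: File the group is created on.
 * @group_args: Group creation arguments.
 * @queue_args: Array of queue creation arguments, one entry per queue.
 *
 * Return: A positive group handle on success, a negative error code otherwise.
 */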
3062int panthor_group_create(struct panthor_file *pfile,
3063			 const struct drm_panthor_group_create *group_args,
3064			 const struct drm_panthor_queue_create *queue_args)
3065{
3066	struct panthor_device *ptdev = pfile->ptdev;
3067	struct panthor_group_pool *gpool = pfile->groups;
3068	struct panthor_scheduler *sched = ptdev->scheduler;
3069	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
3070	struct panthor_group *group = NULL;
3071	u32 gid, i, suspend_size;
3072	int ret;
3073
3074	if (group_args->pad)
3075		return -EINVAL;
3076
3077	if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH)
3078		return -EINVAL;
3079
3080	if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) ||
3081	    (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) ||
3082	    (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present))
3083		return -EINVAL;
3084
3085	if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores ||
3086	    hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores ||
3087	    hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores)
3088		return -EINVAL;
3089
3090	group = kzalloc(sizeof(*group), GFP_KERNEL);
3091	if (!group)
3092		return -ENOMEM;
3093
3094	spin_lock_init(&group->fatal_lock);
3095	kref_init(&group->refcount);
3096	group->state = PANTHOR_CS_GROUP_CREATED;
3097	group->csg_id = -1;
3098
3099	group->ptdev = ptdev;
3100	group->max_compute_cores = group_args->max_compute_cores;
3101	group->compute_core_mask = group_args->compute_core_mask;
3102	group->max_fragment_cores = group_args->max_fragment_cores;
3103	group->fragment_core_mask = group_args->fragment_core_mask;
3104	group->max_tiler_cores = group_args->max_tiler_cores;
3105	group->tiler_core_mask = group_args->tiler_core_mask;
3106	group->priority = group_args->priority;
3107
3108	INIT_LIST_HEAD(&group->wait_node);
3109	INIT_LIST_HEAD(&group->run_node);
3110	INIT_WORK(&group->term_work, group_term_work);
3111	INIT_WORK(&group->sync_upd_work, group_sync_upd_work);
3112	INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
3113	INIT_WORK(&group->release_work, group_release_work);
3114
3115	group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
3116	if (!group->vm) {
3117		ret = -EINVAL;
3118		goto err_put_group;
3119	}
3120
3121	suspend_size = csg_iface->control->suspend_size;
3122	group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
3123	if (IS_ERR(group->suspend_buf)) {
3124		ret = PTR_ERR(group->suspend_buf);
3125		group->suspend_buf = NULL;
3126		goto err_put_group;
3127	}
3128
3129	suspend_size = csg_iface->control->protm_suspend_size;
3130	group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
3131	if (IS_ERR(group->protm_suspend_buf)) {
3132		ret = PTR_ERR(group->protm_suspend_buf);
3133		group->protm_suspend_buf = NULL;
3134		goto err_put_group;
3135	}
3136
3137	group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
3138						   group_args->queues.count *
3139						   sizeof(struct panthor_syncobj_64b),
3140						   DRM_PANTHOR_BO_NO_MMAP,
3141						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
3142						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
3143						   PANTHOR_VM_KERNEL_AUTO_VA);
3144	if (IS_ERR(group->syncobjs)) {
3145		ret = PTR_ERR(group->syncobjs);
3146		goto err_put_group;
3147	}
3148
3149	ret = panthor_kernel_bo_vmap(group->syncobjs);
3150	if (ret)
3151		goto err_put_group;
3152
3153	memset(group->syncobjs->kmap, 0,
3154	       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
3155
3156	for (i = 0; i < group_args->queues.count; i++) {
3157		group->queues[i] = group_create_queue(group, &queue_args[i]);
3158		if (IS_ERR(group->queues[i])) {
3159			ret = PTR_ERR(group->queues[i]);
3160			group->queues[i] = NULL;
3161			goto err_put_group;
3162		}
3163
3164		group->queue_count++;
3165	}
3166
3167	group->idle_queues = GENMASK(group->queue_count - 1, 0);
3168
3169	ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
3170	if (ret)
3171		goto err_put_group;
3172
3173	mutex_lock(&sched->reset.lock);
3174	if (atomic_read(&sched->reset.in_progress)) {
3175		panthor_group_stop(group);
3176	} else {
3177		mutex_lock(&sched->lock);
3178		list_add_tail(&group->run_node,
3179			      &sched->groups.idle[group->priority]);
3180		mutex_unlock(&sched->lock);
3181	}
3182	mutex_unlock(&sched->reset.lock);
3183
3184	return gid;
3185
3186err_put_group:
3187	group_put(group);
3188	return ret;
3189}
3190
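/**
 * panthor_group_destroy() - Destroy a scheduling group.
 * @pfile: File the group belongs to.
 * @group_handle: Handle returned by panthor_group_create().
 *
 * Return: 0 on success, -EINVAL if the handle doesn't match any group.
 */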
3191int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
3192{
3193	struct panthor_group_pool *gpool = pfile->groups;
3194	struct panthor_device *ptdev = pfile->ptdev;
3195	struct panthor_scheduler *sched = ptdev->scheduler;
3196	struct panthor_group *group;
3197
3198	group = xa_erase(&gpool->xa, group_handle);
3199	if (!group)
3200		return -EINVAL;
3201
3202	for (u32 i = 0; i < group->queue_count; i++) {
3203		if (group->queues[i])
3204			drm_sched_entity_destroy(&group->queues[i]->entity);
3205	}
3206
3207	mutex_lock(&sched->reset.lock);
3208	mutex_lock(&sched->lock);
3209	group->destroyed = true;
3210	if (group->csg_id >= 0) {
3211		sched_queue_delayed_work(sched, tick, 0);
3212	} else if (!atomic_read(&sched->reset.in_progress)) {
3213		/* Remove from the run queues, so the scheduler can't
3214		 * pick the group on the next tick.
3215		 */
3216		list_del_init(&group->run_node);
3217		list_del_init(&group->wait_node);
3218		group_queue_work(group, term);
3219	}
3220	mutex_unlock(&sched->lock);
3221	mutex_unlock(&sched->reset.lock);
3222
3223	group_put(group);
3224	return 0;
3225}
3226
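/**
 * panthor_group_get_state() - Retrieve the state bits of a scheduling group.
 * @pfile: File the group belongs to.
 * @get_state: Argument/result buffer.
 *
 * Return: 0 on success, a negative error code otherwise.
 */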
3227int panthor_group_get_state(struct panthor_file *pfile,
3228			    struct drm_panthor_group_get_state *get_state)
3229{
3230	struct panthor_group_pool *gpool = pfile->groups;
3231	struct panthor_device *ptdev = pfile->ptdev;
3232	struct panthor_scheduler *sched = ptdev->scheduler;
3233	struct panthor_group *group;
3234
3235	if (get_state->pad)
3236		return -EINVAL;
3237
3238	group = group_get(xa_load(&gpool->xa, get_state->group_handle));
3239	if (!group)
3240		return -EINVAL;
3241
3242	memset(get_state, 0, sizeof(*get_state));
3243
3244	mutex_lock(&sched->lock);
3245	if (group->timedout)
3246		get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT;
3247	if (group->fatal_queues) {
3248		get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT;
3249		get_state->fatal_queues = group->fatal_queues;
3250	}
3251	mutex_unlock(&sched->lock);
3252
3253	group_put(group);
3254	return 0;
3255}
3256
3257int panthor_group_pool_create(struct panthor_file *pfile)
3258{
3259	struct panthor_group_pool *gpool;
3260
3261	gpool = kzalloc(sizeof(*gpool), GFP_KERNEL);
3262	if (!gpool)
3263		return -ENOMEM;
3264
3265	xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1);
3266	pfile->groups = gpool;
3267	return 0;
3268}
3269
3270void panthor_group_pool_destroy(struct panthor_file *pfile)
3271{
3272	struct panthor_group_pool *gpool = pfile->groups;
3273	struct panthor_group *group;
3274	unsigned long i;
3275
3276	if (IS_ERR_OR_NULL(gpool))
3277		return;
3278
3279	xa_for_each(&gpool->xa, i, group)
3280		panthor_group_destroy(pfile, i);
3281
3282	xa_destroy(&gpool->xa);
3283	kfree(gpool);
3284	pfile->groups = NULL;
3285}
3286
3287static void job_release(struct kref *ref)
3288{
3289	struct panthor_job *job = container_of(ref, struct panthor_job, refcount);
3290
3291	drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node));
3292
3293	if (job->base.s_fence)
3294		drm_sched_job_cleanup(&job->base);
3295
3296	if (job->done_fence && job->done_fence->ops)
3297		dma_fence_put(job->done_fence);
3298	else
3299		dma_fence_free(job->done_fence);
3300
3301	group_put(job->group);
3302
3303	kfree(job);
3304}
3305
3306struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job)
3307{
3308	if (sched_job) {
3309		struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3310
3311		kref_get(&job->refcount);
3312	}
3313
3314	return sched_job;
3315}
3316
3317void panthor_job_put(struct drm_sched_job *sched_job)
3318{
3319	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3320
3321	if (sched_job)
3322		kref_put(&job->refcount, job_release);
3323}
3324
3325struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job)
3326{
3327	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3328
3329	return job->group->vm;
3330}
3331
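/**
 * panthor_job_create() - Create a job and initialize its drm_sched_job.
 * @pfile: File the job is submitted on.
 * @group_handle: Handle of the group the job targets.
 * @qsubmit: Queue submission arguments.
 *
 * Return: A valid drm_sched_job pointer on success, an ERR_PTR() otherwise.
 */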
3332struct drm_sched_job *
3333panthor_job_create(struct panthor_file *pfile,
3334		   u16 group_handle,
3335		   const struct drm_panthor_queue_submit *qsubmit)
3336{
3337	struct panthor_group_pool *gpool = pfile->groups;
3338	struct panthor_job *job;
3339	int ret;
3340
3341	if (qsubmit->pad)
3342		return ERR_PTR(-EINVAL);
3343
3344	/* stream_addr and stream_size must be either both zero or both non-zero. */
3345	if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0))
3346		return ERR_PTR(-EINVAL);
3347
3348	/* Make sure the address is aligned on a 64-byte (cacheline) boundary and
3349	 * the size is a multiple of 8 bytes (the instruction size).
3350	 */
3351	if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7))
3352		return ERR_PTR(-EINVAL);
3353
3354	/* bits 24:30 must be zero. */
3355	if (qsubmit->latest_flush & GENMASK(30, 24))
3356		return ERR_PTR(-EINVAL);
3357
3358	job = kzalloc(sizeof(*job), GFP_KERNEL);
3359	if (!job)
3360		return ERR_PTR(-ENOMEM);
3361
3362	kref_init(&job->refcount);
3363	job->queue_idx = qsubmit->queue_index;
3364	job->call_info.size = qsubmit->stream_size;
3365	job->call_info.start = qsubmit->stream_addr;
3366	job->call_info.latest_flush = qsubmit->latest_flush;
3367	INIT_LIST_HEAD(&job->node);
3368
3369	job->group = group_get(xa_load(&gpool->xa, group_handle));
3370	if (!job->group) {
3371		ret = -EINVAL;
3372		goto err_put_job;
3373	}
3374
3375	if (job->queue_idx >= job->group->queue_count ||
3376	    !job->group->queues[job->queue_idx]) {
3377		ret = -EINVAL;
3378		goto err_put_job;
3379	}
3380
3381	job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
3382	if (!job->done_fence) {
3383		ret = -ENOMEM;
3384		goto err_put_job;
3385	}
3386
3387	ret = drm_sched_job_init(&job->base,
3388				 &job->group->queues[job->queue_idx]->entity,
3389				 1, job->group);
3390	if (ret)
3391		goto err_put_job;
3392
3393	return &job->base;
3394
3395err_put_job:
3396	panthor_job_put(&job->base);
3397	return ERR_PTR(ret);
3398}
3399
3400void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job)
3401{
3402	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3403
3404	/* Still not sure why we want USAGE_WRITE for external objects, since I
3405	 * was assuming this would be handled through explicit syncs being imported
3406	 * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers
3407	 * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason.
3408	 */
3409	panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
3410				DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
3411}
3412
3413void panthor_sched_unplug(struct panthor_device *ptdev)
3414{
3415	struct panthor_scheduler *sched = ptdev->scheduler;
3416
3417	cancel_delayed_work_sync(&sched->tick_work);
3418
3419	mutex_lock(&sched->lock);
3420	if (sched->pm.has_ref) {
3421		pm_runtime_put(ptdev->base.dev);
3422		sched->pm.has_ref = false;
3423	}
3424	mutex_unlock(&sched->lock);
3425}
3426
3427static void panthor_sched_fini(struct drm_device *ddev, void *res)
3428{
3429	struct panthor_scheduler *sched = res;
3430	int prio;
3431
3432	if (!sched || !sched->csg_slot_count)
3433		return;
3434
3435	cancel_delayed_work_sync(&sched->tick_work);
3436
3437	if (sched->wq)
3438		destroy_workqueue(sched->wq);
3439
3440	if (sched->heap_alloc_wq)
3441		destroy_workqueue(sched->heap_alloc_wq);
3442
3443	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
3444		drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
3445		drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
3446	}
3447
3448	drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting));
3449}
3450
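/**
 * panthor_sched_init() - Initialize the scheduler.
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */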
3451int panthor_sched_init(struct panthor_device *ptdev)
3452{
3453	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
3454	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
3455	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
3456	struct panthor_scheduler *sched;
3457	u32 gpu_as_count, num_groups;
3458	int prio, ret;
3459
3460	sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
3461	if (!sched)
3462		return -ENOMEM;
3463
3464	/* The highest bit in JOB_INT_* is reserved for global IRQs. That
3465	 * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here.
3466	 */
3467	num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num);
3468
3469	/* The FW-side scheduler might deadlock if two groups with the same
3470	 * priority try to access a set of resources that overlaps, with part
3471	 * of the resources being allocated to one group and the other part to
3472	 * the other group, both groups waiting for the remaining resources to
3473	 * be allocated. To avoid that, it is recommended to assign each CSG a
3474	 * different priority. In theory we could allow several groups to have
3475	 * the same CSG priority if they don't request the same resources, but
3476	 * that makes the scheduling logic more complicated, so let's clamp
3477	 * the number of CSG slots to MAX_CSG_PRIO + 1 for now.
3478	 */
3479	num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups);
3480
3481	/* We need at least one AS for the MCU and one for the GPU contexts. */
3482	gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1));
3483	if (!gpu_as_count) {
3484		drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)",
3485			gpu_as_count + 1);
3486		return -EINVAL;
3487	}
3488
3489	sched->ptdev = ptdev;
3490	sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features);
3491	sched->csg_slot_count = num_groups;
3492	sched->cs_slot_count = csg_iface->control->stream_num;
3493	sched->as_slot_count = gpu_as_count;
3494	ptdev->csif_info.csg_slot_count = sched->csg_slot_count;
3495	ptdev->csif_info.cs_slot_count = sched->cs_slot_count;
3496	ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count;
3497
3498	sched->last_tick = 0;
3499	sched->resched_target = U64_MAX;
3500	sched->tick_period = msecs_to_jiffies(10);
3501	INIT_DELAYED_WORK(&sched->tick_work, tick_work);
3502	INIT_WORK(&sched->sync_upd_work, sync_upd_work);
3503	INIT_WORK(&sched->fw_events_work, process_fw_events_work);
3504
3505	ret = drmm_mutex_init(&ptdev->base, &sched->lock);
3506	if (ret)
3507		return ret;
3508
3509	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
3510		INIT_LIST_HEAD(&sched->groups.runnable[prio]);
3511		INIT_LIST_HEAD(&sched->groups.idle[prio]);
3512	}
3513	INIT_LIST_HEAD(&sched->groups.waiting);
3514
3515	ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock);
3516	if (ret)
3517		return ret;
3518
3519	INIT_LIST_HEAD(&sched->reset.stopped_groups);
3520
3521	/* sched->heap_alloc_wq will be used for heap chunk allocation on
3522	 * tiler OOM events, which means we can't use the same workqueue for
3523	 * the scheduler because works queued by the scheduler are in
3524	 * the dma-signalling path. Allocate a dedicated heap_alloc_wq to
3525	 * work around this limitation.
3526	 *
3527	 * FIXME: Ultimately, what we need is a failable/non-blocking GEM
3528	 * allocation path that we can call when a heap OOM is reported. The
3529	 * FW is smart enough to fall back on other methods if the kernel can't
3530	 * allocate memory, and fail the tiling job if none of these
3531	 * countermeasures worked.
3532	 *
3533	 * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the
3534	 * system is running out of memory.
3535	 */
3536	sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0);
3537	sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
3538	if (!sched->wq || !sched->heap_alloc_wq) {
3539		panthor_sched_fini(&ptdev->base, sched);
3540		drm_err(&ptdev->base, "Failed to allocate the workqueues");
3541		return -ENOMEM;
3542	}
3543
3544	ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched);
3545	if (ret)
3546		return ret;
3547
3548	ptdev->scheduler = sched;
3549	return 0;
3550}
3551