1/*	$NetBSD: intel_lrc.c,v 1.8 2021/12/19 12:32:15 riastradh Exp $	*/
2
3/*
4 * Copyright © 2014 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 *
25 * Authors:
26 *    Ben Widawsky <ben@bwidawsk.net>
27 *    Michel Thierry <michel.thierry@intel.com>
28 *    Thomas Daniel <thomas.daniel@intel.com>
29 *    Oscar Mateo <oscar.mateo@intel.com>
30 *
31 */
32
33/**
34 * DOC: Logical Rings, Logical Ring Contexts and Execlists
35 *
36 * Motivation:
37 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
38 * These expanded contexts enable a number of new abilities, especially
39 * "Execlists" (also implemented in this file).
40 *
41 * One of the main differences from the legacy HW contexts is that logical
42 * ring contexts incorporate many more things into the context's state, like
43 * PDPs or ringbuffer control registers:
44 *
45 * The reason why PDPs are included in the context is straightforward: as
46 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
47 * contained there means you don't need to do a ppgtt->switch_mm yourself;
48 * instead, the GPU will do it for you on the context switch.
49 *
50 * But what about the ringbuffer control registers (head, tail, etc.)?
51 * Shouldn't we just need a set of those per engine command streamer? This is
52 * where the name "Logical Rings" starts to make sense: by virtualizing the
53 * rings, the engine cs shifts to a new "ring buffer" with every context
54 * switch. When you want to submit a workload to the GPU you: A) choose your
55 * context, B) find its appropriate virtualized ring, C) write commands to it
56 * and then, finally, D) tell the GPU to switch to that context.
57 *
58 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
59 * to a context is via a context execution list, ergo "Execlists".
60 *
61 * LRC implementation:
62 * Regarding the creation of contexts, we have:
63 *
64 * - One global default context.
65 * - One local default context for each opened fd.
66 * - One local extra context for each context create ioctl call.
67 *
68 * Now that ringbuffers belong per-context (and not per-engine, like before)
69 * and that contexts are uniquely tied to a given engine (and not reusable,
70 * like before) we need:
71 *
72 * - One ringbuffer per-engine inside each context.
73 * - One backing object per-engine inside each context.
74 *
75 * The global default context starts its life with these new objects fully
76 * allocated and populated. The local default context for each opened fd is
77 * more complex, because we don't know at creation time which engine is going
78 * to use it. To handle this, we have implemented a deferred creation of LR
79 * contexts:
80 *
81 * The local context starts its life as a hollow or blank holder, that only
82 * gets populated for a given engine once we receive an execbuffer. If later
83 * on we receive another execbuffer ioctl for the same context but a different
84 * engine, we allocate/populate a new ringbuffer and context backing object and
85 * so on.
86 *
87 * Finally, regarding local contexts created using the ioctl call: as they are
88 * only allowed with the render ring, we can allocate & populate them right
89 * away (no need to defer anything, at least for now).
90 *
91 * Execlists implementation:
92 * Execlists are the new method by which, on gen8+ hardware, workloads are
93 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
94 * This method works as follows:
95 *
96 * When a request is committed, its commands (the BB start and any leading or
97 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
98 * for the appropriate context. The tail pointer in the hardware context is not
99 * updated at this time, but instead, kept by the driver in the ringbuffer
100 * structure. A structure representing this request is added to a request queue
101 * for the appropriate engine: this structure contains a copy of the context's
102 * tail after the request was written to the ring buffer and a pointer to the
103 * context itself.
104 *
105 * If the engine's request queue was empty before the request was added, the
106 * queue is processed immediately. Otherwise the queue will be processed during
107 * a context switch interrupt. In any case, elements on the queue will get sent
108 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
109 * globally unique 20-bit submission ID.
110 *
111 * When execution of a request completes, the GPU updates the context status
112 * buffer with a context complete event and generates a context switch interrupt.
113 * During the interrupt handling, the driver examines the events in the buffer:
114 * for each context complete event, if the announced ID matches that on the head
115 * of the request queue, then that request is retired and removed from the queue.
116 *
117 * After processing, if any requests were retired and the queue is not empty
118 * then a new execution list can be submitted. The two requests at the front of
119 * the queue are next to be submitted but since a context may not occur twice in
120 * an execution list, if subsequent requests have the same ID as the first then
121 * the two requests must be combined. This is done simply by discarding requests
122 * at the head of the queue until either only one request is left (in which case
123 * we use a NULL second context) or the first two requests have unique IDs.
124 *
125 * By always executing the first two requests in the queue the driver ensures
126 * that the GPU is kept as busy as possible. In the case where a single context
127 * completes but a second context is still executing, the request for this second
128 * context will be at the head of the queue when we remove the first one. This
129 * request will then be resubmitted along with a new request for a different context,
130 * which will cause the hardware to continue executing the second request and queue
131 * the new request (the GPU detects the condition of a context getting preempted
132 * with the same context and optimizes the context switch flow by not doing
133 * preemption, but just sampling the new tail pointer).
134 *
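 * Illustrative sketch of the flow described above, in pseudo-code only
 * (the names below are placeholders, not the driver's actual functions):
 *
 *     submit(request):
 *         write BB start + breadcrumb commands into the context's ringbuffer
 *         record the new tail and add the request to the engine's queue
 *         if the queue was empty:
 *             send up to two requests (merged per context) to the ELSP
 *
 *     context_switch_interrupt():
 *         for each context-complete event in the context status buffer:
 *             if its ID matches the head of the request queue:
 *                 retire that request
 *         if any requests were retired and the queue is not empty:
 *             submit the next pair of requests to the ELSP
 *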
135 */
136#include <sys/cdefs.h>
137__KERNEL_RCSID(0, "$NetBSD: intel_lrc.c,v 1.8 2021/12/19 12:32:15 riastradh Exp $");
138
139#include <linux/interrupt.h>
140
141#include "i915_drv.h"
142#include "i915_perf.h"
143#include "i915_trace.h"
144#include "i915_vgpu.h"
145#include "intel_context.h"
146#include "intel_engine_pm.h"
147#include "intel_gt.h"
148#include "intel_gt_pm.h"
149#include "intel_gt_requests.h"
150#include "intel_lrc_reg.h"
151#include "intel_mocs.h"
152#include "intel_reset.h"
153#include "intel_ring.h"
154#include "intel_workarounds.h"
155
156#include <linux/nbsd-namespace.h>
157
158#define RING_EXECLIST_QFULL		(1 << 0x2)
159#define RING_EXECLIST1_VALID		(1 << 0x3)
160#define RING_EXECLIST0_VALID		(1 << 0x4)
161#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
162#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
163#define RING_EXECLIST0_ACTIVE		(1 << 0x12)
164
165#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
166#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
167#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
168#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
169#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
170#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
171
172#define GEN8_CTX_STATUS_COMPLETED_MASK \
173	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
174
175#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
176
177#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
178#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
179#define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
180#define GEN12_IDLE_CTX_ID		0x7FF
181#define GEN12_CSB_CTX_VALID(csb_dw) \
182	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
183
184/* Typical size of the average request (2 pipecontrols and a MI_BB) */
185#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
186#define WA_TAIL_DWORDS 2
187#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
188
189struct virtual_engine {
190	struct intel_engine_cs base;
191	struct intel_context context;
192
193	/*
194	 * We allow only a single request through the virtual engine at a time
195	 * (each request in the timeline waits for the completion fence of
196	 * the previous before being submitted). By restricting ourselves to
197	 * only submitting a single request, each request is placed on to a
198	 * physical engine to maximise load spreading (by virtue of the late greedy
199	 * scheduling -- each real engine takes the next available request
200	 * upon idling).
201	 */
202	struct i915_request *request;
203
204	/*
205	 * We keep a rbtree of available virtual engines inside each physical
206	 * engine, sorted by priority. Here we preallocate the nodes we need
207	 * for the virtual engine, indexed by physical_engine->id.
208	 */
209	struct ve_node {
210		struct rb_node rb;
211		int prio;
212		uint64_t order;
213		bool inserted;
214	} nodes[I915_NUM_ENGINES];
215	uint64_t order;
216
217	/*
218	 * Keep track of bonded pairs -- restrictions upon our selection
219	 * of physical engines any particular request may be submitted to.
220	 * If we receive a submit-fence from a master engine, we will only
221	 * use one of sibling_mask physical engines.
222	 */
223	struct ve_bond {
224		const struct intel_engine_cs *master;
225		intel_engine_mask_t sibling_mask;
226	} *bonds;
227	unsigned int num_bonds;
228
229	/* And finally, which physical engines this virtual engine maps onto. */
230	unsigned int num_siblings;
231	struct intel_engine_cs *siblings[0];
232};
233
234#ifdef __NetBSD__
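/*
 * Comparators for the per-sibling ve_node rbtrees (NetBSD rbtree(3) API):
 * nodes are keyed by priority, with ties broken by the 'order' field.
 */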
235static int
236compare_ve_nodes(void *cookie, const void *va, const void *vb)
237{
238	const struct ve_node *na = va;
239	const struct ve_node *nb = vb;
240
241	if (na->prio < nb->prio)
242		return -1;
243	if (na->prio > nb->prio)
244		return +1;
245	if (na->order < nb->order)
246		return -1;
247	if (na->order > nb->order)
248		return +1;
249	return 0;
250}
251
252static int
253compare_ve_node_key(void *cookie, const void *vn, const void *vk)
254{
255	const struct ve_node *n = vn;
256	const int *k = vk;
257
258	if (n->prio < *k)
259		return -1;
260	if (n->prio > *k)
261		return +1;
262	return 0;
263}
264
265static const rb_tree_ops_t ve_tree_ops = {
266	.rbto_compare_nodes = compare_ve_nodes,
267	.rbto_compare_key = compare_ve_node_key,
268	.rbto_node_offset = offsetof(struct ve_node, rb),
269};
270#endif
271
272static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
273{
274	GEM_BUG_ON(!intel_engine_is_virtual(engine));
275	return container_of(engine, struct virtual_engine, base);
276}
277
278static int __execlists_context_alloc(struct intel_context *ce,
279				     struct intel_engine_cs *engine);
280
281static void execlists_init_reg_state(u32 *reg_state,
282				     const struct intel_context *ce,
283				     const struct intel_engine_cs *engine,
284				     const struct intel_ring *ring,
285				     bool close);
286static void
287__execlists_update_reg_state(const struct intel_context *ce,
288			     const struct intel_engine_cs *engine,
289			     u32 head);
290
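/* Cancel an incomplete request: flag its fence with -EIO and mark it complete. */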
291static void mark_eio(struct i915_request *rq)
292{
293	if (i915_request_completed(rq))
294		return;
295
296	GEM_BUG_ON(i915_request_signaled(rq));
297
298	dma_fence_set_error(&rq->fence, -EIO);
299	i915_request_mark_complete(rq);
300}
301
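/*
 * Walk backwards along the timeline from rq to find the oldest request that
 * has not yet completed, i.e. the request the engine is still working on.
 */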
302static struct i915_request *
303active_request(const struct intel_timeline * const tl, struct i915_request *rq)
304{
305	struct i915_request *active = rq;
306
307	rcu_read_lock();
308	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
309		if (i915_request_completed(rq))
310			break;
311
312		active = rq;
313	}
314	rcu_read_unlock();
315
316	return active;
317}
318
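/* GGTT address of the HWS dword used as the preemption busywait semaphore. */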
319static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
320{
321	return (i915_ggtt_offset(engine->status_page.vma) +
322		I915_GEM_HWS_PREEMPT_ADDR);
323}
324
325static inline void
326ring_set_paused(const struct intel_engine_cs *engine, int state)
327{
328	/*
329	 * We inspect HWS_PREEMPT with a semaphore inside
330	 * engine->emit_fini_breadcrumb. If the dword is true,
331	 * the ring is paused as the semaphore will busywait
332	 * until the dword is false.
333	 */
334	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
335	if (state)
336		wmb();
337}
338
339static inline struct i915_priolist *to_priolist(struct rb_node *rb)
340{
341	return rb_entry(rb, struct i915_priolist, node);
342}
343
344static inline int rq_prio(const struct i915_request *rq)
345{
346	return rq->sched.attr.priority;
347}
348
349static int effective_prio(const struct i915_request *rq)
350{
351	int prio = rq_prio(rq);
352
353	/*
354	 * If this request is special and must not be interrupted at any
355	 * cost, so be it. Note we are only checking the most recent request
356	 * in the context and so may be masking an earlier vip request. It
357	 * is hoped that under the conditions where nopreempt is used, this
358	 * will not matter (i.e. all requests to that context will be
359	 * nopreempt for as long as desired).
360	 */
361	if (i915_request_has_nopreempt(rq))
362		prio = I915_PRIORITY_UNPREEMPTABLE;
363
364	/*
365	 * On unwinding the active request, we give it a priority bump
366	 * if it has completed waiting on any semaphore. If we know that
367	 * the request has already started, we can prevent an unwanted
368	 * preempt-to-idle cycle by taking that into account now.
369	 */
370	if (__i915_request_has_started(rq))
371		prio |= I915_PRIORITY_NOSEMAPHORE;
372
373	/* Restrict mere WAIT boosts from triggering preemption */
374	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
375	return prio | __NO_PREEMPTION;
376}
377
378static int queue_prio(const struct intel_engine_execlists *execlists)
379{
380	struct i915_priolist *p;
381	struct rb_node *rb;
382
383	rb = rb_first_cached(&execlists->queue);
384	if (!rb)
385		return INT_MIN;
386
387	/*
388	 * As the priolist[] are inverted, with the highest priority in [0],
389	 * we have to flip the index value back into a priority.
390	 */
391	p = to_priolist(rb);
392	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
393}
394
395static inline bool need_preempt(const struct intel_engine_cs *engine,
396				const struct i915_request *rq,
397				struct rb_node *rb)
398{
399	int last_prio;
400
401	if (!intel_engine_has_semaphores(engine))
402		return false;
403
404	/*
405	 * Check if the current priority hint merits a preemption attempt.
406	 *
407	 * We record the highest value priority we saw during rescheduling
408	 * prior to this dequeue, therefore we know that if it is strictly
409	 * less than the current tail of ELSP[0], we do not need to force
410	 * a preempt-to-idle cycle.
411	 *
412	 * However, the priority hint is a mere hint that we may need to
413	 * preempt. If that hint is stale or we may be trying to preempt
414	 * ourselves, ignore the request.
415	 *
416	 * More naturally we would write
417	 *      prio >= max(0, last);
418	 * except that we wish to prevent triggering preemption at the same
419	 * priority level: the task that is running should remain running
420	 * to preserve FIFO ordering of dependencies.
421	 */
422	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
423	if (engine->execlists.queue_priority_hint <= last_prio)
424		return false;
425
426	/*
427	 * Check against the first request in ELSP[1]; it will, thanks to the
428	 * power of PI, be the highest priority of that context.
429	 */
430	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
431	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
432		return true;
433
434	if (rb) {
435		struct virtual_engine *ve =
436			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
437		bool preempt = false;
438
439		if (engine == ve->siblings[0]) { /* only preempt one sibling */
440			struct i915_request *next;
441
442			rcu_read_lock();
443			next = READ_ONCE(ve->request);
444			if (next)
445				preempt = rq_prio(next) > last_prio;
446			rcu_read_unlock();
447		}
448
449		if (preempt)
450			return preempt;
451	}
452
453	/*
454	 * If the inflight context did not trigger the preemption, then maybe
455	 * it was the set of queued requests? Pick the highest priority in
456	 * the queue (the first active priolist) and see if it deserves to be
457	 * running instead of ELSP[0].
458	 *
459	 * The highest priority request in the queue cannot be either
460	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
461	 * context, its priority would not exceed ELSP[0] aka last_prio.
462	 */
463	return queue_prio(&engine->execlists) > last_prio;
464}
465
466__maybe_unused static inline bool
467assert_priority_queue(const struct i915_request *prev,
468		      const struct i915_request *next)
469{
470	/*
471	 * Without preemption, the prev may refer to the still active element
472	 * which we refuse to let go.
473	 *
474	 * Even with preemption, there are times when we think it is better not
475	 * to preempt and leave an ostensibly lower priority request in flight.
476	 */
477	if (i915_request_is_active(prev))
478		return true;
479
480	return rq_prio(prev) >= rq_prio(next);
481}
482
483/*
484 * The context descriptor encodes various attributes of a context,
485 * including its GTT address and some flags. Because it's fairly
486 * expensive to calculate, we'll just do it once and cache the result,
487 * which remains valid until the context is unpinned.
488 *
489 * This is what a descriptor looks like, from LSB to MSB::
490 *
491 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
492 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
493 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
494 *      bits 53-54:    mbz, reserved for use by hardware
495 *      bits 55-63:    group ID, currently unused and set to 0
496 *
497 * Starting from Gen11, the upper dword of the descriptor has a new format:
498 *
499 *      bits 32-36:    reserved
500 *      bits 37-47:    SW context ID
501 *      bits 48-53:    engine instance
502 *      bit 54:        mbz, reserved for use by hardware
503 *      bits 55-60:    SW counter
504 *      bits 61-63:    engine class
505 *
506 * engine info, SW context ID and SW counter need to form a unique number
507 * (Context ID) per lrc.
508 */
509static u64
510lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
511{
512	u64 desc;
513
514	desc = INTEL_LEGACY_32B_CONTEXT;
515	if (i915_vm_is_4lvl(ce->vm))
516		desc = INTEL_LEGACY_64B_CONTEXT;
517	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
518
519	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
520	if (IS_GEN(engine->i915, 8))
521		desc |= GEN8_CTX_L3LLC_COHERENT;
522
523	desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
524	/*
525	 * The following 32 bits are copied into the OA reports (dword 2).
526	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
527	 * anything below.
528	 */
529	if (INTEL_GEN(engine->i915) >= 11) {
530		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
531								/* bits 48-53 */
532
533		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
534								/* bits 61-63 */
535	}
536
537	return desc;
538}
539
540static inline unsigned int dword_in_page(void *addr)
541{
542	return offset_in_page(addr) / sizeof(u32);
543}
544
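/*
 * Populate the MI_LOAD_REGISTER_IMM portion of a context image from a
 * compact, byte-encoded table (see the NOP/LRI/POSTED/REG/REG16/END
 * macros below):
 *
 *  - a byte with BIT(7) set skips that many dwords (filling them with
 *    MI_NOOP when clearing);
 *  - otherwise the byte is an LRI header: the low 6 bits give the register
 *    count and the POSTED flag selects MI_LRI_FORCE_POSTED;
 *  - each register offset then follows as a sequence of 7-bit chunks
 *    (BIT(7) set on all but the last byte), relative to engine->mmio_base;
 *  - a zero byte terminates the table and is followed by the total size of
 *    the state in dwords, used to pad out and close the image when clearing.
 */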
545static void set_offsets(u32 *regs,
546			const u8 *data,
547			const struct intel_engine_cs *engine,
548			bool clear)
549#define NOP(x) (BIT(7) | (x))
550#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
551#define POSTED BIT(0)
552#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
553#define REG16(x) \
554	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
555	(((x) >> 2) & 0x7f)
556#define END(x) 0, (x)
557{
558	const u32 base = engine->mmio_base;
559
560	while (*data) {
561		u8 count, flags;
562
563		if (*data & BIT(7)) { /* skip */
564			count = *data++ & ~BIT(7);
565			if (clear)
566				memset32(regs, MI_NOOP, count);
567			regs += count;
568			continue;
569		}
570
571		count = *data & 0x3f;
572		flags = *data >> 6;
573		data++;
574
575		*regs = MI_LOAD_REGISTER_IMM(count);
576		if (flags & POSTED)
577			*regs |= MI_LRI_FORCE_POSTED;
578		if (INTEL_GEN(engine->i915) >= 11)
579			*regs |= MI_LRI_CS_MMIO;
580		regs++;
581
582		GEM_BUG_ON(!count);
583		do {
584			u32 offset = 0;
585			u8 v;
586
587			do {
588				v = *data++;
589				offset <<= 7;
590				offset |= v & ~BIT(7);
591			} while (v & BIT(7));
592
593			regs[0] = base + (offset << 2);
594			if (clear)
595				regs[1] = 0;
596			regs += 2;
597		} while (--count);
598	}
599
600	if (clear) {
601		u8 count = *++data;
602
603		/* Clear past the tail for HW access */
604		GEM_BUG_ON(dword_in_page(regs) > count);
605		memset32(regs, MI_NOOP, count - dword_in_page(regs));
606
607		/* Close the batch; used mainly by live_lrc_layout() */
608		*regs = MI_BATCH_BUFFER_END;
609		if (INTEL_GEN(engine->i915) >= 10)
610			*regs |= BIT(0);
611	}
612}
613
614static const u8 gen8_xcs_offsets[] = {
615	NOP(1),
616	LRI(11, 0),
617	REG16(0x244),
618	REG(0x034),
619	REG(0x030),
620	REG(0x038),
621	REG(0x03c),
622	REG(0x168),
623	REG(0x140),
624	REG(0x110),
625	REG(0x11c),
626	REG(0x114),
627	REG(0x118),
628
629	NOP(9),
630	LRI(9, 0),
631	REG16(0x3a8),
632	REG16(0x28c),
633	REG16(0x288),
634	REG16(0x284),
635	REG16(0x280),
636	REG16(0x27c),
637	REG16(0x278),
638	REG16(0x274),
639	REG16(0x270),
640
641	NOP(13),
642	LRI(2, 0),
643	REG16(0x200),
644	REG(0x028),
645
646	END(80)
647};
648
649static const u8 gen9_xcs_offsets[] = {
650	NOP(1),
651	LRI(14, POSTED),
652	REG16(0x244),
653	REG(0x034),
654	REG(0x030),
655	REG(0x038),
656	REG(0x03c),
657	REG(0x168),
658	REG(0x140),
659	REG(0x110),
660	REG(0x11c),
661	REG(0x114),
662	REG(0x118),
663	REG(0x1c0),
664	REG(0x1c4),
665	REG(0x1c8),
666
667	NOP(3),
668	LRI(9, POSTED),
669	REG16(0x3a8),
670	REG16(0x28c),
671	REG16(0x288),
672	REG16(0x284),
673	REG16(0x280),
674	REG16(0x27c),
675	REG16(0x278),
676	REG16(0x274),
677	REG16(0x270),
678
679	NOP(13),
680	LRI(1, POSTED),
681	REG16(0x200),
682
683	NOP(13),
684	LRI(44, POSTED),
685	REG(0x028),
686	REG(0x09c),
687	REG(0x0c0),
688	REG(0x178),
689	REG(0x17c),
690	REG16(0x358),
691	REG(0x170),
692	REG(0x150),
693	REG(0x154),
694	REG(0x158),
695	REG16(0x41c),
696	REG16(0x600),
697	REG16(0x604),
698	REG16(0x608),
699	REG16(0x60c),
700	REG16(0x610),
701	REG16(0x614),
702	REG16(0x618),
703	REG16(0x61c),
704	REG16(0x620),
705	REG16(0x624),
706	REG16(0x628),
707	REG16(0x62c),
708	REG16(0x630),
709	REG16(0x634),
710	REG16(0x638),
711	REG16(0x63c),
712	REG16(0x640),
713	REG16(0x644),
714	REG16(0x648),
715	REG16(0x64c),
716	REG16(0x650),
717	REG16(0x654),
718	REG16(0x658),
719	REG16(0x65c),
720	REG16(0x660),
721	REG16(0x664),
722	REG16(0x668),
723	REG16(0x66c),
724	REG16(0x670),
725	REG16(0x674),
726	REG16(0x678),
727	REG16(0x67c),
728	REG(0x068),
729
730	END(176)
731};
732
733static const u8 gen12_xcs_offsets[] = {
734	NOP(1),
735	LRI(13, POSTED),
736	REG16(0x244),
737	REG(0x034),
738	REG(0x030),
739	REG(0x038),
740	REG(0x03c),
741	REG(0x168),
742	REG(0x140),
743	REG(0x110),
744	REG(0x1c0),
745	REG(0x1c4),
746	REG(0x1c8),
747	REG(0x180),
748	REG16(0x2b4),
749
750	NOP(5),
751	LRI(9, POSTED),
752	REG16(0x3a8),
753	REG16(0x28c),
754	REG16(0x288),
755	REG16(0x284),
756	REG16(0x280),
757	REG16(0x27c),
758	REG16(0x278),
759	REG16(0x274),
760	REG16(0x270),
761
762	END(80)
763};
764
765static const u8 gen8_rcs_offsets[] = {
766	NOP(1),
767	LRI(14, POSTED),
768	REG16(0x244),
769	REG(0x034),
770	REG(0x030),
771	REG(0x038),
772	REG(0x03c),
773	REG(0x168),
774	REG(0x140),
775	REG(0x110),
776	REG(0x11c),
777	REG(0x114),
778	REG(0x118),
779	REG(0x1c0),
780	REG(0x1c4),
781	REG(0x1c8),
782
783	NOP(3),
784	LRI(9, POSTED),
785	REG16(0x3a8),
786	REG16(0x28c),
787	REG16(0x288),
788	REG16(0x284),
789	REG16(0x280),
790	REG16(0x27c),
791	REG16(0x278),
792	REG16(0x274),
793	REG16(0x270),
794
795	NOP(13),
796	LRI(1, 0),
797	REG(0x0c8),
798
799	END(80)
800};
801
802static const u8 gen9_rcs_offsets[] = {
803	NOP(1),
804	LRI(14, POSTED),
805	REG16(0x244),
806	REG(0x34),
807	REG(0x30),
808	REG(0x38),
809	REG(0x3c),
810	REG(0x168),
811	REG(0x140),
812	REG(0x110),
813	REG(0x11c),
814	REG(0x114),
815	REG(0x118),
816	REG(0x1c0),
817	REG(0x1c4),
818	REG(0x1c8),
819
820	NOP(3),
821	LRI(9, POSTED),
822	REG16(0x3a8),
823	REG16(0x28c),
824	REG16(0x288),
825	REG16(0x284),
826	REG16(0x280),
827	REG16(0x27c),
828	REG16(0x278),
829	REG16(0x274),
830	REG16(0x270),
831
832	NOP(13),
833	LRI(1, 0),
834	REG(0xc8),
835
836	NOP(13),
837	LRI(44, POSTED),
838	REG(0x28),
839	REG(0x9c),
840	REG(0xc0),
841	REG(0x178),
842	REG(0x17c),
843	REG16(0x358),
844	REG(0x170),
845	REG(0x150),
846	REG(0x154),
847	REG(0x158),
848	REG16(0x41c),
849	REG16(0x600),
850	REG16(0x604),
851	REG16(0x608),
852	REG16(0x60c),
853	REG16(0x610),
854	REG16(0x614),
855	REG16(0x618),
856	REG16(0x61c),
857	REG16(0x620),
858	REG16(0x624),
859	REG16(0x628),
860	REG16(0x62c),
861	REG16(0x630),
862	REG16(0x634),
863	REG16(0x638),
864	REG16(0x63c),
865	REG16(0x640),
866	REG16(0x644),
867	REG16(0x648),
868	REG16(0x64c),
869	REG16(0x650),
870	REG16(0x654),
871	REG16(0x658),
872	REG16(0x65c),
873	REG16(0x660),
874	REG16(0x664),
875	REG16(0x668),
876	REG16(0x66c),
877	REG16(0x670),
878	REG16(0x674),
879	REG16(0x678),
880	REG16(0x67c),
881	REG(0x68),
882
883	END(176)
884};
885
886static const u8 gen11_rcs_offsets[] = {
887	NOP(1),
888	LRI(15, POSTED),
889	REG16(0x244),
890	REG(0x034),
891	REG(0x030),
892	REG(0x038),
893	REG(0x03c),
894	REG(0x168),
895	REG(0x140),
896	REG(0x110),
897	REG(0x11c),
898	REG(0x114),
899	REG(0x118),
900	REG(0x1c0),
901	REG(0x1c4),
902	REG(0x1c8),
903	REG(0x180),
904
905	NOP(1),
906	LRI(9, POSTED),
907	REG16(0x3a8),
908	REG16(0x28c),
909	REG16(0x288),
910	REG16(0x284),
911	REG16(0x280),
912	REG16(0x27c),
913	REG16(0x278),
914	REG16(0x274),
915	REG16(0x270),
916
917	LRI(1, POSTED),
918	REG(0x1b0),
919
920	NOP(10),
921	LRI(1, 0),
922	REG(0x0c8),
923
924	END(80)
925};
926
927static const u8 gen12_rcs_offsets[] = {
928	NOP(1),
929	LRI(13, POSTED),
930	REG16(0x244),
931	REG(0x034),
932	REG(0x030),
933	REG(0x038),
934	REG(0x03c),
935	REG(0x168),
936	REG(0x140),
937	REG(0x110),
938	REG(0x1c0),
939	REG(0x1c4),
940	REG(0x1c8),
941	REG(0x180),
942	REG16(0x2b4),
943
944	NOP(5),
945	LRI(9, POSTED),
946	REG16(0x3a8),
947	REG16(0x28c),
948	REG16(0x288),
949	REG16(0x284),
950	REG16(0x280),
951	REG16(0x27c),
952	REG16(0x278),
953	REG16(0x274),
954	REG16(0x270),
955
956	LRI(3, POSTED),
957	REG(0x1b0),
958	REG16(0x5a8),
959	REG16(0x5ac),
960
961	NOP(6),
962	LRI(1, 0),
963	REG(0x0c8),
964
965	END(80)
966};
967
968#undef END
969#undef REG16
970#undef REG
971#undef LRI
972#undef NOP
973
974static const u8 *reg_offsets(const struct intel_engine_cs *engine)
975{
976	/*
977	 * The gen12+ lists only have the registers we program in the basic
978	 * default state. We rely on the context image using relative
979	 * addressing to automatically fix up the register state between the
980	 * physical engines for a virtual engine.
981	 */
982	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
983		   !intel_engine_has_relative_mmio(engine));
984
985	if (engine->class == RENDER_CLASS) {
986		if (INTEL_GEN(engine->i915) >= 12)
987			return gen12_rcs_offsets;
988		else if (INTEL_GEN(engine->i915) >= 11)
989			return gen11_rcs_offsets;
990		else if (INTEL_GEN(engine->i915) >= 9)
991			return gen9_rcs_offsets;
992		else
993			return gen8_rcs_offsets;
994	} else {
995		if (INTEL_GEN(engine->i915) >= 12)
996			return gen12_xcs_offsets;
997		else if (INTEL_GEN(engine->i915) >= 9)
998			return gen9_xcs_offsets;
999		else
1000			return gen8_xcs_offsets;
1001	}
1002}
1003
1004static struct i915_request *
1005__unwind_incomplete_requests(struct intel_engine_cs *engine)
1006{
1007	struct i915_request *rq, *rn, *active = NULL;
1008	struct list_head *uninitialized_var(pl);
1009	int prio = I915_PRIORITY_INVALID;
1010
1011	lockdep_assert_held(&engine->active.lock);
1012
1013	list_for_each_entry_safe_reverse(rq, rn,
1014					 &engine->active.requests,
1015					 sched.link) {
1016		if (i915_request_completed(rq))
1017			continue; /* XXX */
1018
1019		__i915_request_unsubmit(rq);
1020
1021		/*
1022		 * Push the request back into the queue for later resubmission.
1023		 * If this request is not native to this physical engine (i.e.
1024		 * it came from a virtual source), push it back onto the virtual
1025		 * engine so that it can be moved across onto another physical
1026		 * engine as load dictates.
1027		 */
1028		if (likely(rq->execution_mask == engine->mask)) {
1029			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1030			if (rq_prio(rq) != prio) {
1031				prio = rq_prio(rq);
1032				pl = i915_sched_lookup_priolist(engine, prio);
1033			}
1034			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1035
1036			list_move(&rq->sched.link, pl);
1037			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1038
1039			active = rq;
1040		} else {
1041			struct intel_engine_cs *owner = rq->context->engine;
1042
1043			/*
1044			 * Decouple the virtual breadcrumb before moving it
1045			 * back to the virtual engine -- we don't want the
1046			 * request to complete in the background and try
1047			 * and cancel the breadcrumb on the virtual engine
1048			 * (instead of the old engine where it is linked)!
1049			 */
1050			if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1051				     &rq->fence.flags)) {
1052				spin_lock_nested(&rq->lock,
1053						 SINGLE_DEPTH_NESTING);
1054				i915_request_cancel_breadcrumb(rq);
1055				spin_unlock(&rq->lock);
1056			}
1057			rq->engine = owner;
1058			owner->submit_request(rq);
1059			active = NULL;
1060		}
1061	}
1062
1063	return active;
1064}
1065
1066struct i915_request *
1067execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1068{
1069	struct intel_engine_cs *engine =
1070		container_of(execlists, typeof(*engine), execlists);
1071
1072	return __unwind_incomplete_requests(engine);
1073}
1074
1075static inline void
1076execlists_context_status_change(struct i915_request *rq, unsigned long status)
1077{
1078	/*
1079	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
1080	 * the compiler should eliminate this function as dead code.
1081	 */
1082	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1083		return;
1084
1085	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1086				   status, rq);
1087}
1088
1089static void intel_engine_context_in(struct intel_engine_cs *engine)
1090{
1091	unsigned long flags;
1092
1093	if (READ_ONCE(engine->stats.enabled) == 0)
1094		return;
1095
1096	write_seqlock_irqsave(&engine->stats.lock, flags);
1097
1098	if (engine->stats.enabled > 0) {
1099		if (engine->stats.active++ == 0)
1100			engine->stats.start = ktime_get();
1101		GEM_BUG_ON(engine->stats.active == 0);
1102	}
1103
1104	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1105}
1106
1107static void intel_engine_context_out(struct intel_engine_cs *engine)
1108{
1109	unsigned long flags;
1110
1111	if (READ_ONCE(engine->stats.enabled) == 0)
1112		return;
1113
1114	write_seqlock_irqsave(&engine->stats.lock, flags);
1115
1116	if (engine->stats.enabled > 0) {
1117		ktime_t last;
1118
1119		if (engine->stats.active && --engine->stats.active == 0) {
1120			/*
1121			 * Decrement the active context count and, in case the GPU
1122			 * is now idle, add the elapsed time to the running total.
1123			 */
1124			last = ktime_sub(ktime_get(), engine->stats.start);
1125
1126			engine->stats.total = ktime_add(engine->stats.total,
1127							last);
1128		} else if (engine->stats.active == 0) {
1129			/*
1130			 * After turning on engine stats, context out might be
1131			 * the first event, in which case we account from the
1132			 * time stats gathering was turned on.
1133			 */
1134			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
1135
1136			engine->stats.total = ktime_add(engine->stats.total,
1137							last);
1138		}
1139	}
1140
1141	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1142}
1143
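/*
 * Dword offset of the RING_MI_MODE register within the saved context image,
 * or -1 when it is not tracked for this engine.
 */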
1144static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
1145{
1146	if (INTEL_GEN(engine->i915) >= 12)
1147		return 0x60;
1148	else if (INTEL_GEN(engine->i915) >= 9)
1149		return 0x54;
1150	else if (engine->class == RENDER_CLASS)
1151		return 0x58;
1152	else
1153		return -1;
1154}
1155
1156static void
1157execlists_check_context(const struct intel_context *ce,
1158			const struct intel_engine_cs *engine)
1159{
1160	const struct intel_ring *ring = ce->ring;
1161	u32 *regs = ce->lrc_reg_state;
1162	bool valid = true;
1163	int x;
1164
1165	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1166		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1167		       engine->name,
1168		       regs[CTX_RING_START],
1169		       i915_ggtt_offset(ring->vma));
1170		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1171		valid = false;
1172	}
1173
1174	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1175	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1176		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1177		       engine->name,
1178		       regs[CTX_RING_CTL],
1179		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1180		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1181		valid = false;
1182	}
1183
1184	x = lrc_ring_mi_mode(engine);
1185	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1186		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1187		       engine->name, regs[x + 1]);
1188		regs[x + 1] &= ~STOP_RING;
1189		regs[x + 1] |= STOP_RING << 16;
1190		valid = false;
1191	}
1192
1193	WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1194}
1195
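/*
 * Copy the engine's pristine default context image over ce's register state
 * (skipping the per-process HWSP) and reapply the per-context register setup.
 */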
1196static void restore_default_state(struct intel_context *ce,
1197				  struct intel_engine_cs *engine)
1198{
1199	u32 *regs = ce->lrc_reg_state;
1200
1201	if (engine->pinned_default_state)
1202		memcpy(regs, /* skip restoring the vanilla PPHWSP */
1203		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
1204		       engine->context_size - PAGE_SIZE);
1205
1206	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
1207}
1208
1209static void reset_active(struct i915_request *rq,
1210			 struct intel_engine_cs *engine)
1211{
1212	struct intel_context * const ce = rq->context;
1213	u32 head;
1214
1215	/*
1216	 * The executing context has been cancelled. We want to prevent
1217	 * further execution along this context and propagate the error on
1218	 * to anything depending on its results.
1219	 *
1220	 * In __i915_request_submit(), we apply the -EIO and remove the
1221	 * requests' payloads for any banned requests. But first, we must
1222	 * rewind the context back to the start of the incomplete request so
1223	 * that we do not jump back into the middle of the batch.
1224	 *
1225	 * We preserve the breadcrumbs and semaphores of the incomplete
1226	 * requests so that inter-timeline dependencies (i.e other timelines)
1227	 * remain correctly ordered. And we defer to __i915_request_submit()
1228	 * so that all asynchronous waits are correctly handled.
1229	 */
1230	ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1231		     rq->fence.context, rq->fence.seqno);
1232
1233	/* On resubmission of the active request, payload will be scrubbed */
1234	if (i915_request_completed(rq))
1235		head = rq->tail;
1236	else
1237		head = active_request(ce->timeline, rq)->head;
1238	head = intel_ring_wrap(ce->ring, head);
1239
1240	/* Scrub the context image to prevent replaying the previous batch */
1241	restore_default_state(ce, engine);
1242	__execlists_update_reg_state(ce, engine, head);
1243
1244	/* We've switched away, so this should be a no-op, but intent matters */
1245	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1246}
1247
1248static inline struct intel_engine_cs *
1249__execlists_schedule_in(struct i915_request *rq)
1250{
1251	struct intel_engine_cs * const engine = rq->engine;
1252	struct intel_context * const ce = rq->context;
1253
1254	intel_context_get(ce);
1255
1256	if (unlikely(intel_context_is_banned(ce)))
1257		reset_active(rq, engine);
1258
1259	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1260		execlists_check_context(ce, engine);
1261
1262	if (ce->tag) {
1263		/* Use a fixed tag for OA and friends */
1264		ce->lrc_desc |= (u64)ce->tag << 32;
1265	} else {
1266		/* We don't need a strict matching tag, just different values */
1267		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
1268		ce->lrc_desc |=
1269			(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
1270			GEN11_SW_CTX_ID_SHIFT;
1271		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
1272	}
1273
1274	__intel_gt_pm_get(engine->gt);
1275	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1276	intel_engine_context_in(engine);
1277
1278	return engine;
1279}
1280
1281static inline struct i915_request *
1282execlists_schedule_in(struct i915_request *rq, int idx)
1283{
1284	struct intel_context * const ce = rq->context;
1285	struct intel_engine_cs *old;
1286
1287	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1288	trace_i915_request_in(rq, idx);
1289
1290	old = READ_ONCE(ce->inflight);
1291	do {
1292		if (!old) {
1293			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1294			break;
1295		}
1296	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1297
1298	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1299	return i915_request_get(rq);
1300}
1301
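/*
 * The completed virtual request may have been blocking its successor from
 * other siblings; if the next request can run elsewhere, kick the virtual
 * engine's tasklet so it can be resubmitted.
 */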
1302static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1303{
1304	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1305	struct i915_request *next = READ_ONCE(ve->request);
1306
1307	if (next && next->execution_mask & ~rq->execution_mask)
1308		tasklet_schedule(&ve->base.execlists.tasklet);
1309}
1310
1311static inline void
1312__execlists_schedule_out(struct i915_request *rq,
1313			 struct intel_engine_cs * const engine)
1314{
1315	struct intel_context * const ce = rq->context;
1316
1317	/*
1318	 * NB process_csb() is not under the engine->active.lock and hence
1319	 * schedule_out can race with schedule_in meaning that we should
1320	 * schedule_out can race with schedule_in, meaning that we should
1321	 */
1322
1323	/*
1324	 * If we have just completed this context, the engine may now be
1325	 * idle and we want to re-enter powersaving.
1326	 */
1327	if (list_is_last(&rq->link, &ce->timeline->requests) &&
1328	    i915_request_completed(rq))
1329		intel_engine_add_retire(engine, ce->timeline);
1330
1331	intel_engine_context_out(engine);
1332	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1333	intel_gt_pm_put_async(engine->gt);
1334
1335	/*
1336	 * If this is part of a virtual engine, its next request may
1337	 * have been blocked waiting for access to the active context.
1338	 * We have to kick all the siblings again in case we need to
1339	 * switch (e.g. the next request is not runnable on this
1340	 * engine). Hopefully, we will already have submitted the next
1341	 * request before the tasklet runs and do not need to rebuild
1342	 * each virtual tree and kick everyone again.
1343	 */
1344	if (ce->engine != engine)
1345		kick_siblings(rq, ce);
1346
1347	intel_context_put(ce);
1348}
1349
1350static inline void
1351execlists_schedule_out(struct i915_request *rq)
1352{
1353	struct intel_context * const ce = rq->context;
1354	struct intel_engine_cs *cur, *old;
1355
1356	trace_i915_request_out(rq);
1357
1358	old = READ_ONCE(ce->inflight);
1359	do
1360		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1361	while (!try_cmpxchg(&ce->inflight, &old, cur));
1362	if (!cur)
1363		__execlists_schedule_out(rq, old);
1364
1365	i915_request_put(rq);
1366}
1367
1368static u64 execlists_update_context(struct i915_request *rq)
1369{
1370	struct intel_context *ce = rq->context;
1371	u64 desc = ce->lrc_desc;
1372	u32 tail, prev;
1373
1374	/*
1375	 * WaIdleLiteRestore:bdw,skl
1376	 *
1377	 * We should never submit the context with the same RING_TAIL twice
1378	 * just in case we submit an empty ring, which confuses the HW.
1379	 *
1380	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1381	 * the normal request to be able to always advance the RING_TAIL on
1382	 * subsequent resubmissions (for lite restore). Should that fail us,
1383	 * and we try and submit the same tail again, force the context
1384	 * reload.
1385	 *
1386	 * If we need to return to a preempted context, we need to skip the
1387	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1388	 * HW has a tendency to ignore us rewinding the TAIL to the end of
1389	 * an earlier request.
1390	 */
1391	tail = intel_ring_set_tail(rq->ring, rq->tail);
1392	prev = ce->lrc_reg_state[CTX_RING_TAIL];
1393	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1394		desc |= CTX_DESC_FORCE_RESTORE;
1395	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1396	rq->tail = rq->wa_tail;
1397
1398	/*
1399	 * Make sure the context image is complete before we submit it to HW.
1400	 *
1401	 * Ostensibly, writes (including the WCB) should be flushed prior to
1402	 * an uncached write such as our mmio register access, but the empirical
1403	 * evidence (esp. on Braswell) suggests that the WC write into memory
1404	 * may not be visible to the HW prior to the completion of the UC
1405	 * register write and that we may begin execution from the context
1406	 * before its image is complete leading to invalid PD chasing.
1407	 */
1408	wmb();
1409
1410	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
1411	return desc;
1412}
1413
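/*
 * Write one 64-bit context descriptor for the given port: to the submit
 * queue when a control register is present (the load is triggered
 * separately), or to the legacy ELSP, which expects the upper dword to be
 * written before the lower.
 */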
1414static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1415{
1416#ifdef __NetBSD__
1417	if (execlists->ctrl_reg) {
1418		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2, lower_32_bits(desc));
1419		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2 + 1, upper_32_bits(desc));
1420	} else {
1421		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, upper_32_bits(desc));
1422		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, lower_32_bits(desc));
1423	}
1424#else
1425	if (execlists->ctrl_reg) {
1426		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1427		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1428	} else {
1429		writel(upper_32_bits(desc), execlists->submit_reg);
1430		writel(lower_32_bits(desc), execlists->submit_reg);
1431	}
1432#endif
1433}
1434
1435static __maybe_unused void
1436trace_ports(const struct intel_engine_execlists *execlists,
1437	    const char *msg,
1438	    struct i915_request * const *ports)
1439{
1440	const struct intel_engine_cs *engine =
1441		const_container_of(execlists, typeof(*engine), execlists);
1442
1443	if (!ports[0])
1444		return;
1445
1446	ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
1447		     ports[0]->fence.context,
1448		     ports[0]->fence.seqno,
1449		     i915_request_completed(ports[0]) ? "!" :
1450		     i915_request_started(ports[0]) ? "*" :
1451		     "",
1452		     ports[1] ? ports[1]->fence.context : 0,
1453		     ports[1] ? ports[1]->fence.seqno : 0);
1454}
1455
1456static __maybe_unused bool
1457assert_pending_valid(const struct intel_engine_execlists *execlists,
1458		     const char *msg)
1459{
1460	struct i915_request * const *port, *rq;
1461	struct intel_context *ce = NULL;
1462
1463	trace_ports(execlists, msg, execlists->pending);
1464
1465	if (!execlists->pending[0]) {
1466		GEM_TRACE_ERR("Nothing pending for promotion!\n");
1467		return false;
1468	}
1469
1470	if (execlists->pending[execlists_num_ports(execlists)]) {
1471		GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
1472			      execlists_num_ports(execlists));
1473		return false;
1474	}
1475
1476	for (port = execlists->pending; (rq = *port); port++) {
1477		unsigned long flags;
1478		bool ok = true;
1479
1480		GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1481		GEM_BUG_ON(!i915_request_is_active(rq));
1482
1483		if (ce == rq->context) {
1484			GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
1485				      ce->timeline->fence_context,
1486				      port - execlists->pending);
1487			return false;
1488		}
1489		ce = rq->context;
1490
1491		/* Hold tightly onto the lock to prevent concurrent retires! */
1492		if (!spin_trylock_irqsave(&rq->lock, flags))
1493			continue;
1494
1495		if (i915_request_completed(rq))
1496			goto unlock;
1497
1498		if (i915_active_is_idle(&ce->active) &&
1499		    !intel_context_is_barrier(ce)) {
1500			GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
1501				      ce->timeline->fence_context,
1502				      port - execlists->pending);
1503			ok = false;
1504			goto unlock;
1505		}
1506
1507		if (!i915_vma_is_pinned(ce->state)) {
1508			GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
1509				      ce->timeline->fence_context,
1510				      port - execlists->pending);
1511			ok = false;
1512			goto unlock;
1513		}
1514
1515		if (!i915_vma_is_pinned(ce->ring->vma)) {
1516			GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
1517				      ce->timeline->fence_context,
1518				      port - execlists->pending);
1519			ok = false;
1520			goto unlock;
1521		}
1522
1523unlock:
1524		spin_unlock_irqrestore(&rq->lock, flags);
1525		if (!ok)
1526			return false;
1527	}
1528
1529	return ce;
1530}
1531
1532static void execlists_submit_ports(struct intel_engine_cs *engine)
1533{
1534	struct intel_engine_execlists *execlists = &engine->execlists;
1535	unsigned int n;
1536
1537	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1538
1539	/*
1540	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
1541	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
1542	 * not be relinquished until the device is idle (see
1543	 * i915_gem_idle_work_handler()). As a precaution, we make sure
1544	 * that all ELSP are drained i.e. we have processed the CSB,
1545	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
1546	 */
1547	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1548
1549	/*
1550	 * ELSQ note: the submit queue is not cleared after being submitted
1551	 * to the HW so we need to make sure we always clean it up. This is
1552	 * currently ensured by the fact that we always write the same number
1553	 * of elsq entries, keep this in mind before changing the loop below.
1554	 */
1555	for (n = execlists_num_ports(execlists); n--; ) {
1556		struct i915_request *rq = execlists->pending[n];
1557
1558		write_desc(execlists,
1559			   rq ? execlists_update_context(rq) : 0,
1560			   n);
1561	}
1562
1563	/* we need to manually load the submit queue */
1564	if (execlists->ctrl_reg)
1565#ifdef __NetBSD__
1566		bus_space_write_4(execlists->bst, execlists->bsh, execlists->ctrl_reg, EL_CTRL_LOAD);
1567#else
1568		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1569#endif
1570}
1571
1572static bool ctx_single_port_submission(const struct intel_context *ce)
1573{
1574	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1575		intel_context_force_single_submission(ce));
1576}
1577
1578static bool can_merge_ctx(const struct intel_context *prev,
1579			  const struct intel_context *next)
1580{
1581	if (prev != next)
1582		return false;
1583
1584	if (ctx_single_port_submission(prev))
1585		return false;
1586
1587	return true;
1588}
1589
1590static bool can_merge_rq(const struct i915_request *prev,
1591			 const struct i915_request *next)
1592{
1593	GEM_BUG_ON(prev == next);
1594	GEM_BUG_ON(!assert_priority_queue(prev, next));
1595
1596	/*
1597	 * We do not submit known completed requests. Therefore if the next
1598	 * request is already completed, we can pretend to merge it in
1599	 * with the previous context (and we will skip updating the ELSP
1600	 * and tracking). Thus hopefully keeping the ELSP full with active
1601	 * contexts, despite the best efforts of preempt-to-busy to confuse
1602	 * us.
1603	 */
1604	if (i915_request_completed(next))
1605		return true;
1606
1607	if (unlikely((prev->fence.flags ^ next->fence.flags) &
1608		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1609		      BIT(I915_FENCE_FLAG_SENTINEL))))
1610		return false;
1611
1612	if (!can_merge_ctx(prev->context, next->context))
1613		return false;
1614
1615	return true;
1616}
1617
1618static void virtual_update_register_offsets(u32 *regs,
1619					    struct intel_engine_cs *engine)
1620{
1621	set_offsets(regs, reg_offsets(engine), engine, false);
1622}
1623
1624static bool virtual_matches(const struct virtual_engine *ve,
1625			    const struct i915_request *rq,
1626			    const struct intel_engine_cs *engine)
1627{
1628	const struct intel_engine_cs *inflight;
1629
1630	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1631		return false;
1632
1633	/*
1634	 * We track when the HW has completed saving the context image
1635	 * (i.e. when we have seen the final CS event switching out of
1636	 * the context) and must not overwrite the context image before
1637	 * then. This restricts us to only using the active engine
1638	 * while the previous virtualized request is inflight (so
1639	 * we reuse the register offsets). This is a very small
1640	 * hysteresis on the greedy selection algorithm.
1641	 */
1642	inflight = intel_context_inflight(&ve->context);
1643	if (inflight && inflight != engine)
1644		return false;
1645
1646	return true;
1647}
1648
1649static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
1650				     struct intel_engine_cs *engine)
1651{
1652	struct intel_engine_cs *old = ve->siblings[0];
1653
1654	/* All unattached (rq->engine == old) must already be completed */
1655
1656	spin_lock(&old->breadcrumbs.irq_lock);
1657	if (!list_empty(&ve->context.signal_link)) {
1658		list_move_tail(&ve->context.signal_link,
1659			       &engine->breadcrumbs.signalers);
1660		intel_engine_signal_breadcrumbs(engine);
1661	}
1662	spin_unlock(&old->breadcrumbs.irq_lock);
1663}
1664
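/*
 * Find the request currently executing on the engine by skipping over any
 * already-completed entries at the start of the active ELSP ports.
 */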
1665static struct i915_request *
1666last_active(const struct intel_engine_execlists *execlists)
1667{
1668	struct i915_request * const *last = READ_ONCE(execlists->active);
1669
1670	while (*last && i915_request_completed(*last))
1671		last++;
1672
1673	return *last;
1674}
1675
1676#define for_each_waiter(p__, rq__) \
1677	list_for_each_entry_lockless(p__, \
1678				     &(rq__)->sched.waiters_list, \
1679				     wait_link)
1680
1681static void defer_request(struct i915_request *rq, struct list_head * const pl)
1682{
1683	LIST_HEAD(list);
1684
1685	/*
1686	 * We want to move the interrupted request to the back of
1687	 * the round-robin list (i.e. its priority level), but
1688	 * in doing so, we must then move all requests that were in
1689	 * flight and were waiting for the interrupted request to
1690	 * be run after it again.
1691	 */
1692	do {
1693		struct i915_dependency *p;
1694
1695		GEM_BUG_ON(i915_request_is_active(rq));
1696		list_move_tail(&rq->sched.link, pl);
1697
1698		for_each_waiter(p, rq) {
1699			struct i915_request *w =
1700				container_of(p->waiter, typeof(*w), sched);
1701
1702			/* Leave semaphores spinning on the other engines */
1703			if (w->engine != rq->engine)
1704				continue;
1705
1706			/* No waiter should start before its signaler */
1707			GEM_BUG_ON(i915_request_started(w) &&
1708				   !i915_request_completed(rq));
1709
1710			GEM_BUG_ON(i915_request_is_active(w));
1711			if (!i915_request_is_ready(w))
1712				continue;
1713
1714			if (rq_prio(w) < rq_prio(rq))
1715				continue;
1716
1717			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1718			list_move_tail(&w->sched.link, &list);
1719		}
1720
1721		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1722	} while (rq);
1723}
1724
1725static void defer_active(struct intel_engine_cs *engine)
1726{
1727	struct i915_request *rq;
1728
1729	rq = __unwind_incomplete_requests(engine);
1730	if (!rq)
1731		return;
1732
1733	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1734}
1735
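/*
 * A timeslice is only worthwhile if another request of equal or higher
 * effective priority (either the next request on this engine or the top of
 * the queue) is waiting behind rq.
 */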
1736static bool
1737need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
1738{
1739	int hint;
1740
1741	if (!intel_engine_has_timeslices(engine))
1742		return false;
1743
1744	if (list_is_last(&rq->sched.link, &engine->active.requests))
1745		return false;
1746
1747	hint = max(rq_prio(list_next_entry(rq, sched.link)),
1748		   engine->execlists.queue_priority_hint);
1749
1750	return hint >= effective_prio(rq);
1751}
1752
1753static int
1754switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1755{
1756	if (list_is_last(&rq->sched.link, &engine->active.requests))
1757		return INT_MIN;
1758
1759	return rq_prio(list_next_entry(rq, sched.link));
1760}
1761
1762static inline unsigned long
1763timeslice(const struct intel_engine_cs *engine)
1764{
1765	return READ_ONCE(engine->props.timeslice_duration_ms);
1766}
1767
1768static unsigned long
1769active_timeslice(const struct intel_engine_cs *engine)
1770{
1771	const struct i915_request *rq = *engine->execlists.active;
1772
1773	if (!rq || i915_request_completed(rq))
1774		return 0;
1775
1776	if (engine->execlists.switch_priority_hint < effective_prio(rq))
1777		return 0;
1778
1779	return timeslice(engine);
1780}
1781
1782static void set_timeslice(struct intel_engine_cs *engine)
1783{
1784	if (!intel_engine_has_timeslices(engine))
1785		return;
1786
1787	set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
1788}
1789
1790static void record_preemption(struct intel_engine_execlists *execlists)
1791{
1792	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1793}
1794
1795static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
1796{
1797	struct i915_request *rq;
1798
1799	rq = last_active(&engine->execlists);
1800	if (!rq)
1801		return 0;
1802
1803	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
1804	if (unlikely(intel_context_is_banned(rq->context)))
1805		return 1;
1806
1807	return READ_ONCE(engine->props.preempt_timeout_ms);
1808}
1809
1810static void set_preempt_timeout(struct intel_engine_cs *engine)
1811{
1812	if (!intel_engine_has_preempt_reset(engine))
1813		return;
1814
1815	set_timer_ms(&engine->execlists.preempt,
1816		     active_preempt_timeout(engine));
1817}
1818
1819static inline void clear_ports(struct i915_request **ports, int count)
1820{
1821	memset_p((void **)ports, NULL, count);
1822}
1823
1824static void execlists_dequeue(struct intel_engine_cs *engine)
1825{
1826	struct intel_engine_execlists * const execlists = &engine->execlists;
1827	struct i915_request **port = execlists->pending;
1828	struct i915_request ** const last_port = port + execlists->port_mask;
1829	struct i915_request *last;
1830	struct rb_node *rb;
1831	bool submit = false;
1832
1833	/*
1834	 * Hardware submission is through 2 ports. Conceptually each port
1835	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1836	 * static for a context, and unique to each, so we only execute
1837	 * requests belonging to a single context from each ring. RING_HEAD
1838	 * is maintained by the CS in the context image, it marks the place
1839	 * where it got up to last time, and through RING_TAIL we tell the CS
1840	 * where we want to execute up to this time.
1841	 *
1842	 * In this list the requests are in order of execution. Consecutive
1843	 * requests from the same context are adjacent in the ringbuffer. We
1844	 * can combine these requests into a single RING_TAIL update:
1845	 *
1846	 *              RING_HEAD...req1...req2
1847	 *                                    ^- RING_TAIL
1848	 * since to execute req2 the CS must first execute req1.
1849	 *
	 * Our goal then is to point each port at the end of a consecutive
	 * sequence of requests, as that is the optimal (fewest wake ups
	 * and context switches) submission.
1853	 */
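	/*
	 * Illustrative example: if the queue holds reqA1, reqA2 (both from
	 * context A) followed by reqB1 (context B), we coalesce A1+A2 into
	 * ELSP[0] by submitting context A with RING_TAIL at the end of A2,
	 * and place B1 into ELSP[1].
	 */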
1854
1855	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
1856		struct virtual_engine *ve =
1857			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1858		struct i915_request *rq = READ_ONCE(ve->request);
1859
1860		if (!rq) { /* lazily cleanup after another engine handled rq */
1861			rb_erase_cached(rb, &execlists->virtual);
1862			container_of(rb, struct ve_node, rb)->inserted =
1863			    false;
1864			rb = rb_first_cached(&execlists->virtual);
1865			continue;
1866		}
1867
1868		if (!virtual_matches(ve, rq, engine)) {
1869			rb = rb_next2(&execlists->virtual.rb_root, rb);
1870			continue;
1871		}
1872
1873		break;
1874	}
1875
1876	/*
1877	 * If the queue is higher priority than the last
1878	 * request in the currently active context, submit afresh.
1879	 * We will resubmit again afterwards in case we need to split
1880	 * the active context to interject the preemption request,
1881	 * i.e. we will retrigger preemption following the ack in case
1882	 * of trouble.
1883	 */
1884	last = last_active(execlists);
1885	if (last) {
1886		if (need_preempt(engine, last, rb)) {
1887			ENGINE_TRACE(engine,
1888				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
1889				     last->fence.context,
1890				     last->fence.seqno,
1891				     last->sched.attr.priority,
1892				     execlists->queue_priority_hint);
1893			record_preemption(execlists);
1894
1895			/*
1896			 * Don't let the RING_HEAD advance past the breadcrumb
1897			 * as we unwind (and until we resubmit) so that we do
1898			 * not accidentally tell it to go backwards.
1899			 */
1900			ring_set_paused(engine, 1);
1901
1902			/*
1903			 * Note that we have not stopped the GPU at this point,
1904			 * so we are unwinding the incomplete requests as they
1905			 * remain inflight and so by the time we do complete
1906			 * the preemption, some of the unwound requests may
1907			 * complete!
1908			 */
1909			__unwind_incomplete_requests(engine);
1910
1911			last = NULL;
1912		} else if (need_timeslice(engine, last) &&
1913			   timer_expired(&engine->execlists.timer)) {
1914			ENGINE_TRACE(engine,
1915				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
1916				     last->fence.context,
1917				     last->fence.seqno,
1918				     last->sched.attr.priority,
1919				     execlists->queue_priority_hint);
1920
1921			ring_set_paused(engine, 1);
1922			defer_active(engine);
1923
1924			/*
1925			 * Unlike for preemption, if we rewind and continue
1926			 * executing the same context as previously active,
1927			 * the order of execution will remain the same and
1928			 * the tail will only advance. We do not need to
1929			 * force a full context restore, as a lite-restore
1930			 * is sufficient to resample the monotonic TAIL.
1931			 *
1932			 * If we switch to any other context, similarly we
1933			 * will not rewind TAIL of current context, and
1934			 * normal save/restore will preserve state and allow
1935			 * us to later continue executing the same request.
1936			 */
1937			last = NULL;
1938		} else {
1939			/*
1940			 * Otherwise if we already have a request pending
1941			 * for execution after the current one, we can
1942			 * just wait until the next CS event before
1943			 * queuing more. In either case we will force a
1944			 * lite-restore preemption event, but if we wait
1945			 * we hopefully coalesce several updates into a single
1946			 * submission.
1947			 */
1948			if (!list_is_last(&last->sched.link,
1949					  &engine->active.requests)) {
1950				/*
1951				 * Even if ELSP[1] is occupied and not worthy
1952				 * of timeslices, our queue might be.
1953				 */
1954				if (!timer_pending(&execlists->timer) &&
1955				    need_timeslice(engine, last))
1956					set_timer_ms(&execlists->timer,
1957						     timeslice(engine));
1958
1959				return;
1960			}
1961		}
1962	}
1963
1964	while (rb) { /* XXX virtual is always taking precedence */
1965		struct virtual_engine *ve =
1966			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1967		struct i915_request *rq;
1968
1969		spin_lock(&ve->base.active.lock);
1970
1971		rq = ve->request;
1972		if (unlikely(!rq)) { /* lost the race to a sibling */
1973			spin_unlock(&ve->base.active.lock);
1974			rb_erase_cached(rb, &execlists->virtual);
1975			container_of(rb, struct ve_node, rb)->inserted =
1976			    false;
1977			rb = rb_first_cached(&execlists->virtual);
1978			continue;
1979		}
1980
1981		GEM_BUG_ON(rq != ve->request);
1982		GEM_BUG_ON(rq->engine != &ve->base);
1983		GEM_BUG_ON(rq->context != &ve->context);
1984
1985		if (rq_prio(rq) >= queue_prio(execlists)) {
1986			if (!virtual_matches(ve, rq, engine)) {
1987				spin_unlock(&ve->base.active.lock);
1988				rb = rb_next2(&execlists->virtual.rb_root,
1989				    rb);
1990				continue;
1991			}
1992
1993			if (last && !can_merge_rq(last, rq)) {
1994				spin_unlock(&ve->base.active.lock);
1995				return; /* leave this for another */
1996			}
1997
1998			ENGINE_TRACE(engine,
1999				     "virtual rq=%llx:%lld%s, new engine? %s\n",
2000				     rq->fence.context,
2001				     rq->fence.seqno,
2002				     i915_request_completed(rq) ? "!" :
2003				     i915_request_started(rq) ? "*" :
2004				     "",
2005				     yesno(engine != ve->siblings[0]));
2006
2007			ve->request = NULL;
2008			ve->base.execlists.queue_priority_hint = INT_MIN;
2009			rb_erase_cached(rb, &execlists->virtual);
2010			container_of(rb, struct ve_node, rb)->inserted =
2011			    false;
2012
2013			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2014			rq->engine = engine;
2015
2016			if (engine != ve->siblings[0]) {
2017				u32 *regs = ve->context.lrc_reg_state;
2018				unsigned int n;
2019
2020				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
2021
2022				if (!intel_engine_has_relative_mmio(engine))
2023					virtual_update_register_offsets(regs,
2024									engine);
2025
2026				if (!list_empty(&ve->context.signals))
2027					virtual_xfer_breadcrumbs(ve, engine);
2028
2029				/*
2030				 * Move the bound engine to the top of the list
2031				 * for future execution. We then kick this
2032				 * tasklet first before checking others, so that
2033				 * we preferentially reuse this set of bound
2034				 * registers.
2035				 */
2036				for (n = 1; n < ve->num_siblings; n++) {
2037					if (ve->siblings[n] == engine) {
2038						swap(ve->siblings[n],
2039						     ve->siblings[0]);
2040						break;
2041					}
2042				}
2043
2044				GEM_BUG_ON(ve->siblings[0] != engine);
2045			}
2046
2047			if (__i915_request_submit(rq)) {
2048				submit = true;
2049				last = rq;
2050			}
2051			i915_request_put(rq);
2052
2053			/*
2054			 * Hmm, we have a bunch of virtual engine requests,
2055			 * but the first one was already completed (thanks
			 * preempt-to-busy!). Keep looking at the virtual engine queue
2057			 * until we have no more relevant requests (i.e.
2058			 * the normal submit queue has higher priority).
2059			 */
2060			if (!submit) {
2061				spin_unlock(&ve->base.active.lock);
2062				rb = rb_first_cached(&execlists->virtual);
2063				continue;
2064			}
2065		}
2066
2067		spin_unlock(&ve->base.active.lock);
2068		break;
2069	}
2070
2071	while ((rb = rb_first_cached(&execlists->queue))) {
2072		struct i915_priolist *p = to_priolist(rb);
2073		struct i915_request *rq, *rn;
2074		int i;
2075
2076		priolist_for_each_request_consume(rq, rn, p, i) {
2077			bool merge = true;
2078
2079			/*
2080			 * Can we combine this request with the current port?
2081			 * It has to be the same context/ringbuffer and not
2082			 * have any exceptions (e.g. GVT saying never to
2083			 * combine contexts).
2084			 *
2085			 * If we can combine the requests, we can execute both
2086			 * by updating the RING_TAIL to point to the end of the
2087			 * second request, and so we never need to tell the
2088			 * hardware about the first.
2089			 */
2090			if (last && !can_merge_rq(last, rq)) {
2091				/*
2092				 * If we are on the second port and cannot
2093				 * combine this request with the last, then we
2094				 * are done.
2095				 */
2096				if (port == last_port)
2097					goto done;
2098
2099				/*
2100				 * We must not populate both ELSP[] with the
2101				 * same LRCA, i.e. we must submit 2 different
2102				 * contexts if we submit 2 ELSP.
2103				 */
2104				if (last->context == rq->context)
2105					goto done;
2106
2107				if (i915_request_has_sentinel(last))
2108					goto done;
2109
2110				/*
2111				 * If GVT overrides us we only ever submit
2112				 * port[0], leaving port[1] empty. Note that we
2113				 * also have to be careful that we don't queue
2114				 * the same context (even though a different
2115				 * request) to the second port.
2116				 */
2117				if (ctx_single_port_submission(last->context) ||
2118				    ctx_single_port_submission(rq->context))
2119					goto done;
2120
2121				merge = false;
2122			}
2123
2124			if (__i915_request_submit(rq)) {
2125				if (!merge) {
2126					*port = execlists_schedule_in(last, port - execlists->pending);
2127					port++;
2128					last = NULL;
2129				}
2130
2131				GEM_BUG_ON(last &&
2132					   !can_merge_ctx(last->context,
2133							  rq->context));
2134
2135				submit = true;
2136				last = rq;
2137			}
2138		}
2139
2140		rb_erase_cached(&p->node, &execlists->queue);
2141		i915_priolist_free(p);
2142	}
2143
2144done:
2145	/*
2146	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2147	 *
2148	 * We choose the priority hint such that if we add a request of greater
2149	 * priority than this, we kick the submission tasklet to decide on
2150	 * the right order of submitting the requests to hardware. We must
2151	 * also be prepared to reorder requests as they are in-flight on the
2152	 * HW. We derive the priority hint then as the first "hole" in
2153	 * the HW submission ports and if there are no available slots,
2154	 * the priority of the lowest executing request, i.e. last.
2155	 *
2156	 * When we do receive a higher priority request ready to run from the
2157	 * user, see queue_request(), the priority hint is bumped to that
2158	 * request triggering preemption on the next dequeue (or subsequent
2159	 * interrupt for secondary ports).
2160	 */
2161	execlists->queue_priority_hint = queue_prio(execlists);
2162
2163	if (submit) {
2164		*port = execlists_schedule_in(last, port - execlists->pending);
2165		execlists->switch_priority_hint =
2166			switch_prio(engine, *execlists->pending);
2167
2168		/*
2169		 * Skip if we ended up with exactly the same set of requests,
2170		 * e.g. trying to timeslice a pair of ordered contexts
2171		 */
2172		if (!memcmp(execlists->active, execlists->pending,
2173			    (port - execlists->pending + 1) * sizeof(*port))) {
2174			do
2175				execlists_schedule_out(fetch_and_zero(port));
2176			while (port-- != execlists->pending);
2177
2178			goto skip_submit;
2179		}
2180		clear_ports(port + 1, last_port - port);
2181
2182		execlists_submit_ports(engine);
2183		set_preempt_timeout(engine);
2184	} else {
2185skip_submit:
2186		ring_set_paused(engine, 0);
2187	}
2188}
2189
2190static void
2191cancel_port_requests(struct intel_engine_execlists * const execlists)
2192{
2193	struct i915_request * const *port;
2194
2195	for (port = execlists->pending; *port; port++)
2196		execlists_schedule_out(*port);
2197	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2198
2199	/* Mark the end of active before we overwrite *active */
2200	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2201		execlists_schedule_out(*port);
2202	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2203
2204	WRITE_ONCE(execlists->active, execlists->inflight);
2205}
2206
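/*
 * Flush the cachelines covering the given CSB entries so that subsequent
 * reads observe fresh writes from the GPU (see the Gen11 note at the end
 * of process_csb()).
 */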
2207static inline void
2208invalidate_csb_entries(const u32 *first, const u32 *last)
2209{
2210	clflush(__UNCONST(first));
2211	clflush(__UNCONST(last));
2212}
2213
2214static inline bool
2215reset_in_progress(const struct intel_engine_execlists *execlists)
2216{
2217	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
2218}
2219
2220/*
2221 * Starting with Gen12, the status has a new format:
2222 *
2223 *     bit  0:     switched to new queue
2224 *     bit  1:     reserved
2225 *     bit  2:     semaphore wait mode (poll or signal), only valid when
2226 *                 switch detail is set to "wait on semaphore"
2227 *     bits 3-5:   engine class
2228 *     bits 6-11:  engine instance
2229 *     bits 12-14: reserved
2230 *     bits 15-25: sw context id of the lrc the GT switched to
2231 *     bits 26-31: sw counter of the lrc the GT switched to
2232 *     bits 32-35: context switch detail
2233 *                  - 0: ctx complete
2234 *                  - 1: wait on sync flip
2235 *                  - 2: wait on vblank
2236 *                  - 3: wait on scanline
2237 *                  - 4: wait on semaphore
2238 *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2239 *                       WAIT_FOR_EVENT)
2240 *     bit  36:    reserved
2241 *     bits 37-43: wait detail (for switch detail 1 to 4)
2242 *     bits 44-46: reserved
2243 *     bits 47-57: sw context id of the lrc the GT switched away from
2244 *     bits 58-63: sw counter of the lrc the GT switched away from
2245 */
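/*
 * In terms of the fields above: an event with "switched to new queue" set,
 * or without a valid outgoing (away) context, means the GT has taken up the
 * pending ELSP submission and we promote pending[] to inflight[]; otherwise
 * it reports completion within the currently active context.
 */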
2246static inline bool
2247gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2248{
2249	u32 lower_dw = csb[0];
2250	u32 upper_dw = csb[1];
2251	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2252	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2253	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2254
2255	/*
2256	 * The context switch detail is not guaranteed to be 5 when a preemption
2257	 * occurs, so we can't just check for that. The check below works for
2258	 * all the cases we care about, including preemptions of WAIT
2259	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
2260	 * would require some extra handling, but we don't support that.
2261	 */
2262	if (!ctx_away_valid || new_queue) {
2263		GEM_BUG_ON(!ctx_to_valid);
2264		return true;
2265	}
2266
2267	/*
2268	 * switch detail = 5 is covered by the case above and we do not expect a
2269	 * context switch on an unsuccessful wait instruction since we always
2270	 * use polling mode.
2271	 */
2272	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2273	return false;
2274}
2275
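/*
 * Pre-Gen12 CSB entries: promotion of the pending ELSP is signalled by
 * either the idle->active transition or the preempted status bit.
 */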
2276static inline bool
2277gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2278{
2279	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2280}
2281
2282static void process_csb(struct intel_engine_cs *engine)
2283{
2284	struct intel_engine_execlists * const execlists = &engine->execlists;
2285	const u32 * const buf = execlists->csb_status;
2286	const u8 num_entries = execlists->csb_size;
2287	u8 head, tail;
2288
2289	/*
2290	 * As we modify our execlists state tracking we require exclusive
2291	 * access. Either we are inside the tasklet, or the tasklet is disabled
2292	 * and we assume that is only inside the reset paths and so serialised.
2293	 */
2294	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2295		   !reset_in_progress(execlists));
2296	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2297
2298	/*
2299	 * Note that csb_write, csb_status may be either in HWSP or mmio.
2300	 * When reading from the csb_write mmio register, we have to be
2301	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2302	 * the low 4bits. As it happens we know the next 4bits are always
	 * zero and so we can simply mask off the low u8 of the register
2304	 * and treat it identically to reading from the HWSP (without having
2305	 * to use explicit shifting and masking, and probably bifurcating
2306	 * the code to handle the legacy mmio read).
2307	 */
2308	head = execlists->csb_head;
2309	tail = READ_ONCE(*execlists->csb_write);
2310	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2311	if (unlikely(head == tail))
2312		return;
2313
2314	/*
2315	 * Hopefully paired with a wmb() in HW!
2316	 *
2317	 * We must complete the read of the write pointer before any reads
2318	 * from the CSB, so that we do not see stale values. Without an rmb
2319	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
2320	 * we perform the READ_ONCE(*csb_write).
2321	 */
2322	rmb();
2323
2324	do {
2325		bool promote;
2326
2327		if (++head == num_entries)
2328			head = 0;
2329
2330		/*
2331		 * We are flying near dragons again.
2332		 *
2333		 * We hold a reference to the request in execlist_port[]
2334		 * but no more than that. We are operating in softirq
2335		 * context and so cannot hold any mutex or sleep. That
2336		 * prevents us stopping the requests we are processing
2337		 * in port[] from being retired simultaneously (the
2338		 * breadcrumb will be complete before we see the
2339		 * context-switch). As we only hold the reference to the
2340		 * request, any pointer chasing underneath the request
2341		 * is subject to a potential use-after-free. Thus we
2342		 * store all of the bookkeeping within port[] as
2343		 * required, and avoid using unguarded pointers beneath
2344		 * request itself. The same applies to the atomic
2345		 * status notifier.
2346		 */
2347
2348		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2349			     head, buf[2 * head + 0], buf[2 * head + 1]);
2350
2351		if (INTEL_GEN(engine->i915) >= 12)
2352			promote = gen12_csb_parse(execlists, buf + 2 * head);
2353		else
2354			promote = gen8_csb_parse(execlists, buf + 2 * head);
2355		if (promote) {
2356			struct i915_request * const *old = execlists->active;
2357
2358			/* Point active to the new ELSP; prevent overwriting */
2359			WRITE_ONCE(execlists->active, execlists->pending);
2360
2361			if (!inject_preempt_hang(execlists))
2362				ring_set_paused(engine, 0);
2363
2364			/* cancel old inflight, prepare for switch */
2365			trace_ports(execlists, "preempted", old);
2366			while (*old)
2367				execlists_schedule_out(*old++);
2368
2369			/* switch pending to inflight */
2370			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2371			WRITE_ONCE(execlists->active,
2372				   memcpy(execlists->inflight,
2373					  execlists->pending,
2374					  execlists_num_ports(execlists) *
2375					  sizeof(*execlists->pending)));
2376
2377			WRITE_ONCE(execlists->pending[0], NULL);
2378		} else {
2379			GEM_BUG_ON(!*execlists->active);
2380
2381			/* port0 completed, advanced to port1 */
2382			trace_ports(execlists, "completed", execlists->active);
2383
2384			/*
2385			 * We rely on the hardware being strongly
2386			 * ordered, that the breadcrumb write is
2387			 * coherent (visible from the CPU) before the
2388			 * user interrupt and CSB is processed.
2389			 */
2390			GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
2391				   !reset_in_progress(execlists));
2392			execlists_schedule_out(*execlists->active++);
2393
2394			GEM_BUG_ON(execlists->active - execlists->inflight >
2395				   execlists_num_ports(execlists));
2396		}
2397	} while (head != tail);
2398
2399	execlists->csb_head = head;
2400	set_timeslice(engine);
2401
2402	/*
	 * Gen11 has proven to fail wrt the global observation point
	 * between entry and tail update, failing on the ordering and
	 * thus we see a stale entry in the context status buffer.
	 *
	 * Forcibly evict the entries before the next gpu csb update,
	 * to increase the odds that we get fresh entries even on
	 * non-working hardware. The cost of doing so comes out mostly
	 * in the wash, as the hardware, working or not, will need to
	 * do the invalidation beforehand anyway.
2412	 */
2413	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2414}
2415
2416static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2417{
2418	lockdep_assert_held(&engine->active.lock);
2419	if (!engine->execlists.pending[0]) {
2420		rcu_read_lock(); /* protect peeking at execlists->active */
2421		execlists_dequeue(engine);
2422		rcu_read_unlock();
2423	}
2424}
2425
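/*
 * Suspend rq, and every ready waiter of rq on this engine, by moving them
 * from the in-flight/priority queues onto engine->active.hold. They remain
 * there, unsubmitted, until execlists_unhold() returns them for execution.
 */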
2426static void __execlists_hold(struct i915_request *rq)
2427{
2428	LIST_HEAD(list);
2429
2430	do {
2431		struct i915_dependency *p;
2432
2433		if (i915_request_is_active(rq))
2434			__i915_request_unsubmit(rq);
2435
2436		RQ_TRACE(rq, "on hold\n");
2437		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2438		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2439		i915_request_set_hold(rq);
2440
2441		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2442			struct i915_request *w =
2443				container_of(p->waiter, typeof(*w), sched);
2444
2445			/* Leave semaphores spinning on the other engines */
2446			if (w->engine != rq->engine)
2447				continue;
2448
2449			if (!i915_request_is_ready(w))
2450				continue;
2451
2452			if (i915_request_completed(w))
2453				continue;
2454
			if (i915_request_on_hold(w))
2456				continue;
2457
2458			list_move_tail(&w->sched.link, &list);
2459		}
2460
2461		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2462	} while (rq);
2463}
2464
2465static bool execlists_hold(struct intel_engine_cs *engine,
2466			   struct i915_request *rq)
2467{
2468	spin_lock_irq(&engine->active.lock);
2469
2470	if (i915_request_completed(rq)) { /* too late! */
2471		rq = NULL;
2472		goto unlock;
2473	}
2474
2475	if (rq->engine != engine) { /* preempted virtual engine */
2476		struct virtual_engine *ve = to_virtual_engine(rq->engine);
2477
2478		/*
2479		 * intel_context_inflight() is only protected by virtue
2480		 * of process_csb() being called only by the tasklet (or
2481		 * directly from inside reset while the tasklet is suspended).
2482		 * Assert that neither of those are allowed to run while we
2483		 * poke at the request queues.
2484		 */
2485		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2486
2487		/*
2488		 * An unsubmitted request along a virtual engine will
2489		 * remain on the active (this) engine until we are able
2490		 * to process the context switch away (and so mark the
2491		 * context as no longer in flight). That cannot have happened
2492		 * yet, otherwise we would not be hanging!
2493		 */
2494		spin_lock(&ve->base.active.lock);
2495		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2496		GEM_BUG_ON(ve->request != rq);
2497		ve->request = NULL;
2498		spin_unlock(&ve->base.active.lock);
2499		i915_request_put(rq);
2500
2501		rq->engine = engine;
2502	}
2503
2504	/*
2505	 * Transfer this request onto the hold queue to prevent it
	 * being resubmitted to HW (and potentially completed) before we have
2507	 * released it. Since we may have already submitted following
2508	 * requests, we need to remove those as well.
2509	 */
2510	GEM_BUG_ON(i915_request_on_hold(rq));
2511	GEM_BUG_ON(rq->engine != engine);
2512	__execlists_hold(rq);
2513
2514unlock:
2515	spin_unlock_irq(&engine->active.lock);
2516	return rq;
2517}
2518
2519static bool hold_request(const struct i915_request *rq)
2520{
2521	struct i915_dependency *p;
2522
2523	/*
2524	 * If one of our ancestors is on hold, we must also be on hold,
2525	 * otherwise we will bypass it and execute before it.
2526	 */
2527	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
2528		const struct i915_request *s =
2529			container_of(p->signaler, typeof(*s), sched);
2530
2531		if (s->engine != rq->engine)
2532			continue;
2533
2534		if (i915_request_on_hold(s))
2535			return true;
2536	}
2537
2538	return false;
2539}
2540
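/*
 * Move rq, and any of its waiters that were suspended along with it, from
 * engine->active.hold back onto the priority queue for resubmission.
 */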
2541static void __execlists_unhold(struct i915_request *rq)
2542{
2543	LIST_HEAD(list);
2544
2545	do {
2546		struct i915_dependency *p;
2547
2548		GEM_BUG_ON(!i915_request_on_hold(rq));
2549		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2550
2551		i915_request_clear_hold(rq);
2552		list_move_tail(&rq->sched.link,
2553			       i915_sched_lookup_priolist(rq->engine,
2554							  rq_prio(rq)));
2555		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2556		RQ_TRACE(rq, "hold release\n");
2557
2558		/* Also release any children on this engine that are ready */
2559		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2560			struct i915_request *w =
2561				container_of(p->waiter, typeof(*w), sched);
2562
2563			if (w->engine != rq->engine)
2564				continue;
2565
			if (!i915_request_on_hold(w))
2567				continue;
2568
2569			/* Check that no other parents are also on hold */
2570			if (hold_request(rq))
2571				continue;
2572
2573			list_move_tail(&w->sched.link, &list);
2574		}
2575
2576		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2577	} while (rq);
2578}
2579
2580static void execlists_unhold(struct intel_engine_cs *engine,
2581			     struct i915_request *rq)
2582{
2583	spin_lock_irq(&engine->active.lock);
2584
2585	/*
2586	 * Move this request back to the priority queue, and all of its
2587	 * children and grandchildren that were suspended along with it.
2588	 */
2589	__execlists_unhold(rq);
2590
2591	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2592		engine->execlists.queue_priority_hint = rq_prio(rq);
2593		tasklet_hi_schedule(&engine->execlists.tasklet);
2594	}
2595
2596	spin_unlock_irq(&engine->active.lock);
2597}
2598
2599struct execlists_capture {
2600	struct work_struct work;
2601	struct i915_request *rq;
2602	struct i915_gpu_coredump *error;
2603};
2604
2605static void execlists_capture_work(struct work_struct *work)
2606{
2607	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2608	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2609	struct intel_engine_cs *engine = cap->rq->engine;
2610	struct intel_gt_coredump *gt = cap->error->gt;
2611	struct intel_engine_capture_vma *vma;
2612
2613	/* Compress all the objects attached to the request, slow! */
2614	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2615	if (vma) {
2616		struct i915_vma_compress *compress =
2617			i915_vma_capture_prepare(gt);
2618
2619		intel_engine_coredump_add_vma(gt->engine, vma, compress);
2620		i915_vma_capture_finish(gt, compress);
2621	}
2622
2623	gt->simulated = gt->engine->simulated;
2624	cap->error->simulated = gt->simulated;
2625
2626	/* Publish the error state, and announce it to the world */
2627	i915_error_state_store(cap->error);
2628	i915_gpu_coredump_put(cap->error);
2629
2630	/* Return this request and all that depend upon it for signaling */
2631	execlists_unhold(engine, cap->rq);
2632	i915_request_put(cap->rq);
2633
2634	kfree(cap);
2635}
2636
2637static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2638{
2639	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2640	struct execlists_capture *cap;
2641
2642	cap = kmalloc(sizeof(*cap), gfp);
2643	if (!cap)
2644		return NULL;
2645
2646	cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2647	if (!cap->error)
2648		goto err_cap;
2649
2650	cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2651	if (!cap->error->gt)
2652		goto err_gpu;
2653
2654	cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2655	if (!cap->error->gt->engine)
2656		goto err_gt;
2657
2658	return cap;
2659
2660err_gt:
2661	kfree(cap->error->gt);
2662err_gpu:
2663	kfree(cap->error);
2664err_cap:
2665	kfree(cap);
2666	return NULL;
2667}
2668
2669static bool execlists_capture(struct intel_engine_cs *engine)
2670{
2671	struct execlists_capture *cap;
2672
2673	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
2674		return true;
2675
2676	/*
2677	 * We need to _quickly_ capture the engine state before we reset.
2678	 * We are inside an atomic section (softirq) here and we are delaying
2679	 * the forced preemption event.
2680	 */
2681	cap = capture_regs(engine);
2682	if (!cap)
2683		return true;
2684
2685	cap->rq = execlists_active(&engine->execlists);
2686	GEM_BUG_ON(!cap->rq);
2687
2688	rcu_read_lock();
2689	cap->rq = active_request(cap->rq->context->timeline, cap->rq);
2690	cap->rq = i915_request_get_rcu(cap->rq);
2691	rcu_read_unlock();
2692	if (!cap->rq)
2693		goto err_free;
2694
2695	/*
2696	 * Remove the request from the execlists queue, and take ownership
2697	 * of the request. We pass it to our worker who will _slowly_ compress
2698	 * all the pages the _user_ requested for debugging their batch, after
2699	 * which we return it to the queue for signaling.
2700	 *
2701	 * By removing them from the execlists queue, we also remove the
2702	 * requests from being processed by __unwind_incomplete_requests()
2703	 * during the intel_engine_reset(), and so they will *not* be replayed
2704	 * afterwards.
2705	 *
2706	 * Note that because we have not yet reset the engine at this point,
	 * it is possible that the request we have identified as guilty
	 * did in fact complete, and we will then hit an arbitration
	 * point allowing the outstanding preemption to succeed. The likelihood
2710	 * of that is very low (as capturing of the engine registers should be
2711	 * fast enough to run inside an irq-off atomic section!), so we will
2712	 * simply hold that request accountable for being non-preemptible
2713	 * long enough to force the reset.
2714	 */
2715	if (!execlists_hold(engine, cap->rq))
2716		goto err_rq;
2717
2718	INIT_WORK(&cap->work, execlists_capture_work);
2719	schedule_work(&cap->work);
2720	return true;
2721
2722err_rq:
2723	i915_request_put(cap->rq);
2724err_free:
2725	i915_gpu_coredump_put(cap->error);
2726	kfree(cap);
2727	return false;
2728}
2729
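/*
 * A forced preemption failed to complete within the preempt timeout:
 * capture the offending request for the error state (when error capture is
 * enabled) and reset this engine alone, provided the reset modparam allows
 * engine resets.
 */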
2730static noinline void preempt_reset(struct intel_engine_cs *engine)
2731{
2732	const unsigned int bit = I915_RESET_ENGINE + engine->id;
2733	unsigned long *lock = &engine->gt->reset.flags;
2734
2735	if (i915_modparams.reset < 3)
2736		return;
2737
2738	if (test_and_set_bit(bit, lock))
2739		return;
2740
2741	/* Mark this tasklet as disabled to avoid waiting for it to complete */
2742	tasklet_disable_nosync(&engine->execlists.tasklet);
2743
2744	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
2745		     READ_ONCE(engine->props.preempt_timeout_ms),
2746		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
2747
2748	ring_set_paused(engine, 1); /* Freeze the current request in place */
2749	if (execlists_capture(engine))
2750		intel_engine_reset(engine, "preemption time out");
2751	else
2752		ring_set_paused(engine, 0);
2753
2754	tasklet_enable(&engine->execlists.tasklet);
2755	clear_and_wake_up_bit(bit, lock);
2756}
2757
2758static bool preempt_timeout(const struct intel_engine_cs *const engine)
2759{
2760	const struct timer_list *t = &engine->execlists.preempt;
2761
2762	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2763		return false;
2764
2765	if (!timer_expired(t))
2766		return false;
2767
2768	return READ_ONCE(engine->execlists.pending[0]);
2769}
2770
2771/*
2772 * Check the unread Context Status Buffers and manage the submission of new
2773 * contexts to the ELSP accordingly.
2774 */
2775static void execlists_submission_tasklet(unsigned long data)
2776{
2777	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
2778	bool timeout = preempt_timeout(engine);
2779
2780	process_csb(engine);
2781	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
2782		unsigned long flags;
2783
2784		spin_lock_irqsave(&engine->active.lock, flags);
2785		__execlists_submission_tasklet(engine);
2786		spin_unlock_irqrestore(&engine->active.lock, flags);
2787
2788		/* Recheck after serialising with direct-submission */
2789		if (timeout && preempt_timeout(engine))
2790			preempt_reset(engine);
2791	}
2792}
2793
2794static void __execlists_kick(struct intel_engine_execlists *execlists)
2795{
2796	/* Kick the tasklet for some interrupt coalescing and reset handling */
2797	tasklet_hi_schedule(&execlists->tasklet);
2798}
2799
2800#define execlists_kick(t, member) \
2801	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
2802
2803static void execlists_timeslice(struct timer_list *timer)
2804{
2805	execlists_kick(timer, timer);
2806}
2807
2808static void execlists_preempt(struct timer_list *timer)
2809{
2810	execlists_kick(timer, preempt);
2811}
2812
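/* Place the request onto the engine's priority queue, ready for dequeue. */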
2813static void queue_request(struct intel_engine_cs *engine,
2814			  struct i915_request *rq)
2815{
2816	GEM_BUG_ON(!list_empty(&rq->sched.link));
2817	list_add_tail(&rq->sched.link,
2818		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
2819	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2820}
2821
2822static void __submit_queue_imm(struct intel_engine_cs *engine)
2823{
2824	struct intel_engine_execlists * const execlists = &engine->execlists;
2825
2826	if (reset_in_progress(execlists))
2827		return; /* defer until we restart the engine following reset */
2828
2829	if (execlists->tasklet.func == execlists_submission_tasklet)
2830		__execlists_submission_tasklet(engine);
2831	else
2832		tasklet_hi_schedule(&execlists->tasklet);
2833}
2834
2835static void submit_queue(struct intel_engine_cs *engine,
2836			 const struct i915_request *rq)
2837{
2838	struct intel_engine_execlists *execlists = &engine->execlists;
2839
2840	if (rq_prio(rq) <= execlists->queue_priority_hint)
2841		return;
2842
2843	execlists->queue_priority_hint = rq_prio(rq);
2844	__submit_queue_imm(engine);
2845}
2846
2847static bool ancestor_on_hold(const struct intel_engine_cs *engine,
2848			     const struct i915_request *rq)
2849{
2850	GEM_BUG_ON(i915_request_on_hold(rq));
2851	return !list_empty(&engine->active.hold) && hold_request(rq);
2852}
2853
2854static void execlists_submit_request(struct i915_request *request)
2855{
2856	struct intel_engine_cs *engine = request->engine;
2857	unsigned long flags;
2858
2859	/* Will be called from irq-context when using foreign fences. */
2860	spin_lock_irqsave(&engine->active.lock, flags);
2861
2862	if (unlikely(ancestor_on_hold(engine, request))) {
2863		list_add_tail(&request->sched.link, &engine->active.hold);
2864		i915_request_set_hold(request);
2865	} else {
2866		queue_request(engine, request);
2867
2868		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
2869		GEM_BUG_ON(list_empty(&request->sched.link));
2870
2871		submit_queue(engine, request);
2872	}
2873
2874	spin_unlock_irqrestore(&engine->active.lock, flags);
2875}
2876
2877static void __execlists_context_fini(struct intel_context *ce)
2878{
2879	intel_ring_put(ce->ring);
2880	i915_vma_put(ce->state);
2881}
2882
2883static void execlists_context_destroy(struct kref *kref)
2884{
2885	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2886
2887	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
2888	GEM_BUG_ON(intel_context_is_pinned(ce));
2889
2890	if (ce->state)
2891		__execlists_context_fini(ce);
2892
2893	intel_context_fini(ce);
2894	intel_context_free(ce);
2895}
2896
2897static void
2898set_redzone(void *vaddr, const struct intel_engine_cs *engine)
2899{
2900	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2901		return;
2902
2903	vaddr += engine->context_size;
2904
2905	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
2906}
2907
2908static void
2909check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
2910{
2911	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2912		return;
2913
2914	vaddr += engine->context_size;
2915
2916	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
2917		dev_err_once(engine->i915->drm.dev,
2918			     "%s context redzone overwritten!\n",
2919			     engine->name);
2920}
2921
2922static void execlists_context_unpin(struct intel_context *ce)
2923{
2924	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
2925		      ce->engine);
2926
2927	i915_gem_object_unpin_map(ce->state->obj);
2928}
2929
2930static void
2931__execlists_update_reg_state(const struct intel_context *ce,
2932			     const struct intel_engine_cs *engine,
2933			     u32 head)
2934{
2935	struct intel_ring *ring = ce->ring;
2936	u32 *regs = ce->lrc_reg_state;
2937
2938	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
2939	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
2940
2941	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
2942	regs[CTX_RING_HEAD] = head;
2943	regs[CTX_RING_TAIL] = ring->tail;
2944
2945	/* RPCS */
2946	if (engine->class == RENDER_CLASS) {
2947		regs[CTX_R_PWR_CLK_STATE] =
2948			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
2949
2950		i915_oa_init_reg_state(ce, engine);
2951	}
2952}
2953
2954static int
2955__execlists_context_pin(struct intel_context *ce,
2956			struct intel_engine_cs *engine)
2957{
2958	void *vaddr;
2959
2960	GEM_BUG_ON(!ce->state);
2961	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
2962
2963	vaddr = i915_gem_object_pin_map(ce->state->obj,
2964					i915_coherent_map_type(engine->i915) |
2965					I915_MAP_OVERRIDE);
2966	if (IS_ERR(vaddr))
2967		return PTR_ERR(vaddr);
2968
2969	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
2970	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
2971	__execlists_update_reg_state(ce, engine, ce->ring->tail);
2972
2973	return 0;
2974}
2975
2976static int execlists_context_pin(struct intel_context *ce)
2977{
2978	return __execlists_context_pin(ce, ce->engine);
2979}
2980
2981static int execlists_context_alloc(struct intel_context *ce)
2982{
2983	return __execlists_context_alloc(ce, ce->engine);
2984}
2985
2986static void execlists_context_reset(struct intel_context *ce)
2987{
2988	CE_TRACE(ce, "reset\n");
2989	GEM_BUG_ON(!intel_context_is_pinned(ce));
2990
2991	/*
2992	 * Because we emit WA_TAIL_DWORDS there may be a disparity
2993	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
2994	 * that stored in context. As we only write new commands from
2995	 * ce->ring->tail onwards, everything before that is junk. If the GPU
2996	 * starts reading from its RING_HEAD from the context, it may try to
2997	 * execute that junk and die.
2998	 *
	 * The contexts that are still pinned on resume belong to the
	 * kernel, and are local to each engine. All other contexts will
	 * have their head/tail sanitized upon pinning before use, so they
	 * will never see garbage.
3003	 *
3004	 * So to avoid that we reset the context images upon resume. For
3005	 * simplicity, we just zero everything out.
3006	 */
3007	intel_ring_reset(ce->ring, ce->ring->emit);
3008
3009	/* Scrub away the garbage */
3010	execlists_init_reg_state(ce->lrc_reg_state,
3011				 ce, ce->engine, ce->ring, true);
3012	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3013
3014	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
3015}
3016
3017static const struct intel_context_ops execlists_context_ops = {
3018	.alloc = execlists_context_alloc,
3019
3020	.pin = execlists_context_pin,
3021	.unpin = execlists_context_unpin,
3022
3023	.enter = intel_context_enter_engine,
3024	.exit = intel_context_exit_engine,
3025
3026	.reset = execlists_context_reset,
3027	.destroy = execlists_context_destroy,
3028};
3029
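/*
 * Emit the initial breadcrumb: an arbitration point (so we may be preempted
 * before the payload begins) followed by a write of seqno-1 to the timeline
 * HWSP; once the CS has passed this point, i915_request_started() reports
 * true.
 */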
3030static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3031{
3032	u32 *cs;
3033
3034	GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
3035
3036	cs = intel_ring_begin(rq, 6);
3037	if (IS_ERR(cs))
3038		return PTR_ERR(cs);
3039
3040	/*
3041	 * Check if we have been preempted before we even get started.
3042	 *
3043	 * After this point i915_request_started() reports true, even if
3044	 * we get preempted and so are no longer running.
3045	 */
3046	*cs++ = MI_ARB_CHECK;
3047	*cs++ = MI_NOOP;
3048
3049	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3050	*cs++ = i915_request_timeline(rq)->hwsp_offset;
3051	*cs++ = 0;
3052	*cs++ = rq->fence.seqno - 1;
3053
3054	intel_ring_advance(rq, cs);
3055
3056	/* Record the updated position of the request's payload */
3057	rq->infix = intel_ring_offset(rq, cs);
3058
3059	return 0;
3060}
3061
3062static int execlists_request_alloc(struct i915_request *request)
3063{
3064	int ret;
3065
3066	GEM_BUG_ON(!intel_context_is_pinned(request->context));
3067
3068	/*
3069	 * Flush enough space to reduce the likelihood of waiting after
3070	 * we start building the request - in which case we will just
3071	 * have to repeat work.
3072	 */
3073	request->reserved_space += EXECLISTS_REQUEST_SIZE;
3074
3075	/*
3076	 * Note that after this point, we have committed to using
3077	 * this request as it is being used to both track the
3078	 * state of engine initialisation and liveness of the
3079	 * golden renderstate above. Think twice before you try
3080	 * to cancel/unwind this request now.
3081	 */
3082
3083	/* Unconditionally invalidate GPU caches and TLBs. */
3084	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3085	if (ret)
3086		return ret;
3087
3088	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3089	return 0;
3090}
3091
3092/*
3093 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3094 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3095 * but there is a slight complication as this is applied in WA batch where the
3096 * values are only initialized once so we cannot take register value at the
3097 * beginning and reuse it further; hence we save its value to memory, upload a
3098 * constant value with bit21 set and then we restore it back with the saved value.
3099 * To simplify the WA, a constant value is formed by using the default value
3100 * of this register. This shouldn't be a problem because we are only modifying
 * it for a short period and this batch is non-preemptible. We can of course
3102 * use additional instructions that read the actual value of the register
3103 * at that time and set our bit of interest but it makes the WA complicated.
3104 *
3105 * This WA is also required for Gen9 so extracting as a function avoids
3106 * code duplication.
3107 */
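/*
 * The sequence emitted below is: save GEN8_L3SQCREG4 to scratch (SRM), load
 * a constant with the flush bit set (LRI), issue the PIPE_CONTROL, then
 * restore the saved value from scratch (LRM).
 */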
3108static u32 *
3109gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3110{
3111	/* NB no one else is allowed to scribble over scratch + 256! */
3112	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3113	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3114	*batch++ = intel_gt_scratch_offset(engine->gt,
3115					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3116	*batch++ = 0;
3117
3118	*batch++ = MI_LOAD_REGISTER_IMM(1);
3119	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3120	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3121
3122	batch = gen8_emit_pipe_control(batch,
3123				       PIPE_CONTROL_CS_STALL |
3124				       PIPE_CONTROL_DC_FLUSH_ENABLE,
3125				       0);
3126
3127	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3128	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3129	*batch++ = intel_gt_scratch_offset(engine->gt,
3130					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3131	*batch++ = 0;
3132
3133	return batch;
3134}
3135
3136/*
3137 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3138 * initialized at the beginning and shared across all contexts but this field
3139 * helps us to have multiple batches at different offsets and select them based
 * on some criteria. At the moment this batch always starts at the beginning of the page
3141 * and at this point we don't have multiple wa_ctx batch buffers.
3142 *
 * The number of WAs applied is not known at the beginning; we use this field
 * to return the number of DWORDS written.
3145 *
3146 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3147 * so it adds NOOPs as padding to make it cacheline aligned.
3148 * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
3149 * makes a complete batch buffer.
3150 */
3151static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3152{
3153	/* WaDisableCtxRestoreArbitration:bdw,chv */
3154	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3155
3156	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3157	if (IS_BROADWELL(engine->i915))
3158		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3159
3160	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3161	/* Actual scratch location is at 128 bytes offset */
3162	batch = gen8_emit_pipe_control(batch,
3163				       PIPE_CONTROL_FLUSH_L3 |
3164				       PIPE_CONTROL_STORE_DATA_INDEX |
3165				       PIPE_CONTROL_CS_STALL |
3166				       PIPE_CONTROL_QW_WRITE,
3167				       LRC_PPHWSP_SCRATCH_ADDR);
3168
3169	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3170
3171	/* Pad to end of cacheline */
3172	while ((unsigned long)batch % CACHELINE_BYTES)
3173		*batch++ = MI_NOOP;
3174
3175	/*
3176	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3177	 * execution depends on the length specified in terms of cache lines
3178	 * in the register CTX_RCS_INDIRECT_CTX
3179	 */
3180
3181	return batch;
3182}
3183
3184struct lri {
3185	i915_reg_t reg;
3186	u32 value;
3187};
3188
3189static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3190{
3191	GEM_BUG_ON(!count || count > 63);
3192
3193	*batch++ = MI_LOAD_REGISTER_IMM(count);
3194	do {
3195		*batch++ = i915_mmio_reg_offset(lri->reg);
3196		*batch++ = lri->value;
3197	} while (lri++, --count);
3198	*batch++ = MI_NOOP;
3199
3200	return batch;
3201}
3202
3203static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3204{
3205	static const struct lri lri[] = {
3206		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3207		{
3208			COMMON_SLICE_CHICKEN2,
3209			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3210				       0),
3211		},
3212
3213		/* BSpec: 11391 */
3214		{
3215			FF_SLICE_CHICKEN,
3216			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3217				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3218		},
3219
3220		/* BSpec: 11299 */
3221		{
3222			_3D_CHICKEN3,
3223			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3224				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3225		}
3226	};
3227
3228	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3229
3230	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3231	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3232
3233	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3234	batch = gen8_emit_pipe_control(batch,
3235				       PIPE_CONTROL_FLUSH_L3 |
3236				       PIPE_CONTROL_STORE_DATA_INDEX |
3237				       PIPE_CONTROL_CS_STALL |
3238				       PIPE_CONTROL_QW_WRITE,
3239				       LRC_PPHWSP_SCRATCH_ADDR);
3240
3241	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3242
3243	/* WaMediaPoolStateCmdInWABB:bxt,glk */
3244	if (HAS_POOLED_EU(engine->i915)) {
3245		/*
3246		 * EU pool configuration is setup along with golden context
3247		 * during context initialization. This value depends on
3248		 * device type (2x6 or 3x6) and needs to be updated based
3249		 * on which subslice is disabled especially for 2x6
3250		 * devices, however it is safe to load default
3251		 * configuration of 3x6 device instead of masking off
3252		 * corresponding bits because HW ignores bits of a disabled
3253		 * subslice and drops down to appropriate config. Please
3254		 * see render_state_setup() in i915_gem_render_state.c for
3255		 * possible configurations, to avoid duplication they are
3256		 * not shown here again.
3257		 */
3258		*batch++ = GEN9_MEDIA_POOL_STATE;
3259		*batch++ = GEN9_MEDIA_POOL_ENABLE;
3260		*batch++ = 0x00777000;
3261		*batch++ = 0;
3262		*batch++ = 0;
3263		*batch++ = 0;
3264	}
3265
3266	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3267
3268	/* Pad to end of cacheline */
3269	while ((unsigned long)batch % CACHELINE_BYTES)
3270		*batch++ = MI_NOOP;
3271
3272	return batch;
3273}
3274
3275static u32 *
3276gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3277{
3278	int i;
3279
3280	/*
3281	 * WaPipeControlBefore3DStateSamplePattern: cnl
3282	 *
3283	 * Ensure the engine is idle prior to programming a
3284	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
3285	 */
3286	batch = gen8_emit_pipe_control(batch,
3287				       PIPE_CONTROL_CS_STALL,
3288				       0);
3289	/*
3290	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3291	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3292	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3293	 * confusing. Since gen8_emit_pipe_control() already advances the
3294	 * batch by 6 dwords, we advance the other 10 here, completing a
3295	 * cacheline. It's not clear if the workaround requires this padding
3296	 * before other commands, or if it's just the regular padding we would
3297	 * already have for the workaround bb, so leave it here for now.
3298	 */
3299	for (i = 0; i < 10; i++)
3300		*batch++ = MI_NOOP;
3301
3302	/* Pad to end of cacheline */
3303	while ((unsigned long)batch % CACHELINE_BYTES)
3304		*batch++ = MI_NOOP;
3305
3306	return batch;
3307}
3308
3309#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3310
3311static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3312{
3313	struct drm_i915_gem_object *obj;
3314	struct i915_vma *vma;
3315	int err;
3316
3317	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3318	if (IS_ERR(obj))
3319		return PTR_ERR(obj);
3320
3321	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3322	if (IS_ERR(vma)) {
3323		err = PTR_ERR(vma);
3324		goto err;
3325	}
3326
3327	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
3328	if (err)
3329		goto err;
3330
3331	engine->wa_ctx.vma = vma;
3332	return 0;
3333
3334err:
3335	i915_gem_object_put(obj);
3336	return err;
3337}
3338
3339static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3340{
3341	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3342}
3343
3344typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3345
3346static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3347{
3348	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3349	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3350					    &wa_ctx->per_ctx };
3351	wa_bb_func_t wa_bb_fn[2];
3352	struct page *page;
3353	void *batch, *batch_ptr;
3354	unsigned int i;
3355	int ret;
3356
3357	if (engine->class != RENDER_CLASS)
3358		return 0;
3359
3360	switch (INTEL_GEN(engine->i915)) {
3361	case 12:
3362	case 11:
3363		return 0;
3364	case 10:
3365		wa_bb_fn[0] = gen10_init_indirectctx_bb;
3366		wa_bb_fn[1] = NULL;
3367		break;
3368	case 9:
3369		wa_bb_fn[0] = gen9_init_indirectctx_bb;
3370		wa_bb_fn[1] = NULL;
3371		break;
3372	case 8:
3373		wa_bb_fn[0] = gen8_init_indirectctx_bb;
3374		wa_bb_fn[1] = NULL;
3375		break;
3376	default:
3377		MISSING_CASE(INTEL_GEN(engine->i915));
3378		return 0;
3379	}
3380
3381	ret = lrc_setup_wa_ctx(engine);
3382	if (ret) {
3383		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
3384		return ret;
3385	}
3386
3387	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
3388	batch = batch_ptr = kmap_atomic(page);
3389
3390	/*
3391	 * Emit the two workaround batch buffers, recording the offset from the
3392	 * start of the workaround batch buffer object for each and their
3393	 * respective sizes.
3394	 */
3395	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3396		wa_bb[i]->offset = batch_ptr - batch;
3397		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3398						  CACHELINE_BYTES))) {
3399			ret = -EINVAL;
3400			break;
3401		}
3402		if (wa_bb_fn[i])
3403			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3404		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3405	}
3406
3407	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3408
3409	kunmap_atomic(batch);
3410	if (ret)
3411		lrc_destroy_wa_ctx(engine);
3412
3413	return ret;
3414}
3415
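/*
 * Put the engine into execlists submission mode: program HWSTAM, select the
 * appropriate RING_MODE (Gen11+ disables legacy mode, earlier gens enable
 * the run list), clear STOP_RING and point RING_HWS_PGA at the per-engine
 * status page.
 */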
3416static void enable_execlists(struct intel_engine_cs *engine)
3417{
3418	u32 mode;
3419
3420	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3421
3422	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3423
3424	if (INTEL_GEN(engine->i915) >= 11)
3425		mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
3426	else
3427		mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
3428	ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
3429
3430	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3431
3432	ENGINE_WRITE_FW(engine,
3433			RING_HWS_PGA,
3434			i915_ggtt_offset(engine->status_page.vma));
3435	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
3436
3437	engine->context_tag = 0;
3438}
3439
3440static bool unexpected_starting_state(struct intel_engine_cs *engine)
3441{
3442	bool unexpected = false;
3443
3444	if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
3445		DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
3446		unexpected = true;
3447	}
3448
3449	return unexpected;
3450}
3451
3452static int execlists_resume(struct intel_engine_cs *engine)
3453{
3454	intel_engine_apply_workarounds(engine);
3455	intel_engine_apply_whitelist(engine);
3456
3457	intel_mocs_init_engine(engine);
3458
3459	intel_engine_reset_breadcrumbs(engine);
3460
3461	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
3462		struct drm_printer p = drm_debug_printer(__func__);
3463
3464		intel_engine_dump(engine, &p, NULL);
3465	}
3466
3467	enable_execlists(engine);
3468
3469	return 0;
3470}
3471
3472static void execlists_reset_prepare(struct intel_engine_cs *engine)
3473{
3474	struct intel_engine_execlists * const execlists = &engine->execlists;
3475	unsigned long flags;
3476
3477	ENGINE_TRACE(engine, "depth<-%d\n",
3478		     atomic_read(&execlists->tasklet.count));
3479
3480	/*
3481	 * Prevent request submission to the hardware until we have
3482	 * completed the reset in i915_gem_reset_finish(). If a request
3483	 * is completed by one engine, it may then queue a request
3484	 * to a second via its execlists->tasklet *just* as we are
3485	 * calling engine->resume() and also writing the ELSP.
3486	 * Turning off the execlists->tasklet until the reset is over
3487	 * prevents the race.
3488	 */
3489	__tasklet_disable_sync_once(&execlists->tasklet);
3490	GEM_BUG_ON(!reset_in_progress(execlists));
3491
3492	/* And flush any current direct submission. */
3493	spin_lock_irqsave(&engine->active.lock, flags);
3494	spin_unlock_irqrestore(&engine->active.lock, flags);
3495
3496	/*
	 * We stop the engines, otherwise we might get a failed reset and a
	 * dead gpu (on elk). Also a gpu as modern as kbl can suffer
	 * a system hang if a batchbuffer is progressing when
	 * the reset is issued, regardless of the READY_TO_RESET ack.
	 * Thus we assume it is best to stop the engines on all gens
3502	 * where we have a gpu reset.
3503	 *
3504	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
3505	 *
3506	 * FIXME: Wa for more modern gens needs to be validated
3507	 */
3508	intel_engine_stop_cs(engine);
3509}
3510
3511static void reset_csb_pointers(struct intel_engine_cs *engine)
3512{
3513	struct intel_engine_execlists * const execlists = &engine->execlists;
3514	const unsigned int reset_value = execlists->csb_size - 1;
3515
3516	ring_set_paused(engine, 0);
3517
3518	/*
3519	 * After a reset, the HW starts writing into CSB entry [0]. We
3520	 * therefore have to set our HEAD pointer back one entry so that
3521	 * the *first* entry we check is entry 0. To complicate this further,
3522	 * as we don't wait for the first interrupt after reset, we have to
3523	 * fake the HW write to point back to the last entry so that our
3524	 * inline comparison of our cached head position against the last HW
3525	 * write works even before the first interrupt.
3526	 */
3527	execlists->csb_head = reset_value;
3528	WRITE_ONCE(*execlists->csb_write, reset_value);
3529	wmb(); /* Make sure this is visible to HW (paranoia?) */
3530
3531	/*
3532	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3533	 * Bludgeon them with a mmio update to be sure.
3534	 */
3535	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3536		     reset_value << 8 | reset_value);
3537	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3538
3539	invalidate_csb_entries(&execlists->csb_status[0],
3540			       &execlists->csb_status[reset_value]);
3541}
3542
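/*
 * Clear STOP_RING in the RING_MI_MODE value stored in the context image,
 * using its masked-write form (value bit cleared, mask bit set in the high
 * word), so the ring is not left stopped when the context is restored.
 */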
3543static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
3544{
3545	int x;
3546
3547	x = lrc_ring_mi_mode(engine);
3548	if (x != -1) {
3549		regs[x + 1] &= ~STOP_RING;
3550		regs[x + 1] |= STOP_RING << 16;
3551	}
3552}
3553
3554static void __execlists_reset_reg_state(const struct intel_context *ce,
3555					const struct intel_engine_cs *engine)
3556{
3557	u32 *regs = ce->lrc_reg_state;
3558
3559	__reset_stop_ring(regs, engine);
3560}
3561
3562static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
3563{
3564	struct intel_engine_execlists * const execlists = &engine->execlists;
3565	struct intel_context *ce;
3566	struct i915_request *rq;
3567	u32 head;
3568
3569	mb(); /* paranoia: read the CSB pointers from after the reset */
3570	clflush(execlists->csb_write);
3571	mb();
3572
3573	process_csb(engine); /* drain preemption events */
3574
3575	/* Following the reset, we need to reload the CSB read/write pointers */
3576	reset_csb_pointers(engine);
3577
3578	/*
	 * Save the currently executing context; even if we completed
	 * its request, it was still running at the time of the
	 * reset and will have been clobbered.
3582	 */
3583	rq = execlists_active(execlists);
3584	if (!rq)
3585		goto unwind;
3586
3587	/* We still have requests in-flight; the engine should be active */
3588	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
3589
3590	ce = rq->context;
3591	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3592
3593	if (i915_request_completed(rq)) {
3594		/* Idle context; tidy up the ring so we can restart afresh */
3595		head = intel_ring_wrap(ce->ring, rq->tail);
3596		goto out_replay;
3597	}
3598
3599	/* Context has requests still in-flight; it should not be idle! */
3600	GEM_BUG_ON(i915_active_is_idle(&ce->active));
3601	rq = active_request(ce->timeline, rq);
3602	head = intel_ring_wrap(ce->ring, rq->head);
3603	GEM_BUG_ON(head == ce->ring->tail);
3604
3605	/*
3606	 * If this request hasn't started yet, e.g. it is waiting on a
3607	 * semaphore, we need to avoid skipping the request or else we
3608	 * break the signaling chain. However, if the context is corrupt
3609	 * the request will not restart and we will be stuck with a wedged
3610	 * device. It is quite often the case that if we issue a reset
3611	 * while the GPU is loading the context image, that the context
3612	 * image becomes corrupt.
3613	 *
3614	 * Otherwise, if we have not started yet, the request should replay
3615	 * perfectly and we do not need to flag the result as being erroneous.
3616	 */
3617	if (!i915_request_started(rq))
3618		goto out_replay;
3619
3620	/*
3621	 * If the request was innocent, we leave the request in the ELSP
3622	 * and will try to replay it on restarting. The context image may
3623	 * have been corrupted by the reset, in which case we may have
3624	 * to service a new GPU hang, but more likely we can continue on
3625	 * without impact.
3626	 *
3627	 * If the request was guilty, we presume the context is corrupt
3628	 * and have to at least restore the RING register in the context
3629	 * image back to the expected values to skip over the guilty request.
3630	 */
3631	__i915_request_reset(rq, stalled);
3632	if (!stalled)
3633		goto out_replay;
3634
3635	/*
3636	 * We want a simple context + ring to execute the breadcrumb update.
3637	 * We cannot rely on the context being intact across the GPU hang,
3638	 * so clear it and rebuild just what we need for the breadcrumb.
3639	 * All pending requests for this context will be zapped, and any
3640	 * future request will be after userspace has had the opportunity
3641	 * to recreate its own state.
3642	 */
3643	GEM_BUG_ON(!intel_context_is_pinned(ce));
3644	restore_default_state(ce, engine);
3645
3646out_replay:
3647	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
3648		     head, ce->ring->tail);
3649	__execlists_reset_reg_state(ce, engine);
3650	__execlists_update_reg_state(ce, engine, head);
3651	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
3652
3653unwind:
3654	/* Push back any incomplete requests for replay after the reset. */
3655	cancel_port_requests(execlists);
3656	__unwind_incomplete_requests(engine);
3657}
3658
3659static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
3660{
3661	unsigned long flags;
3662
3663	ENGINE_TRACE(engine, "\n");
3664
3665	spin_lock_irqsave(&engine->active.lock, flags);
3666
3667	__execlists_reset(engine, stalled);
3668
3669	spin_unlock_irqrestore(&engine->active.lock, flags);
3670}
3671
3672static void nop_submission_tasklet(unsigned long data)
3673{
3674	/* The driver is wedged; don't process any more events. */
3675}
3676
3677static void execlists_reset_cancel(struct intel_engine_cs *engine)
3678{
3679	struct intel_engine_execlists * const execlists = &engine->execlists;
3680	struct i915_request *rq, *rn;
3681	struct rb_node *rb;
3682	unsigned long flags;
3683
3684	ENGINE_TRACE(engine, "\n");
3685
3686	/*
3687	 * Before we call engine->cancel_requests(), we should have exclusive
3688	 * access to the submission state. This is arranged for us by the
3689	 * caller disabling the interrupt generation, the tasklet and other
3690	 * threads that may then access the same state, giving us a free hand
3691	 * to reset state. However, we still need to let lockdep be aware that
3692	 * we know this state may be accessed in hardirq context, so we
3693	 * disable the irq around this manipulation and we want to keep
3694	 * the spinlock focused on its duties and not accidentally conflate
3695	 * coverage to the submission's irq state. (Similarly, although we
3696	 * shouldn't need to disable irq around the manipulation of the
3697	 * submission's irq state, we also wish to remind ourselves that
3698	 * it is irq state.)
3699	 */
3700	spin_lock_irqsave(&engine->active.lock, flags);
3701
3702	__execlists_reset(engine, true);
3703
3704	/* Mark all executing requests as skipped. */
3705	list_for_each_entry(rq, &engine->active.requests, sched.link)
3706		mark_eio(rq);
3707
3708	/* Flush the queued requests to the timeline list (for retiring). */
3709	while ((rb = rb_first_cached(&execlists->queue))) {
3710		struct i915_priolist *p = to_priolist(rb);
3711		int i;
3712
3713		priolist_for_each_request_consume(rq, rn, p, i) {
3714			mark_eio(rq);
3715			__i915_request_submit(rq);
3716		}
3717
3718		rb_erase_cached(&p->node, &execlists->queue);
3719		i915_priolist_free(p);
3720	}
3721
3722	/* On-hold requests will be flushed to timeline upon their release */
3723	list_for_each_entry(rq, &engine->active.hold, sched.link)
3724		mark_eio(rq);
3725
3726	/* Cancel all attached virtual engines */
3727	while ((rb = rb_first_cached(&execlists->virtual))) {
3728		struct virtual_engine *ve =
3729			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
3730
3731		rb_erase_cached(rb, &execlists->virtual);
3732		container_of(rb, struct ve_node, rb)->inserted = false;
3733
3734		spin_lock(&ve->base.active.lock);
3735		rq = fetch_and_zero(&ve->request);
3736		if (rq) {
3737			mark_eio(rq);
3738
3739			rq->engine = engine;
3740			__i915_request_submit(rq);
3741			i915_request_put(rq);
3742
3743			ve->base.execlists.queue_priority_hint = INT_MIN;
3744		}
3745		spin_unlock(&ve->base.active.lock);
3746	}
3747
3748	/* Remaining _unready_ requests will be nop'ed when submitted */
3749
3750	execlists->queue_priority_hint = INT_MIN;
3751#ifdef __NetBSD__
3752	i915_sched_init(execlists);
3753	rb_tree_init(&execlists->virtual.rb_root.rbr_tree, &ve_tree_ops);
3754#else
3755	execlists->queue = RB_ROOT_CACHED;
3756#endif
3757
3758	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
3759	execlists->tasklet.func = nop_submission_tasklet;
3760
3761	spin_unlock_irqrestore(&engine->active.lock, flags);
3762}
3763
3764static void execlists_reset_finish(struct intel_engine_cs *engine)
3765{
3766	struct intel_engine_execlists * const execlists = &engine->execlists;
3767
3768	/*
3769	 * After a GPU reset, we may have requests to replay. Do so now while
3770	 * we still have the forcewake to be sure that the GPU is not allowed
3771	 * to sleep before we restart and reload a context.
3772	 */
3773	GEM_BUG_ON(!reset_in_progress(execlists));
3774	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
3775		execlists->tasklet.func(execlists->tasklet.data);
3776
3777	if (__tasklet_enable(&execlists->tasklet))
3778		/* And kick in case we missed a new request submission. */
3779		tasklet_hi_schedule(&execlists->tasklet);
3780	ENGINE_TRACE(engine, "depth->%d\n",
3781		     atomic_read(&execlists->tasklet.count));
3782}
3783
3784static int gen8_emit_bb_start_noarb(struct i915_request *rq,
3785				    u64 offset, u32 len,
3786				    const unsigned int flags)
3787{
3788	u32 *cs;
3789
3790	cs = intel_ring_begin(rq, 4);
3791	if (IS_ERR(cs))
3792		return PTR_ERR(cs);
3793
3794	/*
3795	 * WaDisableCtxRestoreArbitration:bdw,chv
3796	 *
	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
	 * particular all the gens that do not need the w/a at all!); if we
	 * took care to make sure that on every switch into this context
	 * (both ordinary and for preemption) arbitration was enabled,
3801	 * we would be fine.  However, for gen8 there is another w/a that
3802	 * requires us to not preempt inside GPGPU execution, so we keep
3803	 * arbitration disabled for gen8 batches. Arbitration will be
3804	 * re-enabled before we close the request
3805	 * (engine->emit_fini_breadcrumb).
3806	 */
3807	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3808
3809	/* FIXME(BDW+): Address space and security selectors. */
3810	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
3811		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3812	*cs++ = lower_32_bits(offset);
3813	*cs++ = upper_32_bits(offset);
3814
3815	intel_ring_advance(rq, cs);
3816
3817	return 0;
3818}
3819
3820static int gen8_emit_bb_start(struct i915_request *rq,
3821			      u64 offset, u32 len,
3822			      const unsigned int flags)
3823{
3824	u32 *cs;
3825
3826	cs = intel_ring_begin(rq, 6);
3827	if (IS_ERR(cs))
3828		return PTR_ERR(cs);
3829
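	/*
	 * Enable arbitration so that the batch itself can be preempted,
	 * then disable it again afterwards so that preemption between the
	 * batch and the breadcrumb can only happen at the explicit points
	 * we emit (arbitration is re-enabled in emit_fini_breadcrumb).
	 */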
3830	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3831
3832	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
3833		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3834	*cs++ = lower_32_bits(offset);
3835	*cs++ = upper_32_bits(offset);
3836
3837	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3838	*cs++ = MI_NOOP;
3839
3840	intel_ring_advance(rq, cs);
3841
3842	return 0;
3843}
3844
3845static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3846{
3847	ENGINE_WRITE(engine, RING_IMR,
3848		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
3849	ENGINE_POSTING_READ(engine, RING_IMR);
3850}
3851
3852static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3853{
3854	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3855}
3856
3857static int gen8_emit_flush(struct i915_request *request, u32 mode)
3858{
3859	u32 cmd, *cs;
3860
3861	cs = intel_ring_begin(request, 4);
3862	if (IS_ERR(cs))
3863		return PTR_ERR(cs);
3864
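	/*
	 * The +1 in the MI_FLUSH_DW length field covers the extra dword
	 * needed for the 64-bit post-sync address emitted below (4 dwords
	 * in total).
	 */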
3865	cmd = MI_FLUSH_DW + 1;
3866
	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
3872	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
3873
3874	if (mode & EMIT_INVALIDATE) {
3875		cmd |= MI_INVALIDATE_TLB;
3876		if (request->engine->class == VIDEO_DECODE_CLASS)
3877			cmd |= MI_INVALIDATE_BSD;
3878	}
3879
3880	*cs++ = cmd;
3881	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
3882	*cs++ = 0; /* upper addr */
3883	*cs++ = 0; /* value */
3884	intel_ring_advance(request, cs);
3885
3886	return 0;
3887}
3888
3889static int gen8_emit_flush_render(struct i915_request *request,
3890				  u32 mode)
3891{
3892	bool vf_flush_wa = false, dc_flush_wa = false;
3893	u32 *cs, flags = 0;
3894	int len;
3895
3896	flags |= PIPE_CONTROL_CS_STALL;
3897
3898	if (mode & EMIT_FLUSH) {
3899		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3900		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3901		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3902		flags |= PIPE_CONTROL_FLUSH_ENABLE;
3903	}
3904
3905	if (mode & EMIT_INVALIDATE) {
3906		flags |= PIPE_CONTROL_TLB_INVALIDATE;
3907		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3908		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3909		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3910		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3911		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3912		flags |= PIPE_CONTROL_QW_WRITE;
3913		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3914
3915		/*
3916		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
3917		 * pipe control.
3918		 */
3919		if (IS_GEN(request->i915, 9))
3920			vf_flush_wa = true;
3921
3922		/* WaForGAMHang:kbl */
3923		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
3924			dc_flush_wa = true;
3925	}
3926
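	/* Each gen8_emit_pipe_control() below emits 6 dwords. */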
3927	len = 6;
3928
3929	if (vf_flush_wa)
3930		len += 6;
3931
3932	if (dc_flush_wa)
3933		len += 12;
3934
3935	cs = intel_ring_begin(request, len);
3936	if (IS_ERR(cs))
3937		return PTR_ERR(cs);
3938
3939	if (vf_flush_wa)
3940		cs = gen8_emit_pipe_control(cs, 0, 0);
3941
3942	if (dc_flush_wa)
3943		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
3944					    0);
3945
3946	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3947
3948	if (dc_flush_wa)
3949		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
3950
3951	intel_ring_advance(request, cs);
3952
3953	return 0;
3954}
3955
3956static int gen11_emit_flush_render(struct i915_request *request,
3957				   u32 mode)
3958{
3959	if (mode & EMIT_FLUSH) {
3960		u32 *cs;
3961		u32 flags = 0;
3962
3963		flags |= PIPE_CONTROL_CS_STALL;
3964
3965		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3966		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3967		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3968		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3969		flags |= PIPE_CONTROL_FLUSH_ENABLE;
3970		flags |= PIPE_CONTROL_QW_WRITE;
3971		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3972
3973		cs = intel_ring_begin(request, 6);
3974		if (IS_ERR(cs))
3975			return PTR_ERR(cs);
3976
3977		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3978		intel_ring_advance(request, cs);
3979	}
3980
3981	if (mode & EMIT_INVALIDATE) {
3982		u32 *cs;
3983		u32 flags = 0;
3984
3985		flags |= PIPE_CONTROL_CS_STALL;
3986
3987		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3988		flags |= PIPE_CONTROL_TLB_INVALIDATE;
3989		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3990		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3991		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3992		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3993		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3994		flags |= PIPE_CONTROL_QW_WRITE;
3995		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3996
3997		cs = intel_ring_begin(request, 6);
3998		if (IS_ERR(cs))
3999			return PTR_ERR(cs);
4000
4001		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4002		intel_ring_advance(request, cs);
4003	}
4004
4005	return 0;
4006}
4007
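/*
 * Gen12 exposes a pre-parser disable control in MI_ARB_CHECK: bit 8 acts as
 * the write mask for the pre-parser-disable field carried in bit 0, so this
 * returns a single instruction that toggles the pre-parser on or off.
 */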
4008static u32 preparser_disable(bool state)
4009{
4010	return MI_ARB_CHECK | 1 << 8 | state;
4011}
4012
4013static int gen12_emit_flush_render(struct i915_request *request,
4014				   u32 mode)
4015{
4016	if (mode & EMIT_FLUSH) {
4017		u32 flags = 0;
4018		u32 *cs;
4019
4020		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4021		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4022		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4023		/* Wa_1409600907:tgl */
4024		flags |= PIPE_CONTROL_DEPTH_STALL;
4025		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4026		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4027		flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4028
4029		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4030		flags |= PIPE_CONTROL_QW_WRITE;
4031
4032		flags |= PIPE_CONTROL_CS_STALL;
4033
4034		cs = intel_ring_begin(request, 6);
4035		if (IS_ERR(cs))
4036			return PTR_ERR(cs);
4037
4038		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4039		intel_ring_advance(request, cs);
4040	}
4041
4042	if (mode & EMIT_INVALIDATE) {
4043		u32 flags = 0;
4044		u32 *cs;
4045
4046		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4047		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4048		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4049		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4050		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4051		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4052		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4053		flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
4054
4055		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4056		flags |= PIPE_CONTROL_QW_WRITE;
4057
4058		flags |= PIPE_CONTROL_CS_STALL;
4059
4060		cs = intel_ring_begin(request, 8);
4061		if (IS_ERR(cs))
4062			return PTR_ERR(cs);
4063
4064		/*
4065		 * Prevent the pre-parser from skipping past the TLB
4066		 * invalidate and loading a stale page for the batch
4067		 * buffer / request payload.
4068		 */
4069		*cs++ = preparser_disable(true);
4070
4071		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4072
4073		*cs++ = preparser_disable(false);
4074		intel_ring_advance(request, cs);
4075
4076		/*
4077		 * Wa_1604544889:tgl
4078		 */
4079		if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
4080			flags = 0;
4081			flags |= PIPE_CONTROL_CS_STALL;
4082			flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4083
4084			flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4085			flags |= PIPE_CONTROL_QW_WRITE;
4086
4087			cs = intel_ring_begin(request, 6);
4088			if (IS_ERR(cs))
4089				return PTR_ERR(cs);
4090
4091			cs = gen8_emit_pipe_control(cs, flags,
4092						    LRC_PPHWSP_SCRATCH_ADDR);
4093			intel_ring_advance(request, cs);
4094		}
4095	}
4096
4097	return 0;
4098}
4099
4100/*
4101 * Reserve space for 2 NOOPs at the end of each request to be
4102 * used as a workaround for not being allowed to do lite
4103 * restore with HEAD==TAIL (WaIdleLiteRestore).
4104 */
4105static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4106{
4107	/* Ensure there's always at least one preemption point per-request. */
4108	*cs++ = MI_ARB_CHECK;
4109	*cs++ = MI_NOOP;
4110	request->wa_tail = intel_ring_offset(request, cs);
4111
4112	return cs;
4113}
4114
4115static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4116{
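	/*
	 * Arm a busy-wait on the per-engine preemption semaphore in the
	 * HWSP: MI_SEMAPHORE_SAD_EQ_SDD with data 0 stalls the CS here
	 * while ring_set_paused() holds the semaphore non-zero, and lets
	 * it continue once the value is cleared back to 0.
	 */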
4117	*cs++ = MI_SEMAPHORE_WAIT |
4118		MI_SEMAPHORE_GLOBAL_GTT |
4119		MI_SEMAPHORE_POLL |
4120		MI_SEMAPHORE_SAD_EQ_SDD;
4121	*cs++ = 0;
4122	*cs++ = intel_hws_preempt_address(request->engine);
4123	*cs++ = 0;
4124
4125	return cs;
4126}
4127
4128static __always_inline u32*
4129gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
4130				 u32 *cs)
4131{
4132	*cs++ = MI_USER_INTERRUPT;
4133
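	/*
	 * Re-enable MI arbitration that was left disabled after the batch
	 * (see gen8_emit_bb_start*()), then optionally arm the preempt
	 * busywait semaphore before closing the request.
	 */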
4134	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4135	if (intel_engine_has_semaphores(request->engine))
4136		cs = emit_preempt_busywait(request, cs);
4137
4138	request->tail = intel_ring_offset(request, cs);
4139	assert_ring_tail_valid(request->ring, request->tail);
4140
4141	return gen8_emit_wa_tail(request, cs);
4142}
4143
4144static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4145{
4146	cs = gen8_emit_ggtt_write(cs,
4147				  request->fence.seqno,
4148				  i915_request_active_timeline(request)->hwsp_offset,
4149				  0);
4150
4151	return gen8_emit_fini_breadcrumb_footer(request, cs);
4152}
4153
4154static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4155{
4156	cs = gen8_emit_pipe_control(cs,
4157				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4158				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4159				    PIPE_CONTROL_DC_FLUSH_ENABLE,
4160				    0);
4161
4162	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4163	cs = gen8_emit_ggtt_write_rcs(cs,
4164				      request->fence.seqno,
4165				      i915_request_active_timeline(request)->hwsp_offset,
4166				      PIPE_CONTROL_FLUSH_ENABLE |
4167				      PIPE_CONTROL_CS_STALL);
4168
4169	return gen8_emit_fini_breadcrumb_footer(request, cs);
4170}
4171
4172static u32 *
4173gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4174{
4175	cs = gen8_emit_ggtt_write_rcs(cs,
4176				      request->fence.seqno,
4177				      i915_request_active_timeline(request)->hwsp_offset,
4178				      PIPE_CONTROL_CS_STALL |
4179				      PIPE_CONTROL_TILE_CACHE_FLUSH |
4180				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4181				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4182				      PIPE_CONTROL_DC_FLUSH_ENABLE |
4183				      PIPE_CONTROL_FLUSH_ENABLE);
4184
4185	return gen8_emit_fini_breadcrumb_footer(request, cs);
4186}
4187
4188/*
4189 * Note that the CS instruction pre-parser will not stall on the breadcrumb
4190 * flush and will continue pre-fetching the instructions after it before the
4191 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
 * BB_START/END instructions, so, even though we might pre-fetch the preamble
4193 * of the next request before the memory has been flushed, we're guaranteed that
4194 * we won't access the batch itself too early.
4195 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4196 * so, if the current request is modifying an instruction in the next request on
4197 * the same intel_context, we might pre-fetch and then execute the pre-update
4198 * instruction. To avoid this, the users of self-modifying code should either
4199 * disable the parser around the code emitting the memory writes, via a new flag
4200 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4201 * the in-kernel use-cases we've opted to use a separate context, see
4202 * reloc_gpu() as an example.
4203 * All the above applies only to the instructions themselves. Non-inline data
4204 * used by the instructions is not pre-fetched.
4205 */
4206
4207static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4208{
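	/*
	 * Gen12 variant of emit_preempt_busywait(): the longer token form
	 * of MI_SEMAPHORE_WAIT is used, plus an MI_NOOP to keep the emitted
	 * dword count even.
	 */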
4209	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4210		MI_SEMAPHORE_GLOBAL_GTT |
4211		MI_SEMAPHORE_POLL |
4212		MI_SEMAPHORE_SAD_EQ_SDD;
4213	*cs++ = 0;
4214	*cs++ = intel_hws_preempt_address(request->engine);
4215	*cs++ = 0;
4216	*cs++ = 0;
4217	*cs++ = MI_NOOP;
4218
4219	return cs;
4220}
4221
4222static __always_inline u32*
4223gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
4224{
4225	*cs++ = MI_USER_INTERRUPT;
4226
4227	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4228	if (intel_engine_has_semaphores(request->engine))
4229		cs = gen12_emit_preempt_busywait(request, cs);
4230
4231	request->tail = intel_ring_offset(request, cs);
4232	assert_ring_tail_valid(request->ring, request->tail);
4233
4234	return gen8_emit_wa_tail(request, cs);
4235}
4236
4237static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4238{
4239	cs = gen8_emit_ggtt_write(cs,
4240				  request->fence.seqno,
4241				  i915_request_active_timeline(request)->hwsp_offset,
4242				  0);
4243
4244	return gen12_emit_fini_breadcrumb_footer(request, cs);
4245}
4246
4247static u32 *
4248gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4249{
4250	cs = gen8_emit_ggtt_write_rcs(cs,
4251				      request->fence.seqno,
4252				      i915_request_active_timeline(request)->hwsp_offset,
4253				      PIPE_CONTROL_CS_STALL |
4254				      PIPE_CONTROL_TILE_CACHE_FLUSH |
4255				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4256				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4257				      /* Wa_1409600907:tgl */
4258				      PIPE_CONTROL_DEPTH_STALL |
4259				      PIPE_CONTROL_DC_FLUSH_ENABLE |
4260				      PIPE_CONTROL_FLUSH_ENABLE |
4261				      PIPE_CONTROL_HDC_PIPELINE_FLUSH);
4262
4263	return gen12_emit_fini_breadcrumb_footer(request, cs);
4264}
4265
4266static void execlists_park(struct intel_engine_cs *engine)
4267{
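	/* The engine is idling: stop the timeslice and preempt-timeout timers. */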
4268	cancel_timer(&engine->execlists.timer);
4269	cancel_timer(&engine->execlists.preempt);
4270}
4271
4272void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4273{
4274	engine->submit_request = execlists_submit_request;
4275	engine->schedule = i915_schedule;
4276	engine->execlists.tasklet.func = execlists_submission_tasklet;
4277
4278	engine->reset.prepare = execlists_reset_prepare;
4279	engine->reset.rewind = execlists_reset_rewind;
4280	engine->reset.cancel = execlists_reset_cancel;
4281	engine->reset.finish = execlists_reset_finish;
4282
4283	engine->park = execlists_park;
4284	engine->unpark = NULL;
4285
4286	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4287	if (!intel_vgpu_active(engine->i915)) {
4288		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4289		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
4290			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4291	}
4292
4293	if (INTEL_GEN(engine->i915) >= 12)
4294		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
4295
4296	if (intel_engine_has_preemption(engine))
4297		engine->emit_bb_start = gen8_emit_bb_start;
4298	else
4299		engine->emit_bb_start = gen8_emit_bb_start_noarb;
4300}
4301
4302static void execlists_shutdown(struct intel_engine_cs *engine)
4303{
4304	/* Synchronise with residual timers and any softirq they raise */
4305	del_timer_sync(&engine->execlists.timer);
4306	del_timer_sync(&engine->execlists.preempt);
4307	tasklet_kill(&engine->execlists.tasklet);
4308}
4309
4310static void execlists_release(struct intel_engine_cs *engine)
4311{
4312	execlists_shutdown(engine);
4313
4314	intel_engine_cleanup_common(engine);
4315	lrc_destroy_wa_ctx(engine);
4316}
4317
4318static void
4319logical_ring_default_vfuncs(struct intel_engine_cs *engine)
4320{
	/* Default vfuncs which can be overridden by each engine. */
4322
4323	engine->resume = execlists_resume;
4324
4325	engine->cops = &execlists_context_ops;
4326	engine->request_alloc = execlists_request_alloc;
4327
4328	engine->emit_flush = gen8_emit_flush;
4329	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4330	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
4331	if (INTEL_GEN(engine->i915) >= 12)
4332		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
4333
4334	engine->set_default_submission = intel_execlists_set_default_submission;
4335
4336	if (INTEL_GEN(engine->i915) < 11) {
4337		engine->irq_enable = gen8_logical_ring_enable_irq;
4338		engine->irq_disable = gen8_logical_ring_disable_irq;
4339	} else {
4340		/*
4341		 * TODO: On Gen11 interrupt masks need to be clear
		 * to allow C6 entry. Keep interrupts enabled at all times
		 * and take the hit of generating extra interrupts
4344		 * until a more refined solution exists.
4345		 */
4346	}
4347}
4348
4349static inline void
4350logical_ring_default_irqs(struct intel_engine_cs *engine)
4351{
4352	unsigned int shift = 0;
4353
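	/*
	 * Gen8-10 pack several engines' interrupt bits into shared
	 * registers, hence the per-engine shifts; gen11+ has per-engine
	 * interrupt registers and needs no shift.
	 */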
4354	if (INTEL_GEN(engine->i915) < 11) {
4355		const u8 irq_shifts[] = {
4356			[RCS0]  = GEN8_RCS_IRQ_SHIFT,
4357			[BCS0]  = GEN8_BCS_IRQ_SHIFT,
4358			[VCS0]  = GEN8_VCS0_IRQ_SHIFT,
4359			[VCS1]  = GEN8_VCS1_IRQ_SHIFT,
4360			[VECS0] = GEN8_VECS_IRQ_SHIFT,
4361		};
4362
4363		shift = irq_shifts[engine->id];
4364	}
4365
4366	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
4367	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
4368}
4369
4370static void rcs_submission_override(struct intel_engine_cs *engine)
4371{
4372	switch (INTEL_GEN(engine->i915)) {
4373	case 12:
4374		engine->emit_flush = gen12_emit_flush_render;
4375		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4376		break;
4377	case 11:
4378		engine->emit_flush = gen11_emit_flush_render;
4379		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4380		break;
4381	default:
4382		engine->emit_flush = gen8_emit_flush_render;
4383		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4384		break;
4385	}
4386}
4387
4388int intel_execlists_submission_setup(struct intel_engine_cs *engine)
4389{
4390	struct intel_engine_execlists * const execlists = &engine->execlists;
4391	struct drm_i915_private *i915 = engine->i915;
4392	struct intel_uncore *uncore = engine->uncore;
4393	u32 base = engine->mmio_base;
4394
4395	i915_sched_init(&engine->execlists);
4396
4397	tasklet_init(&engine->execlists.tasklet,
4398		     execlists_submission_tasklet, (unsigned long)engine);
4399	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
4400	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
4401
4402	logical_ring_default_vfuncs(engine);
4403	logical_ring_default_irqs(engine);
4404
4405	if (engine->class == RENDER_CLASS)
4406		rcs_submission_override(engine);
4407
4408	if (intel_init_workaround_bb(engine))
4409		/*
		 * We continue even if we fail to initialize the WA batch
		 * because we only expect rare glitches, and nothing
		 * critical enough to prevent us from using the GPU.
4413		 */
4414		DRM_ERROR("WA batch buffer initialization failed\n");
4415
4416	if (HAS_LOGICAL_RING_ELSQ(i915)) {
4417#ifdef __NetBSD__
4418		execlists->submit_reg = i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
4419		execlists->ctrl_reg = i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
4420		execlists->bsh = uncore->regs_bsh;
4421		execlists->bst = uncore->regs_bst;
4422#else
4423		execlists->submit_reg = uncore->regs +
4424			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
4425		execlists->ctrl_reg = uncore->regs +
4426			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
4427#endif
4428	} else {
4429#ifdef __NetBSD__
4430		execlists->submit_reg = i915_mmio_reg_offset(RING_ELSP(base));
4431		execlists->bsh = uncore->regs_bsh;
4432		execlists->bst = uncore->regs_bst;
4433#else
4434		execlists->submit_reg = uncore->regs +
4435			i915_mmio_reg_offset(RING_ELSP(base));
4436#endif
4437	}
4438
4439	execlists->csb_status =
4440		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
4441
4442	execlists->csb_write =
4443		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
4444
4445	if (INTEL_GEN(i915) < 11)
4446		execlists->csb_size = GEN8_CSB_ENTRIES;
4447	else
4448		execlists->csb_size = GEN11_CSB_ENTRIES;
4449
4450	reset_csb_pointers(engine);
4451
4452	/* Finally, take ownership and responsibility for cleanup! */
4453	engine->release = execlists_release;
4454
4455	return 0;
4456}
4457
4458static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
4459{
4460	u32 indirect_ctx_offset;
4461
4462	switch (INTEL_GEN(engine->i915)) {
4463	default:
4464		MISSING_CASE(INTEL_GEN(engine->i915));
4465		/* fall through */
4466	case 12:
4467		indirect_ctx_offset =
4468			GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4469		break;
4470	case 11:
4471		indirect_ctx_offset =
4472			GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4473		break;
4474	case 10:
4475		indirect_ctx_offset =
4476			GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4477		break;
4478	case 9:
4479		indirect_ctx_offset =
4480			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4481		break;
4482	case 8:
4483		indirect_ctx_offset =
4484			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4485		break;
4486	}
4487
4488	return indirect_ctx_offset;
4489}
4490
4491
4492static void init_common_reg_state(u32 * const regs,
4493				  const struct intel_engine_cs *engine,
4494				  const struct intel_ring *ring,
4495				  bool inhibit)
4496{
4497	u32 ctl;
4498
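	/*
	 * CTX_CONTEXT_CONTROL is a masked register: the upper 16 bits select
	 * which of the lower 16 bits take effect, hence _MASKED_BIT_ENABLE/
	 * _MASKED_BIT_DISABLE rather than plain bit arithmetic.
	 */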
4499	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
4500	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
4501	if (inhibit)
4502		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
4503	if (INTEL_GEN(engine->i915) < 11)
4504		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
4505					   CTX_CTRL_RS_CTX_ENABLE);
4506	regs[CTX_CONTEXT_CONTROL] = ctl;
4507
4508	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
4509}
4510
4511static void init_wa_bb_reg_state(u32 * const regs,
4512				 const struct intel_engine_cs *engine,
4513				 u32 pos_bb_per_ctx)
4514{
4515	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
4516
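	/*
	 * Point the context image at the workaround batch buffers: bit 0 of
	 * the per-ctx pointer marks it valid, and the indirect ctx entry
	 * packs the buffer size (in cachelines) into its low bits.
	 */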
4517	if (wa_ctx->per_ctx.size) {
4518		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4519
4520		regs[pos_bb_per_ctx] =
4521			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
4522	}
4523
4524	if (wa_ctx->indirect_ctx.size) {
4525		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4526
4527		regs[pos_bb_per_ctx + 2] =
4528			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
4529			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
4530
4531		regs[pos_bb_per_ctx + 4] =
4532			intel_lr_indirect_ctx_offset(engine) << 6;
4533	}
4534}
4535
4536static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
4537{
4538	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/*
		 * 64b PPGTT (48bit canonical):
		 * PDP0_DESCRIPTOR contains the base address of the PML4 and
		 * the other PDP descriptors are ignored.
		 */
4543		ASSIGN_CTX_PML4(ppgtt, regs);
4544	} else {
4545		ASSIGN_CTX_PDP(ppgtt, regs, 3);
4546		ASSIGN_CTX_PDP(ppgtt, regs, 2);
4547		ASSIGN_CTX_PDP(ppgtt, regs, 1);
4548		ASSIGN_CTX_PDP(ppgtt, regs, 0);
4549	}
4550}
4551
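/*
 * Contexts bound to the GGTT are backed by the aliasing PPGTT for their page
 * directory pointers; everything else carries its own full PPGTT.
 */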
4552static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
4553{
4554	if (i915_is_ggtt(vm))
4555		return i915_vm_to_ggtt(vm)->alias;
4556	else
4557		return i915_vm_to_ppgtt(vm);
4558}
4559
4560static void execlists_init_reg_state(u32 *regs,
4561				     const struct intel_context *ce,
4562				     const struct intel_engine_cs *engine,
4563				     const struct intel_ring *ring,
4564				     bool inhibit)
4565{
4566	/*
4567	 * A context is actually a big batch buffer with several
4568	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
4569	 * values we are setting here are only for the first context restore:
4570	 * on a subsequent save, the GPU will recreate this batchbuffer with new
4571	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
4572	 * we are not initializing here).
4573	 *
4574	 * Must keep consistent with virtual_update_register_offsets().
4575	 */
4576	set_offsets(regs, reg_offsets(engine), engine, inhibit);
4577
4578	init_common_reg_state(regs, engine, ring, inhibit);
4579	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
4580
4581	init_wa_bb_reg_state(regs, engine,
4582			     INTEL_GEN(engine->i915) >= 12 ?
4583			     GEN12_CTX_BB_PER_CTX_PTR :
4584			     CTX_BB_PER_CTX_PTR);
4585
4586	__reset_stop_ring(regs, engine);
4587}
4588
4589static int
4590populate_lr_context(struct intel_context *ce,
4591		    struct drm_i915_gem_object *ctx_obj,
4592		    struct intel_engine_cs *engine,
4593		    struct intel_ring *ring)
4594{
4595	bool inhibit = true;
4596	void *vaddr;
4597	int ret;
4598
4599	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
4600	if (IS_ERR(vaddr)) {
4601		ret = PTR_ERR(vaddr);
4602		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
4603		return ret;
4604	}
4605
4606	set_redzone(vaddr, engine);
4607
4608	if (engine->default_state) {
4609		void *defaults;
4610
4611		defaults = i915_gem_object_pin_map(engine->default_state,
4612						   I915_MAP_WB);
4613		if (IS_ERR(defaults)) {
4614			ret = PTR_ERR(defaults);
4615			goto err_unpin_ctx;
4616		}
4617
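		/*
		 * Start from the engine's golden default context image; its
		 * register state is already valid, so the first restore does
		 * not need to be inhibited below.
		 */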
4618		memcpy(vaddr, defaults, engine->context_size);
4619		i915_gem_object_unpin_map(engine->default_state);
4620		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
4621		inhibit = false;
4622	}
4623
	/*
	 * The second page of the context object contains some fields which
	 * must be set up prior to the first execution.
	 */
4626	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
4627				 ce, engine, ring, inhibit);
4628
4629	ret = 0;
4630err_unpin_ctx:
4631	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
4632	i915_gem_object_unpin_map(ctx_obj);
4633	return ret;
4634}
4635
4636static int __execlists_context_alloc(struct intel_context *ce,
4637				     struct intel_engine_cs *engine)
4638{
4639	struct drm_i915_gem_object *ctx_obj;
4640	struct intel_ring *ring;
4641	struct i915_vma *vma;
4642	u32 context_size;
4643	int ret;
4644
4645	GEM_BUG_ON(ce->state);
4646	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
4647
4648	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4649		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
4650
4651	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
4652	if (IS_ERR(ctx_obj))
4653		return PTR_ERR(ctx_obj);
4654
4655	vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
4656	if (IS_ERR(vma)) {
4657		ret = PTR_ERR(vma);
4658		goto error_deref_obj;
4659	}
4660
4661	if (!ce->timeline) {
4662		struct intel_timeline *tl;
4663
4664		tl = intel_timeline_create(engine->gt, NULL);
4665		if (IS_ERR(tl)) {
4666			ret = PTR_ERR(tl);
4667			goto error_deref_obj;
4668		}
4669
4670		ce->timeline = tl;
4671	}
4672
4673	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
4674	if (IS_ERR(ring)) {
4675		ret = PTR_ERR(ring);
4676		goto error_deref_obj;
4677	}
4678
4679	ret = populate_lr_context(ce, ctx_obj, engine, ring);
4680	if (ret) {
4681		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
4682		goto error_ring_free;
4683	}
4684
4685	ce->ring = ring;
4686	ce->state = vma;
4687
4688	return 0;
4689
4690error_ring_free:
4691	intel_ring_put(ring);
4692error_deref_obj:
4693	i915_gem_object_put(ctx_obj);
4694	return ret;
4695}
4696
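/*
 * A virtual engine carries at most one ready request at a time (ve->request);
 * this helper names the list, borrowed from the default priolist, on which
 * that request is parked until a sibling picks it up.
 */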
4697static struct list_head *virtual_queue(struct virtual_engine *ve)
4698{
4699	return &ve->base.execlists.default_priolist.requests[0];
4700}
4701
4702static void virtual_context_destroy(struct kref *kref)
4703{
4704	struct virtual_engine *ve =
4705		container_of(kref, typeof(*ve), context.ref);
4706	unsigned int n;
4707
4708	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4709	GEM_BUG_ON(ve->request);
4710	GEM_BUG_ON(ve->context.inflight);
4711
4712	for (n = 0; n < ve->num_siblings; n++) {
4713		struct intel_engine_cs *sibling = ve->siblings[n];
4714		struct rb_node *node = &ve->nodes[sibling->id].rb;
4715		unsigned long flags;
4716
4717		if (!ve->nodes[sibling->id].inserted)
4718			continue;
4719
4720		spin_lock_irqsave(&sibling->active.lock, flags);
4721
4722		/* Detachment is lazily performed in the execlists tasklet */
4723		if (ve->nodes[sibling->id].inserted) {
4724			rb_erase_cached(node, &sibling->execlists.virtual);
4725			ve->nodes[sibling->id].inserted = false;
4726		}
4727
4728		spin_unlock_irqrestore(&sibling->active.lock, flags);
4729	}
4730	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
4731
4732	if (ve->context.state)
4733		__execlists_context_fini(&ve->context);
4734	intel_context_fini(&ve->context);
4735
4736	intel_engine_fini_breadcrumbs(&ve->base);
4737	spin_lock_destroy(&ve->base.active.lock);
4738
4739	kfree(ve->bonds);
4740	kfree(ve);
4741}
4742
4743static void virtual_engine_initial_hint(struct virtual_engine *ve)
4744{
4745	int swp;
4746
4747	/*
4748	 * Pick a random sibling on starting to help spread the load around.
4749	 *
4750	 * New contexts are typically created with exactly the same order
4751	 * of siblings, and often started in batches. Due to the way we iterate
	 * the array of siblings when submitting requests, sibling[0] is
4753	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
4754	 * randomised across the system, we also help spread the load by the
4755	 * first engine we inspect being different each time.
4756	 *
4757	 * NB This does not force us to execute on this engine, it will just
4758	 * typically be the first we inspect for submission.
4759	 */
4760	swp = prandom_u32_max(ve->num_siblings);
4761	if (!swp)
4762		return;
4763
4764	swap(ve->siblings[swp], ve->siblings[0]);
4765	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
4766		virtual_update_register_offsets(ve->context.lrc_reg_state,
4767						ve->siblings[0]);
4768}
4769
4770static int virtual_context_alloc(struct intel_context *ce)
4771{
4772	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4773
4774	return __execlists_context_alloc(ce, ve->siblings[0]);
4775}
4776
4777static int virtual_context_pin(struct intel_context *ce)
4778{
4779	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4780	int err;
4781
4782	/* Note: we must use a real engine class for setting up reg state */
4783	err = __execlists_context_pin(ce, ve->siblings[0]);
4784	if (err)
4785		return err;
4786
4787	virtual_engine_initial_hint(ve);
4788	return 0;
4789}
4790
4791static void virtual_context_enter(struct intel_context *ce)
4792{
4793	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4794	unsigned int n;
4795
4796	for (n = 0; n < ve->num_siblings; n++)
4797		intel_engine_pm_get(ve->siblings[n]);
4798
4799	intel_timeline_enter(ce->timeline);
4800}
4801
4802static void virtual_context_exit(struct intel_context *ce)
4803{
4804	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4805	unsigned int n;
4806
4807	intel_timeline_exit(ce->timeline);
4808
4809	for (n = 0; n < ve->num_siblings; n++)
4810		intel_engine_pm_put(ve->siblings[n]);
4811}
4812
4813static const struct intel_context_ops virtual_context_ops = {
4814	.alloc = virtual_context_alloc,
4815
4816	.pin = virtual_context_pin,
4817	.unpin = execlists_context_unpin,
4818
4819	.enter = virtual_context_enter,
4820	.exit = virtual_context_exit,
4821
4822	.destroy = virtual_context_destroy,
4823};
4824
4825static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
4826{
4827	struct i915_request *rq;
4828	intel_engine_mask_t mask;
4829
4830	rq = READ_ONCE(ve->request);
4831	if (!rq)
4832		return 0;
4833
4834	/* The rq is ready for submission; rq->execution_mask is now stable. */
4835	mask = rq->execution_mask;
4836	if (unlikely(!mask)) {
4837		/* Invalid selection, submit to a random engine in error */
4838		i915_request_skip(rq, -ENODEV);
4839		mask = ve->siblings[0]->mask;
4840	}
4841
4842	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
4843		     rq->fence.context, rq->fence.seqno,
4844		     mask, ve->base.execlists.queue_priority_hint);
4845
4846	return mask;
4847}
4848
4849static void virtual_submission_tasklet(unsigned long data)
4850{
4851	struct virtual_engine * const ve = (struct virtual_engine *)data;
4852	const int prio = ve->base.execlists.queue_priority_hint;
4853	intel_engine_mask_t mask;
4854	unsigned int n;
4855
4856	rcu_read_lock();
4857	mask = virtual_submission_mask(ve);
4858	rcu_read_unlock();
4859	if (unlikely(!mask))
4860		return;
4861
4862#ifdef __NetBSD__
4863	int s = splsoftserial(); /* block tasklets=softints */
4864#else
4865	local_irq_disable();
4866#endif
4867	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
4868		struct intel_engine_cs *sibling = ve->siblings[n];
4869		struct ve_node * const node = &ve->nodes[sibling->id];
4870		struct rb_node **parent, *rb;
4871		bool first;
4872
4873		if (unlikely(!(mask & sibling->mask))) {
4874			if (node->inserted) {
4875				spin_lock(&sibling->active.lock);
4876				rb_erase_cached(&node->rb,
4877						&sibling->execlists.virtual);
4878				node->inserted = false;
4879				spin_unlock(&sibling->active.lock);
4880			}
4881			continue;
4882		}
4883
4884		spin_lock(&sibling->active.lock);
4885
4886		if (node->inserted) {
4887			/*
4888			 * Cheat and avoid rebalancing the tree if we can
4889			 * reuse this node in situ.
4890			 */
4891			first = rb_first_cached(&sibling->execlists.virtual) ==
4892				&node->rb;
4893			if (prio == node->prio || (prio > node->prio && first))
4894				goto submit_engine;
4895
4896			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
4897			node->inserted = false;
4898		}
4899
4900#ifdef __NetBSD__
4901		__USE(parent);
4902		__USE(rb);
4903		struct ve_node *collision __diagused;
4904		/* XXX kludge to get insertion order */
4905		node->order = ve->order++;
4906		collision = rb_tree_insert_node(
4907			&sibling->execlists.virtual.rb_root.rbr_tree,
4908			node);
4909		KASSERT(collision == node);
4910		node->inserted = true;
4911		first = rb_tree_find_node_geq(
4912			&sibling->execlists.virtual.rb_root.rbr_tree,
4913			&node->prio) == node;
4914#else
4915		rb = NULL;
4916		first = true;
4917		parent = &sibling->execlists.virtual.rb_root.rb_node;
4918		while (*parent) {
4919			struct ve_node *other;
4920
4921			rb = *parent;
4922			other = rb_entry(rb, typeof(*other), rb);
4923			if (prio > other->prio) {
4924				parent = &rb->rb_left;
4925			} else {
4926				parent = &rb->rb_right;
4927				first = false;
4928			}
4929		}
4930
4931		rb_link_node(&node->rb, rb, parent);
4932		rb_insert_color_cached(&node->rb,
4933				       &sibling->execlists.virtual,
4934				       first);
4935#endif
4936
4937submit_engine:
4938		GEM_BUG_ON(!node->inserted);
4939		node->prio = prio;
4940		if (first && prio > sibling->execlists.queue_priority_hint) {
4941			sibling->execlists.queue_priority_hint = prio;
4942			tasklet_hi_schedule(&sibling->execlists.tasklet);
4943		}
4944
4945		spin_unlock(&sibling->active.lock);
4946	}
4947#ifdef __NetBSD__
4948	splx(s);
4949#else
4950	local_irq_enable();
4951#endif
4952}
4953
4954static void virtual_submit_request(struct i915_request *rq)
4955{
4956	struct virtual_engine *ve = to_virtual_engine(rq->engine);
4957	struct i915_request *old;
4958	unsigned long flags;
4959
4960	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
4961		     rq->fence.context,
4962		     rq->fence.seqno);
4963
4964	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
4965
4966	spin_lock_irqsave(&ve->base.active.lock, flags);
4967
4968	old = ve->request;
4969	if (old) { /* background completion event from preempt-to-busy */
4970		GEM_BUG_ON(!i915_request_completed(old));
4971		__i915_request_submit(old);
4972		i915_request_put(old);
4973	}
4974
4975	if (i915_request_completed(rq)) {
4976		__i915_request_submit(rq);
4977
4978		ve->base.execlists.queue_priority_hint = INT_MIN;
4979		ve->request = NULL;
4980	} else {
4981		ve->base.execlists.queue_priority_hint = rq_prio(rq);
4982		ve->request = i915_request_get(rq);
4983
4984		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4985		list_move_tail(&rq->sched.link, virtual_queue(ve));
4986
4987		tasklet_schedule(&ve->base.execlists.tasklet);
4988	}
4989
4990	spin_unlock_irqrestore(&ve->base.active.lock, flags);
4991}
4992
4993static struct ve_bond *
4994virtual_find_bond(struct virtual_engine *ve,
4995		  const struct intel_engine_cs *master)
4996{
4997	int i;
4998
4999	for (i = 0; i < ve->num_bonds; i++) {
5000		if (ve->bonds[i].master == master)
5001			return &ve->bonds[i];
5002	}
5003
5004	return NULL;
5005}
5006
5007static void
5008virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5009{
5010	struct virtual_engine *ve = to_virtual_engine(rq->engine);
5011	intel_engine_mask_t allowed, exec;
5012	struct ve_bond *bond;
5013
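	/*
	 * By default the bonded request may run on any sibling except the
	 * engine executing the signaler; an explicit bond narrows the set
	 * further to the recorded sibling_mask.
	 */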
5014	allowed = ~to_request(signal)->engine->mask;
5015
5016	bond = virtual_find_bond(ve, to_request(signal)->engine);
5017	if (bond)
5018		allowed &= bond->sibling_mask;
5019
5020	/* Restrict the bonded request to run on only the available engines */
5021	exec = READ_ONCE(rq->execution_mask);
5022	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5023		;
5024
5025	/* Prevent the master from being re-run on the bonded engines */
5026	to_request(signal)->execution_mask &= ~allowed;
5027}
5028
5029struct intel_context *
5030intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5031			       unsigned int count)
5032{
5033	struct virtual_engine *ve;
5034	unsigned int n;
5035	int err;
5036
5037	if (count == 0)
5038		return ERR_PTR(-EINVAL);
5039
5040	if (count == 1)
5041		return intel_context_create(siblings[0]);
5042
5043	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5044	if (!ve)
5045		return ERR_PTR(-ENOMEM);
5046
5047	ve->base.i915 = siblings[0]->i915;
5048	ve->base.gt = siblings[0]->gt;
5049	ve->base.uncore = siblings[0]->uncore;
5050	ve->base.id = -1;
5051
5052	ve->base.class = OTHER_CLASS;
5053	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5054	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5055	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5056
5057	/*
5058	 * The decision on whether to submit a request using semaphores
5059	 * depends on the saturated state of the engine. We only compute
	 * this during HW submission of the request, and we need this
	 * state to be applied globally to all requests being submitted
5062	 * to this engine. Virtual engines encompass more than one physical
5063	 * engine and so we cannot accurately tell in advance if one of those
5064	 * engines is already saturated and so cannot afford to use a semaphore
5065	 * and be pessimized in priority for doing so -- if we are the only
5066	 * context using semaphores after all other clients have stopped, we
5067	 * will be starved on the saturated system. Such a global switch for
5068	 * semaphores is less than ideal, but alas is the current compromise.
5069	 */
5070	ve->base.saturated = ALL_ENGINES;
5071
5072	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5073
5074	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5075	intel_engine_init_breadcrumbs(&ve->base);
5076	intel_engine_init_execlists(&ve->base);
5077
5078	ve->base.cops = &virtual_context_ops;
5079	ve->base.request_alloc = execlists_request_alloc;
5080
5081	ve->base.schedule = i915_schedule;
5082	ve->base.submit_request = virtual_submit_request;
5083	ve->base.bond_execute = virtual_bond_execute;
5084
5085	INIT_LIST_HEAD(virtual_queue(ve));
5086	ve->base.execlists.queue_priority_hint = INT_MIN;
5087	tasklet_init(&ve->base.execlists.tasklet,
5088		     virtual_submission_tasklet,
5089		     (unsigned long)ve);
5090
5091	intel_context_init(&ve->context, &ve->base);
5092
5093	for (n = 0; n < count; n++) {
5094		struct intel_engine_cs *sibling = siblings[n];
5095
5096		GEM_BUG_ON(!is_power_of_2(sibling->mask));
5097		if (sibling->mask & ve->base.mask) {
5098			DRM_DEBUG("duplicate %s entry in load balancer\n",
5099				  sibling->name);
5100			err = -EINVAL;
5101			goto err_put;
5102		}
5103
5104		/*
5105		 * The virtual engine implementation is tightly coupled to
		 * the execlists backend -- we push requests directly
5107		 * into a tree inside each physical engine. We could support
5108		 * layering if we handle cloning of the requests and
5109		 * submitting a copy into each backend.
5110		 */
5111		if (sibling->execlists.tasklet.func !=
5112		    execlists_submission_tasklet) {
5113			err = -ENODEV;
5114			goto err_put;
5115		}
5116
5117		GEM_BUG_ON(!ve->nodes[sibling->id].inserted);
5118		ve->nodes[sibling->id].inserted = false;
5119
5120		ve->siblings[ve->num_siblings++] = sibling;
5121		ve->base.mask |= sibling->mask;
5122
5123		/*
5124		 * All physical engines must be compatible for their emission
5125		 * functions (as we build the instructions during request
5126		 * construction and do not alter them before submission
5127		 * on the physical engine). We use the engine class as a guide
5128		 * here, although that could be refined.
5129		 */
5130		if (ve->base.class != OTHER_CLASS) {
5131			if (ve->base.class != sibling->class) {
5132				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5133					  sibling->class, ve->base.class);
5134				err = -EINVAL;
5135				goto err_put;
5136			}
5137			continue;
5138		}
5139
5140		ve->base.class = sibling->class;
5141		ve->base.uabi_class = sibling->uabi_class;
5142		snprintf(ve->base.name, sizeof(ve->base.name),
5143			 "v%dx%d", ve->base.class, count);
5144		ve->base.context_size = sibling->context_size;
5145
5146		ve->base.emit_bb_start = sibling->emit_bb_start;
5147		ve->base.emit_flush = sibling->emit_flush;
5148		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5149		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5150		ve->base.emit_fini_breadcrumb_dw =
5151			sibling->emit_fini_breadcrumb_dw;
5152
5153		ve->base.flags = sibling->flags;
5154	}
5155
5156	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5157
5158	return &ve->context;
5159
5160err_put:
5161	intel_context_put(&ve->context);
5162	return ERR_PTR(err);
5163}
5164
5165struct intel_context *
5166intel_execlists_clone_virtual(struct intel_engine_cs *src)
5167{
5168	struct virtual_engine *se = to_virtual_engine(src);
5169	struct intel_context *dst;
5170
5171	dst = intel_execlists_create_virtual(se->siblings,
5172					     se->num_siblings);
5173	if (IS_ERR(dst))
5174		return dst;
5175
5176	if (se->num_bonds) {
5177		struct virtual_engine *de = to_virtual_engine(dst->engine);
5178
5179		de->bonds = kmemdup(se->bonds,
5180				    sizeof(*se->bonds) * se->num_bonds,
5181				    GFP_KERNEL);
5182		if (!de->bonds) {
5183			intel_context_put(dst);
5184			return ERR_PTR(-ENOMEM);
5185		}
5186
5187		de->num_bonds = se->num_bonds;
5188	}
5189
5190	return dst;
5191}
5192
5193int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5194				     const struct intel_engine_cs *master,
5195				     const struct intel_engine_cs *sibling)
5196{
5197	struct virtual_engine *ve = to_virtual_engine(engine);
5198	struct ve_bond *bond;
5199	int n;
5200
5201	/* Sanity check the sibling is part of the virtual engine */
5202	for (n = 0; n < ve->num_siblings; n++)
5203		if (sibling == ve->siblings[n])
5204			break;
5205	if (n == ve->num_siblings)
5206		return -EINVAL;
5207
5208	bond = virtual_find_bond(ve, master);
5209	if (bond) {
5210		bond->sibling_mask |= sibling->mask;
5211		return 0;
5212	}
5213
5214	bond = krealloc(ve->bonds,
5215			sizeof(*bond) * (ve->num_bonds + 1),
5216			GFP_KERNEL);
5217	if (!bond)
5218		return -ENOMEM;
5219
5220	bond[ve->num_bonds].master = master;
5221	bond[ve->num_bonds].sibling_mask = sibling->mask;
5222
5223	ve->bonds = bond;
5224	ve->num_bonds++;
5225
5226	return 0;
5227}
5228
5229struct intel_engine_cs *
5230intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5231				 unsigned int sibling)
5232{
5233	struct virtual_engine *ve = to_virtual_engine(engine);
5234
5235	if (sibling >= ve->num_siblings)
5236		return NULL;
5237
5238	return ve->siblings[sibling];
5239}
5240
5241void intel_execlists_show_requests(struct intel_engine_cs *engine,
5242				   struct drm_printer *m,
5243				   void (*show_request)(struct drm_printer *m,
5244							struct i915_request *rq,
5245							const char *prefix),
5246				   unsigned int max)
5247{
5248	const struct intel_engine_execlists *execlists = &engine->execlists;
5249	struct i915_request *rq, *last;
5250	unsigned long flags;
5251	unsigned int count;
5252	struct rb_node *rb;
5253
5254	spin_lock_irqsave(&engine->active.lock, flags);
5255
5256	last = NULL;
5257	count = 0;
5258	list_for_each_entry(rq, &engine->active.requests, sched.link) {
5259		if (count++ < max - 1)
5260			show_request(m, rq, "\t\tE ");
5261		else
5262			last = rq;
5263	}
5264	if (last) {
5265		if (count > max) {
5266			drm_printf(m,
5267				   "\t\t...skipping %d executing requests...\n",
5268				   count - max);
5269		}
5270		show_request(m, last, "\t\tE ");
5271	}
5272
5273	last = NULL;
5274	count = 0;
5275	if (execlists->queue_priority_hint != INT_MIN)
5276		drm_printf(m, "\t\tQueue priority hint: %d\n",
5277			   execlists->queue_priority_hint);
5278	for (rb = rb_first_cached(&execlists->queue);
5279	     rb;
5280	     rb = rb_next2(&execlists->queue.rb_root, rb)) {
5281		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5282		int i;
5283
5284		priolist_for_each_request(rq, p, i) {
5285			if (count++ < max - 1)
5286				show_request(m, rq, "\t\tQ ");
5287			else
5288				last = rq;
5289		}
5290	}
5291	if (last) {
5292		if (count > max) {
5293			drm_printf(m,
5294				   "\t\t...skipping %d queued requests...\n",
5295				   count - max);
5296		}
5297		show_request(m, last, "\t\tQ ");
5298	}
5299
5300	last = NULL;
5301	count = 0;
5302	for (rb = rb_first_cached(&execlists->virtual);
5303	     rb;
5304	     rb = rb_next2(&execlists->virtual.rb_root, rb)) {
5305		struct virtual_engine *ve =
5306			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5307		struct i915_request *rq = READ_ONCE(ve->request);
5308
5309		if (rq) {
5310			if (count++ < max - 1)
5311				show_request(m, rq, "\t\tV ");
5312			else
5313				last = rq;
5314		}
5315	}
5316	if (last) {
5317		if (count > max) {
5318			drm_printf(m,
5319				   "\t\t...skipping %d virtual requests...\n",
5320				   count - max);
5321		}
5322		show_request(m, last, "\t\tV ");
5323	}
5324
5325	spin_unlock_irqrestore(&engine->active.lock, flags);
5326}
5327
5328void intel_lr_context_reset(struct intel_engine_cs *engine,
5329			    struct intel_context *ce,
5330			    u32 head,
5331			    bool scrub)
5332{
5333	GEM_BUG_ON(!intel_context_is_pinned(ce));
5334
5335	/*
5336	 * We want a simple context + ring to execute the breadcrumb update.
5337	 * We cannot rely on the context being intact across the GPU hang,
5338	 * so clear it and rebuild just what we need for the breadcrumb.
5339	 * All pending requests for this context will be zapped, and any
5340	 * future request will be after userspace has had the opportunity
5341	 * to recreate its own state.
5342	 */
5343	if (scrub)
5344		restore_default_state(ce, engine);
5345
5346	/* Rerun the request; its payload has been neutered (if guilty). */
5347	__execlists_update_reg_state(ce, engine, head);
5348}
5349
5350bool
5351intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
5352{
5353	return engine->set_default_submission ==
5354	       intel_execlists_set_default_submission;
5355}
5356
5357#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5358#include "selftest_lrc.c"
5359#endif
5360