1/*	$NetBSD: selftest_lrc.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/
2
3/*
4 * SPDX-License-Identifier: MIT
5 *
 * Copyright © 2018 Intel Corporation
7 */
8
9#include <sys/cdefs.h>
10__KERNEL_RCSID(0, "$NetBSD: selftest_lrc.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");
11
12#include <linux/prime_numbers.h>
13
14#include "gem/i915_gem_pm.h"
15#include "gt/intel_engine_heartbeat.h"
16#include "gt/intel_reset.h"
17
18#include "i915_selftest.h"
19#include "selftests/i915_random.h"
20#include "selftests/igt_flush_test.h"
21#include "selftests/igt_live_test.h"
22#include "selftests/igt_spinner.h"
23#include "selftests/lib_sw_fence.h"
24
25#include "gem/selftests/igt_gem_utils.h"
26#include "gem/selftests/mock_context.h"
27
28#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
29#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
30
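/*
 * Allocate a page of internal memory, request cacheable coherency and pin
 * it into the global GTT. The pinned vma is returned for use as a scratch
 * buffer that the GPU can write and the CPU can read back.
 */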
31static struct i915_vma *create_scratch(struct intel_gt *gt)
32{
33	struct drm_i915_gem_object *obj;
34	struct i915_vma *vma;
35	int err;
36
37	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
38	if (IS_ERR(obj))
39		return ERR_CAST(obj);
40
41	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
42
43	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
44	if (IS_ERR(vma)) {
45		i915_gem_object_put(obj);
46		return vma;
47	}
48
49	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
50	if (err) {
51		i915_gem_object_put(obj);
52		return ERR_PTR(err);
53	}
54
55	return vma;
56}
57
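/*
 * Temporarily silence the heartbeat: save and zero the heartbeat interval,
 * take an engine-pm wakeref and park the heartbeat worker, so that (as far
 * as possible) only the selftest submits work to the engine.
 * engine_heartbeat_enable() undoes this.
 */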
58static void engine_heartbeat_disable(struct intel_engine_cs *engine,
59				     unsigned long *saved)
60{
61	*saved = engine->props.heartbeat_interval_ms;
62	engine->props.heartbeat_interval_ms = 0;
63
64	intel_engine_pm_get(engine);
65	intel_engine_park_heartbeat(engine);
66}
67
68static void engine_heartbeat_enable(struct intel_engine_cs *engine,
69				    unsigned long saved)
70{
71	intel_engine_pm_put(engine);
72
73	engine->props.heartbeat_interval_ms = saved;
74}
75
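/*
 * Smoke test: on each engine, submit a spinning request on a fresh context,
 * check that it starts executing, then end the spin and flush.
 */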
76static int live_sanitycheck(void *arg)
77{
78	struct intel_gt *gt = arg;
79	struct intel_engine_cs *engine;
80	enum intel_engine_id id;
81	struct igt_spinner spin;
82	int err = 0;
83
84	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
85		return 0;
86
87	if (igt_spinner_init(&spin, gt))
88		return -ENOMEM;
89
90	for_each_engine(engine, gt, id) {
91		struct intel_context *ce;
92		struct i915_request *rq;
93
94		ce = intel_context_create(engine);
95		if (IS_ERR(ce)) {
96			err = PTR_ERR(ce);
97			break;
98		}
99
100		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
101		if (IS_ERR(rq)) {
102			err = PTR_ERR(rq);
103			goto out_ctx;
104		}
105
106		i915_request_add(rq);
107		if (!igt_wait_for_spinner(&spin, rq)) {
108			GEM_TRACE("spinner failed to start\n");
109			GEM_TRACE_DUMP();
110			intel_gt_set_wedged(gt);
111			err = -EIO;
112			goto out_ctx;
113		}
114
115		igt_spinner_end(&spin);
116		if (igt_flush_test(gt->i915)) {
117			err = -EIO;
118			goto out_ctx;
119		}
120
121out_ctx:
122		intel_context_put(ce);
123		if (err)
124			break;
125	}
126
127	igt_spinner_fini(&spin);
128	return err;
129}
130
131static int live_unlite_restore(struct intel_gt *gt, int prio)
132{
133	struct intel_engine_cs *engine;
134	enum intel_engine_id id;
135	struct igt_spinner spin;
136	int err = -ENOMEM;
137
138	/*
139	 * Check that we can correctly context switch between 2 instances
140	 * on the same engine from the same parent context.
141	 */
142
143	if (igt_spinner_init(&spin, gt))
144		return err;
145
146	err = 0;
147	for_each_engine(engine, gt, id) {
148		struct intel_context *ce[2] = {};
149		struct i915_request *rq[2];
150		struct igt_live_test t;
151		unsigned long saved;
152		int n;
153
154		if (prio && !intel_engine_has_preemption(engine))
155			continue;
156
157		if (!intel_engine_can_store_dword(engine))
158			continue;
159
160		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
161			err = -EIO;
162			break;
163		}
164		engine_heartbeat_disable(engine, &saved);
165
166		for (n = 0; n < ARRAY_SIZE(ce); n++) {
167			struct intel_context *tmp;
168
169			tmp = intel_context_create(engine);
170			if (IS_ERR(tmp)) {
171				err = PTR_ERR(tmp);
172				goto err_ce;
173			}
174
175			err = intel_context_pin(tmp);
176			if (err) {
177				intel_context_put(tmp);
178				goto err_ce;
179			}
180
181			/*
			 * Set up the pair of contexts such that if we
183			 * lite-restore using the RING_TAIL from ce[1] it
184			 * will execute garbage from ce[0]->ring.
185			 */
186			memset(tmp->ring->vaddr,
187			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
188			       tmp->ring->vma->size);
189
190			ce[n] = tmp;
191		}
192		GEM_BUG_ON(!ce[1]->ring->size);
193		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
194		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
195
196		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
197		if (IS_ERR(rq[0])) {
198			err = PTR_ERR(rq[0]);
199			goto err_ce;
200		}
201
202		i915_request_get(rq[0]);
203		i915_request_add(rq[0]);
204		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
205
206		if (!igt_wait_for_spinner(&spin, rq[0])) {
207			i915_request_put(rq[0]);
208			goto err_ce;
209		}
210
211		rq[1] = i915_request_create(ce[1]);
212		if (IS_ERR(rq[1])) {
213			err = PTR_ERR(rq[1]);
214			i915_request_put(rq[0]);
215			goto err_ce;
216		}
217
218		if (!prio) {
219			/*
220			 * Ensure we do the switch to ce[1] on completion.
221			 *
222			 * rq[0] is already submitted, so this should reduce
223			 * to a no-op (a wait on a request on the same engine
224			 * uses the submit fence, not the completion fence),
225			 * but it will install a dependency on rq[1] for rq[0]
226			 * that will prevent the pair being reordered by
227			 * timeslicing.
228			 */
229			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
230		}
231
232		i915_request_get(rq[1]);
233		i915_request_add(rq[1]);
234		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
235		i915_request_put(rq[0]);
236
237		if (prio) {
238			struct i915_sched_attr attr = {
239				.priority = prio,
240			};
241
242			/* Alternatively preempt the spinner with ce[1] */
243			engine->schedule(rq[1], &attr);
244		}
245
246		/* And switch back to ce[0] for good measure */
247		rq[0] = i915_request_create(ce[0]);
248		if (IS_ERR(rq[0])) {
249			err = PTR_ERR(rq[0]);
250			i915_request_put(rq[1]);
251			goto err_ce;
252		}
253
254		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
255		i915_request_get(rq[0]);
256		i915_request_add(rq[0]);
257		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
258		i915_request_put(rq[1]);
259		i915_request_put(rq[0]);
260
261err_ce:
262		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
263		igt_spinner_end(&spin);
264		for (n = 0; n < ARRAY_SIZE(ce); n++) {
265			if (IS_ERR_OR_NULL(ce[n]))
266				break;
267
268			intel_context_unpin(ce[n]);
269			intel_context_put(ce[n]);
270		}
271
272		engine_heartbeat_enable(engine, saved);
273		if (igt_live_test_end(&t))
274			err = -EIO;
275		if (err)
276			break;
277	}
278
279	igt_spinner_fini(&spin);
280	return err;
281}
282
283static int live_unlite_switch(void *arg)
284{
285	return live_unlite_restore(arg, 0);
286}
287
288static int live_unlite_preempt(void *arg)
289{
290	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
291}
292
293static int live_hold_reset(void *arg)
294{
295	struct intel_gt *gt = arg;
296	struct intel_engine_cs *engine;
297	enum intel_engine_id id;
298	struct igt_spinner spin;
299	int err = 0;
300
301	/*
302	 * In order to support offline error capture for fast preempt reset,
303	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
305	 */
306
307	if (!intel_has_reset_engine(gt))
308		return 0;
309
310	if (igt_spinner_init(&spin, gt))
311		return -ENOMEM;
312
313	for_each_engine(engine, gt, id) {
314		struct intel_context *ce;
315		unsigned long heartbeat;
316		struct i915_request *rq;
317
318		ce = intel_context_create(engine);
319		if (IS_ERR(ce)) {
320			err = PTR_ERR(ce);
321			break;
322		}
323
324		engine_heartbeat_disable(engine, &heartbeat);
325
326		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
327		if (IS_ERR(rq)) {
328			err = PTR_ERR(rq);
329			goto out;
330		}
331		i915_request_add(rq);
332
333		if (!igt_wait_for_spinner(&spin, rq)) {
334			intel_gt_set_wedged(gt);
335			err = -ETIME;
336			goto out;
337		}
338
339		/* We have our request executing, now remove it and reset */
340
341		if (test_and_set_bit(I915_RESET_ENGINE + id,
342				     &gt->reset.flags)) {
343			intel_gt_set_wedged(gt);
344			err = -EBUSY;
345			goto out;
346		}
347		tasklet_disable(&engine->execlists.tasklet);
348
349		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
350		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
351
352		i915_request_get(rq);
353		execlists_hold(engine, rq);
354		GEM_BUG_ON(!i915_request_on_hold(rq));
355
356		intel_engine_reset(engine, NULL);
357		GEM_BUG_ON(rq->fence.error != -EIO);
358
359		tasklet_enable(&engine->execlists.tasklet);
360		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
361				      &gt->reset.flags);
362
363		/* Check that we do not resubmit the held request */
		if (i915_request_wait(rq, 0, HZ / 5) != -ETIME) {
365			pr_err("%s: on hold request completed!\n",
366			       engine->name);
367			i915_request_put(rq);
368			err = -EIO;
369			goto out;
370		}
371		GEM_BUG_ON(!i915_request_on_hold(rq));
372
373		/* But is resubmitted on release */
374		execlists_unhold(engine, rq);
375		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
376			pr_err("%s: held request did not complete!\n",
377			       engine->name);
378			intel_gt_set_wedged(gt);
379			err = -ETIME;
380		}
381		i915_request_put(rq);
382
383out:
384		engine_heartbeat_enable(engine, heartbeat);
385		intel_context_put(ce);
386		if (err)
387			break;
388	}
389
390	igt_spinner_fini(&spin);
391	return err;
392}
393
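/*
 * Emit one link of the semaphore chain: with arbitration enabled, poll
 * dword @idx of @vma until it becomes non-zero, then (for idx > 0) write 1
 * into dword idx - 1 to release the previous link. Arbitration is enabled
 * around the wait, presumably so the waiter remains preemptible, and is
 * disabled again afterwards.
 */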
394static int
395emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
396{
397	u32 *cs;
398
399	cs = intel_ring_begin(rq, 10);
400	if (IS_ERR(cs))
401		return PTR_ERR(cs);
402
403	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
404
405	*cs++ = MI_SEMAPHORE_WAIT |
406		MI_SEMAPHORE_GLOBAL_GTT |
407		MI_SEMAPHORE_POLL |
408		MI_SEMAPHORE_SAD_NEQ_SDD;
409	*cs++ = 0;
410	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
411	*cs++ = 0;
412
413	if (idx > 0) {
414		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
415		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
416		*cs++ = 0;
417		*cs++ = 1;
418	} else {
419		*cs++ = MI_NOOP;
420		*cs++ = MI_NOOP;
421		*cs++ = MI_NOOP;
422		*cs++ = MI_NOOP;
423	}
424
425	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
426
427	intel_ring_advance(rq, cs);
428	return 0;
429}
430
431static struct i915_request *
432semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
433{
434	struct intel_context *ce;
435	struct i915_request *rq;
436	int err;
437
438	ce = intel_context_create(engine);
439	if (IS_ERR(ce))
440		return ERR_CAST(ce);
441
442	rq = intel_context_create_request(ce);
443	if (IS_ERR(rq))
444		goto out_ce;
445
446	err = 0;
447	if (rq->engine->emit_init_breadcrumb)
448		err = rq->engine->emit_init_breadcrumb(rq);
449	if (err == 0)
450		err = emit_semaphore_chain(rq, vma, idx);
451	if (err == 0)
452		i915_request_get(rq);
453	i915_request_add(rq);
454	if (err)
455		rq = ERR_PTR(err);
456
457out_ce:
458	intel_context_put(ce);
459	return rq;
460}
461
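/*
 * From the engine's kernel context, write 1 into dword idx - 1 of @vma at
 * the requested priority, releasing the tail of a semaphore chain built
 * with semaphore_queue().
 */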
462static int
463release_queue(struct intel_engine_cs *engine,
464	      struct i915_vma *vma,
465	      int idx, int prio)
466{
467	struct i915_sched_attr attr = {
468		.priority = prio,
469	};
470	struct i915_request *rq;
471	u32 *cs;
472
473	rq = intel_engine_create_kernel_request(engine);
474	if (IS_ERR(rq))
475		return PTR_ERR(rq);
476
477	cs = intel_ring_begin(rq, 4);
478	if (IS_ERR(cs)) {
479		i915_request_add(rq);
480		return PTR_ERR(cs);
481	}
482
483	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
484	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
485	*cs++ = 0;
486	*cs++ = 1;
487
488	intel_ring_advance(rq, cs);
489
490	i915_request_get(rq);
491	i915_request_add(rq);
492
493	local_bh_disable();
494	engine->schedule(rq, &attr);
495	local_bh_enable(); /* kick tasklet */
496
497	i915_request_put(rq);
498
499	return 0;
500}
501
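/*
 * Build a chain of semaphore waiters: one head request on @outer, then
 * @count more on every engine, each spinning on its own dword. Releasing
 * the tail starts a cascade of signals back down to the head; since the
 * waiters block their engines, the cascade can only complete if the
 * scheduler timeslices between the queued contexts.
 */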
502static int
503slice_semaphore_queue(struct intel_engine_cs *outer,
504		      struct i915_vma *vma,
505		      int count)
506{
507	struct intel_engine_cs *engine;
508	struct i915_request *head;
509	enum intel_engine_id id;
510	int err, i, n = 0;
511
512	head = semaphore_queue(outer, vma, n++);
513	if (IS_ERR(head))
514		return PTR_ERR(head);
515
516	for_each_engine(engine, outer->gt, id) {
517		for (i = 0; i < count; i++) {
518			struct i915_request *rq;
519
520			rq = semaphore_queue(engine, vma, n++);
521			if (IS_ERR(rq)) {
522				err = PTR_ERR(rq);
523				goto out;
524			}
525
526			i915_request_put(rq);
527		}
528	}
529
530	err = release_queue(outer, vma, n, INT_MAX);
531	if (err)
532		goto out;
533
534	if (i915_request_wait(head, 0,
535			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
536		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
537		       count, n);
538		GEM_TRACE_DUMP();
539		intel_gt_set_wedged(outer->gt);
540		err = -EIO;
541	}
542
543out:
544	i915_request_put(head);
545	return err;
546}
547
548static int live_timeslice_preempt(void *arg)
549{
550	struct intel_gt *gt = arg;
551	struct drm_i915_gem_object *obj;
552	struct i915_vma *vma;
553	void *vaddr;
554	int err = 0;
555	int count;
556
557	/*
558	 * If a request takes too long, we would like to give other users
559	 * a fair go on the GPU. In particular, users may create batches
560	 * that wait upon external input, where that input may even be
561	 * supplied by another GPU job. To avoid blocking forever, we
562	 * need to preempt the current task and replace it with another
563	 * ready task.
564	 */
565	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
566		return 0;
567
568	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
569	if (IS_ERR(obj))
570		return PTR_ERR(obj);
571
572	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
573	if (IS_ERR(vma)) {
574		err = PTR_ERR(vma);
575		goto err_obj;
576	}
577
578	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
579	if (IS_ERR(vaddr)) {
580		err = PTR_ERR(vaddr);
581		goto err_obj;
582	}
583
584	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
585	if (err)
586		goto err_map;
587
588	for_each_prime_number_from(count, 1, 16) {
589		struct intel_engine_cs *engine;
590		enum intel_engine_id id;
591
592		for_each_engine(engine, gt, id) {
593			unsigned long saved;
594
595			if (!intel_engine_has_preemption(engine))
596				continue;
597
598			memset(vaddr, 0, PAGE_SIZE);
599
600			engine_heartbeat_disable(engine, &saved);
601			err = slice_semaphore_queue(engine, vma, count);
602			engine_heartbeat_enable(engine, saved);
603			if (err)
604				goto err_pin;
605
606			if (igt_flush_test(gt->i915)) {
607				err = -EIO;
608				goto err_pin;
609			}
610		}
611	}
612
613err_pin:
614	i915_vma_unpin(vma);
615err_map:
616	i915_gem_object_unpin_map(obj);
617err_obj:
618	i915_gem_object_put(obj);
619	return err;
620}
621
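/*
 * Submit an empty request on the engine's kernel context; used to occupy
 * an ELSP port without doing any real work.
 */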
622static struct i915_request *nop_request(struct intel_engine_cs *engine)
623{
624	struct i915_request *rq;
625
626	rq = intel_engine_create_kernel_request(engine);
627	if (IS_ERR(rq))
628		return rq;
629
630	i915_request_get(rq);
631	i915_request_add(rq);
632
633	return rq;
634}
635
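/*
 * Poll, flushing the submission tasklet each time, until @rq has been
 * submitted to the hardware or the relative @timeout (in jiffies) expires.
 */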
636static int wait_for_submit(struct intel_engine_cs *engine,
637			   struct i915_request *rq,
638			   unsigned long timeout)
639{
640	timeout += jiffies;
641	do {
642		cond_resched();
643		intel_engine_flush_submission(engine);
644		if (i915_request_is_active(rq))
645			return 0;
646	} while (time_before(jiffies, timeout));
647
648	return -ETIME;
649}
650
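/*
 * Upper bound on how long we should have to wait for a timeslice:
 * two timeslice intervals plus a jiffy of slack.
 */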
651static long timeslice_threshold(const struct intel_engine_cs *engine)
652{
653	return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
654}
655
656static int live_timeslice_queue(void *arg)
657{
658	struct intel_gt *gt = arg;
659	struct drm_i915_gem_object *obj;
660	struct intel_engine_cs *engine;
661	enum intel_engine_id id;
662	struct i915_vma *vma;
663	void *vaddr;
664	int err = 0;
665
666	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
	 * timeslicing between them disabled, we *do* enable timeslicing
669	 * if the queue demands it. (Normally, we do not submit if
670	 * ELSP[1] is already occupied, so must rely on timeslicing to
671	 * eject ELSP[0] in favour of the queue.)
672	 */
673	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
674		return 0;
675
676	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
677	if (IS_ERR(obj))
678		return PTR_ERR(obj);
679
680	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
681	if (IS_ERR(vma)) {
682		err = PTR_ERR(vma);
683		goto err_obj;
684	}
685
686	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
687	if (IS_ERR(vaddr)) {
688		err = PTR_ERR(vaddr);
689		goto err_obj;
690	}
691
692	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
693	if (err)
694		goto err_map;
695
696	for_each_engine(engine, gt, id) {
697		struct i915_sched_attr attr = {
698			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
699		};
700		struct i915_request *rq, *nop;
701		unsigned long saved;
702
703		if (!intel_engine_has_preemption(engine))
704			continue;
705
706		engine_heartbeat_disable(engine, &saved);
707		memset(vaddr, 0, PAGE_SIZE);
708
709		/* ELSP[0]: semaphore wait */
710		rq = semaphore_queue(engine, vma, 0);
711		if (IS_ERR(rq)) {
712			err = PTR_ERR(rq);
713			goto err_heartbeat;
714		}
715		engine->schedule(rq, &attr);
716		err = wait_for_submit(engine, rq, HZ / 2);
717		if (err) {
718			pr_err("%s: Timed out trying to submit semaphores\n",
719			       engine->name);
720			goto err_rq;
721		}
722
723		/* ELSP[1]: nop request */
724		nop = nop_request(engine);
725		if (IS_ERR(nop)) {
726			err = PTR_ERR(nop);
727			goto err_rq;
728		}
729		err = wait_for_submit(engine, nop, HZ / 2);
730		i915_request_put(nop);
731		if (err) {
732			pr_err("%s: Timed out trying to submit nop\n",
733			       engine->name);
734			goto err_rq;
735		}
736
737		GEM_BUG_ON(i915_request_completed(rq));
738		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
739
740		/* Queue: semaphore signal, matching priority as semaphore */
741		err = release_queue(engine, vma, 1, effective_prio(rq));
742		if (err)
743			goto err_rq;
744
745		intel_engine_flush_submission(engine);
746		if (!READ_ONCE(engine->execlists.timer.expires) &&
747		    !i915_request_completed(rq)) {
748			struct drm_printer p =
749				drm_info_printer(gt->i915->drm.dev);
750
751			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
752				      engine->name);
753			intel_engine_dump(engine, &p,
754					  "%s\n", engine->name);
755			GEM_TRACE_DUMP();
756
757			memset(vaddr, 0xff, PAGE_SIZE);
758			err = -EINVAL;
759		}
760
761		/* Timeslice every jiffy, so within 2 we should signal */
762		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
763			struct drm_printer p =
764				drm_info_printer(gt->i915->drm.dev);
765
766			pr_err("%s: Failed to timeslice into queue\n",
767			       engine->name);
768			intel_engine_dump(engine, &p,
769					  "%s\n", engine->name);
770
771			memset(vaddr, 0xff, PAGE_SIZE);
772			err = -EIO;
773		}
774err_rq:
775		i915_request_put(rq);
776err_heartbeat:
777		engine_heartbeat_enable(engine, saved);
778		if (err)
779			break;
780	}
781
782	i915_vma_unpin(vma);
783err_map:
784	i915_gem_object_unpin_map(obj);
785err_obj:
786	i915_gem_object_put(obj);
787	return err;
788}
789
790static int live_busywait_preempt(void *arg)
791{
792	struct intel_gt *gt = arg;
793	struct i915_gem_context *ctx_hi, *ctx_lo;
794	struct intel_engine_cs *engine;
795	struct drm_i915_gem_object *obj;
796	struct i915_vma *vma;
797	enum intel_engine_id id;
798	int err = -ENOMEM;
799	u32 *map;
800
801	/*
802	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
803	 * preempt the busywaits used to synchronise between rings.
804	 */
805
806	ctx_hi = kernel_context(gt->i915);
807	if (!ctx_hi)
808		return -ENOMEM;
809	ctx_hi->sched.priority =
810		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
811
812	ctx_lo = kernel_context(gt->i915);
813	if (!ctx_lo)
814		goto err_ctx_hi;
815	ctx_lo->sched.priority =
816		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
817
818	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
819	if (IS_ERR(obj)) {
820		err = PTR_ERR(obj);
821		goto err_ctx_lo;
822	}
823
824	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
825	if (IS_ERR(map)) {
826		err = PTR_ERR(map);
827		goto err_obj;
828	}
829
830	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
831	if (IS_ERR(vma)) {
832		err = PTR_ERR(vma);
833		goto err_map;
834	}
835
836	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
837	if (err)
838		goto err_map;
839
840	for_each_engine(engine, gt, id) {
841		struct i915_request *lo, *hi;
842		struct igt_live_test t;
843		u32 *cs;
844
845		if (!intel_engine_has_preemption(engine))
846			continue;
847
848		if (!intel_engine_can_store_dword(engine))
849			continue;
850
851		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
852			err = -EIO;
853			goto err_vma;
854		}
855
856		/*
857		 * We create two requests. The low priority request
858		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
861		 * allowing the first request to complete. If preemption
862		 * fails, we hang instead.
863		 */
864
865		lo = igt_request_alloc(ctx_lo, engine);
866		if (IS_ERR(lo)) {
867			err = PTR_ERR(lo);
868			goto err_vma;
869		}
870
871		cs = intel_ring_begin(lo, 8);
872		if (IS_ERR(cs)) {
873			err = PTR_ERR(cs);
874			i915_request_add(lo);
875			goto err_vma;
876		}
877
878		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
879		*cs++ = i915_ggtt_offset(vma);
880		*cs++ = 0;
881		*cs++ = 1;
882
883		/* XXX Do we need a flush + invalidate here? */
884
885		*cs++ = MI_SEMAPHORE_WAIT |
886			MI_SEMAPHORE_GLOBAL_GTT |
887			MI_SEMAPHORE_POLL |
888			MI_SEMAPHORE_SAD_EQ_SDD;
889		*cs++ = 0;
890		*cs++ = i915_ggtt_offset(vma);
891		*cs++ = 0;
892
893		intel_ring_advance(lo, cs);
894
895		i915_request_get(lo);
896		i915_request_add(lo);
897
898		if (wait_for(READ_ONCE(*map), 10)) {
899			i915_request_put(lo);
900			err = -ETIMEDOUT;
901			goto err_vma;
902		}
903
904		/* Low priority request should be busywaiting now */
905		if (i915_request_wait(lo, 0, 1) != -ETIME) {
906			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not busywait!\n",
908			       engine->name);
909			err = -EIO;
910			goto err_vma;
911		}
912
913		hi = igt_request_alloc(ctx_hi, engine);
914		if (IS_ERR(hi)) {
915			err = PTR_ERR(hi);
916			i915_request_put(lo);
917			goto err_vma;
918		}
919
920		cs = intel_ring_begin(hi, 4);
921		if (IS_ERR(cs)) {
922			err = PTR_ERR(cs);
923			i915_request_add(hi);
924			i915_request_put(lo);
925			goto err_vma;
926		}
927
928		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
929		*cs++ = i915_ggtt_offset(vma);
930		*cs++ = 0;
931		*cs++ = 0;
932
933		intel_ring_advance(hi, cs);
934		i915_request_add(hi);
935
936		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
937			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
938
939			pr_err("%s: Failed to preempt semaphore busywait!\n",
940			       engine->name);
941
942			intel_engine_dump(engine, &p, "%s\n", engine->name);
943			GEM_TRACE_DUMP();
944
945			i915_request_put(lo);
946			intel_gt_set_wedged(gt);
947			err = -EIO;
948			goto err_vma;
949		}
950		GEM_BUG_ON(READ_ONCE(*map));
951		i915_request_put(lo);
952
953		if (igt_live_test_end(&t)) {
954			err = -EIO;
955			goto err_vma;
956		}
957	}
958
959	err = 0;
960err_vma:
961	i915_vma_unpin(vma);
962err_map:
963	i915_gem_object_unpin_map(obj);
964err_obj:
965	i915_gem_object_put(obj);
966err_ctx_lo:
967	kernel_context_close(ctx_lo);
968err_ctx_hi:
969	kernel_context_close(ctx_hi);
970	return err;
971}
972
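/*
 * Create a spinning request on @engine using the engine's legacy slot in
 * @ctx; the temporary intel_context reference is dropped once the request
 * has been created.
 */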
973static struct i915_request *
974spinner_create_request(struct igt_spinner *spin,
975		       struct i915_gem_context *ctx,
976		       struct intel_engine_cs *engine,
977		       u32 arb)
978{
979	struct intel_context *ce;
980	struct i915_request *rq;
981
982	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
983	if (IS_ERR(ce))
984		return ERR_CAST(ce);
985
986	rq = igt_spinner_create_request(spin, ce, arb);
987	intel_context_put(ce);
988	return rq;
989}
990
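/*
 * Submit a spinner from a minimum-priority context, then a spinner from a
 * maximum-priority context on the same engine. The second spinner can only
 * start if it preempts the first.
 */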
991static int live_preempt(void *arg)
992{
993	struct intel_gt *gt = arg;
994	struct i915_gem_context *ctx_hi, *ctx_lo;
995	struct igt_spinner spin_hi, spin_lo;
996	struct intel_engine_cs *engine;
997	enum intel_engine_id id;
998	int err = -ENOMEM;
999
1000	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1001		return 0;
1002
1003	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1004		pr_err("Logical preemption supported, but not exposed\n");
1005
1006	if (igt_spinner_init(&spin_hi, gt))
1007		return -ENOMEM;
1008
1009	if (igt_spinner_init(&spin_lo, gt))
1010		goto err_spin_hi;
1011
1012	ctx_hi = kernel_context(gt->i915);
1013	if (!ctx_hi)
1014		goto err_spin_lo;
1015	ctx_hi->sched.priority =
1016		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1017
1018	ctx_lo = kernel_context(gt->i915);
1019	if (!ctx_lo)
1020		goto err_ctx_hi;
1021	ctx_lo->sched.priority =
1022		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1023
1024	for_each_engine(engine, gt, id) {
1025		struct igt_live_test t;
1026		struct i915_request *rq;
1027
1028		if (!intel_engine_has_preemption(engine))
1029			continue;
1030
1031		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1032			err = -EIO;
1033			goto err_ctx_lo;
1034		}
1035
1036		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1037					    MI_ARB_CHECK);
1038		if (IS_ERR(rq)) {
1039			err = PTR_ERR(rq);
1040			goto err_ctx_lo;
1041		}
1042
1043		i915_request_add(rq);
1044		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1045			GEM_TRACE("lo spinner failed to start\n");
1046			GEM_TRACE_DUMP();
1047			intel_gt_set_wedged(gt);
1048			err = -EIO;
1049			goto err_ctx_lo;
1050		}
1051
1052		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1053					    MI_ARB_CHECK);
1054		if (IS_ERR(rq)) {
1055			igt_spinner_end(&spin_lo);
1056			err = PTR_ERR(rq);
1057			goto err_ctx_lo;
1058		}
1059
1060		i915_request_add(rq);
1061		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1062			GEM_TRACE("hi spinner failed to start\n");
1063			GEM_TRACE_DUMP();
1064			intel_gt_set_wedged(gt);
1065			err = -EIO;
1066			goto err_ctx_lo;
1067		}
1068
1069		igt_spinner_end(&spin_hi);
1070		igt_spinner_end(&spin_lo);
1071
1072		if (igt_live_test_end(&t)) {
1073			err = -EIO;
1074			goto err_ctx_lo;
1075		}
1076	}
1077
1078	err = 0;
1079err_ctx_lo:
1080	kernel_context_close(ctx_lo);
1081err_ctx_hi:
1082	kernel_context_close(ctx_hi);
1083err_spin_lo:
1084	igt_spinner_fini(&spin_lo);
1085err_spin_hi:
1086	igt_spinner_fini(&spin_hi);
1087	return err;
1088}
1089
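/*
 * As live_preempt, but the second spinner is submitted at default priority
 * and only boosted to maximum afterwards, checking that a late priority
 * bump still preempts the already-running low priority spinner.
 */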
1090static int live_late_preempt(void *arg)
1091{
1092	struct intel_gt *gt = arg;
1093	struct i915_gem_context *ctx_hi, *ctx_lo;
1094	struct igt_spinner spin_hi, spin_lo;
1095	struct intel_engine_cs *engine;
1096	struct i915_sched_attr attr = {};
1097	enum intel_engine_id id;
1098	int err = -ENOMEM;
1099
1100	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1101		return 0;
1102
1103	if (igt_spinner_init(&spin_hi, gt))
1104		return -ENOMEM;
1105
1106	if (igt_spinner_init(&spin_lo, gt))
1107		goto err_spin_hi;
1108
1109	ctx_hi = kernel_context(gt->i915);
1110	if (!ctx_hi)
1111		goto err_spin_lo;
1112
1113	ctx_lo = kernel_context(gt->i915);
1114	if (!ctx_lo)
1115		goto err_ctx_hi;
1116
1117	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1118	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1119
1120	for_each_engine(engine, gt, id) {
1121		struct igt_live_test t;
1122		struct i915_request *rq;
1123
1124		if (!intel_engine_has_preemption(engine))
1125			continue;
1126
1127		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1128			err = -EIO;
1129			goto err_ctx_lo;
1130		}
1131
1132		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1133					    MI_ARB_CHECK);
1134		if (IS_ERR(rq)) {
1135			err = PTR_ERR(rq);
1136			goto err_ctx_lo;
1137		}
1138
1139		i915_request_add(rq);
1140		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1141			pr_err("First context failed to start\n");
1142			goto err_wedged;
1143		}
1144
1145		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1146					    MI_NOOP);
1147		if (IS_ERR(rq)) {
1148			igt_spinner_end(&spin_lo);
1149			err = PTR_ERR(rq);
1150			goto err_ctx_lo;
1151		}
1152
1153		i915_request_add(rq);
1154		if (igt_wait_for_spinner(&spin_hi, rq)) {
1155			pr_err("Second context overtook first?\n");
1156			goto err_wedged;
1157		}
1158
1159		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1160		engine->schedule(rq, &attr);
1161
1162		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1163			pr_err("High priority context failed to preempt the low priority context\n");
1164			GEM_TRACE_DUMP();
1165			goto err_wedged;
1166		}
1167
1168		igt_spinner_end(&spin_hi);
1169		igt_spinner_end(&spin_lo);
1170
1171		if (igt_live_test_end(&t)) {
1172			err = -EIO;
1173			goto err_ctx_lo;
1174		}
1175	}
1176
1177	err = 0;
1178err_ctx_lo:
1179	kernel_context_close(ctx_lo);
1180err_ctx_hi:
1181	kernel_context_close(ctx_hi);
1182err_spin_lo:
1183	igt_spinner_fini(&spin_lo);
1184err_spin_hi:
1185	igt_spinner_fini(&spin_hi);
1186	return err;
1187
1188err_wedged:
1189	igt_spinner_end(&spin_hi);
1190	igt_spinner_end(&spin_lo);
1191	intel_gt_set_wedged(gt);
1192	err = -EIO;
1193	goto err_ctx_lo;
1194}
1195
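/*
 * A preempt_client pairs a spinner with its own GEM context, so tests can
 * run long-lived requests from distinct contexts at whatever priorities
 * they assign.
 */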
1196struct preempt_client {
1197	struct igt_spinner spin;
1198	struct i915_gem_context *ctx;
1199};
1200
1201static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1202{
1203	c->ctx = kernel_context(gt->i915);
1204	if (!c->ctx)
1205		return -ENOMEM;
1206
1207	if (igt_spinner_init(&c->spin, gt))
1208		goto err_ctx;
1209
1210	return 0;
1211
1212err_ctx:
1213	kernel_context_close(c->ctx);
1214	return -ENOMEM;
1215}
1216
1217static void preempt_client_fini(struct preempt_client *c)
1218{
1219	igt_spinner_fini(&c->spin);
1220	kernel_context_close(c->ctx);
1221}
1222
1223static int live_nopreempt(void *arg)
1224{
1225	struct intel_gt *gt = arg;
1226	struct intel_engine_cs *engine;
1227	struct preempt_client a, b;
1228	enum intel_engine_id id;
1229	int err = -ENOMEM;
1230
1231	/*
1232	 * Verify that we can disable preemption for an individual request
	 * that may be being observed and so must not be interrupted.
1234	 */
1235
1236	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1237		return 0;
1238
1239	if (preempt_client_init(gt, &a))
1240		return -ENOMEM;
1241	if (preempt_client_init(gt, &b))
1242		goto err_client_a;
1243	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1244
1245	for_each_engine(engine, gt, id) {
1246		struct i915_request *rq_a, *rq_b;
1247
1248		if (!intel_engine_has_preemption(engine))
1249			continue;
1250
1251		engine->execlists.preempt_hang.count = 0;
1252
1253		rq_a = spinner_create_request(&a.spin,
1254					      a.ctx, engine,
1255					      MI_ARB_CHECK);
1256		if (IS_ERR(rq_a)) {
1257			err = PTR_ERR(rq_a);
1258			goto err_client_b;
1259		}
1260
1261		/* Low priority client, but unpreemptable! */
1262		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1263
1264		i915_request_add(rq_a);
1265		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1266			pr_err("First client failed to start\n");
1267			goto err_wedged;
1268		}
1269
1270		rq_b = spinner_create_request(&b.spin,
1271					      b.ctx, engine,
1272					      MI_ARB_CHECK);
1273		if (IS_ERR(rq_b)) {
1274			err = PTR_ERR(rq_b);
1275			goto err_client_b;
1276		}
1277
1278		i915_request_add(rq_b);
1279
1280		/* B is much more important than A! (But A is unpreemptable.) */
1281		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1282
1283		/* Wait long enough for preemption and timeslicing */
1284		if (igt_wait_for_spinner(&b.spin, rq_b)) {
1285			pr_err("Second client started too early!\n");
1286			goto err_wedged;
1287		}
1288
1289		igt_spinner_end(&a.spin);
1290
1291		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1292			pr_err("Second client failed to start\n");
1293			goto err_wedged;
1294		}
1295
1296		igt_spinner_end(&b.spin);
1297
1298		if (engine->execlists.preempt_hang.count) {
1299			pr_err("Preemption recorded x%d; should have been suppressed!\n",
1300			       engine->execlists.preempt_hang.count);
1301			err = -EINVAL;
1302			goto err_wedged;
1303		}
1304
1305		if (igt_flush_test(gt->i915))
1306			goto err_wedged;
1307	}
1308
1309	err = 0;
1310err_client_b:
1311	preempt_client_fini(&b);
1312err_client_a:
1313	preempt_client_fini(&a);
1314	return err;
1315
1316err_wedged:
1317	igt_spinner_end(&b.spin);
1318	igt_spinner_end(&a.spin);
1319	intel_gt_set_wedged(gt);
1320	err = -EIO;
1321	goto err_client_b;
1322}
1323
1324struct live_preempt_cancel {
1325	struct intel_engine_cs *engine;
1326	struct preempt_client a, b;
1327};
1328
1329static int __cancel_active0(struct live_preempt_cancel *arg)
1330{
1331	struct i915_request *rq;
1332	struct igt_live_test t;
1333	int err;
1334
1335	/* Preempt cancel of ELSP0 */
1336	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1337	if (igt_live_test_begin(&t, arg->engine->i915,
1338				__func__, arg->engine->name))
1339		return -EIO;
1340
1341	rq = spinner_create_request(&arg->a.spin,
1342				    arg->a.ctx, arg->engine,
1343				    MI_ARB_CHECK);
1344	if (IS_ERR(rq))
1345		return PTR_ERR(rq);
1346
1347	clear_bit(CONTEXT_BANNED, &rq->context->flags);
1348	i915_request_get(rq);
1349	i915_request_add(rq);
1350	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1351		err = -EIO;
1352		goto out;
1353	}
1354
1355	intel_context_set_banned(rq->context);
1356	err = intel_engine_pulse(arg->engine);
1357	if (err)
1358		goto out;
1359
1360	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1361		err = -EIO;
1362		goto out;
1363	}
1364
1365	if (rq->fence.error != -EIO) {
1366		pr_err("Cancelled inflight0 request did not report -EIO\n");
1367		err = -EINVAL;
1368		goto out;
1369	}
1370
1371out:
1372	i915_request_put(rq);
1373	if (igt_live_test_end(&t))
1374		err = -EIO;
1375	return err;
1376}
1377
1378static int __cancel_active1(struct live_preempt_cancel *arg)
1379{
1380	struct i915_request *rq[2] = {};
1381	struct igt_live_test t;
1382	int err;
1383
1384	/* Preempt cancel of ELSP1 */
1385	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1386	if (igt_live_test_begin(&t, arg->engine->i915,
1387				__func__, arg->engine->name))
1388		return -EIO;
1389
1390	rq[0] = spinner_create_request(&arg->a.spin,
1391				       arg->a.ctx, arg->engine,
1392				       MI_NOOP); /* no preemption */
1393	if (IS_ERR(rq[0]))
1394		return PTR_ERR(rq[0]);
1395
1396	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1397	i915_request_get(rq[0]);
1398	i915_request_add(rq[0]);
1399	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1400		err = -EIO;
1401		goto out;
1402	}
1403
1404	rq[1] = spinner_create_request(&arg->b.spin,
1405				       arg->b.ctx, arg->engine,
1406				       MI_ARB_CHECK);
1407	if (IS_ERR(rq[1])) {
1408		err = PTR_ERR(rq[1]);
1409		goto out;
1410	}
1411
1412	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1413	i915_request_get(rq[1]);
1414	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1415	i915_request_add(rq[1]);
1416	if (err)
1417		goto out;
1418
1419	intel_context_set_banned(rq[1]->context);
1420	err = intel_engine_pulse(arg->engine);
1421	if (err)
1422		goto out;
1423
1424	igt_spinner_end(&arg->a.spin);
1425	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
1426		err = -EIO;
1427		goto out;
1428	}
1429
1430	if (rq[0]->fence.error != 0) {
1431		pr_err("Normal inflight0 request did not complete\n");
1432		err = -EINVAL;
1433		goto out;
1434	}
1435
1436	if (rq[1]->fence.error != -EIO) {
1437		pr_err("Cancelled inflight1 request did not report -EIO\n");
1438		err = -EINVAL;
1439		goto out;
1440	}
1441
1442out:
1443	i915_request_put(rq[1]);
1444	i915_request_put(rq[0]);
1445	if (igt_live_test_end(&t))
1446		err = -EIO;
1447	return err;
1448}
1449
1450static int __cancel_queued(struct live_preempt_cancel *arg)
1451{
1452	struct i915_request *rq[3] = {};
1453	struct igt_live_test t;
1454	int err;
1455
1456	/* Full ELSP and one in the wings */
1457	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1458	if (igt_live_test_begin(&t, arg->engine->i915,
1459				__func__, arg->engine->name))
1460		return -EIO;
1461
1462	rq[0] = spinner_create_request(&arg->a.spin,
1463				       arg->a.ctx, arg->engine,
1464				       MI_ARB_CHECK);
1465	if (IS_ERR(rq[0]))
1466		return PTR_ERR(rq[0]);
1467
1468	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1469	i915_request_get(rq[0]);
1470	i915_request_add(rq[0]);
1471	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1472		err = -EIO;
1473		goto out;
1474	}
1475
1476	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1477	if (IS_ERR(rq[1])) {
1478		err = PTR_ERR(rq[1]);
1479		goto out;
1480	}
1481
1482	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1483	i915_request_get(rq[1]);
1484	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1485	i915_request_add(rq[1]);
1486	if (err)
1487		goto out;
1488
1489	rq[2] = spinner_create_request(&arg->b.spin,
1490				       arg->a.ctx, arg->engine,
1491				       MI_ARB_CHECK);
1492	if (IS_ERR(rq[2])) {
1493		err = PTR_ERR(rq[2]);
1494		goto out;
1495	}
1496
1497	i915_request_get(rq[2]);
1498	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1499	i915_request_add(rq[2]);
1500	if (err)
1501		goto out;
1502
1503	intel_context_set_banned(rq[2]->context);
1504	err = intel_engine_pulse(arg->engine);
1505	if (err)
1506		goto out;
1507
1508	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1509		err = -EIO;
1510		goto out;
1511	}
1512
1513	if (rq[0]->fence.error != -EIO) {
1514		pr_err("Cancelled inflight0 request did not report -EIO\n");
1515		err = -EINVAL;
1516		goto out;
1517	}
1518
1519	if (rq[1]->fence.error != 0) {
1520		pr_err("Normal inflight1 request did not complete\n");
1521		err = -EINVAL;
1522		goto out;
1523	}
1524
1525	if (rq[2]->fence.error != -EIO) {
1526		pr_err("Cancelled queued request did not report -EIO\n");
1527		err = -EINVAL;
1528		goto out;
1529	}
1530
1531out:
1532	i915_request_put(rq[2]);
1533	i915_request_put(rq[1]);
1534	i915_request_put(rq[0]);
1535	if (igt_live_test_end(&t))
1536		err = -EIO;
1537	return err;
1538}
1539
1540static int __cancel_hostile(struct live_preempt_cancel *arg)
1541{
1542	struct i915_request *rq;
1543	int err;
1544
1545	/* Preempt cancel non-preemptible spinner in ELSP0 */
1546	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
1547		return 0;
1548
1549	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1550	rq = spinner_create_request(&arg->a.spin,
1551				    arg->a.ctx, arg->engine,
1552				    MI_NOOP); /* preemption disabled */
1553	if (IS_ERR(rq))
1554		return PTR_ERR(rq);
1555
1556	clear_bit(CONTEXT_BANNED, &rq->context->flags);
1557	i915_request_get(rq);
1558	i915_request_add(rq);
1559	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1560		err = -EIO;
1561		goto out;
1562	}
1563
1564	intel_context_set_banned(rq->context);
1565	err = intel_engine_pulse(arg->engine); /* force reset */
1566	if (err)
1567		goto out;
1568
1569	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1570		err = -EIO;
1571		goto out;
1572	}
1573
1574	if (rq->fence.error != -EIO) {
1575		pr_err("Cancelled inflight0 request did not report -EIO\n");
1576		err = -EINVAL;
1577		goto out;
1578	}
1579
1580out:
1581	i915_request_put(rq);
1582	if (igt_flush_test(arg->engine->i915))
1583		err = -EIO;
1584	return err;
1585}
1586
1587static int live_preempt_cancel(void *arg)
1588{
1589	struct intel_gt *gt = arg;
1590	struct live_preempt_cancel data;
1591	enum intel_engine_id id;
1592	int err = -ENOMEM;
1593
1594	/*
1595	 * To cancel an inflight context, we need to first remove it from the
1596	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
1597	 */
1598
1599	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1600		return 0;
1601
1602	if (preempt_client_init(gt, &data.a))
1603		return -ENOMEM;
1604	if (preempt_client_init(gt, &data.b))
1605		goto err_client_a;
1606
1607	for_each_engine(data.engine, gt, id) {
1608		if (!intel_engine_has_preemption(data.engine))
1609			continue;
1610
1611		err = __cancel_active0(&data);
1612		if (err)
1613			goto err_wedged;
1614
1615		err = __cancel_active1(&data);
1616		if (err)
1617			goto err_wedged;
1618
1619		err = __cancel_queued(&data);
1620		if (err)
1621			goto err_wedged;
1622
1623		err = __cancel_hostile(&data);
1624		if (err)
1625			goto err_wedged;
1626	}
1627
1628	err = 0;
1629err_client_b:
1630	preempt_client_fini(&data.b);
1631err_client_a:
1632	preempt_client_fini(&data.a);
1633	return err;
1634
1635err_wedged:
1636	GEM_TRACE_DUMP();
1637	igt_spinner_end(&data.b.spin);
1638	igt_spinner_end(&data.a.spin);
1639	intel_gt_set_wedged(gt);
1640	goto err_client_b;
1641}
1642
1643static int live_suppress_self_preempt(void *arg)
1644{
1645	struct intel_gt *gt = arg;
1646	struct intel_engine_cs *engine;
1647	struct i915_sched_attr attr = {
1648		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
1649	};
1650	struct preempt_client a, b;
1651	enum intel_engine_id id;
1652	int err = -ENOMEM;
1653
1654	/*
1655	 * Verify that if a preemption request does not cause a change in
1656	 * the current execution order, the preempt-to-idle injection is
1657	 * skipped and that we do not accidentally apply it after the CS
1658	 * completion event.
1659	 */
1660
1661	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1662		return 0;
1663
1664	if (USES_GUC_SUBMISSION(gt->i915))
		return 0; /* presume black box */
1666
1667	if (intel_vgpu_active(gt->i915))
1668		return 0; /* GVT forces single port & request submission */
1669
1670	if (preempt_client_init(gt, &a))
1671		return -ENOMEM;
1672	if (preempt_client_init(gt, &b))
1673		goto err_client_a;
1674
1675	for_each_engine(engine, gt, id) {
1676		struct i915_request *rq_a, *rq_b;
1677		int depth;
1678
1679		if (!intel_engine_has_preemption(engine))
1680			continue;
1681
1682		if (igt_flush_test(gt->i915))
1683			goto err_wedged;
1684
1685		intel_engine_pm_get(engine);
1686		engine->execlists.preempt_hang.count = 0;
1687
1688		rq_a = spinner_create_request(&a.spin,
1689					      a.ctx, engine,
1690					      MI_NOOP);
1691		if (IS_ERR(rq_a)) {
1692			err = PTR_ERR(rq_a);
1693			intel_engine_pm_put(engine);
1694			goto err_client_b;
1695		}
1696
1697		i915_request_add(rq_a);
1698		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1699			pr_err("First client failed to start\n");
1700			intel_engine_pm_put(engine);
1701			goto err_wedged;
1702		}
1703
1704		/* Keep postponing the timer to avoid premature slicing */
1705		mod_timer(&engine->execlists.timer, jiffies + HZ);
1706		for (depth = 0; depth < 8; depth++) {
1707			rq_b = spinner_create_request(&b.spin,
1708						      b.ctx, engine,
1709						      MI_NOOP);
1710			if (IS_ERR(rq_b)) {
1711				err = PTR_ERR(rq_b);
1712				intel_engine_pm_put(engine);
1713				goto err_client_b;
1714			}
1715			i915_request_add(rq_b);
1716
1717			GEM_BUG_ON(i915_request_completed(rq_a));
1718			engine->schedule(rq_a, &attr);
1719			igt_spinner_end(&a.spin);
1720
1721			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1722				pr_err("Second client failed to start\n");
1723				intel_engine_pm_put(engine);
1724				goto err_wedged;
1725			}
1726
1727			swap(a, b);
1728			rq_a = rq_b;
1729		}
1730		igt_spinner_end(&a.spin);
1731
1732		if (engine->execlists.preempt_hang.count) {
1733			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
1734			       engine->name,
1735			       engine->execlists.preempt_hang.count,
1736			       depth);
1737			intel_engine_pm_put(engine);
1738			err = -EINVAL;
1739			goto err_client_b;
1740		}
1741
1742		intel_engine_pm_put(engine);
1743		if (igt_flush_test(gt->i915))
1744			goto err_wedged;
1745	}
1746
1747	err = 0;
1748err_client_b:
1749	preempt_client_fini(&b);
1750err_client_a:
1751	preempt_client_fini(&a);
1752	return err;
1753
1754err_wedged:
1755	igt_spinner_end(&b.spin);
1756	igt_spinner_end(&a.spin);
1757	intel_gt_set_wedged(gt);
1758	err = -EIO;
1759	goto err_client_b;
1760}
1761
1762static int __i915_sw_fence_call
1763dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
1764{
1765	return NOTIFY_DONE;
1766}
1767
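/*
 * Fabricate a bare request that is never emitted to the GPU and can never
 * complete: fence.seqno is 1 while hwsp_seqno points at the (zero) upper
 * half of the seqno, so i915_request_completed() always returns false.
 * It is used purely as a fence for other requests to wait on (e.g. to
 * suppress NEWCLIENT promotion below); dummy_request_free() then fakes its
 * completion.
 */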
1768static struct i915_request *dummy_request(struct intel_engine_cs *engine)
1769{
1770	struct i915_request *rq;
1771
1772	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
1773	if (!rq)
1774		return NULL;
1775
1776	rq->engine = engine;
1777
1778	spin_lock_init(&rq->lock);
1779	INIT_LIST_HEAD(&rq->fence.cb_list);
1780	rq->fence.lock = &rq->lock;
1781	rq->fence.ops = &i915_fence_ops;
1782
1783	i915_sched_node_init(&rq->sched);
1784
1785	/* mark this request as permanently incomplete */
1786	rq->fence.seqno = 1;
1787	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
1788	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
1789	GEM_BUG_ON(i915_request_completed(rq));
1790
1791	i915_sw_fence_init(&rq->submit, dummy_notify);
1792	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1793
1794	spin_lock_init(&rq->lock);
1795	rq->fence.lock = &rq->lock;
1796	INIT_LIST_HEAD(&rq->fence.cb_list);
1797
1798	return rq;
1799}
1800
1801static void dummy_request_free(struct i915_request *dummy)
1802{
1803	/* We have to fake the CS interrupt to kick the next request */
1804	i915_sw_fence_commit(&dummy->submit);
1805
1806	i915_request_mark_complete(dummy);
1807	dma_fence_signal(&dummy->fence);
1808
1809	i915_sched_node_fini(&dummy->sched);
1810	i915_sw_fence_fini(&dummy->submit);
1811
1812	dma_fence_free(&dummy->fence);
1813}
1814
1815static int live_suppress_wait_preempt(void *arg)
1816{
1817	struct intel_gt *gt = arg;
1818	struct preempt_client client[4];
1819	struct i915_request *rq[ARRAY_SIZE(client)] = {};
1820	struct intel_engine_cs *engine;
1821	enum intel_engine_id id;
1822	int err = -ENOMEM;
1823	int i;
1824
1825	/*
1826	 * Waiters are given a little priority nudge, but not enough
1827	 * to actually cause any preemption. Double check that we do
1828	 * not needlessly generate preempt-to-idle cycles.
1829	 */
1830
1831	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1832		return 0;
1833
1834	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
1835		return -ENOMEM;
1836	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
1837		goto err_client_0;
1838	if (preempt_client_init(gt, &client[2])) /* head of queue */
1839		goto err_client_1;
1840	if (preempt_client_init(gt, &client[3])) /* bystander */
1841		goto err_client_2;
1842
1843	for_each_engine(engine, gt, id) {
1844		int depth;
1845
1846		if (!intel_engine_has_preemption(engine))
1847			continue;
1848
1849		if (!engine->emit_init_breadcrumb)
1850			continue;
1851
1852		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
1853			struct i915_request *dummy;
1854
1855			engine->execlists.preempt_hang.count = 0;
1856
1857			dummy = dummy_request(engine);
1858			if (!dummy)
1859				goto err_client_3;
1860
1861			for (i = 0; i < ARRAY_SIZE(client); i++) {
1862				struct i915_request *this;
1863
1864				this = spinner_create_request(&client[i].spin,
1865							      client[i].ctx, engine,
1866							      MI_NOOP);
1867				if (IS_ERR(this)) {
1868					err = PTR_ERR(this);
1869					goto err_wedged;
1870				}
1871
1872				/* Disable NEWCLIENT promotion */
1873				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
1874							&dummy->fence);
1875
1876				rq[i] = i915_request_get(this);
1877				i915_request_add(this);
1878			}
1879
1880			dummy_request_free(dummy);
1881
1882			GEM_BUG_ON(i915_request_completed(rq[0]));
1883			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
1884				pr_err("%s: First client failed to start\n",
1885				       engine->name);
1886				goto err_wedged;
1887			}
1888			GEM_BUG_ON(!i915_request_started(rq[0]));
1889
1890			if (i915_request_wait(rq[depth],
1891					      I915_WAIT_PRIORITY,
1892					      1) != -ETIME) {
1893				pr_err("%s: Waiter depth:%d completed!\n",
1894				       engine->name, depth);
1895				goto err_wedged;
1896			}
1897
1898			for (i = 0; i < ARRAY_SIZE(client); i++) {
1899				igt_spinner_end(&client[i].spin);
1900				i915_request_put(rq[i]);
1901				rq[i] = NULL;
1902			}
1903
1904			if (igt_flush_test(gt->i915))
1905				goto err_wedged;
1906
1907			if (engine->execlists.preempt_hang.count) {
1908				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
1909				       engine->name,
1910				       engine->execlists.preempt_hang.count,
1911				       depth);
1912				err = -EINVAL;
1913				goto err_client_3;
1914			}
1915		}
1916	}
1917
1918	err = 0;
1919err_client_3:
1920	preempt_client_fini(&client[3]);
1921err_client_2:
1922	preempt_client_fini(&client[2]);
1923err_client_1:
1924	preempt_client_fini(&client[1]);
1925err_client_0:
1926	preempt_client_fini(&client[0]);
1927	return err;
1928
1929err_wedged:
1930	for (i = 0; i < ARRAY_SIZE(client); i++) {
1931		igt_spinner_end(&client[i].spin);
1932		i915_request_put(rq[i]);
1933	}
1934	intel_gt_set_wedged(gt);
1935	err = -EIO;
1936	goto err_client_3;
1937}
1938
1939static int live_chain_preempt(void *arg)
1940{
1941	struct intel_gt *gt = arg;
1942	struct intel_engine_cs *engine;
1943	struct preempt_client hi, lo;
1944	enum intel_engine_id id;
1945	int err = -ENOMEM;
1946
1947	/*
1948	 * Build a chain AB...BA between two contexts (A, B) and request
1949	 * preemption of the last request. It should then complete before
1950	 * the previously submitted spinner in B.
1951	 */
1952
1953	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1954		return 0;
1955
1956	if (preempt_client_init(gt, &hi))
1957		return -ENOMEM;
1958
1959	if (preempt_client_init(gt, &lo))
1960		goto err_client_hi;
1961
1962	for_each_engine(engine, gt, id) {
1963		struct i915_sched_attr attr = {
1964			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1965		};
1966		struct igt_live_test t;
1967		struct i915_request *rq;
1968		int ring_size, count, i;
1969
1970		if (!intel_engine_has_preemption(engine))
1971			continue;
1972
1973		rq = spinner_create_request(&lo.spin,
1974					    lo.ctx, engine,
1975					    MI_ARB_CHECK);
1976		if (IS_ERR(rq))
1977			goto err_wedged;
1978
1979		i915_request_get(rq);
1980		i915_request_add(rq);
1981
1982		ring_size = rq->wa_tail - rq->head;
1983		if (ring_size < 0)
1984			ring_size += rq->ring->size;
1985		ring_size = rq->ring->size / ring_size;
1986		pr_debug("%s(%s): Using maximum of %d requests\n",
1987			 __func__, engine->name, ring_size);
1988
1989		igt_spinner_end(&lo.spin);
1990		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1991			pr_err("Timed out waiting to flush %s\n", engine->name);
1992			i915_request_put(rq);
1993			goto err_wedged;
1994		}
1995		i915_request_put(rq);
1996
1997		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1998			err = -EIO;
1999			goto err_wedged;
2000		}
2001
2002		for_each_prime_number_from(count, 1, ring_size) {
2003			rq = spinner_create_request(&hi.spin,
2004						    hi.ctx, engine,
2005						    MI_ARB_CHECK);
2006			if (IS_ERR(rq))
2007				goto err_wedged;
2008			i915_request_add(rq);
2009			if (!igt_wait_for_spinner(&hi.spin, rq))
2010				goto err_wedged;
2011
2012			rq = spinner_create_request(&lo.spin,
2013						    lo.ctx, engine,
2014						    MI_ARB_CHECK);
2015			if (IS_ERR(rq))
2016				goto err_wedged;
2017			i915_request_add(rq);
2018
2019			for (i = 0; i < count; i++) {
2020				rq = igt_request_alloc(lo.ctx, engine);
2021				if (IS_ERR(rq))
2022					goto err_wedged;
2023				i915_request_add(rq);
2024			}
2025
2026			rq = igt_request_alloc(hi.ctx, engine);
2027			if (IS_ERR(rq))
2028				goto err_wedged;
2029
2030			i915_request_get(rq);
2031			i915_request_add(rq);
2032			engine->schedule(rq, &attr);
2033
2034			igt_spinner_end(&hi.spin);
2035			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2036				struct drm_printer p =
2037					drm_info_printer(gt->i915->drm.dev);
2038
2039				pr_err("Failed to preempt over chain of %d\n",
2040				       count);
2041				intel_engine_dump(engine, &p,
2042						  "%s\n", engine->name);
2043				i915_request_put(rq);
2044				goto err_wedged;
2045			}
2046			igt_spinner_end(&lo.spin);
2047			i915_request_put(rq);
2048
2049			rq = igt_request_alloc(lo.ctx, engine);
2050			if (IS_ERR(rq))
2051				goto err_wedged;
2052
2053			i915_request_get(rq);
2054			i915_request_add(rq);
2055
2056			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2057				struct drm_printer p =
2058					drm_info_printer(gt->i915->drm.dev);
2059
2060				pr_err("Failed to flush low priority chain of %d requests\n",
2061				       count);
2062				intel_engine_dump(engine, &p,
2063						  "%s\n", engine->name);
2064
2065				i915_request_put(rq);
2066				goto err_wedged;
2067			}
2068			i915_request_put(rq);
2069		}
2070
2071		if (igt_live_test_end(&t)) {
2072			err = -EIO;
2073			goto err_wedged;
2074		}
2075	}
2076
2077	err = 0;
2078err_client_lo:
2079	preempt_client_fini(&lo);
2080err_client_hi:
2081	preempt_client_fini(&hi);
2082	return err;
2083
2084err_wedged:
2085	igt_spinner_end(&hi.spin);
2086	igt_spinner_end(&lo.spin);
2087	intel_gt_set_wedged(gt);
2088	err = -EIO;
2089	goto err_client_lo;
2090}
2091
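/*
 * Append one member to the gang: its batch spins on a semaphore in the
 * first dword of its own buffer and, once released, writes zero into the
 * previous (lower priority) member's semaphore, so zeroing the newest
 * batch unwinds the whole chain in turn. The new request is linked to
 * *prev via client_link and becomes the new head.
 */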
2092static int create_gang(struct intel_engine_cs *engine,
2093		       struct i915_request **prev)
2094{
2095	struct drm_i915_gem_object *obj;
2096	struct intel_context *ce;
2097	struct i915_request *rq;
2098	struct i915_vma *vma;
2099	u32 *cs;
2100	int err;
2101
2102	ce = intel_context_create(engine);
2103	if (IS_ERR(ce))
2104		return PTR_ERR(ce);
2105
2106	obj = i915_gem_object_create_internal(engine->i915, 4096);
2107	if (IS_ERR(obj)) {
2108		err = PTR_ERR(obj);
2109		goto err_ce;
2110	}
2111
2112	vma = i915_vma_instance(obj, ce->vm, NULL);
2113	if (IS_ERR(vma)) {
2114		err = PTR_ERR(vma);
2115		goto err_obj;
2116	}
2117
2118	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2119	if (err)
2120		goto err_obj;
2121
	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_obj;
	}
2125
2126	/* Semaphore target: spin until zero */
2127	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2128
2129	*cs++ = MI_SEMAPHORE_WAIT |
2130		MI_SEMAPHORE_POLL |
2131		MI_SEMAPHORE_SAD_EQ_SDD;
2132	*cs++ = 0;
2133	*cs++ = lower_32_bits(vma->node.start);
2134	*cs++ = upper_32_bits(vma->node.start);
2135
2136	if (*prev) {
2137		u64 offset = (*prev)->batch->node.start;
2138
2139		/* Terminate the spinner in the next lower priority batch. */
2140		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2141		*cs++ = lower_32_bits(offset);
2142		*cs++ = upper_32_bits(offset);
2143		*cs++ = 0;
2144	}
2145
2146	*cs++ = MI_BATCH_BUFFER_END;
2147	i915_gem_object_flush_map(obj);
2148	i915_gem_object_unpin_map(obj);
2149
	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_obj;
	}
2153
2154	rq->batch = vma;
2155	i915_request_get(rq);
2156
2157	i915_vma_lock(vma);
2158	err = i915_request_await_object(rq, vma->obj, false);
2159	if (!err)
2160		err = i915_vma_move_to_active(vma, rq, 0);
2161	if (!err)
2162		err = rq->engine->emit_bb_start(rq,
2163						vma->node.start,
2164						PAGE_SIZE, 0);
2165	i915_vma_unlock(vma);
2166	i915_request_add(rq);
2167	if (err)
2168		goto err_rq;
2169
2170	i915_gem_object_put(obj);
2171	intel_context_put(ce);
2172
2173	rq->client_link.next = &(*prev)->client_link;
2174	*prev = rq;
2175	return 0;
2176
2177err_rq:
2178	i915_request_put(rq);
2179err_obj:
2180	i915_gem_object_put(obj);
2181err_ce:
2182	intel_context_put(ce);
2183	return err;
2184}
2185
2186static int live_preempt_gang(void *arg)
2187{
2188	struct intel_gt *gt = arg;
2189	struct intel_engine_cs *engine;
2190	enum intel_engine_id id;
2191
2192	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2193		return 0;
2194
2195	/*
2196	 * Build as long a chain of preempters as we can, with each
2197	 * request higher priority than the last. Once we are ready, we release
	 * the last batch which then percolates down the chain, each releasing
2199	 * the next oldest in turn. The intent is to simply push as hard as we
2200	 * can with the number of preemptions, trying to exceed narrow HW
2201	 * limits. At a minimum, we insist that we can sort all the user
2202	 * high priority levels into execution order.
2203	 */
2204
2205	for_each_engine(engine, gt, id) {
2206		struct i915_request *rq = NULL;
2207		struct igt_live_test t;
2208		IGT_TIMEOUT(end_time);
2209		int prio = 0;
2210		int err = 0;
2211		u32 *cs;
2212
2213		if (!intel_engine_has_preemption(engine))
2214			continue;
2215
2216		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2217			return -EIO;
2218
2219		do {
2220			struct i915_sched_attr attr = {
2221				.priority = I915_USER_PRIORITY(prio++),
2222			};
2223
2224			err = create_gang(engine, &rq);
2225			if (err)
2226				break;
2227
2228			/* Submit each spinner at increasing priority */
2229			engine->schedule(rq, &attr);
2230
2231			if (prio <= I915_PRIORITY_MAX)
2232				continue;
2233
2234			if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2235				break;
2236
2237			if (__igt_timeout(end_time, NULL))
2238				break;
2239		} while (1);
2240		pr_debug("%s: Preempt chain of %d requests\n",
2241			 engine->name, prio);
2242
2243		/*
		 * The last spinner submitted is the highest priority and
		 * should execute first. When that spinner completes, it
		 * terminates the next lowest priority spinner, and so on
		 * until no spinners remain and the gang is complete.
2248		 */
		if (rq) { /* beware create_gang() failing on its first pass */
			cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
			if (!IS_ERR(cs)) {
				*cs = 0;
				i915_gem_object_unpin_map(rq->batch->obj);
			} else {
				err = PTR_ERR(cs);
				intel_gt_set_wedged(gt);
			}
		}
2257
2258		while (rq) { /* wait for each rq from highest to lowest prio */
2259			struct i915_request *n =
2260				list_next_entry(rq, client_link);
2261
2262			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2263				struct drm_printer p =
2264					drm_info_printer(engine->i915->drm.dev);
2265
2266				pr_err("Failed to flush chain of %d requests, at %d\n",
2267				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2268				intel_engine_dump(engine, &p,
2269						  "%s\n", engine->name);
2270
2271				err = -ETIME;
2272			}
2273
2274			i915_request_put(rq);
2275			rq = n;
2276		}
2277
2278		if (igt_live_test_end(&t))
2279			err = -EIO;
2280		if (err)
2281			return err;
2282	}
2283
2284	return 0;
2285}
2286
2287static int live_preempt_hang(void *arg)
2288{
2289	struct intel_gt *gt = arg;
2290	struct i915_gem_context *ctx_hi, *ctx_lo;
2291	struct igt_spinner spin_hi, spin_lo;
2292	struct intel_engine_cs *engine;
2293	enum intel_engine_id id;
2294	int err = -ENOMEM;
2295
2296	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2297		return 0;
2298
2299	if (!intel_has_reset_engine(gt))
2300		return 0;
2301
2302	if (igt_spinner_init(&spin_hi, gt))
2303		return -ENOMEM;
2304
2305	if (igt_spinner_init(&spin_lo, gt))
2306		goto err_spin_hi;
2307
2308	ctx_hi = kernel_context(gt->i915);
2309	if (!ctx_hi)
2310		goto err_spin_lo;
2311	ctx_hi->sched.priority =
2312		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2313
2314	ctx_lo = kernel_context(gt->i915);
2315	if (!ctx_lo)
2316		goto err_ctx_hi;
2317	ctx_lo->sched.priority =
2318		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2319
2320	for_each_engine(engine, gt, id) {
2321		struct i915_request *rq;
2322
2323		if (!intel_engine_has_preemption(engine))
2324			continue;
2325
2326		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2327					    MI_ARB_CHECK);
2328		if (IS_ERR(rq)) {
2329			err = PTR_ERR(rq);
2330			goto err_ctx_lo;
2331		}
2332
2333		i915_request_add(rq);
2334		if (!igt_wait_for_spinner(&spin_lo, rq)) {
2335			GEM_TRACE("lo spinner failed to start\n");
2336			GEM_TRACE_DUMP();
2337			intel_gt_set_wedged(gt);
2338			err = -EIO;
2339			goto err_ctx_lo;
2340		}
2341
2342		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
2343					    MI_ARB_CHECK);
2344		if (IS_ERR(rq)) {
2345			igt_spinner_end(&spin_lo);
2346			err = PTR_ERR(rq);
2347			goto err_ctx_lo;
2348		}
2349
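		/*
		 * Arm the selftest-only preemption-hang injection: the
		 * backend is expected to signal preempt_hang.completion when
		 * the injected hang triggers instead of completing the
		 * preemption, leaving the engine stuck until the manual
		 * engine reset below.
		 */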
2350		init_completion(&engine->execlists.preempt_hang.completion);
2351		engine->execlists.preempt_hang.inject_hang = true;
2352
2353		i915_request_add(rq);
2354
2355		if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
2356						 HZ / 10)) {
			pr_err("Preemption did not occur within timeout!\n");
2358			GEM_TRACE_DUMP();
2359			intel_gt_set_wedged(gt);
2360			err = -EIO;
2361			goto err_ctx_lo;
2362		}
2363
2364		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2365		intel_engine_reset(engine, NULL);
2366		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2367
2368		engine->execlists.preempt_hang.inject_hang = false;
2369
2370		if (!igt_wait_for_spinner(&spin_hi, rq)) {
2371			GEM_TRACE("hi spinner failed to start\n");
2372			GEM_TRACE_DUMP();
2373			intel_gt_set_wedged(gt);
2374			err = -EIO;
2375			goto err_ctx_lo;
2376		}
2377
2378		igt_spinner_end(&spin_hi);
2379		igt_spinner_end(&spin_lo);
2380		if (igt_flush_test(gt->i915)) {
2381			err = -EIO;
2382			goto err_ctx_lo;
2383		}
2384	}
2385
2386	err = 0;
2387err_ctx_lo:
2388	kernel_context_close(ctx_lo);
2389err_ctx_hi:
2390	kernel_context_close(ctx_hi);
2391err_spin_lo:
2392	igt_spinner_fini(&spin_lo);
2393err_spin_hi:
2394	igt_spinner_fini(&spin_hi);
2395	return err;
2396}
2397
2398static int live_preempt_timeout(void *arg)
2399{
2400	struct intel_gt *gt = arg;
2401	struct i915_gem_context *ctx_hi, *ctx_lo;
2402	struct igt_spinner spin_lo;
2403	struct intel_engine_cs *engine;
2404	enum intel_engine_id id;
2405	int err = -ENOMEM;
2406
2407	/*
2408	 * Check that we force preemption to occur by cancelling the previous
2409	 * context if it refuses to yield the GPU.
2410	 */
2411	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2412		return 0;
2413
2414	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2415		return 0;
2416
2417	if (!intel_has_reset_engine(gt))
2418		return 0;
2419
2420	if (igt_spinner_init(&spin_lo, gt))
2421		return -ENOMEM;
2422
2423	ctx_hi = kernel_context(gt->i915);
2424	if (!ctx_hi)
2425		goto err_spin_lo;
2426	ctx_hi->sched.priority =
2427		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2428
2429	ctx_lo = kernel_context(gt->i915);
2430	if (!ctx_lo)
2431		goto err_ctx_hi;
2432	ctx_lo->sched.priority =
2433		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2434
2435	for_each_engine(engine, gt, id) {
2436		unsigned long saved_timeout;
2437		struct i915_request *rq;
2438
2439		if (!intel_engine_has_preemption(engine))
2440			continue;
2441
2442		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2443					    MI_NOOP); /* preemption disabled */
2444		if (IS_ERR(rq)) {
2445			err = PTR_ERR(rq);
2446			goto err_ctx_lo;
2447		}
2448
2449		i915_request_add(rq);
2450		if (!igt_wait_for_spinner(&spin_lo, rq)) {
2451			intel_gt_set_wedged(gt);
2452			err = -EIO;
2453			goto err_ctx_lo;
2454		}
2455
2456		rq = igt_request_alloc(ctx_hi, engine);
2457		if (IS_ERR(rq)) {
2458			igt_spinner_end(&spin_lo);
2459			err = PTR_ERR(rq);
2460			goto err_ctx_lo;
2461		}
2462
2463		/* Flush the previous CS ack before changing timeouts */
2464		while (READ_ONCE(engine->execlists.pending[0]))
2465			cpu_relax();
2466
2467		saved_timeout = engine->props.preempt_timeout_ms;
2468		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
2469
2470		i915_request_get(rq);
2471		i915_request_add(rq);
2472
2473		intel_engine_flush_submission(engine);
2474		engine->props.preempt_timeout_ms = saved_timeout;
2475
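		/*
		 * The low priority spinner has no arbitration point and so
		 * cannot be preempted; the shortened preempt timeout should
		 * expire and force an engine reset, after which the high
		 * priority request is expected to complete promptly.
		 */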
2476		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2477			intel_gt_set_wedged(gt);
2478			i915_request_put(rq);
2479			err = -ETIME;
2480			goto err_ctx_lo;
2481		}
2482
2483		igt_spinner_end(&spin_lo);
2484		i915_request_put(rq);
2485	}
2486
2487	err = 0;
2488err_ctx_lo:
2489	kernel_context_close(ctx_lo);
2490err_ctx_hi:
2491	kernel_context_close(ctx_hi);
2492err_spin_lo:
2493	igt_spinner_fini(&spin_lo);
2494	return err;
2495}
2496
2497static int random_range(struct rnd_state *rnd, int min, int max)
2498{
2499	return i915_prandom_u32_max_state(max - min, rnd) + min;
2500}
2501
2502static int random_priority(struct rnd_state *rnd)
2503{
2504	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2505}
2506
2507struct preempt_smoke {
2508	struct intel_gt *gt;
2509	struct i915_gem_context **contexts;
2510	struct intel_engine_cs *engine;
2511	struct drm_i915_gem_object *batch;
2512	unsigned int ncontext;
2513	struct rnd_state prng;
2514	unsigned long count;
2515};
2516
2517static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2518{
2519	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2520							  &smoke->prng)];
2521}
2522
2523static int smoke_submit(struct preempt_smoke *smoke,
2524			struct i915_gem_context *ctx, int prio,
2525			struct drm_i915_gem_object *batch)
2526{
2527	struct i915_request *rq;
2528	struct i915_vma *vma = NULL;
2529	int err = 0;
2530
2531	if (batch) {
2532		struct i915_address_space *vm;
2533
2534		vm = i915_gem_context_get_vm_rcu(ctx);
2535		vma = i915_vma_instance(batch, vm, NULL);
2536		i915_vm_put(vm);
2537		if (IS_ERR(vma))
2538			return PTR_ERR(vma);
2539
2540		err = i915_vma_pin(vma, 0, 0, PIN_USER);
2541		if (err)
2542			return err;
2543	}
2544
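	/*
	 * Apply the priority to the context before creating the request so
	 * that the new request is queued at the chosen priority.
	 */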
2545	ctx->sched.priority = prio;
2546
2547	rq = igt_request_alloc(ctx, smoke->engine);
2548	if (IS_ERR(rq)) {
2549		err = PTR_ERR(rq);
2550		goto unpin;
2551	}
2552
2553	if (vma) {
2554		i915_vma_lock(vma);
2555		err = i915_request_await_object(rq, vma->obj, false);
2556		if (!err)
2557			err = i915_vma_move_to_active(vma, rq, 0);
2558		if (!err)
2559			err = rq->engine->emit_bb_start(rq,
2560							vma->node.start,
2561							PAGE_SIZE, 0);
2562		i915_vma_unlock(vma);
2563	}
2564
2565	i915_request_add(rq);
2566
2567unpin:
2568	if (vma)
2569		i915_vma_unpin(vma);
2570
2571	return err;
2572}
2573
2574static int smoke_crescendo_thread(void *arg)
2575{
2576	struct preempt_smoke *smoke = arg;
2577	IGT_TIMEOUT(end_time);
2578	unsigned long count;
2579
2580	count = 0;
2581	do {
2582		struct i915_gem_context *ctx = smoke_context(smoke);
2583		int err;
2584
2585		err = smoke_submit(smoke,
2586				   ctx, count % I915_PRIORITY_MAX,
2587				   smoke->batch);
2588		if (err)
2589			return err;
2590
2591		count++;
2592	} while (!__igt_timeout(end_time, NULL));
2593
2594	smoke->count = count;
2595	return 0;
2596}
2597
2598static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2599#define BATCH BIT(0)
2600{
2601	struct task_struct *tsk[I915_NUM_ENGINES] = {};
2602	struct preempt_smoke arg[I915_NUM_ENGINES];
2603	struct intel_engine_cs *engine;
2604	enum intel_engine_id id;
2605	unsigned long count;
2606	int err = 0;
2607
2608	for_each_engine(engine, smoke->gt, id) {
2609		arg[id] = *smoke;
2610		arg[id].engine = engine;
2611		if (!(flags & BATCH))
2612			arg[id].batch = NULL;
2613		arg[id].count = 0;
2614
		tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2616				      "igt/smoke:%d", id);
2617		if (IS_ERR(tsk[id])) {
2618			err = PTR_ERR(tsk[id]);
2619			break;
2620		}
2621		get_task_struct(tsk[id]);
2622	}
2623
2624	yield(); /* start all threads before we kthread_stop() */
2625
2626	count = 0;
2627	for_each_engine(engine, smoke->gt, id) {
2628		int status;
2629
2630		if (IS_ERR_OR_NULL(tsk[id]))
2631			continue;
2632
2633		status = kthread_stop(tsk[id]);
2634		if (status && !err)
2635			err = status;
2636
2637		count += arg[id].count;
2638
2639		put_task_struct(tsk[id]);
2640	}
2641
2642	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2643		count, flags,
2644		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
	return err;
2646}
2647
2648static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
2649{
2650	enum intel_engine_id id;
2651	IGT_TIMEOUT(end_time);
2652	unsigned long count;
2653
2654	count = 0;
2655	do {
2656		for_each_engine(smoke->engine, smoke->gt, id) {
2657			struct i915_gem_context *ctx = smoke_context(smoke);
2658			int err;
2659
2660			err = smoke_submit(smoke,
2661					   ctx, random_priority(&smoke->prng),
2662					   flags & BATCH ? smoke->batch : NULL);
2663			if (err)
2664				return err;
2665
2666			count++;
2667		}
2668	} while (!__igt_timeout(end_time, NULL));
2669
2670	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
2671		count, flags,
2672		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2673	return 0;
2674}
2675
2676static int live_preempt_smoke(void *arg)
2677{
2678	struct preempt_smoke smoke = {
2679		.gt = arg,
2680		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
2681		.ncontext = 1024,
2682	};
2683	const unsigned int phase[] = { 0, BATCH };
2684	struct igt_live_test t;
2685	int err = -ENOMEM;
2686	u32 *cs;
2687	int n;
2688
2689	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
2690		return 0;
2691
2692	smoke.contexts = kmalloc_array(smoke.ncontext,
2693				       sizeof(*smoke.contexts),
2694				       GFP_KERNEL);
2695	if (!smoke.contexts)
2696		return -ENOMEM;
2697
2698	smoke.batch =
2699		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
2700	if (IS_ERR(smoke.batch)) {
2701		err = PTR_ERR(smoke.batch);
2702		goto err_free;
2703	}
2704
2705	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
2706	if (IS_ERR(cs)) {
2707		err = PTR_ERR(cs);
2708		goto err_batch;
2709	}
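	/* Fill the batch with arbitration points so it may be preempted anywhere. */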
2710	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
2711		cs[n] = MI_ARB_CHECK;
2712	cs[n] = MI_BATCH_BUFFER_END;
2713	i915_gem_object_flush_map(smoke.batch);
2714	i915_gem_object_unpin_map(smoke.batch);
2715
2716	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
2717		err = -EIO;
2718		goto err_batch;
2719	}
2720
2721	for (n = 0; n < smoke.ncontext; n++) {
2722		smoke.contexts[n] = kernel_context(smoke.gt->i915);
2723		if (!smoke.contexts[n])
2724			goto err_ctx;
2725	}
2726
2727	for (n = 0; n < ARRAY_SIZE(phase); n++) {
2728		err = smoke_crescendo(&smoke, phase[n]);
2729		if (err)
2730			goto err_ctx;
2731
2732		err = smoke_random(&smoke, phase[n]);
2733		if (err)
2734			goto err_ctx;
2735	}
2736
2737err_ctx:
2738	if (igt_live_test_end(&t))
2739		err = -EIO;
2740
2741	for (n = 0; n < smoke.ncontext; n++) {
2742		if (!smoke.contexts[n])
2743			break;
2744		kernel_context_close(smoke.contexts[n]);
2745	}
2746
2747err_batch:
2748	i915_gem_object_put(smoke.batch);
2749err_free:
2750	kfree(smoke.contexts);
2751
2752	return err;
2753}
2754
2755static int nop_virtual_engine(struct intel_gt *gt,
2756			      struct intel_engine_cs **siblings,
2757			      unsigned int nsibling,
2758			      unsigned int nctx,
2759			      unsigned int flags)
2760#define CHAIN BIT(0)
2761{
2762	IGT_TIMEOUT(end_time);
2763	struct i915_request *request[16] = {};
2764	struct intel_context *ve[16];
2765	unsigned long n, prime, nc;
2766	struct igt_live_test t;
2767	ktime_t times[2] = {};
2768	int err;
2769
2770	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
2771
2772	for (n = 0; n < nctx; n++) {
2773		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
2774		if (IS_ERR(ve[n])) {
2775			err = PTR_ERR(ve[n]);
2776			nctx = n;
2777			goto out;
2778		}
2779
2780		err = intel_context_pin(ve[n]);
2781		if (err) {
2782			intel_context_put(ve[n]);
2783			nctx = n;
2784			goto out;
2785		}
2786	}
2787
2788	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
2789	if (err)
2790		goto out;
2791
2792	for_each_prime_number_from(prime, 1, 8192) {
2793		times[1] = ktime_get_raw();
2794
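		/*
		 * CHAIN submits the full run of requests on one virtual
		 * context before moving to the next; the default interleaves
		 * one request per context on each pass.
		 */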
2795		if (flags & CHAIN) {
2796			for (nc = 0; nc < nctx; nc++) {
2797				for (n = 0; n < prime; n++) {
2798					struct i915_request *rq;
2799
2800					rq = i915_request_create(ve[nc]);
2801					if (IS_ERR(rq)) {
2802						err = PTR_ERR(rq);
2803						goto out;
2804					}
2805
2806					if (request[nc])
2807						i915_request_put(request[nc]);
2808					request[nc] = i915_request_get(rq);
2809					i915_request_add(rq);
2810				}
2811			}
2812		} else {
2813			for (n = 0; n < prime; n++) {
2814				for (nc = 0; nc < nctx; nc++) {
2815					struct i915_request *rq;
2816
2817					rq = i915_request_create(ve[nc]);
2818					if (IS_ERR(rq)) {
2819						err = PTR_ERR(rq);
2820						goto out;
2821					}
2822
2823					if (request[nc])
2824						i915_request_put(request[nc]);
2825					request[nc] = i915_request_get(rq);
2826					i915_request_add(rq);
2827				}
2828			}
2829		}
2830
2831		for (nc = 0; nc < nctx; nc++) {
2832			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
2833				pr_err("%s(%s): wait for %llx:%lld timed out\n",
2834				       __func__, ve[0]->engine->name,
2835				       request[nc]->fence.context,
2836				       request[nc]->fence.seqno);
2837
2838				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2839					  __func__, ve[0]->engine->name,
2840					  request[nc]->fence.context,
2841					  request[nc]->fence.seqno);
2842				GEM_TRACE_DUMP();
2843				intel_gt_set_wedged(gt);
2844				break;
2845			}
2846		}
2847
2848		times[1] = ktime_sub(ktime_get_raw(), times[1]);
2849		if (prime == 1)
2850			times[0] = times[1];
2851
2852		for (nc = 0; nc < nctx; nc++) {
2853			i915_request_put(request[nc]);
2854			request[nc] = NULL;
2855		}
2856
2857		if (__igt_timeout(end_time, NULL))
2858			break;
2859	}
2860
2861	err = igt_live_test_end(&t);
2862	if (err)
2863		goto out;
2864
	pr_info("Request x%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
2866		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
2867		prime, div64_u64(ktime_to_ns(times[1]), prime));
2868
2869out:
2870	if (igt_flush_test(gt->i915))
2871		err = -EIO;
2872
2873	for (nc = 0; nc < nctx; nc++) {
2874		i915_request_put(request[nc]);
2875		intel_context_unpin(ve[nc]);
2876		intel_context_put(ve[nc]);
2877	}
2878	return err;
2879}
2880
2881static int live_virtual_engine(void *arg)
2882{
2883	struct intel_gt *gt = arg;
2884	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2885	struct intel_engine_cs *engine;
2886	enum intel_engine_id id;
2887	unsigned int class, inst;
2888	int err;
2889
2890	if (USES_GUC_SUBMISSION(gt->i915))
2891		return 0;
2892
2893	for_each_engine(engine, gt, id) {
2894		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
2895		if (err) {
2896			pr_err("Failed to wrap engine %s: err=%d\n",
2897			       engine->name, err);
2898			return err;
2899		}
2900	}
2901
2902	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2903		int nsibling, n;
2904
2905		nsibling = 0;
2906		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2907			if (!gt->engine_class[class][inst])
2908				continue;
2909
2910			siblings[nsibling++] = gt->engine_class[class][inst];
2911		}
2912		if (nsibling < 2)
2913			continue;
2914
2915		for (n = 1; n <= nsibling + 1; n++) {
2916			err = nop_virtual_engine(gt, siblings, nsibling,
2917						 n, 0);
2918			if (err)
2919				return err;
2920		}
2921
2922		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
2923		if (err)
2924			return err;
2925	}
2926
2927	return 0;
2928}
2929
2930static int mask_virtual_engine(struct intel_gt *gt,
2931			       struct intel_engine_cs **siblings,
2932			       unsigned int nsibling)
2933{
2934	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
2935	struct intel_context *ve;
2936	struct igt_live_test t;
2937	unsigned int n;
2938	int err;
2939
2940	/*
2941	 * Check that by setting the execution mask on a request, we can
2942	 * restrict it to our desired engine within the virtual engine.
2943	 */
2944
2945	ve = intel_execlists_create_virtual(siblings, nsibling);
2946	if (IS_ERR(ve)) {
2947		err = PTR_ERR(ve);
2948		goto out_close;
2949	}
2950
2951	err = intel_context_pin(ve);
2952	if (err)
2953		goto out_put;
2954
2955	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2956	if (err)
2957		goto out_unpin;
2958
2959	for (n = 0; n < nsibling; n++) {
2960		request[n] = i915_request_create(ve);
2961		if (IS_ERR(request[n])) {
2962			err = PTR_ERR(request[n]);
2963			nsibling = n;
2964			goto out;
2965		}
2966
2967		/* Reverse order as it's more likely to be unnatural */
2968		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
2969
2970		i915_request_get(request[n]);
2971		i915_request_add(request[n]);
2972	}
2973
2974	for (n = 0; n < nsibling; n++) {
2975		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
2976			pr_err("%s(%s): wait for %llx:%lld timed out\n",
2977			       __func__, ve->engine->name,
2978			       request[n]->fence.context,
2979			       request[n]->fence.seqno);
2980
2981			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2982				  __func__, ve->engine->name,
2983				  request[n]->fence.context,
2984				  request[n]->fence.seqno);
2985			GEM_TRACE_DUMP();
2986			intel_gt_set_wedged(gt);
2987			err = -EIO;
2988			goto out;
2989		}
2990
2991		if (request[n]->engine != siblings[nsibling - n - 1]) {
2992			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
2993			       request[n]->engine->name,
2994			       siblings[nsibling - n - 1]->name);
2995			err = -EINVAL;
2996			goto out;
2997		}
2998	}
2999
3000	err = igt_live_test_end(&t);
3001out:
3002	if (igt_flush_test(gt->i915))
3003		err = -EIO;
3004
3005	for (n = 0; n < nsibling; n++)
3006		i915_request_put(request[n]);
3007
3008out_unpin:
3009	intel_context_unpin(ve);
3010out_put:
3011	intel_context_put(ve);
3012out_close:
3013	return err;
3014}
3015
3016static int live_virtual_mask(void *arg)
3017{
3018	struct intel_gt *gt = arg;
3019	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3020	unsigned int class, inst;
3021	int err;
3022
3023	if (USES_GUC_SUBMISSION(gt->i915))
3024		return 0;
3025
3026	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3027		unsigned int nsibling;
3028
3029		nsibling = 0;
3030		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3031			if (!gt->engine_class[class][inst])
3032				break;
3033
3034			siblings[nsibling++] = gt->engine_class[class][inst];
3035		}
3036		if (nsibling < 2)
3037			continue;
3038
3039		err = mask_virtual_engine(gt, siblings, nsibling);
3040		if (err)
3041			return err;
3042	}
3043
3044	return 0;
3045}
3046
3047static int preserved_virtual_engine(struct intel_gt *gt,
3048				    struct intel_engine_cs **siblings,
3049				    unsigned int nsibling)
3050{
3051	struct i915_request *last = NULL;
3052	struct intel_context *ve;
3053	struct i915_vma *scratch;
3054	struct igt_live_test t;
3055	unsigned int n;
3056	int err = 0;
3057	u32 *cs;
3058
3059	scratch = create_scratch(siblings[0]->gt);
3060	if (IS_ERR(scratch))
3061		return PTR_ERR(scratch);
3062
3063	ve = intel_execlists_create_virtual(siblings, nsibling);
3064	if (IS_ERR(ve)) {
3065		err = PTR_ERR(ve);
3066		goto out_scratch;
3067	}
3068
3069	err = intel_context_pin(ve);
3070	if (err)
3071		goto out_put;
3072
3073	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3074	if (err)
3075		goto out_unpin;
3076
3077	for (n = 0; n < NUM_GPR_DW; n++) {
3078		struct intel_engine_cs *engine = siblings[n % nsibling];
3079		struct i915_request *rq;
3080
3081		rq = i915_request_create(ve);
3082		if (IS_ERR(rq)) {
3083			err = PTR_ERR(rq);
3084			goto out_end;
3085		}
3086
3087		i915_request_put(last);
3088		last = i915_request_get(rq);
3089
3090		cs = intel_ring_begin(rq, 8);
3091		if (IS_ERR(cs)) {
3092			i915_request_add(rq);
3093			err = PTR_ERR(cs);
3094			goto out_end;
3095		}
3096
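		/*
		 * Store GPR[n], as loaded by the previous request, into the
		 * scratch page, then load n + 1 into the next GPR. Each
		 * request is pinned to a different sibling, so the values
		 * only survive if the CS_GPR state travels with the virtual
		 * context across engines.
		 */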
3097		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3098		*cs++ = CS_GPR(engine, n);
3099		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3100		*cs++ = 0;
3101
3102		*cs++ = MI_LOAD_REGISTER_IMM(1);
3103		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3104		*cs++ = n + 1;
3105
3106		*cs++ = MI_NOOP;
3107		intel_ring_advance(rq, cs);
3108
3109		/* Restrict this request to run on a particular engine */
3110		rq->execution_mask = engine->mask;
3111		i915_request_add(rq);
3112	}
3113
3114	if (i915_request_wait(last, 0, HZ / 5) < 0) {
3115		err = -ETIME;
3116		goto out_end;
3117	}
3118
3119	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3120	if (IS_ERR(cs)) {
3121		err = PTR_ERR(cs);
3122		goto out_end;
3123	}
3124
3125	for (n = 0; n < NUM_GPR_DW; n++) {
3126		if (cs[n] != n) {
3127			pr_err("Incorrect value[%d] found for GPR[%d]\n",
3128			       cs[n], n);
3129			err = -EINVAL;
3130			break;
3131		}
3132	}
3133
3134	i915_gem_object_unpin_map(scratch->obj);
3135
3136out_end:
3137	if (igt_live_test_end(&t))
3138		err = -EIO;
3139	i915_request_put(last);
3140out_unpin:
3141	intel_context_unpin(ve);
3142out_put:
3143	intel_context_put(ve);
3144out_scratch:
3145	i915_vma_unpin_and_release(&scratch, 0);
3146	return err;
3147}
3148
3149static int live_virtual_preserved(void *arg)
3150{
3151	struct intel_gt *gt = arg;
3152	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3153	unsigned int class, inst;
3154
3155	/*
3156	 * Check that the context image retains non-privileged (user) registers
3157	 * from one engine to the next. For this we check that the CS_GPR
	 * registers are preserved.
3159	 */
3160
3161	if (USES_GUC_SUBMISSION(gt->i915))
3162		return 0;
3163
	/* The CS_GPR registers only exist on all engines from gen9 onwards. */
3165	if (INTEL_GEN(gt->i915) < 9)
3166		return 0;
3167
3168	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3169		int nsibling, err;
3170
3171		nsibling = 0;
3172		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3173			if (!gt->engine_class[class][inst])
3174				continue;
3175
3176			siblings[nsibling++] = gt->engine_class[class][inst];
3177		}
3178		if (nsibling < 2)
3179			continue;
3180
3181		err = preserved_virtual_engine(gt, siblings, nsibling);
3182		if (err)
3183			return err;
3184	}
3185
3186	return 0;
3187}
3188
3189static int bond_virtual_engine(struct intel_gt *gt,
3190			       unsigned int class,
3191			       struct intel_engine_cs **siblings,
3192			       unsigned int nsibling,
3193			       unsigned int flags)
3194#define BOND_SCHEDULE BIT(0)
3195{
3196	struct intel_engine_cs *master;
3197	struct i915_request *rq[16];
3198	enum intel_engine_id id;
3199	struct igt_spinner spin;
3200	unsigned long n;
3201	int err;
3202
3203	/*
3204	 * A set of bonded requests is intended to be run concurrently
3205	 * across a number of engines. We use one request per-engine
3206	 * and a magic fence to schedule each of the bonded requests
3207	 * at the same time. A consequence of our current scheduler is that
3208	 * we only move requests to the HW ready queue when the request
3209	 * becomes ready, that is when all of its prerequisite fences have
3210	 * been signaled. As one of those fences is the master submit fence,
3211	 * there is a delay on all secondary fences as the HW may be
3212	 * currently busy. Equally, as all the requests are independent,
3213	 * they may have other fences that delay individual request
3214	 * submission to HW. Ergo, we do not guarantee that all requests are
3215	 * immediately submitted to HW at the same time, just that if the
3216	 * rules are abided by, they are ready at the same time as the
3217	 * first is submitted. Userspace can embed semaphores in its batch
3218	 * to ensure parallel execution of its phases as it requires.
	 * Though naturally it has been suggested that the scheduler should
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
3222	 *
3223	 * With the submit-fence, we have identified three possible phases
3224	 * of synchronisation depending on the master fence: queued (not
3225	 * ready), executing, and signaled. The first two are quite simple
3226	 * and checked below. However, the signaled master fence handling is
3227	 * contentious. Currently we do not distinguish between a signaled
3228	 * fence and an expired fence, as once signaled it does not convey
3229	 * any information about the previous execution. It may even be freed
3230	 * and hence checking later it may not exist at all. Ergo we currently
3231	 * do not apply the bonding constraint for an already signaled fence,
3232	 * as our expectation is that it should not constrain the secondaries
3233	 * and is outside of the scope of the bonded request API (i.e. all
3234	 * userspace requests are meant to be running in parallel). As
3235	 * it imposes no constraint, and is effectively a no-op, we do not
3236	 * check below as normal execution flows are checked extensively above.
3237	 *
3238	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
3240	 */
3241
3242	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3243
3244	if (igt_spinner_init(&spin, gt))
3245		return -ENOMEM;
3246
3247	err = 0;
3248	rq[0] = ERR_PTR(-ENOMEM);
3249	for_each_engine(master, gt, id) {
3250		struct i915_sw_fence fence = {};
3251
3252		if (master->class == class)
3253			continue;
3254
3255		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3256
3257		rq[0] = igt_spinner_create_request(&spin,
3258						   master->kernel_context,
3259						   MI_NOOP);
3260		if (IS_ERR(rq[0])) {
3261			err = PTR_ERR(rq[0]);
3262			goto out;
3263		}
3264		i915_request_get(rq[0]);
3265
3266		if (flags & BOND_SCHEDULE) {
3267			onstack_fence_init(&fence);
3268			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3269							       &fence,
3270							       GFP_KERNEL);
3271		}
3272
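		/*
		 * For the BOND_SCHEDULE phase, the onstack fence keeps the
		 * master queued (not yet ready) while we construct the
		 * bonded secondaries; it is released by onstack_fence_fini()
		 * once they have all been submitted.
		 */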
3273		i915_request_add(rq[0]);
3274		if (err < 0)
3275			goto out;
3276
3277		if (!(flags & BOND_SCHEDULE) &&
3278		    !igt_wait_for_spinner(&spin, rq[0])) {
3279			err = -EIO;
3280			goto out;
3281		}
3282
3283		for (n = 0; n < nsibling; n++) {
3284			struct intel_context *ve;
3285
3286			ve = intel_execlists_create_virtual(siblings, nsibling);
3287			if (IS_ERR(ve)) {
3288				err = PTR_ERR(ve);
3289				onstack_fence_fini(&fence);
3290				goto out;
3291			}
3292
3293			err = intel_virtual_engine_attach_bond(ve->engine,
3294							       master,
3295							       siblings[n]);
3296			if (err) {
3297				intel_context_put(ve);
3298				onstack_fence_fini(&fence);
3299				goto out;
3300			}
3301
3302			err = intel_context_pin(ve);
3303			intel_context_put(ve);
3304			if (err) {
3305				onstack_fence_fini(&fence);
3306				goto out;
3307			}
3308
3309			rq[n + 1] = i915_request_create(ve);
3310			intel_context_unpin(ve);
3311			if (IS_ERR(rq[n + 1])) {
3312				err = PTR_ERR(rq[n + 1]);
3313				onstack_fence_fini(&fence);
3314				goto out;
3315			}
3316			i915_request_get(rq[n + 1]);
3317
3318			err = i915_request_await_execution(rq[n + 1],
3319							   &rq[0]->fence,
3320							   ve->engine->bond_execute);
3321			i915_request_add(rq[n + 1]);
3322			if (err < 0) {
3323				onstack_fence_fini(&fence);
3324				goto out;
3325			}
3326		}
3327		onstack_fence_fini(&fence);
3328		intel_engine_flush_submission(master);
3329		igt_spinner_end(&spin);
3330
3331		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3332			pr_err("Master request did not execute (on %s)!\n",
3333			       rq[0]->engine->name);
3334			err = -EIO;
3335			goto out;
3336		}
3337
3338		for (n = 0; n < nsibling; n++) {
3339			if (i915_request_wait(rq[n + 1], 0,
3340					      MAX_SCHEDULE_TIMEOUT) < 0) {
3341				err = -EIO;
3342				goto out;
3343			}
3344
3345			if (rq[n + 1]->engine != siblings[n]) {
3346				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3347				       siblings[n]->name,
3348				       rq[n + 1]->engine->name,
3349				       rq[0]->engine->name);
3350				err = -EINVAL;
3351				goto out;
3352			}
3353		}
3354
3355		for (n = 0; !IS_ERR(rq[n]); n++)
3356			i915_request_put(rq[n]);
3357		rq[0] = ERR_PTR(-ENOMEM);
3358	}
3359
3360out:
3361	for (n = 0; !IS_ERR(rq[n]); n++)
3362		i915_request_put(rq[n]);
3363	if (igt_flush_test(gt->i915))
3364		err = -EIO;
3365
3366	igt_spinner_fini(&spin);
3367	return err;
3368}
3369
3370static int live_virtual_bond(void *arg)
3371{
3372	static const struct phase {
3373		const char *name;
3374		unsigned int flags;
3375	} phases[] = {
3376		{ "", 0 },
3377		{ "schedule", BOND_SCHEDULE },
3378		{ },
3379	};
3380	struct intel_gt *gt = arg;
3381	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3382	unsigned int class, inst;
3383	int err;
3384
3385	if (USES_GUC_SUBMISSION(gt->i915))
3386		return 0;
3387
3388	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3389		const struct phase *p;
3390		int nsibling;
3391
3392		nsibling = 0;
3393		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3394			if (!gt->engine_class[class][inst])
3395				break;
3396
3397			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3398			siblings[nsibling++] = gt->engine_class[class][inst];
3399		}
3400		if (nsibling < 2)
3401			continue;
3402
3403		for (p = phases; p->name; p++) {
3404			err = bond_virtual_engine(gt,
3405						  class, siblings, nsibling,
3406						  p->flags);
3407			if (err) {
3408				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3409				       __func__, p->name, class, nsibling, err);
3410				return err;
3411			}
3412		}
3413	}
3414
3415	return 0;
3416}
3417
3418static int reset_virtual_engine(struct intel_gt *gt,
3419				struct intel_engine_cs **siblings,
3420				unsigned int nsibling)
3421{
3422	struct intel_engine_cs *engine;
3423	struct intel_context *ve;
3424	unsigned long *heartbeat;
3425	struct igt_spinner spin;
3426	struct i915_request *rq;
3427	unsigned int n;
3428	int err = 0;
3429
3430	/*
3431	 * In order to support offline error capture for fast preempt reset,
3432	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
3434	 */
3435
3436	heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
3437	if (!heartbeat)
3438		return -ENOMEM;
3439
3440	if (igt_spinner_init(&spin, gt)) {
3441		err = -ENOMEM;
3442		goto out_free;
3443	}
3444
3445	ve = intel_execlists_create_virtual(siblings, nsibling);
3446	if (IS_ERR(ve)) {
3447		err = PTR_ERR(ve);
3448		goto out_spin;
3449	}
3450
3451	for (n = 0; n < nsibling; n++)
3452		engine_heartbeat_disable(siblings[n], &heartbeat[n]);
3453
3454	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
3455	if (IS_ERR(rq)) {
3456		err = PTR_ERR(rq);
3457		goto out_heartbeat;
3458	}
3459	i915_request_add(rq);
3460
3461	if (!igt_wait_for_spinner(&spin, rq)) {
3462		intel_gt_set_wedged(gt);
3463		err = -ETIME;
3464		goto out_heartbeat;
3465	}
3466
3467	engine = rq->engine;
3468	GEM_BUG_ON(engine == ve->engine);
3469
3470	/* Take ownership of the reset and tasklet */
3471	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
3472			     &gt->reset.flags)) {
3473		intel_gt_set_wedged(gt);
3474		err = -EBUSY;
3475		goto out_heartbeat;
3476	}
3477	tasklet_disable(&engine->execlists.tasklet);
3478
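	/*
	 * With the tasklet disabled, run it by hand to flush any pending CS
	 * events so that the spinner is the request active on the HW before
	 * we fake the preemption below.
	 */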
3479	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
3480	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
3481
3482	/* Fake a preemption event; failed of course */
3483	spin_lock_irq(&engine->active.lock);
3484	__unwind_incomplete_requests(engine);
3485	spin_unlock_irq(&engine->active.lock);
3486	GEM_BUG_ON(rq->engine != ve->engine);
3487
3488	/* Reset the engine while keeping our active request on hold */
3489	execlists_hold(engine, rq);
3490	GEM_BUG_ON(!i915_request_on_hold(rq));
3491
3492	intel_engine_reset(engine, NULL);
3493	GEM_BUG_ON(rq->fence.error != -EIO);
3494
3495	/* Release our grasp on the engine, letting CS flow again */
3496	tasklet_enable(&engine->execlists.tasklet);
3497	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
3498
3499	/* Check that we do not resubmit the held request */
3500	i915_request_get(rq);
3501	if (!i915_request_wait(rq, 0, HZ / 5)) {
3502		pr_err("%s: on hold request completed!\n",
3503		       engine->name);
3504		intel_gt_set_wedged(gt);
3505		err = -EIO;
3506		goto out_rq;
3507	}
3508	GEM_BUG_ON(!i915_request_on_hold(rq));
3509
3510	/* But is resubmitted on release */
3511	execlists_unhold(engine, rq);
3512	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3513		pr_err("%s: held request did not complete!\n",
3514		       engine->name);
3515		intel_gt_set_wedged(gt);
3516		err = -ETIME;
3517	}
3518
3519out_rq:
3520	i915_request_put(rq);
3521out_heartbeat:
3522	for (n = 0; n < nsibling; n++)
3523		engine_heartbeat_enable(siblings[n], heartbeat[n]);
3524
3525	intel_context_put(ve);
3526out_spin:
3527	igt_spinner_fini(&spin);
3528out_free:
3529	kfree(heartbeat);
3530	return err;
3531}
3532
3533static int live_virtual_reset(void *arg)
3534{
3535	struct intel_gt *gt = arg;
3536	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3537	unsigned int class, inst;
3538
3539	/*
3540	 * Check that we handle a reset event within a virtual engine.
3541	 * Only the physical engine is reset, but we have to check the flow
3542	 * of the virtual requests around the reset, and make sure it is not
3543	 * forgotten.
3544	 */
3545
3546	if (USES_GUC_SUBMISSION(gt->i915))
3547		return 0;
3548
3549	if (!intel_has_reset_engine(gt))
3550		return 0;
3551
3552	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3553		int nsibling, err;
3554
3555		nsibling = 0;
3556		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3557			if (!gt->engine_class[class][inst])
3558				continue;
3559
3560			siblings[nsibling++] = gt->engine_class[class][inst];
3561		}
3562		if (nsibling < 2)
3563			continue;
3564
3565		err = reset_virtual_engine(gt, siblings, nsibling);
3566		if (err)
3567			return err;
3568	}
3569
3570	return 0;
3571}
3572
3573int intel_execlists_live_selftests(struct drm_i915_private *i915)
3574{
3575	static const struct i915_subtest tests[] = {
3576		SUBTEST(live_sanitycheck),
3577		SUBTEST(live_unlite_switch),
3578		SUBTEST(live_unlite_preempt),
3579		SUBTEST(live_hold_reset),
3580		SUBTEST(live_timeslice_preempt),
3581		SUBTEST(live_timeslice_queue),
3582		SUBTEST(live_busywait_preempt),
3583		SUBTEST(live_preempt),
3584		SUBTEST(live_late_preempt),
3585		SUBTEST(live_nopreempt),
3586		SUBTEST(live_preempt_cancel),
3587		SUBTEST(live_suppress_self_preempt),
3588		SUBTEST(live_suppress_wait_preempt),
3589		SUBTEST(live_chain_preempt),
3590		SUBTEST(live_preempt_gang),
3591		SUBTEST(live_preempt_hang),
3592		SUBTEST(live_preempt_timeout),
3593		SUBTEST(live_preempt_smoke),
3594		SUBTEST(live_virtual_engine),
3595		SUBTEST(live_virtual_mask),
3596		SUBTEST(live_virtual_preserved),
3597		SUBTEST(live_virtual_bond),
3598		SUBTEST(live_virtual_reset),
3599	};
3600
3601	if (!HAS_EXECLISTS(i915))
3602		return 0;
3603
3604	if (intel_gt_is_wedged(&i915->gt))
3605		return 0;
3606
3607	return intel_gt_live_subtests(tests, &i915->gt);
3608}
3609
3610static void hexdump(const void *buf, size_t len)
3611{
3612	const size_t rowsize = 8 * sizeof(u32);
3613	const void *prev = NULL;
3614	bool skip = false;
3615	size_t pos;
3616
3617	for (pos = 0; pos < len; pos += rowsize) {
3618		char line[128];
3619
3620		if (prev && !memcmp(prev, buf + pos, rowsize)) {
3621			if (!skip) {
3622				pr_info("*\n");
3623				skip = true;
3624			}
3625			continue;
3626		}
3627
3628		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3629						rowsize, sizeof(u32),
3630						line, sizeof(line),
3631						false) >= sizeof(line));
3632		pr_info("[%04zx] %s\n", pos, line);
3633
3634		prev = buf + pos;
3635		skip = false;
3636	}
3637}
3638
3639static int live_lrc_layout(void *arg)
3640{
3641	struct intel_gt *gt = arg;
3642	struct intel_engine_cs *engine;
3643	enum intel_engine_id id;
3644	u32 *lrc;
3645	int err;
3646
3647	/*
	 * Check that the register offsets we use to create the initial reg
	 * state match the layout saved by HW.
3650	 */
3651
3652	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
3653	if (!lrc)
3654		return -ENOMEM;
3655
3656	err = 0;
3657	for_each_engine(engine, gt, id) {
3658		u32 *hw;
3659		int dw;
3660
3661		if (!engine->default_state)
3662			continue;
3663
3664		hw = i915_gem_object_pin_map(engine->default_state,
3665					     I915_MAP_WB);
3666		if (IS_ERR(hw)) {
3667			err = PTR_ERR(hw);
3668			break;
3669		}
3670		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3671
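		/*
		 * Build our copy of the register state on top of a poisoned
		 * page so that any dword we do not explicitly write stands
		 * out against the HW default image.
		 */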
3672		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
3673					 engine->kernel_context,
3674					 engine,
3675					 engine->kernel_context->ring,
3676					 true);
3677
3678		dw = 0;
3679		do {
3680			u32 lri = hw[dw];
3681
3682			if (lri == 0) {
3683				dw++;
3684				continue;
3685			}
3686
3687			if (lrc[dw] == 0) {
3688				pr_debug("%s: skipped instruction %x at dword %d\n",
3689					 engine->name, lri, dw);
3690				dw++;
3691				continue;
3692			}
3693
3694			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
3695				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
3696				       engine->name, dw, lri);
3697				err = -EINVAL;
3698				break;
3699			}
3700
3701			if (lrc[dw] != lri) {
3702				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
3703				       engine->name, dw, lri, lrc[dw]);
3704				err = -EINVAL;
3705				break;
3706			}
3707
3708			lri &= 0x7f;
3709			lri++;
3710			dw++;
3711
3712			while (lri) {
3713				if (hw[dw] != lrc[dw]) {
3714					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
3715					       engine->name, dw, hw[dw], lrc[dw]);
3716					err = -EINVAL;
3717					break;
3718				}
3719
3720				/*
3721				 * Skip over the actual register value as we
3722				 * expect that to differ.
3723				 */
3724				dw += 2;
3725				lri -= 2;
3726			}
3727		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
3728
3729		if (err) {
3730			pr_info("%s: HW register image:\n", engine->name);
3731			hexdump(hw, PAGE_SIZE);
3732
3733			pr_info("%s: SW register image:\n", engine->name);
3734			hexdump(lrc, PAGE_SIZE);
3735		}
3736
3737		i915_gem_object_unpin_map(engine->default_state);
3738		if (err)
3739			break;
3740	}
3741
3742	kfree(lrc);
3743	return err;
3744}
3745
3746static int find_offset(const u32 *lri, u32 offset)
3747{
3748	int i;
3749
3750	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
3751		if (lri[i] == offset)
3752			return i;
3753
3754	return -1;
3755}
3756
3757static int live_lrc_fixed(void *arg)
3758{
3759	struct intel_gt *gt = arg;
3760	struct intel_engine_cs *engine;
3761	enum intel_engine_id id;
3762	int err = 0;
3763
3764	/*
3765	 * Check the assumed register offsets match the actual locations in
3766	 * the context image.
3767	 */
3768
3769	for_each_engine(engine, gt, id) {
3770		const struct {
3771			u32 reg;
3772			u32 offset;
3773			const char *name;
3774		} tbl[] = {
3775			{
3776				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
3777				CTX_RING_START - 1,
3778				"RING_START"
3779			},
3780			{
3781				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
3782				CTX_RING_CTL - 1,
3783				"RING_CTL"
3784			},
3785			{
3786				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
3787				CTX_RING_HEAD - 1,
3788				"RING_HEAD"
3789			},
3790			{
3791				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
3792				CTX_RING_TAIL - 1,
3793				"RING_TAIL"
3794			},
3795			{
3796				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
3797				lrc_ring_mi_mode(engine),
3798				"RING_MI_MODE"
3799			},
3800			{
3801				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
3802				CTX_BB_STATE - 1,
3803				"BB_STATE"
3804			},
3805			{ },
3806		}, *t;
3807		u32 *hw;
3808
3809		if (!engine->default_state)
3810			continue;
3811
3812		hw = i915_gem_object_pin_map(engine->default_state,
3813					     I915_MAP_WB);
3814		if (IS_ERR(hw)) {
3815			err = PTR_ERR(hw);
3816			break;
3817		}
3818		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3819
3820		for (t = tbl; t->name; t++) {
3821			int dw = find_offset(hw, t->reg);
3822
3823			if (dw != t->offset) {
3824				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
3825				       engine->name,
3826				       t->name,
3827				       t->reg,
3828				       dw,
3829				       t->offset);
3830				err = -EINVAL;
3831			}
3832		}
3833
3834		i915_gem_object_unpin_map(engine->default_state);
3835	}
3836
3837	return err;
3838}
3839
3840static int __live_lrc_state(struct intel_engine_cs *engine,
3841			    struct i915_vma *scratch)
3842{
3843	struct intel_context *ce;
3844	struct i915_request *rq;
3845	enum {
3846		RING_START_IDX = 0,
3847		RING_TAIL_IDX,
3848		MAX_IDX
3849	};
3850	u32 expected[MAX_IDX];
3851	u32 *cs;
3852	int err;
3853	int n;
3854
3855	ce = intel_context_create(engine);
3856	if (IS_ERR(ce))
3857		return PTR_ERR(ce);
3858
3859	err = intel_context_pin(ce);
3860	if (err)
3861		goto err_put;
3862
3863	rq = i915_request_create(ce);
3864	if (IS_ERR(rq)) {
3865		err = PTR_ERR(rq);
3866		goto err_unpin;
3867	}
3868
3869	cs = intel_ring_begin(rq, 4 * MAX_IDX);
3870	if (IS_ERR(cs)) {
3871		err = PTR_ERR(cs);
3872		i915_request_add(rq);
3873		goto err_unpin;
3874	}
3875
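	/*
	 * Use SRM to sample RING_START and RING_TAIL from the live context
	 * into the scratch page, so we can compare what the HW sees against
	 * what we expect for this context's ring.
	 */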
3876	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3877	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
3878	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
3879	*cs++ = 0;
3880
3881	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
3882
3883	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3884	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
3885	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
3886	*cs++ = 0;
3887
3888	i915_request_get(rq);
3889	i915_request_add(rq);
3890
3891	intel_engine_flush_submission(engine);
3892	expected[RING_TAIL_IDX] = ce->ring->tail;
3893
3894	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3895		err = -ETIME;
3896		goto err_rq;
3897	}
3898
3899	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3900	if (IS_ERR(cs)) {
3901		err = PTR_ERR(cs);
3902		goto err_rq;
3903	}
3904
3905	for (n = 0; n < MAX_IDX; n++) {
3906		if (cs[n] != expected[n]) {
3907			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
3908			       engine->name, n, cs[n], expected[n]);
3909			err = -EINVAL;
3910			break;
3911		}
3912	}
3913
3914	i915_gem_object_unpin_map(scratch->obj);
3915
3916err_rq:
3917	i915_request_put(rq);
3918err_unpin:
3919	intel_context_unpin(ce);
3920err_put:
3921	intel_context_put(ce);
3922	return err;
3923}
3924
3925static int live_lrc_state(void *arg)
3926{
3927	struct intel_gt *gt = arg;
3928	struct intel_engine_cs *engine;
3929	struct i915_vma *scratch;
3930	enum intel_engine_id id;
3931	int err = 0;
3932
3933	/*
3934	 * Check the live register state matches what we expect for this
3935	 * intel_context.
3936	 */
3937
3938	scratch = create_scratch(gt);
3939	if (IS_ERR(scratch))
3940		return PTR_ERR(scratch);
3941
3942	for_each_engine(engine, gt, id) {
3943		err = __live_lrc_state(engine, scratch);
3944		if (err)
3945			break;
3946	}
3947
3948	if (igt_flush_test(gt->i915))
3949		err = -EIO;
3950
3951	i915_vma_unpin_and_release(&scratch, 0);
3952	return err;
3953}
3954
3955static int gpr_make_dirty(struct intel_engine_cs *engine)
3956{
3957	struct i915_request *rq;
3958	u32 *cs;
3959	int n;
3960
3961	rq = intel_engine_create_kernel_request(engine);
3962	if (IS_ERR(rq))
3963		return PTR_ERR(rq);
3964
3965	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
3966	if (IS_ERR(cs)) {
3967		i915_request_add(rq);
3968		return PTR_ERR(cs);
3969	}
3970
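	/*
	 * Load a well-known, non-zero value into every GPR from the kernel
	 * context, so that a later context reads back zeroes only if the
	 * GPR state is genuinely cleared for new contexts.
	 */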
3971	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
3972	for (n = 0; n < NUM_GPR_DW; n++) {
3973		*cs++ = CS_GPR(engine, n);
3974		*cs++ = STACK_MAGIC;
3975	}
3976	*cs++ = MI_NOOP;
3977
3978	intel_ring_advance(rq, cs);
3979	i915_request_add(rq);
3980
3981	return 0;
3982}
3983
3984static int __live_gpr_clear(struct intel_engine_cs *engine,
3985			    struct i915_vma *scratch)
3986{
3987	struct intel_context *ce;
3988	struct i915_request *rq;
3989	u32 *cs;
3990	int err;
3991	int n;
3992
3993	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
3994		return 0; /* GPR only on rcs0 for gen8 */
3995
3996	err = gpr_make_dirty(engine);
3997	if (err)
3998		return err;
3999
4000	ce = intel_context_create(engine);
4001	if (IS_ERR(ce))
4002		return PTR_ERR(ce);
4003
4004	rq = intel_context_create_request(ce);
4005	if (IS_ERR(rq)) {
4006		err = PTR_ERR(rq);
4007		goto err_put;
4008	}
4009
4010	cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
4011	if (IS_ERR(cs)) {
4012		err = PTR_ERR(cs);
4013		i915_request_add(rq);
4014		goto err_put;
4015	}
4016
4017	for (n = 0; n < NUM_GPR_DW; n++) {
4018		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4019		*cs++ = CS_GPR(engine, n);
4020		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4021		*cs++ = 0;
4022	}
4023
4024	i915_request_get(rq);
4025	i915_request_add(rq);
4026
4027	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4028		err = -ETIME;
4029		goto err_rq;
4030	}
4031
4032	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4033	if (IS_ERR(cs)) {
4034		err = PTR_ERR(cs);
4035		goto err_rq;
4036	}
4037
4038	for (n = 0; n < NUM_GPR_DW; n++) {
4039		if (cs[n]) {
4040			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4041			       engine->name,
4042			       n / 2, n & 1 ? "udw" : "ldw",
4043			       cs[n]);
4044			err = -EINVAL;
4045			break;
4046		}
4047	}
4048
4049	i915_gem_object_unpin_map(scratch->obj);
4050
4051err_rq:
4052	i915_request_put(rq);
4053err_put:
4054	intel_context_put(ce);
4055	return err;
4056}
4057
4058static int live_gpr_clear(void *arg)
4059{
4060	struct intel_gt *gt = arg;
4061	struct intel_engine_cs *engine;
4062	struct i915_vma *scratch;
4063	enum intel_engine_id id;
4064	int err = 0;
4065
4066	/*
4067	 * Check that GPR registers are cleared in new contexts as we need
4068	 * to avoid leaking any information from previous contexts.
4069	 */
4070
4071	scratch = create_scratch(gt);
4072	if (IS_ERR(scratch))
4073		return PTR_ERR(scratch);
4074
4075	for_each_engine(engine, gt, id) {
4076		err = __live_gpr_clear(engine, scratch);
4077		if (err)
4078			break;
4079	}
4080
4081	if (igt_flush_test(gt->i915))
4082		err = -EIO;
4083
4084	i915_vma_unpin_and_release(&scratch, 0);
4085	return err;
4086}
4087
4088int intel_lrc_live_selftests(struct drm_i915_private *i915)
4089{
4090	static const struct i915_subtest tests[] = {
4091		SUBTEST(live_lrc_layout),
4092		SUBTEST(live_lrc_fixed),
4093		SUBTEST(live_lrc_state),
4094		SUBTEST(live_gpr_clear),
4095	};
4096
4097	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
4098		return 0;
4099
4100	return intel_gt_live_subtests(tests, &i915->gt);
4101}
4102