/*	$NetBSD: i915_gem_context.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_context.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx)
{
	/* single threaded, private ctx */
	return rcu_dereference_protected(ctx->vm, true);
}

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = NULL;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			struct i915_request *this;

			this = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_file;
			}
			if (rq) {
				i915_request_await_dma_fence(this, &rq->fence);
				i915_request_put(rq);
			}
			rq = i915_request_get(this);
			i915_request_add(this);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(&i915->gt);
			i915_request_put(rq);
			err = -EIO;
			goto out_file;
		}
		i915_request_put(rq);

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			rq = NULL;
			for (n = 0; n < prime; n++) {
				struct i915_request *this;

				this = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto out_file;
				}

				if (rq) { /* Force submission order */
					i915_request_await_dma_fence(this, &rq->fence);
					i915_request_put(rq);
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				rq = i915_request_get(this);
				i915_request_add(this);
			}
			GEM_BUG_ON(!rq);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				intel_gt_set_wedged(&i915->gt);
				i915_request_put(rq);
				break;
			}
			i915_request_put(rq);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	fput(file);
	return err;
}

struct parallel_switch {
	struct task_struct *tsk;
	struct intel_context *ce[2];
};

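/*
 * Two switching strategies for the parallel-switch threads below:
 * __live_parallel_switch1() waits for each round of requests to complete
 * before starting the next (synchronous), while __live_parallel_switchN()
 * keeps queueing back-to-back requests without waiting in between.
 */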
static int __live_parallel_switch1(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq = NULL;
		int err, n;

		err = 0;
		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int __live_parallel_switchN(void *data)
{
	struct parallel_switch *arg = data;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	do {
		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;
			int err = 0;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
			if (err) {
				i915_request_put(rq);
				return err;
			}
		}

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(rq);

	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	int (* const *fn)(void *arg);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		int n;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].tsk = kthread_run(*fn, &data[n],
						  "igt/parallel:%s",
						  data[n].ce[0]->engine->name);
			if (IS_ERR(data[n].tsk)) {
				err = PTR_ERR(data[n].tsk);
				break;
			}
			get_task_struct(data[n].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		for (n = 0; n < count; n++) {
			int status;

			if (IS_ERR_OR_NULL(data[n].tsk))
				continue;

			status = kthread_stop(data[n].tsk);
			if (status && !err)
				err = status;

			put_task_struct(data[n].tsk);
			data[n].tsk = NULL;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}
	}
	kfree(data);
out_file:
	fput(file);
	return err;
}

static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

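/*
 * gpu_fill() binds the object into the context's address space and uses the
 * GPU (via igt_gpu_fill_dw()) to write a known value into the dw'th dword of
 * every page, so that cpu_check() can later verify which context performed
 * each write.
 */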
static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
	return 0;
}

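/*
 * cpu_check() reads the object back on the CPU: the first 'max' dwords of
 * each page must hold the values written by gpu_fill(), and the remainder
 * must still contain the STACK_MAGIC pattern left by cpu_fill(), proving
 * that the GPU writes landed exactly where expected.
 */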
static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
	return err;
}

static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&to_drm_file(file)->object_idr,
			&obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

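/*
 * throttle() keeps a small sliding window of outstanding requests per
 * context: wait for the oldest request in the queue to complete, then add a
 * fresh request at the tail. This bounds how far the tests run ahead of the
 * hardware.
 */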
static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);
		struct file *file;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		fput(file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			mutex_lock(&ctx->mutex);
			__assign_ppgtt(ctx, ctx_vm(parent));
			mutex_unlock(&ctx->mutex);

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ctx_vm(parent),
							 file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	fput(file);
	return err;
}

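/*
 * rpcs_query_batch() builds a small batch buffer that stores the value of
 * GEN8_R_PWR_CLK_STATE (the RPCS register describing the current
 * slice/subslice configuration) into the supplied destination vma.
 */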
static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj;
	u32 *cmd;
	int err;

	if (INTEL_GEN(vma->vm->i915) < 8)
		return ERR_PTR(-EINVAL);

	obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(vma->vm->gt);

	vma = i915_vma_instance(obj, vma->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

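/*
 * emit_rpcs_query() submits the RPCS query batch on the given context and
 * returns the in-flight request; the caller waits for it and then reads the
 * register value back out of the object.
 */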
static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	batch = rpcs_query_batch(vma);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_vma;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto err_request;

	i915_vma_lock(batch);
	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	i915_vma_unlock(batch);
	if (err)
		goto skip_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin_and_release(&batch, 0);
	i915_vma_unpin(vma);

	*rq_out = i915_request_get(rq);

	i915_request_add(rq);

	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_batch:
	i915_vma_unpin_and_release(&batch, 0);
err_vma:
	i915_vma_unpin(vma);

	return err;
}

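/*
 * Flags for the SSEU subtests: TEST_BUSY keeps the context busy with a
 * spinner while reconfiguring, TEST_RESET additionally resets the engine,
 * and TEST_IDLE re-checks the configuration after flushing to idle.
 */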
#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)

static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

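/*
 * __read_slice_count() runs the RPCS query on @ce, decodes the slice count
 * from the returned register value and stores the raw value in @rpcs;
 * returns the slice count, or a negative error code.
 */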
static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (INTEL_GEN(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");

	return -EINVAL;
}

static int
__sseu_finish(const char *name,
	      unsigned int flags,
	      struct intel_context *ce,
	      struct drm_i915_gem_object *obj,
	      unsigned int expected,
	      struct igt_spinner *spin)
{
	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
	u32 rpcs = 0;
	int ret = 0;

	if (flags & TEST_RESET) {
		ret = intel_engine_reset(ce->engine, "sseu");
		if (ret)
			goto out;
	}

	ret = __read_slice_count(ce, obj,
				 flags & TEST_RESET ? NULL : spin, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
	if (ret)
		goto out;

	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");

out:
	if (spin)
		igt_spinner_end(spin);

	if ((flags & TEST_IDLE) && ret == 0) {
		ret = igt_flush_test(ce->engine->i915);
		if (ret)
			return ret;

		ret = __read_slice_count(ce, obj, NULL, &rpcs);
		ret = __check_rpcs(name, rpcs, ret, expected,
				   "Context", " after idle!");
	}

	return ret;
}

static int
__sseu_test(const char *name,
	    unsigned int flags,
	    struct intel_context *ce,
	    struct drm_i915_gem_object *obj,
	    struct intel_sseu sseu)
{
	struct igt_spinner *spin = NULL;
	int ret;

	intel_engine_pm_get(ce->engine);

	ret = __sseu_prepare(name, flags, ce, &spin);
	if (ret)
		goto out_pm;

	ret = intel_context_reconfigure_sseu(ce, sseu);
	if (ret)
		goto out_spin;

	ret = __sseu_finish(name, flags, ce, obj,
			    hweight32(sseu.slice_mask), spin);

out_spin:
	if (spin) {
		igt_spinner_end(spin);
		igt_spinner_fini(spin);
		kfree(spin);
	}
out_pm:
	intel_engine_pm_put(ce->engine);
	return ret;
}

static int
__igt_ctx_sseu(struct drm_i915_private *i915,
	       const char *name,
	       unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int inst = 0;
	int ret = 0;

	if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg)
		return 0;

	if (flags & TEST_RESET)
		igt_global_reset_lock(&i915->gt);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out_unlock;
	}

	do {
		struct intel_engine_cs *engine;
		struct intel_context *ce;
		struct intel_sseu pg_sseu;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_RENDER,
						  inst++);
		if (!engine)
			break;

		if (hweight32(engine->sseu.slice_mask) < 2)
			continue;

		/*
		 * Gen11 VME friendly power-gated configuration with
		 * half enabled sub-slices.
		 */
		pg_sseu = engine->sseu;
		pg_sseu.slice_mask = 1;
		pg_sseu.subslice_mask =
			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));

		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
			engine->name, name, flags,
			hweight32(engine->sseu.slice_mask),
			hweight32(pg_sseu.slice_mask));

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto out_put;
		}

		ret = intel_context_pin(ce);
		if (ret)
			goto out_ce;

		/* First set the default mask. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(&i915->gt);

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned long idx, ndwords, dw, num_engines;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	struct file *file;
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm;
	if (!vm || !vm->has_read_only) {
		err = 0;
		goto out_file;
	}

	num_engines = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
		if (intel_engine_can_store_dword(ce->engine))
			num_engines++;
	i915_gem_context_unlock_engines(ctx);

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       yesno(!!ctx_vm(ctx)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %lu engines)\n",
		ndwords, num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	fput(file);
	return err;
}

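/*
 * check_scratch() sanity-checks that the randomly chosen scratch offset does
 * not overlap anything already allocated in the address space.
 */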
static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node =
		__drm_mm_interval_first(&vm->mm,
					offset, offset + sizeof(u32) - 1);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

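/*
 * write_to_scratch() emits a batch on @engine that performs a single
 * MI_STORE_DWORD_IMM of @value to @offset within the context's GTT.
 */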
static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out;
	}

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	goto out_vm;
skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);
out:
	i915_gem_object_put(obj);
	return err;
}

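/*
 * read_from_scratch() emits a batch that loads the dword at @offset into a
 * GPR (RCS_GPR0) and stores it back into the batch object, from which the
 * CPU then reads the result.
 */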
static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out;
	}

	memset(cmd, POISON_INUSE, PAGE_SIZE);
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
		*cmd++ = 0;
	} else {
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
	}
	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);
	i915_vma_close(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_vm;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);

	goto out_vm;
skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);
out:
	i915_gem_object_put(obj);
	return err;
}

static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	unsigned long num_engines, count;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	struct file *file;
	u64 vm_total;
	int err;

	if (INTEL_GEN(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation, if the vm are distinct */
	if (ctx_vm(ctx_a) == ctx_vm(ctx_b))
		goto out_file;

	vm_total = ctx_vm(ctx_a)->total;
	GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total);
	vm_total -= I915_GTT_PAGE_SIZE;

	count = 0;
	num_engines = 0;
	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			div64_u64_rem(i915_prandom_u64_state(&prng),
				      vm_total, &offset);
			offset = round_down(offset, alignof_dword);
			offset += I915_GTT_PAGE_SIZE;

			err = write_to_scratch(ctx_a, engine,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine,
							offset, &value);
			if (err)
				goto out_file;

			if (value) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto out_file;
			}

			this++;
		}
		count += this;
		num_engines++;
	}
	pr_info("Checked %lu scratch offsets across %lu engines\n",
		count, num_engines);

out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	fput(file);
	return err;
}

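/*
 * Filter for context_barrier_task(): skip engines whose contexts never
 * allocated any HW state and so have nothing to retire.
 */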
static bool skip_unused_engines(struct intel_context *ce, void *data)
{
	return !ce->state;
}

static void mock_barrier_task(void *data)
{
	unsigned int *counter = data;

	++*counter;
}

static int mock_context_barrier(void *arg)
{
#undef pr_fmt
#define pr_fmt(x) "context_barrier_task():" # x
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx;
	struct i915_request *rq;
	unsigned int counter;
	int err;

	/*
	 * The context barrier provides us with a callback after it emits
	 * a request; useful for retiring old state after loading new.
	 */

	ctx = mock_context(i915, "mock");
	if (!ctx)
		return -ENOMEM;

	counter = 0;
	err = context_barrier_task(ctx, 0,
				   NULL, NULL, mock_barrier_task, &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	if (counter == 0) {
		pr_err("Did not retire immediately with 0 engines\n");
		err = -EINVAL;
		goto out;
	}

	counter = 0;
	err = context_barrier_task(ctx, ALL_ENGINES,
				   skip_unused_engines,
				   NULL,
				   mock_barrier_task,
				   &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	if (counter == 0) {
		pr_err("Did not retire immediately for all unused engines\n");
		err = -EINVAL;
		goto out;
	}

	rq = igt_request_alloc(ctx, i915->engine[RCS0]);
	if (IS_ERR(rq)) {
		pr_err("Request allocation failed!\n");
		err = PTR_ERR(rq);
		goto out;
	}
	i915_request_add(rq);

	counter = 0;
	context_barrier_inject_fault = BIT(RCS0);
	err = context_barrier_task(ctx, ALL_ENGINES,
				   NULL, NULL, mock_barrier_task, &counter);
	context_barrier_inject_fault = 0;
	if (err == -ENXIO)
		err = 0;
	else
		pr_err("Did not hit fault injection!\n");
	if (counter != 0) {
		pr_err("Invoked callback on error!\n");
		err = -EIO;
	}
	if (err)
		goto out;

	counter = 0;
	err = context_barrier_task(ctx, ALL_ENGINES,
				   skip_unused_engines,
				   NULL,
				   mock_barrier_task,
				   &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	mock_device_flush(i915);
	if (counter == 0) {
		pr_err("Did not retire on each active engine\n");
		err = -EINVAL;
		goto out;
	}

out:
	mock_context_close(ctx);
	return err;
#undef pr_fmt
#define pr_fmt(x) x
}

int i915_gem_context_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_context_barrier),
	};
	struct drm_i915_private *i915;
	int err;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}
