// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */
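
/*
 * Live and performance selftests for the blitter-based migration engine.
 *
 * Note that this file relies on helpers local to intel_migrate.c
 * (CHUNK_SZ, emit_pte(), emit_copy_ccs(), struct sgt_dma, ...), so it is
 * expected to be built by being #included from that file rather than
 * compiled standalone.
 */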

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_spinner.h"
#include "selftests/i915_random.h"

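/*
 * Transfer sizes exercised by the copy/clear tests: a few small and
 * huge-page friendly sizes, plus sizes just below, at and just above
 * CHUNK_SZ, the granularity at which the migration code splits a
 * transfer into separate requests.
 */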
static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

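/*
 * Prefer local memory so that the flat-CCS paths are exercised where
 * available, but fall back to an internal (system memory) object so the
 * tests still run on integrated parts.
 */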
static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

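/*
 * copy() fills @src with an ascending pattern and @dst with its
 * complement, invokes the supplied copy function under a ww transaction
 * (restarting on -EDEADLK), waits for the final request and then
 * spot-checks one randomly chosen dword per page of the destination.
 */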
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}

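/*
 * intel_context_copy_ccs() mirrors the CCS handling in intel_migrate.c:
 * for each CHUNK_SZ window it binds the object's pages with emit_pte()
 * and then blits between the main surface and its CCS (compression
 * control) surface, with @write_to_ccs selecting the direction. The
 * intel_migrate_ccs_copy() wrapper below runs it on a freshly created
 * (or, failing that, the global) migration context under the caller's
 * ww transaction.
 */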
static int intel_context_copy_ccs(struct intel_context *ce,
				  const struct i915_deps *deps,
				  struct scatterlist *sg,
				  unsigned int pat_index,
				  bool write_to_ccs,
				  struct i915_request **out)
{
	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;
	u32 offset;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	offset = 0;
	if (HAS_64K_PAGES(ce->engine->i915))
		offset = CHUNK_SZ;

	do {
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + clear must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_copy_ccs(rq, offset, dst_access,
				    offset, src_access, len);
		if (err)
			goto out_rq;

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}

static int
intel_migrate_ccs_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *sg,
		       unsigned int pat_index,
		       bool write_to_ccs,
		       struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_copy_ccs(ce, deps, sg, pat_index,
				     write_to_ccs, out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}

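/*
 * clear() fills the object with a known pattern, optionally dirties the
 * CCS surface first (flat-CCS lmem objects only, and only when clearing
 * to zero), runs the supplied clear function under a ww transaction and
 * then verifies both the main surface and, via a CCS read-back, that
 * the compression metadata was cleared as well.
 */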
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr, val = 0;
	bool ccs_cap = false;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	/* Consider the rounded up memory too */
	sz = obj->base.size;

	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
		ccs_cap = true;

	for_i915_gem_ww(&ww, err, true) {
		int ccs_bytes, ccs_bytes_per_chunk;

		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		if (ccs_cap && !val) {
			/* Write the obj data into ccs surface */
			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     true, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;
		}

		err = fn(migrate, &ww, obj, val, &rq);
		if (rq && !err) {
			if (i915_request_wait(rq, 0, HZ) < 0) {
				pr_err("%ps timed out, size: %u\n", fn, sz);
				err = -ETIME;
			}
			i915_request_put(rq);
			rq = NULL;
		}
		if (err)
			continue;

		i915_gem_object_flush_map(obj);

		/* Verify the set/clear of the obj mem */
		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
			int x = i * 1024 +
				i915_prandom_u32_max_state(1024, prng);

			if (vaddr[x] != val) {
				pr_err("%ps failed, (%u != %u), offset: %zu\n",
				       fn, vaddr[x], val, x * sizeof(u32));
				igt_hexdump(vaddr + i * 1024, 4096);
				err = -EINVAL;
			}
		}
		if (err)
			continue;

		if (ccs_cap && !val) {
			for (i = 0; i < sz / sizeof(u32); i++)
				vaddr[i] = ~i;
			i915_gem_object_flush_map(obj);

			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     false, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;

			ccs_bytes = GET_CCS_BYTES(i915, sz);
			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
			i915_gem_object_flush_map(obj);

			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
				int offset = ((i * PAGE_SIZE) /
					ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
				int x = i915_prandom_u32_max_state(min_t(int, 1024,
									 ccs_bytes_left), prng);

				if (vaddr[offset + x]) {
					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
					igt_hexdump(vaddr + offset,
						    min_t(int, 4096,
							  ccs_bytes_left * sizeof(u32)));
					err = -EINVAL;
				}
			}

			if (err)
				continue;
		}
		i915_gem_object_unpin_map(obj);
	}

	if (err) {
		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq && err != -EINVAL) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}

		i915_gem_object_unpin_map(obj);
	}

	i915_gem_object_put(obj);
	return err;
}

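/*
 * Thin adapters binding copy()/clear() to either the ww-aware
 * intel_migrate_*() entry points (which may create a fresh context) or
 * to intel_context_migrate_*() on the GT's single global migration
 * context, so both paths get exercised with the same test body.
 */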
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->pat_index,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->pat_index,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->pat_index,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->pat_index,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->pat_index,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->pat_index,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

static int live_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

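/*
 * A spinner plus an on-stack timer: live_emit_pte_full_ring() parks a
 * spinner on the test context to hold back submission, and the timer
 * later kills it so that a subsequent wait for ring space can make
 * forward progress.
 */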
struct spinner_timer {
	struct timer_list timer;
	struct igt_spinner spin;
};

static void spinner_kill(struct timer_list *timer)
{
	struct spinner_timer *st = from_timer(st, timer, timer);

	igt_spinner_end(&st->spin);
	pr_info("%s\n", __func__);
}

static int live_emit_pte_full_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq, *prev;
	struct spinner_timer st;
	struct sgt_dma it;
	int len, sz, err;
	u32 *cs;

	/*
	 * Simple regression test to check that we don't trample the
	 * rq->reserved_space when returning from emit_pte(), if the ring is
	 * nearly full.
	 */

	if (igt_spinner_init(&st.spin, to_gt(i915)))
		return -ENOMEM;

	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_spinner;
	}

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_obj;

	ce = intel_migrate_create_context(migrate);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_obj;
	}

	ce->ring_size = SZ_4K; /* Not too big */

	err = intel_context_pin(ce);
	if (err)
		goto out_put;

	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	i915_request_add(rq);
	if (!igt_wait_for_spinner(&st.spin, rq)) {
		err = -EIO;
		goto out_unpin;
	}

	/*
	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
	 * ring->reserved_space at the end. To actually emit the PTEs we require
	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
	 * ring space in emit_pte(), otherwise we trample on the reserved_space
	 * resulting in crashes when later submitting the rq.
	 */

	prev = NULL;
	do {
		if (prev)
			i915_request_add(rq);

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_unpin;
		}

		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
			I915_EMIT_PTE_NUM_DWORDS;
		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
			   I915_EMIT_PTE_NUM_DWORDS);
		cs = intel_ring_begin(rq, sz);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			goto out_rq;
		}

		memset32(cs, MI_NOOP, sz);
		cs += sz;
		intel_ring_advance(rq, cs);

		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);

		prev = rq;
	} while (rq->ring->space > (rq->reserved_space +
				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));

	timer_setup_on_stack(&st.timer, spinner_kill, 0);
	mod_timer(&st.timer, jiffies + 2 * HZ);

	/*
	 * This should wait for the spinner to be killed, otherwise we should go
	 * down in flames when doing i915_request_add().
	 */
	pr_info("%s emit_pte ring space=%u\n", __func__, rq->ring->space);
	it = sg_sgt(obj->mm.pages->sgl);
	len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
	if (!len) {
		err = -EINVAL;
		goto out_rq;
	}
	if (len < 0) {
		err = len;
		goto out_rq;
	}

out_rq:
	i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
	del_timer_sync(&st.timer);
	destroy_timer_on_stack(&st.timer);
out_unpin:
	intel_context_unpin(ce);
out_put:
	intel_context_put(ce);
out_obj:
	i915_gem_object_put(obj);
out_spinner:
	igt_spinner_fini(&st.spin);
	return err;
}

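/*
 * threaded_migrate() spawns one kthread per online CPU (plus one), each
 * running the given copy/clear subtest with its own PRNG stream, to
 * stress concurrent use of the migration engine. If a kthread fails to
 * spawn, the remaining slots are skipped, the threads that did start
 * are stopped and the first non-zero status is returned.
 */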
struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10 * n_cpus); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop_put(tsk);
		if (status && !err)
			err = status;
	}

	kfree(thread);
	return err;
}

static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_clear, 0);
}

int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(live_emit_pte_full_ring),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}

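/*
 * The perf variants keep the object locked and its pages pinned for the
 * duration of the measurement, so allocate, trylock and pin here and
 * leave the unlock/put to the caller.
 */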
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

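/*
 * Time ARRAY_SIZE(t) passes of a full clear and report a throughput
 * figure derived from the middle samples: after sorting, the fastest
 * and slowest passes are discarded and 4 * sz bytes are divided by
 * t[1] + 2 * t[2] + t[3] nanoseconds (a weighted average of the three
 * middle samples), scaled to MiB/s. __perf_copy_blt() below uses the
 * same scheme.
 */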
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    unsigned int pat_index,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       i915_gem_get_pat_index(gt->i915,
							      I915_CACHE_NONE),
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   unsigned int src_pat_index,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   unsigned int dst_pat_index,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_pat_index,
						 src_is_lmem,
						 dst, dst_pat_index,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		size_t sz;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		sz = src->base.size;
		dst = create_init_lmem_internal(gt, sz, false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(dst),
				      sz);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}