// SPDX-License-Identifier: GPL-2.0 AND MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>
#include <kunit/visibility.h>

#include "tests/xe_bo_test.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"

#include "xe_bo_evict.h"
#include "xe_pci.h"
#include "xe_pm.h"

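/*
 * Validate the bo into VRAM (optionally clearing it and its CCS metadata),
 * evict it back to system memory and compare the CCS values that were
 * copied out against @get_val, then write @assign_val into the CCS backing
 * pages for the next test round.
 */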
static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
			    bool clear, u64 get_val, u64 assign_val,
			    struct kunit *test)
{
	struct dma_fence *fence;
	struct ttm_tt *ttm;
	struct page *page;
	pgoff_t ccs_page;
	long timeout;
	u64 *cpu_map;
	int ret;
	u32 offset;

	/* Move bo to VRAM if not already there. */
	ret = xe_bo_validate(bo, NULL, false);
	if (ret) {
		KUNIT_FAIL(test, "Failed to validate bo.\n");
		return ret;
	}

	/* Optionally clear bo *and* CCS data in VRAM. */
	if (clear) {
		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource);
		if (IS_ERR(fence)) {
			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
			return PTR_ERR(fence);
		}
		dma_fence_put(fence);
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
	}

	/* Sync all migration blits */
	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
					DMA_RESV_USAGE_KERNEL,
					true,
					5 * HZ);
	if (timeout <= 0) {
		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
		return -ETIME;
	}

	/*
	 * Bo with CCS data is now in system memory. Verify backing store
	 * and data integrity. Then assign for the next testing round while
	 * we still have a CPU map.
	 */
	ttm = bo->ttm.ttm;
	if (!ttm || !ttm_tt_is_populated(ttm)) {
		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
		return -EINVAL;
	}

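	/*
	 * With flat CCS, the compression metadata is copied into extra pages
	 * appended after the bo contents in the TTM backing store, starting
	 * at the offset returned by xe_bo_ccs_pages_start().
	 */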
	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
	if (ccs_page >= ttm->num_pages) {
		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
		return -EINVAL;
	}

	page = ttm->pages[ccs_page];
	cpu_map = kmap_local_page(page);

	/* Check first CCS value */
	if (cpu_map[0] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[0]);
		ret = -EINVAL;
	}

	/* Check last CCS value, or at least last value in page. */
	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
	if (cpu_map[offset] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[offset]);
		ret = -EINVAL;
	}

	cpu_map[0] = assign_val;
	cpu_map[offset] = assign_val;
	kunmap_local(cpu_map);

	return ret;
}

static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
			      struct kunit *test)
{
	struct xe_bo *bo;

	int ret;

	/* TODO: Sanity check */
	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);

	if (IS_DGFX(xe))
		kunit_info(test, "Testing vram id %u\n", tile->id);
	else
		kunit_info(test, "Testing system memory\n");

	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
			       ttm_bo_type_device, bo_flags);
	if (IS_ERR(bo)) {
		KUNIT_FAIL(test, "Failed to create bo.\n");
		return;
	}

	xe_bo_lock(bo, false);

	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
			       test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data survives migration.\n");
	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
			       0xdeadbeefdeadbeefULL, test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);

out_unlock:
	xe_bo_unlock(bo);
	xe_bo_put(bo);
}

static int ccs_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_tile *tile;
	int id;

	if (!xe_device_has_flat_ccs(xe)) {
		kunit_info(test, "Skipping non-flat-ccs device.\n");
		return 0;
	}

	xe_device_mem_access_get(xe);

	for_each_tile(tile, xe, id) {
		/* For igfx run only for primary tile */
		if (!IS_DGFX(xe) && id > 0)
			continue;
		ccs_test_run_tile(xe, tile, test);
	}

	xe_device_mem_access_put(xe);

	return 0;
}

void xe_ccs_migrate_kunit(struct kunit *test)
{
	xe_call_for_each_device(ccs_test_run_device);
}
EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);

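/*
 * Evict all bos to system memory and restore them again, mimicking a
 * suspend/resume cycle. The pinned external bo is expected to end up back
 * in VRAM afterwards, while the unpinned vm-private bo should stay evicted.
 */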
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
	struct xe_bo *bo, *external;
	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
	struct xe_gt *__gt;
	int err, i, id;

	kunit_info(test, "Testing device %s vram id %u\n",
		   dev_name(xe->drm.dev), tile->id);

	for (i = 0; i < 2; ++i) {
		xe_vm_lock(vm, false);
		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
				       DRM_XE_GEM_CPU_CACHING_WC,
				       ttm_bo_type_device,
				       bo_flags);
		xe_vm_unlock(vm);
		if (IS_ERR(bo)) {
			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
			break;
		}

		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
					     DRM_XE_GEM_CPU_CACHING_WC,
					     ttm_bo_type_device, bo_flags);
		if (IS_ERR(external)) {
			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
			goto cleanup_bo;
		}

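		/*
		 * Pin the external bo so it gets saved and restored across
		 * the eviction below; the vm-private bo stays unpinned.
		 */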
		xe_bo_lock(external, false);
		err = xe_bo_pin_external(external);
		xe_bo_unlock(external);
		if (err) {
			KUNIT_FAIL(test, "external bo pin err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_external;
		}

		err = xe_bo_evict_all(xe);
		if (err) {
			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

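		/*
		 * Sanitize the GTs, then restore the kernel bos that were
		 * evicted above.
		 */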
		for_each_gt(__gt, xe, id)
			xe_gt_sanitize(__gt);
		err = xe_bo_restore_kernel(xe);
		/*
		 * Snapshotting the CTB and copying back a potentially old
		 * version seems risky, depending on what might have been
		 * in flight. Also it seems snapshotting the ADS object and
		 * copying back results in serious breakage. Normally when
		 * calling xe_bo_restore_kernel() we always fully restart the
		 * GT, which re-initializes such things. We could potentially
		 * skip saving and restoring such objects in xe_bo_evict_all(),
		 * however it seems quite fragile not to also restart the GT.
		 * Try to do that here by triggering a GT reset.
		 */
		for_each_gt(__gt, xe, id) {
			xe_gt_reset_async(__gt);
			flush_work(&__gt->reset.worker);
		}
		if (err) {
			KUNIT_FAIL(test, "restore kernel err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_all;
		}

		err = xe_bo_restore_user(xe);
		if (err) {
			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		if (!xe_bo_is_vram(external)) {
			KUNIT_FAIL(test, "external bo is not vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (xe_bo_is_vram(bo)) {
			KUNIT_FAIL(test, "bo is vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

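		/*
		 * On the second round, also check that the evicted bos can be
		 * validated back into their preferred placements.
		 */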
		if (i) {
			down_read(&vm->lock);
			xe_vm_lock(vm, false);
			err = xe_bo_validate(bo, bo->vm, false);
			xe_vm_unlock(vm);
			up_read(&vm->lock);
			if (err) {
				KUNIT_FAIL(test, "bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
			xe_bo_lock(external, false);
			err = xe_bo_validate(external, NULL, false);
			xe_bo_unlock(external);
			if (err) {
				KUNIT_FAIL(test, "external bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
		}

		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);

		xe_bo_put(external);

		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		continue;

cleanup_all:
		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);
cleanup_external:
		xe_bo_put(external);
cleanup_bo:
		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		break;
	}

	xe_vm_put(vm);

	return 0;
}

static int evict_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_tile *tile;
	int id;

	if (!IS_DGFX(xe)) {
		kunit_info(test, "Skipping non-discrete device %s.\n",
			   dev_name(xe->drm.dev));
		return 0;
	}

	xe_device_mem_access_get(xe);

	for_each_tile(tile, xe, id)
		evict_test_run_tile(xe, tile, test);

	xe_device_mem_access_put(xe);

	return 0;
}

void xe_bo_evict_kunit(struct kunit *test)
{
	xe_call_for_each_device(evict_test_run_device);
}
EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);