/*
 * Copyright 2009 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Michel Dänzer
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <dev/drm2/drmP.h>
#include <dev/drm2/radeon/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"

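/* Copy-method selectors for radeon_do_test_moves(). */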
#define RADEON_TEST_COPY_BLIT 1
#define RADEON_TEST_COPY_DMA  0


/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
{
	struct radeon_bo *vram_obj = NULL;
	struct radeon_bo **gtt_obj = NULL;
	struct radeon_fence *fence = NULL;
	uint64_t gtt_addr, vram_addr;
	unsigned i, n, size;
	int r, ring;

	switch (flag) {
	case RADEON_TEST_COPY_DMA:
		ring = radeon_copy_dma_ring_index(rdev);
		break;
	case RADEON_TEST_COPY_BLIT:
		ring = radeon_copy_blit_ring_index(rdev);
		break;
	default:
		DRM_ERROR("Unknown copy method\n");
		return;
	}

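	/* Each test buffer is 1 MiB. */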
	size = 1024 * 1024;

	/* Number of tests =
	 * (Total GTT - IB pool - writeback page - ring buffers - IH ring) / test size
	 */
	n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024;
	for (i = 0; i < RADEON_NUM_RINGS; ++i)
		n -= rdev->ring[i].ring_size;
	if (rdev->wb.wb_obj)
		n -= RADEON_GPU_PAGE_SIZE;
	if (rdev->ih.ring_obj)
		n -= rdev->ih.ring_size;
	n /= size;

	gtt_obj = malloc(n * sizeof(*gtt_obj), DRM_MEM_DRIVER, M_NOWAIT | M_ZERO);
	if (!gtt_obj) {
		DRM_ERROR("Failed to allocate %d pointers\n", n);
		r = 1;
		goto out_cleanup;
	}

	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
			     NULL, &vram_obj);
	if (r) {
		DRM_ERROR("Failed to create VRAM object\n");
		goto out_cleanup;
	}
	r = radeon_bo_reserve(vram_obj, false);
	if (unlikely(r != 0))
		goto out_cleanup;
	r = radeon_bo_pin(vram_obj, RADEON_GEM_DOMAIN_VRAM, &vram_addr);
	if (r) {
		DRM_ERROR("Failed to pin VRAM object\n");
		goto out_cleanup;
	}
	for (i = 0; i < n; i++) {
		void *gtt_map, *vram_map;
		void **gtt_start, **gtt_end;
		void **vram_start, **vram_end;

		r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL, gtt_obj + i);
		if (r) {
			DRM_ERROR("Failed to create GTT object %d\n", i);
			goto out_cleanup;
		}

		r = radeon_bo_reserve(gtt_obj[i], false);
		if (unlikely(r != 0))
			goto out_cleanup;
		r = radeon_bo_pin(gtt_obj[i], RADEON_GEM_DOMAIN_GTT, &gtt_addr);
		if (r) {
			DRM_ERROR("Failed to pin GTT object %d\n", i);
			goto out_cleanup;
		}

		r = radeon_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object %d\n", i);
			goto out_cleanup;
		}

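		/*
		 * Seed the GTT object with its own kernel virtual
		 * addresses: each pointer-sized slot gets a unique,
		 * position-dependent value, so any corruption or
		 * misplacement shows up on the later compares.
		 */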
		for (gtt_start = gtt_map, gtt_end = (void *)((uintptr_t)gtt_map + size);
		     gtt_start < gtt_end;
		     gtt_start++)
			*gtt_start = gtt_start;

		radeon_bo_kunmap(gtt_obj[i]);

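		/* Copy GTT -> VRAM on the selected ring; the copy size is given in GPU pages. */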
		if (ring == R600_RING_TYPE_DMA_INDEX)
			r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
		else
			r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
		if (r) {
			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
			goto out_cleanup;
		}

		r = radeon_fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
			goto out_cleanup;
		}

		radeon_fence_unref(&fence);

		r = radeon_bo_kmap(vram_obj, &vram_map);
		if (r) {
			DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
			goto out_cleanup;
		}

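		/*
		 * Check that VRAM now mirrors the GTT pattern, then
		 * overwrite each slot with its VRAM address for the
		 * return trip.
		 */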
		for (gtt_start = gtt_map, gtt_end = (void *)((uintptr_t)gtt_map + size),
		     vram_start = vram_map, vram_end = (void *)((uintptr_t)vram_map + size);
		     vram_start < vram_end;
		     gtt_start++, vram_start++) {
			if (*vram_start != gtt_start) {
				DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
					  "expected 0x%p (GTT/VRAM offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *vram_start, gtt_start,
					  (unsigned long long)
					  ((uintptr_t)gtt_addr - (uintptr_t)rdev->mc.gtt_start +
					   (uintptr_t)gtt_start - (uintptr_t)gtt_map),
					  (unsigned long long)
					  ((uintptr_t)vram_addr - (uintptr_t)rdev->mc.vram_start +
					   (uintptr_t)gtt_start - (uintptr_t)gtt_map));
				radeon_bo_kunmap(vram_obj);
				goto out_cleanup;
			}
			*vram_start = vram_start;
		}

		radeon_bo_kunmap(vram_obj);

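		/* Copy VRAM -> GTT on the same ring and wait for completion. */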
		if (ring == R600_RING_TYPE_DMA_INDEX)
			r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
		else
			r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
		if (r) {
			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
			goto out_cleanup;
		}

		r = radeon_fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
			goto out_cleanup;
		}

		radeon_fence_unref(&fence);

		r = radeon_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object after copy %d\n", i);
			goto out_cleanup;
		}

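		/* Each GTT slot must now hold the VRAM address pattern written above. */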
		for (gtt_start = gtt_map, gtt_end = (void *)((uintptr_t)gtt_map + size),
		     vram_start = vram_map, vram_end = (void *)((uintptr_t)vram_map + size);
		     gtt_start < gtt_end;
		     gtt_start++, vram_start++) {
			if (*gtt_start != vram_start) {
				DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
					  "expected 0x%p (VRAM/GTT offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *gtt_start, vram_start,
					  (unsigned long long)
					  ((uintptr_t)vram_addr - (uintptr_t)rdev->mc.vram_start +
					   (uintptr_t)vram_start - (uintptr_t)vram_map),
					  (unsigned long long)
					  ((uintptr_t)gtt_addr - (uintptr_t)rdev->mc.gtt_start +
					   (uintptr_t)vram_start - (uintptr_t)vram_map));
				radeon_bo_kunmap(gtt_obj[i]);
				goto out_cleanup;
			}
		}

		radeon_bo_kunmap(gtt_obj[i]);

		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%jx\n",
			 (uintmax_t)gtt_addr - rdev->mc.gtt_start);
	}

out_cleanup:
	if (vram_obj) {
		if (radeon_bo_is_reserved(vram_obj)) {
			radeon_bo_unpin(vram_obj);
			radeon_bo_unreserve(vram_obj);
		}
		radeon_bo_unref(&vram_obj);
	}
	if (gtt_obj) {
		for (i = 0; i < n; i++) {
			if (gtt_obj[i]) {
				if (radeon_bo_is_reserved(gtt_obj[i])) {
					radeon_bo_unpin(gtt_obj[i]);
					radeon_bo_unreserve(gtt_obj[i]);
				}
				radeon_bo_unref(&gtt_obj[i]);
			}
		}
		free(gtt_obj, DRM_MEM_DRIVER);
	}
	if (fence) {
		radeon_fence_unref(&fence);
	}
	if (r) {
		DRM_ERROR("Error while testing BO move.\n");
	}
}

void radeon_test_moves(struct radeon_device *rdev)
{
	if (rdev->asic->copy.dma)
		radeon_do_test_moves(rdev, RADEON_TEST_COPY_DMA);
	if (rdev->asic->copy.blit)
		radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
}

void radeon_test_ring_sync(struct radeon_device *rdev,
			   struct radeon_ring *ringA,
			   struct radeon_ring *ringB)
{
	struct radeon_fence *fence1 = NULL, *fence2 = NULL;
	struct radeon_semaphore *semaphore = NULL;
	int r;

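	/*
	 * Ring A emits two fences, each preceded by a wait on the same
	 * semaphore.  Ring B then signals the semaphore twice; each
	 * signal should release exactly one of the pending fences.
	 */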
	r = radeon_semaphore_create(rdev, &semaphore);
	if (r) {
		DRM_ERROR("Failed to create semaphore\n");
		goto out_cleanup;
	}

	r = radeon_ring_lock(rdev, ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
	r = radeon_fence_emit(rdev, &fence1, ringA->idx);
	if (r) {
		DRM_ERROR("Failed to emit fence 1\n");
		radeon_ring_unlock_undo(rdev, ringA);
		goto out_cleanup;
	}
	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
	r = radeon_fence_emit(rdev, &fence2, ringA->idx);
	if (r) {
		DRM_ERROR("Failed to emit fence 2\n");
		radeon_ring_unlock_undo(rdev, ringA);
		goto out_cleanup;
	}
	radeon_ring_unlock_commit(rdev, ringA);

	mdelay(1000);

	if (radeon_fence_signaled(fence1)) {
		DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = radeon_ring_lock(rdev, ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
		goto out_cleanup;
	}
	radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore);
	radeon_ring_unlock_commit(rdev, ringB);

	r = radeon_fence_wait(fence1, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 1\n");
		goto out_cleanup;
	}

	mdelay(1000);

	if (radeon_fence_signaled(fence2)) {
		DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = radeon_ring_lock(rdev, ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
		goto out_cleanup;
	}
	radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore);
	radeon_ring_unlock_commit(rdev, ringB);

	r = radeon_fence_wait(fence2, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 2\n");
		goto out_cleanup;
	}

out_cleanup:
	radeon_semaphore_free(rdev, &semaphore, NULL);

	if (fence1)
		radeon_fence_unref(&fence1);

	if (fence2)
		radeon_fence_unref(&fence2);

	if (r)
		DRM_ERROR("Error while testing ring sync (%d).\n", r);
}

static void radeon_test_ring_sync2(struct radeon_device *rdev,
			    struct radeon_ring *ringA,
			    struct radeon_ring *ringB,
			    struct radeon_ring *ringC)
{
	struct radeon_fence *fenceA = NULL, *fenceB = NULL;
	struct radeon_semaphore *semaphore = NULL;
	bool sigA, sigB;
	int i, r;

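	/*
	 * Rings A and B each block on the same semaphore before emitting
	 * a fence.  Ring C signals the semaphore twice; each signal
	 * should release exactly one waiter, in either order.
	 */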
	r = radeon_semaphore_create(rdev, &semaphore);
	if (r) {
		DRM_ERROR("Failed to create semaphore\n");
		goto out_cleanup;
	}

	r = radeon_ring_lock(rdev, ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
	r = radeon_fence_emit(rdev, &fenceA, ringA->idx);
	if (r) {
		DRM_ERROR("Failed to emit sync fence A\n");
		radeon_ring_unlock_undo(rdev, ringA);
		goto out_cleanup;
	}
	radeon_ring_unlock_commit(rdev, ringA);

	r = radeon_ring_lock(rdev, ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
		goto out_cleanup;
	}
	radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore);
	r = radeon_fence_emit(rdev, &fenceB, ringB->idx);
	if (r) {
		DRM_ERROR("Failed to emit sync fence B\n");
		radeon_ring_unlock_undo(rdev, ringB);
		goto out_cleanup;
	}
	radeon_ring_unlock_commit(rdev, ringB);

	mdelay(1000);

	if (radeon_fence_signaled(fenceA)) {
		DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}
	if (radeon_fence_signaled(fenceB)) {
		DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = radeon_ring_lock(rdev, ringC, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring C %d\n", ringC->idx);
		goto out_cleanup;
	}
	radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore);
	radeon_ring_unlock_commit(rdev, ringC);

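	/* Poll for up to 3 seconds; exactly one of the two fences should signal. */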
	for (i = 0; i < 30; ++i) {
		mdelay(100);
		sigA = radeon_fence_signaled(fenceA);
		sigB = radeon_fence_signaled(fenceB);
		if (sigA || sigB)
			break;
	}

	if (!sigA && !sigB) {
		DRM_ERROR("Neither fence A nor B has been signaled\n");
		goto out_cleanup;
	} else if (sigA && sigB) {
		DRM_ERROR("Both fence A and B have been signaled\n");
		goto out_cleanup;
	}

	DRM_INFO("Fence %c was signaled first\n", sigA ? 'A' : 'B');

	r = radeon_ring_lock(rdev, ringC, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring C %d\n", ringC->idx);
		goto out_cleanup;
	}
	radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore);
	radeon_ring_unlock_commit(rdev, ringC);

	mdelay(1000);

	r = radeon_fence_wait(fenceA, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence A\n");
		goto out_cleanup;
	}
	r = radeon_fence_wait(fenceB, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence B\n");
		goto out_cleanup;
	}

out_cleanup:
	radeon_semaphore_free(rdev, &semaphore, NULL);

	if (fenceA)
		radeon_fence_unref(&fenceA);

	if (fenceB)
		radeon_fence_unref(&fenceB);

	if (r)
		DRM_ERROR("Error while testing ring sync (%d).\n", r);
}

void radeon_test_syncing(struct radeon_device *rdev)
{
	int i, j, k;

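	/*
	 * Exercise every ordered pair of ready rings with the two-fence
	 * test, and every ordered triple with the two-waiter test.
	 */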
	for (i = 1; i < RADEON_NUM_RINGS; ++i) {
		struct radeon_ring *ringA = &rdev->ring[i];
		if (!ringA->ready)
			continue;

		for (j = 0; j < i; ++j) {
			struct radeon_ring *ringB = &rdev->ring[j];
			if (!ringB->ready)
				continue;

			DRM_INFO("Testing syncing between rings %d and %d...\n", i, j);
			radeon_test_ring_sync(rdev, ringA, ringB);

			DRM_INFO("Testing syncing between rings %d and %d...\n", j, i);
			radeon_test_ring_sync(rdev, ringB, ringA);

			for (k = 0; k < j; ++k) {
				struct radeon_ring *ringC = &rdev->ring[k];
				if (!ringC->ready)
					continue;

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k);
				radeon_test_ring_sync2(rdev, ringA, ringB, ringC);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j);
				radeon_test_ring_sync2(rdev, ringA, ringC, ringB);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k);
				radeon_test_ring_sync2(rdev, ringB, ringA, ringC);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i);
				radeon_test_ring_sync2(rdev, ringB, ringC, ringA);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j);
				radeon_test_ring_sync2(rdev, ringC, ringA, ringB);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i);
				radeon_test_ring_sync2(rdev, ringC, ringB, ringA);
			}
		}
	}
}