i915_gem_execbuffer.c revision 255013
1/*
2 * Copyright © 2008,2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *    Chris Wilson <chris@chris-wilson.co.uk>
26 *
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/drm2/i915/i915_gem_execbuffer.c 255013 2013-08-28 23:59:38Z jkim $");
31
32#include <dev/drm2/drmP.h>
33#include <dev/drm2/drm.h>
34#include <dev/drm2/i915/i915_drm.h>
35#include <dev/drm2/i915/i915_drv.h>
36#include <dev/drm2/i915/intel_drv.h>
37#include <sys/limits.h>
38#include <sys/sf_buf.h>
39
40struct change_domains {
41	uint32_t invalidate_domains;
42	uint32_t flush_domains;
43	uint32_t flush_rings;
44	uint32_t flips;
45};
46
47/*
48 * Set the next domain for the specified object. This
49 * may not actually perform the necessary flushing/invalidating though,
50 * as that may want to be batched with other set_domain operations
51 *
52 * This is (we hope) the only really tricky part of gem. The goal
53 * is fairly simple -- track which caches hold bits of the object
54 * and make sure they remain coherent. A few concrete examples may
55 * help to explain how it works. For shorthand, we use the notation
56 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
57 * a pair of read and write domain masks.
58 *
59 * Case 1: the batch buffer
60 *
61 *	1. Allocated
62 *	2. Written by CPU
63 *	3. Mapped to GTT
64 *	4. Read by GPU
65 *	5. Unmapped from GTT
66 *	6. Freed
67 *
68 *	Let's take these a step at a time
69 *
70 *	1. Allocated
71 *		Pages allocated from the kernel may still have
72 *		cache contents, so we set them to (CPU, CPU) always.
73 *	2. Written by CPU (using pwrite)
74 *		The pwrite function calls set_domain (CPU, CPU) and
75 *		this function does nothing (as nothing changes)
76 *	3. Mapped to GTT
77 *		This function asserts that the object is not
78 *		currently in any GPU-based read or write domains
79 *	4. Read by GPU
80 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
81 *		As write_domain is zero, this function adds in the
82 *		current read domains (CPU+COMMAND, 0).
83 *		flush_domains is set to CPU.
84 *		invalidate_domains is set to COMMAND
85 *		clflush is run to get data out of the CPU caches
86 *		then i915_dev_set_domain calls i915_gem_flush to
87 *		emit an MI_FLUSH and drm_agp_chipset_flush
88 *	5. Unmapped from GTT
89 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
90 *		flush_domains and invalidate_domains end up both zero
91 *		so no flushing/invalidating happens
92 *	6. Freed
93 *		yay, done
94 *
95 * Case 2: The shared render buffer
96 *
97 *	1. Allocated
98 *	2. Mapped to GTT
99 *	3. Read/written by GPU
100 *	4. set_domain to (CPU,CPU)
101 *	5. Read/written by CPU
102 *	6. Read/written by GPU
103 *
104 *	1. Allocated
105 *		Same as last example, (CPU, CPU)
106 *	2. Mapped to GTT
107 *		Nothing changes (assertions find that it is not in the GPU)
108 *	3. Read/written by GPU
109 *		execbuffer calls set_domain (RENDER, RENDER)
110 *		flush_domains gets CPU
111 *		invalidate_domains gets GPU
112 *		clflush (obj)
113 *		MI_FLUSH and drm_agp_chipset_flush
114 *	4. set_domain (CPU, CPU)
115 *		flush_domains gets GPU
116 *		invalidate_domains gets CPU
117 *		wait_rendering (obj) to make sure all drawing is complete.
118 *		This will include an MI_FLUSH to get the data from GPU
119 *		to memory
120 *		clflush (obj) to invalidate the CPU cache
121 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
122 *	5. Read/written by CPU
123 *		cache lines are loaded and dirtied
124 *	6. Read/written by GPU
125 *		Same as last GPU access
126 *
127 * Case 3: The constant buffer
128 *
129 *	1. Allocated
130 *	2. Written by CPU
131 *	3. Read by GPU
132 *	4. Updated (written) by CPU again
133 *	5. Read by GPU
134 *
135 *	1. Allocated
136 *		(CPU, CPU)
137 *	2. Written by CPU
138 *		(CPU, CPU)
139 *	3. Read by GPU
140 *		(CPU+RENDER, 0)
141 *		flush_domains = CPU
142 *		invalidate_domains = RENDER
143 *		clflush (obj)
144 *		MI_FLUSH
145 *		drm_agp_chipset_flush
146 *	4. Updated (written) by CPU again
147 *		(CPU, CPU)
148 *		flush_domains = 0 (no previous write domain)
149 *		invalidate_domains = 0 (no new read domains)
150 *	5. Read by GPU
151 *		(CPU+RENDER, 0)
152 *		flush_domains = CPU
153 *		invalidate_domains = RENDER
154 *		clflush (obj)
155 *		MI_FLUSH
156 *		drm_agp_chipset_flush
157 */
158static void
159i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
160				  struct intel_ring_buffer *ring,
161				  struct change_domains *cd)
162{
163	uint32_t invalidate_domains = 0, flush_domains = 0;
164
165	/*
166	 * If the object isn't moving to a new write domain,
167	 * let the object stay in multiple read domains
168	 */
169	if (obj->base.pending_write_domain == 0)
170		obj->base.pending_read_domains |= obj->base.read_domains;
171
172	/*
173	 * Flush the current write domain if
174	 * the new read domains don't match. Invalidate
175	 * any read domains which differ from the old
176	 * write domain
177	 */
178	if (obj->base.write_domain &&
179	    (((obj->base.write_domain != obj->base.pending_read_domains ||
180	       obj->ring != ring)) ||
181	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
182		flush_domains |= obj->base.write_domain;
183		invalidate_domains |=
184			obj->base.pending_read_domains & ~obj->base.write_domain;
185	}
186	/*
187	 * Invalidate any read caches which may have
188	 * stale data. That is, any new read domains.
189	 */
190	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
191	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
192		i915_gem_clflush_object(obj);
193
194	if (obj->base.pending_write_domain)
195		cd->flips |= atomic_load_acq_int(&obj->pending_flip);
196
197	/* The actual obj->write_domain will be updated with
198	 * pending_write_domain after we emit the accumulated flush for all
199	 * of our domain changes in execbuffers (which clears objects'
200	 * write_domains).  So if we have a current write domain that we
201	 * aren't changing, set pending_write_domain to that.
202	 */
203	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
204		obj->base.pending_write_domain = obj->base.write_domain;
205
206	cd->invalidate_domains |= invalidate_domains;
207	cd->flush_domains |= flush_domains;
208	if (flush_domains & I915_GEM_GPU_DOMAINS)
209		cd->flush_rings |= intel_ring_flag(obj->ring);
210	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
211		cd->flush_rings |= intel_ring_flag(ring);
212}
213
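/*
 * Simple hash table mapping execbuffer object handles to their GEM
 * objects, so that relocation processing can look up a target by
 * handle without another drm_gem_object_lookup().
 */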
214struct eb_objects {
215	u_long hashmask;
216	LIST_HEAD(, drm_i915_gem_object) *buckets;
217};
218
219static struct eb_objects *
220eb_create(int size)
221{
222	struct eb_objects *eb;
223
224	eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
225	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
226	return (eb);
227}
228
229static void
230eb_reset(struct eb_objects *eb)
231{
232	int i;
233
234	for (i = 0; i <= eb->hashmask; i++)
235		LIST_INIT(&eb->buckets[i]);
236}
237
238static void
239eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
240{
241
242	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
243	    obj, exec_node);
244}
245
246static struct drm_i915_gem_object *
247eb_get_object(struct eb_objects *eb, unsigned long handle)
248{
249	struct drm_i915_gem_object *obj;
250
251	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
252		if (obj->exec_handle == handle)
253			return (obj);
254	}
255	return (NULL);
256}
257
258static void
259eb_destroy(struct eb_objects *eb)
260{
261
262	free(eb->buckets, DRM_I915_GEM);
263	free(eb, DRM_I915_GEM);
264}
265
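/*
 * Apply a single relocation: validate the requested read/write domains,
 * accumulate them on the target object and, unless the presumed offset
 * is already correct, rewrite the 32-bit location in the object either
 * through a CPU mapping (sf_buf) or through the GTT aperture.
 */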
266static int
267i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
268				   struct eb_objects *eb,
269				   struct drm_i915_gem_relocation_entry *reloc)
270{
271	struct drm_device *dev = obj->base.dev;
272	struct drm_gem_object *target_obj;
273	uint32_t target_offset;
274	int ret = -EINVAL;
275
276	/* we already hold a reference to all valid objects */
277	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
278	if (unlikely(target_obj == NULL))
279		return -ENOENT;
280
281	target_offset = to_intel_bo(target_obj)->gtt_offset;
282
283#if WATCH_RELOC
284	DRM_INFO("%s: obj %p offset %08x target %d "
285		 "read %08x write %08x gtt %08x "
286		 "presumed %08x delta %08x\n",
287		 __func__,
288		 obj,
289		 (int) reloc->offset,
290		 (int) reloc->target_handle,
291		 (int) reloc->read_domains,
292		 (int) reloc->write_domain,
293		 (int) target_offset,
294		 (int) reloc->presumed_offset,
295		 reloc->delta);
296#endif
297
298	/* The target buffer should have appeared before us in the
299	 * exec_object list, so it should have a GTT space bound by now.
300	 */
301	if (unlikely(target_offset == 0)) {
302		DRM_DEBUG("No GTT space found for object %d\n",
303			  reloc->target_handle);
304		return ret;
305	}
306
307	/* Validate that the target is in a valid r/w GPU domain */
308	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
309		DRM_DEBUG("reloc with multiple write domains: "
310			  "obj %p target %d offset %d "
311			  "read %08x write %08x",
312			  obj, reloc->target_handle,
313			  (int) reloc->offset,
314			  reloc->read_domains,
315			  reloc->write_domain);
316		return ret;
317	}
318	if (unlikely((reloc->write_domain | reloc->read_domains)
319		     & ~I915_GEM_GPU_DOMAINS)) {
320		DRM_DEBUG("reloc with read/write non-GPU domains: "
321			  "obj %p target %d offset %d "
322			  "read %08x write %08x",
323			  obj, reloc->target_handle,
324			  (int) reloc->offset,
325			  reloc->read_domains,
326			  reloc->write_domain);
327		return ret;
328	}
329	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
330		     reloc->write_domain != target_obj->pending_write_domain)) {
331		DRM_DEBUG("Write domain conflict: "
332			  "obj %p target %d offset %d "
333			  "new %08x old %08x\n",
334			  obj, reloc->target_handle,
335			  (int) reloc->offset,
336			  reloc->write_domain,
337			  target_obj->pending_write_domain);
338		return ret;
339	}
340
341	target_obj->pending_read_domains |= reloc->read_domains;
342	target_obj->pending_write_domain |= reloc->write_domain;
343
344	/* If the relocation already has the right value in it, no
345	 * more work needs to be done.
346	 */
347	if (target_offset == reloc->presumed_offset)
348		return 0;
349
350	/* Check that the relocation address is valid... */
351	if (unlikely(reloc->offset > obj->base.size - 4)) {
352		DRM_DEBUG("Relocation beyond object bounds: "
353			  "obj %p target %d offset %d size %d.\n",
354			  obj, reloc->target_handle,
355			  (int) reloc->offset,
356			  (int) obj->base.size);
357		return ret;
358	}
359	if (unlikely(reloc->offset & 3)) {
360		DRM_DEBUG("Relocation not 4-byte aligned: "
361			  "obj %p target %d offset %d.\n",
362			  obj, reloc->target_handle,
363			  (int) reloc->offset);
364		return ret;
365	}
366
367	reloc->delta += target_offset;
368	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
369		uint32_t page_offset = reloc->offset & PAGE_MASK;
370		char *vaddr;
371		struct sf_buf *sf;
372
373		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
374		    SFB_NOWAIT);
375		if (sf == NULL)
376			return (-ENOMEM);
377		vaddr = (void *)sf_buf_kva(sf);
378		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
379		sf_buf_free(sf);
380	} else {
381		uint32_t *reloc_entry;
382		char *reloc_page;
383
384		/* We can't wait for rendering with pagefaults disabled */
385		if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
386			return (-EFAULT);
387		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
388		if (ret)
389			return ret;
390
391		/*
392		 * Map the page containing the relocation we're going
393		 * to perform.
394		 */
395		reloc->offset += obj->gtt_offset;
396		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
397		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
398		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
399		    PAGE_MASK));
400		*(volatile uint32_t *)reloc_entry = reloc->delta;
401		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
402	}
403
404	/* and update the user's relocation entry */
405	reloc->presumed_offset = target_offset;
406
407	return 0;
408}
409
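/*
 * Fast-path relocation for one object: copy each user relocation entry
 * in without faulting, apply it, and write the updated presumed_offset
 * back to userspace, again without faulting.
 */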
410static int
411i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
412    struct eb_objects *eb)
413{
414	struct drm_i915_gem_relocation_entry *user_relocs;
415	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
416	struct drm_i915_gem_relocation_entry reloc;
417	int i, ret;
418
419	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
420	for (i = 0; i < entry->relocation_count; i++) {
421		ret = -copyin_nofault(user_relocs + i, &reloc, sizeof(reloc));
422		if (ret != 0)
423			return (ret);
424
425		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
426		if (ret != 0)
427			return (ret);
428
429		ret = -copyout_nofault(&reloc.presumed_offset,
430		    &user_relocs[i].presumed_offset,
431		    sizeof(reloc.presumed_offset));
432		if (ret != 0)
433			return (ret);
434	}
435
436	return (0);
437}
438
439static int
440i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
441    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
442{
443	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
444	int i, ret;
445
446	for (i = 0; i < entry->relocation_count; i++) {
447		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
448		if (ret)
449			return ret;
450	}
451
452	return 0;
453}
454
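/*
 * Fast-path relocation pass over all objects, run with page faults
 * disabled; an -EFAULT return tells the caller to fall back to the
 * slow path.
 */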
455static int
456i915_gem_execbuffer_relocate(struct drm_device *dev,
457			     struct eb_objects *eb,
458			     struct list_head *objects)
459{
460	struct drm_i915_gem_object *obj;
461	int ret, pflags;
462
463	/* Try to move as many of the relocation targets off the active list
464	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
465	 * for the retirement with pagefaults disabled.
466	 */
467	i915_gem_retire_requests(dev);
468
469	ret = 0;
470	pflags = vm_fault_disable_pagefaults();
471	/* This is the fast path and we cannot handle a pagefault whilst
472	 * holding the device lock lest the user pass in the relocations
473	 * contained within an mmaped bo. In such a case the page
474	 * fault handler would call i915_gem_fault() and we would try to
475	 * acquire the device lock again. Obviously this is bad.
476	 */
477
478	list_for_each_entry(obj, objects, exec_list) {
479		ret = i915_gem_execbuffer_relocate_object(obj, eb);
480		if (ret != 0)
481			break;
482	}
483	vm_fault_enable_pagefaults(pflags);
484	return (ret);
485}
486
487#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
488
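/*
 * Pin an object into the GTT for execution, honouring its alignment
 * and mappability requirements, and set up a fence register for
 * pre-gen4 tiled objects that request one.
 */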
489static int
490pin_and_fence_object(struct drm_i915_gem_object *obj,
491		     struct intel_ring_buffer *ring)
492{
493	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
494	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
495	bool need_fence, need_mappable;
496	int ret;
497
498	need_fence =
499		has_fenced_gpu_access &&
500		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
501		obj->tiling_mode != I915_TILING_NONE;
502	need_mappable =
503		entry->relocation_count ? true : need_fence;
504
505	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
506	if (ret)
507		return ret;
508
509	if (has_fenced_gpu_access) {
510		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
511			if (obj->tiling_mode) {
512				ret = i915_gem_object_get_fence(obj, ring);
513				if (ret)
514					goto err_unpin;
515
516				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
517				i915_gem_object_pin_fence(obj);
518			} else {
519				ret = i915_gem_object_put_fence(obj);
520				if (ret)
521					goto err_unpin;
522			}
523			obj->pending_fenced_gpu_access = true;
524		}
525	}
526
527	entry->offset = obj->gtt_offset;
528	return 0;
529
530err_unpin:
531	i915_gem_object_unpin(obj);
532	return ret;
533}
534
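/*
 * Reserve GTT space for every object on the execbuffer list, binding,
 * fencing and unbinding ill-fitting objects as needed; the three-phase
 * strategy is described in the comment in the function body.
 */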
535static int
536i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
537			    struct drm_file *file,
538			    struct list_head *objects)
539{
540	drm_i915_private_t *dev_priv;
541	struct drm_i915_gem_object *obj;
542	int ret, retry;
543	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
544	struct list_head ordered_objects;
545
546	dev_priv = ring->dev->dev_private;
547	INIT_LIST_HEAD(&ordered_objects);
548	while (!list_empty(objects)) {
549		struct drm_i915_gem_exec_object2 *entry;
550		bool need_fence, need_mappable;
551
552		obj = list_first_entry(objects,
553				       struct drm_i915_gem_object,
554				       exec_list);
555		entry = obj->exec_entry;
556
557		need_fence =
558			has_fenced_gpu_access &&
559			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
560			obj->tiling_mode != I915_TILING_NONE;
561		need_mappable =
562			entry->relocation_count ? true : need_fence;
563
564		if (need_mappable)
565			list_move(&obj->exec_list, &ordered_objects);
566		else
567			list_move_tail(&obj->exec_list, &ordered_objects);
568
569		obj->base.pending_read_domains = 0;
570		obj->base.pending_write_domain = 0;
571	}
572	list_splice(&ordered_objects, objects);
573
574	/* Attempt to pin all of the buffers into the GTT.
575	 * This is done in 3 phases:
576	 *
577	 * 1a. Unbind all objects that do not match the GTT constraints for
578	 *     the execbuffer (fenceable, mappable, alignment etc).
579	 * 1b. Increment pin count for already bound objects and obtain
580	 *     a fence register if required.
581	 * 2.  Bind new objects.
582	 * 3.  Decrement pin count.
583	 *
584	 * This avoids unnecessary unbinding of later objects in order to make
585	 * room for the earlier objects *unless* we need to defragment.
586	 */
587	retry = 0;
588	do {
589		ret = 0;
590
591		/* Unbind any ill-fitting objects or pin. */
592		list_for_each_entry(obj, objects, exec_list) {
593			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
594			bool need_fence, need_mappable;
595
596			if (!obj->gtt_space)
597				continue;
598
599			need_fence =
600				has_fenced_gpu_access &&
601				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
602				obj->tiling_mode != I915_TILING_NONE;
603			need_mappable =
604				entry->relocation_count ? true : need_fence;
605
606			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
607			    (need_mappable && !obj->map_and_fenceable))
608				ret = i915_gem_object_unbind(obj);
609			else
610				ret = pin_and_fence_object(obj, ring);
611			if (ret)
612				goto err;
613		}
614
615		/* Bind fresh objects */
616		list_for_each_entry(obj, objects, exec_list) {
617			if (obj->gtt_space)
618				continue;
619
620			ret = pin_and_fence_object(obj, ring);
621			if (ret) {
622				int ret_ignore;
623
624				/* This can potentially raise a harmless
625				 * -EINVAL if we failed to bind in the above
626				 * call. It cannot raise -EINTR since we know
627				 * that the bo is freshly bound and so will
628				 * not need to be flushed or waited upon.
629				 */
630				ret_ignore = i915_gem_object_unbind(obj);
631				(void)ret_ignore;
632				if (obj->gtt_space != NULL)
633					printf("%s: gtt_space\n", __func__);
634				break;
635			}
636		}
637
638		/* Decrement pin count for bound objects */
639		list_for_each_entry(obj, objects, exec_list) {
640			struct drm_i915_gem_exec_object2 *entry;
641
642			if (!obj->gtt_space)
643				continue;
644
645			entry = obj->exec_entry;
646			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
647				i915_gem_object_unpin_fence(obj);
648				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
649			}
650
651			i915_gem_object_unpin(obj);
652
653			/* ... and ensure ppgtt mapping exists if needed. */
654			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
655				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
656						       obj, obj->cache_level);
657
658				obj->has_aliasing_ppgtt_mapping = 1;
659			}
660		}
661
662		if (ret != -ENOSPC || retry > 1)
663			return ret;
664
665		/* First attempt, just clear anything that is purgeable.
666		 * Second attempt, clear the entire GTT.
667		 */
668		ret = i915_gem_evict_everything(ring->dev, retry == 0);
669		if (ret)
670			return ret;
671
672		retry++;
673	} while (1);
674
675err:
676	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
677		struct drm_i915_gem_exec_object2 *entry;
678
679		if (!obj->gtt_space)
680			continue;
681
682		entry = obj->exec_entry;
683		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
684			i915_gem_object_unpin_fence(obj);
685			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
686		}
687
688		i915_gem_object_unpin(obj);
689	}
690
691	return ret;
692}
693
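/*
 * Slow-path relocation: drop the object references and the struct lock,
 * copy all relocation entries in with a faulting copyin, then relock,
 * re-look-up and re-reserve the objects and apply the relocations from
 * the kernel copy.
 */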
694static int
695i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
696    struct drm_file *file, struct intel_ring_buffer *ring,
697    struct list_head *objects, struct eb_objects *eb,
698    struct drm_i915_gem_exec_object2 *exec, int count)
699{
700	struct drm_i915_gem_relocation_entry *reloc;
701	struct drm_i915_gem_object *obj;
702	int *reloc_offset;
703	int i, total, ret;
704
705	/* We may process another execbuffer during the unlock... */
706	while (!list_empty(objects)) {
707		obj = list_first_entry(objects,
708				       struct drm_i915_gem_object,
709				       exec_list);
710		list_del_init(&obj->exec_list);
711		drm_gem_object_unreference(&obj->base);
712	}
713
714	DRM_UNLOCK(dev);
715
716	total = 0;
717	for (i = 0; i < count; i++)
718		total += exec[i].relocation_count;
719
720	reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
721	    M_WAITOK | M_ZERO);
722	reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);
723
724	total = 0;
725	for (i = 0; i < count; i++) {
726		struct drm_i915_gem_relocation_entry *user_relocs;
727
728		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
729		ret = -copyin(user_relocs, reloc + total,
730		    exec[i].relocation_count * sizeof(*reloc));
731		if (ret != 0) {
732			DRM_LOCK(dev);
733			goto err;
734		}
735
736		reloc_offset[i] = total;
737		total += exec[i].relocation_count;
738	}
739
740	ret = i915_mutex_lock_interruptible(dev);
741	if (ret) {
742		DRM_LOCK(dev);
743		goto err;
744	}
745
746	/* reacquire the objects */
747	eb_reset(eb);
748	for (i = 0; i < count; i++) {
749		struct drm_i915_gem_object *obj;
750
751		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
752							exec[i].handle));
753		if (&obj->base == NULL) {
754			DRM_DEBUG("Invalid object handle %d at index %d\n",
755				   exec[i].handle, i);
756			ret = -ENOENT;
757			goto err;
758		}
759
760		list_add_tail(&obj->exec_list, objects);
761		obj->exec_handle = exec[i].handle;
762		obj->exec_entry = &exec[i];
763		eb_add_object(eb, obj);
764	}
765
766	ret = i915_gem_execbuffer_reserve(ring, file, objects);
767	if (ret)
768		goto err;
769
770	list_for_each_entry(obj, objects, exec_list) {
771		int offset = obj->exec_entry - exec;
772		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
773		    reloc + reloc_offset[offset]);
774		if (ret)
775			goto err;
776	}
777
778	/* Leave the user relocations as they are; this is the painfully slow path,
779	 * and we want to avoid the complication of dropping the lock whilst
780	 * having buffers reserved in the aperture and so causing spurious
781	 * ENOSPC for random operations.
782	 */
783
784err:
785	free(reloc, DRM_I915_GEM);
786	free(reloc_offset, DRM_I915_GEM);
787	return ret;
788}
789
790static int
791i915_gem_execbuffer_flush(struct drm_device *dev,
792			  uint32_t invalidate_domains,
793			  uint32_t flush_domains,
794			  uint32_t flush_rings)
795{
796	drm_i915_private_t *dev_priv = dev->dev_private;
797	int i, ret;
798
799	if (flush_domains & I915_GEM_DOMAIN_CPU)
800		intel_gtt_chipset_flush();
801
802	if (flush_domains & I915_GEM_DOMAIN_GTT)
803		wmb();
804
805	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
806		for (i = 0; i < I915_NUM_RINGS; i++)
807			if (flush_rings & (1 << i)) {
808				ret = i915_gem_flush_ring(&dev_priv->rings[i],
809				    invalidate_domains, flush_domains);
810				if (ret)
811					return ret;
812			}
813	}
814
815	return 0;
816}
817
818static bool
819intel_enable_semaphores(struct drm_device *dev)
820{
821	if (INTEL_INFO(dev)->gen < 6)
822		return 0;
823
824	if (i915_semaphores >= 0)
825		return i915_semaphores;
826
827	/* Enable semaphores on SNB when IO remapping is off */
828	if (INTEL_INFO(dev)->gen == 6)
829		return !intel_iommu_enabled;
830
831	return 1;
832}
833
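/*
 * Order execution against work queued on another ring: either wait for
 * rendering on the CPU, or emit a semaphore wait on the destination
 * ring for the object's last seqno.
 */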
834static int
835i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
836			       struct intel_ring_buffer *to)
837{
838	struct intel_ring_buffer *from = obj->ring;
839	u32 seqno;
840	int ret, idx;
841
842	if (from == NULL || to == from)
843		return 0;
844
845	/* XXX gpu semaphores are implicated in various hard hangs on SNB */
846	if (!intel_enable_semaphores(obj->base.dev))
847		return i915_gem_object_wait_rendering(obj);
848
849	idx = intel_ring_sync_index(from, to);
850
851	seqno = obj->last_rendering_seqno;
852	if (seqno <= from->sync_seqno[idx])
853		return 0;
854
855	if (seqno == from->outstanding_lazy_request) {
856		struct drm_i915_gem_request *request;
857
858		request = malloc(sizeof(*request), DRM_I915_GEM,
859		    M_WAITOK | M_ZERO);
860		ret = i915_add_request(from, NULL, request);
861		if (ret) {
862			free(request, DRM_I915_GEM);
863			return ret;
864		}
865
866		seqno = request->seqno;
867	}
868
869	from->sync_seqno[idx] = seqno;
870
871	return to->sync_to(to, from, seqno - 1);
872}
873
874static int
875i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
876{
877	u32 plane, flip_mask;
878	int ret;
879
880	/* Check for any pending flips. As we only maintain a flip queue depth
881	 * of 1, we can simply insert a WAIT for the next display flip prior
882	 * to executing the batch and avoid stalling the CPU.
883	 */
884
885	for (plane = 0; flips >> plane; plane++) {
886		if (((flips >> plane) & 1) == 0)
887			continue;
888
889		if (plane)
890			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
891		else
892			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
893
894		ret = intel_ring_begin(ring, 2);
895		if (ret)
896			return ret;
897
898		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
899		intel_ring_emit(ring, MI_NOOP);
900		intel_ring_advance(ring);
901	}
902
903	return 0;
904}
905
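/*
 * Accumulate the domain transitions for all objects, emit the required
 * flushes and invalidations, wait for pending page flips and
 * synchronise with other rings before the batch is dispatched.
 */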
906static int
907i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
908				struct list_head *objects)
909{
910	struct drm_i915_gem_object *obj;
911	struct change_domains cd;
912	int ret;
913
914	memset(&cd, 0, sizeof(cd));
915	list_for_each_entry(obj, objects, exec_list)
916		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
917
918	if (cd.invalidate_domains | cd.flush_domains) {
919#if WATCH_EXEC
920		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
921			  __func__,
922			 cd.invalidate_domains,
923			 cd.flush_domains);
924#endif
925		ret = i915_gem_execbuffer_flush(ring->dev,
926						cd.invalidate_domains,
927						cd.flush_domains,
928						cd.flush_rings);
929		if (ret)
930			return ret;
931	}
932
933	if (cd.flips) {
934		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
935		if (ret)
936			return ret;
937	}
938
939	list_for_each_entry(obj, objects, exec_list) {
940		ret = i915_gem_execbuffer_sync_rings(obj, ring);
941		if (ret)
942			return ret;
943	}
944
945	return 0;
946}
947
948static bool
949i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
950{
951	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
952}
953
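/*
 * Sanity-check the exec list and wire down the userspace relocation
 * arrays (vm_fault_quick_hold_pages) so that the fast relocation path
 * can touch them with page faults disabled.
 */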
954static int
955validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
956    vm_page_t ***map)
957{
958	vm_page_t *ma;
959	int i, length, page_count;
960
961	/* XXXKIB various limits checking is missing here */
962	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
963	for (i = 0; i < count; i++) {
964		/* First check for malicious input causing overflow */
965		if (exec[i].relocation_count >
966		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
967			return -EINVAL;
968
969		length = exec[i].relocation_count *
970		    sizeof(struct drm_i915_gem_relocation_entry);
971		if (length == 0) {
972			(*map)[i] = NULL;
973			continue;
974		}
975		/*
976		 * Since both start and end of the relocation region
977		 * may not be aligned on a page boundary, be
978		 * conservative and request a page slot for each
979		 * partial page.  Thus +2.
980		 */
981		page_count = howmany(length, PAGE_SIZE) + 2;
982		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
983		    DRM_I915_GEM, M_WAITOK | M_ZERO);
984		if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
985		    exec[i].relocs_ptr, length, VM_PROT_READ | VM_PROT_WRITE,
986		    ma, page_count) == -1) {
987			free(ma, DRM_I915_GEM);
988			(*map)[i] = NULL;
989			return (-EFAULT);
990		}
991	}
992
993	return 0;
994}
995
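/*
 * Commit the pending read/write domains and move every object onto the
 * ring's active list under the new seqno; written objects are marked
 * dirty and queued for a post-batch flush.
 */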
996static void
997i915_gem_execbuffer_move_to_active(struct list_head *objects,
998				   struct intel_ring_buffer *ring,
999				   u32 seqno)
1000{
1001	struct drm_i915_gem_object *obj;
1002	uint32_t old_read, old_write;
1003
1004	list_for_each_entry(obj, objects, exec_list) {
1005		old_read = obj->base.read_domains;
1006		old_write = obj->base.write_domain;
1007
1008		obj->base.read_domains = obj->base.pending_read_domains;
1009		obj->base.write_domain = obj->base.pending_write_domain;
1010		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
1011
1012		i915_gem_object_move_to_active(obj, ring, seqno);
1013		if (obj->base.write_domain) {
1014			obj->dirty = 1;
1015			obj->pending_gpu_write = true;
1016			list_move_tail(&obj->gpu_write_list,
1017				       &ring->gpu_write_list);
1018			intel_mark_busy(ring->dev, obj);
1019		}
1020		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
1021		    obj, old_read, old_write);
1022	}
1023}
1024
1025int i915_gem_sync_exec_requests;
1026
1027static void
1028i915_gem_execbuffer_retire_commands(struct drm_device *dev,
1029				    struct drm_file *file,
1030				    struct intel_ring_buffer *ring)
1031{
1032	struct drm_i915_gem_request *request;
1033	u32 invalidate;
1034
1035	/*
1036	 * Ensure that the commands in the batch buffer are
1037	 * finished before the interrupt fires.
1038	 *
1039	 * The sampler always gets flushed on i965 (sigh).
1040	 */
1041	invalidate = I915_GEM_DOMAIN_COMMAND;
1042	if (INTEL_INFO(dev)->gen >= 4)
1043		invalidate |= I915_GEM_DOMAIN_SAMPLER;
1044	if (ring->flush(ring, invalidate, 0)) {
1045		i915_gem_next_request_seqno(ring);
1046		return;
1047	}
1048
1049	/* Add a breadcrumb for the completion of the batch buffer */
1050	request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
1051	if (request == NULL || i915_add_request(ring, file, request)) {
1052		i915_gem_next_request_seqno(ring);
1053		free(request, DRM_I915_GEM);
1054	} else if (i915_gem_sync_exec_requests)
1055		i915_wait_request(ring, request->seqno, true);
1056}
1057
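/*
 * When i915_fix_mi_batchbuffer_end is set, map the tail of the batch
 * through the aperture and append (or, if the object has no room left,
 * overwrite the last dword with) an MI_BATCH_BUFFER_END if the batch
 * does not already end with one.
 */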
1058static void
1059i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
1060    uint32_t batch_start_offset, uint32_t batch_len)
1061{
1062	char *mkva;
1063	uint64_t po_r, po_w;
1064	uint32_t cmd;
1065
1066	po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
1067	    batch_start_offset + batch_len;
1068	if (batch_len > 0)
1069		po_r -= 4;
1070	mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
1071	    PAT_WRITE_COMBINING);
1072	po_r &= PAGE_MASK;
1073	cmd = *(uint32_t *)(mkva + po_r);
1074
1075	if (cmd != MI_BATCH_BUFFER_END) {
1076		/*
1077		 * batch_len != 0 due to the check at the start of
1078		 * i915_gem_do_execbuffer
1079		 */
1080		if (batch_obj->base.size > batch_start_offset + batch_len) {
1081			po_w = po_r + 4;
1082/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
1083		} else {
1084			po_w = po_r;
1085DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
1086		}
1087		*(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
1088	}
1089
1090	pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
1091}
1092
1093int i915_fix_mi_batchbuffer_end = 0;
1094
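/*
 * Zero the four GEN7_SO_WRITE_OFFSET registers via MI_LOAD_REGISTER_IMM;
 * a no-op unless running on the gen7 render ring.
 */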
1095static int
1096i915_reset_gen7_sol_offsets(struct drm_device *dev,
1097			    struct intel_ring_buffer *ring)
1098{
1099	drm_i915_private_t *dev_priv = dev->dev_private;
1100	int ret, i;
1101
1102	if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
1103		return 0;
1104
1105	ret = intel_ring_begin(ring, 4 * 3);
1106	if (ret)
1107		return ret;
1108
1109	for (i = 0; i < 4; i++) {
1110		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1111		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1112		intel_ring_emit(ring, 0);
1113	}
1114
1115	intel_ring_advance(ring);
1116
1117	return 0;
1118}
1119
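/*
 * Common execbuffer path: validate the request, look up and reserve the
 * objects, apply relocations, flush caches and synchronise rings, then
 * dispatch the batch and queue its retirement.
 */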
1120static int
1121i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1122		       struct drm_file *file,
1123		       struct drm_i915_gem_execbuffer2 *args,
1124		       struct drm_i915_gem_exec_object2 *exec)
1125{
1126	drm_i915_private_t *dev_priv = dev->dev_private;
1127	struct list_head objects;
1128	struct eb_objects *eb;
1129	struct drm_i915_gem_object *batch_obj;
1130	struct drm_clip_rect *cliprects = NULL;
1131	struct intel_ring_buffer *ring;
1132	vm_page_t **relocs_ma;
1133	u32 exec_start, exec_len;
1134	u32 seqno;
1135	u32 mask;
1136	int ret, mode, i;
1137
1138	if (!i915_gem_check_execbuffer(args)) {
1139		DRM_DEBUG("execbuf with invalid offset/length\n");
1140		return -EINVAL;
1141	}
1142
1143	if (args->batch_len == 0)
1144		return (0);
1145
1146	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma);
1147	if (ret != 0)
1148		goto pre_struct_lock_err;
1149
1150	switch (args->flags & I915_EXEC_RING_MASK) {
1151	case I915_EXEC_DEFAULT:
1152	case I915_EXEC_RENDER:
1153		ring = &dev_priv->rings[RCS];
1154		break;
1155	case I915_EXEC_BSD:
1156		if (!HAS_BSD(dev)) {
1157			DRM_DEBUG("execbuf with invalid ring (BSD)\n");
1158			ret = -EINVAL;
			goto pre_struct_lock_err;
1159		}
1160		ring = &dev_priv->rings[VCS];
1161		break;
1162	case I915_EXEC_BLT:
1163		if (!HAS_BLT(dev)) {
1164			DRM_DEBUG("execbuf with invalid ring (BLT)\n");
1165			ret = -EINVAL;
			goto pre_struct_lock_err;
1166		}
1167		ring = &dev_priv->rings[BCS];
1168		break;
1169	default:
1170		DRM_DEBUG("execbuf with unknown ring: %d\n",
1171			  (int)(args->flags & I915_EXEC_RING_MASK));
1172		ret = -EINVAL;
1173		goto pre_struct_lock_err;
1174	}
1175
1176	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1177	mask = I915_EXEC_CONSTANTS_MASK;
1178	switch (mode) {
1179	case I915_EXEC_CONSTANTS_REL_GENERAL:
1180	case I915_EXEC_CONSTANTS_ABSOLUTE:
1181	case I915_EXEC_CONSTANTS_REL_SURFACE:
1182		if (ring == &dev_priv->rings[RCS] &&
1183		    mode != dev_priv->relative_constants_mode) {
1184			if (INTEL_INFO(dev)->gen < 4) {
1185				ret = -EINVAL;
1186				goto pre_struct_lock_err;
1187			}
1188
1189			if (INTEL_INFO(dev)->gen > 5 &&
1190			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1191				ret = -EINVAL;
1192				goto pre_struct_lock_err;
1193			}
1194
1195			/* The HW changed the meaning of this bit on gen6 */
1196			if (INTEL_INFO(dev)->gen >= 6)
1197				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1198		}
1199		break;
1200	default:
1201		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
1202		ret = -EINVAL;
1203		goto pre_struct_lock_err;
1204	}
1205
1206	if (args->buffer_count < 1) {
1207		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1208		ret = -EINVAL;
1209		goto pre_struct_lock_err;
1210	}
1211
1212	if (args->num_cliprects != 0) {
1213		if (ring != &dev_priv->rings[RCS]) {
1214	DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1215			ret = -EINVAL;
1216			goto pre_struct_lock_err;
1217		}
1218
1219		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1220			DRM_DEBUG("execbuf with %u cliprects\n",
1221				  args->num_cliprects);
1222			ret = -EINVAL;
1223			goto pre_struct_lock_err;
1224		}
1225		cliprects = malloc(sizeof(*cliprects) * args->num_cliprects,
1226		    DRM_I915_GEM, M_WAITOK | M_ZERO);
1227		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
1228		    sizeof(*cliprects) * args->num_cliprects);
1229		if (ret != 0)
1230			goto pre_struct_lock_err;
1231	}
1232
1233	ret = i915_mutex_lock_interruptible(dev);
1234	if (ret)
1235		goto pre_struct_lock_err;
1236
1237	if (dev_priv->mm.suspended) {
1238		ret = -EBUSY;
1239		goto struct_lock_err;
1240	}
1241
1242	eb = eb_create(args->buffer_count);
1243	if (eb == NULL) {
1244		ret = -ENOMEM;
1245		goto struct_lock_err;
1246	}
1247
1248	/* Look up object handles */
1249	INIT_LIST_HEAD(&objects);
1250	for (i = 0; i < args->buffer_count; i++) {
1251		struct drm_i915_gem_object *obj;
1252		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
1253							exec[i].handle));
1254		if (&obj->base == NULL) {
1255			DRM_DEBUG("Invalid object handle %d at index %d\n",
1256				   exec[i].handle, i);
1257			/* prevent error path from reading uninitialized data */
1258			ret = -ENOENT;
1259			goto err;
1260		}
1261
1262		if (!list_empty(&obj->exec_list)) {
1263			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
1264				   obj, exec[i].handle, i);
1265			ret = -EINVAL;
1266			goto err;
1267		}
1268
1269		list_add_tail(&obj->exec_list, &objects);
1270		obj->exec_handle = exec[i].handle;
1271		obj->exec_entry = &exec[i];
1272		eb_add_object(eb, obj);
1273	}
1274
1275	/* take note of the batch buffer before we might reorder the lists */
1276	batch_obj = list_entry(objects.prev,
1277			       struct drm_i915_gem_object,
1278			       exec_list);
1279
1280	/* Move the objects en-masse into the GTT, evicting if necessary. */
1281	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
1282	if (ret)
1283		goto err;
1284
1285	/* The objects are in their final locations, apply the relocations. */
1286	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
1287	if (ret) {
1288		if (ret == -EFAULT) {
1289			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
1290			    &objects, eb, exec, args->buffer_count);
1291			DRM_LOCK_ASSERT(dev);
1292		}
1293		if (ret)
1294			goto err;
1295	}
1296
1297	/* Set the pending read domains for the batch buffer to COMMAND */
1298	if (batch_obj->base.pending_write_domain) {
1299		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1300		ret = -EINVAL;
1301		goto err;
1302	}
1303	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1304
1305	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1306	if (ret)
1307		goto err;
1308
1309	seqno = i915_gem_next_request_seqno(ring);
1310	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
1311		if (seqno < ring->sync_seqno[i]) {
1312			/* The GPU cannot handle its semaphore value wrapping,
1313			 * so every billion or so execbuffers, we need to stall
1314			 * the GPU in order to reset the counters.
1315			 */
1316			ret = i915_gpu_idle(dev, true);
1317			if (ret)
1318				goto err;
1319
1320			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
1321		}
1322	}
1323
1324	if (ring == &dev_priv->rings[RCS] &&
1325	    mode != dev_priv->relative_constants_mode) {
1326		ret = intel_ring_begin(ring, 4);
1327		if (ret)
1328			goto err;
1329
1330		intel_ring_emit(ring, MI_NOOP);
1331		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1332		intel_ring_emit(ring, INSTPM);
1333		intel_ring_emit(ring, mask << 16 | mode);
1334		intel_ring_advance(ring);
1335
1336		dev_priv->relative_constants_mode = mode;
1337	}
1338
1339	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1340		ret = i915_reset_gen7_sol_offsets(dev, ring);
1341		if (ret)
1342			goto err;
1343	}
1344
1345	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1346	exec_len = args->batch_len;
1347
1348	if (i915_fix_mi_batchbuffer_end) {
1349		i915_gem_fix_mi_batchbuffer_end(batch_obj,
1350		    args->batch_start_offset, args->batch_len);
1351	}
1352
1353	CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
1354	    exec_start, exec_len);
1355
1356	if (cliprects) {
1357		for (i = 0; i < args->num_cliprects; i++) {
1358			ret = i915_emit_box_p(dev, &cliprects[i],
1359			    args->DR1, args->DR4);
1360			if (ret)
1361				goto err;
1362
1363			ret = ring->dispatch_execbuffer(ring, exec_start,
1364			    exec_len);
1365			if (ret)
1366				goto err;
1367		}
1368	} else {
1369		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
1370		if (ret)
1371			goto err;
1372	}
1373
1374	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1375	i915_gem_execbuffer_retire_commands(dev, file, ring);
1376
1377err:
1378	eb_destroy(eb);
1379	while (!list_empty(&objects)) {
1380		struct drm_i915_gem_object *obj;
1381
1382		obj = list_first_entry(&objects, struct drm_i915_gem_object,
1383		    exec_list);
1384		list_del_init(&obj->exec_list);
1385		drm_gem_object_unreference(&obj->base);
1386	}
1387struct_lock_err:
1388	DRM_UNLOCK(dev);
1389
1390pre_struct_lock_err:
1391	for (i = 0; i < args->buffer_count; i++) {
1392		if (relocs_ma[i] != NULL) {
1393			vm_page_unhold_pages(relocs_ma[i], howmany(
1394			    exec[i].relocation_count *
1395			    sizeof(struct drm_i915_gem_relocation_entry),
1396			    PAGE_SIZE));
1397			free(relocs_ma[i], DRM_I915_GEM);
1398		}
1399	}
1400	free(relocs_ma, DRM_I915_GEM);
1401	free(cliprects, DRM_I915_GEM);
1402	return ret;
1403}
1404
1405/*
1406 * Legacy execbuffer just creates an exec2 list from the original exec object
1407 * list array and passes it to the real function.
1408 */
1409int
1410i915_gem_execbuffer(struct drm_device *dev, void *data,
1411		    struct drm_file *file)
1412{
1413	struct drm_i915_gem_execbuffer *args = data;
1414	struct drm_i915_gem_execbuffer2 exec2;
1415	struct drm_i915_gem_exec_object *exec_list = NULL;
1416	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1417	int ret, i;
1418
1419	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
1420	    (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1421
1422	if (args->buffer_count < 1) {
1423		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1424		return -EINVAL;
1425	}
1426
1427	/* Copy in the exec list from userland */
1428	/* XXXKIB user-controlled malloc size */
1429	exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
1430	    DRM_I915_GEM, M_WAITOK);
1431	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1432	    DRM_I915_GEM, M_WAITOK);
1433	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
1434	    sizeof(*exec_list) * args->buffer_count);
1435	if (ret != 0) {
1436		DRM_DEBUG("copy %d exec entries failed %d\n",
1437			  args->buffer_count, ret);
1438		free(exec_list, DRM_I915_GEM);
1439		free(exec2_list, DRM_I915_GEM);
1440		return (ret);
1441	}
1442
1443	for (i = 0; i < args->buffer_count; i++) {
1444		exec2_list[i].handle = exec_list[i].handle;
1445		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1446		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1447		exec2_list[i].alignment = exec_list[i].alignment;
1448		exec2_list[i].offset = exec_list[i].offset;
1449		if (INTEL_INFO(dev)->gen < 4)
1450			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1451		else
1452			exec2_list[i].flags = 0;
1453	}
1454
1455	exec2.buffers_ptr = args->buffers_ptr;
1456	exec2.buffer_count = args->buffer_count;
1457	exec2.batch_start_offset = args->batch_start_offset;
1458	exec2.batch_len = args->batch_len;
1459	exec2.DR1 = args->DR1;
1460	exec2.DR4 = args->DR4;
1461	exec2.num_cliprects = args->num_cliprects;
1462	exec2.cliprects_ptr = args->cliprects_ptr;
1463	exec2.flags = I915_EXEC_RENDER;
1464
1465	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1466	if (!ret) {
1467		/* Copy the new buffer offsets back to the user's exec list. */
1468		for (i = 0; i < args->buffer_count; i++)
1469			exec_list[i].offset = exec2_list[i].offset;
1470		/* ... and back out to userspace */
1471		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
1472		    sizeof(*exec_list) * args->buffer_count);
1473		if (ret != 0) {
1474			DRM_DEBUG("failed to copy %d exec entries "
1475				  "back to user (%d)\n",
1476				  args->buffer_count, ret);
1477		}
1478	}
1479
1480	free(exec_list, DRM_I915_GEM);
1481	free(exec2_list, DRM_I915_GEM);
1482	return ret;
1483}
1484
1485int
1486i915_gem_execbuffer2(struct drm_device *dev, void *data,
1487		     struct drm_file *file)
1488{
1489	struct drm_i915_gem_execbuffer2 *args = data;
1490	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1491	int ret;
1492
1493	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
1494	    (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);
1495
1496	if (args->buffer_count < 1 ||
1497	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1498		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1499		return -EINVAL;
1500	}
1501
1502	/* XXXKIB user-controllable malloc size */
1503	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1504	    DRM_I915_GEM, M_WAITOK);
1505	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
1506	    sizeof(*exec2_list) * args->buffer_count);
1507	if (ret != 0) {
1508		DRM_DEBUG("copy %d exec entries failed %d\n",
1509			  args->buffer_count, ret);
1510		free(exec2_list, DRM_I915_GEM);
1511		return (ret);
1512	}
1513
1514	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1515	if (!ret) {
1516		/* Copy the new buffer offsets back to the user's exec list. */
1517		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
1518		    sizeof(*exec2_list) * args->buffer_count);
1519		if (ret) {
1520			DRM_DEBUG("failed to copy %d exec entries "
1521				  "back to user (%d)\n",
1522				  args->buffer_count, ret);
1523		}
1524	}
1525
1526	free(exec2_list, DRM_I915_GEM);
1527	return ret;
1528}
1529