i915_gem_execbuffer.c revision 289719
1/*
2 * Copyright © 2008,2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *    Chris Wilson <chris@chris-wilson.co.uk>
26 *
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/drm2/i915/i915_gem_execbuffer.c 289719 2015-10-21 20:49:45Z jhb $");
31
32#include <dev/drm2/drmP.h>
33#include <dev/drm2/drm.h>
34#include <dev/drm2/i915/i915_drm.h>
35#include <dev/drm2/i915/i915_drv.h>
36#include <dev/drm2/i915/intel_drv.h>
37#include <sys/limits.h>
38#include <sys/sf_buf.h>
39
40struct change_domains {
41	uint32_t invalidate_domains;
42	uint32_t flush_domains;
43	uint32_t flush_rings;
44	uint32_t flips;
45};
46
47/*
48 * Set the next domain for the specified object. This
49 * may not actually perform the necessary flushing/invalidating though,
50 * as that may want to be batched with other set_domain operations
51 *
52 * This is (we hope) the only really tricky part of gem. The goal
53 * is fairly simple -- track which caches hold bits of the object
54 * and make sure they remain coherent. A few concrete examples may
55 * help to explain how it works. For shorthand, we use the notation
56 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
57 * a pair of read and write domain masks.
58 *
59 * Case 1: the batch buffer
60 *
61 *	1. Allocated
62 *	2. Written by CPU
63 *	3. Mapped to GTT
64 *	4. Read by GPU
65 *	5. Unmapped from GTT
66 *	6. Freed
67 *
68 *	Let's take these a step at a time
69 *
70 *	1. Allocated
71 *		Pages allocated from the kernel may still have
72 *		cache contents, so we set them to (CPU, CPU) always.
73 *	2. Written by CPU (using pwrite)
74 *		The pwrite function calls set_domain (CPU, CPU) and
75 *		this function does nothing (as nothing changes)
76 *	3. Mapped to GTT
77 *		This function asserts that the object is not
78 *		currently in any GPU-based read or write domains
79 *	4. Read by GPU
80 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
81 *		As write_domain is zero, this function adds in the
82 *		current read domains (CPU+COMMAND, 0).
83 *		flush_domains is set to CPU.
84 *		invalidate_domains is set to COMMAND
85 *		clflush is run to get data out of the CPU caches
86 *		then i915_dev_set_domain calls i915_gem_flush to
87 *		emit an MI_FLUSH and drm_agp_chipset_flush
88 *	5. Unmapped from GTT
89 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
90 *		flush_domains and invalidate_domains end up both zero
91 *		so no flushing/invalidating happens
92 *	6. Freed
93 *		yay, done
94 *
95 * Case 2: The shared render buffer
96 *
97 *	1. Allocated
98 *	2. Mapped to GTT
99 *	3. Read/written by GPU
100 *	4. set_domain to (CPU,CPU)
101 *	5. Read/written by CPU
102 *	6. Read/written by GPU
103 *
104 *	1. Allocated
105 *		Same as last example, (CPU, CPU)
106 *	2. Mapped to GTT
107 *		Nothing changes (assertions find that it is not in the GPU)
108 *	3. Read/written by GPU
109 *		execbuffer calls set_domain (RENDER, RENDER)
110 *		flush_domains gets CPU
111 *		invalidate_domains gets GPU
112 *		clflush (obj)
113 *		MI_FLUSH and drm_agp_chipset_flush
114 *	4. set_domain (CPU, CPU)
115 *		flush_domains gets GPU
116 *		invalidate_domains gets CPU
117 *		wait_rendering (obj) to make sure all drawing is complete.
118 *		This will include an MI_FLUSH to get the data from GPU
119 *		to memory
120 *		clflush (obj) to invalidate the CPU cache
121 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
122 *	5. Read/written by CPU
123 *		cache lines are loaded and dirtied
124 *	6. Read/written by GPU
125 *		Same as last GPU access
126 *
127 * Case 3: The constant buffer
128 *
129 *	1. Allocated
130 *	2. Written by CPU
131 *	3. Read by GPU
132 *	4. Updated (written) by CPU again
133 *	5. Read by GPU
134 *
135 *	1. Allocated
136 *		(CPU, CPU)
137 *	2. Written by CPU
138 *		(CPU, CPU)
139 *	3. Read by GPU
140 *		(CPU+RENDER, 0)
141 *		flush_domains = CPU
142 *		invalidate_domains = RENDER
143 *		clflush (obj)
144 *		MI_FLUSH
145 *		drm_agp_chipset_flush
146 *	4. Updated (written) by CPU again
147 *		(CPU, CPU)
148 *		flush_domains = 0 (no previous write domain)
149 *		invalidate_domains = 0 (no new read domains)
150 *	5. Read by GPU
151 *		(CPU+RENDER, 0)
152 *		flush_domains = CPU
153 *		invalidate_domains = RENDER
154 *		clflush (obj)
155 *		MI_FLUSH
156 *		drm_agp_chipset_flush
157 */
158static void
159i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
160				  struct intel_ring_buffer *ring,
161				  struct change_domains *cd)
162{
163	uint32_t invalidate_domains = 0, flush_domains = 0;
164
165	/*
166	 * If the object isn't moving to a new write domain,
167	 * let the object stay in multiple read domains
168	 */
169	if (obj->base.pending_write_domain == 0)
170		obj->base.pending_read_domains |= obj->base.read_domains;
171
172	/*
173	 * Flush the current write domain if
174	 * the new read domains don't match. Invalidate
175	 * any read domains which differ from the old
176	 * write domain
177	 */
178	if (obj->base.write_domain &&
179	    (((obj->base.write_domain != obj->base.pending_read_domains ||
180	       obj->ring != ring)) ||
181	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
182		flush_domains |= obj->base.write_domain;
183		invalidate_domains |=
184			obj->base.pending_read_domains & ~obj->base.write_domain;
185	}
186	/*
187	 * Invalidate any read caches which may have
188	 * stale data. That is, any new read domains.
189	 */
190	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
191	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
192		i915_gem_clflush_object(obj);
193
194	if (obj->base.pending_write_domain)
195		cd->flips |= atomic_load_acq_int(&obj->pending_flip);
196
197	/* The actual obj->write_domain will be updated with
198	 * pending_write_domain after we emit the accumulated flush for all
199	 * of our domain changes in execbuffers (which clears objects'
200	 * write_domains).  So if we have a current write domain that we
201	 * aren't changing, set pending_write_domain to that.
202	 */
203	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
204		obj->base.pending_write_domain = obj->base.write_domain;
205
206	cd->invalidate_domains |= invalidate_domains;
207	cd->flush_domains |= flush_domains;
208	if (flush_domains & I915_GEM_GPU_DOMAINS)
209		cd->flush_rings |= intel_ring_flag(obj->ring);
210	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
211		cd->flush_rings |= intel_ring_flag(ring);
212}
213
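/*
 * Per-execbuffer lookup table mapping userspace handles to GEM objects,
 * implemented as a small hash table so relocation targets can be
 * resolved without going back to the handle table.
 */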
214struct eb_objects {
215	u_long hashmask;
216	LIST_HEAD(, drm_i915_gem_object) *buckets;
217};
218
219static struct eb_objects *
220eb_create(int size)
221{
222	struct eb_objects *eb;
223
224	eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
225	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
226	return (eb);
227}
228
229static void
230eb_reset(struct eb_objects *eb)
231{
232	int i;
233
234	for (i = 0; i <= eb->hashmask; i++)
235		LIST_INIT(&eb->buckets[i]);
236}
237
238static void
239eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
240{
241
242	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
243	    obj, exec_node);
244}
245
246static struct drm_i915_gem_object *
247eb_get_object(struct eb_objects *eb, unsigned long handle)
248{
249	struct drm_i915_gem_object *obj;
250
251	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
252		if (obj->exec_handle == handle)
253			return (obj);
254	}
255	return (NULL);
256}
257
258static void
259eb_destroy(struct eb_objects *eb)
260{
261
262	free(eb->buckets, DRM_I915_GEM);
263	free(eb, DRM_I915_GEM);
264}
265
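/*
 * Relocations are written through the CPU when the object is already in
 * the CPU write domain or is kept at a cacheable level; otherwise they
 * are written through a mapping of the GTT aperture.
 */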
266static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
267{
268	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
269		obj->cache_level != I915_CACHE_NONE);
270}
271
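/*
 * Apply a single relocation: look up the target object, validate the
 * requested read/write domains, and, unless the presumed offset is
 * already correct, write the target's GTT offset plus delta into the
 * batch object, either via an sf_buf (CPU path) or via a
 * write-combining mapping of the aperture (GTT path).
 */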
272static int
273i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
274				   struct eb_objects *eb,
275				   struct drm_i915_gem_relocation_entry *reloc)
276{
277	struct drm_device *dev = obj->base.dev;
278	struct drm_gem_object *target_obj;
279	struct drm_i915_gem_object *target_i915_obj;
280	uint32_t target_offset;
281	int ret = -EINVAL;
282
283	/* we already hold a reference to all valid objects */
284	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
285	if (unlikely(target_obj == NULL))
286		return -ENOENT;
287
288	target_i915_obj = to_intel_bo(target_obj);
289	target_offset = target_i915_obj->gtt_offset;
290
291#if WATCH_RELOC
292	DRM_INFO("%s: obj %p offset %08x target %d "
293		 "read %08x write %08x gtt %08x "
294		 "presumed %08x delta %08x\n",
295		 __func__,
296		 obj,
297		 (int) reloc->offset,
298		 (int) reloc->target_handle,
299		 (int) reloc->read_domains,
300		 (int) reloc->write_domain,
301		 (int) target_offset,
302		 (int) reloc->presumed_offset,
303		 reloc->delta);
304#endif
305
306	/* The target buffer should have appeared before us in the
307	 * exec_object list, so it should have a GTT space bound by now.
308	 */
309	if (unlikely(target_offset == 0)) {
310		DRM_DEBUG("No GTT space found for object %d\n",
311			  reloc->target_handle);
312		return ret;
313	}
314
315	/* Validate that the target is in a valid r/w GPU domain */
316	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
317		DRM_DEBUG("reloc with multiple write domains: "
318			  "obj %p target %d offset %d "
319			  "read %08x write %08x",
320			  obj, reloc->target_handle,
321			  (int) reloc->offset,
322			  reloc->read_domains,
323			  reloc->write_domain);
324		return ret;
325	}
326	if (unlikely((reloc->write_domain | reloc->read_domains)
327		     & ~I915_GEM_GPU_DOMAINS)) {
328		DRM_DEBUG("reloc with read/write non-GPU domains: "
329			  "obj %p target %d offset %d "
330			  "read %08x write %08x",
331			  obj, reloc->target_handle,
332			  (int) reloc->offset,
333			  reloc->read_domains,
334			  reloc->write_domain);
335		return ret;
336	}
337	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
338		     reloc->write_domain != target_obj->pending_write_domain)) {
339		DRM_DEBUG("Write domain conflict: "
340			  "obj %p target %d offset %d "
341			  "new %08x old %08x\n",
342			  obj, reloc->target_handle,
343			  (int) reloc->offset,
344			  reloc->write_domain,
345			  target_obj->pending_write_domain);
346		return ret;
347	}
348
349	target_obj->pending_read_domains |= reloc->read_domains;
350	target_obj->pending_write_domain |= reloc->write_domain;
351
352	/* If the relocation already has the right value in it, no
353	 * more work needs to be done.
354	 */
355	if (target_offset == reloc->presumed_offset)
356		return 0;
357
358	/* Check that the relocation address is valid... */
359	if (unlikely(reloc->offset > obj->base.size - 4)) {
360		DRM_DEBUG("Relocation beyond object bounds: "
361			  "obj %p target %d offset %d size %d.\n",
362			  obj, reloc->target_handle,
363			  (int) reloc->offset,
364			  (int) obj->base.size);
365		return ret;
366	}
367	if (unlikely(reloc->offset & 3)) {
368		DRM_DEBUG("Relocation not 4-byte aligned: "
369			  "obj %p target %d offset %d.\n",
370			  obj, reloc->target_handle,
371			  (int) reloc->offset);
372		return ret;
373	}
374
375	/* We can't wait for rendering with pagefaults disabled */
376	if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
377		return (-EFAULT);
378
379	reloc->delta += target_offset;
380	if (use_cpu_reloc(obj)) {
381		uint32_t page_offset = reloc->offset & PAGE_MASK;
382		char *vaddr;
383		struct sf_buf *sf;
384
385		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
386		if (ret)
387			return ret;
388
389		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
390		    SFB_NOWAIT);
391		if (sf == NULL)
392			return (-ENOMEM);
393		vaddr = (void *)sf_buf_kva(sf);
394		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
395		sf_buf_free(sf);
396	} else {
397		uint32_t *reloc_entry;
398		char *reloc_page;
399
400		ret = i915_gem_object_set_to_gtt_domain(obj, true);
401		if (ret)
402			return ret;
403
404		ret = i915_gem_object_put_fence(obj);
405		if (ret)
406			return ret;
407
408		/* Map the page containing the relocation we're going to perform.  */
409		reloc->offset += obj->gtt_offset;
410		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
411		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
412		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
413		    PAGE_MASK));
414		*(volatile uint32_t *)reloc_entry = reloc->delta;
415		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
416	}
417
418	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
419	 * pipe_control writes because the gpu doesn't properly redirect them
420	 * through the ppgtt for non_secure batchbuffers. */
421	if (unlikely(IS_GEN6(dev) &&
422	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
423	    !target_i915_obj->has_global_gtt_mapping)) {
424		i915_gem_gtt_bind_object(target_i915_obj,
425					 target_i915_obj->cache_level);
426	}
427
428	/* and update the user's relocation entry */
429	reloc->presumed_offset = target_offset;
430
431	return 0;
432}
433
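/*
 * Fast-path relocation for one object: copy the user's relocation
 * entries onto the stack in chunks with copyin_nofault() (pagefaults
 * are disabled by the caller), apply them, and write any changed
 * presumed_offset values back to userspace with copyout_nofault().
 */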
434static int
435i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
436				    struct eb_objects *eb)
437{
438#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
439	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
440	struct drm_i915_gem_relocation_entry *user_relocs;
441	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
442	int remain, ret;
443
444	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
445	remain = entry->relocation_count;
446	while (remain) {
447		struct drm_i915_gem_relocation_entry *r = stack_reloc;
448		int count = remain;
449		if (count > DRM_ARRAY_SIZE(stack_reloc))
450			count = DRM_ARRAY_SIZE(stack_reloc);
451		remain -= count;
452
453		ret = -copyin_nofault(user_relocs, r, count*sizeof(r[0]));
454		if (ret != 0)
455			return (ret);
456
457		do {
458			u64 offset = r->presumed_offset;
459
460			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
461			if (ret)
462				return ret;
463
464			if (r->presumed_offset != offset &&
465			    copyout_nofault(&r->presumed_offset,
466					    &user_relocs->presumed_offset,
467					    sizeof(r->presumed_offset))) {
468				return -EFAULT;
469			}
470
471			user_relocs++;
472			r++;
473		} while (--count);
474	}
475
476	return 0;
477#undef N_RELOC
478}
479
480static int
481i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
482					 struct eb_objects *eb,
483					 struct drm_i915_gem_relocation_entry *relocs)
484{
485	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
486	int i, ret;
487
488	for (i = 0; i < entry->relocation_count; i++) {
489		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
490		if (ret)
491			return ret;
492	}
493
494	return 0;
495}
496
497static int
498i915_gem_execbuffer_relocate(struct drm_device *dev,
499			     struct eb_objects *eb,
500			     struct list_head *objects)
501{
502	struct drm_i915_gem_object *obj;
503	int ret, pflags;
504
505	/* Try to move as many of the relocation targets off the active list
506	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
507	 * for the retirement with pagefaults disabled.
508	 */
509	i915_gem_retire_requests(dev);
510
511	ret = 0;
512	pflags = vm_fault_disable_pagefaults();
513	/* This is the fast path and we cannot handle a pagefault whilst
514	 * holding the device lock lest the user pass in the relocations
515	 * contained within an mmapped bo. In such a case the page
516	 * fault handler would call i915_gem_fault() and we would try to
517	 * acquire the device lock again. Obviously this is bad.
518	 */
519
520	list_for_each_entry(obj, objects, exec_list) {
521		ret = i915_gem_execbuffer_relocate_object(obj, eb);
522		if (ret)
523			break;
524	}
525	vm_fault_enable_pagefaults(pflags);
526
527	return ret;
528}
529
530#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
531
532static int
533need_reloc_mappable(struct drm_i915_gem_object *obj)
534{
535	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
536	return entry->relocation_count && !use_cpu_reloc(obj);
537}
538
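/*
 * Pin an object into the GTT for execution, requiring a mappable
 * placement when a fence or a GTT relocation write is needed, and on
 * pre-gen4 hardware (which fences GPU access) acquire and pin a fence
 * register when the exec entry asks for one.
 */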
539static int
540pin_and_fence_object(struct drm_i915_gem_object *obj,
541		     struct intel_ring_buffer *ring)
542{
543	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
544	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
545	bool need_fence, need_mappable;
546	int ret;
547
548	need_fence =
549		has_fenced_gpu_access &&
550		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
551		obj->tiling_mode != I915_TILING_NONE;
552	need_mappable = need_fence || need_reloc_mappable(obj);
553
554	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
555	if (ret)
556		return ret;
557
558	if (has_fenced_gpu_access) {
559		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
560			ret = i915_gem_object_get_fence(obj);
561			if (ret)
562				goto err_unpin;
563
564			if (i915_gem_object_pin_fence(obj))
565				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
566
567			obj->pending_fenced_gpu_access = true;
568		}
569	}
570
571	entry->offset = obj->gtt_offset;
572	return 0;
573
574err_unpin:
575	i915_gem_object_unpin(obj);
576	return ret;
577}
578
579static int
580i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
581			    struct drm_file *file,
582			    struct list_head *objects)
583{
584	drm_i915_private_t *dev_priv;
585	struct drm_i915_gem_object *obj;
586	struct list_head ordered_objects;
587	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
588	int ret, retry;
589
590	dev_priv = ring->dev->dev_private;
591	INIT_LIST_HEAD(&ordered_objects);
592	while (!list_empty(objects)) {
593		struct drm_i915_gem_exec_object2 *entry;
594		bool need_fence, need_mappable;
595
596		obj = list_first_entry(objects,
597				       struct drm_i915_gem_object,
598				       exec_list);
599		entry = obj->exec_entry;
600
601		need_fence =
602			has_fenced_gpu_access &&
603			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
604			obj->tiling_mode != I915_TILING_NONE;
605		need_mappable = need_fence || need_reloc_mappable(obj);
606
607		if (need_mappable)
608			list_move(&obj->exec_list, &ordered_objects);
609		else
610			list_move_tail(&obj->exec_list, &ordered_objects);
611
612		obj->base.pending_read_domains = 0;
613		obj->base.pending_write_domain = 0;
614	}
615	list_splice(&ordered_objects, objects);
616
617	/* Attempt to pin all of the buffers into the GTT.
618	 * This is done in 3 phases:
619	 *
620	 * 1a. Unbind all objects that do not match the GTT constraints for
621	 *     the execbuffer (fenceable, mappable, alignment etc).
622	 * 1b. Increment pin count for already bound objects.
623	 * 2.  Bind new objects.
624	 * 3.  Decrement pin count.
625	 *
626	 * This avoids unnecessary unbinding of later objects in order to make
627	 * room for the earlier objects *unless* we need to defragment.
628	 */
629	retry = 0;
630	do {
631		ret = 0;
632
633		/* Unbind any ill-fitting objects or pin. */
634		list_for_each_entry(obj, objects, exec_list) {
635			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
636			bool need_fence, need_mappable;
637
638			if (!obj->gtt_space)
639				continue;
640
641			need_fence =
642				has_fenced_gpu_access &&
643				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
644				obj->tiling_mode != I915_TILING_NONE;
645			need_mappable = need_fence || need_reloc_mappable(obj);
646
647			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
648			    (need_mappable && !obj->map_and_fenceable))
649				ret = i915_gem_object_unbind(obj);
650			else
651				ret = pin_and_fence_object(obj, ring);
652			if (ret)
653				goto err;
654		}
655
656		/* Bind fresh objects */
657		list_for_each_entry(obj, objects, exec_list) {
658			if (obj->gtt_space)
659				continue;
660
661			ret = pin_and_fence_object(obj, ring);
662			if (ret) {
663				int ret_ignore;
664
665				/* This can potentially raise a harmless
666				 * -EINVAL if we failed to bind in the above
667				 * call. It cannot raise -EINTR since we know
668				 * that the bo is freshly bound and so will
669				 * not need to be flushed or waited upon.
670				 */
671				ret_ignore = i915_gem_object_unbind(obj);
672				(void)ret_ignore;
673				if (obj->gtt_space != NULL)
674					printf("%s: gtt_space\n", __func__);
675				break;
676			}
677		}
678
679		/* Decrement pin count for bound objects */
680		list_for_each_entry(obj, objects, exec_list) {
681			struct drm_i915_gem_exec_object2 *entry;
682
683			if (!obj->gtt_space)
684				continue;
685
686			entry = obj->exec_entry;
687			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
688				i915_gem_object_unpin_fence(obj);
689				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
690			}
691
692			i915_gem_object_unpin(obj);
693
694			/* ... and ensure the ppgtt mapping exists if needed. */
695			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
696				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
697						       obj, obj->cache_level);
698
699				obj->has_aliasing_ppgtt_mapping = 1;
700			}
701		}
702
703		if (ret != -ENOSPC || retry > 1)
704			return ret;
705
706		/* First attempt, just clear anything that is purgeable.
707		 * Second attempt, clear the entire GTT.
708		 */
709		ret = i915_gem_evict_everything(ring->dev, retry == 0);
710		if (ret)
711			return ret;
712
713		retry++;
714	} while (1);
715
716err:
717	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
718		struct drm_i915_gem_exec_object2 *entry;
719
720		if (!obj->gtt_space)
721			continue;
722
723		entry = obj->exec_entry;
724		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
725			i915_gem_object_unpin_fence(obj);
726			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
727		}
728
729		i915_gem_object_unpin(obj);
730	}
731
732	return ret;
733}
734
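/*
 * Slow-path relocation: drop the object references and the device lock,
 * copy every relocation array from userspace with pagefaults enabled,
 * then retake the lock, look the objects up again and redo reservation
 * and relocation from the kernel copy.
 */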
735static int
736i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
737				  struct drm_file *file,
738				  struct intel_ring_buffer *ring,
739				  struct list_head *objects,
740				  struct eb_objects *eb,
741				  struct drm_i915_gem_exec_object2 *exec,
742				  int count)
743{
744	struct drm_i915_gem_relocation_entry *reloc;
745	struct drm_i915_gem_object *obj;
746	int *reloc_offset;
747	int i, total, ret;
748
749	/* We may process another execbuffer during the unlock... */
750	while (!list_empty(objects)) {
751		obj = list_first_entry(objects,
752				       struct drm_i915_gem_object,
753				       exec_list);
754		list_del_init(&obj->exec_list);
755		drm_gem_object_unreference(&obj->base);
756	}
757
758	DRM_UNLOCK(dev);
759
760	total = 0;
761	for (i = 0; i < count; i++)
762		total += exec[i].relocation_count;
763
764	reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
765	    M_WAITOK | M_ZERO);
766	reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);
767
768	total = 0;
769	for (i = 0; i < count; i++) {
770		struct drm_i915_gem_relocation_entry *user_relocs;
771
772		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
773		ret = -copyin(user_relocs, reloc + total,
774		    exec[i].relocation_count * sizeof(*reloc));
775		if (ret != 0) {
776			DRM_LOCK(dev);
777			goto err;
778		}
779
780		reloc_offset[i] = total;
781		total += exec[i].relocation_count;
782	}
783
784	ret = i915_mutex_lock_interruptible(dev);
785	if (ret) {
786		DRM_LOCK(dev);
787		goto err;
788	}
789
790	/* reacquire the objects */
791	eb_reset(eb);
792	for (i = 0; i < count; i++) {
793		struct drm_i915_gem_object *obj;
794
795		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
796							exec[i].handle));
797		if (&obj->base == NULL) {
798			DRM_DEBUG("Invalid object handle %d at index %d\n",
799				   exec[i].handle, i);
800			ret = -ENOENT;
801			goto err;
802		}
803
804		list_add_tail(&obj->exec_list, objects);
805		obj->exec_handle = exec[i].handle;
806		obj->exec_entry = &exec[i];
807		eb_add_object(eb, obj);
808	}
809
810	ret = i915_gem_execbuffer_reserve(ring, file, objects);
811	if (ret)
812		goto err;
813
814	list_for_each_entry(obj, objects, exec_list) {
815		int offset = obj->exec_entry - exec;
816		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
817							       reloc + reloc_offset[offset]);
818		if (ret)
819			goto err;
820	}
821
822	/* Leave the user relocations as they are; this is the painfully slow path,
823	 * and we want to avoid the complication of dropping the lock whilst
824	 * having buffers reserved in the aperture and so causing spurious
825	 * ENOSPC for random operations.
826	 */
827
828err:
829	free(reloc, DRM_I915_GEM);
830	free(reloc_offset, DRM_I915_GEM);
831	return ret;
832}
833
834static int
835i915_gem_execbuffer_flush(struct drm_device *dev,
836			  uint32_t invalidate_domains,
837			  uint32_t flush_domains,
838			  uint32_t flush_rings)
839{
840	drm_i915_private_t *dev_priv = dev->dev_private;
841	int i, ret;
842
843	if (flush_domains & I915_GEM_DOMAIN_CPU)
844		intel_gtt_chipset_flush();
845
846	if (flush_domains & I915_GEM_DOMAIN_GTT)
847		wmb();
848
849	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
850		for (i = 0; i < I915_NUM_RINGS; i++)
851			if (flush_rings & (1 << i)) {
852				ret = i915_gem_flush_ring(&dev_priv->rings[i],
853				    invalidate_domains, flush_domains);
854				if (ret)
855					return ret;
856			}
857	}
858
859	return 0;
860}
861
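/*
 * Emit the flushes accumulated by i915_gem_object_set_to_gpu_domain():
 * a chipset flush for the CPU domain, a write barrier for the GTT
 * domain, and a ring flush on every ring named in flush_rings.
 */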
862static int
863i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
864{
865	u32 plane, flip_mask;
866	int ret;
867
868	/* Check for any pending flips. As we only maintain a flip queue depth
869	 * of 1, we can simply insert a WAIT for the next display flip prior
870	 * to executing the batch and avoid stalling the CPU.
871	 */
872
873	for (plane = 0; flips >> plane; plane++) {
874		if (((flips >> plane) & 1) == 0)
875			continue;
876
877		if (plane)
878			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
879		else
880			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
881
882		ret = intel_ring_begin(ring, 2);
883		if (ret)
884			return ret;
885
886		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
887		intel_ring_emit(ring, MI_NOOP);
888		intel_ring_advance(ring);
889	}
890
891	return 0;
892}
893
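/*
 * Move every object onto the GPU: accumulate the domain changes, emit
 * the required flushes, wait for any pending page flips and synchronize
 * each object with the target ring.
 */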
894static int
895i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
896				struct list_head *objects)
897{
898	struct drm_i915_gem_object *obj;
899	struct change_domains cd;
900	int ret;
901
902	memset(&cd, 0, sizeof(cd));
903	list_for_each_entry(obj, objects, exec_list)
904		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
905
906	if (cd.invalidate_domains | cd.flush_domains) {
907#if WATCH_EXEC
908		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
909			  __func__,
910			 cd.invalidate_domains,
911			 cd.flush_domains);
912#endif
913		ret = i915_gem_execbuffer_flush(ring->dev,
914						cd.invalidate_domains,
915						cd.flush_domains,
916						cd.flush_rings);
917		if (ret)
918			return ret;
919	}
920
921	if (cd.flips) {
922		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
923		if (ret)
924			return ret;
925	}
926
927	list_for_each_entry(obj, objects, exec_list) {
928		ret = i915_gem_object_sync(obj, ring);
929		if (ret)
930			return ret;
931	}
932
933	return 0;
934}
935
936static bool
937i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
938{
939	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
940}
941
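/*
 * Check each exec entry for an overflowing relocation count and hold
 * the pages backing its userspace relocation array with
 * vm_fault_quick_hold_pages(), so they remain resident while the
 * execbuffer is processed; the pages are unheld on the way out of
 * i915_gem_do_execbuffer().
 */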
942static int
943validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
944    vm_page_t ***map, int **maplen)
945{
946	vm_page_t *ma;
947	int i, length, page_count;
948
949	/* XXXKIB various limits checking is missing here */
950	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
951	*maplen = malloc(count * sizeof(*maplen), DRM_I915_GEM, M_WAITOK |
952	    M_ZERO);
953	for (i = 0; i < count; i++) {
954		/* First check for malicious input causing overflow */
955		if (exec[i].relocation_count >
956		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
957			return -EINVAL;
958
959		length = exec[i].relocation_count *
960		    sizeof(struct drm_i915_gem_relocation_entry);
961		if (length == 0) {
962			(*map)[i] = NULL;
963			continue;
964		}
965		/*
966		 * Since both start and end of the relocation region
967		 * may not be aligned on a page boundary, be
968		 * conservative and request a page slot for each
969		 * partial page.  Thus +2.
970		 */
971		page_count = howmany(length, PAGE_SIZE) + 2;
972		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
973		    DRM_I915_GEM, M_WAITOK | M_ZERO);
974		(*maplen)[i] = vm_fault_quick_hold_pages(
975		    &curproc->p_vmspace->vm_map, exec[i].relocs_ptr, length,
976		    VM_PROT_READ | VM_PROT_WRITE, ma, page_count);
977		if ((*maplen)[i] == -1) {
978			free(ma, DRM_I915_GEM);
979			(*map)[i] = NULL;
980			return (-EFAULT);
981		}
982	}
983
984	return 0;
985}
986
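/*
 * After the batch has been dispatched, commit the pending domains on
 * each object, move it to the ring's active list and, for GPU writes,
 * mark it dirty and put it on the ring's gpu_write_list.
 */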
987static void
988i915_gem_execbuffer_move_to_active(struct list_head *objects,
989				   struct intel_ring_buffer *ring,
990				   u32 seqno)
991{
992	struct drm_i915_gem_object *obj;
993	uint32_t old_read, old_write;
994
995	list_for_each_entry(obj, objects, exec_list) {
996		old_read = obj->base.read_domains;
997		old_write = obj->base.write_domain;
998
999		obj->base.read_domains = obj->base.pending_read_domains;
1000		obj->base.write_domain = obj->base.pending_write_domain;
1001		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
1002
1003		i915_gem_object_move_to_active(obj, ring, seqno);
1004		if (obj->base.write_domain) {
1005			obj->dirty = 1;
1006			obj->pending_gpu_write = true;
1007			list_move_tail(&obj->gpu_write_list,
1008				       &ring->gpu_write_list);
1009			if (obj->pin_count) /* check for potential scanout */
1010				intel_mark_busy(ring->dev, obj);
1011		}
1012		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
1013		    obj, old_read, old_write);
1014	}
1015
1016	intel_mark_busy(ring->dev, NULL);
1017}
1018
1019int i915_gem_sync_exec_requests;
1020
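/*
 * Invalidate the command (and, on gen4+, sampler) domain after the
 * batch and add a request as a breadcrumb; when the
 * i915_gem_sync_exec_requests flag is set, also wait for that request
 * and retire it before returning.
 */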
1021static void
1022i915_gem_execbuffer_retire_commands(struct drm_device *dev,
1023				    struct drm_file *file,
1024				    struct intel_ring_buffer *ring)
1025{
1026	struct drm_i915_gem_request *request;
1027	u32 invalidate;
1028
1029	/*
1030	 * Ensure that the commands in the batch buffer are
1031	 * finished before the interrupt fires.
1032	 *
1033	 * The sampler always gets flushed on i965 (sigh).
1034	 */
1035	invalidate = I915_GEM_DOMAIN_COMMAND;
1036	if (INTEL_INFO(dev)->gen >= 4)
1037		invalidate |= I915_GEM_DOMAIN_SAMPLER;
1038	if (ring->flush(ring, invalidate, 0)) {
1039		i915_gem_next_request_seqno(ring);
1040		return;
1041	}
1042
1043	/* Add a breadcrumb for the completion of the batch buffer */
1044	request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
1045	if (request == NULL || i915_add_request(ring, file, request)) {
1046		i915_gem_next_request_seqno(ring);
1047		free(request, DRM_I915_GEM);
1048	} else if (i915_gem_sync_exec_requests) {
1049		i915_wait_request(ring, request->seqno);
1050		i915_gem_retire_requests(dev);
1051	}
1052}
1053
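/*
 * Optional workaround, enabled through i915_fix_mi_batchbuffer_end: map
 * the tail of the batch through the aperture and make sure it ends with
 * MI_BATCH_BUFFER_END, appending one after the batch when there is room
 * or overwriting the final dword otherwise.
 */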
1054static void
1055i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
1056    uint32_t batch_start_offset, uint32_t batch_len)
1057{
1058	char *mkva;
1059	uint64_t po_r, po_w;
1060	uint32_t cmd;
1061
1062	po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
1063	    batch_start_offset + batch_len;
1064	if (batch_len > 0)
1065		po_r -= 4;
1066	mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
1067	    PAT_WRITE_COMBINING);
1068	po_r &= PAGE_MASK;
1069	cmd = *(uint32_t *)(mkva + po_r);
1070
1071	if (cmd != MI_BATCH_BUFFER_END) {
1072		/*
1073		 * batch_len != 0 due to the check at the start of
1074		 * i915_gem_do_execbuffer
1075		 */
1076		if (batch_obj->base.size > batch_start_offset + batch_len) {
1077			po_w = po_r + 4;
1078/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
1079		} else {
1080			po_w = po_r;
1081DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
1082		}
1083		*(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
1084	}
1085
1086	pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
1087}
1088
1089int i915_fix_mi_batchbuffer_end = 0;
1090
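/*
 * Gen7 render-ring only: zero the four GEN7_SO_WRITE_OFFSET registers
 * with MI_LOAD_REGISTER_IMM; called when userspace sets
 * I915_EXEC_GEN7_SOL_RESET.
 */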
1091static int
1092i915_reset_gen7_sol_offsets(struct drm_device *dev,
1093			    struct intel_ring_buffer *ring)
1094{
1095	drm_i915_private_t *dev_priv = dev->dev_private;
1096	int ret, i;
1097
1098	if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
1099		return 0;
1100
1101	ret = intel_ring_begin(ring, 4 * 3);
1102	if (ret)
1103		return ret;
1104
1105	for (i = 0; i < 4; i++) {
1106		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1107		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1108		intel_ring_emit(ring, 0);
1109	}
1110
1111	intel_ring_advance(ring);
1112
1113	return 0;
1114}
1115
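/*
 * Main execbuffer path: validate the arguments, look up and reserve
 * every object in the GTT, apply relocations (falling back to the slow
 * path on -EFAULT), flush the objects onto the target ring, dispatch
 * the batch (once per clip rectangle, if any) and queue a request so
 * the commands can be retired.
 */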
1116static int
1117i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1118		       struct drm_file *file,
1119		       struct drm_i915_gem_execbuffer2 *args,
1120		       struct drm_i915_gem_exec_object2 *exec)
1121{
1122	drm_i915_private_t *dev_priv = dev->dev_private;
1123	struct list_head objects;
1124	struct eb_objects *eb;
1125	struct drm_i915_gem_object *batch_obj;
1126	struct drm_clip_rect *cliprects = NULL;
1127	struct intel_ring_buffer *ring;
1128	vm_page_t **relocs_ma;
1129	int *relocs_len;
1130	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1131	u32 exec_start, exec_len;
1132	u32 seqno;
1133	u32 mask;
1134	int ret, mode, i;
1135
1136	if (!i915_gem_check_execbuffer(args)) {
1137		DRM_DEBUG("execbuf with invalid offset/length\n");
1138		return -EINVAL;
1139	}
1140
1141	if (args->batch_len == 0)
1142		return (0);
1143
1144	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma,
1145	    &relocs_len);
1146	if (ret != 0)
1147		goto pre_struct_lock_err;
1148
1149	switch (args->flags & I915_EXEC_RING_MASK) {
1150	case I915_EXEC_DEFAULT:
1151	case I915_EXEC_RENDER:
1152		ring = &dev_priv->rings[RCS];
1153		break;
1154	case I915_EXEC_BSD:
1155		ring = &dev_priv->rings[VCS];
1156		if (ctx_id != 0) {
1157			DRM_DEBUG("Ring %s doesn't support contexts\n",
1158				  ring->name);
1159			ret = -EPERM;
1160			goto pre_struct_lock_err;
1161		}
1162		break;
1163	case I915_EXEC_BLT:
1164		ring = &dev_priv->rings[BCS];
1165		if (ctx_id != 0) {
1166			DRM_DEBUG("Ring %s doesn't support contexts\n",
1167				  ring->name);
1168			ret = -EPERM;
1169			goto pre_struct_lock_err;
1170		}
1171		break;
1172	default:
1173		DRM_DEBUG("execbuf with unknown ring: %d\n",
1174			  (int)(args->flags & I915_EXEC_RING_MASK));
1175		ret = -EINVAL;
1176		goto pre_struct_lock_err;
1177	}
1178	if (!intel_ring_initialized(ring)) {
1179		DRM_DEBUG("execbuf with invalid ring: %d\n",
1180			  (int)(args->flags & I915_EXEC_RING_MASK));
1181		ret = -EINVAL;
1182		goto pre_struct_lock_err;
1183	}
1184
1185	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1186	mask = I915_EXEC_CONSTANTS_MASK;
1187	switch (mode) {
1188	case I915_EXEC_CONSTANTS_REL_GENERAL:
1189	case I915_EXEC_CONSTANTS_ABSOLUTE:
1190	case I915_EXEC_CONSTANTS_REL_SURFACE:
1191		if (ring == &dev_priv->rings[RCS] &&
1192		    mode != dev_priv->relative_constants_mode) {
1193			if (INTEL_INFO(dev)->gen < 4) {
1194				ret = -EINVAL;
1195				goto pre_struct_lock_err;
1196			}
1197
1198			if (INTEL_INFO(dev)->gen > 5 &&
1199			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1200				ret = -EINVAL;
1201				goto pre_struct_lock_err;
1202			}
1203
1204			/* The HW changed the meaning of this bit on gen6 */
1205			if (INTEL_INFO(dev)->gen >= 6)
1206				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1207		}
1208		break;
1209	default:
1210		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
1211		ret = -EINVAL;
1212		goto pre_struct_lock_err;
1213	}
1214
1215	if (args->buffer_count < 1) {
1216		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1217		ret = -EINVAL;
1218		goto pre_struct_lock_err;
1219	}
1220
1221	if (args->num_cliprects != 0) {
1222		if (ring != &dev_priv->rings[RCS]) {
1223			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1224			ret = -EINVAL;
1225			goto pre_struct_lock_err;
1226		}
1227
1228		if (INTEL_INFO(dev)->gen >= 5) {
1229			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1230			ret = -EINVAL;
1231			goto pre_struct_lock_err;
1232		}
1233
1234		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1235			DRM_DEBUG("execbuf with %u cliprects\n",
1236				  args->num_cliprects);
1237			ret = -EINVAL;
1238			goto pre_struct_lock_err;
1239		}
1240		cliprects = malloc(sizeof(*cliprects) * args->num_cliprects,
1241		    DRM_I915_GEM, M_WAITOK | M_ZERO);
1242		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
1243		    sizeof(*cliprects) * args->num_cliprects);
1244		if (ret != 0)
1245			goto pre_struct_lock_err;
1246	}
1247
1248	ret = i915_mutex_lock_interruptible(dev);
1249	if (ret)
1250		goto pre_struct_lock_err;
1251
1252	if (dev_priv->mm.suspended) {
1253		DRM_UNLOCK(dev);
1254		ret = -EBUSY;
1255		goto pre_struct_lock_err;
1256	}
1257
1258	eb = eb_create(args->buffer_count);
1259	if (eb == NULL) {
1260		DRM_UNLOCK(dev);
1261		ret = -ENOMEM;
1262		goto pre_struct_lock_err;
1263	}
1264
1265	/* Look up object handles */
1266	INIT_LIST_HEAD(&objects);
1267	for (i = 0; i < args->buffer_count; i++) {
1268		struct drm_i915_gem_object *obj;
1269
1270		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
1271							exec[i].handle));
1272		if (&obj->base == NULL) {
1273			DRM_DEBUG("Invalid object handle %d at index %d\n",
1274				   exec[i].handle, i);
1275			/* prevent error path from reading uninitialized data */
1276			ret = -ENOENT;
1277			goto err;
1278		}
1279
1280		if (!list_empty(&obj->exec_list)) {
1281			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
1282				   obj, exec[i].handle, i);
1283			ret = -EINVAL;
1284			goto err;
1285		}
1286
1287		list_add_tail(&obj->exec_list, &objects);
1288		obj->exec_handle = exec[i].handle;
1289		obj->exec_entry = &exec[i];
1290		eb_add_object(eb, obj);
1291	}
1292
1293	/* take note of the batch buffer before we might reorder the lists */
1294	batch_obj = list_entry(objects.prev,
1295			       struct drm_i915_gem_object,
1296			       exec_list);
1297
1298	/* Move the objects en-masse into the GTT, evicting if necessary. */
1299	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
1300	if (ret)
1301		goto err;
1302
1303	/* The objects are in their final locations, apply the relocations. */
1304	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
1305	if (ret) {
1306		if (ret == -EFAULT) {
1307			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
1308								&objects, eb,
1309								exec,
1310								args->buffer_count);
1311			DRM_LOCK_ASSERT(dev);
1312		}
1313		if (ret)
1314			goto err;
1315	}
1316
1317	/* Set the pending read domains for the batch buffer to COMMAND */
1318	if (batch_obj->base.pending_write_domain) {
1319		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1320		ret = -EINVAL;
1321		goto err;
1322	}
1323	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1324
1325	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1326	if (ret)
1327		goto err;
1328
1329	ret = i915_switch_context(ring, file, ctx_id);
1330	if (ret)
1331		goto err;
1332
1333	seqno = i915_gem_next_request_seqno(ring);
1334	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
1335		if (seqno < ring->sync_seqno[i]) {
1336			/* The GPU cannot handle its semaphore value wrapping,
1337			 * so every billion or so execbuffers, we need to stall
1338			 * the GPU in order to reset the counters.
1339			 */
1340			ret = i915_gpu_idle(dev);
1341			if (ret)
1342				goto err;
1343			i915_gem_retire_requests(dev);
1344
1345			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
1346		}
1347	}
1348
1349	if (ring == &dev_priv->rings[RCS] &&
1350	    mode != dev_priv->relative_constants_mode) {
1351		ret = intel_ring_begin(ring, 4);
1352		if (ret)
1353			goto err;
1354
1355		intel_ring_emit(ring, MI_NOOP);
1356		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1357		intel_ring_emit(ring, INSTPM);
1358		intel_ring_emit(ring, mask << 16 | mode);
1359		intel_ring_advance(ring);
1360
1361		dev_priv->relative_constants_mode = mode;
1362	}
1363
1364	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1365		ret = i915_reset_gen7_sol_offsets(dev, ring);
1366		if (ret)
1367			goto err;
1368	}
1369
1370	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1371	exec_len = args->batch_len;
1372
1373	if (i915_fix_mi_batchbuffer_end) {
1374		i915_gem_fix_mi_batchbuffer_end(batch_obj,
1375		    args->batch_start_offset, args->batch_len);
1376	}
1377
1378	CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
1379	    exec_start, exec_len);
1380
1381	if (cliprects) {
1382		for (i = 0; i < args->num_cliprects; i++) {
1383			ret = i915_emit_box(dev, &cliprects[i],
1384					    args->DR1, args->DR4);
1385			if (ret)
1386				goto err;
1387
1388			ret = ring->dispatch_execbuffer(ring,
1389							exec_start, exec_len);
1390			if (ret)
1391				goto err;
1392		}
1393	} else {
1394		ret = ring->dispatch_execbuffer(ring,
1395						exec_start, exec_len);
1396		if (ret)
1397			goto err;
1398	}
1399
1400	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1401	i915_gem_execbuffer_retire_commands(dev, file, ring);
1402
1403err:
1404	eb_destroy(eb);
1405	while (!list_empty(&objects)) {
1406		struct drm_i915_gem_object *obj;
1407
1408		obj = list_first_entry(&objects,
1409				       struct drm_i915_gem_object,
1410				       exec_list);
1411		list_del_init(&obj->exec_list);
1412		drm_gem_object_unreference(&obj->base);
1413	}
1414	DRM_UNLOCK(dev);
1415
1416pre_struct_lock_err:
1417	for (i = 0; i < args->buffer_count; i++) {
1418		if (relocs_ma[i] != NULL) {
1419			vm_page_unhold_pages(relocs_ma[i], relocs_len[i]);
1420			free(relocs_ma[i], DRM_I915_GEM);
1421		}
1422	}
1423	free(relocs_len, DRM_I915_GEM);
1424	free(relocs_ma, DRM_I915_GEM);
1425	free(cliprects, DRM_I915_GEM);
1426	return ret;
1427}
1428
1429/*
1430 * Legacy execbuffer just creates an exec2 list from the original exec object
1431 * list array and passes it to the real function.
1432 */
1433int
1434i915_gem_execbuffer(struct drm_device *dev, void *data,
1435		    struct drm_file *file)
1436{
1437	struct drm_i915_gem_execbuffer *args = data;
1438	struct drm_i915_gem_execbuffer2 exec2;
1439	struct drm_i915_gem_exec_object *exec_list = NULL;
1440	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1441	int ret, i;
1442
1443	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
1444	    (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1445
1446	if (args->buffer_count < 1) {
1447		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1448		return -EINVAL;
1449	}
1450
1451	/* Copy in the exec list from userland */
1452	/* XXXKIB user-controlled malloc size */
1453	exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
1454	    DRM_I915_GEM, M_WAITOK);
1455	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1456	    DRM_I915_GEM, M_WAITOK);
1457	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
1458	    sizeof(*exec_list) * args->buffer_count);
1459	if (ret != 0) {
1460		DRM_DEBUG("copy %d exec entries failed %d\n",
1461			  args->buffer_count, ret);
1462		free(exec_list, DRM_I915_GEM);
1463		free(exec2_list, DRM_I915_GEM);
1464		return (ret);
1465	}
1466
1467	for (i = 0; i < args->buffer_count; i++) {
1468		exec2_list[i].handle = exec_list[i].handle;
1469		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1470		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1471		exec2_list[i].alignment = exec_list[i].alignment;
1472		exec2_list[i].offset = exec_list[i].offset;
1473		if (INTEL_INFO(dev)->gen < 4)
1474			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1475		else
1476			exec2_list[i].flags = 0;
1477	}
1478
1479	exec2.buffers_ptr = args->buffers_ptr;
1480	exec2.buffer_count = args->buffer_count;
1481	exec2.batch_start_offset = args->batch_start_offset;
1482	exec2.batch_len = args->batch_len;
1483	exec2.DR1 = args->DR1;
1484	exec2.DR4 = args->DR4;
1485	exec2.num_cliprects = args->num_cliprects;
1486	exec2.cliprects_ptr = args->cliprects_ptr;
1487	exec2.flags = I915_EXEC_RENDER;
1488	i915_execbuffer2_set_context_id(exec2, 0);
1489
1490	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1491	if (!ret) {
1492		/* Copy the new buffer offsets back to the user's exec list. */
1493		for (i = 0; i < args->buffer_count; i++)
1494			exec_list[i].offset = exec2_list[i].offset;
1495		/* ... and back out to userspace */
1496		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
1497		    sizeof(*exec_list) * args->buffer_count);
1498		if (ret != 0) {
1499			DRM_DEBUG("failed to copy %d exec entries "
1500				  "back to user (%d)\n",
1501				  args->buffer_count, ret);
1502		}
1503	}
1504
1505	free(exec_list, DRM_I915_GEM);
1506	free(exec2_list, DRM_I915_GEM);
1507	return ret;
1508}
1509
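/*
 * execbuffer2 ioctl: copy the exec object list in from userspace, run
 * the common path and copy the possibly updated offsets back out.
 */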
1510int
1511i915_gem_execbuffer2(struct drm_device *dev, void *data,
1512		     struct drm_file *file)
1513{
1514	struct drm_i915_gem_execbuffer2 *args = data;
1515	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1516	int ret;
1517
1518	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
1519	    (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);
1520
1521	if (args->buffer_count < 1 ||
1522	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1523		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1524		return -EINVAL;
1525	}
1526
1527	/* XXXKIB user-controllable malloc size */
1528	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1529	    DRM_I915_GEM, M_WAITOK);
1530	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
1531	    sizeof(*exec2_list) * args->buffer_count);
1532	if (ret != 0) {
1533		DRM_DEBUG("copy %d exec entries failed %d\n",
1534			  args->buffer_count, ret);
1535		free(exec2_list, DRM_I915_GEM);
1536		return -EFAULT;
1537	}
1538
1539	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1540	if (!ret) {
1541		/* Copy the new buffer offsets back to the user's exec list. */
1542		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
1543		    sizeof(*exec2_list) * args->buffer_count);
1544		if (ret) {
1545			DRM_DEBUG("failed to copy %d exec entries "
1546				  "back to user (%d)\n",
1547				  args->buffer_count, ret);
1548		}
1549	}
1550
1551	free(exec2_list, DRM_I915_GEM);
1552	return ret;
1553}
1554