i915_gem_execbuffer.c revision 282199
1/*
2 * Copyright © 2008,2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *    Chris Wilson <chris@chris-wilson.co.uk>
26 *
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/drm2/i915/i915_gem_execbuffer.c 282199 2015-04-28 19:35:05Z dumbbell $");
31
32#include <dev/drm2/drmP.h>
33#include <dev/drm2/drm.h>
34#include <dev/drm2/i915/i915_drm.h>
35#include <dev/drm2/i915/i915_drv.h>
36#include <dev/drm2/i915/intel_drv.h>
37#include <sys/limits.h>
38#include <sys/sf_buf.h>
39
40struct change_domains {
41	uint32_t invalidate_domains;
42	uint32_t flush_domains;
43	uint32_t flush_rings;
44	uint32_t flips;
45};
46
47/*
48 * Set the next domain for the specified object. This
49 * may not actually perform the necessary flushing/invalidating though,
50 * as that may want to be batched with other set_domain operations
51 *
52 * This is (we hope) the only really tricky part of gem. The goal
53 * is fairly simple -- track which caches hold bits of the object
54 * and make sure they remain coherent. A few concrete examples may
55 * help to explain how it works. For shorthand, we use the notation
56 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
57 * a pair of read and write domain masks.
58 *
59 * Case 1: the batch buffer
60 *
61 *	1. Allocated
62 *	2. Written by CPU
63 *	3. Mapped to GTT
64 *	4. Read by GPU
65 *	5. Unmapped from GTT
66 *	6. Freed
67 *
68 *	Let's take these a step at a time
69 *
70 *	1. Allocated
71 *		Pages allocated from the kernel may still have
72 *		cache contents, so we set them to (CPU, CPU) always.
73 *	2. Written by CPU (using pwrite)
74 *		The pwrite function calls set_domain (CPU, CPU) and
75 *		this function does nothing (as nothing changes)
76 *	3. Mapped to GTT
77 *		This function asserts that the object is not
78 *		currently in any GPU-based read or write domains
79 *	4. Read by GPU
80 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
81 *		As write_domain is zero, this function adds in the
82 *		current read domains (CPU+COMMAND, 0).
83 *		flush_domains is set to CPU.
84 *		invalidate_domains is set to COMMAND
85 *		clflush is run to get data out of the CPU caches
86 *		then i915_dev_set_domain calls i915_gem_flush to
87 *		emit an MI_FLUSH and drm_agp_chipset_flush
88 *	5. Unmapped from GTT
89 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
90 *		flush_domains and invalidate_domains end up both zero
91 *		so no flushing/invalidating happens
92 *	6. Freed
93 *		yay, done
94 *
95 * Case 2: The shared render buffer
96 *
97 *	1. Allocated
98 *	2. Mapped to GTT
99 *	3. Read/written by GPU
100 *	4. set_domain to (CPU,CPU)
101 *	5. Read/written by CPU
102 *	6. Read/written by GPU
103 *
104 *	1. Allocated
105 *		Same as last example, (CPU, CPU)
106 *	2. Mapped to GTT
107 *		Nothing changes (assertions find that it is not in the GPU)
108 *	3. Read/written by GPU
109 *		execbuffer calls set_domain (RENDER, RENDER)
110 *		flush_domains gets CPU
111 *		invalidate_domains gets GPU
112 *		clflush (obj)
113 *		MI_FLUSH and drm_agp_chipset_flush
114 *	4. set_domain (CPU, CPU)
115 *		flush_domains gets GPU
116 *		invalidate_domains gets CPU
117 *		wait_rendering (obj) to make sure all drawing is complete.
118 *		This will include an MI_FLUSH to get the data from GPU
119 *		to memory
120 *		clflush (obj) to invalidate the CPU cache
121 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
122 *	5. Read/written by CPU
123 *		cache lines are loaded and dirtied
124 *	6. Read/written by GPU
125 *		Same as last GPU access
126 *
127 * Case 3: The constant buffer
128 *
129 *	1. Allocated
130 *	2. Written by CPU
131 *	3. Read by GPU
132 *	4. Updated (written) by CPU again
133 *	5. Read by GPU
134 *
135 *	1. Allocated
136 *		(CPU, CPU)
137 *	2. Written by CPU
138 *		(CPU, CPU)
139 *	3. Read by GPU
140 *		(CPU+RENDER, 0)
141 *		flush_domains = CPU
142 *		invalidate_domains = RENDER
143 *		clflush (obj)
144 *		MI_FLUSH
145 *		drm_agp_chipset_flush
146 *	4. Updated (written) by CPU again
147 *		(CPU, CPU)
148 *		flush_domains = 0 (no previous write domain)
149 *		invalidate_domains = 0 (no new read domains)
150 *	5. Read by GPU
151 *		(CPU+RENDER, 0)
152 *		flush_domains = CPU
153 *		invalidate_domains = RENDER
154 *		clflush (obj)
155 *		MI_FLUSH
156 *		drm_agp_chipset_flush
157 */
158static void
159i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
160				  struct intel_ring_buffer *ring,
161				  struct change_domains *cd)
162{
163	uint32_t invalidate_domains = 0, flush_domains = 0;
164
165	/*
166	 * If the object isn't moving to a new write domain,
167	 * let the object stay in multiple read domains
168	 */
169	if (obj->base.pending_write_domain == 0)
170		obj->base.pending_read_domains |= obj->base.read_domains;
171
172	/*
173	 * Flush the current write domain if
174	 * the new read domains don't match. Invalidate
175	 * any read domains which differ from the old
176	 * write domain
177	 */
178	if (obj->base.write_domain &&
179	    (((obj->base.write_domain != obj->base.pending_read_domains ||
180	       obj->ring != ring)) ||
181	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
182		flush_domains |= obj->base.write_domain;
183		invalidate_domains |=
184			obj->base.pending_read_domains & ~obj->base.write_domain;
185	}
186	/*
187	 * Invalidate any read caches which may have
188	 * stale data. That is, any new read domains.
189	 */
190	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
191	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
192		i915_gem_clflush_object(obj);
193
194	if (obj->base.pending_write_domain)
195		cd->flips |= atomic_load_acq_int(&obj->pending_flip);
196
197	/* The actual obj->write_domain will be updated with
198	 * pending_write_domain after we emit the accumulated flush for all
199	 * of our domain changes in execbuffers (which clears objects'
200	 * write_domains).  So if we have a current write domain that we
201	 * aren't changing, set pending_write_domain to that.
202	 */
203	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
204		obj->base.pending_write_domain = obj->base.write_domain;
205
206	cd->invalidate_domains |= invalidate_domains;
207	cd->flush_domains |= flush_domains;
208	if (flush_domains & I915_GEM_GPU_DOMAINS)
209		cd->flush_rings |= intel_ring_flag(obj->ring);
210	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
211		cd->flush_rings |= intel_ring_flag(ring);
212}
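/*
 * The change_domains accumulated above is consumed in
 * i915_gem_execbuffer_move_to_gpu(): after every object on the execbuffer
 * list has been visited, a single combined flush/invalidate is emitted for
 * the rings recorded in cd->flush_rings, and cd->flips is used to wait for
 * any pending page flips before the batch is dispatched.
 */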
213
214struct eb_objects {
215	u_long hashmask;
216	LIST_HEAD(, drm_i915_gem_object) *buckets;
217};
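/*
 * eb_objects is a small hash table, keyed by the userspace GEM handle,
 * that lets the relocation code look up objects which have already been
 * referenced and placed on the execbuffer list.  The buckets come from
 * hashinit(9); hashmask is the power-of-two mask it returns.
 */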
218
219static struct eb_objects *
220eb_create(int size)
221{
222	struct eb_objects *eb;
223
224	eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
225	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
226	return (eb);
227}
228
229static void
230eb_reset(struct eb_objects *eb)
231{
232	int i;
233
234	for (i = 0; i <= eb->hashmask; i++)
235		LIST_INIT(&eb->buckets[i]);
236}
237
238static void
239eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
240{
241
242	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
243	    obj, exec_node);
244}
245
246static struct drm_i915_gem_object *
247eb_get_object(struct eb_objects *eb, unsigned long handle)
248{
249	struct drm_i915_gem_object *obj;
250
251	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
252		if (obj->exec_handle == handle)
253			return (obj);
254	}
255	return (NULL);
256}
257
258static void
259eb_destroy(struct eb_objects *eb)
260{
261
262	free(eb->buckets, DRM_I915_GEM);
263	free(eb, DRM_I915_GEM);
264}
265
266static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
267{
268	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
269		obj->cache_level != I915_CACHE_NONE);
270}
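/*
 * Relocations into objects that are already in the CPU write domain, or
 * that are cacheable (snooped), are patched through a CPU mapping below;
 * everything else is written through a write-combining mapping of the GTT
 * aperture so the object does not have to be pulled back into the CPU
 * domain just for the fixup.
 */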
271
272static int
273i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
274				   struct eb_objects *eb,
275				   struct drm_i915_gem_relocation_entry *reloc)
276{
277	struct drm_device *dev = obj->base.dev;
278	struct drm_gem_object *target_obj;
279	struct drm_i915_gem_object *target_i915_obj;
280	uint32_t target_offset;
281	int ret = -EINVAL;
282
283	/* we already hold a reference to all valid objects */
284	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
285	if (unlikely(target_obj == NULL))
286		return -ENOENT;
287
288	target_i915_obj = to_intel_bo(target_obj);
289	target_offset = target_i915_obj->gtt_offset;
290
291#if WATCH_RELOC
292	DRM_INFO("%s: obj %p offset %08x target %d "
293		 "read %08x write %08x gtt %08x "
294		 "presumed %08x delta %08x\n",
295		 __func__,
296		 obj,
297		 (int) reloc->offset,
298		 (int) reloc->target_handle,
299		 (int) reloc->read_domains,
300		 (int) reloc->write_domain,
301		 (int) target_offset,
302		 (int) reloc->presumed_offset,
303		 reloc->delta);
304#endif
305
306	/* The target buffer should have appeared before us in the
307	 * exec_object list, so it should have a GTT space bound by now.
308	 */
309	if (unlikely(target_offset == 0)) {
310		DRM_DEBUG("No GTT space found for object %d\n",
311			  reloc->target_handle);
312		return ret;
313	}
314
315	/* Validate that the target is in a valid r/w GPU domain */
316	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
317		DRM_DEBUG("reloc with multiple write domains: "
318			  "obj %p target %d offset %d "
319			  "read %08x write %08x",
320			  obj, reloc->target_handle,
321			  (int) reloc->offset,
322			  reloc->read_domains,
323			  reloc->write_domain);
324		return ret;
325	}
326	if (unlikely((reloc->write_domain | reloc->read_domains)
327		     & ~I915_GEM_GPU_DOMAINS)) {
328		DRM_DEBUG("reloc with read/write non-GPU domains: "
329			  "obj %p target %d offset %d "
330			  "read %08x write %08x",
331			  obj, reloc->target_handle,
332			  (int) reloc->offset,
333			  reloc->read_domains,
334			  reloc->write_domain);
335		return ret;
336	}
337	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
338		     reloc->write_domain != target_obj->pending_write_domain)) {
339		DRM_DEBUG("Write domain conflict: "
340			  "obj %p target %d offset %d "
341			  "new %08x old %08x\n",
342			  obj, reloc->target_handle,
343			  (int) reloc->offset,
344			  reloc->write_domain,
345			  target_obj->pending_write_domain);
346		return ret;
347	}
348
349	target_obj->pending_read_domains |= reloc->read_domains;
350	target_obj->pending_write_domain |= reloc->write_domain;
351
352	/* If the relocation already has the right value in it, no
353	 * more work needs to be done.
354	 */
355	if (target_offset == reloc->presumed_offset)
356		return 0;
357
358	/* Check that the relocation address is valid... */
359	if (unlikely(reloc->offset > obj->base.size - 4)) {
360		DRM_DEBUG("Relocation beyond object bounds: "
361			  "obj %p target %d offset %d size %d.\n",
362			  obj, reloc->target_handle,
363			  (int) reloc->offset,
364			  (int) obj->base.size);
365		return ret;
366	}
367	if (unlikely(reloc->offset & 3)) {
368		DRM_DEBUG("Relocation not 4-byte aligned: "
369			  "obj %p target %d offset %d.\n",
370			  obj, reloc->target_handle,
371			  (int) reloc->offset);
372		return ret;
373	}
374
375	/* We can't wait for rendering with pagefaults disabled */
376	if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
377		return (-EFAULT);
378
379	reloc->delta += target_offset;
380	if (use_cpu_reloc(obj)) {
381		uint32_t page_offset = reloc->offset & PAGE_MASK;
382		char *vaddr;
383		struct sf_buf *sf;
384
385		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
386		if (ret)
387			return ret;
388
389		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
390		    SFB_NOWAIT);
391		if (sf == NULL)
392			return (-ENOMEM);
393		vaddr = (void *)sf_buf_kva(sf);
394		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
395		sf_buf_free(sf);
396	} else {
397		uint32_t *reloc_entry;
398		char *reloc_page;
399
400		ret = i915_gem_object_set_to_gtt_domain(obj, true);
401		if (ret)
402			return ret;
403
404		ret = i915_gem_object_put_fence(obj);
405		if (ret)
406			return ret;
407
408		/*
409		 * Map the page containing the relocation we're going
410		 * to perform.
411		 */
412		reloc->offset += obj->gtt_offset;
413		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
414		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
415		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
416		    PAGE_MASK));
417		*(volatile uint32_t *)reloc_entry = reloc->delta;
418		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
419	}
420
421	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
422	 * pipe_control writes because the gpu doesn't properly redirect them
423	 * through the ppgtt for non_secure batchbuffers. */
424	if (unlikely(IS_GEN6(dev) &&
425	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
426	    !target_i915_obj->has_global_gtt_mapping)) {
427		i915_gem_gtt_bind_object(target_i915_obj,
428					 target_i915_obj->cache_level);
429	}
430
431	/* and update the user's relocation entry */
432	reloc->presumed_offset = target_offset;
433
434	return 0;
435}
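/*
 * Net effect of one relocation entry, roughly (an illustrative sketch,
 * not literal code from this file):
 *
 *	dword at (object contents + reloc->offset) =
 *	    target object's gtt_offset + user-supplied reloc->delta
 *
 * The write goes either through an sf_buf CPU mapping or through a
 * write-combining mapping of the GTT aperture, as chosen above, and
 * reloc->presumed_offset is updated so the caller can copy it back to
 * userspace and skip the fixup next time the target has not moved.
 */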
436
437static int
438i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
439    struct eb_objects *eb)
440{
441#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
442	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
443	struct drm_i915_gem_relocation_entry *user_relocs;
444	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
445	int remain, ret;
446
447	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
448	remain = entry->relocation_count;
449	while (remain) {
450		struct drm_i915_gem_relocation_entry *r = stack_reloc;
451		int count = remain;
452		if (count > DRM_ARRAY_SIZE(stack_reloc))
453			count = DRM_ARRAY_SIZE(stack_reloc);
454		remain -= count;
455
456		ret = -copyin_nofault(user_relocs, r, count*sizeof(r[0]));
457		if (ret != 0)
458			return (ret);
459
460		do {
461			u64 offset = r->presumed_offset;
462
463			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
464			if (ret)
465				return ret;
466
467			if (r->presumed_offset != offset &&
468			    copyout_nofault(&r->presumed_offset,
469					    &user_relocs->presumed_offset,
470					    sizeof(r->presumed_offset))) {
471				return -EFAULT;
472			}
473
474			user_relocs++;
475			r++;
476		} while (--count);
477	}
478#undef N_RELOC
479	return (0);
480}
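/*
 * The fast path above copies relocations in batches through a small
 * on-stack buffer (up to 512 bytes of entries at a time) and uses the
 * nofault copyin/copyout variants, because pagefaults are disabled while
 * the device lock is held.  Any fault surfaces as -EFAULT and pushes the
 * caller onto the slow path.
 */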
481
482static int
483i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
484    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
485{
486	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
487	int i, ret;
488
489	for (i = 0; i < entry->relocation_count; i++) {
490		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
491		if (ret)
492			return ret;
493	}
494
495	return 0;
496}
497
498static int
499i915_gem_execbuffer_relocate(struct drm_device *dev,
500			     struct eb_objects *eb,
501			     struct list_head *objects)
502{
503	struct drm_i915_gem_object *obj;
504	int ret, pflags;
505
506	/* Try to move as many of the relocation targets off the active list
507	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
508	 * for the retirement with pagefaults disabled.
509	 */
510	i915_gem_retire_requests(dev);
511
512	ret = 0;
513	pflags = vm_fault_disable_pagefaults();
514	/* This is the fast path and we cannot handle a pagefault whilst
515	 * holding the device lock lest the user pass in the relocations
516 * contained within a mmaped bo. In such a case, the page
517	 * fault handler would call i915_gem_fault() and we would try to
518	 * acquire the device lock again. Obviously this is bad.
519	 */
520
521	list_for_each_entry(obj, objects, exec_list) {
522		ret = i915_gem_execbuffer_relocate_object(obj, eb);
523		if (ret != 0)
524			break;
525	}
526	vm_fault_enable_pagefaults(pflags);
527	return (ret);
528}
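/*
 * An -EFAULT from the loop above is not fatal: i915_gem_do_execbuffer()
 * falls back to i915_gem_execbuffer_relocate_slow(), which drops the
 * device lock, copies the relocations with ordinary faulting copyin and
 * retries.
 */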
529
530#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
531
532static int
533need_reloc_mappable(struct drm_i915_gem_object *obj)
534{
535	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
536	return entry->relocation_count && !use_cpu_reloc(obj);
537}
538
539static int
540pin_and_fence_object(struct drm_i915_gem_object *obj,
541		     struct intel_ring_buffer *ring)
542{
543	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
544	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
545	bool need_fence, need_mappable;
546	int ret;
547
548	need_fence =
549		has_fenced_gpu_access &&
550		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
551		obj->tiling_mode != I915_TILING_NONE;
552	need_mappable = need_fence || need_reloc_mappable(obj);
553
554	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
555	if (ret)
556		return ret;
557
558	if (has_fenced_gpu_access) {
559		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
560			ret = i915_gem_object_get_fence(obj);
561			if (ret)
562				goto err_unpin;
563
564			if (i915_gem_object_pin_fence(obj))
565				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
566
567			obj->pending_fenced_gpu_access = true;
568		}
569	}
570
571	entry->offset = obj->gtt_offset;
572	return 0;
573
574err_unpin:
575	i915_gem_object_unpin(obj);
576	return ret;
577}
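/*
 * pin_and_fence_object() pins the object into the GTT (forcing a mappable
 * placement when a fence or a GTT relocation is needed), takes a fence
 * register on pre-gen4 hardware for tiled objects that requested one, and
 * records the final GTT offset in entry->offset for the caller.
 */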
578
579static int
580i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
581			    struct drm_file *file,
582			    struct list_head *objects)
583{
584	drm_i915_private_t *dev_priv;
585	struct drm_i915_gem_object *obj;
586	int ret, retry;
587	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
588	struct list_head ordered_objects;
589
590	dev_priv = ring->dev->dev_private;
591	INIT_LIST_HEAD(&ordered_objects);
592	while (!list_empty(objects)) {
593		struct drm_i915_gem_exec_object2 *entry;
594		bool need_fence, need_mappable;
595
596		obj = list_first_entry(objects,
597				       struct drm_i915_gem_object,
598				       exec_list);
599		entry = obj->exec_entry;
600
601		need_fence =
602			has_fenced_gpu_access &&
603			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
604			obj->tiling_mode != I915_TILING_NONE;
605		need_mappable = need_fence || need_reloc_mappable(obj);
606
607		if (need_mappable)
608			list_move(&obj->exec_list, &ordered_objects);
609		else
610			list_move_tail(&obj->exec_list, &ordered_objects);
611
612		obj->base.pending_read_domains = 0;
613		obj->base.pending_write_domain = 0;
614	}
615	list_splice(&ordered_objects, objects);
616
617	/* Attempt to pin all of the buffers into the GTT.
618	 * This is done in 3 phases:
619	 *
620	 * 1a. Unbind all objects that do not match the GTT constraints for
621	 *     the execbuffer (fenceable, mappable, alignment etc).
622	 * 1b. Increment pin count for already bound objects and obtain
623	 *     a fence register if required.
624	 * 2.  Bind new objects.
625	 * 3.  Decrement pin count.
626	 *
627 * This avoids unnecessary unbinding of later objects in order to make
628	 * room for the earlier objects *unless* we need to defragment.
629	 */
630	retry = 0;
631	do {
632		ret = 0;
633
634		/* Unbind any ill-fitting objects or pin. */
635		list_for_each_entry(obj, objects, exec_list) {
636			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
637			bool need_fence, need_mappable;
638
639			if (!obj->gtt_space)
640				continue;
641
642			need_fence =
643				has_fenced_gpu_access &&
644				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
645				obj->tiling_mode != I915_TILING_NONE;
646			need_mappable = need_fence || need_reloc_mappable(obj);
647
648			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
649			    (need_mappable && !obj->map_and_fenceable))
650				ret = i915_gem_object_unbind(obj);
651			else
652				ret = pin_and_fence_object(obj, ring);
653			if (ret)
654				goto err;
655		}
656
657		/* Bind fresh objects */
658		list_for_each_entry(obj, objects, exec_list) {
659			if (obj->gtt_space)
660				continue;
661
662			ret = pin_and_fence_object(obj, ring);
663			if (ret) {
664				int ret_ignore;
665
666				/* This can potentially raise a harmless
667				 * -EINVAL if we failed to bind in the above
668				 * call. It cannot raise -EINTR since we know
669				 * that the bo is freshly bound and so will
670				 * not need to be flushed or waited upon.
671				 */
672				ret_ignore = i915_gem_object_unbind(obj);
673				(void)ret_ignore;
674				if (obj->gtt_space != NULL)
675					printf("%s: gtt_space\n", __func__);
676				break;
677			}
678		}
679
680		/* Decrement pin count for bound objects */
681		list_for_each_entry(obj, objects, exec_list) {
682			struct drm_i915_gem_exec_object2 *entry;
683
684			if (!obj->gtt_space)
685				continue;
686
687			entry = obj->exec_entry;
688			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
689				i915_gem_object_unpin_fence(obj);
690				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
691			}
692
693			i915_gem_object_unpin(obj);
694
695			/* ... and ensure the ppgtt mapping exists if needed. */
696			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
697				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
698						       obj, obj->cache_level);
699
700				obj->has_aliasing_ppgtt_mapping = 1;
701			}
702		}
703
704		if (ret != -ENOSPC || retry > 1)
705			return ret;
706
707		/* First attempt, just clear anything that is purgeable.
708		 * Second attempt, clear the entire GTT.
709		 */
710		ret = i915_gem_evict_everything(ring->dev, retry == 0);
711		if (ret)
712			return ret;
713
714		retry++;
715	} while (1);
716
717err:
718	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
719		struct drm_i915_gem_exec_object2 *entry;
720
721		if (!obj->gtt_space)
722			continue;
723
724		entry = obj->exec_entry;
725		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
726			i915_gem_object_unpin_fence(obj);
727			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
728		}
729
730		i915_gem_object_unpin(obj);
731	}
732
733	return ret;
734}
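/*
 * Reservation summary: objects that need to be mappable (fences, GTT
 * relocations) are sorted to the front, then the pin loop runs its three
 * phases.  On -ENOSPC the pin counts are dropped and the whole set is
 * retried, evicting only purgeable objects on the first retry and the
 * entire GTT on the second; a third failure is returned to the caller.
 */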
735
736static int
737i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
738    struct drm_file *file, struct intel_ring_buffer *ring,
739    struct list_head *objects, struct eb_objects *eb,
740    struct drm_i915_gem_exec_object2 *exec, int count)
741{
742	struct drm_i915_gem_relocation_entry *reloc;
743	struct drm_i915_gem_object *obj;
744	int *reloc_offset;
745	int i, total, ret;
746
747	/* We may process another execbuffer during the unlock... */
748	while (!list_empty(objects)) {
749		obj = list_first_entry(objects,
750				       struct drm_i915_gem_object,
751				       exec_list);
752		list_del_init(&obj->exec_list);
753		drm_gem_object_unreference(&obj->base);
754	}
755
756	DRM_UNLOCK(dev);
757
758	total = 0;
759	for (i = 0; i < count; i++)
760		total += exec[i].relocation_count;
761
762	reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
763	    M_WAITOK | M_ZERO);
764	reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);
765
766	total = 0;
767	for (i = 0; i < count; i++) {
768		struct drm_i915_gem_relocation_entry *user_relocs;
769
770		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
771		ret = -copyin(user_relocs, reloc + total,
772		    exec[i].relocation_count * sizeof(*reloc));
773		if (ret != 0) {
774			DRM_LOCK(dev);
775			goto err;
776		}
777
778		reloc_offset[i] = total;
779		total += exec[i].relocation_count;
780	}
781
782	ret = i915_mutex_lock_interruptible(dev);
783	if (ret) {
784		DRM_LOCK(dev);
785		goto err;
786	}
787
788	/* reacquire the objects */
789	eb_reset(eb);
790	for (i = 0; i < count; i++) {
791		struct drm_i915_gem_object *obj;
792
793		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
794							exec[i].handle));
795		if (&obj->base == NULL) {
796			DRM_DEBUG("Invalid object handle %d at index %d\n",
797				   exec[i].handle, i);
798			ret = -ENOENT;
799			goto err;
800		}
801
802		list_add_tail(&obj->exec_list, objects);
803		obj->exec_handle = exec[i].handle;
804		obj->exec_entry = &exec[i];
805		eb_add_object(eb, obj);
806	}
807
808	ret = i915_gem_execbuffer_reserve(ring, file, objects);
809	if (ret)
810		goto err;
811
812	list_for_each_entry(obj, objects, exec_list) {
813		int offset = obj->exec_entry - exec;
814		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
815		    reloc + reloc_offset[offset]);
816		if (ret)
817			goto err;
818	}
819
820	/* Leave the user relocations as are, this is the painfully slow path,
821	 * and we want to avoid the complication of dropping the lock whilst
822	 * having buffers reserved in the aperture and so causing spurious
823	 * ENOSPC for random operations.
824	 */
825
826err:
827	free(reloc, DRM_I915_GEM);
828	free(reloc_offset, DRM_I915_GEM);
829	return ret;
830}
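/*
 * Slow path: drop every object reference and the device lock, copy all of
 * the relocation arrays into a kernel buffer with ordinary (faulting)
 * copyin, then retake the lock, look the objects up again, re-reserve them
 * and apply the relocations from the kernel copy.  The userspace
 * relocation entries are deliberately left untouched here (see the comment
 * above).
 */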
831
832static int
833i915_gem_execbuffer_flush(struct drm_device *dev,
834			  uint32_t invalidate_domains,
835			  uint32_t flush_domains,
836			  uint32_t flush_rings)
837{
838	drm_i915_private_t *dev_priv = dev->dev_private;
839	int i, ret;
840
841	if (flush_domains & I915_GEM_DOMAIN_CPU)
842		intel_gtt_chipset_flush();
843
844	if (flush_domains & I915_GEM_DOMAIN_GTT)
845		wmb();
846
847	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
848		for (i = 0; i < I915_NUM_RINGS; i++)
849			if (flush_rings & (1 << i)) {
850				ret = i915_gem_flush_ring(&dev_priv->rings[i],
851				    invalidate_domains, flush_domains);
852				if (ret)
853					return ret;
854			}
855	}
856
857	return 0;
858}
859
860static int
861i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
862{
863	u32 plane, flip_mask;
864	int ret;
865
866	/* Check for any pending flips. As we only maintain a flip queue depth
867	 * of 1, we can simply insert a WAIT for the next display flip prior
868	 * to executing the batch and avoid stalling the CPU.
869	 */
870
871	for (plane = 0; flips >> plane; plane++) {
872		if (((flips >> plane) & 1) == 0)
873			continue;
874
875		if (plane)
876			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
877		else
878			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
879
880		ret = intel_ring_begin(ring, 2);
881		if (ret)
882			return ret;
883
884		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
885		intel_ring_emit(ring, MI_NOOP);
886		intel_ring_advance(ring);
887	}
888
889	return 0;
890}
891
892static int
893i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
894				struct list_head *objects)
895{
896	struct drm_i915_gem_object *obj;
897	struct change_domains cd;
898	int ret;
899
900	memset(&cd, 0, sizeof(cd));
901	list_for_each_entry(obj, objects, exec_list)
902		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
903
904	if (cd.invalidate_domains | cd.flush_domains) {
905#if WATCH_EXEC
906		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
907			  __func__,
908			 cd.invalidate_domains,
909			 cd.flush_domains);
910#endif
911		ret = i915_gem_execbuffer_flush(ring->dev,
912						cd.invalidate_domains,
913						cd.flush_domains,
914						cd.flush_rings);
915		if (ret)
916			return ret;
917	}
918
919	if (cd.flips) {
920		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
921		if (ret)
922			return ret;
923	}
924
925	list_for_each_entry(obj, objects, exec_list) {
926		ret = i915_gem_object_sync(obj, ring);
927		if (ret)
928			return ret;
929	}
930
931	return 0;
932}
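/*
 * move_to_gpu runs the pre-dispatch steps in order: accumulate and emit
 * the combined cache flush/invalidate, wait for any outstanding page flips
 * on the affected planes, and synchronize each object with work still
 * outstanding on other rings via i915_gem_object_sync().
 */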
933
934static bool
935i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
936{
937	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
938}
939
940static int
941validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
942    vm_page_t ***map)
943{
944	vm_page_t *ma;
945	int i, length, page_count;
946
947	/* XXXKIB various limits checking is missing here */
948	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
949	for (i = 0; i < count; i++) {
950		/* First check for malicious input causing overflow */
951		if (exec[i].relocation_count >
952		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
953			return -EINVAL;
954
955		length = exec[i].relocation_count *
956		    sizeof(struct drm_i915_gem_relocation_entry);
957		if (length == 0) {
958			(*map)[i] = NULL;
959			continue;
960		}
961		/*
962		 * Since both start and end of the relocation region
963		 * may not be aligned on a page boundary, be
964		 * conservative and request a page slot for each
965		 * partial page.  Thus +2.
966		 */
967		page_count = howmany(length, PAGE_SIZE) + 2;
968		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
969		    DRM_I915_GEM, M_WAITOK | M_ZERO);
970		if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
971		    exec[i].relocs_ptr, length, VM_PROT_READ | VM_PROT_WRITE,
972		    ma, page_count) == -1) {
973			free(ma, DRM_I915_GEM);
974			(*map)[i] = NULL;
975			return (-EFAULT);
976		}
977	}
978
979	return 0;
980}
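/*
 * Besides the overflow check, the main job here is to wire the user pages
 * backing every relocation array (vm_fault_quick_hold_pages with read and
 * write protection) so that the later nofault copyin/copyout in the fast
 * relocation path cannot fault.  The held pages are released at the end of
 * i915_gem_do_execbuffer().
 */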
981
982static void
983i915_gem_execbuffer_move_to_active(struct list_head *objects,
984				   struct intel_ring_buffer *ring,
985				   u32 seqno)
986{
987	struct drm_i915_gem_object *obj;
988	uint32_t old_read, old_write;
989
990	list_for_each_entry(obj, objects, exec_list) {
991		old_read = obj->base.read_domains;
992		old_write = obj->base.write_domain;
993
994		obj->base.read_domains = obj->base.pending_read_domains;
995		obj->base.write_domain = obj->base.pending_write_domain;
996		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
997
998		i915_gem_object_move_to_active(obj, ring, seqno);
999		if (obj->base.write_domain) {
1000			obj->dirty = 1;
1001			obj->pending_gpu_write = true;
1002			list_move_tail(&obj->gpu_write_list,
1003				       &ring->gpu_write_list);
1004			if (obj->pin_count) /* check for potential scanout */
1005				intel_mark_busy(ring->dev, obj);
1006		}
1007		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
1008		    obj, old_read, old_write);
1009	}
1010
1011	intel_mark_busy(ring->dev, NULL);
1012}
1013
1014int i915_gem_sync_exec_requests;
1015
1016static void
1017i915_gem_execbuffer_retire_commands(struct drm_device *dev,
1018				    struct drm_file *file,
1019				    struct intel_ring_buffer *ring)
1020{
1021	struct drm_i915_gem_request *request;
1022	u32 invalidate;
1023
1024	/*
1025	 * Ensure that the commands in the batch buffer are
1026	 * finished before the interrupt fires.
1027	 *
1028	 * The sampler always gets flushed on i965 (sigh).
1029	 */
1030	invalidate = I915_GEM_DOMAIN_COMMAND;
1031	if (INTEL_INFO(dev)->gen >= 4)
1032		invalidate |= I915_GEM_DOMAIN_SAMPLER;
1033	if (ring->flush(ring, invalidate, 0)) {
1034		i915_gem_next_request_seqno(ring);
1035		return;
1036	}
1037
1038	/* Add a breadcrumb for the completion of the batch buffer */
1039	request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
1040	if (request == NULL || i915_add_request(ring, file, request)) {
1041		i915_gem_next_request_seqno(ring);
1042		free(request, DRM_I915_GEM);
1043	} else if (i915_gem_sync_exec_requests) {
1044		i915_wait_request(ring, request->seqno);
1045		i915_gem_retire_requests(dev);
1046	}
1047}
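/*
 * The function above flushes the command domain (plus the sampler on
 * gen4+) and queues a request as a breadcrumb so completion of the batch
 * can be tracked.  When i915_gem_sync_exec_requests is set (it defaults to
 * zero), every execbuffer is made synchronous by waiting for its request
 * before returning.
 */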
1048
1049static void
1050i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
1051    uint32_t batch_start_offset, uint32_t batch_len)
1052{
1053	char *mkva;
1054	uint64_t po_r, po_w;
1055	uint32_t cmd;
1056
1057	po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
1058	    batch_start_offset + batch_len;
1059	if (batch_len > 0)
1060		po_r -= 4;
1061	mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
1062	    PAT_WRITE_COMBINING);
1063	po_r &= PAGE_MASK;
1064	cmd = *(uint32_t *)(mkva + po_r);
1065
1066	if (cmd != MI_BATCH_BUFFER_END) {
1067		/*
1068		 * batch_len != 0 due to the check at the start of
1069		 * i915_gem_do_execbuffer
1070		 */
1071		if (batch_obj->base.size > batch_start_offset + batch_len) {
1072			po_w = po_r + 4;
1073/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
1074		} else {
1075			po_w = po_r;
1076DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
1077		}
1078		*(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
1079	}
1080
1081	pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
1082}
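/*
 * Debug helper: when the i915_fix_mi_batchbuffer_end knob below is set,
 * the tail of the batch is mapped through the GTT aperture and, if the
 * last dword is not MI_BATCH_BUFFER_END, a terminator is written there
 * (appended after the batch when the object has room, otherwise
 * overwriting the final command).
 */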
1083
1084int i915_fix_mi_batchbuffer_end = 0;
1085
1086static int
1087i915_reset_gen7_sol_offsets(struct drm_device *dev,
1088			    struct intel_ring_buffer *ring)
1089{
1090	drm_i915_private_t *dev_priv = dev->dev_private;
1091	int ret, i;
1092
1093	if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
1094		return 0;
1095
1096	ret = intel_ring_begin(ring, 4 * 3);
1097	if (ret)
1098		return ret;
1099
1100	for (i = 0; i < 4; i++) {
1101		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1102		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1103		intel_ring_emit(ring, 0);
1104	}
1105
1106	intel_ring_advance(ring);
1107
1108	return 0;
1109}
1110
1111static int
1112i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1113		       struct drm_file *file,
1114		       struct drm_i915_gem_execbuffer2 *args,
1115		       struct drm_i915_gem_exec_object2 *exec)
1116{
1117	drm_i915_private_t *dev_priv = dev->dev_private;
1118	struct list_head objects;
1119	struct eb_objects *eb;
1120	struct drm_i915_gem_object *batch_obj;
1121	struct drm_clip_rect *cliprects = NULL;
1122	struct intel_ring_buffer *ring;
1123	vm_page_t **relocs_ma;
1124	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1125	u32 exec_start, exec_len;
1126	u32 seqno;
1127	u32 mask;
1128	int ret, mode, i;
1129
1130	if (!i915_gem_check_execbuffer(args)) {
1131		DRM_DEBUG("execbuf with invalid offset/length\n");
1132		return -EINVAL;
1133	}
1134
1135	if (args->batch_len == 0)
1136		return (0);
1137
1138	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma);
1139	if (ret != 0)
1140		goto pre_struct_lock_err;
1141
1142	switch (args->flags & I915_EXEC_RING_MASK) {
1143	case I915_EXEC_DEFAULT:
1144	case I915_EXEC_RENDER:
1145		ring = &dev_priv->rings[RCS];
1146		break;
1147	case I915_EXEC_BSD:
1148		ring = &dev_priv->rings[VCS];
1149		if (ctx_id != 0) {
1150			DRM_DEBUG("Ring %s doesn't support contexts\n",
1151				  ring->name);
1152			return -EPERM;
1153		}
1154		break;
1155	case I915_EXEC_BLT:
1156		ring = &dev_priv->rings[BCS];
1157		if (ctx_id != 0) {
1158			DRM_DEBUG("Ring %s doesn't support contexts\n",
1159				  ring->name);
1160			return -EPERM;
1161		}
1162		break;
1163	default:
1164		DRM_DEBUG("execbuf with unknown ring: %d\n",
1165			  (int)(args->flags & I915_EXEC_RING_MASK));
1166		ret = -EINVAL;
1167		goto pre_struct_lock_err;
1168	}
1169	if (!intel_ring_initialized(ring)) {
1170		DRM_DEBUG("execbuf with invalid ring: %d\n",
1171			  (int)(args->flags & I915_EXEC_RING_MASK));
1172		return -EINVAL;
1173	}
1174
1175	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1176	mask = I915_EXEC_CONSTANTS_MASK;
1177	switch (mode) {
1178	case I915_EXEC_CONSTANTS_REL_GENERAL:
1179	case I915_EXEC_CONSTANTS_ABSOLUTE:
1180	case I915_EXEC_CONSTANTS_REL_SURFACE:
1181		if (ring == &dev_priv->rings[RCS] &&
1182		    mode != dev_priv->relative_constants_mode) {
1183			if (INTEL_INFO(dev)->gen < 4) {
1184				ret = -EINVAL;
1185				goto pre_struct_lock_err;
1186			}
1187
1188			if (INTEL_INFO(dev)->gen > 5 &&
1189			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1190				ret = -EINVAL;
1191				goto pre_struct_lock_err;
1192			}
1193
1194			/* The HW changed the meaning of this bit on gen6 */
1195			if (INTEL_INFO(dev)->gen >= 6)
1196				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1197		}
1198		break;
1199	default:
1200		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
1201		ret = -EINVAL;
1202		goto pre_struct_lock_err;
1203	}
1204
1205	if (args->buffer_count < 1) {
1206		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1207		ret = -EINVAL;
1208		goto pre_struct_lock_err;
1209	}
1210
1211	if (args->num_cliprects != 0) {
1212		if (ring != &dev_priv->rings[RCS]) {
1213	DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1214			ret = -EINVAL;
1215			goto pre_struct_lock_err;
1216		}
1217
1218		if (INTEL_INFO(dev)->gen >= 5) {
1219			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1220			ret = -EINVAL;
1221			goto pre_struct_lock_err;
1222		}
1223
1224		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1225			DRM_DEBUG("execbuf with %u cliprects\n",
1226				  args->num_cliprects);
1227			ret = -EINVAL;
1228			goto pre_struct_lock_err;
1229		}
1230		cliprects = malloc(sizeof(*cliprects) * args->num_cliprects,
1231		    DRM_I915_GEM, M_WAITOK | M_ZERO);
1232		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
1233		    sizeof(*cliprects) * args->num_cliprects);
1234		if (ret != 0)
1235			goto pre_struct_lock_err;
1236	}
1237
1238	ret = i915_mutex_lock_interruptible(dev);
1239	if (ret)
1240		goto pre_struct_lock_err;
1241
1242	if (dev_priv->mm.suspended) {
1243		DRM_UNLOCK(dev);
1244		ret = -EBUSY;
1245		goto pre_struct_lock_err;
1246	}
1247
1248	eb = eb_create(args->buffer_count);
1249	if (eb == NULL) {
1250		DRM_UNLOCK(dev);
1251		ret = -ENOMEM;
1252		goto pre_struct_lock_err;
1253	}
1254
1255	/* Look up object handles */
1256	INIT_LIST_HEAD(&objects);
1257	for (i = 0; i < args->buffer_count; i++) {
1258		struct drm_i915_gem_object *obj;
1259		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
1260							exec[i].handle));
1261		if (&obj->base == NULL) {
1262			DRM_DEBUG("Invalid object handle %d at index %d\n",
1263				   exec[i].handle, i);
1264			/* prevent error path from reading uninitialized data */
1265			ret = -ENOENT;
1266			goto err;
1267		}
1268
1269		if (!list_empty(&obj->exec_list)) {
1270			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
1271				   obj, exec[i].handle, i);
1272			ret = -EINVAL;
1273			goto err;
1274		}
1275
1276		list_add_tail(&obj->exec_list, &objects);
1277		obj->exec_handle = exec[i].handle;
1278		obj->exec_entry = &exec[i];
1279		eb_add_object(eb, obj);
1280	}
1281
1282	/* take note of the batch buffer before we might reorder the lists */
1283	batch_obj = list_entry(objects.prev,
1284			       struct drm_i915_gem_object,
1285			       exec_list);
1286
1287	/* Move the objects en-masse into the GTT, evicting if necessary. */
1288	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
1289	if (ret)
1290		goto err;
1291
1292	/* The objects are in their final locations, apply the relocations. */
1293	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
1294	if (ret) {
1295		if (ret == -EFAULT) {
1296			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
1297			    &objects, eb, exec, args->buffer_count);
1298			DRM_LOCK_ASSERT(dev);
1299		}
1300		if (ret)
1301			goto err;
1302	}
1303
1304	/* Set the pending read domains for the batch buffer to COMMAND */
1305	if (batch_obj->base.pending_write_domain) {
1306		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1307		ret = -EINVAL;
1308		goto err;
1309	}
1310	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1311
1312	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1313	if (ret)
1314		goto err;
1315
1316	ret = i915_switch_context(ring, file, ctx_id);
1317	if (ret)
1318		goto err;
1319
1320	seqno = i915_gem_next_request_seqno(ring);
1321	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
1322		if (seqno < ring->sync_seqno[i]) {
1323			/* The GPU can not handle its semaphore value wrapping,
1324			 * so every billion or so execbuffers, we need to stall
1325			 * the GPU in order to reset the counters.
1326			 */
1327			ret = i915_gpu_idle(dev);
1328			if (ret)
1329				goto err;
1330			i915_gem_retire_requests(dev);
1331
1332			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
1333		}
1334	}
1335
1336	if (ring == &dev_priv->rings[RCS] &&
1337	    mode != dev_priv->relative_constants_mode) {
1338		ret = intel_ring_begin(ring, 4);
1339		if (ret)
1340			goto err;
1341
1342		intel_ring_emit(ring, MI_NOOP);
1343		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1344		intel_ring_emit(ring, INSTPM);
1345		intel_ring_emit(ring, mask << 16 | mode);
1346		intel_ring_advance(ring);
1347
1348		dev_priv->relative_constants_mode = mode;
1349	}
1350
1351	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1352		ret = i915_reset_gen7_sol_offsets(dev, ring);
1353		if (ret)
1354			goto err;
1355	}
1356
1357	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1358	exec_len = args->batch_len;
1359
1360	if (i915_fix_mi_batchbuffer_end) {
1361		i915_gem_fix_mi_batchbuffer_end(batch_obj,
1362		    args->batch_start_offset, args->batch_len);
1363	}
1364
1365	CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
1366	    exec_start, exec_len);
1367
1368	if (cliprects) {
1369		for (i = 0; i < args->num_cliprects; i++) {
1370			ret = i915_emit_box_p(dev, &cliprects[i],
1371			    args->DR1, args->DR4);
1372			if (ret)
1373				goto err;
1374
1375			ret = ring->dispatch_execbuffer(ring, exec_start,
1376			    exec_len);
1377			if (ret)
1378				goto err;
1379		}
1380	} else {
1381		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
1382		if (ret)
1383			goto err;
1384	}
1385
1386	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1387	i915_gem_execbuffer_retire_commands(dev, file, ring);
1388
1389err:
1390	eb_destroy(eb);
1391	while (!list_empty(&objects)) {
1392		struct drm_i915_gem_object *obj;
1393
1394		obj = list_first_entry(&objects, struct drm_i915_gem_object,
1395		    exec_list);
1396		list_del_init(&obj->exec_list);
1397		drm_gem_object_unreference(&obj->base);
1398	}
1399	DRM_UNLOCK(dev);
1400
1401pre_struct_lock_err:
1402	for (i = 0; i < args->buffer_count; i++) {
1403		if (relocs_ma[i] != NULL) {
1404			vm_page_unhold_pages(relocs_ma[i], howmany(
1405			    exec[i].relocation_count *
1406			    sizeof(struct drm_i915_gem_relocation_entry),
1407			    PAGE_SIZE));
1408			free(relocs_ma[i], DRM_I915_GEM);
1409		}
1410	}
1411	free(relocs_ma, DRM_I915_GEM);
1412	free(cliprects, DRM_I915_GEM);
1413	return ret;
1414}
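/*
 * For illustration only (not code from this driver): a userspace caller
 * reaches i915_gem_do_execbuffer() through the execbuffer2 ioctl by
 * filling in roughly the following, with buffers_ptr pointing at an array
 * of struct drm_i915_gem_exec_object2 whose last element is the batch:
 *
 *	struct drm_i915_gem_execbuffer2 eb2 = { 0 };
 *	eb2.buffers_ptr = (uintptr_t)exec_objects;
 *	eb2.buffer_count = nobjects;		// batch object last
 *	eb2.batch_start_offset = 0;		// 8-byte aligned
 *	eb2.batch_len = batch_bytes;		// 8-byte aligned, non-zero
 *	eb2.flags = I915_EXEC_RENDER;
 *	i915_execbuffer2_set_context_id(eb2, 0);
 *	// then issue DRM_IOCTL_I915_GEM_EXECBUFFER2 on the drm fd
 *
 * Each exec object carries its handle, its relocation list (relocs_ptr,
 * relocation_count) and the offset the kernel last reported, which is
 * copied back to userspace after a successful submission.
 */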
1415
1416/*
1417 * Legacy execbuffer just creates an exec2 list from the original exec object
1418 * list array and passes it to the real function.
1419 */
1420int
1421i915_gem_execbuffer(struct drm_device *dev, void *data,
1422		    struct drm_file *file)
1423{
1424	struct drm_i915_gem_execbuffer *args = data;
1425	struct drm_i915_gem_execbuffer2 exec2;
1426	struct drm_i915_gem_exec_object *exec_list = NULL;
1427	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1428	int ret, i;
1429
1430	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
1431	    (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1432
1433	if (args->buffer_count < 1) {
1434		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1435		return -EINVAL;
1436	}
1437
1438	/* Copy in the exec list from userland */
1439	/* XXXKIB user-controlled malloc size */
1440	exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
1441	    DRM_I915_GEM, M_WAITOK);
1442	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1443	    DRM_I915_GEM, M_WAITOK);
1444	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
1445	    sizeof(*exec_list) * args->buffer_count);
1446	if (ret != 0) {
1447		DRM_DEBUG("copy %d exec entries failed %d\n",
1448			  args->buffer_count, ret);
1449		free(exec_list, DRM_I915_GEM);
1450		free(exec2_list, DRM_I915_GEM);
1451		return (ret);
1452	}
1453
1454	for (i = 0; i < args->buffer_count; i++) {
1455		exec2_list[i].handle = exec_list[i].handle;
1456		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1457		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1458		exec2_list[i].alignment = exec_list[i].alignment;
1459		exec2_list[i].offset = exec_list[i].offset;
1460		if (INTEL_INFO(dev)->gen < 4)
1461			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1462		else
1463			exec2_list[i].flags = 0;
1464	}
1465
1466	exec2.buffers_ptr = args->buffers_ptr;
1467	exec2.buffer_count = args->buffer_count;
1468	exec2.batch_start_offset = args->batch_start_offset;
1469	exec2.batch_len = args->batch_len;
1470	exec2.DR1 = args->DR1;
1471	exec2.DR4 = args->DR4;
1472	exec2.num_cliprects = args->num_cliprects;
1473	exec2.cliprects_ptr = args->cliprects_ptr;
1474	exec2.flags = I915_EXEC_RENDER;
1475	i915_execbuffer2_set_context_id(exec2, 0);
1476
1477	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1478	if (!ret) {
1479		/* Copy the new buffer offsets back to the user's exec list. */
1480		for (i = 0; i < args->buffer_count; i++)
1481			exec_list[i].offset = exec2_list[i].offset;
1482		/* ... and back out to userspace */
1483		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
1484		    sizeof(*exec_list) * args->buffer_count);
1485		if (ret != 0) {
1486			DRM_DEBUG("failed to copy %d exec entries "
1487				  "back to user (%d)\n",
1488				  args->buffer_count, ret);
1489		}
1490	}
1491
1492	free(exec_list, DRM_I915_GEM);
1493	free(exec2_list, DRM_I915_GEM);
1494	return ret;
1495}
1496
1497int
1498i915_gem_execbuffer2(struct drm_device *dev, void *data,
1499		     struct drm_file *file)
1500{
1501	struct drm_i915_gem_execbuffer2 *args = data;
1502	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1503	int ret;
1504
1505	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
1506	    (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);
1507
1508	if (args->buffer_count < 1 ||
1509	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1510		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1511		return -EINVAL;
1512	}
1513
1514	/* XXXKIB user-controllable malloc size */
1515	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1516	    DRM_I915_GEM, M_WAITOK);
1517	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
1518	    sizeof(*exec2_list) * args->buffer_count);
1519	if (ret != 0) {
1520		DRM_DEBUG("copy %d exec entries failed %d\n",
1521			  args->buffer_count, ret);
1522		free(exec2_list, DRM_I915_GEM);
1523		return (ret);
1524	}
1525
1526	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1527	if (!ret) {
1528		/* Copy the new buffer offsets back to the user's exec list. */
1529		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
1530		    sizeof(*exec2_list) * args->buffer_count);
1531		if (ret) {
1532			DRM_DEBUG("failed to copy %d exec entries "
1533				  "back to user (%d)\n",
1534				  args->buffer_count, ret);
1535		}
1536	}
1537
1538	free(exec2_list, DRM_I915_GEM);
1539	return ret;
1540}
1541