1/*
2 * Copyright �� 2008,2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *    Chris Wilson <chris@chris-wilson.co.uk>
26 *
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: releng/10.3/sys/dev/drm2/i915/i915_gem_execbuffer.c 290454 2015-11-06 16:48:33Z jhb $");
31
32#include <dev/drm2/drmP.h>
33#include <dev/drm2/drm.h>
34#include <dev/drm2/i915/i915_drm.h>
35#include <dev/drm2/i915/i915_drv.h>
36#include <dev/drm2/i915/intel_drv.h>
37#include <sys/limits.h>
38#include <sys/sf_buf.h>
39
40struct change_domains {
41	uint32_t invalidate_domains;
42	uint32_t flush_domains;
43	uint32_t flush_rings;
44	uint32_t flips;
45};
46
47/*
48 * Set the next domain for the specified object. This
49 * may not actually perform the necessary flushing/invaliding though,
50 * as that may want to be batched with other set_domain operations
51 *
52 * This is (we hope) the only really tricky part of gem. The goal
53 * is fairly simple -- track which caches hold bits of the object
54 * and make sure they remain coherent. A few concrete examples may
55 * help to explain how it works. For shorthand, we use the notation
56 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
57 * a pair of read and write domain masks.
58 *
59 * Case 1: the batch buffer
60 *
61 *	1. Allocated
62 *	2. Written by CPU
63 *	3. Mapped to GTT
64 *	4. Read by GPU
65 *	5. Unmapped from GTT
66 *	6. Freed
67 *
68 *	Let's take these a step at a time
69 *
70 *	1. Allocated
71 *		Pages allocated from the kernel may still have
72 *		cache contents, so we set them to (CPU, CPU) always.
73 *	2. Written by CPU (using pwrite)
74 *		The pwrite function calls set_domain (CPU, CPU) and
75 *		this function does nothing (as nothing changes)
76 *	3. Mapped by GTT
77 *		This function asserts that the object is not
78 *		currently in any GPU-based read or write domains
79 *	4. Read by GPU
80 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
81 *		As write_domain is zero, this function adds in the
82 *		current read domains (CPU+COMMAND, 0).
83 *		flush_domains is set to CPU.
84 *		invalidate_domains is set to COMMAND
85 *		clflush is run to get data out of the CPU caches
86 *		then i915_dev_set_domain calls i915_gem_flush to
87 *		emit an MI_FLUSH and drm_agp_chipset_flush
88 *	5. Unmapped from GTT
89 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
90 *		flush_domains and invalidate_domains end up both zero
91 *		so no flushing/invalidating happens
92 *	6. Freed
93 *		yay, done
94 *
95 * Case 2: The shared render buffer
96 *
97 *	1. Allocated
98 *	2. Mapped to GTT
99 *	3. Read/written by GPU
100 *	4. set_domain to (CPU,CPU)
101 *	5. Read/written by CPU
102 *	6. Read/written by GPU
103 *
104 *	1. Allocated
105 *		Same as last example, (CPU, CPU)
106 *	2. Mapped to GTT
107 *		Nothing changes (assertions find that it is not in the GPU)
108 *	3. Read/written by GPU
109 *		execbuffer calls set_domain (RENDER, RENDER)
110 *		flush_domains gets CPU
111 *		invalidate_domains gets GPU
112 *		clflush (obj)
113 *		MI_FLUSH and drm_agp_chipset_flush
114 *	4. set_domain (CPU, CPU)
115 *		flush_domains gets GPU
116 *		invalidate_domains gets CPU
117 *		wait_rendering (obj) to make sure all drawing is complete.
118 *		This will include an MI_FLUSH to get the data from GPU
119 *		to memory
120 *		clflush (obj) to invalidate the CPU cache
121 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
122 *	5. Read/written by CPU
123 *		cache lines are loaded and dirtied
124 *	6. Read written by GPU
125 *		Same as last GPU access
126 *
127 * Case 3: The constant buffer
128 *
129 *	1. Allocated
130 *	2. Written by CPU
131 *	3. Read by GPU
132 *	4. Updated (written) by CPU again
133 *	5. Read by GPU
134 *
135 *	1. Allocated
136 *		(CPU, CPU)
137 *	2. Written by CPU
138 *		(CPU, CPU)
139 *	3. Read by GPU
140 *		(CPU+RENDER, 0)
141 *		flush_domains = CPU
142 *		invalidate_domains = RENDER
143 *		clflush (obj)
144 *		MI_FLUSH
145 *		drm_agp_chipset_flush
146 *	4. Updated (written) by CPU again
147 *		(CPU, CPU)
148 *		flush_domains = 0 (no previous write domain)
149 *		invalidate_domains = 0 (no new read domains)
150 *	5. Read by GPU
151 *		(CPU+RENDER, 0)
152 *		flush_domains = CPU
153 *		invalidate_domains = RENDER
154 *		clflush (obj)
155 *		MI_FLUSH
156 *		drm_agp_chipset_flush
157 */
158static void
159i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
160				  struct intel_ring_buffer *ring,
161				  struct change_domains *cd)
162{
163	uint32_t invalidate_domains = 0, flush_domains = 0;
164
165	/*
166	 * If the object isn't moving to a new write domain,
167	 * let the object stay in multiple read domains
168	 */
169	if (obj->base.pending_write_domain == 0)
170		obj->base.pending_read_domains |= obj->base.read_domains;
171
172	/*
173	 * Flush the current write domain if
174	 * the new read domains don't match. Invalidate
175	 * any read domains which differ from the old
176	 * write domain
177	 */
178	if (obj->base.write_domain &&
179	    (((obj->base.write_domain != obj->base.pending_read_domains ||
180	       obj->ring != ring)) ||
181	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
182		flush_domains |= obj->base.write_domain;
183		invalidate_domains |=
184			obj->base.pending_read_domains & ~obj->base.write_domain;
185	}
186	/*
187	 * Invalidate any read caches which may have
188	 * stale data. That is, any new read domains.
189	 */
190	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
191	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
192		i915_gem_clflush_object(obj);
193
194	if (obj->base.pending_write_domain)
195		cd->flips |= atomic_load_acq_int(&obj->pending_flip);
196
197	/* The actual obj->write_domain will be updated with
198	 * pending_write_domain after we emit the accumulated flush for all
199	 * of our domain changes in execbuffers (which clears objects'
200	 * write_domains).  So if we have a current write domain that we
201	 * aren't changing, set pending_write_domain to that.
202	 */
203	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
204		obj->base.pending_write_domain = obj->base.write_domain;
205
206	cd->invalidate_domains |= invalidate_domains;
207	cd->flush_domains |= flush_domains;
208	if (flush_domains & I915_GEM_GPU_DOMAINS)
209		cd->flush_rings |= intel_ring_flag(obj->ring);
210	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
211		cd->flush_rings |= intel_ring_flag(ring);
212}
213
214struct eb_objects {
215	u_long hashmask;
216	LIST_HEAD(, drm_i915_gem_object) *buckets;
217};
218
219static struct eb_objects *
220eb_create(int size)
221{
222	struct eb_objects *eb;
223
224	eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
225	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
226	return (eb);
227}
228
229static void
230eb_reset(struct eb_objects *eb)
231{
232	int i;
233
234	for (i = 0; i <= eb->hashmask; i++)
235		LIST_INIT(&eb->buckets[i]);
236}
237
238static void
239eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
240{
241
242	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
243	    obj, exec_node);
244}
245
246static struct drm_i915_gem_object *
247eb_get_object(struct eb_objects *eb, unsigned long handle)
248{
249	struct drm_i915_gem_object *obj;
250
251	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
252		if (obj->exec_handle == handle)
253			return (obj);
254	}
255	return (NULL);
256}
257
258static void
259eb_destroy(struct eb_objects *eb)
260{
261
262	free(eb->buckets, DRM_I915_GEM);
263	free(eb, DRM_I915_GEM);
264}
265
266static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
267{
268	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
269		obj->cache_level != I915_CACHE_NONE);
270}
271
272static int
273i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
274				   struct eb_objects *eb,
275				   struct drm_i915_gem_relocation_entry *reloc)
276{
277	struct drm_device *dev = obj->base.dev;
278	struct drm_gem_object *target_obj;
279	struct drm_i915_gem_object *target_i915_obj;
280	uint32_t target_offset;
281	int ret = -EINVAL;
282
283	/* we've already hold a reference to all valid objects */
284	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
285	if (unlikely(target_obj == NULL))
286		return -ENOENT;
287
288	target_i915_obj = to_intel_bo(target_obj);
289	target_offset = target_i915_obj->gtt_offset;
290
291#if WATCH_RELOC
292	DRM_INFO("%s: obj %p offset %08x target %d "
293		 "read %08x write %08x gtt %08x "
294		 "presumed %08x delta %08x\n",
295		 __func__,
296		 obj,
297		 (int) reloc->offset,
298		 (int) reloc->target_handle,
299		 (int) reloc->read_domains,
300		 (int) reloc->write_domain,
301		 (int) target_offset,
302		 (int) reloc->presumed_offset,
303		 reloc->delta);
304#endif
305
306	/* The target buffer should have appeared before us in the
307	 * exec_object list, so it should have a GTT space bound by now.
308	 */
309	if (unlikely(target_offset == 0)) {
310		DRM_DEBUG("No GTT space found for object %d\n",
311			  reloc->target_handle);
312		return ret;
313	}
314
315	/* Validate that the target is in a valid r/w GPU domain */
316	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
317		DRM_DEBUG("reloc with multiple write domains: "
318			  "obj %p target %d offset %d "
319			  "read %08x write %08x",
320			  obj, reloc->target_handle,
321			  (int) reloc->offset,
322			  reloc->read_domains,
323			  reloc->write_domain);
324		return ret;
325	}
326	if (unlikely((reloc->write_domain | reloc->read_domains)
327		     & ~I915_GEM_GPU_DOMAINS)) {
328		DRM_DEBUG("reloc with read/write non-GPU domains: "
329			  "obj %p target %d offset %d "
330			  "read %08x write %08x",
331			  obj, reloc->target_handle,
332			  (int) reloc->offset,
333			  reloc->read_domains,
334			  reloc->write_domain);
335		return ret;
336	}
337	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
338		     reloc->write_domain != target_obj->pending_write_domain)) {
339		DRM_DEBUG("Write domain conflict: "
340			  "obj %p target %d offset %d "
341			  "new %08x old %08x\n",
342			  obj, reloc->target_handle,
343			  (int) reloc->offset,
344			  reloc->write_domain,
345			  target_obj->pending_write_domain);
346		return ret;
347	}
348
349	target_obj->pending_read_domains |= reloc->read_domains;
350	target_obj->pending_write_domain |= reloc->write_domain;
351
352	/* If the relocation already has the right value in it, no
353	 * more work needs to be done.
354	 */
355	if (target_offset == reloc->presumed_offset)
356		return 0;
357
358	/* Check that the relocation address is valid... */
359	if (unlikely(reloc->offset > obj->base.size - 4)) {
360		DRM_DEBUG("Relocation beyond object bounds: "
361			  "obj %p target %d offset %d size %d.\n",
362			  obj, reloc->target_handle,
363			  (int) reloc->offset,
364			  (int) obj->base.size);
365		return ret;
366	}
367	if (unlikely(reloc->offset & 3)) {
368		DRM_DEBUG("Relocation not 4-byte aligned: "
369			  "obj %p target %d offset %d.\n",
370			  obj, reloc->target_handle,
371			  (int) reloc->offset);
372		return ret;
373	}
374
375	/* We can't wait for rendering with pagefaults disabled */
376	if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
377		return (-EFAULT);
378
379	reloc->delta += target_offset;
380	if (use_cpu_reloc(obj)) {
381		uint32_t page_offset = reloc->offset & PAGE_MASK;
382		char *vaddr;
383		struct sf_buf *sf;
384
385		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
386		if (ret)
387			return ret;
388
389		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
390		    SFB_NOWAIT);
391		if (sf == NULL)
392			return (-ENOMEM);
393		vaddr = (void *)sf_buf_kva(sf);
394		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
395		sf_buf_free(sf);
396	} else {
397		uint32_t *reloc_entry;
398		char *reloc_page;
399
400		ret = i915_gem_object_set_to_gtt_domain(obj, true);
401		if (ret)
402			return ret;
403
404		ret = i915_gem_object_put_fence(obj);
405		if (ret)
406			return ret;
407
408		/*
409		 * Map the page containing the relocation we're going
410		 * to perform.
411		 */
412		reloc->offset += obj->gtt_offset;
413		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
414		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
415		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
416		    PAGE_MASK));
417		*(volatile uint32_t *)reloc_entry = reloc->delta;
418		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
419	}
420
421	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
422	 * pipe_control writes because the gpu doesn't properly redirect them
423	 * through the ppgtt for non_secure batchbuffers. */
424	if (unlikely(IS_GEN6(dev) &&
425	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
426	    !target_i915_obj->has_global_gtt_mapping)) {
427		i915_gem_gtt_bind_object(target_i915_obj,
428					 target_i915_obj->cache_level);
429	}
430
431	/* and update the user's relocation entry */
432	reloc->presumed_offset = target_offset;
433
434	return 0;
435}
436
437static int
438i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
439    struct eb_objects *eb)
440{
441#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
442	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
443	struct drm_i915_gem_relocation_entry *user_relocs;
444	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
445	int remain, ret;
446
447	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
448	remain = entry->relocation_count;
449	while (remain) {
450		struct drm_i915_gem_relocation_entry *r = stack_reloc;
451		int count = remain;
452		if (count > DRM_ARRAY_SIZE(stack_reloc))
453			count = DRM_ARRAY_SIZE(stack_reloc);
454		remain -= count;
455
456		ret = -copyin_nofault(user_relocs, r, count*sizeof(r[0]));
457		if (ret != 0)
458			return (ret);
459
460		do {
461			u64 offset = r->presumed_offset;
462
463			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
464			if (ret)
465				return ret;
466
467			if (r->presumed_offset != offset &&
468			    copyout_nofault(&r->presumed_offset,
469					    &user_relocs->presumed_offset,
470					    sizeof(r->presumed_offset))) {
471				return -EFAULT;
472			}
473
474			user_relocs++;
475			r++;
476		} while (--count);
477	}
478#undef N_RELOC
479	return (0);
480}
481
482static int
483i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
484    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
485{
486	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
487	int i, ret;
488
489	for (i = 0; i < entry->relocation_count; i++) {
490		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
491		if (ret)
492			return ret;
493	}
494
495	return 0;
496}
497
498static int
499i915_gem_execbuffer_relocate(struct drm_device *dev,
500			     struct eb_objects *eb,
501			     struct list_head *objects)
502{
503	struct drm_i915_gem_object *obj;
504	int ret, pflags;
505
506	/* Try to move as many of the relocation targets off the active list
507	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
508	 * for the retirement with pagefaults disabled.
509	 */
510	i915_gem_retire_requests(dev);
511
512	ret = 0;
513	pflags = vm_fault_disable_pagefaults();
514	/* This is the fast path and we cannot handle a pagefault whilst
515	 * holding the device lock lest the user pass in the relocations
516	 * contained within a mmaped bo. For in such a case we, the page
517	 * fault handler would call i915_gem_fault() and we would try to
518	 * acquire the device lock again. Obviously this is bad.
519	 */
520
521	list_for_each_entry(obj, objects, exec_list) {
522		ret = i915_gem_execbuffer_relocate_object(obj, eb);
523		if (ret != 0)
524			break;
525	}
526	vm_fault_enable_pagefaults(pflags);
527	return (ret);
528}
529
530#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
531
532static int
533need_reloc_mappable(struct drm_i915_gem_object *obj)
534{
535	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
536	return entry->relocation_count && !use_cpu_reloc(obj);
537}
538
539static int
540pin_and_fence_object(struct drm_i915_gem_object *obj,
541		     struct intel_ring_buffer *ring)
542{
543	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
544	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
545	bool need_fence, need_mappable;
546	int ret;
547
548	need_fence =
549		has_fenced_gpu_access &&
550		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
551		obj->tiling_mode != I915_TILING_NONE;
552	need_mappable = need_fence || need_reloc_mappable(obj);
553
554	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
555	if (ret)
556		return ret;
557
558	if (has_fenced_gpu_access) {
559		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
560			ret = i915_gem_object_get_fence(obj);
561			if (ret)
562				goto err_unpin;
563
564			if (i915_gem_object_pin_fence(obj))
565				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
566
567			obj->pending_fenced_gpu_access = true;
568		}
569	}
570
571	entry->offset = obj->gtt_offset;
572	return 0;
573
574err_unpin:
575	i915_gem_object_unpin(obj);
576	return ret;
577}
578
579static int
580i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
581			    struct drm_file *file,
582			    struct list_head *objects)
583{
584	drm_i915_private_t *dev_priv;
585	struct drm_i915_gem_object *obj;
586	int ret, retry;
587	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
588	struct list_head ordered_objects;
589
590	dev_priv = ring->dev->dev_private;
591	INIT_LIST_HEAD(&ordered_objects);
592	while (!list_empty(objects)) {
593		struct drm_i915_gem_exec_object2 *entry;
594		bool need_fence, need_mappable;
595
596		obj = list_first_entry(objects,
597				       struct drm_i915_gem_object,
598				       exec_list);
599		entry = obj->exec_entry;
600
601		need_fence =
602			has_fenced_gpu_access &&
603			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
604			obj->tiling_mode != I915_TILING_NONE;
605		need_mappable = need_fence || need_reloc_mappable(obj);
606
607		if (need_mappable)
608			list_move(&obj->exec_list, &ordered_objects);
609		else
610			list_move_tail(&obj->exec_list, &ordered_objects);
611
612		obj->base.pending_read_domains = 0;
613		obj->base.pending_write_domain = 0;
614	}
615	list_splice(&ordered_objects, objects);
616
617	/* Attempt to pin all of the buffers into the GTT.
618	 * This is done in 3 phases:
619	 *
620	 * 1a. Unbind all objects that do not match the GTT constraints for
621	 *     the execbuffer (fenceable, mappable, alignment etc).
622	 * 1b. Increment pin count for already bound objects and obtain
623	 *     a fence register if required.
624	 * 2.  Bind new objects.
625	 * 3.  Decrement pin count.
626	 *
627	 * This avoid unnecessary unbinding of later objects in order to makr
628	 * room for the earlier objects *unless* we need to defragment.
629	 */
630	retry = 0;
631	do {
632		ret = 0;
633
634		/* Unbind any ill-fitting objects or pin. */
635		list_for_each_entry(obj, objects, exec_list) {
636			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
637			bool need_fence, need_mappable;
638
639			if (!obj->gtt_space)
640				continue;
641
642			need_fence =
643				has_fenced_gpu_access &&
644				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
645				obj->tiling_mode != I915_TILING_NONE;
646			need_mappable = need_fence || need_reloc_mappable(obj);
647
648			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
649			    (need_mappable && !obj->map_and_fenceable))
650				ret = i915_gem_object_unbind(obj);
651			else
652				ret = pin_and_fence_object(obj, ring);
653			if (ret)
654				goto err;
655		}
656
657		/* Bind fresh objects */
658		list_for_each_entry(obj, objects, exec_list) {
659			if (obj->gtt_space)
660				continue;
661
662			ret = pin_and_fence_object(obj, ring);
663			if (ret) {
664				int ret_ignore;
665
666				/* This can potentially raise a harmless
667				 * -EINVAL if we failed to bind in the above
668				 * call. It cannot raise -EINTR since we know
669				 * that the bo is freshly bound and so will
670				 * not need to be flushed or waited upon.
671				 */
672				ret_ignore = i915_gem_object_unbind(obj);
673				(void)ret_ignore;
674				if (obj->gtt_space != NULL)
675					printf("%s: gtt_space\n", __func__);
676				break;
677			}
678		}
679
680		/* Decrement pin count for bound objects */
681		list_for_each_entry(obj, objects, exec_list) {
682			struct drm_i915_gem_exec_object2 *entry;
683
684			if (!obj->gtt_space)
685				continue;
686
687			entry = obj->exec_entry;
688			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
689				i915_gem_object_unpin_fence(obj);
690				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
691			}
692
693			i915_gem_object_unpin(obj);
694
695			/* ... and ensure ppgtt mapping exist if needed. */
696			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
697				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
698						       obj, obj->cache_level);
699
700				obj->has_aliasing_ppgtt_mapping = 1;
701			}
702		}
703
704		if (ret != -ENOSPC || retry > 1)
705			return ret;
706
707		/* First attempt, just clear anything that is purgeable.
708		 * Second attempt, clear the entire GTT.
709		 */
710		ret = i915_gem_evict_everything(ring->dev, retry == 0);
711		if (ret)
712			return ret;
713
714		retry++;
715	} while (1);
716
717err:
718	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
719		struct drm_i915_gem_exec_object2 *entry;
720
721		if (!obj->gtt_space)
722			continue;
723
724		entry = obj->exec_entry;
725		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
726			i915_gem_object_unpin_fence(obj);
727			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
728		}
729
730		i915_gem_object_unpin(obj);
731	}
732
733	return ret;
734}
735
736static int
737i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
738    struct drm_file *file, struct intel_ring_buffer *ring,
739    struct list_head *objects, struct eb_objects *eb,
740    struct drm_i915_gem_exec_object2 *exec, int count)
741{
742	struct drm_i915_gem_relocation_entry *reloc;
743	struct drm_i915_gem_object *obj;
744	int *reloc_offset;
745	int i, total, ret;
746
747	/* We may process another execbuffer during the unlock... */
748	while (!list_empty(objects)) {
749		obj = list_first_entry(objects,
750				       struct drm_i915_gem_object,
751				       exec_list);
752		list_del_init(&obj->exec_list);
753		drm_gem_object_unreference(&obj->base);
754	}
755
756	DRM_UNLOCK(dev);
757
758	total = 0;
759	for (i = 0; i < count; i++)
760		total += exec[i].relocation_count;
761
762	reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
763	    M_WAITOK | M_ZERO);
764	reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);
765
766	total = 0;
767	for (i = 0; i < count; i++) {
768		struct drm_i915_gem_relocation_entry *user_relocs;
769
770		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
771		ret = -copyin(user_relocs, reloc + total,
772		    exec[i].relocation_count * sizeof(*reloc));
773		if (ret != 0) {
774			DRM_LOCK(dev);
775			goto err;
776		}
777
778		reloc_offset[i] = total;
779		total += exec[i].relocation_count;
780	}
781
782	ret = i915_mutex_lock_interruptible(dev);
783	if (ret) {
784		DRM_LOCK(dev);
785		goto err;
786	}
787
788	/* reacquire the objects */
789	eb_reset(eb);
790	for (i = 0; i < count; i++) {
791		struct drm_i915_gem_object *obj;
792
793		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
794							exec[i].handle));
795		if (&obj->base == NULL) {
796			DRM_DEBUG("Invalid object handle %d at index %d\n",
797				   exec[i].handle, i);
798			ret = -ENOENT;
799			goto err;
800		}
801
802		list_add_tail(&obj->exec_list, objects);
803		obj->exec_handle = exec[i].handle;
804		obj->exec_entry = &exec[i];
805		eb_add_object(eb, obj);
806	}
807
808	ret = i915_gem_execbuffer_reserve(ring, file, objects);
809	if (ret)
810		goto err;
811
812	list_for_each_entry(obj, objects, exec_list) {
813		int offset = obj->exec_entry - exec;
814		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
815		    reloc + reloc_offset[offset]);
816		if (ret)
817			goto err;
818	}
819
820	/* Leave the user relocations as are, this is the painfully slow path,
821	 * and we want to avoid the complication of dropping the lock whilst
822	 * having buffers reserved in the aperture and so causing spurious
823	 * ENOSPC for random operations.
824	 */
825
826err:
827	free(reloc, DRM_I915_GEM);
828	free(reloc_offset, DRM_I915_GEM);
829	return ret;
830}
831
832static int
833i915_gem_execbuffer_flush(struct drm_device *dev,
834			  uint32_t invalidate_domains,
835			  uint32_t flush_domains,
836			  uint32_t flush_rings)
837{
838	drm_i915_private_t *dev_priv = dev->dev_private;
839	int i, ret;
840
841	if (flush_domains & I915_GEM_DOMAIN_CPU)
842		intel_gtt_chipset_flush();
843
844	if (flush_domains & I915_GEM_DOMAIN_GTT)
845		wmb();
846
847	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
848		for (i = 0; i < I915_NUM_RINGS; i++)
849			if (flush_rings & (1 << i)) {
850				ret = i915_gem_flush_ring(&dev_priv->rings[i],
851				    invalidate_domains, flush_domains);
852				if (ret)
853					return ret;
854			}
855	}
856
857	return 0;
858}
859
860static int
861i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
862{
863	u32 plane, flip_mask;
864	int ret;
865
866	/* Check for any pending flips. As we only maintain a flip queue depth
867	 * of 1, we can simply insert a WAIT for the next display flip prior
868	 * to executing the batch and avoid stalling the CPU.
869	 */
870
871	for (plane = 0; flips >> plane; plane++) {
872		if (((flips >> plane) & 1) == 0)
873			continue;
874
875		if (plane)
876			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
877		else
878			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
879
880		ret = intel_ring_begin(ring, 2);
881		if (ret)
882			return ret;
883
884		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
885		intel_ring_emit(ring, MI_NOOP);
886		intel_ring_advance(ring);
887	}
888
889	return 0;
890}
891
892static int
893i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
894				struct list_head *objects)
895{
896	struct drm_i915_gem_object *obj;
897	struct change_domains cd;
898	int ret;
899
900	memset(&cd, 0, sizeof(cd));
901	list_for_each_entry(obj, objects, exec_list)
902		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
903
904	if (cd.invalidate_domains | cd.flush_domains) {
905#if WATCH_EXEC
906		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
907			  __func__,
908			 cd.invalidate_domains,
909			 cd.flush_domains);
910#endif
911		ret = i915_gem_execbuffer_flush(ring->dev,
912						cd.invalidate_domains,
913						cd.flush_domains,
914						cd.flush_rings);
915		if (ret)
916			return ret;
917	}
918
919	if (cd.flips) {
920		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
921		if (ret)
922			return ret;
923	}
924
925	list_for_each_entry(obj, objects, exec_list) {
926		ret = i915_gem_object_sync(obj, ring);
927		if (ret)
928			return ret;
929	}
930
931	return 0;
932}
933
934static bool
935i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
936{
937	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
938}
939
940static int
941validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
942    vm_page_t ***map, int **maplen)
943{
944	vm_page_t *ma;
945	int i, length, page_count;
946
947	/* XXXKIB various limits checking is missing there */
948	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
949	*maplen = malloc(count * sizeof(*maplen), DRM_I915_GEM, M_WAITOK |
950	    M_ZERO);
951	for (i = 0; i < count; i++) {
952		/* First check for malicious input causing overflow */
953		if (exec[i].relocation_count >
954		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
955			return -EINVAL;
956
957		length = exec[i].relocation_count *
958		    sizeof(struct drm_i915_gem_relocation_entry);
959		if (length == 0) {
960			(*map)[i] = NULL;
961			continue;
962		}
963		/*
964		 * Since both start and end of the relocation region
965		 * may be not aligned on the page boundary, be
966		 * conservative and request a page slot for each
967		 * partial page.  Thus +2.
968		 */
969		page_count = howmany(length, PAGE_SIZE) + 2;
970		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
971		    DRM_I915_GEM, M_WAITOK | M_ZERO);
972		(*maplen)[i] = vm_fault_quick_hold_pages(
973		    &curproc->p_vmspace->vm_map, exec[i].relocs_ptr, length,
974		    VM_PROT_READ | VM_PROT_WRITE, ma, page_count);
975		if ((*maplen)[i] == -1) {
976			free(ma, DRM_I915_GEM);
977			(*map)[i] = NULL;
978			return (-EFAULT);
979		}
980	}
981
982	return 0;
983}
984
985static void
986i915_gem_execbuffer_move_to_active(struct list_head *objects,
987				   struct intel_ring_buffer *ring,
988				   u32 seqno)
989{
990	struct drm_i915_gem_object *obj;
991	uint32_t old_read, old_write;
992
993	list_for_each_entry(obj, objects, exec_list) {
994		old_read = obj->base.read_domains;
995		old_write = obj->base.write_domain;
996
997		obj->base.read_domains = obj->base.pending_read_domains;
998		obj->base.write_domain = obj->base.pending_write_domain;
999		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
1000
1001		i915_gem_object_move_to_active(obj, ring, seqno);
1002		if (obj->base.write_domain) {
1003			obj->dirty = 1;
1004			obj->pending_gpu_write = true;
1005			list_move_tail(&obj->gpu_write_list,
1006				       &ring->gpu_write_list);
1007			if (obj->pin_count) /* check for potential scanout */
1008				intel_mark_busy(ring->dev, obj);
1009		}
1010		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
1011		    obj, old_read, old_write);
1012	}
1013
1014	intel_mark_busy(ring->dev, NULL);
1015}
1016
1017int i915_gem_sync_exec_requests;
1018
1019static void
1020i915_gem_execbuffer_retire_commands(struct drm_device *dev,
1021				    struct drm_file *file,
1022				    struct intel_ring_buffer *ring)
1023{
1024	struct drm_i915_gem_request *request;
1025	u32 invalidate;
1026
1027	/*
1028	 * Ensure that the commands in the batch buffer are
1029	 * finished before the interrupt fires.
1030	 *
1031	 * The sampler always gets flushed on i965 (sigh).
1032	 */
1033	invalidate = I915_GEM_DOMAIN_COMMAND;
1034	if (INTEL_INFO(dev)->gen >= 4)
1035		invalidate |= I915_GEM_DOMAIN_SAMPLER;
1036	if (ring->flush(ring, invalidate, 0)) {
1037		i915_gem_next_request_seqno(ring);
1038		return;
1039	}
1040
1041	/* Add a breadcrumb for the completion of the batch buffer */
1042	request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
1043	if (request == NULL || i915_add_request(ring, file, request)) {
1044		i915_gem_next_request_seqno(ring);
1045		free(request, DRM_I915_GEM);
1046	} else if (i915_gem_sync_exec_requests) {
1047		i915_wait_request(ring, request->seqno);
1048		i915_gem_retire_requests(dev);
1049	}
1050}
1051
1052static void
1053i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
1054    uint32_t batch_start_offset, uint32_t batch_len)
1055{
1056	char *mkva;
1057	uint64_t po_r, po_w;
1058	uint32_t cmd;
1059
1060	po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
1061	    batch_start_offset + batch_len;
1062	if (batch_len > 0)
1063		po_r -= 4;
1064	mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
1065	    PAT_WRITE_COMBINING);
1066	po_r &= PAGE_MASK;
1067	cmd = *(uint32_t *)(mkva + po_r);
1068
1069	if (cmd != MI_BATCH_BUFFER_END) {
1070		/*
1071		 * batch_len != 0 due to the check at the start of
1072		 * i915_gem_do_execbuffer
1073		 */
1074		if (batch_obj->base.size > batch_start_offset + batch_len) {
1075			po_w = po_r + 4;
1076/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
1077		} else {
1078			po_w = po_r;
1079DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
1080		}
1081		*(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
1082	}
1083
1084	pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
1085}
1086
1087int i915_fix_mi_batchbuffer_end = 0;
1088
1089 static int
1090i915_reset_gen7_sol_offsets(struct drm_device *dev,
1091			    struct intel_ring_buffer *ring)
1092{
1093	drm_i915_private_t *dev_priv = dev->dev_private;
1094	int ret, i;
1095
1096	if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
1097		return 0;
1098
1099	ret = intel_ring_begin(ring, 4 * 3);
1100	if (ret)
1101		return ret;
1102
1103	for (i = 0; i < 4; i++) {
1104		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1105		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1106		intel_ring_emit(ring, 0);
1107	}
1108
1109	intel_ring_advance(ring);
1110
1111	return 0;
1112}
1113
1114static int
1115i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1116		       struct drm_file *file,
1117		       struct drm_i915_gem_execbuffer2 *args,
1118		       struct drm_i915_gem_exec_object2 *exec)
1119{
1120	drm_i915_private_t *dev_priv = dev->dev_private;
1121	struct list_head objects;
1122	struct eb_objects *eb;
1123	struct drm_i915_gem_object *batch_obj;
1124	struct drm_clip_rect *cliprects = NULL;
1125	struct intel_ring_buffer *ring;
1126	vm_page_t **relocs_ma;
1127	int *relocs_len;
1128	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1129	u32 exec_start, exec_len;
1130	u32 seqno;
1131	u32 mask;
1132	int ret, mode, i;
1133
1134	if (!i915_gem_check_execbuffer(args)) {
1135		DRM_DEBUG("execbuf with invalid offset/length\n");
1136		return -EINVAL;
1137	}
1138
1139	if (args->batch_len == 0)
1140		return (0);
1141
1142	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma,
1143	    &relocs_len);
1144	if (ret != 0)
1145		goto pre_struct_lock_err;
1146
1147	switch (args->flags & I915_EXEC_RING_MASK) {
1148	case I915_EXEC_DEFAULT:
1149	case I915_EXEC_RENDER:
1150		ring = &dev_priv->rings[RCS];
1151		break;
1152	case I915_EXEC_BSD:
1153		ring = &dev_priv->rings[VCS];
1154		if (ctx_id != 0) {
1155			DRM_DEBUG("Ring %s doesn't support contexts\n",
1156				  ring->name);
1157			ret = -EPERM;
1158			goto pre_struct_lock_err;
1159		}
1160		break;
1161	case I915_EXEC_BLT:
1162		ring = &dev_priv->rings[BCS];
1163		if (ctx_id != 0) {
1164			DRM_DEBUG("Ring %s doesn't support contexts\n",
1165				  ring->name);
1166			ret = -EPERM;
1167			goto pre_struct_lock_err;
1168		}
1169		break;
1170	default:
1171		DRM_DEBUG("execbuf with unknown ring: %d\n",
1172			  (int)(args->flags & I915_EXEC_RING_MASK));
1173		ret = -EINVAL;
1174		goto pre_struct_lock_err;
1175	}
1176	if (!intel_ring_initialized(ring)) {
1177		DRM_DEBUG("execbuf with invalid ring: %d\n",
1178			  (int)(args->flags & I915_EXEC_RING_MASK));
1179		ret = -EINVAL;
1180		goto pre_struct_lock_err;
1181	}
1182
1183	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1184	mask = I915_EXEC_CONSTANTS_MASK;
1185	switch (mode) {
1186	case I915_EXEC_CONSTANTS_REL_GENERAL:
1187	case I915_EXEC_CONSTANTS_ABSOLUTE:
1188	case I915_EXEC_CONSTANTS_REL_SURFACE:
1189		if (ring == &dev_priv->rings[RCS] &&
1190		    mode != dev_priv->relative_constants_mode) {
1191			if (INTEL_INFO(dev)->gen < 4) {
1192				ret = -EINVAL;
1193				goto pre_struct_lock_err;
1194			}
1195
1196			if (INTEL_INFO(dev)->gen > 5 &&
1197			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1198				ret = -EINVAL;
1199				goto pre_struct_lock_err;
1200			}
1201
1202			/* The HW changed the meaning on this bit on gen6 */
1203			if (INTEL_INFO(dev)->gen >= 6)
1204				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1205		}
1206		break;
1207	default:
1208		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
1209		ret = -EINVAL;
1210		goto pre_struct_lock_err;
1211	}
1212
1213	if (args->buffer_count < 1) {
1214		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1215		ret = -EINVAL;
1216		goto pre_struct_lock_err;
1217	}
1218
1219	if (args->num_cliprects != 0) {
1220		if (ring != &dev_priv->rings[RCS]) {
1221	DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1222			ret = -EINVAL;
1223			goto pre_struct_lock_err;
1224		}
1225
1226		if (INTEL_INFO(dev)->gen >= 5) {
1227			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1228			ret = -EINVAL;
1229			goto pre_struct_lock_err;
1230		}
1231
1232		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1233			DRM_DEBUG("execbuf with %u cliprects\n",
1234				  args->num_cliprects);
1235			ret = -EINVAL;
1236			goto pre_struct_lock_err;
1237		}
1238		cliprects = malloc( sizeof(*cliprects) * args->num_cliprects,
1239		    DRM_I915_GEM, M_WAITOK | M_ZERO);
1240		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
1241		    sizeof(*cliprects) * args->num_cliprects);
1242		if (ret != 0)
1243			goto pre_struct_lock_err;
1244	}
1245
1246	ret = i915_mutex_lock_interruptible(dev);
1247	if (ret)
1248		goto pre_struct_lock_err;
1249
1250	if (dev_priv->mm.suspended) {
1251		DRM_UNLOCK(dev);
1252		ret = -EBUSY;
1253		goto pre_struct_lock_err;
1254	}
1255
1256	eb = eb_create(args->buffer_count);
1257	if (eb == NULL) {
1258		DRM_UNLOCK(dev);
1259		ret = -ENOMEM;
1260		goto pre_struct_lock_err;
1261	}
1262
1263	/* Look up object handles */
1264	INIT_LIST_HEAD(&objects);
1265	for (i = 0; i < args->buffer_count; i++) {
1266		struct drm_i915_gem_object *obj;
1267		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
1268							exec[i].handle));
1269		if (&obj->base == NULL) {
1270			DRM_DEBUG("Invalid object handle %d at index %d\n",
1271				   exec[i].handle, i);
1272			/* prevent error path from reading uninitialized data */
1273			ret = -ENOENT;
1274			goto err;
1275		}
1276
1277		if (!list_empty(&obj->exec_list)) {
1278			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
1279				   obj, exec[i].handle, i);
1280			ret = -EINVAL;
1281			goto err;
1282		}
1283
1284		list_add_tail(&obj->exec_list, &objects);
1285		obj->exec_handle = exec[i].handle;
1286		obj->exec_entry = &exec[i];
1287		eb_add_object(eb, obj);
1288	}
1289
1290	/* take note of the batch buffer before we might reorder the lists */
1291	batch_obj = list_entry(objects.prev,
1292			       struct drm_i915_gem_object,
1293			       exec_list);
1294
1295	/* Move the objects en-masse into the GTT, evicting if necessary. */
1296	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
1297	if (ret)
1298		goto err;
1299
1300	/* The objects are in their final locations, apply the relocations. */
1301	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
1302	if (ret) {
1303		if (ret == -EFAULT) {
1304			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
1305			    &objects, eb, exec,	args->buffer_count);
1306			DRM_LOCK_ASSERT(dev);
1307		}
1308		if (ret)
1309			goto err;
1310	}
1311
1312	/* Set the pending read domains for the batch buffer to COMMAND */
1313	if (batch_obj->base.pending_write_domain) {
1314		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1315		ret = -EINVAL;
1316		goto err;
1317	}
1318	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1319
1320	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1321	if (ret)
1322		goto err;
1323
1324	ret = i915_switch_context(ring, file, ctx_id);
1325	if (ret)
1326		goto err;
1327
1328	seqno = i915_gem_next_request_seqno(ring);
1329	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
1330		if (seqno < ring->sync_seqno[i]) {
1331			/* The GPU can not handle its semaphore value wrapping,
1332			 * so every billion or so execbuffers, we need to stall
1333			 * the GPU in order to reset the counters.
1334			 */
1335			ret = i915_gpu_idle(dev);
1336			if (ret)
1337				goto err;
1338			i915_gem_retire_requests(dev);
1339
1340			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
1341		}
1342	}
1343
1344	if (ring == &dev_priv->rings[RCS] &&
1345	    mode != dev_priv->relative_constants_mode) {
1346		ret = intel_ring_begin(ring, 4);
1347		if (ret)
1348			goto err;
1349
1350		intel_ring_emit(ring, MI_NOOP);
1351		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1352		intel_ring_emit(ring, INSTPM);
1353		intel_ring_emit(ring, mask << 16 | mode);
1354		intel_ring_advance(ring);
1355
1356		dev_priv->relative_constants_mode = mode;
1357	}
1358
1359	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1360		ret = i915_reset_gen7_sol_offsets(dev, ring);
1361		if (ret)
1362			goto err;
1363	}
1364
1365	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1366	exec_len = args->batch_len;
1367
1368	if (i915_fix_mi_batchbuffer_end) {
1369		i915_gem_fix_mi_batchbuffer_end(batch_obj,
1370		    args->batch_start_offset, args->batch_len);
1371	}
1372
1373	CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
1374	    exec_start, exec_len);
1375
1376	if (cliprects) {
1377		for (i = 0; i < args->num_cliprects; i++) {
1378			ret = i915_emit_box_p(dev, &cliprects[i],
1379			    args->DR1, args->DR4);
1380			if (ret)
1381				goto err;
1382
1383			ret = ring->dispatch_execbuffer(ring, exec_start,
1384			    exec_len);
1385			if (ret)
1386				goto err;
1387		}
1388	} else {
1389		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
1390		if (ret)
1391			goto err;
1392	}
1393
1394	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1395	i915_gem_execbuffer_retire_commands(dev, file, ring);
1396
1397err:
1398	eb_destroy(eb);
1399	while (!list_empty(&objects)) {
1400		struct drm_i915_gem_object *obj;
1401
1402		obj = list_first_entry(&objects, struct drm_i915_gem_object,
1403		    exec_list);
1404		list_del_init(&obj->exec_list);
1405		drm_gem_object_unreference(&obj->base);
1406	}
1407	DRM_UNLOCK(dev);
1408
1409pre_struct_lock_err:
1410	for (i = 0; i < args->buffer_count; i++) {
1411		if (relocs_ma[i] != NULL) {
1412			vm_page_unhold_pages(relocs_ma[i], relocs_len[i]);
1413			free(relocs_ma[i], DRM_I915_GEM);
1414		}
1415	}
1416	free(relocs_len, DRM_I915_GEM);
1417	free(relocs_ma, DRM_I915_GEM);
1418	free(cliprects, DRM_I915_GEM);
1419	return ret;
1420}
1421
1422/*
1423 * Legacy execbuffer just creates an exec2 list from the original exec object
1424 * list array and passes it to the real function.
1425 */
1426int
1427i915_gem_execbuffer(struct drm_device *dev, void *data,
1428		    struct drm_file *file)
1429{
1430	struct drm_i915_gem_execbuffer *args = data;
1431	struct drm_i915_gem_execbuffer2 exec2;
1432	struct drm_i915_gem_exec_object *exec_list = NULL;
1433	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1434	int ret, i;
1435
1436	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
1437	    (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1438
1439	if (args->buffer_count < 1) {
1440		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1441		return -EINVAL;
1442	}
1443
1444	/* Copy in the exec list from userland */
1445	/* XXXKIB user-controlled malloc size */
1446	exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
1447	    DRM_I915_GEM, M_WAITOK);
1448	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1449	    DRM_I915_GEM, M_WAITOK);
1450	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
1451	    sizeof(*exec_list) * args->buffer_count);
1452	if (ret != 0) {
1453		DRM_DEBUG("copy %d exec entries failed %d\n",
1454			  args->buffer_count, ret);
1455		free(exec_list, DRM_I915_GEM);
1456		free(exec2_list, DRM_I915_GEM);
1457		return (ret);
1458	}
1459
1460	for (i = 0; i < args->buffer_count; i++) {
1461		exec2_list[i].handle = exec_list[i].handle;
1462		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1463		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1464		exec2_list[i].alignment = exec_list[i].alignment;
1465		exec2_list[i].offset = exec_list[i].offset;
1466		if (INTEL_INFO(dev)->gen < 4)
1467			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1468		else
1469			exec2_list[i].flags = 0;
1470	}
1471
1472	exec2.buffers_ptr = args->buffers_ptr;
1473	exec2.buffer_count = args->buffer_count;
1474	exec2.batch_start_offset = args->batch_start_offset;
1475	exec2.batch_len = args->batch_len;
1476	exec2.DR1 = args->DR1;
1477	exec2.DR4 = args->DR4;
1478	exec2.num_cliprects = args->num_cliprects;
1479	exec2.cliprects_ptr = args->cliprects_ptr;
1480	exec2.flags = I915_EXEC_RENDER;
1481	i915_execbuffer2_set_context_id(exec2, 0);
1482
1483	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1484	if (!ret) {
1485		/* Copy the new buffer offsets back to the user's exec list. */
1486		for (i = 0; i < args->buffer_count; i++)
1487			exec_list[i].offset = exec2_list[i].offset;
1488		/* ... and back out to userspace */
1489		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
1490		    sizeof(*exec_list) * args->buffer_count);
1491		if (ret != 0) {
1492			DRM_DEBUG("failed to copy %d exec entries "
1493				  "back to user (%d)\n",
1494				  args->buffer_count, ret);
1495		}
1496	}
1497
1498	free(exec_list, DRM_I915_GEM);
1499	free(exec2_list, DRM_I915_GEM);
1500	return ret;
1501}
1502
1503int
1504i915_gem_execbuffer2(struct drm_device *dev, void *data,
1505		     struct drm_file *file)
1506{
1507	struct drm_i915_gem_execbuffer2 *args = data;
1508	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1509	int ret;
1510
1511	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
1512	    (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);
1513
1514	if (args->buffer_count < 1 ||
1515	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1516		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1517		return -EINVAL;
1518	}
1519
1520	/* XXXKIB user-controllable malloc size */
1521	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1522	    DRM_I915_GEM, M_WAITOK);
1523	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
1524	    sizeof(*exec2_list) * args->buffer_count);
1525	if (ret != 0) {
1526		DRM_DEBUG("copy %d exec entries failed %d\n",
1527			  args->buffer_count, ret);
1528		free(exec2_list, DRM_I915_GEM);
1529		return (ret);
1530	}
1531
1532	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1533	if (!ret) {
1534		/* Copy the new buffer offsets back to the user's exec list. */
1535		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
1536		    sizeof(*exec2_list) * args->buffer_count);
1537		if (ret) {
1538			DRM_DEBUG("failed to copy %d exec entries "
1539				  "back to user (%d)\n",
1540				  args->buffer_count, ret);
1541		}
1542	}
1543
1544	free(exec2_list, DRM_I915_GEM);
1545	return ret;
1546}
1547