/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <dev/drm2/drmP.h>
#include <dev/drm2/drm.h>
#include <dev/drm2/i915/i915_drm.h>
#include <dev/drm2/i915/i915_drv.h>
#include <dev/drm2/i915/intel_drv.h>
#include <sys/limits.h>
#include <sys/sf_buf.h>

struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
	uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped to GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
				  struct intel_ring_buffer *ring,
				  struct change_domains *cd)
{
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (obj->base.pending_write_domain == 0)
		obj->base.pending_read_domains |= obj->base.read_domains;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->base.write_domain &&
	    (((obj->base.write_domain != obj->base.pending_read_domains ||
	       obj->ring != ring)) ||
	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
		flush_domains |= obj->base.write_domain;
		invalidate_domains |=
			obj->base.pending_read_domains & ~obj->base.write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
		i915_gem_clflush_object(obj);

	if (obj->base.pending_write_domain)
		cd->flips |= atomic_load_acq_int(&obj->pending_flip);

	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains).  So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
		obj->base.pending_write_domain = obj->base.write_domain;

	cd->invalidate_domains |= invalidate_domains;
	cd->flush_domains |= flush_domains;
	if (flush_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(obj->ring);
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(ring);
}

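/*
 * Per-execbuffer hash table mapping userspace handles to GEM objects,
 * so relocation targets can be looked up cheaply while processing the
 * relocation lists.
 */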
struct eb_objects {
	u_long hashmask;
	LIST_HEAD(, drm_i915_gem_object) *buckets;
};

static struct eb_objects *
eb_create(int size)
{
	struct eb_objects *eb;

	eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
	return (eb);
}

static void
eb_reset(struct eb_objects *eb)
{
	int i;

	for (i = 0; i <= eb->hashmask; i++)
		LIST_INIT(&eb->buckets[i]);
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{

	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
	    obj, exec_node);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	struct drm_i915_gem_object *obj;

	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
		if (obj->exec_handle == handle)
			return (obj);
	}
	return (NULL);
}

static void
eb_destroy(struct eb_objects *eb)
{

	free(eb->buckets, DRM_I915_GEM);
	free(eb, DRM_I915_GEM);
}

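/*
 * A relocation can be patched through the CPU (via an sf_buf mapping of
 * the backing page) when the object is already in the CPU write domain
 * or has a cacheable (non-NONE) cache level; otherwise it is written
 * through a write-combining GTT mapping.
 */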
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		obj->cache_level != I915_CACHE_NONE);
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

#if WATCH_RELOC
	DRM_INFO("%s: obj %p offset %08x target %d "
		 "read %08x write %08x gtt %08x "
		 "presumed %08x delta %08x\n",
		 __func__,
		 obj,
		 (int) reloc->offset,
		 (int) reloc->target_handle,
		 (int) reloc->read_domains,
		 (int) reloc->write_domain,
		 (int) target_offset,
		 (int) reloc->presumed_offset,
		 reloc->delta);
#endif

	/* The target buffer should have appeared before us in the
	 * exec_object list, so it should have a GTT space bound by now.
	 */
	if (unlikely(target_offset == 0)) {
		DRM_DEBUG("No GTT space found for object %d\n",
			  reloc->target_handle);
		return ret;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
		     reloc->write_domain != target_obj->pending_write_domain)) {
		DRM_DEBUG("Write domain conflict: "
			  "obj %p target %d offset %d "
			  "new %08x old %08x\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->write_domain,
			  target_obj->pending_write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
		return (-EFAULT);

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;
		struct sf_buf *sf;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
		    SFB_NOWAIT);
		if (sf == NULL)
			return (-ENOMEM);
		vaddr = (void *)sf_buf_kva(sf);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		sf_buf_free(sf);
	} else {
		uint32_t *reloc_entry;
		char *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/*
		 * Map the page containing the relocation we're going
		 * to perform.
		 */
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
		    PAGE_MASK));
		*(volatile uint32_t *)reloc_entry = reloc->delta;
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

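/*
 * Fast relocation path: copy the user's relocation entries onto the stack
 * in batches with copyin_nofault() (the caller has pagefaults disabled),
 * apply them, and write back any presumed offsets that changed.
 */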
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > DRM_ARRAY_SIZE(stack_reloc))
			count = DRM_ARRAY_SIZE(stack_reloc);
		remain -= count;

		ret = -copyin_nofault(user_relocs, r, count*sizeof(r[0]));
		if (ret != 0)
			return (ret);

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    copyout_nofault(&r->presumed_offset,
					    &user_relocs->presumed_offset,
					    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}
#undef N_RELOC
	return (0);
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb,
			     struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	int ret, pflags;

	/* Try to move as many of the relocation targets off the active list
	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
	 * for the retirement with pagefaults disabled.
	 */
	i915_gem_retire_requests(dev);

	ret = 0;
	pflags = vm_fault_disable_pagefaults();
	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the device lock lest the user pass in the relocations
	 * contained within a mmaped bo. In such a case the page fault
	 * handler would call i915_gem_fault() and we would try to
	 * acquire the device lock again. Obviously this is bad.
	 */

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret != 0)
			break;
	}
	vm_fault_enable_pagefaults(pflags);
	return (ret);
}

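/*
 * Private flag stored in the exec entry to remember that
 * pin_and_fence_object() pinned a fence register for this object, so the
 * reserve path knows to release it again.
 */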
#define  __EXEC_OBJECT_HAS_FENCE (1<<31)

static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(obj);
}

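/*
 * Pin the object into the GTT (mappable if a fence or GTT relocation is
 * needed) and, on pre-gen4 hardware, acquire and pin a fence register
 * when the exec entry requests one.
 */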
static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
		     struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(obj);

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
	if (ret)
		return ret;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
				goto err_unpin;

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	entry->offset = obj->gtt_offset;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
	return ret;
}

static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct drm_file *file,
			    struct list_head *objects)
{
	drm_i915_private_t *dev_priv;
	struct drm_i915_gem_object *obj;
	int ret, retry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	struct list_head ordered_objects;

	dev_priv = ring->dev->dev_private;
	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(obj);

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects and obtain
	 *     a fence register if required.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = pin_and_fence_object(obj, ring);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = pin_and_fence_object(obj, ring);
			if (ret) {
				int ret_ignore;

				/* This can potentially raise a harmless
				 * -EINVAL if we failed to bind in the above
				 * call. It cannot raise -EINTR since we know
				 * that the bo is freshly bound and so will
				 * not need to be flushed or waited upon.
				 */
				ret_ignore = i915_gem_object_unbind(obj);
				(void)ret_ignore;
				if (obj->gtt_space != NULL)
					printf("%s: gtt_space\n", __func__);
				break;
			}
		}

		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry;

			if (!obj->gtt_space)
				continue;

			entry = obj->exec_entry;
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
				i915_gem_object_unpin_fence(obj);
				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
			}

			i915_gem_object_unpin(obj);

			/* ... and ensure ppgtt mapping exists if needed. */
			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
						       obj, obj->cache_level);

				obj->has_aliasing_ppgtt_mapping = 1;
			}
		}

		if (ret != -ENOSPC || retry > 1)
			return ret;

		/* First attempt, just clear anything that is purgeable.
		 * Second attempt, clear the entire GTT.
		 */
		ret = i915_gem_evict_everything(ring->dev, retry == 0);
		if (ret)
			return ret;

		retry++;
	} while (1);

err:
	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
		struct drm_i915_gem_exec_object2 *entry;

		if (!obj->gtt_space)
			continue;

		entry = obj->exec_entry;
		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
			i915_gem_object_unpin_fence(obj);
			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
		}

		i915_gem_object_unpin(obj);
	}

	return ret;
}

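/*
 * Slow relocation path, used when the fast path hit a fault: drop the
 * objects and the lock, copy every relocation list into the kernel with
 * copyin() (faults allowed), then reacquire the objects, reserve them and
 * apply the relocations from the kernel copy.
 */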
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
    struct drm_file *file, struct intel_ring_buffer *ring,
    struct list_head *objects, struct eb_objects *eb,
    struct drm_i915_gem_exec_object2 *exec, int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	int *reloc_offset;
	int i, total, ret;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(objects)) {
		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
	    M_WAITOK | M_ZERO);
	reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry *user_relocs;

		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
		ret = -copyin(user_relocs, reloc + total,
		    exec[i].relocation_count * sizeof(*reloc));
		if (ret != 0) {
			DRM_LOCK(dev);
			goto err;
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		DRM_LOCK(dev);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		list_add_tail(&obj->exec_list, objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	ret = i915_gem_execbuffer_reserve(ring, file, objects);
	if (ret)
		goto err;

	list_for_each_entry(obj, objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
		    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are, this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	free(reloc, DRM_I915_GEM);
	free(reloc_offset, DRM_I915_GEM);
	return ret;
}

static int
i915_gem_execbuffer_flush(struct drm_device *dev,
			  uint32_t invalidate_domains,
			  uint32_t flush_domains,
			  uint32_t flush_rings)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i, ret;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		for (i = 0; i < I915_NUM_RINGS; i++)
			if (flush_rings & (1 << i)) {
				ret = i915_gem_flush_ring(&dev_priv->rings[i],
				    invalidate_domains, flush_domains);
				if (ret)
					return ret;
			}
	}

	return 0;
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
	u32 plane, flip_mask;
	int ret;

	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */

	for (plane = 0; flips >> plane; plane++) {
		if (((flips >> plane) & 1) == 0)
			continue;

		if (plane)
			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
		else
			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	}

	return 0;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	struct change_domains cd;
	int ret;

	memset(&cd, 0, sizeof(cd));
	list_for_each_entry(obj, objects, exec_list)
		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

	if (cd.invalidate_domains | cd.flush_domains) {
#if WATCH_EXEC
		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
			 __func__,
			 cd.invalidate_domains,
			 cd.flush_domains);
#endif
		ret = i915_gem_execbuffer_flush(ring->dev,
						cd.invalidate_domains,
						cd.flush_domains,
						cd.flush_rings);
		if (ret)
			return ret;
	}

	if (cd.flips) {
		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
		if (ret)
			return ret;
	}

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;
	}

	return 0;
}

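/* The batch start offset and length must both be 8-byte aligned. */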
static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

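/*
 * Sanity-check the relocation counts and wire the userspace relocation
 * arrays into memory with vm_fault_quick_hold_pages(), so they remain
 * resident while pagefaults are disabled.  The held pages and their
 * counts are returned through *map and *maplen.
 */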
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
    vm_page_t ***map, int **maplen)
{
	vm_page_t *ma;
	int i, length, page_count;

	/* XXXKIB various limits checking is missing there */
	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
	*maplen = malloc(count * sizeof(*maplen), DRM_I915_GEM, M_WAITOK |
	    M_ZERO);
	for (i = 0; i < count; i++) {
		/* First check for malicious input causing overflow */
		if (exec[i].relocation_count >
		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
			return -EINVAL;

		length = exec[i].relocation_count *
		    sizeof(struct drm_i915_gem_relocation_entry);
		if (length == 0) {
			(*map)[i] = NULL;
			continue;
		}
		/*
		 * Since both start and end of the relocation region
		 * may be not aligned on the page boundary, be
		 * conservative and request a page slot for each
		 * partial page.  Thus +2.
		 */
		page_count = howmany(length, PAGE_SIZE) + 2;
		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		(*maplen)[i] = vm_fault_quick_hold_pages(
		    &curproc->p_vmspace->vm_map, exec[i].relocs_ptr, length,
		    VM_PROT_READ | VM_PROT_WRITE, ma, page_count);
		if ((*maplen)[i] == -1) {
			free(ma, DRM_I915_GEM);
			(*map)[i] = NULL;
			return (-EFAULT);
		}
	}

	return 0;
}

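/*
 * Commit the pending read/write domains and mark each object active on
 * the ring; objects gaining a GPU write domain are also flagged dirty and
 * moved onto the ring's gpu_write_list.
 */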
static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring,
				   u32 seqno)
{
	struct drm_i915_gem_object *obj;
	uint32_t old_read, old_write;

	list_for_each_entry(obj, objects, exec_list) {
		old_read = obj->base.read_domains;
		old_write = obj->base.write_domain;

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring, seqno);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->pending_gpu_write = true;
			list_move_tail(&obj->gpu_write_list,
				       &ring->gpu_write_list);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_busy(ring->dev, obj);
		}
		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
		    obj, old_read, old_write);
	}

	intel_mark_busy(ring->dev, NULL);
}

int i915_gem_sync_exec_requests;

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_request *request;
	u32 invalidate;

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires.
	 *
	 * The sampler always gets flushed on i965 (sigh).
	 */
	invalidate = I915_GEM_DOMAIN_COMMAND;
	if (INTEL_INFO(dev)->gen >= 4)
		invalidate |= I915_GEM_DOMAIN_SAMPLER;
	if (ring->flush(ring, invalidate, 0)) {
		i915_gem_next_request_seqno(ring);
		return;
	}

	/* Add a breadcrumb for the completion of the batch buffer */
	request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
	if (request == NULL || i915_add_request(ring, file, request)) {
		i915_gem_next_request_seqno(ring);
		free(request, DRM_I915_GEM);
	} else if (i915_gem_sync_exec_requests) {
		i915_wait_request(ring, request->seqno);
		i915_gem_retire_requests(dev);
	}
}

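/*
 * Optional workaround (enabled via i915_fix_mi_batchbuffer_end below):
 * map the tail of the batch through the GTT and make sure it terminates
 * with MI_BATCH_BUFFER_END, appending or overwriting the last dword if
 * it does not.
 */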
static void
i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
    uint32_t batch_start_offset, uint32_t batch_len)
{
	char *mkva;
	uint64_t po_r, po_w;
	uint32_t cmd;

	po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
	    batch_start_offset + batch_len;
	if (batch_len > 0)
		po_r -= 4;
	mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
	    PAT_WRITE_COMBINING);
	po_r &= PAGE_MASK;
	cmd = *(uint32_t *)(mkva + po_r);

	if (cmd != MI_BATCH_BUFFER_END) {
		/*
		 * batch_len != 0 due to the check at the start of
		 * i915_gem_do_execbuffer
		 */
		if (batch_obj->base.size > batch_start_offset + batch_len) {
			po_w = po_r + 4;
/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
		} else {
			po_w = po_r;
DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
		}
		*(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
	}

	pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
}

int i915_fix_mi_batchbuffer_end = 0;

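/*
 * On the gen7 render ring, reset the four GEN7_SO_WRITE_OFFSET registers
 * to zero with MI_LOAD_REGISTER_IMM; a no-op on other rings and
 * generations.
 */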
static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

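/*
 * Core of the execbuffer ioctls: validate the request, look up and
 * reserve every object, apply relocations (falling back to the slow path
 * on a fault), flush caches, switch context and dispatch the batch on the
 * selected ring.
 */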
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct list_head objects;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	vm_page_t **relocs_ma;
	int *relocs_len;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 seqno;
	u32 mask;
	int ret, mode, i;

	if (!i915_gem_check_execbuffer(args)) {
		DRM_DEBUG("execbuf with invalid offset/length\n");
		return -EINVAL;
	}

	if (args->batch_len == 0)
		return (0);

	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma,
	    &relocs_len);
	if (ret != 0)
		goto pre_struct_lock_err;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->rings[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->rings[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_struct_lock_err;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->rings[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_struct_lock_err;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->rings[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->rings[RCS]) {
	DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}
		cliprects = malloc(sizeof(*cliprects) * args->num_cliprects,
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
		    sizeof(*cliprects) * args->num_cliprects);
		if (ret != 0)
			goto pre_struct_lock_err;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_struct_lock_err;

	if (dev_priv->mm.suspended) {
		DRM_UNLOCK(dev);
		ret = -EBUSY;
		goto pre_struct_lock_err;
	}

	eb = eb_create(args->buffer_count);
	if (eb == NULL) {
		DRM_UNLOCK(dev);
		ret = -ENOMEM;
		goto pre_struct_lock_err;
	}

	/* Look up object handles */
	INIT_LIST_HEAD(&objects);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;
		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			/* prevent error path from reading uninitialized data */
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->exec_list)) {
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		list_add_tail(&obj->exec_list, &objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
			    &objects, eb, exec, args->buffer_count);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	seqno = i915_gem_next_request_seqno(ring);
	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
		if (seqno < ring->sync_seqno[i]) {
			/* The GPU can not handle its semaphore value wrapping,
			 * so every billion or so execbuffers, we need to stall
			 * the GPU in order to reset the counters.
			 */
			ret = i915_gpu_idle(dev);
			if (ret)
				goto err;
			i915_gem_retire_requests(dev);

			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
		}
	}

	if (ring == &dev_priv->rings[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;

	if (i915_fix_mi_batchbuffer_end) {
		i915_gem_fix_mi_batchbuffer_end(batch_obj,
		    args->batch_start_offset, args->batch_len);
	}

	CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
	    exec_start, exec_len);

	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box_p(dev, &cliprects[i],
			    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring, exec_start,
			    exec_len);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
		if (ret)
			goto err;
	}

	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);
	while (!list_empty(&objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&objects, struct drm_i915_gem_object,
		    exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
	DRM_UNLOCK(dev);

pre_struct_lock_err:
	for (i = 0; i < args->buffer_count; i++) {
		if (relocs_ma[i] != NULL) {
			vm_page_unhold_pages(relocs_ma[i], relocs_len[i]);
			free(relocs_ma[i], DRM_I915_GEM);
		}
	}
	free(relocs_len, DRM_I915_GEM);
	free(relocs_ma, DRM_I915_GEM);
	free(cliprects, DRM_I915_GEM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
	    (int) args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	/* XXXKIB user-controlled malloc size */
	exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
	    sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		free(exec_list, DRM_I915_GEM);
		free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec_list) * args->buffer_count);
		if (ret != 0) {
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	free(exec_list, DRM_I915_GEM);
	free(exec2_list, DRM_I915_GEM);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
	    (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* XXXKIB user-controllable malloc size */
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
	    sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	free(exec2_list, DRM_I915_GEM);
	return ret;
}