/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <dev/drm2/drmP.h>
#include <dev/drm2/i915/i915_drm.h>
#include <dev/drm2/i915/i915_drv.h>
#include <dev/drm2/i915/intel_drv.h>

#include <sys/limits.h>
#include <sys/sf_buf.h>

struct eb_objects {
	int and;
	struct hlist_head buckets[0];
};

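/*
 * eb_objects is a small hash table mapping execbuffer handles to GEM objects.
 * The bucket count is always a power of two, so "and" holds the bucket mask
 * (count - 1) used when indexing buckets[].
 */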
static struct eb_objects *
eb_create(int size)
{
	struct eb_objects *eb;
	int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
	BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
	while (count > size)
		count >>= 1;
	eb = malloc(count*sizeof(struct hlist_head) +
		     sizeof(struct eb_objects),
		     DRM_I915_GEM, M_WAITOK | M_ZERO);
	if (eb == NULL)
		return eb;

	eb->and = count - 1;
	return eb;
}

static void
eb_reset(struct eb_objects *eb)
{
	memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{
	hlist_add_head(&obj->exec_node,
		       &eb->buckets[obj->exec_handle & eb->and]);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct drm_i915_gem_object *obj;

	head = &eb->buckets[handle & eb->and];
	hlist_for_each(node, head) {
		obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
		if (obj->exec_handle == handle)
			return obj;
	}

	return NULL;
}

static void
eb_destroy(struct eb_objects *eb)
{
	free(eb, DRM_I915_GEM);
}

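/*
 * A relocation is written through the CPU when the object is already in the
 * CPU write domain, cannot be reached through the mappable aperture, or is
 * kept at a cacheable level; otherwise it is written through the GTT.
 */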
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		!obj->map_and_fenceable ||
		obj->cache_level != I915_CACHE_NONE);
}

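/*
 * Apply a single relocation entry: validate it, then write the target's GTT
 * offset plus the relocation delta into the batch object, either through a
 * CPU mapping (sf_buf) or through a GTT mapping.
 */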
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
		     reloc->write_domain != target_obj->pending_write_domain)) {
		DRM_DEBUG("Write domain conflict: "
			  "obj %p target %d offset %d "
			  "new %08x old %08x\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->write_domain,
			  target_obj->pending_write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
		return -EFAULT;

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;
		struct sf_buf *sf;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
		    SFB_NOWAIT);
		if (sf == NULL)
			return -ENOMEM;
		vaddr = (void *)sf_buf_kva(sf);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		sf_buf_free(sf);
	} else {
		struct drm_i915_private *dev_priv = dev->dev_private;
		uint32_t __iomem *reloc_entry;
		char __iomem *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/* Map the page containing the relocation we're going to perform.  */
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev_priv->mm.gtt_base_addr + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t __iomem *)
			(reloc_page + (reloc->offset & PAGE_MASK));
		*(volatile uint32_t *)reloc_entry = reloc->delta;
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

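/*
 * Fast-path relocation: copy the user's relocation entries in chunks onto a
 * small stack buffer using non-faulting copies, apply each one, and write
 * back any presumed_offset that changed.
 */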
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
			return -EFAULT;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
						    &r->presumed_offset,
						    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
					 struct eb_objects *eb,
					 struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb,
			     struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	int ret = 0, pflags;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. In such a case the page fault
	 * handler would call i915_gem_fault() and we would try to acquire
	 * the struct mutex again. Obviously this is bad and so lockdep
	 * complains vehemently.
	 */
	pflags = vm_fault_disable_pagefaults();
	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret)
			break;
	}
	vm_fault_enable_pagefaults(pflags);

	return ret;
}

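/*
 * Bookkeeping flags kept in the top bits of the exec entry flags while an
 * object is reserved; cleared again by i915_gem_execbuffer_unreserve_object().
 */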
#define  __EXEC_OBJECT_HAS_PIN (1<<31)
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)

static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(obj);
}

static int
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
				   struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(obj);

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
				return ret;

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	/* Ensure ppgtt mapping exists if needed */
	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
				       obj, obj->cache_level);

		obj->has_aliasing_ppgtt_mapping = 1;
	}

	entry->offset = obj->gtt_offset;
	return 0;
}

static void
i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry;

	if (!obj->gtt_space)
		return;

	entry = obj->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		i915_gem_object_unpin(obj);

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}

static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct drm_file *file,
			    struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	struct list_head ordered_objects;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

	INIT_LIST_HEAD(&ordered_objects);
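	/*
	 * Build ordered_objects so that objects which must be mappable
	 * (fence users and GTT relocation targets) are placed at the head
	 * of the list and therefore reserved first.
	 */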
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(obj);

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
		obj->pending_fenced_gpu_access = false;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = i915_gem_execbuffer_reserve_object(obj, ring);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = i915_gem_execbuffer_reserve_object(obj, ring);
			if (ret)
				goto err;
		}

err:		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list)
			i915_gem_execbuffer_unreserve_object(obj);

		if (ret != -ENOSPC || retry++)
			return ret;

		ret = i915_gem_evict_everything(ring->dev);
		if (ret)
			return ret;
	} while (1);
}

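/*
 * Slow-path relocation: drop the struct mutex, copy every relocation list
 * from userspace with pagefaults enabled, invalidate the user's presumed
 * offsets, then retake the lock, look the objects up again and apply the
 * relocations from the kernel copy.
 */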
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_file *file,
				  struct intel_ring_buffer *ring,
				  struct list_head *objects,
				  struct eb_objects *eb,
				  struct drm_i915_gem_exec_object2 *exec,
				  int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	int *reloc_offset;
	int i, total, ret;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(objects)) {
		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		DRM_LOCK(dev);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			DRM_LOCK(dev);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (copy_to_user(&user_relocs[j].presumed_offset,
					 &invalid_offset,
					 sizeof(invalid_offset))) {
				ret = -EFAULT;
				DRM_LOCK(dev);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		DRM_LOCK(dev);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	for (i = 0; i < count; i++) {
		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		list_add_tail(&obj->exec_list, objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	ret = i915_gem_execbuffer_reserve(ring, file, objects);
	if (ret)
		goto err;

	list_for_each_entry(obj, objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
							       reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are; this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
	u32 plane, flip_mask;
	int ret;

	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */

	for (plane = 0; flips >> plane; plane++) {
		if (((flips >> plane) & 1) == 0)
			continue;

		if (plane)
			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
		else
			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	}

	return 0;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	uint32_t flush_domains = 0;
	uint32_t flips = 0;
	int ret;

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			i915_gem_clflush_object(obj);

		if (obj->base.pending_write_domain)
			flips |= atomic_read(&obj->pending_flip);

		flush_domains |= obj->base.write_domain;
	}

	if (flips) {
		ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
		if (ret)
			return ret;
	}

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

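/*
 * Sanity-check the exec list and wire down the pages backing each buffer's
 * user relocation list with vm_fault_quick_hold_pages(), so that the
 * relocation fast path can access them while pagefaults are disabled.  The
 * held pages are released again at the end of i915_gem_do_execbuffer().
 */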
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count, vm_page_t ***map, int **maplen)
{
	int i;
	int relocs_total = 0;
	int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
	vm_page_t *ma;

	/* XXXKIB various limits checking is missing here */
	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
	*maplen = malloc(count * sizeof(*maplen), DRM_I915_GEM, M_WAITOK |
	    M_ZERO);

	for (i = 0; i < count; i++) {
		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
		int length; /* limited by fault_in_pages_readable() */

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		if (length == 0) {
			(*map)[i] = NULL;
			continue;
		}

		/*
		 * Since both the start and the end of the relocation
		 * region may not be aligned on a page boundary, be
		 * conservative and request a page slot for each
		 * partial page.  Thus +2.
		 */
		int page_count;

		page_count = howmany(length, PAGE_SIZE) + 2;
		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		(*maplen)[i] = vm_fault_quick_hold_pages(
		    &curproc->p_vmspace->vm_map, (vm_offset_t)ptr, length,
		    VM_PROT_READ | VM_PROT_WRITE, ma, page_count);
		if ((*maplen)[i] == -1) {
			free(ma, DRM_I915_GEM);
			(*map)[i] = NULL;
			return -EFAULT;
		}
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, objects, exec_list) {
#if defined(KTR)
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;
#endif

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->last_write_seqno = intel_ring_get_seqno(ring);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_fb_busy(obj);
		}

		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
		    obj, old_read, old_write);
	}
}

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)i915_add_request(ring, file, NULL);
}

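/*
 * Emit MI_LOAD_REGISTER_IMM commands that zero the four GEN7_SO_WRITE_OFFSET
 * registers on the gen7 render ring; other rings and generations return
 * without emitting anything.  Called when I915_EXEC_GEN7_SOL_RESET is set.
 */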
static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

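/*
 * Core execbuffer path shared by the legacy and execbuffer2 ioctls: validate
 * the arguments, select the ring, look up and reserve every object in the
 * GTT, apply relocations (falling back to the slow path on -EFAULT), flush
 * the objects to the GPU domain and finally dispatch the batch buffer.
 */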
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct list_head objects;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 mask;
	u32 flags;
	int ret, mode, i;
	vm_page_t **relocs_ma;
	int *relocs_len;

	if (!i915_gem_check_execbuffer(args)) {
		DRM_DEBUG("execbuf with invalid offset/length\n");
		return -EINVAL;
	}

	ret = validate_exec_list(exec, args->buffer_count,
	    &relocs_ma, &relocs_len);
	if (ret)
		goto pre_mutex_err;

	flags = 0;
	if (args->flags & I915_EXEC_SECURE) {
		if (!file->is_master || !capable(CAP_SYS_ADMIN)) {
			ret = -EPERM;
			goto pre_mutex_err;
		}

		flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->ring[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->ring[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_mutex_err;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->ring[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_mutex_err;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_mutex_err;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_mutex_err;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->ring[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				ret = -EINVAL;
				goto pre_mutex_err;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				ret = -EINVAL;
				goto pre_mutex_err;
			}

			/* The HW changed the meaning of this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		ret = -EINVAL;
		goto pre_mutex_err;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		ret = -EINVAL;
		goto pre_mutex_err;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			ret = -EINVAL;
			goto pre_mutex_err;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			ret = -EINVAL;
			goto pre_mutex_err;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			ret = -EINVAL;
			goto pre_mutex_err;
		}

		cliprects = malloc(args->num_cliprects * sizeof(*cliprects),
				    DRM_I915_GEM, M_WAITOK);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		if (copy_from_user(cliprects,
				     (struct drm_clip_rect __user *)(uintptr_t)
				     args->cliprects_ptr,
				     sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->mm.suspended) {
		DRM_UNLOCK(dev);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	eb = eb_create(args->buffer_count);
	if (eb == NULL) {
		DRM_UNLOCK(dev);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	INIT_LIST_HEAD(&objects);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			/* prevent error path from reading uninitialized data */
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->exec_list)) {
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		list_add_tail(&obj->exec_list, &objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
								&objects, eb,
								exec,
								args->buffer_count);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but let's be paranoid and do it
	 * unconditionally for now. */
	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);

	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	if (ring == &dev_priv->ring[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len,
							flags);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						flags);
		if (ret)
			goto err;
	}

	CTR3(KTR_DRM, "ring_dispatch ring=%s seqno=%d flags=%u", ring->name,
	    intel_ring_get_seqno(ring), flags);

	i915_gem_execbuffer_move_to_active(&objects, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);
	while (!list_empty(&objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

pre_mutex_err:
	for (i = 0; i < args->buffer_count; i++) {
		if (relocs_ma[i] != NULL) {
			vm_page_unhold_pages(relocs_ma[i], relocs_len[i]);
			free(relocs_ma[i], DRM_I915_GEM);
		}
	}
	free(relocs_len, DRM_I915_GEM);
	free(relocs_ma, DRM_I915_GEM);
	free(cliprects, DRM_I915_GEM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	/* XXXKIB user-controlled malloc size */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     (void __user *)(uintptr_t)args->buffers_ptr,
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
				   exec_list,
				   sizeof(*exec_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* XXXKIB user-controllable malloc size */
	exec2_list = malloc(sizeof(*exec2_list)*args->buffer_count,
			     DRM_I915_GEM, M_WAITOK);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		free(exec2_list, DRM_I915_GEM);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
				   exec2_list,
				   sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	free(exec2_list, DRM_I915_GEM);
	return ret;
}