1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28#include "drmP.h"
29#include "drm.h"
30#include "i915_drm.h"
31#include "i915_drv.h"
32#include "i915_trace.h"
33#include "intel_drv.h"
34#include <linux/slab.h>
35#include <linux/swap.h>
36#include <linux/pci.h>
37#include <linux/intel-gtt.h>
38
39static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
40static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
41static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
42static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
43static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
44					     int write);
45static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
46						     uint64_t offset,
47						     uint64_t size);
48static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
49static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
50static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
51					   unsigned alignment);
52static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
53static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
54				struct drm_i915_gem_pwrite *args,
55				struct drm_file *file_priv);
56static void i915_gem_free_object_tail(struct drm_gem_object *obj);
57
58static LIST_HEAD(shrink_list);
59static DEFINE_SPINLOCK(shrink_list_lock);
60
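/* An object is "inactive" when it is bound into the GTT but not pinned and
 * the GPU has finished with it; only inactive objects are eligible for
 * eviction.
 */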
61static inline bool
62i915_gem_object_is_inactive(struct drm_i915_gem_object *obj_priv)
63{
64	return obj_priv->gtt_space &&
65		!obj_priv->active &&
66		obj_priv->pin_count == 0;
67}
68
69int i915_gem_do_init(struct drm_device *dev, unsigned long start,
70		     unsigned long end)
71{
72	drm_i915_private_t *dev_priv = dev->dev_private;
73
74	if (start >= end ||
75	    (start & (PAGE_SIZE - 1)) != 0 ||
76	    (end & (PAGE_SIZE - 1)) != 0) {
77		return -EINVAL;
78	}
79
80	drm_mm_init(&dev_priv->mm.gtt_space, start,
81		    end - start);
82
83	dev->gtt_total = (uint32_t) (end - start);
84
85	return 0;
86}
87
88int
89i915_gem_init_ioctl(struct drm_device *dev, void *data,
90		    struct drm_file *file_priv)
91{
92	struct drm_i915_gem_init *args = data;
93	int ret;
94
95	mutex_lock(&dev->struct_mutex);
96	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
97	mutex_unlock(&dev->struct_mutex);
98
99	return ret;
100}
101
102int
103i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
104			    struct drm_file *file_priv)
105{
106	struct drm_i915_gem_get_aperture *args = data;
107
108	if (!(dev->driver->driver_features & DRIVER_GEM))
109		return -ENODEV;
110
111	args->aper_size = dev->gtt_total;
112	args->aper_available_size = (args->aper_size -
113				     atomic_read(&dev->pin_memory));
114
115	return 0;
116}
117
118
119/**
120 * Creates a new mm object and returns a handle to it.
121 */
122int
123i915_gem_create_ioctl(struct drm_device *dev, void *data,
124		      struct drm_file *file_priv)
125{
126	struct drm_i915_gem_create *args = data;
127	struct drm_gem_object *obj;
128	int ret;
129	u32 handle;
130
131	args->size = roundup(args->size, PAGE_SIZE);
132
133	/* Allocate the new object */
134	obj = i915_gem_alloc_object(dev, args->size);
135	if (obj == NULL)
136		return -ENOMEM;
137
138	ret = drm_gem_handle_create(file_priv, obj, &handle);
139	/* drop reference from allocate - handle holds it now */
140	drm_gem_object_unreference_unlocked(obj);
141	if (ret) {
142		return ret;
143	}
144
145	args->handle = handle;
146	return 0;
147}
148
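/* Atomically copy data from a kmapped shmem backing page out to user space;
 * fails with -EFAULT instead of faulting in the user page.
 */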
149static inline int
150fast_shmem_read(struct page **pages,
151		loff_t page_base, int page_offset,
152		char __user *data,
153		int length)
154{
155	char __iomem *vaddr;
156	int unwritten;
157
158	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
159	if (vaddr == NULL)
160		return -ENOMEM;
161	unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
162	kunmap_atomic(vaddr, KM_USER0);
163
164	if (unwritten)
165		return -EFAULT;
166
167	return 0;
168}
169
170static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
171{
172	drm_i915_private_t *dev_priv = obj->dev->dev_private;
173	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
174
175	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
176		obj_priv->tiling_mode != I915_TILING_NONE;
177}
178
179static inline void
180slow_shmem_copy(struct page *dst_page,
181		int dst_offset,
182		struct page *src_page,
183		int src_offset,
184		int length)
185{
186	char *dst_vaddr, *src_vaddr;
187
188	dst_vaddr = kmap(dst_page);
189	src_vaddr = kmap(src_page);
190
191	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
192
193	kunmap(src_page);
194	kunmap(dst_page);
195}
196
197static inline void
198slow_shmem_bit17_copy(struct page *gpu_page,
199		      int gpu_offset,
200		      struct page *cpu_page,
201		      int cpu_offset,
202		      int length,
203		      int is_read)
204{
205	char *gpu_vaddr, *cpu_vaddr;
206
207	/* Use the unswizzled path if this page isn't affected. */
208	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
209		if (is_read)
210			return slow_shmem_copy(cpu_page, cpu_offset,
211					       gpu_page, gpu_offset, length);
212		else
213			return slow_shmem_copy(gpu_page, gpu_offset,
214					       cpu_page, cpu_offset, length);
215	}
216
217	gpu_vaddr = kmap(gpu_page);
218	cpu_vaddr = kmap(cpu_page);
219
220	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
221	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
222	 */
223	while (length > 0) {
224		int cacheline_end = ALIGN(gpu_offset + 1, 64);
225		int this_length = min(cacheline_end - gpu_offset, length);
226		int swizzled_gpu_offset = gpu_offset ^ 64;
227
228		if (is_read) {
229			memcpy(cpu_vaddr + cpu_offset,
230			       gpu_vaddr + swizzled_gpu_offset,
231			       this_length);
232		} else {
233			memcpy(gpu_vaddr + swizzled_gpu_offset,
234			       cpu_vaddr + cpu_offset,
235			       this_length);
236		}
237		cpu_offset += this_length;
238		gpu_offset += this_length;
239		length -= this_length;
240	}
241
242	kunmap(cpu_page);
243	kunmap(gpu_page);
244}
245
246/**
247 * This is the fast shmem pread path, which attempts to copy_to_user directly
248 * from the backing pages of the object to the user's address space.  On a
249 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
250 */
251static int
252i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
253			  struct drm_i915_gem_pread *args,
254			  struct drm_file *file_priv)
255{
256	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
257	ssize_t remain;
258	loff_t offset, page_base;
259	char __user *user_data;
260	int page_offset, page_length;
261	int ret;
262
263	user_data = (char __user *) (uintptr_t) args->data_ptr;
264	remain = args->size;
265
266	mutex_lock(&dev->struct_mutex);
267
268	ret = i915_gem_object_get_pages(obj, 0);
269	if (ret != 0)
270		goto fail_unlock;
271
272	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
273							args->size);
274	if (ret != 0)
275		goto fail_put_pages;
276
277	obj_priv = to_intel_bo(obj);
278	offset = args->offset;
279
280	while (remain > 0) {
281		/* Operation in this page
282		 *
283		 * page_base = page offset within the object
284		 * page_offset = offset within page
285		 * page_length = bytes to copy for this page
286		 */
287		page_base = (offset & ~(PAGE_SIZE-1));
288		page_offset = offset & (PAGE_SIZE-1);
289		page_length = remain;
290		if ((page_offset + remain) > PAGE_SIZE)
291			page_length = PAGE_SIZE - page_offset;
292
293		ret = fast_shmem_read(obj_priv->pages,
294				      page_base, page_offset,
295				      user_data, page_length);
296		if (ret)
297			goto fail_put_pages;
298
299		remain -= page_length;
300		user_data += page_length;
301		offset += page_length;
302	}
303
304fail_put_pages:
305	i915_gem_object_put_pages(obj);
306fail_unlock:
307	mutex_unlock(&dev->struct_mutex);
308
309	return ret;
310}
311
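/* Grab the object's backing pages, evicting something from the GTT and
 * retrying once if the initial attempt fails with -ENOMEM.
 */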
312static int
313i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
314{
315	int ret;
316
317	ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);
318
319	/* If we've insufficient memory to map in the pages, attempt
320	 * to make some space by throwing out some old buffers.
321	 */
322	if (ret == -ENOMEM) {
323		struct drm_device *dev = obj->dev;
324
325		ret = i915_gem_evict_something(dev, obj->size,
326					       i915_gem_get_gtt_alignment(obj));
327		if (ret)
328			return ret;
329
330		ret = i915_gem_object_get_pages(obj, 0);
331	}
332
333	return ret;
334}
335
336/**
337 * This is the fallback shmem pread path, which uses get_user_pages to
338 * pin the destination pages in user space, so we can copy out of the
339 * object's backing pages while holding the struct mutex without taking
340 * page faults.
341 */
342static int
343i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
344			  struct drm_i915_gem_pread *args,
345			  struct drm_file *file_priv)
346{
347	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
348	struct mm_struct *mm = current->mm;
349	struct page **user_pages;
350	ssize_t remain;
351	loff_t offset, pinned_pages, i;
352	loff_t first_data_page, last_data_page, num_pages;
353	int shmem_page_index, shmem_page_offset;
354	int data_page_index,  data_page_offset;
355	int page_length;
356	int ret;
357	uint64_t data_ptr = args->data_ptr;
358	int do_bit17_swizzling;
359
360	remain = args->size;
361
362	/* Pin the user pages containing the data.  We can't fault while
363	 * holding the struct mutex, yet we want to hold it while
364	 * dereferencing the user data.
365	 */
366	first_data_page = data_ptr / PAGE_SIZE;
367	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
368	num_pages = last_data_page - first_data_page + 1;
369
370	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
371	if (user_pages == NULL)
372		return -ENOMEM;
373
374	down_read(&mm->mmap_sem);
375	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
376				      num_pages, 1, 0, user_pages, NULL);
377	up_read(&mm->mmap_sem);
378	if (pinned_pages < num_pages) {
379		ret = -EFAULT;
380		goto fail_put_user_pages;
381	}
382
383	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
384
385	mutex_lock(&dev->struct_mutex);
386
387	ret = i915_gem_object_get_pages_or_evict(obj);
388	if (ret)
389		goto fail_unlock;
390
391	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
392							args->size);
393	if (ret != 0)
394		goto fail_put_pages;
395
396	obj_priv = to_intel_bo(obj);
397	offset = args->offset;
398
399	while (remain > 0) {
400		/* Operation in this page
401		 *
402		 * shmem_page_index = page number within shmem file
403		 * shmem_page_offset = offset within page in shmem file
404		 * data_page_index = page number in get_user_pages return
405 * data_page_offset = offset within the data_page_index page.
406		 * page_length = bytes to copy for this page
407		 */
408		shmem_page_index = offset / PAGE_SIZE;
409		shmem_page_offset = offset & ~PAGE_MASK;
410		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
411		data_page_offset = data_ptr & ~PAGE_MASK;
412
413		page_length = remain;
414		if ((shmem_page_offset + page_length) > PAGE_SIZE)
415			page_length = PAGE_SIZE - shmem_page_offset;
416		if ((data_page_offset + page_length) > PAGE_SIZE)
417			page_length = PAGE_SIZE - data_page_offset;
418
419		if (do_bit17_swizzling) {
420			slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
421					      shmem_page_offset,
422					      user_pages[data_page_index],
423					      data_page_offset,
424					      page_length,
425					      1);
426		} else {
427			slow_shmem_copy(user_pages[data_page_index],
428					data_page_offset,
429					obj_priv->pages[shmem_page_index],
430					shmem_page_offset,
431					page_length);
432		}
433
434		remain -= page_length;
435		data_ptr += page_length;
436		offset += page_length;
437	}
438
439fail_put_pages:
440	i915_gem_object_put_pages(obj);
441fail_unlock:
442	mutex_unlock(&dev->struct_mutex);
443fail_put_user_pages:
444	for (i = 0; i < pinned_pages; i++) {
445		SetPageDirty(user_pages[i]);
446		page_cache_release(user_pages[i]);
447	}
448	drm_free_large(user_pages);
449
450	return ret;
451}
452
453/**
454 * Reads data from the object referenced by handle.
455 *
456 * On error, the contents of *data are undefined.
457 */
458int
459i915_gem_pread_ioctl(struct drm_device *dev, void *data,
460		     struct drm_file *file_priv)
461{
462	struct drm_i915_gem_pread *args = data;
463	struct drm_gem_object *obj;
464	struct drm_i915_gem_object *obj_priv;
465	int ret;
466
467	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
468	if (obj == NULL)
469		return -ENOENT;
470	obj_priv = to_intel_bo(obj);
471
472	/* Bounds check source.  */
473	if (args->offset > obj->size || args->size > obj->size - args->offset) {
474		ret = -EINVAL;
475		goto err;
476	}
477
478	if (!access_ok(VERIFY_WRITE,
479		       (char __user *)(uintptr_t)args->data_ptr,
480		       args->size)) {
481		ret = -EFAULT;
482		goto err;
483	}
484
485	if (i915_gem_object_needs_bit17_swizzle(obj)) {
486		ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
487	} else {
488		ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
489		if (ret != 0)
490			ret = i915_gem_shmem_pread_slow(dev, obj, args,
491							file_priv);
492	}
493
494err:
495	drm_gem_object_unreference_unlocked(obj);
496	return ret;
497}
498
499/* This is the fast write path which cannot handle
500 * page faults in the source data
501 */
502
503static inline int
504fast_user_write(struct io_mapping *mapping,
505		loff_t page_base, int page_offset,
506		char __user *user_data,
507		int length)
508{
509	char *vaddr_atomic;
510	unsigned long unwritten;
511
512	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0);
513	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
514						      user_data, length);
515	io_mapping_unmap_atomic(vaddr_atomic, KM_USER0);
516	if (unwritten)
517		return -EFAULT;
518	return 0;
519}
520
521/* Here's the slow write path, which can sleep while
522 * handling page faults
523 */
524
525static inline void
526slow_kernel_write(struct io_mapping *mapping,
527		  loff_t gtt_base, int gtt_offset,
528		  struct page *user_page, int user_offset,
529		  int length)
530{
531	char __iomem *dst_vaddr;
532	char *src_vaddr;
533
534	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
535	src_vaddr = kmap(user_page);
536
537	memcpy_toio(dst_vaddr + gtt_offset,
538		    src_vaddr + user_offset,
539		    length);
540
541	kunmap(user_page);
542	io_mapping_unmap(dst_vaddr);
543}
544
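/* Write-direction counterpart of fast_shmem_read(): atomically copy user
 * data into a kmapped shmem page, failing with -EFAULT rather than faulting.
 */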
545static inline int
546fast_shmem_write(struct page **pages,
547		 loff_t page_base, int page_offset,
548		 char __user *data,
549		 int length)
550{
551	char __iomem *vaddr;
552	unsigned long unwritten;
553
554	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
555	if (vaddr == NULL)
556		return -ENOMEM;
557	unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
558	kunmap_atomic(vaddr, KM_USER0);
559
560	if (unwritten)
561		return -EFAULT;
562	return 0;
563}
564
565/**
566 * This is the fast pwrite path, where we copy the data directly from the
567 * user into the GTT, uncached.
568 */
569static int
570i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
571			 struct drm_i915_gem_pwrite *args,
572			 struct drm_file *file_priv)
573{
574	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
575	drm_i915_private_t *dev_priv = dev->dev_private;
576	ssize_t remain;
577	loff_t offset, page_base;
578	char __user *user_data;
579	int page_offset, page_length;
580	int ret;
581
582	user_data = (char __user *) (uintptr_t) args->data_ptr;
583	remain = args->size;
584
585
586	mutex_lock(&dev->struct_mutex);
587	ret = i915_gem_object_pin(obj, 0);
588	if (ret) {
589		mutex_unlock(&dev->struct_mutex);
590		return ret;
591	}
592	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
593	if (ret)
594		goto fail;
595
596	obj_priv = to_intel_bo(obj);
597	offset = obj_priv->gtt_offset + args->offset;
598
599	while (remain > 0) {
600		/* Operation in this page
601		 *
602		 * page_base = page offset within aperture
603		 * page_offset = offset within page
604		 * page_length = bytes to copy for this page
605		 */
606		page_base = (offset & ~(PAGE_SIZE-1));
607		page_offset = offset & (PAGE_SIZE-1);
608		page_length = remain;
609		if ((page_offset + remain) > PAGE_SIZE)
610			page_length = PAGE_SIZE - page_offset;
611
612		ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
613				       page_offset, user_data, page_length);
614
615		/* If we get a fault while copying data, then (presumably) our
616		 * source page isn't available.  Return the error and we'll
617		 * retry in the slow path.
618		 */
619		if (ret)
620			goto fail;
621
622		remain -= page_length;
623		user_data += page_length;
624		offset += page_length;
625	}
626
627fail:
628	i915_gem_object_unpin(obj);
629	mutex_unlock(&dev->struct_mutex);
630
631	return ret;
632}
633
634/**
635 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
636 * the user pages and copies them through a WC mapping of the aperture.
637 *
638 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
639 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
640 */
641static int
642i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
643			 struct drm_i915_gem_pwrite *args,
644			 struct drm_file *file_priv)
645{
646	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
647	drm_i915_private_t *dev_priv = dev->dev_private;
648	ssize_t remain;
649	loff_t gtt_page_base, offset;
650	loff_t first_data_page, last_data_page, num_pages;
651	loff_t pinned_pages, i;
652	struct page **user_pages;
653	struct mm_struct *mm = current->mm;
654	int gtt_page_offset, data_page_offset, data_page_index, page_length;
655	int ret;
656	uint64_t data_ptr = args->data_ptr;
657
658	remain = args->size;
659
660	/* Pin the user pages containing the data.  We can't fault while
661	 * holding the struct mutex, and all of the pwrite implementations
662	 * want to hold it while dereferencing the user data.
663	 */
664	first_data_page = data_ptr / PAGE_SIZE;
665	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
666	num_pages = last_data_page - first_data_page + 1;
667
668	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
669	if (user_pages == NULL)
670		return -ENOMEM;
671
672	down_read(&mm->mmap_sem);
673	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
674				      num_pages, 0, 0, user_pages, NULL);
675	up_read(&mm->mmap_sem);
676	if (pinned_pages < num_pages) {
677		ret = -EFAULT;
678		goto out_unpin_pages;
679	}
680
681	mutex_lock(&dev->struct_mutex);
682	ret = i915_gem_object_pin(obj, 0);
683	if (ret)
684		goto out_unlock;
685
686	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
687	if (ret)
688		goto out_unpin_object;
689
690	obj_priv = to_intel_bo(obj);
691	offset = obj_priv->gtt_offset + args->offset;
692
693	while (remain > 0) {
694		/* Operation in this page
695		 *
696		 * gtt_page_base = page offset within aperture
697		 * gtt_page_offset = offset within page in aperture
698		 * data_page_index = page number in get_user_pages return
699 * data_page_offset = offset within the data_page_index page.
700		 * page_length = bytes to copy for this page
701		 */
702		gtt_page_base = offset & PAGE_MASK;
703		gtt_page_offset = offset & ~PAGE_MASK;
704		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
705		data_page_offset = data_ptr & ~PAGE_MASK;
706
707		page_length = remain;
708		if ((gtt_page_offset + page_length) > PAGE_SIZE)
709			page_length = PAGE_SIZE - gtt_page_offset;
710		if ((data_page_offset + page_length) > PAGE_SIZE)
711			page_length = PAGE_SIZE - data_page_offset;
712
713		slow_kernel_write(dev_priv->mm.gtt_mapping,
714				  gtt_page_base, gtt_page_offset,
715				  user_pages[data_page_index],
716				  data_page_offset,
717				  page_length);
718
719		remain -= page_length;
720		offset += page_length;
721		data_ptr += page_length;
722	}
723
724out_unpin_object:
725	i915_gem_object_unpin(obj);
726out_unlock:
727	mutex_unlock(&dev->struct_mutex);
728out_unpin_pages:
729	for (i = 0; i < pinned_pages; i++)
730		page_cache_release(user_pages[i]);
731	drm_free_large(user_pages);
732
733	return ret;
734}
735
736/**
737 * This is the fast shmem pwrite path, which attempts to directly
738 * copy_from_user into the kmapped pages backing the object.
739 */
740static int
741i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
742			   struct drm_i915_gem_pwrite *args,
743			   struct drm_file *file_priv)
744{
745	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
746	ssize_t remain;
747	loff_t offset, page_base;
748	char __user *user_data;
749	int page_offset, page_length;
750	int ret;
751
752	user_data = (char __user *) (uintptr_t) args->data_ptr;
753	remain = args->size;
754
755	mutex_lock(&dev->struct_mutex);
756
757	ret = i915_gem_object_get_pages(obj, 0);
758	if (ret != 0)
759		goto fail_unlock;
760
761	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
762	if (ret != 0)
763		goto fail_put_pages;
764
765	obj_priv = to_intel_bo(obj);
766	offset = args->offset;
767	obj_priv->dirty = 1;
768
769	while (remain > 0) {
770		/* Operation in this page
771		 *
772		 * page_base = page offset within the object
773		 * page_offset = offset within page
774		 * page_length = bytes to copy for this page
775		 */
776		page_base = (offset & ~(PAGE_SIZE-1));
777		page_offset = offset & (PAGE_SIZE-1);
778		page_length = remain;
779		if ((page_offset + remain) > PAGE_SIZE)
780			page_length = PAGE_SIZE - page_offset;
781
782		ret = fast_shmem_write(obj_priv->pages,
783				       page_base, page_offset,
784				       user_data, page_length);
785		if (ret)
786			goto fail_put_pages;
787
788		remain -= page_length;
789		user_data += page_length;
790		offset += page_length;
791	}
792
793fail_put_pages:
794	i915_gem_object_put_pages(obj);
795fail_unlock:
796	mutex_unlock(&dev->struct_mutex);
797
798	return ret;
799}
800
801/**
802 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
803 * the memory and maps it using kmap_atomic for copying.
804 *
805 * This avoids taking mmap_sem for faulting on the user's address while the
806 * struct_mutex is held.
807 */
808static int
809i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
810			   struct drm_i915_gem_pwrite *args,
811			   struct drm_file *file_priv)
812{
813	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
814	struct mm_struct *mm = current->mm;
815	struct page **user_pages;
816	ssize_t remain;
817	loff_t offset, pinned_pages, i;
818	loff_t first_data_page, last_data_page, num_pages;
819	int shmem_page_index, shmem_page_offset;
820	int data_page_index,  data_page_offset;
821	int page_length;
822	int ret;
823	uint64_t data_ptr = args->data_ptr;
824	int do_bit17_swizzling;
825
826	remain = args->size;
827
828	/* Pin the user pages containing the data.  We can't fault while
829	 * holding the struct mutex, and all of the pwrite implementations
830	 * want to hold it while dereferencing the user data.
831	 */
832	first_data_page = data_ptr / PAGE_SIZE;
833	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
834	num_pages = last_data_page - first_data_page + 1;
835
836	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
837	if (user_pages == NULL)
838		return -ENOMEM;
839
840	down_read(&mm->mmap_sem);
841	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
842				      num_pages, 0, 0, user_pages, NULL);
843	up_read(&mm->mmap_sem);
844	if (pinned_pages < num_pages) {
845		ret = -EFAULT;
846		goto fail_put_user_pages;
847	}
848
849	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
850
851	mutex_lock(&dev->struct_mutex);
852
853	ret = i915_gem_object_get_pages_or_evict(obj);
854	if (ret)
855		goto fail_unlock;
856
857	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
858	if (ret != 0)
859		goto fail_put_pages;
860
861	obj_priv = to_intel_bo(obj);
862	offset = args->offset;
863	obj_priv->dirty = 1;
864
865	while (remain > 0) {
866		/* Operation in this page
867		 *
868		 * shmem_page_index = page number within shmem file
869		 * shmem_page_offset = offset within page in shmem file
870		 * data_page_index = page number in get_user_pages return
871 * data_page_offset = offset within the data_page_index page.
872		 * page_length = bytes to copy for this page
873		 */
874		shmem_page_index = offset / PAGE_SIZE;
875		shmem_page_offset = offset & ~PAGE_MASK;
876		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
877		data_page_offset = data_ptr & ~PAGE_MASK;
878
879		page_length = remain;
880		if ((shmem_page_offset + page_length) > PAGE_SIZE)
881			page_length = PAGE_SIZE - shmem_page_offset;
882		if ((data_page_offset + page_length) > PAGE_SIZE)
883			page_length = PAGE_SIZE - data_page_offset;
884
885		if (do_bit17_swizzling) {
886			slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
887					      shmem_page_offset,
888					      user_pages[data_page_index],
889					      data_page_offset,
890					      page_length,
891					      0);
892		} else {
893			slow_shmem_copy(obj_priv->pages[shmem_page_index],
894					shmem_page_offset,
895					user_pages[data_page_index],
896					data_page_offset,
897					page_length);
898		}
899
900		remain -= page_length;
901		data_ptr += page_length;
902		offset += page_length;
903	}
904
905fail_put_pages:
906	i915_gem_object_put_pages(obj);
907fail_unlock:
908	mutex_unlock(&dev->struct_mutex);
909fail_put_user_pages:
910	for (i = 0; i < pinned_pages; i++)
911		page_cache_release(user_pages[i]);
912	drm_free_large(user_pages);
913
914	return ret;
915}
916
917/**
918 * Writes data to the object referenced by handle.
919 *
920 * On error, the contents of the buffer that were to be modified are undefined.
921 */
922int
923i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
924		      struct drm_file *file_priv)
925{
926	struct drm_i915_gem_pwrite *args = data;
927	struct drm_gem_object *obj;
928	struct drm_i915_gem_object *obj_priv;
929	int ret = 0;
930
931	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
932	if (obj == NULL)
933		return -ENOENT;
934	obj_priv = to_intel_bo(obj);
935
936	/* Bounds check destination. */
937	if (args->offset > obj->size || args->size > obj->size - args->offset) {
938		ret = -EINVAL;
939		goto err;
940	}
941
942	if (!access_ok(VERIFY_READ,
943		       (char __user *)(uintptr_t)args->data_ptr,
944		       args->size)) {
945		ret = -EFAULT;
946		goto err;
947	}
948
949	/* We can only do the GTT pwrite on untiled buffers, as otherwise
950	 * it would end up going through the fenced access, and we'll get
951	 * different detiling behavior between reading and writing.
952	 * pread/pwrite currently are reading and writing from the CPU
953	 * perspective, requiring manual detiling by the client.
954	 */
955	if (obj_priv->phys_obj)
956		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
957	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
958		 dev->gtt_total != 0 &&
959		 obj->write_domain != I915_GEM_DOMAIN_CPU) {
960		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
961		if (ret == -EFAULT) {
962			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
963						       file_priv);
964		}
965	} else if (i915_gem_object_needs_bit17_swizzle(obj)) {
966		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
967	} else {
968		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
969		if (ret == -EFAULT) {
970			ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
971							 file_priv);
972		}
973	}
974
975#if WATCH_PWRITE
976	if (ret)
977		DRM_INFO("pwrite failed %d\n", ret);
978#endif
979
980err:
981	drm_gem_object_unreference_unlocked(obj);
982	return ret;
983}
984
985/**
986 * Called when user space prepares to use an object with the CPU, either
987 * through the mmap ioctl's mapping or a GTT mapping.
988 */
989int
990i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
991			  struct drm_file *file_priv)
992{
993	struct drm_i915_private *dev_priv = dev->dev_private;
994	struct drm_i915_gem_set_domain *args = data;
995	struct drm_gem_object *obj;
996	struct drm_i915_gem_object *obj_priv;
997	uint32_t read_domains = args->read_domains;
998	uint32_t write_domain = args->write_domain;
999	int ret;
1000
1001	if (!(dev->driver->driver_features & DRIVER_GEM))
1002		return -ENODEV;
1003
1004	/* Only handle setting domains to types used by the CPU. */
1005	if (write_domain & I915_GEM_GPU_DOMAINS)
1006		return -EINVAL;
1007
1008	if (read_domains & I915_GEM_GPU_DOMAINS)
1009		return -EINVAL;
1010
1011	/* Having something in the write domain implies it's in the read
1012	 * domain, and only that read domain.  Enforce that in the request.
1013	 */
1014	if (write_domain != 0 && read_domains != write_domain)
1015		return -EINVAL;
1016
1017	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1018	if (obj == NULL)
1019		return -ENOENT;
1020	obj_priv = to_intel_bo(obj);
1021
1022	mutex_lock(&dev->struct_mutex);
1023
1024	intel_mark_busy(dev, obj);
1025
1026#if WATCH_BUF
1027	DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
1028		 obj, obj->size, read_domains, write_domain);
1029#endif
1030	if (read_domains & I915_GEM_DOMAIN_GTT) {
1031		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1032
1033		/* Update the LRU on the fence for the CPU access that's
1034		 * about to occur.
1035		 */
1036		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1037			struct drm_i915_fence_reg *reg =
1038				&dev_priv->fence_regs[obj_priv->fence_reg];
1039			list_move_tail(&reg->lru_list,
1040				       &dev_priv->mm.fence_list);
1041		}
1042
1043		/* Silently promote "you're not bound, there was nothing to do"
1044		 * to success, since the client was just asking us to
1045		 * make sure everything was done.
1046		 */
1047		if (ret == -EINVAL)
1048			ret = 0;
1049	} else {
1050		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1051	}
1052
1053
1054	/* Maintain LRU order of "inactive" objects */
1055	if (ret == 0 && i915_gem_object_is_inactive(obj_priv))
1056		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1057
1058	drm_gem_object_unreference(obj);
1059	mutex_unlock(&dev->struct_mutex);
1060	return ret;
1061}
1062
1063/**
1064 * Called when user space has done writes to this buffer
1065 */
1066int
1067i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1068		      struct drm_file *file_priv)
1069{
1070	struct drm_i915_gem_sw_finish *args = data;
1071	struct drm_gem_object *obj;
1072	struct drm_i915_gem_object *obj_priv;
1073	int ret = 0;
1074
1075	if (!(dev->driver->driver_features & DRIVER_GEM))
1076		return -ENODEV;
1077
1078	mutex_lock(&dev->struct_mutex);
1079	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1080	if (obj == NULL) {
1081		mutex_unlock(&dev->struct_mutex);
1082		return -ENOENT;
1083	}
1084
1085#if WATCH_BUF
1086	DRM_INFO("%s: sw_finish %d (%p %zd)\n",
1087		 __func__, args->handle, obj, obj->size);
1088#endif
1089	obj_priv = to_intel_bo(obj);
1090
1091	/* Pinned buffers may be scanout, so flush the cache */
1092	if (obj_priv->pin_count)
1093		i915_gem_object_flush_cpu_write_domain(obj);
1094
1095	drm_gem_object_unreference(obj);
1096	mutex_unlock(&dev->struct_mutex);
1097	return ret;
1098}
1099
1100/**
1101 * Maps the contents of an object, returning the address it is mapped
1102 * into.
1103 *
1104 * While the mapping holds a reference on the contents of the object, it doesn't
1105 * imply a ref on the object itself.
1106 */
1107int
1108i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1109		   struct drm_file *file_priv)
1110{
1111	struct drm_i915_gem_mmap *args = data;
1112	struct drm_gem_object *obj;
1113	loff_t offset;
1114	unsigned long addr;
1115
1116	if (!(dev->driver->driver_features & DRIVER_GEM))
1117		return -ENODEV;
1118
1119	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1120	if (obj == NULL)
1121		return -ENOENT;
1122
1123	offset = args->offset;
1124
1125	down_write(&current->mm->mmap_sem);
1126	addr = do_mmap(obj->filp, 0, args->size,
1127		       PROT_READ | PROT_WRITE, MAP_SHARED,
1128		       args->offset);
1129	up_write(&current->mm->mmap_sem);
1130	drm_gem_object_unreference_unlocked(obj);
1131	if (IS_ERR((void *)addr))
1132		return addr;
1133
1134	args->addr_ptr = (uint64_t) addr;
1135
1136	return 0;
1137}
1138
1139/**
1140 * i915_gem_fault - fault a page into the GTT
1141 * vma: VMA in question
1142 * vmf: fault info
1143 *
1144 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1145 * from userspace.  The fault handler takes care of binding the object to
1146 * the GTT (if needed), allocating and programming a fence register (again,
1147 * only if needed based on whether the old reg is still valid or the object
1148 * is tiled) and inserting a new PTE into the faulting process.
1149 *
1150 * Note that the faulting process may involve evicting existing objects
1151 * from the GTT and/or fence registers to make room.  So performance may
1152 * suffer if the GTT working set is large or there are few fence registers
1153 * left.
1154 */
1155int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1156{
1157	struct drm_gem_object *obj = vma->vm_private_data;
1158	struct drm_device *dev = obj->dev;
1159	drm_i915_private_t *dev_priv = dev->dev_private;
1160	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1161	pgoff_t page_offset;
1162	unsigned long pfn;
1163	int ret = 0;
1164	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1165
1166	/* We don't use vmf->pgoff since that has the fake offset */
1167	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1168		PAGE_SHIFT;
1169
1170	/* Now bind it into the GTT if needed */
1171	mutex_lock(&dev->struct_mutex);
1172	if (!obj_priv->gtt_space) {
1173		ret = i915_gem_object_bind_to_gtt(obj, 0);
1174		if (ret)
1175			goto unlock;
1176
1177		ret = i915_gem_object_set_to_gtt_domain(obj, write);
1178		if (ret)
1179			goto unlock;
1180	}
1181
1182	/* Need a new fence register? */
1183	if (obj_priv->tiling_mode != I915_TILING_NONE) {
1184		ret = i915_gem_object_get_fence_reg(obj);
1185		if (ret)
1186			goto unlock;
1187	}
1188
1189	if (i915_gem_object_is_inactive(obj_priv))
1190		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1191
1192	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1193		page_offset;
1194
1195	/* Finally, remap it using the new GTT offset */
1196	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1197unlock:
1198	mutex_unlock(&dev->struct_mutex);
1199
1200	switch (ret) {
1201	case 0:
1202	case -ERESTARTSYS:
1203		return VM_FAULT_NOPAGE;
1204	case -ENOMEM:
1205	case -EAGAIN:
1206		return VM_FAULT_OOM;
1207	default:
1208		return VM_FAULT_SIGBUS;
1209	}
1210}
1211
1212/**
1213 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1214 * @obj: obj in question
1215 *
1216 * GEM memory mapping works by handing back to userspace a fake mmap offset
1217 * it can use in a subsequent mmap(2) call.  The DRM core code then looks
1218 * up the object based on the offset and sets up the various memory mapping
1219 * structures.
1220 *
1221 * This routine allocates and attaches a fake offset for @obj.
1222 */
1223static int
1224i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1225{
1226	struct drm_device *dev = obj->dev;
1227	struct drm_gem_mm *mm = dev->mm_private;
1228	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1229	struct drm_map_list *list;
1230	struct drm_local_map *map;
1231	int ret = 0;
1232
1233	/* Set the object up for mmap'ing */
1234	list = &obj->map_list;
1235	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1236	if (!list->map)
1237		return -ENOMEM;
1238
1239	map = list->map;
1240	map->type = _DRM_GEM;
1241	map->size = obj->size;
1242	map->handle = obj;
1243
1244	/* Get a DRM GEM mmap offset allocated... */
1245	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1246						    obj->size / PAGE_SIZE, 0, 0);
1247	if (!list->file_offset_node) {
1248		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1249		ret = -ENOMEM;
1250		goto out_free_list;
1251	}
1252
1253	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1254						  obj->size / PAGE_SIZE, 0);
1255	if (!list->file_offset_node) {
1256		ret = -ENOMEM;
1257		goto out_free_list;
1258	}
1259
1260	list->hash.key = list->file_offset_node->start;
1261	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1262		DRM_ERROR("failed to add to map hash\n");
1263		ret = -ENOMEM;
1264		goto out_free_mm;
1265	}
1266
1267	/* By now we should be all set; any drm_mmap request on the offset
1268	 * below will get to our mmap & fault handler */
1269	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1270
1271	return 0;
1272
1273out_free_mm:
1274	drm_mm_put_block(list->file_offset_node);
1275out_free_list:
1276	kfree(list->map);
1277
1278	return ret;
1279}
1280
1281/**
1282 * i915_gem_release_mmap - remove physical page mappings
1283 * @obj: obj in question
1284 *
1285 * Preserve the reservation of the mmapping with the DRM core code, but
1286 * relinquish ownership of the pages back to the system.
1287 *
1288 * It is vital that we remove the page mapping if we have mapped a tiled
1289 * object through the GTT and then lose the fence register due to
1290 * resource pressure. Similarly if the object has been moved out of the
1291 * aperture, then pages mapped into userspace must be revoked. Removing the
1292 * mapping will then trigger a page fault on the next user access, allowing
1293 * fixup by i915_gem_fault().
1294 */
1295void
1296i915_gem_release_mmap(struct drm_gem_object *obj)
1297{
1298	struct drm_device *dev = obj->dev;
1299	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1300
1301	if (dev->dev_mapping)
1302		unmap_mapping_range(dev->dev_mapping,
1303				    obj_priv->mmap_offset, obj->size, 1);
1304}
1305
1306static void
1307i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1308{
1309	struct drm_device *dev = obj->dev;
1310	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1311	struct drm_gem_mm *mm = dev->mm_private;
1312	struct drm_map_list *list;
1313
1314	list = &obj->map_list;
1315	drm_ht_remove_item(&mm->offset_hash, &list->hash);
1316
1317	if (list->file_offset_node) {
1318		drm_mm_put_block(list->file_offset_node);
1319		list->file_offset_node = NULL;
1320	}
1321
1322	if (list->map) {
1323		kfree(list->map);
1324		list->map = NULL;
1325	}
1326
1327	obj_priv->mmap_offset = 0;
1328}
1329
1330/**
1331 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1332 * @obj: object to check
1333 *
1334 * Return the required GTT alignment for an object, taking into account
1335 * potential fence register mapping if needed.
1336 */
1337static uint32_t
1338i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1339{
1340	struct drm_device *dev = obj->dev;
1341	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1342	int start, i;
1343
1344	/*
1345	 * Minimum alignment is 4k (GTT page size), but might be greater
1346	 * if a fence register is needed for the object.
1347	 */
1348	if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1349		return 4096;
1350
1351	/*
1352	 * Previous chips need to be aligned to the size of the smallest
1353	 * fence register that can contain the object.
1354	 */
1355	if (IS_I9XX(dev))
1356		start = 1024*1024;
1357	else
1358		start = 512*1024;
1359
1360	for (i = start; i < obj->size; i <<= 1)
1361		;
1362
1363	return i;
1364}
1365
1366/**
1367 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1368 * @dev: DRM device
1369 * @data: GTT mapping ioctl data
1370 * @file_priv: GEM object info
1371 *
1372 * Simply returns the fake offset to userspace so it can mmap it.
1373 * The mmap call will end up in drm_gem_mmap(), which will set things
1374 * up so we can get faults in the handler above.
1375 *
1376 * The fault handler will take care of binding the object into the GTT
1377 * (since it may have been evicted to make room for something), allocating
1378 * a fence register, and mapping the appropriate aperture address into
1379 * userspace.
1380 */
1381int
1382i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1383			struct drm_file *file_priv)
1384{
1385	struct drm_i915_gem_mmap_gtt *args = data;
1386	struct drm_gem_object *obj;
1387	struct drm_i915_gem_object *obj_priv;
1388	int ret;
1389
1390	if (!(dev->driver->driver_features & DRIVER_GEM))
1391		return -ENODEV;
1392
1393	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1394	if (obj == NULL)
1395		return -ENOENT;
1396
1397	mutex_lock(&dev->struct_mutex);
1398
1399	obj_priv = to_intel_bo(obj);
1400
1401	if (obj_priv->madv != I915_MADV_WILLNEED) {
1402		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1403		drm_gem_object_unreference(obj);
1404		mutex_unlock(&dev->struct_mutex);
1405		return -EINVAL;
1406	}
1407
1408
1409	if (!obj_priv->mmap_offset) {
1410		ret = i915_gem_create_mmap_offset(obj);
1411		if (ret) {
1412			drm_gem_object_unreference(obj);
1413			mutex_unlock(&dev->struct_mutex);
1414			return ret;
1415		}
1416	}
1417
1418	args->offset = obj_priv->mmap_offset;
1419
1420	/*
1421	 * Pull it into the GTT so that we have a page list (makes the
1422	 * initial fault faster and any subsequent flushing possible).
1423	 */
1424	if (!obj_priv->agp_mem) {
1425		ret = i915_gem_object_bind_to_gtt(obj, 0);
1426		if (ret) {
1427			drm_gem_object_unreference(obj);
1428			mutex_unlock(&dev->struct_mutex);
1429			return ret;
1430		}
1431	}
1432
1433	drm_gem_object_unreference(obj);
1434	mutex_unlock(&dev->struct_mutex);
1435
1436	return 0;
1437}
1438
1439void
1440i915_gem_object_put_pages(struct drm_gem_object *obj)
1441{
1442	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1443	int page_count = obj->size / PAGE_SIZE;
1444	int i;
1445
1446	BUG_ON(obj_priv->pages_refcount == 0);
1447	BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1448
1449	if (--obj_priv->pages_refcount != 0)
1450		return;
1451
1452	if (obj_priv->tiling_mode != I915_TILING_NONE)
1453		i915_gem_object_save_bit_17_swizzle(obj);
1454
1455	if (obj_priv->madv == I915_MADV_DONTNEED)
1456		obj_priv->dirty = 0;
1457
1458	for (i = 0; i < page_count; i++) {
1459		if (obj_priv->dirty)
1460			set_page_dirty(obj_priv->pages[i]);
1461
1462		if (obj_priv->madv == I915_MADV_WILLNEED)
1463			mark_page_accessed(obj_priv->pages[i]);
1464
1465		page_cache_release(obj_priv->pages[i]);
1466	}
1467	obj_priv->dirty = 0;
1468
1469	drm_free_large(obj_priv->pages);
1470	obj_priv->pages = NULL;
1471}
1472
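/* Mark an object as busy on the given ring: hold a reference while it is
 * active, move it to the tail of the ring's active list and record the
 * seqno of the request that last rendered to it.
 */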
1473static void
1474i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno,
1475			       struct intel_ring_buffer *ring)
1476{
1477	struct drm_device *dev = obj->dev;
1478	drm_i915_private_t *dev_priv = dev->dev_private;
1479	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1480	BUG_ON(ring == NULL);
1481	obj_priv->ring = ring;
1482
1483	/* Add a reference if we're newly entering the active list. */
1484	if (!obj_priv->active) {
1485		drm_gem_object_reference(obj);
1486		obj_priv->active = 1;
1487	}
1488	/* Move from whatever list we were on to the tail of execution. */
1489	spin_lock(&dev_priv->mm.active_list_lock);
1490	list_move_tail(&obj_priv->list, &ring->active_list);
1491	spin_unlock(&dev_priv->mm.active_list_lock);
1492	obj_priv->last_rendering_seqno = seqno;
1493}
1494
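/* A retired object that still has an outstanding GPU write is parked on the
 * flushing list until its write domain has been flushed.
 */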
1495static void
1496i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1497{
1498	struct drm_device *dev = obj->dev;
1499	drm_i915_private_t *dev_priv = dev->dev_private;
1500	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1501
1502	BUG_ON(!obj_priv->active);
1503	list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1504	obj_priv->last_rendering_seqno = 0;
1505}
1506
1507/* Immediately discard the backing storage */
1508static void
1509i915_gem_object_truncate(struct drm_gem_object *obj)
1510{
1511	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1512	struct inode *inode;
1513
1514	/* Our goal here is to return as much of the memory as
1515	 * possible back to the system, as we are called from OOM.
1516	 * To do this we must instruct the shmfs to drop all of its
1517	 * backing pages, *now*. Here we mirror the actions taken
1518	 * by shmem_delete_inode() to release the backing store.
1519	 */
1520	inode = obj->filp->f_path.dentry->d_inode;
1521	truncate_inode_pages(inode->i_mapping, 0);
1522	if (inode->i_op->truncate_range)
1523		inode->i_op->truncate_range(inode, 0, (loff_t)-1);
1524
1525	obj_priv->madv = __I915_MADV_PURGED;
1526}
1527
1528static inline int
1529i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1530{
1531	return obj_priv->madv == I915_MADV_DONTNEED;
1532}
1533
1534static void
1535i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1536{
1537	struct drm_device *dev = obj->dev;
1538	drm_i915_private_t *dev_priv = dev->dev_private;
1539	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1540
1541	i915_verify_inactive(dev, __FILE__, __LINE__);
1542	if (obj_priv->pin_count != 0)
1543		list_del_init(&obj_priv->list);
1544	else
1545		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1546
1547	BUG_ON(!list_empty(&obj_priv->gpu_write_list));
1548
1549	obj_priv->last_rendering_seqno = 0;
1550	obj_priv->ring = NULL;
1551	if (obj_priv->active) {
1552		obj_priv->active = 0;
1553		drm_gem_object_unreference(obj);
1554	}
1555	i915_verify_inactive(dev, __FILE__, __LINE__);
1556}
1557
1558static void
1559i915_gem_process_flushing_list(struct drm_device *dev,
1560			       uint32_t flush_domains, uint32_t seqno,
1561			       struct intel_ring_buffer *ring)
1562{
1563	drm_i915_private_t *dev_priv = dev->dev_private;
1564	struct drm_i915_gem_object *obj_priv, *next;
1565
1566	list_for_each_entry_safe(obj_priv, next,
1567				 &dev_priv->mm.gpu_write_list,
1568				 gpu_write_list) {
1569		struct drm_gem_object *obj = &obj_priv->base;
1570
1571		if ((obj->write_domain & flush_domains) ==
1572		    obj->write_domain &&
1573		    obj_priv->ring->ring_flag == ring->ring_flag) {
1574			uint32_t old_write_domain = obj->write_domain;
1575
1576			obj->write_domain = 0;
1577			list_del_init(&obj_priv->gpu_write_list);
1578			i915_gem_object_move_to_active(obj, seqno, ring);
1579
1580			/* update the fence lru list */
1581			if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1582				struct drm_i915_fence_reg *reg =
1583					&dev_priv->fence_regs[obj_priv->fence_reg];
1584				list_move_tail(&reg->lru_list,
1585						&dev_priv->mm.fence_list);
1586			}
1587
1588			trace_i915_gem_object_change_domain(obj,
1589							    obj->read_domains,
1590							    old_write_domain);
1591		}
1592	}
1593}
1594
1595uint32_t
1596i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1597		 uint32_t flush_domains, struct intel_ring_buffer *ring)
1598{
1599	drm_i915_private_t *dev_priv = dev->dev_private;
1600	struct drm_i915_file_private *i915_file_priv = NULL;
1601	struct drm_i915_gem_request *request;
1602	uint32_t seqno;
1603	int was_empty;
1604
1605	if (file_priv != NULL)
1606		i915_file_priv = file_priv->driver_priv;
1607
1608	request = kzalloc(sizeof(*request), GFP_KERNEL);
1609	if (request == NULL)
1610		return 0;
1611
1612	seqno = ring->add_request(dev, ring, file_priv, flush_domains);
1613
1614	request->seqno = seqno;
1615	request->ring = ring;
1616	request->emitted_jiffies = jiffies;
1617	was_empty = list_empty(&ring->request_list);
1618	list_add_tail(&request->list, &ring->request_list);
1619
1620	if (i915_file_priv) {
1621		list_add_tail(&request->client_list,
1622			      &i915_file_priv->mm.request_list);
1623	} else {
1624		INIT_LIST_HEAD(&request->client_list);
1625	}
1626
1627	/* Associate any objects on the flushing list matching the write
1628	 * domain we're flushing with our flush.
1629	 */
1630	if (flush_domains != 0)
1631		i915_gem_process_flushing_list(dev, flush_domains, seqno, ring);
1632
1633	if (!dev_priv->mm.suspended) {
1634		mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1635		if (was_empty)
1636			queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1637	}
1638	return seqno;
1639}
1640
1641/**
1642 * Command execution barrier
1643 *
1644 * Ensures that all commands in the ring are finished
1645 * before signalling the CPU
1646 */
1647static uint32_t
1648i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1649{
1650	uint32_t flush_domains = 0;
1651
1652	/* The sampler always gets flushed on i965 (sigh) */
1653	if (IS_I965G(dev))
1654		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1655
1656	ring->flush(dev, ring,
1657			I915_GEM_DOMAIN_COMMAND, flush_domains);
1658	return flush_domains;
1659}
1660
1661/**
1662 * Moves buffers associated only with the given active seqno from the active
1663 * to inactive list, potentially freeing them.
1664 */
1665static void
1666i915_gem_retire_request(struct drm_device *dev,
1667			struct drm_i915_gem_request *request)
1668{
1669	drm_i915_private_t *dev_priv = dev->dev_private;
1670
1671	trace_i915_gem_request_retire(dev, request->seqno);
1672
1673	/* Move any buffers on the active list that are no longer referenced
1674	 * by the ringbuffer to the flushing/inactive lists as appropriate.
1675	 */
1676	spin_lock(&dev_priv->mm.active_list_lock);
1677	while (!list_empty(&request->ring->active_list)) {
1678		struct drm_gem_object *obj;
1679		struct drm_i915_gem_object *obj_priv;
1680
1681		obj_priv = list_first_entry(&request->ring->active_list,
1682					    struct drm_i915_gem_object,
1683					    list);
1684		obj = &obj_priv->base;
1685
1686		/* If the seqno being retired doesn't match the oldest in the
1687		 * list, then the oldest in the list must still be newer than
1688		 * this seqno.
1689		 */
1690		if (obj_priv->last_rendering_seqno != request->seqno)
1691			goto out;
1692
1693#if WATCH_LRU
1694		DRM_INFO("%s: retire %d moves to inactive list %p\n",
1695			 __func__, request->seqno, obj);
1696#endif
1697
1698		if (obj->write_domain != 0)
1699			i915_gem_object_move_to_flushing(obj);
1700		else {
1701			/* Take a reference on the object so it won't be
1702			 * freed while the spinlock is held.  The list
1703			 * protection for this spinlock is safe when breaking
1704			 * the lock like this since the next thing we do
1705			 * is just get the head of the list again.
1706			 */
1707			drm_gem_object_reference(obj);
1708			i915_gem_object_move_to_inactive(obj);
1709			spin_unlock(&dev_priv->mm.active_list_lock);
1710			drm_gem_object_unreference(obj);
1711			spin_lock(&dev_priv->mm.active_list_lock);
1712		}
1713	}
1714out:
1715	spin_unlock(&dev_priv->mm.active_list_lock);
1716}
1717
1718/**
1719 * Returns true if seq1 is at or after seq2, taking wraparound into account.
1720 */
1721bool
1722i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1723{
1724	return (int32_t)(seq1 - seq2) >= 0;
1725}
1726
1727uint32_t
1728i915_get_gem_seqno(struct drm_device *dev,
1729		   struct intel_ring_buffer *ring)
1730{
1731	return ring->get_gem_seqno(dev, ring);
1732}
1733
1734/**
1735 * This function clears the request list as sequence numbers are passed.
1736 */
1737static void
1738i915_gem_retire_requests_ring(struct drm_device *dev,
1739			      struct intel_ring_buffer *ring)
1740{
1741	drm_i915_private_t *dev_priv = dev->dev_private;
1742	uint32_t seqno;
1743
1744	if (!ring->status_page.page_addr
1745			|| list_empty(&ring->request_list))
1746		return;
1747
1748	seqno = i915_get_gem_seqno(dev, ring);
1749
1750	while (!list_empty(&ring->request_list)) {
1751		struct drm_i915_gem_request *request;
1752		uint32_t retiring_seqno;
1753
1754		request = list_first_entry(&ring->request_list,
1755					   struct drm_i915_gem_request,
1756					   list);
1757		retiring_seqno = request->seqno;
1758
1759		if (i915_seqno_passed(seqno, retiring_seqno) ||
1760		    atomic_read(&dev_priv->mm.wedged)) {
1761			i915_gem_retire_request(dev, request);
1762
1763			list_del(&request->list);
1764			list_del(&request->client_list);
1765			kfree(request);
1766		} else
1767			break;
1768	}
1769
1770	if (unlikely (dev_priv->trace_irq_seqno &&
1771		      i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1772
1773		ring->user_irq_put(dev, ring);
1774		dev_priv->trace_irq_seqno = 0;
1775	}
1776}
1777
1778void
1779i915_gem_retire_requests(struct drm_device *dev)
1780{
1781	drm_i915_private_t *dev_priv = dev->dev_private;
1782
1783	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1784	    struct drm_i915_gem_object *obj_priv, *tmp;
1785
1786	    /* We must be careful that during unbind() we do not
1787	     * accidentally infinitely recurse into retire requests.
1788	     * Currently:
1789	     *   retire -> free -> unbind -> wait -> retire_ring
1790	     */
1791	    list_for_each_entry_safe(obj_priv, tmp,
1792				     &dev_priv->mm.deferred_free_list,
1793				     list)
1794		    i915_gem_free_object_tail(&obj_priv->base);
1795	}
1796
1797	i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1798	if (HAS_BSD(dev))
1799		i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1800}
1801
1802void
1803i915_gem_retire_work_handler(struct work_struct *work)
1804{
1805	drm_i915_private_t *dev_priv;
1806	struct drm_device *dev;
1807
1808	dev_priv = container_of(work, drm_i915_private_t,
1809				mm.retire_work.work);
1810	dev = dev_priv->dev;
1811
1812	mutex_lock(&dev->struct_mutex);
1813	i915_gem_retire_requests(dev);
1814
1815	if (!dev_priv->mm.suspended &&
1816		(!list_empty(&dev_priv->render_ring.request_list) ||
1817			(HAS_BSD(dev) &&
1818			 !list_empty(&dev_priv->bsd_ring.request_list))))
1819		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1820	mutex_unlock(&dev->struct_mutex);
1821}
1822
1823int
1824i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1825		int interruptible, struct intel_ring_buffer *ring)
1826{
1827	drm_i915_private_t *dev_priv = dev->dev_private;
1828	u32 ier;
1829	int ret = 0;
1830
1831	BUG_ON(seqno == 0);
1832
1833	if (atomic_read(&dev_priv->mm.wedged))
1834		return -EIO;
1835
1836	if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
1837		if (HAS_PCH_SPLIT(dev))
1838			ier = I915_READ(DEIER) | I915_READ(GTIER);
1839		else
1840			ier = I915_READ(IER);
1841		if (!ier) {
1842			DRM_ERROR("something (likely vbetool) disabled "
1843				  "interrupts, re-enabling\n");
1844			i915_driver_irq_preinstall(dev);
1845			i915_driver_irq_postinstall(dev);
1846		}
1847
1848		trace_i915_gem_request_wait_begin(dev, seqno);
1849
1850		ring->waiting_gem_seqno = seqno;
1851		ring->user_irq_get(dev, ring);
1852		if (interruptible)
1853			ret = wait_event_interruptible(ring->irq_queue,
1854				i915_seqno_passed(
1855					ring->get_gem_seqno(dev, ring), seqno)
1856				|| atomic_read(&dev_priv->mm.wedged));
1857		else
1858			wait_event(ring->irq_queue,
1859				i915_seqno_passed(
1860					ring->get_gem_seqno(dev, ring), seqno)
1861				|| atomic_read(&dev_priv->mm.wedged));
1862
1863		ring->user_irq_put(dev, ring);
1864		ring->waiting_gem_seqno = 0;
1865
1866		trace_i915_gem_request_wait_end(dev, seqno);
1867	}
1868	if (atomic_read(&dev_priv->mm.wedged))
1869		ret = -EIO;
1870
1871	if (ret && ret != -ERESTARTSYS)
1872		DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
1873			  __func__, ret, seqno, ring->get_gem_seqno(dev, ring));
1874
1875	/* Directly dispatch request retiring.  While we have the work queue
1876	 * to handle this, the waiter on a request often wants an associated
1877	 * buffer to have made it to the inactive list, and we would need
1878	 * a separate wait queue to handle that.
1879	 */
1880	if (ret == 0)
1881		i915_gem_retire_requests_ring(dev, ring);
1882
1883	return ret;
1884}
1885
1886/**
1887 * Waits for a sequence number to be signaled, and cleans up the
1888 * request and object lists appropriately for that event.
1889 */
1890static int
1891i915_wait_request(struct drm_device *dev, uint32_t seqno,
1892		struct intel_ring_buffer *ring)
1893{
1894	return i915_do_wait_request(dev, seqno, 1, ring);
1895}
1896
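/* Emits a flush for the given domains on each ring, plus a chipset flush
 * when the CPU write domain is being flushed.
 */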
1897static void
1898i915_gem_flush(struct drm_device *dev,
1899	       uint32_t invalidate_domains,
1900	       uint32_t flush_domains)
1901{
1902	drm_i915_private_t *dev_priv = dev->dev_private;
1903	if (flush_domains & I915_GEM_DOMAIN_CPU)
1904		drm_agp_chipset_flush(dev);
1905	dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
1906			invalidate_domains,
1907			flush_domains);
1908
1909	if (HAS_BSD(dev))
1910		dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
1911				invalidate_domains,
1912				flush_domains);
1913}
1914
1915/**
1916 * Ensures that all rendering to the object has completed and the object is
1917 * safe to unbind from the GTT or access from the CPU.
1918 */
1919static int
1920i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1921{
1922	struct drm_device *dev = obj->dev;
1923	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1924	int ret;
1925
1926	/* This function only exists to support waiting for existing rendering,
1927	 * not for emitting required flushes.
1928	 */
1929	BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
1930
1931	/* If there is rendering queued on the buffer being evicted, wait for
1932	 * it.
1933	 */
1934	if (obj_priv->active) {
1935#if WATCH_BUF
1936		DRM_INFO("%s: object %p wait for seqno %08x\n",
1937			  __func__, obj, obj_priv->last_rendering_seqno);
1938#endif
1939		ret = i915_wait_request(dev,
1940				obj_priv->last_rendering_seqno, obj_priv->ring);
1941		if (ret != 0)
1942			return ret;
1943	}
1944
1945	return 0;
1946}
1947
1948/**
1949 * Unbinds an object from the GTT aperture.
1950 */
1951int
1952i915_gem_object_unbind(struct drm_gem_object *obj)
1953{
1954	struct drm_device *dev = obj->dev;
1955	drm_i915_private_t *dev_priv = dev->dev_private;
1956	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1957	int ret = 0;
1958
1959#if WATCH_BUF
1960	DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1961	DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1962#endif
1963	if (obj_priv->gtt_space == NULL)
1964		return 0;
1965
1966	if (obj_priv->pin_count != 0) {
1967		DRM_ERROR("Attempting to unbind pinned buffer\n");
1968		return -EINVAL;
1969	}
1970
1971	/* blow away mappings if mapped through GTT */
1972	i915_gem_release_mmap(obj);
1973
1974	/* Move the object to the CPU domain to ensure that
1975	 * any possible CPU writes while it's not in the GTT
1976	 * are flushed when we go to remap it. This will
1977	 * also ensure that all pending GPU writes are finished
1978	 * before we unbind.
1979	 */
1980	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1981	if (ret == -ERESTARTSYS)
1982		return ret;
1983	/* Continue on if we fail due to EIO; the GPU is hung, so continuing
1984	 * should be safe, and we need to clean up or else we might
1985	 * cause memory corruption through a use-after-free.
1986	 */
1987
1988	/* release the fence reg _after_ flushing */
1989	if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1990		i915_gem_clear_fence_reg(obj);
1991
1992	if (obj_priv->agp_mem != NULL) {
1993		drm_unbind_agp(obj_priv->agp_mem);
1994		drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1995		obj_priv->agp_mem = NULL;
1996	}
1997
1998	i915_gem_object_put_pages(obj);
1999	BUG_ON(obj_priv->pages_refcount);
2000
2001	if (obj_priv->gtt_space) {
2002		atomic_dec(&dev->gtt_count);
2003		atomic_sub(obj->size, &dev->gtt_memory);
2004
2005		drm_mm_put_block(obj_priv->gtt_space);
2006		obj_priv->gtt_space = NULL;
2007	}
2008
2009	/* Remove ourselves from the LRU list if present. */
2010	spin_lock(&dev_priv->mm.active_list_lock);
2011	if (!list_empty(&obj_priv->list))
2012		list_del_init(&obj_priv->list);
2013	spin_unlock(&dev_priv->mm.active_list_lock);
2014
2015	if (i915_gem_object_is_purgeable(obj_priv))
2016		i915_gem_object_truncate(obj);
2017
2018	trace_i915_gem_object_unbind(obj);
2019
2020	return ret;
2021}
2022
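/**
 * If anything is still active, flushes all GPU domains and waits on a
 * request for the render ring (and the BSD ring, when present) so that
 * every outstanding buffer ends up on the inactive list.
 */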
2023int
2024i915_gpu_idle(struct drm_device *dev)
2025{
2026	drm_i915_private_t *dev_priv = dev->dev_private;
2027	bool lists_empty;
2028	uint32_t seqno1, seqno2;
2029	int ret;
2030
2031	spin_lock(&dev_priv->mm.active_list_lock);
2032	lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2033		       list_empty(&dev_priv->render_ring.active_list) &&
2034		       (!HAS_BSD(dev) ||
2035			list_empty(&dev_priv->bsd_ring.active_list)));
2036	spin_unlock(&dev_priv->mm.active_list_lock);
2037
2038	if (lists_empty)
2039		return 0;
2040
2041	/* Flush everything onto the inactive list. */
2042	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2043	seqno1 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2044			&dev_priv->render_ring);
2045	if (seqno1 == 0)
2046		return -ENOMEM;
2047	ret = i915_wait_request(dev, seqno1, &dev_priv->render_ring);
	if (ret)
		return ret;
2048
2049	if (HAS_BSD(dev)) {
2050		seqno2 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2051				&dev_priv->bsd_ring);
2052		if (seqno2 == 0)
2053			return -ENOMEM;
2054
2055		ret = i915_wait_request(dev, seqno2, &dev_priv->bsd_ring);
2056		if (ret)
2057			return ret;
2058	}
2059
2060
2061	return ret;
2062}
2063
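/**
 * Pins the object's backing pages, populating obj_priv->pages.  The pin is
 * reference counted, so only the first caller actually faults the pages in
 * from the object's shmem backing store.
 */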
2064int
2065i915_gem_object_get_pages(struct drm_gem_object *obj,
2066			  gfp_t gfpmask)
2067{
2068	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2069	int page_count, i;
2070	struct address_space *mapping;
2071	struct inode *inode;
2072	struct page *page;
2073
2074	BUG_ON(obj_priv->pages_refcount
2075			== DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);
2076
2077	if (obj_priv->pages_refcount++ != 0)
2078		return 0;
2079
2080	/* Get the list of pages out of our struct file.  They'll be pinned
2081	 * at this point until we release them.
2082	 */
2083	page_count = obj->size / PAGE_SIZE;
2084	BUG_ON(obj_priv->pages != NULL);
2085	obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2086	if (obj_priv->pages == NULL) {
2087		obj_priv->pages_refcount--;
2088		return -ENOMEM;
2089	}
2090
2091	inode = obj->filp->f_path.dentry->d_inode;
2092	mapping = inode->i_mapping;
2093	for (i = 0; i < page_count; i++) {
2094		page = read_cache_page_gfp(mapping, i,
2095					   GFP_HIGHUSER |
2096					   __GFP_COLD |
2097					   __GFP_RECLAIMABLE |
2098					   gfpmask);
2099		if (IS_ERR(page))
2100			goto err_pages;
2101
2102		obj_priv->pages[i] = page;
2103	}
2104
2105	if (obj_priv->tiling_mode != I915_TILING_NONE)
2106		i915_gem_object_do_bit_17_swizzle(obj);
2107
2108	return 0;
2109
2110err_pages:
2111	while (i--)
2112		page_cache_release(obj_priv->pages[i]);
2113
2114	drm_free_large(obj_priv->pages);
2115	obj_priv->pages = NULL;
2116	obj_priv->pages_refcount--;
2117	return PTR_ERR(page);
2118}
2119
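/* The *_write_fence_reg() helpers below program a single fence register
 * with the object's GTT range, pitch and tiling mode, using the register
 * layout of the corresponding hardware generation.
 */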
2120static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
2121{
2122	struct drm_gem_object *obj = reg->obj;
2123	struct drm_device *dev = obj->dev;
2124	drm_i915_private_t *dev_priv = dev->dev_private;
2125	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2126	int regnum = obj_priv->fence_reg;
2127	uint64_t val;
2128
2129	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2130		    0xfffff000) << 32;
2131	val |= obj_priv->gtt_offset & 0xfffff000;
2132	val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
2133		SANDYBRIDGE_FENCE_PITCH_SHIFT;
2134
2135	if (obj_priv->tiling_mode == I915_TILING_Y)
2136		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2137	val |= I965_FENCE_REG_VALID;
2138
2139	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
2140}
2141
2142static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2143{
2144	struct drm_gem_object *obj = reg->obj;
2145	struct drm_device *dev = obj->dev;
2146	drm_i915_private_t *dev_priv = dev->dev_private;
2147	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2148	int regnum = obj_priv->fence_reg;
2149	uint64_t val;
2150
2151	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2152		    0xfffff000) << 32;
2153	val |= obj_priv->gtt_offset & 0xfffff000;
2154	val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2155	if (obj_priv->tiling_mode == I915_TILING_Y)
2156		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2157	val |= I965_FENCE_REG_VALID;
2158
2159	I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2160}
2161
2162static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2163{
2164	struct drm_gem_object *obj = reg->obj;
2165	struct drm_device *dev = obj->dev;
2166	drm_i915_private_t *dev_priv = dev->dev_private;
2167	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2168	int regnum = obj_priv->fence_reg;
2169	int tile_width;
2170	uint32_t fence_reg, val;
2171	uint32_t pitch_val;
2172
2173	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2174	    (obj_priv->gtt_offset & (obj->size - 1))) {
2175		WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2176		     __func__, obj_priv->gtt_offset, obj->size);
2177		return;
2178	}
2179
2180	if (obj_priv->tiling_mode == I915_TILING_Y &&
2181	    HAS_128_BYTE_Y_TILING(dev))
2182		tile_width = 128;
2183	else
2184		tile_width = 512;
2185
2186	/* Note: the pitch must be a power-of-two number of tile widths. */
2187	pitch_val = obj_priv->stride / tile_width;
2188	pitch_val = ffs(pitch_val) - 1;
2189
2190	if (obj_priv->tiling_mode == I915_TILING_Y &&
2191	    HAS_128_BYTE_Y_TILING(dev))
2192		WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2193	else
2194		WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2195
2196	val = obj_priv->gtt_offset;
2197	if (obj_priv->tiling_mode == I915_TILING_Y)
2198		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2199	val |= I915_FENCE_SIZE_BITS(obj->size);
2200	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2201	val |= I830_FENCE_REG_VALID;
2202
2203	if (regnum < 8)
2204		fence_reg = FENCE_REG_830_0 + (regnum * 4);
2205	else
2206		fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2207	I915_WRITE(fence_reg, val);
2208}
2209
2210static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2211{
2212	struct drm_gem_object *obj = reg->obj;
2213	struct drm_device *dev = obj->dev;
2214	drm_i915_private_t *dev_priv = dev->dev_private;
2215	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2216	int regnum = obj_priv->fence_reg;
2217	uint32_t val;
2218	uint32_t pitch_val;
2219	uint32_t fence_size_bits;
2220
2221	if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2222	    (obj_priv->gtt_offset & (obj->size - 1))) {
2223		WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2224		     __func__, obj_priv->gtt_offset);
2225		return;
2226	}
2227
2228	pitch_val = obj_priv->stride / 128;
2229	pitch_val = ffs(pitch_val) - 1;
2230	WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2231
2232	val = obj_priv->gtt_offset;
2233	if (obj_priv->tiling_mode == I915_TILING_Y)
2234		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2235	fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2236	WARN_ON(fence_size_bits & ~0x00000f00);
2237	val |= fence_size_bits;
2238	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2239	val |= I830_FENCE_REG_VALID;
2240
2241	I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2242}
2243
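/* Returns the index of a free fence register, stealing the oldest unpinned
 * entry on the fence LRU list (waiting for any outstanding fenced access
 * where required) when none are free.
 */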
2244static int i915_find_fence_reg(struct drm_device *dev)
2245{
2246	struct drm_i915_fence_reg *reg = NULL;
2247	struct drm_i915_gem_object *obj_priv = NULL;
2248	struct drm_i915_private *dev_priv = dev->dev_private;
2249	struct drm_gem_object *obj = NULL;
2250	int i, avail, ret;
2251
2252	/* First try to find a free reg */
2253	avail = 0;
2254	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2255		reg = &dev_priv->fence_regs[i];
2256		if (!reg->obj)
2257			return i;
2258
2259		obj_priv = to_intel_bo(reg->obj);
2260		if (!obj_priv->pin_count)
2261		    avail++;
2262	}
2263
2264	if (avail == 0)
2265		return -ENOSPC;
2266
2267	/* None available, try to steal one or wait for a user to finish */
2268	i = I915_FENCE_REG_NONE;
2269	list_for_each_entry(reg, &dev_priv->mm.fence_list,
2270			    lru_list) {
2271		obj = reg->obj;
2272		obj_priv = to_intel_bo(obj);
2273
2274		if (obj_priv->pin_count)
2275			continue;
2276
2277		/* found one! */
2278		i = obj_priv->fence_reg;
2279		break;
2280	}
2281
2282	BUG_ON(i == I915_FENCE_REG_NONE);
2283
2284	/* We only have a reference on obj from the active list. put_fence_reg
2285	 * might drop that one, causing a use-after-free in it. So hold a
2286	 * private reference to obj like the other callers of put_fence_reg
2287	 * (set_tiling ioctl) do. */
2288	drm_gem_object_reference(obj);
2289	ret = i915_gem_object_put_fence_reg(obj);
2290	drm_gem_object_unreference(obj);
2291	if (ret != 0)
2292		return ret;
2293
2294	return i;
2295}
2296
2297/**
2298 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2299 * @obj: object to map through a fence reg
2300 *
2301 * When mapping objects through the GTT, userspace wants to be able to write
2302 * to them without having to worry about swizzling if the object is tiled.
2303 *
2304 * This function walks the fence regs looking for a free one for @obj,
2305 * stealing one if it can't find any.
2306 *
2307 * It then sets up the reg based on the object's properties: address, pitch
2308 * and tiling format.
2309 */
2310int
2311i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
2312{
2313	struct drm_device *dev = obj->dev;
2314	struct drm_i915_private *dev_priv = dev->dev_private;
2315	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2316	struct drm_i915_fence_reg *reg = NULL;
2317	int ret;
2318
2319	/* Just update our place in the LRU if our fence is getting used. */
2320	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2321		reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2322		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2323		return 0;
2324	}
2325
2326	switch (obj_priv->tiling_mode) {
2327	case I915_TILING_NONE:
2328		WARN(1, "allocating a fence for non-tiled object?\n");
2329		break;
2330	case I915_TILING_X:
2331		if (!obj_priv->stride)
2332			return -EINVAL;
2333		WARN((obj_priv->stride & (512 - 1)),
2334		     "object 0x%08x is X tiled but has non-512B pitch\n",
2335		     obj_priv->gtt_offset);
2336		break;
2337	case I915_TILING_Y:
2338		if (!obj_priv->stride)
2339			return -EINVAL;
2340		WARN((obj_priv->stride & (128 - 1)),
2341		     "object 0x%08x is Y tiled but has non-128B pitch\n",
2342		     obj_priv->gtt_offset);
2343		break;
2344	}
2345
2346	ret = i915_find_fence_reg(dev);
2347	if (ret < 0)
2348		return ret;
2349
2350	obj_priv->fence_reg = ret;
2351	reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2352	list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2353
2354	reg->obj = obj;
2355
2356	switch (INTEL_INFO(dev)->gen) {
2357	case 6:
2358		sandybridge_write_fence_reg(reg);
2359		break;
2360	case 5:
2361	case 4:
2362		i965_write_fence_reg(reg);
2363		break;
2364	case 3:
2365		i915_write_fence_reg(reg);
2366		break;
2367	case 2:
2368		i830_write_fence_reg(reg);
2369		break;
2370	}
2371
2372	trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2373			obj_priv->tiling_mode);
2374
2375	return 0;
2376}
2377
2378/**
2379 * i915_gem_clear_fence_reg - clear out fence register info
2380 * @obj: object to clear
2381 *
2382 * Zeroes out the fence register itself and clears out the associated
2383 * data structures in dev_priv and obj_priv.
2384 */
2385static void
2386i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2387{
2388	struct drm_device *dev = obj->dev;
2389	drm_i915_private_t *dev_priv = dev->dev_private;
2390	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2391	struct drm_i915_fence_reg *reg =
2392		&dev_priv->fence_regs[obj_priv->fence_reg];
2393	uint32_t fence_reg;
2394
2395	switch (INTEL_INFO(dev)->gen) {
2396	case 6:
2397		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2398			     (obj_priv->fence_reg * 8), 0);
2399		break;
2400	case 5:
2401	case 4:
2402		I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2403		break;
2404	case 3:
2405		if (obj_priv->fence_reg >= 8)
2406			fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4;
2407		else
2408	case 2:
2409			fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2410
2411		I915_WRITE(fence_reg, 0);
2412		break;
2413	}
2414
2415	reg->obj = NULL;
2416	obj_priv->fence_reg = I915_FENCE_REG_NONE;
2417	list_del_init(&reg->lru_list);
2418}
2419
2420/**
2421 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2422 * to the buffer to finish, and then resets the fence register.
2423 * @obj: tiled object holding a fence register.
2424 *
2425 * Zeroes out the fence register itself and clears out the associated
2426 * data structures in dev_priv and obj_priv.
2427 */
2428int
2429i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2430{
2431	struct drm_device *dev = obj->dev;
2432	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2433
2434	if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2435		return 0;
2436
2437	/* If we've changed tiling, GTT-mappings of the object
2438	 * need to re-fault to ensure that the correct fence register
2439	 * setup is in place.
2440	 */
2441	i915_gem_release_mmap(obj);
2442
2443	/* On the i915, GPU access to tiled buffers is via a fence,
2444	 * therefore we must wait for any outstanding access to complete
2445	 * before clearing the fence.
2446	 */
2447	if (!IS_I965G(dev)) {
2448		int ret;
2449
2450		ret = i915_gem_object_flush_gpu_write_domain(obj);
2451		if (ret != 0)
2452			return ret;
2453
2454		ret = i915_gem_object_wait_rendering(obj);
2455		if (ret != 0)
2456			return ret;
2457	}
2458
2459	i915_gem_object_flush_gtt_write_domain(obj);
2460	i915_gem_clear_fence_reg(obj);
2461
2462	return 0;
2463}
2464
2465/**
2466 * Finds free space in the GTT aperture and binds the object there.
2467 */
2468static int
2469i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2470{
2471	struct drm_device *dev = obj->dev;
2472	drm_i915_private_t *dev_priv = dev->dev_private;
2473	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2474	struct drm_mm_node *free_space;
2475	gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2476	int ret;
2477
2478	if (obj_priv->madv != I915_MADV_WILLNEED) {
2479		DRM_ERROR("Attempting to bind a purgeable object\n");
2480		return -EINVAL;
2481	}
2482
2483	if (alignment == 0)
2484		alignment = i915_gem_get_gtt_alignment(obj);
2485	if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2486		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2487		return -EINVAL;
2488	}
2489
2490	/* If the object is bigger than the entire aperture, reject it early
2491	 * before evicting everything in a vain attempt to find space.
2492	 */
2493	if (obj->size > dev->gtt_total) {
2494		DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2495		return -E2BIG;
2496	}
2497
2498 search_free:
2499	free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2500					obj->size, alignment, 0);
2501	if (free_space != NULL) {
2502		obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2503						       alignment);
2504		if (obj_priv->gtt_space != NULL)
2505			obj_priv->gtt_offset = obj_priv->gtt_space->start;
2506	}
2507	if (obj_priv->gtt_space == NULL) {
2508		/* If the gtt is empty and we're still having trouble
2509		 * fitting our object in, we're out of memory.
2510		 */
2511#if WATCH_LRU
2512		DRM_INFO("%s: GTT full, evicting something\n", __func__);
2513#endif
2514		ret = i915_gem_evict_something(dev, obj->size, alignment);
2515		if (ret)
2516			return ret;
2517
2518		goto search_free;
2519	}
2520
2521#if WATCH_BUF
2522	DRM_INFO("Binding object of size %zd at 0x%08x\n",
2523		 obj->size, obj_priv->gtt_offset);
2524#endif
2525	ret = i915_gem_object_get_pages(obj, gfpmask);
2526	if (ret) {
2527		drm_mm_put_block(obj_priv->gtt_space);
2528		obj_priv->gtt_space = NULL;
2529
2530		if (ret == -ENOMEM) {
2531			/* first try to clear up some space from the GTT */
2532			ret = i915_gem_evict_something(dev, obj->size,
2533						       alignment);
2534			if (ret) {
2535				/* now try to shrink everyone else */
2536				if (gfpmask) {
2537					gfpmask = 0;
2538					goto search_free;
2539				}
2540
2541				return ret;
2542			}
2543
2544			goto search_free;
2545		}
2546
2547		return ret;
2548	}
2549
2550	/* Create an AGP memory structure pointing at our pages, and bind it
2551	 * into the GTT.
2552	 */
2553	obj_priv->agp_mem = drm_agp_bind_pages(dev,
2554					       obj_priv->pages,
2555					       obj->size >> PAGE_SHIFT,
2556					       obj_priv->gtt_offset,
2557					       obj_priv->agp_type);
2558	if (obj_priv->agp_mem == NULL) {
2559		i915_gem_object_put_pages(obj);
2560		drm_mm_put_block(obj_priv->gtt_space);
2561		obj_priv->gtt_space = NULL;
2562
2563		ret = i915_gem_evict_something(dev, obj->size, alignment);
2564		if (ret)
2565			return ret;
2566
2567		goto search_free;
2568	}
2569	atomic_inc(&dev->gtt_count);
2570	atomic_add(obj->size, &dev->gtt_memory);
2571
2572	/* Keep track of the bound object by adding it to the inactive list. */
2573	list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
2574
2575	/* Assert that the object is not currently in any GPU domain. As it
2576	 * wasn't in the GTT, there shouldn't be any way it could have been in
2577	 * a GPU cache
2578	 */
2579	BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2580	BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2581
2582	trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
2583
2584	return 0;
2585}
2586
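/** Flushes the object's backing pages out of the CPU caches. */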
2587void
2588i915_gem_clflush_object(struct drm_gem_object *obj)
2589{
2590	struct drm_i915_gem_object	*obj_priv = to_intel_bo(obj);
2591
2592	/* If we don't have a page list set up, then we're not pinned
2593	 * to GPU, and we can ignore the cache flush because it'll happen
2594	 * again at bind time.
2595	 */
2596	if (obj_priv->pages == NULL)
2597		return;
2598
2599	trace_i915_gem_object_clflush(obj);
2600
2601	drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2602}
2603
2604/** Flushes any GPU write domain for the object if it's dirty. */
2605static int
2606i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2607{
2608	struct drm_device *dev = obj->dev;
2609	uint32_t old_write_domain;
2610	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2611
2612	if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2613		return 0;
2614
2615	/* Queue the GPU write cache flushing we need. */
2616	old_write_domain = obj->write_domain;
2617	i915_gem_flush(dev, 0, obj->write_domain);
2618	if (i915_add_request(dev, NULL, obj->write_domain, obj_priv->ring) == 0)
2619		return -ENOMEM;
2620
2621	trace_i915_gem_object_change_domain(obj,
2622					    obj->read_domains,
2623					    old_write_domain);
2624	return 0;
2625}
2626
2627/** Flushes the GTT write domain for the object if it's dirty. */
2628static void
2629i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2630{
2631	uint32_t old_write_domain;
2632
2633	if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2634		return;
2635
2636	/* No actual flushing is required for the GTT write domain.   Writes
2637	 * to it immediately go to main memory as far as we know, so there's
2638	 * no chipset flush.  It also doesn't land in render cache.
2639	 */
2640	old_write_domain = obj->write_domain;
2641	obj->write_domain = 0;
2642
2643	trace_i915_gem_object_change_domain(obj,
2644					    obj->read_domains,
2645					    old_write_domain);
2646}
2647
2648/** Flushes the CPU write domain for the object if it's dirty. */
2649static void
2650i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2651{
2652	struct drm_device *dev = obj->dev;
2653	uint32_t old_write_domain;
2654
2655	if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2656		return;
2657
2658	i915_gem_clflush_object(obj);
2659	drm_agp_chipset_flush(dev);
2660	old_write_domain = obj->write_domain;
2661	obj->write_domain = 0;
2662
2663	trace_i915_gem_object_change_domain(obj,
2664					    obj->read_domains,
2665					    old_write_domain);
2666}
2667
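/** Flushes whichever write domain (GTT, CPU or GPU) the object is dirty in. */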
2668int
2669i915_gem_object_flush_write_domain(struct drm_gem_object *obj)
2670{
2671	int ret = 0;
2672
2673	switch (obj->write_domain) {
2674	case I915_GEM_DOMAIN_GTT:
2675		i915_gem_object_flush_gtt_write_domain(obj);
2676		break;
2677	case I915_GEM_DOMAIN_CPU:
2678		i915_gem_object_flush_cpu_write_domain(obj);
2679		break;
2680	default:
2681		ret = i915_gem_object_flush_gpu_write_domain(obj);
2682		break;
2683	}
2684
2685	return ret;
2686}
2687
2688/**
2689 * Moves a single object to the GTT read, and possibly write domain.
2690 *
2691 * This function returns when the move is complete, including waiting on
2692 * flushes to occur.
2693 */
2694int
2695i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2696{
2697	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2698	uint32_t old_write_domain, old_read_domains;
2699	int ret;
2700
2701	/* Not valid to be called on unbound objects. */
2702	if (obj_priv->gtt_space == NULL)
2703		return -EINVAL;
2704
2705	ret = i915_gem_object_flush_gpu_write_domain(obj);
2706	if (ret != 0)
2707		return ret;
2708
2709	/* Wait on any GPU rendering and flushing to occur. */
2710	ret = i915_gem_object_wait_rendering(obj);
2711	if (ret != 0)
2712		return ret;
2713
2714	old_write_domain = obj->write_domain;
2715	old_read_domains = obj->read_domains;
2716
2717	/* If we're writing through the GTT domain, then CPU and GPU caches
2718	 * will need to be invalidated at next use.
2719	 */
2720	if (write)
2721		obj->read_domains &= I915_GEM_DOMAIN_GTT;
2722
2723	i915_gem_object_flush_cpu_write_domain(obj);
2724
2725	/* It should now be out of any other write domains, and we can update
2726	 * the domain values for our changes.
2727	 */
2728	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2729	obj->read_domains |= I915_GEM_DOMAIN_GTT;
2730	if (write) {
2731		obj->write_domain = I915_GEM_DOMAIN_GTT;
2732		obj_priv->dirty = 1;
2733	}
2734
2735	trace_i915_gem_object_change_domain(obj,
2736					    old_read_domains,
2737					    old_write_domain);
2738
2739	return 0;
2740}
2741
2742/*
2743 * Prepare a buffer for use as a display plane. Use an uninterruptible wait
2744 * for any flush, since the modesetting process must not be interrupted.
2745 */
2746int
2747i915_gem_object_set_to_display_plane(struct drm_gem_object *obj)
2748{
2749	struct drm_device *dev = obj->dev;
2750	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2751	uint32_t old_write_domain, old_read_domains;
2752	int ret;
2753
2754	/* Not valid to be called on unbound objects. */
2755	if (obj_priv->gtt_space == NULL)
2756		return -EINVAL;
2757
2758	ret = i915_gem_object_flush_gpu_write_domain(obj);
2759	if (ret)
2760		return ret;
2761
2762	/* Wait on any GPU rendering and flushing to occur. */
2763	if (obj_priv->active) {
2764#if WATCH_BUF
2765		DRM_INFO("%s: object %p wait for seqno %08x\n",
2766			  __func__, obj, obj_priv->last_rendering_seqno);
2767#endif
2768		ret = i915_do_wait_request(dev,
2769				obj_priv->last_rendering_seqno,
2770				0,
2771				obj_priv->ring);
2772		if (ret != 0)
2773			return ret;
2774	}
2775
2776	i915_gem_object_flush_cpu_write_domain(obj);
2777
2778	old_write_domain = obj->write_domain;
2779	old_read_domains = obj->read_domains;
2780
2781	/* It should now be out of any other write domains, and we can update
2782	 * the domain values for our changes.
2783	 */
2784	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2785	obj->read_domains = I915_GEM_DOMAIN_GTT;
2786	obj->write_domain = I915_GEM_DOMAIN_GTT;
2787	obj_priv->dirty = 1;
2788
2789	trace_i915_gem_object_change_domain(obj,
2790					    old_read_domains,
2791					    old_write_domain);
2792
2793	return 0;
2794}
2795
2796/**
2797 * Moves a single object to the CPU read, and possibly write domain.
2798 *
2799 * This function returns when the move is complete, including waiting on
2800 * flushes to occur.
2801 */
2802static int
2803i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2804{
2805	uint32_t old_write_domain, old_read_domains;
2806	int ret;
2807
2808	ret = i915_gem_object_flush_gpu_write_domain(obj);
2809	if (ret)
2810		return ret;
2811
2812	/* Wait on any GPU rendering and flushing to occur. */
2813	ret = i915_gem_object_wait_rendering(obj);
2814	if (ret != 0)
2815		return ret;
2816
2817	i915_gem_object_flush_gtt_write_domain(obj);
2818
2819	/* If we have a partially-valid cache of the object in the CPU,
2820	 * finish invalidating it and free the per-page flags.
2821	 */
2822	i915_gem_object_set_to_full_cpu_read_domain(obj);
2823
2824	old_write_domain = obj->write_domain;
2825	old_read_domains = obj->read_domains;
2826
2827	/* Flush the CPU cache if it's still invalid. */
2828	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2829		i915_gem_clflush_object(obj);
2830
2831		obj->read_domains |= I915_GEM_DOMAIN_CPU;
2832	}
2833
2834	/* It should now be out of any other write domains, and we can update
2835	 * the domain values for our changes.
2836	 */
2837	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2838
2839	/* If we're writing through the CPU, then the GPU read domains will
2840	 * need to be invalidated at next use.
2841	 */
2842	if (write) {
2843		obj->read_domains &= I915_GEM_DOMAIN_CPU;
2844		obj->write_domain = I915_GEM_DOMAIN_CPU;
2845	}
2846
2847	trace_i915_gem_object_change_domain(obj,
2848					    old_read_domains,
2849					    old_write_domain);
2850
2851	return 0;
2852}
2853
2854/*
2855 * Set the next domain for the specified object. This
2856 * may not actually perform the necessary flushing/invalidating though,
2857 * as that may want to be batched with other set_domain operations
2858 *
2859 * This is (we hope) the only really tricky part of gem. The goal
2860 * is fairly simple -- track which caches hold bits of the object
2861 * and make sure they remain coherent. A few concrete examples may
2862 * help to explain how it works. For shorthand, we use the notation
2863 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
2864 * a pair of read and write domain masks.
2865 *
2866 * Case 1: the batch buffer
2867 *
2868 *	1. Allocated
2869 *	2. Written by CPU
2870 *	3. Mapped to GTT
2871 *	4. Read by GPU
2872 *	5. Unmapped from GTT
2873 *	6. Freed
2874 *
2875 *	Let's take these one step at a time
2876 *
2877 *	1. Allocated
2878 *		Pages allocated from the kernel may still have
2879 *		cache contents, so we set them to (CPU, CPU) always.
2880 *	2. Written by CPU (using pwrite)
2881 *		The pwrite function calls set_domain (CPU, CPU) and
2882 *		this function does nothing (as nothing changes)
2883 *	3. Mapped to GTT
2884 *		This function asserts that the object is not
2885 *		currently in any GPU-based read or write domains
2886 *	4. Read by GPU
2887 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
2888 *		As write_domain is zero, this function adds in the
2889 *		current read domains (CPU+COMMAND, 0).
2890 *		flush_domains is set to CPU.
2891 *		invalidate_domains is set to COMMAND
2892 *		clflush is run to get data out of the CPU caches
2893 *		then i915_dev_set_domain calls i915_gem_flush to
2894 *		emit an MI_FLUSH and drm_agp_chipset_flush
2895 *	5. Unmapped from GTT
2896 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
2897 *		flush_domains and invalidate_domains end up both zero
2898 *		so no flushing/invalidating happens
2899 *	6. Freed
2900 *		yay, done
2901 *
2902 * Case 2: The shared render buffer
2903 *
2904 *	1. Allocated
2905 *	2. Mapped to GTT
2906 *	3. Read/written by GPU
2907 *	4. set_domain to (CPU,CPU)
2908 *	5. Read/written by CPU
2909 *	6. Read/written by GPU
2910 *
2911 *	1. Allocated
2912 *		Same as last example, (CPU, CPU)
2913 *	2. Mapped to GTT
2914 *		Nothing changes (assertions find that it is not in the GPU)
2915 *	3. Read/written by GPU
2916 *		execbuffer calls set_domain (RENDER, RENDER)
2917 *		flush_domains gets CPU
2918 *		invalidate_domains gets GPU
2919 *		clflush (obj)
2920 *		MI_FLUSH and drm_agp_chipset_flush
2921 *	4. set_domain (CPU, CPU)
2922 *		flush_domains gets GPU
2923 *		invalidate_domains gets CPU
2924 *		wait_rendering (obj) to make sure all drawing is complete.
2925 *		This will include an MI_FLUSH to get the data from GPU
2926 *		to memory
2927 *		clflush (obj) to invalidate the CPU cache
2928 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
2929 *	5. Read/written by CPU
2930 *		cache lines are loaded and dirtied
2931 *	6. Read/written by GPU
2932 *		Same as last GPU access
2933 *
2934 * Case 3: The constant buffer
2935 *
2936 *	1. Allocated
2937 *	2. Written by CPU
2938 *	3. Read by GPU
2939 *	4. Updated (written) by CPU again
2940 *	5. Read by GPU
2941 *
2942 *	1. Allocated
2943 *		(CPU, CPU)
2944 *	2. Written by CPU
2945 *		(CPU, CPU)
2946 *	3. Read by GPU
2947 *		(CPU+RENDER, 0)
2948 *		flush_domains = CPU
2949 *		invalidate_domains = RENDER
2950 *		clflush (obj)
2951 *		MI_FLUSH
2952 *		drm_agp_chipset_flush
2953 *	4. Updated (written) by CPU again
2954 *		(CPU, CPU)
2955 *		flush_domains = 0 (no previous write domain)
2956 *		invalidate_domains = 0 (no new read domains)
2957 *	5. Read by GPU
2958 *		(CPU+RENDER, 0)
2959 *		flush_domains = CPU
2960 *		invalidate_domains = RENDER
2961 *		clflush (obj)
2962 *		MI_FLUSH
2963 *		drm_agp_chipset_flush
2964 */
2965static void
2966i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
2967{
2968	struct drm_device		*dev = obj->dev;
2969	drm_i915_private_t		*dev_priv = dev->dev_private;
2970	struct drm_i915_gem_object	*obj_priv = to_intel_bo(obj);
2971	uint32_t			invalidate_domains = 0;
2972	uint32_t			flush_domains = 0;
2973	uint32_t			old_read_domains;
2974
2975	BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2976	BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
2977
2978	intel_mark_busy(dev, obj);
2979
2980#if WATCH_BUF
2981	DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
2982		 __func__, obj,
2983		 obj->read_domains, obj->pending_read_domains,
2984		 obj->write_domain, obj->pending_write_domain);
2985#endif
2986	/*
2987	 * If the object isn't moving to a new write domain,
2988	 * let the object stay in multiple read domains
2989	 */
2990	if (obj->pending_write_domain == 0)
2991		obj->pending_read_domains |= obj->read_domains;
2992	else
2993		obj_priv->dirty = 1;
2994
2995	/*
2996	 * Flush the current write domain if
2997	 * the new read domains don't match. Invalidate
2998	 * any read domains which differ from the old
2999	 * write domain
3000	 */
3001	if (obj->write_domain &&
3002	    obj->write_domain != obj->pending_read_domains) {
3003		flush_domains |= obj->write_domain;
3004		invalidate_domains |=
3005			obj->pending_read_domains & ~obj->write_domain;
3006	}
3007	/*
3008	 * Invalidate any read caches which may have
3009	 * stale data. That is, any new read domains.
3010	 */
3011	invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
3012	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
3013#if WATCH_BUF
3014		DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
3015			 __func__, flush_domains, invalidate_domains);
3016#endif
3017		i915_gem_clflush_object(obj);
3018	}
3019
3020	old_read_domains = obj->read_domains;
3021
3022	/* The actual obj->write_domain will be updated with
3023	 * pending_write_domain after we emit the accumulated flush for all
3024	 * of our domain changes in execbuffers (which clears objects'
3025	 * write_domains).  So if we have a current write domain that we
3026	 * aren't changing, set pending_write_domain to that.
3027	 */
3028	if (flush_domains == 0 && obj->pending_write_domain == 0)
3029		obj->pending_write_domain = obj->write_domain;
3030	obj->read_domains = obj->pending_read_domains;
3031
3032	if (flush_domains & I915_GEM_GPU_DOMAINS) {
3033		if (obj_priv->ring == &dev_priv->render_ring)
3034			dev_priv->flush_rings |= FLUSH_RENDER_RING;
3035		else if (obj_priv->ring == &dev_priv->bsd_ring)
3036			dev_priv->flush_rings |= FLUSH_BSD_RING;
3037	}
3038
3039	dev->invalidate_domains |= invalidate_domains;
3040	dev->flush_domains |= flush_domains;
3041#if WATCH_BUF
3042	DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
3043		 __func__,
3044		 obj->read_domains, obj->write_domain,
3045		 dev->invalidate_domains, dev->flush_domains);
3046#endif
3047
3048	trace_i915_gem_object_change_domain(obj,
3049					    old_read_domains,
3050					    obj->write_domain);
3051}
3052
3053/**
3054 * Moves the object from a partial CPU read domain to a full one.
3055 *
3056 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3057 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3058 */
3059static void
3060i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
3061{
3062	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3063
3064	if (!obj_priv->page_cpu_valid)
3065		return;
3066
3067	/* If we're partially in the CPU read domain, finish moving it in.
3068	 */
3069	if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3070		int i;
3071
3072		for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3073			if (obj_priv->page_cpu_valid[i])
3074				continue;
3075			drm_clflush_pages(obj_priv->pages + i, 1);
3076		}
3077	}
3078
3079	/* Free the page_cpu_valid mappings which are now stale, whether
3080	 * or not we've got I915_GEM_DOMAIN_CPU.
3081	 */
3082	kfree(obj_priv->page_cpu_valid);
3083	obj_priv->page_cpu_valid = NULL;
3084}
3085
3086/**
3087 * Set the CPU read domain on a range of the object.
3088 *
3089 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3090 * not entirely valid.  The page_cpu_valid member of the object flags which
3091 * pages have been flushed, and will be respected by
3092 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3093 * of the whole object.
3094 *
3095 * This function returns when the move is complete, including waiting on
3096 * flushes to occur.
3097 */
3098static int
3099i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3100					  uint64_t offset, uint64_t size)
3101{
3102	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3103	uint32_t old_read_domains;
3104	int i, ret;
3105
3106	if (offset == 0 && size == obj->size)
3107		return i915_gem_object_set_to_cpu_domain(obj, 0);
3108
3109	ret = i915_gem_object_flush_gpu_write_domain(obj);
3110	if (ret)
3111		return ret;
3112
3113	/* Wait on any GPU rendering and flushing to occur. */
3114	ret = i915_gem_object_wait_rendering(obj);
3115	if (ret != 0)
3116		return ret;
3117	i915_gem_object_flush_gtt_write_domain(obj);
3118
3119	/* If we're already fully in the CPU read domain, we're done. */
3120	if (obj_priv->page_cpu_valid == NULL &&
3121	    (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3122		return 0;
3123
3124	/* Otherwise, create/clear the per-page CPU read domain flag if we're
3125	 * newly adding I915_GEM_DOMAIN_CPU
3126	 */
3127	if (obj_priv->page_cpu_valid == NULL) {
3128		obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3129						   GFP_KERNEL);
3130		if (obj_priv->page_cpu_valid == NULL)
3131			return -ENOMEM;
3132	} else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3133		memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
3134
3135	/* Flush the cache on any pages that are still invalid from the CPU's
3136	 * perspective.
3137	 */
3138	for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3139	     i++) {
3140		if (obj_priv->page_cpu_valid[i])
3141			continue;
3142
3143		drm_clflush_pages(obj_priv->pages + i, 1);
3144
3145		obj_priv->page_cpu_valid[i] = 1;
3146	}
3147
3148	/* It should now be out of any other write domains, and we can update
3149	 * the domain values for our changes.
3150	 */
3151	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3152
3153	old_read_domains = obj->read_domains;
3154	obj->read_domains |= I915_GEM_DOMAIN_CPU;
3155
3156	trace_i915_gem_object_change_domain(obj,
3157					    old_read_domains,
3158					    obj->write_domain);
3159
3160	return 0;
3161}
3162
3163/**
3164 * Pin an object to the GTT and evaluate the relocations landing in it.
3165 */
3166static int
3167i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3168				 struct drm_file *file_priv,
3169				 struct drm_i915_gem_exec_object2 *entry,
3170				 struct drm_i915_gem_relocation_entry *relocs)
3171{
3172	struct drm_device *dev = obj->dev;
3173	drm_i915_private_t *dev_priv = dev->dev_private;
3174	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3175	int i, ret;
3176	void __iomem *reloc_page;
3177	bool need_fence;
3178
3179	need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3180	             obj_priv->tiling_mode != I915_TILING_NONE;
3181
3182	/* Check fence reg constraints and rebind if necessary */
3183	if (need_fence &&
3184	    !i915_gem_object_fence_offset_ok(obj,
3185					     obj_priv->tiling_mode)) {
3186		ret = i915_gem_object_unbind(obj);
3187		if (ret)
3188			return ret;
3189	}
3190
3191	/* Choose the GTT offset for our buffer and put it there. */
3192	ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
3193	if (ret)
3194		return ret;
3195
3196	/*
3197	 * Pre-965 chips need a fence register set up in order to
3198	 * properly handle blits to/from tiled surfaces.
3199	 */
3200	if (need_fence) {
3201		ret = i915_gem_object_get_fence_reg(obj);
3202		if (ret != 0) {
3203			i915_gem_object_unpin(obj);
3204			return ret;
3205		}
3206	}
3207
3208	entry->offset = obj_priv->gtt_offset;
3209
3210	/* Apply the relocations, using the GTT aperture to avoid cache
3211	 * flushing requirements.
3212	 */
3213	for (i = 0; i < entry->relocation_count; i++) {
3214		struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
3215		struct drm_gem_object *target_obj;
3216		struct drm_i915_gem_object *target_obj_priv;
3217		uint32_t reloc_val, reloc_offset;
3218		uint32_t __iomem *reloc_entry;
3219
3220		target_obj = drm_gem_object_lookup(obj->dev, file_priv,
3221						   reloc->target_handle);
3222		if (target_obj == NULL) {
3223			i915_gem_object_unpin(obj);
3224			return -ENOENT;
3225		}
3226		target_obj_priv = to_intel_bo(target_obj);
3227
3228#if WATCH_RELOC
3229		DRM_INFO("%s: obj %p offset %08x target %d "
3230			 "read %08x write %08x gtt %08x "
3231			 "presumed %08x delta %08x\n",
3232			 __func__,
3233			 obj,
3234			 (int) reloc->offset,
3235			 (int) reloc->target_handle,
3236			 (int) reloc->read_domains,
3237			 (int) reloc->write_domain,
3238			 (int) target_obj_priv->gtt_offset,
3239			 (int) reloc->presumed_offset,
3240			 reloc->delta);
3241#endif
3242
3243		/* The target buffer should have appeared before us in the
3244		 * exec_object list, so it should have a GTT space bound by now.
3245		 */
3246		if (target_obj_priv->gtt_space == NULL) {
3247			DRM_ERROR("No GTT space found for object %d\n",
3248				  reloc->target_handle);
3249			drm_gem_object_unreference(target_obj);
3250			i915_gem_object_unpin(obj);
3251			return -EINVAL;
3252		}
3253
3254		/* Validate that the target is in a valid r/w GPU domain */
3255		if (reloc->write_domain & (reloc->write_domain - 1)) {
3256			DRM_ERROR("reloc with multiple write domains: "
3257				  "obj %p target %d offset %d "
3258				  "read %08x write %08x",
3259				  obj, reloc->target_handle,
3260				  (int) reloc->offset,
3261				  reloc->read_domains,
3262				  reloc->write_domain);
3263			drm_gem_object_unreference(target_obj);
3264			i915_gem_object_unpin(obj);
3265			return -EINVAL;
3266		}
3267		if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3268		    reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3269			DRM_ERROR("reloc with read/write CPU domains: "
3270				  "obj %p target %d offset %d "
3271				  "read %08x write %08x",
3272				  obj, reloc->target_handle,
3273				  (int) reloc->offset,
3274				  reloc->read_domains,
3275				  reloc->write_domain);
3276			drm_gem_object_unreference(target_obj);
3277			i915_gem_object_unpin(obj);
3278			return -EINVAL;
3279		}
3280		if (reloc->write_domain && target_obj->pending_write_domain &&
3281		    reloc->write_domain != target_obj->pending_write_domain) {
3282			DRM_ERROR("Write domain conflict: "
3283				  "obj %p target %d offset %d "
3284				  "new %08x old %08x\n",
3285				  obj, reloc->target_handle,
3286				  (int) reloc->offset,
3287				  reloc->write_domain,
3288				  target_obj->pending_write_domain);
3289			drm_gem_object_unreference(target_obj);
3290			i915_gem_object_unpin(obj);
3291			return -EINVAL;
3292		}
3293
3294		target_obj->pending_read_domains |= reloc->read_domains;
3295		target_obj->pending_write_domain |= reloc->write_domain;
3296
3297		/* If the relocation already has the right value in it, no
3298		 * more work needs to be done.
3299		 */
3300		if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
3301			drm_gem_object_unreference(target_obj);
3302			continue;
3303		}
3304
3305		/* Check that the relocation address is valid... */
3306		if (reloc->offset > obj->size - 4) {
3307			DRM_ERROR("Relocation beyond object bounds: "
3308				  "obj %p target %d offset %d size %d.\n",
3309				  obj, reloc->target_handle,
3310				  (int) reloc->offset, (int) obj->size);
3311			drm_gem_object_unreference(target_obj);
3312			i915_gem_object_unpin(obj);
3313			return -EINVAL;
3314		}
3315		if (reloc->offset & 3) {
3316			DRM_ERROR("Relocation not 4-byte aligned: "
3317				  "obj %p target %d offset %d.\n",
3318				  obj, reloc->target_handle,
3319				  (int) reloc->offset);
3320			drm_gem_object_unreference(target_obj);
3321			i915_gem_object_unpin(obj);
3322			return -EINVAL;
3323		}
3324
3325		/* and points to somewhere within the target object. */
3326		if (reloc->delta >= target_obj->size) {
3327			DRM_ERROR("Relocation beyond target object bounds: "
3328				  "obj %p target %d delta %d size %d.\n",
3329				  obj, reloc->target_handle,
3330				  (int) reloc->delta, (int) target_obj->size);
3331			drm_gem_object_unreference(target_obj);
3332			i915_gem_object_unpin(obj);
3333			return -EINVAL;
3334		}
3335
3336		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3337		if (ret != 0) {
3338			drm_gem_object_unreference(target_obj);
3339			i915_gem_object_unpin(obj);
3340			return ret;
3341		}
3342
3343		/* Map the page containing the relocation we're going to
3344		 * perform.
3345		 */
3346		reloc_offset = obj_priv->gtt_offset + reloc->offset;
3347		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3348						      (reloc_offset &
3349						       ~(PAGE_SIZE - 1)),
3350						      KM_USER0);
3351		reloc_entry = (uint32_t __iomem *)(reloc_page +
3352						   (reloc_offset & (PAGE_SIZE - 1)));
3353		reloc_val = target_obj_priv->gtt_offset + reloc->delta;
3354
3355#if WATCH_BUF
3356		DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
3357			  obj, (unsigned int) reloc->offset,
3358			  readl(reloc_entry), reloc_val);
3359#endif
3360		writel(reloc_val, reloc_entry);
3361		io_mapping_unmap_atomic(reloc_page, KM_USER0);
3362
3363		/* The updated presumed offset for this entry will be
3364		 * copied back out to the user.
3365		 */
3366		reloc->presumed_offset = target_obj_priv->gtt_offset;
3367
3368		drm_gem_object_unreference(target_obj);
3369	}
3370
3371#if WATCH_BUF
3372	if (0)
3373		i915_gem_dump_object(obj, 128, __func__, ~0);
3374#endif
3375	return 0;
3376}
3377
3378/* Throttle our rendering by waiting until the ring has completed our requests
3379 * emitted over 20 msec ago.
3380 *
3381 * Note that if we were to use the current jiffies each time around the loop,
3382 * we wouldn't escape the function with any frames outstanding if the time to
3383 * render a frame was over 20ms.
3384 *
3385 * This should get us reasonable parallelism between CPU and GPU but also
3386 * relatively low latency when blocking on a particular request to finish.
3387 */
3388static int
3389i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3390{
3391	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3392	int ret = 0;
3393	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3394
3395	mutex_lock(&dev->struct_mutex);
3396	while (!list_empty(&i915_file_priv->mm.request_list)) {
3397		struct drm_i915_gem_request *request;
3398
3399		request = list_first_entry(&i915_file_priv->mm.request_list,
3400					   struct drm_i915_gem_request,
3401					   client_list);
3402
3403		if (time_after_eq(request->emitted_jiffies, recent_enough))
3404			break;
3405
3406		ret = i915_wait_request(dev, request->seqno, request->ring);
3407		if (ret != 0)
3408			break;
3409	}
3410	mutex_unlock(&dev->struct_mutex);
3411
3412	return ret;
3413}
3414
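/* Copies the relocation entries for all exec objects from userspace into a
 * single kernel allocation, checking the total count for overflow.
 */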
3415static int
3416i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list,
3417			      uint32_t buffer_count,
3418			      struct drm_i915_gem_relocation_entry **relocs)
3419{
3420	uint32_t reloc_count = 0, reloc_index = 0, i;
3421	int ret;
3422
3423	*relocs = NULL;
3424	for (i = 0; i < buffer_count; i++) {
3425		if (reloc_count + exec_list[i].relocation_count < reloc_count)
3426			return -EINVAL;
3427		reloc_count += exec_list[i].relocation_count;
3428	}
3429
3430	*relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
3431	if (*relocs == NULL) {
3432		DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count);
3433		return -ENOMEM;
3434	}
3435
3436	for (i = 0; i < buffer_count; i++) {
3437		struct drm_i915_gem_relocation_entry __user *user_relocs;
3438
3439		user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3440
3441		ret = copy_from_user(&(*relocs)[reloc_index],
3442				     user_relocs,
3443				     exec_list[i].relocation_count *
3444				     sizeof(**relocs));
3445		if (ret != 0) {
3446			drm_free_large(*relocs);
3447			*relocs = NULL;
3448			return -EFAULT;
3449		}
3450
3451		reloc_index += exec_list[i].relocation_count;
3452	}
3453
3454	return 0;
3455}
3456
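/* Writes the (possibly updated) relocation entries back out to userspace
 * and frees the kernel copy.
 */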
3457static int
3458i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list,
3459			    uint32_t buffer_count,
3460			    struct drm_i915_gem_relocation_entry *relocs)
3461{
3462	uint32_t reloc_count = 0, i;
3463	int ret = 0;
3464
3465	if (relocs == NULL)
3466	    return 0;
3467
3468	for (i = 0; i < buffer_count; i++) {
3469		struct drm_i915_gem_relocation_entry __user *user_relocs;
3470		int unwritten;
3471
3472		user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3473
3474		unwritten = copy_to_user(user_relocs,
3475					 &relocs[reloc_count],
3476					 exec_list[i].relocation_count *
3477					 sizeof(*relocs));
3478
3479		if (unwritten) {
3480			ret = -EFAULT;
3481			goto err;
3482		}
3483
3484		reloc_count += exec_list[i].relocation_count;
3485	}
3486
3487err:
3488	drm_free_large(relocs);
3489
3490	return ret;
3491}
3492
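/* Sanity-checks the batch range: the batch start address and length must
 * be 8-byte aligned, and the start must be non-zero.
 */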
3493static int
3494i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec,
3495			   uint64_t exec_offset)
3496{
3497	uint32_t exec_start, exec_len;
3498
3499	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3500	exec_len = (uint32_t) exec->batch_len;
3501
3502	if ((exec_start | exec_len) & 0x7)
3503		return -EINVAL;
3504
3505	if (!exec_start)
3506		return -EINVAL;
3507
3508	return 0;
3509}
3510
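/* Sleeps (dropping struct_mutex across the schedule) until none of the
 * given objects has a page flip pending, or a signal is received.
 */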
3511static int
3512i915_gem_wait_for_pending_flip(struct drm_device *dev,
3513			       struct drm_gem_object **object_list,
3514			       int count)
3515{
3516	drm_i915_private_t *dev_priv = dev->dev_private;
3517	struct drm_i915_gem_object *obj_priv;
3518	DEFINE_WAIT(wait);
3519	int i, ret = 0;
3520
3521	for (;;) {
3522		prepare_to_wait(&dev_priv->pending_flip_queue,
3523				&wait, TASK_INTERRUPTIBLE);
3524		for (i = 0; i < count; i++) {
3525			obj_priv = to_intel_bo(object_list[i]);
3526			if (atomic_read(&obj_priv->pending_flip) > 0)
3527				break;
3528		}
3529		if (i == count)
3530			break;
3531
3532		if (!signal_pending(current)) {
3533			mutex_unlock(&dev->struct_mutex);
3534			schedule();
3535			mutex_lock(&dev->struct_mutex);
3536			continue;
3537		}
3538		ret = -ERESTARTSYS;
3539		break;
3540	}
3541	finish_wait(&dev_priv->pending_flip_queue, &wait);
3542
3543	return ret;
3544}
3545
3546
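/**
 * Main execbuffer path: looks up, pins and relocates every object, computes
 * the resulting GPU domain transitions and flushes/invalidates caches as
 * needed before the batch is submitted on the selected ring (render, or
 * BSD when I915_EXEC_BSD is set).
 */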
3547int
3548i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3549		       struct drm_file *file_priv,
3550		       struct drm_i915_gem_execbuffer2 *args,
3551		       struct drm_i915_gem_exec_object2 *exec_list)
3552{
3553	drm_i915_private_t *dev_priv = dev->dev_private;
3554	struct drm_gem_object **object_list = NULL;
3555	struct drm_gem_object *batch_obj;
3556	struct drm_i915_gem_object *obj_priv;
3557	struct drm_clip_rect *cliprects = NULL;
3558	struct drm_i915_gem_relocation_entry *relocs = NULL;
3559	int ret = 0, ret2, i, pinned = 0;
3560	uint64_t exec_offset;
3561	uint32_t seqno, flush_domains, reloc_index;
3562	int pin_tries, flips;
3563
3564	struct intel_ring_buffer *ring = NULL;
3565
3566#if WATCH_EXEC
3567	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3568		  (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3569#endif
3570	if (args->flags & I915_EXEC_BSD) {
3571		if (!HAS_BSD(dev)) {
3572			DRM_ERROR("execbuf with wrong flag\n");
3573			return -EINVAL;
3574		}
3575		ring = &dev_priv->bsd_ring;
3576	} else {
3577		ring = &dev_priv->render_ring;
3578	}
3579
3580	if (args->buffer_count < 1) {
3581		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3582		return -EINVAL;
3583	}
3584	object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
3585	if (object_list == NULL) {
3586		DRM_ERROR("Failed to allocate object list for %d buffers\n",
3587			  args->buffer_count);
3588		ret = -ENOMEM;
3589		goto pre_mutex_err;
3590	}
3591
3592	if (args->num_cliprects != 0) {
3593		cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3594				    GFP_KERNEL);
3595		if (cliprects == NULL) {
3596			ret = -ENOMEM;
3597			goto pre_mutex_err;
3598		}
3599
3600		ret = copy_from_user(cliprects,
3601				     (struct drm_clip_rect __user *)
3602				     (uintptr_t) args->cliprects_ptr,
3603				     sizeof(*cliprects) * args->num_cliprects);
3604		if (ret != 0) {
3605			DRM_ERROR("copy %d cliprects failed: %d\n",
3606				  args->num_cliprects, ret);
3607			ret = -EFAULT;
3608			goto pre_mutex_err;
3609		}
3610	}
3611
3612	ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3613					    &relocs);
3614	if (ret != 0)
3615		goto pre_mutex_err;
3616
3617	mutex_lock(&dev->struct_mutex);
3618
3619	i915_verify_inactive(dev, __FILE__, __LINE__);
3620
3621	if (atomic_read(&dev_priv->mm.wedged)) {
3622		mutex_unlock(&dev->struct_mutex);
3623		ret = -EIO;
3624		goto pre_mutex_err;
3625	}
3626
3627	if (dev_priv->mm.suspended) {
3628		mutex_unlock(&dev->struct_mutex);
3629		ret = -EBUSY;
3630		goto pre_mutex_err;
3631	}
3632
3633	/* Look up object handles */
3634	flips = 0;
3635	for (i = 0; i < args->buffer_count; i++) {
3636		object_list[i] = drm_gem_object_lookup(dev, file_priv,
3637						       exec_list[i].handle);
3638		if (object_list[i] == NULL) {
3639			DRM_ERROR("Invalid object handle %d at index %d\n",
3640				   exec_list[i].handle, i);
3641			/* prevent error path from reading uninitialized data */
3642			args->buffer_count = i + 1;
3643			ret = -ENOENT;
3644			goto err;
3645		}
3646
3647		obj_priv = to_intel_bo(object_list[i]);
3648		if (obj_priv->in_execbuffer) {
3649			DRM_ERROR("Object %p appears more than once in object list\n",
3650				   object_list[i]);
3651			/* prevent error path from reading uninitialized data */
3652			args->buffer_count = i + 1;
3653			ret = -EINVAL;
3654			goto err;
3655		}
3656		obj_priv->in_execbuffer = true;
3657		flips += atomic_read(&obj_priv->pending_flip);
3658	}
3659
3660	if (flips > 0) {
3661		ret = i915_gem_wait_for_pending_flip(dev, object_list,
3662						     args->buffer_count);
3663		if (ret)
3664			goto err;
3665	}
3666
3667	/* Pin and relocate */
3668	for (pin_tries = 0; ; pin_tries++) {
3669		ret = 0;
3670		reloc_index = 0;
3671
3672		for (i = 0; i < args->buffer_count; i++) {
3673			object_list[i]->pending_read_domains = 0;
3674			object_list[i]->pending_write_domain = 0;
3675			ret = i915_gem_object_pin_and_relocate(object_list[i],
3676							       file_priv,
3677							       &exec_list[i],
3678							       &relocs[reloc_index]);
3679			if (ret)
3680				break;
3681			pinned = i + 1;
3682			reloc_index += exec_list[i].relocation_count;
3683		}
3684		/* success */
3685		if (ret == 0)
3686			break;
3687
3688		/* error other than GTT full, or we've already tried again */
3689		if (ret != -ENOSPC || pin_tries >= 1) {
3690			if (ret != -ERESTARTSYS) {
3691				unsigned long long total_size = 0;
3692				int num_fences = 0;
3693				for (i = 0; i < args->buffer_count; i++) {
3694					obj_priv = to_intel_bo(object_list[i]);
3695
3696					total_size += object_list[i]->size;
3697					num_fences +=
3698						exec_list[i].flags & EXEC_OBJECT_NEEDS_FENCE &&
3699						obj_priv->tiling_mode != I915_TILING_NONE;
3700				}
3701				DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes, %d fences: %d\n",
3702					  pinned+1, args->buffer_count,
3703					  total_size, num_fences,
3704					  ret);
3705				DRM_ERROR("%d objects [%d pinned], "
3706					  "%d object bytes [%d pinned], "
3707					  "%d/%d gtt bytes\n",
3708					  atomic_read(&dev->object_count),
3709					  atomic_read(&dev->pin_count),
3710					  atomic_read(&dev->object_memory),
3711					  atomic_read(&dev->pin_memory),
3712					  atomic_read(&dev->gtt_memory),
3713					  dev->gtt_total);
3714			}
3715			goto err;
3716		}
3717
3718		/* unpin all of our buffers */
3719		for (i = 0; i < pinned; i++)
3720			i915_gem_object_unpin(object_list[i]);
3721		pinned = 0;
3722
3723		/* evict everyone we can from the aperture */
3724		ret = i915_gem_evict_everything(dev);
3725		if (ret && ret != -ENOSPC)
3726			goto err;
3727	}
3728
3729	/* Set the pending read domains for the batch buffer to COMMAND */
3730	batch_obj = object_list[args->buffer_count-1];
3731	if (batch_obj->pending_write_domain) {
3732		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3733		ret = -EINVAL;
3734		goto err;
3735	}
3736	batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3737
3738	/* Sanity check the batch buffer, prior to moving objects */
3739	exec_offset = exec_list[args->buffer_count - 1].offset;
3740	ret = i915_gem_check_execbuffer(args, exec_offset);
3741	if (ret != 0) {
3742		DRM_ERROR("execbuf with invalid offset/length\n");
3743		goto err;
3744	}
3745
3746	i915_verify_inactive(dev, __FILE__, __LINE__);
3747
3748	/* Zero the global flush/invalidate flags. These
3749	 * will be modified as new domains are computed
3750	 * for each object
3751	 */
3752	dev->invalidate_domains = 0;
3753	dev->flush_domains = 0;
3754	dev_priv->flush_rings = 0;
3755
3756	for (i = 0; i < args->buffer_count; i++) {
3757		struct drm_gem_object *obj = object_list[i];
3758
3759		/* Compute new gpu domains and update invalidate/flush */
3760		i915_gem_object_set_to_gpu_domain(obj);
3761	}
3762
3763	i915_verify_inactive(dev, __FILE__, __LINE__);
3764
3765	if (dev->invalidate_domains | dev->flush_domains) {
3766#if WATCH_EXEC
3767		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3768			  __func__,
3769			 dev->invalidate_domains,
3770			 dev->flush_domains);
3771#endif
3772		i915_gem_flush(dev,
3773			       dev->invalidate_domains,
3774			       dev->flush_domains);
3775		if (dev_priv->flush_rings & FLUSH_RENDER_RING)
3776			(void)i915_add_request(dev, file_priv,
3777					       dev->flush_domains,
3778					       &dev_priv->render_ring);
3779		if (dev_priv->flush_rings & FLUSH_BSD_RING)
3780			(void)i915_add_request(dev, file_priv,
3781					       dev->flush_domains,
3782					       &dev_priv->bsd_ring);
3783	}
3784
3785	for (i = 0; i < args->buffer_count; i++) {
3786		struct drm_gem_object *obj = object_list[i];
3787		struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3788		uint32_t old_write_domain = obj->write_domain;
3789
3790		obj->write_domain = obj->pending_write_domain;
3791		if (obj->write_domain)
3792			list_move_tail(&obj_priv->gpu_write_list,
3793				       &dev_priv->mm.gpu_write_list);
3794		else
3795			list_del_init(&obj_priv->gpu_write_list);
3796
3797		trace_i915_gem_object_change_domain(obj,
3798						    obj->read_domains,
3799						    old_write_domain);
3800	}
3801
3802	i915_verify_inactive(dev, __FILE__, __LINE__);
3803
3804#if WATCH_COHERENCY
3805	for (i = 0; i < args->buffer_count; i++) {
3806		i915_gem_object_check_coherency(object_list[i],
3807						exec_list[i].handle);
3808	}
3809#endif
3810
3811#if WATCH_EXEC
3812	i915_gem_dump_object(batch_obj,
3813			      args->batch_len,
3814			      __func__,
3815			      ~0);
3816#endif
3817
3818	/* Exec the batchbuffer */
3819	ret = ring->dispatch_gem_execbuffer(dev, ring, args,
3820			cliprects, exec_offset);
3821	if (ret) {
3822		DRM_ERROR("dispatch failed %d\n", ret);
3823		goto err;
3824	}
3825
3826	/*
3827	 * Ensure that the commands in the batch buffer are
3828	 * finished before the interrupt fires
3829	 */
3830	flush_domains = i915_retire_commands(dev, ring);
3831
3832	i915_verify_inactive(dev, __FILE__, __LINE__);
3833
3834	/*
3835	 * Get a seqno representing the execution of the current buffer,
3836	 * which we can wait on.  We would like to mitigate these interrupts,
3837	 * likely by only creating seqnos occasionally (so that we have
3838	 * *some* interrupts representing completion of buffers that we can
3839	 * wait on when trying to clear up gtt space).
3840	 */
3841	seqno = i915_add_request(dev, file_priv, flush_domains, ring);
3842	BUG_ON(seqno == 0);
3843	for (i = 0; i < args->buffer_count; i++) {
3844		struct drm_gem_object *obj = object_list[i];
3845		obj_priv = to_intel_bo(obj);
3846
3847		i915_gem_object_move_to_active(obj, seqno, ring);
3848#if WATCH_LRU
3849		DRM_INFO("%s: move to exec list %p\n", __func__, obj);
3850#endif
3851	}
3852#if WATCH_LRU
3853	i915_dump_lru(dev, __func__);
3854#endif
3855
3856	i915_verify_inactive(dev, __FILE__, __LINE__);
3857
3858err:
3859	for (i = 0; i < pinned; i++)
3860		i915_gem_object_unpin(object_list[i]);
3861
3862	for (i = 0; i < args->buffer_count; i++) {
3863		if (object_list[i]) {
3864			obj_priv = to_intel_bo(object_list[i]);
3865			obj_priv->in_execbuffer = false;
3866		}
3867		drm_gem_object_unreference(object_list[i]);
3868	}
3869
3870	mutex_unlock(&dev->struct_mutex);
3871
3872pre_mutex_err:
3873	/* Copy the updated relocations out regardless of current error
3874	 * state.  Failure to update the relocs would mean that the next
3875	 * time userland calls execbuf, it would do so with presumed offset
3876	 * state that didn't match the actual object state.
3877	 */
3878	ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3879					   relocs);
3880	if (ret2 != 0) {
3881		DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3882
3883		if (ret == 0)
3884			ret = ret2;
3885	}
3886
3887	drm_free_large(object_list);
3888	kfree(cliprects);
3889
3890	return ret;
3891}
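
/*
 * Illustrative sketch (not part of the driver): how userspace is expected
 * to consume the offsets copied back out above.  After execbuf returns,
 * each exec entry's ->offset holds the object's current GTT address, and
 * callers normally cache it as the presumed offset for the next batch so
 * that relocations whose presumed state still matches can be skipped.  The
 * bo_cache and exec_objects names below are hypothetical.
 *
 *	for (i = 0; i < exec.buffer_count; i++)
 *		bo_cache[i].presumed_offset = exec_objects[i].offset;
 */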
3892
3893/*
3894 * Legacy execbuffer just creates an exec2 list from the original exec object
3895 * list array and passes it to the real function.
3896 */
3897int
3898i915_gem_execbuffer(struct drm_device *dev, void *data,
3899		    struct drm_file *file_priv)
3900{
3901	struct drm_i915_gem_execbuffer *args = data;
3902	struct drm_i915_gem_execbuffer2 exec2;
3903	struct drm_i915_gem_exec_object *exec_list = NULL;
3904	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3905	int ret, i;
3906
3907#if WATCH_EXEC
3908	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3909		  (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3910#endif
3911
3912	if (args->buffer_count < 1) {
3913		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3914		return -EINVAL;
3915	}
3916
3917	/* Copy in the exec list from userland */
3918	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
3919	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
3920	if (exec_list == NULL || exec2_list == NULL) {
3921		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
3922			  args->buffer_count);
3923		drm_free_large(exec_list);
3924		drm_free_large(exec2_list);
3925		return -ENOMEM;
3926	}
3927	ret = copy_from_user(exec_list,
3928			     (struct drm_i915_gem_exec_object __user *)
3929			     (uintptr_t) args->buffers_ptr,
3930			     sizeof(*exec_list) * args->buffer_count);
3931	if (ret != 0) {
3932		DRM_ERROR("copy %d exec entries failed %d\n",
3933			  args->buffer_count, ret);
3934		drm_free_large(exec_list);
3935		drm_free_large(exec2_list);
3936		return -EFAULT;
3937	}
3938
3939	for (i = 0; i < args->buffer_count; i++) {
3940		exec2_list[i].handle = exec_list[i].handle;
3941		exec2_list[i].relocation_count = exec_list[i].relocation_count;
3942		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
3943		exec2_list[i].alignment = exec_list[i].alignment;
3944		exec2_list[i].offset = exec_list[i].offset;
3945		if (!IS_I965G(dev))
3946			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
3947		else
3948			exec2_list[i].flags = 0;
3949	}
3950
3951	exec2.buffers_ptr = args->buffers_ptr;
3952	exec2.buffer_count = args->buffer_count;
3953	exec2.batch_start_offset = args->batch_start_offset;
3954	exec2.batch_len = args->batch_len;
3955	exec2.DR1 = args->DR1;
3956	exec2.DR4 = args->DR4;
3957	exec2.num_cliprects = args->num_cliprects;
3958	exec2.cliprects_ptr = args->cliprects_ptr;
3959	exec2.flags = I915_EXEC_RENDER;
3960
3961	ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
3962	if (!ret) {
3963		/* Copy the new buffer offsets back to the user's exec list. */
3964		for (i = 0; i < args->buffer_count; i++)
3965			exec_list[i].offset = exec2_list[i].offset;
3966		/* ... and back out to userspace */
3967		ret = copy_to_user((struct drm_i915_gem_exec_object __user *)
3968				   (uintptr_t) args->buffers_ptr,
3969				   exec_list,
3970				   sizeof(*exec_list) * args->buffer_count);
3971		if (ret) {
3972			DRM_ERROR("failed to copy %d exec entries "
3973				  "back to user (%d bytes not copied)\n",
3974				  args->buffer_count, ret);
3975			ret = -EFAULT;
3976		}
3977	}
3978
3979	drm_free_large(exec_list);
3980	drm_free_large(exec2_list);
3981	return ret;
3982}
3983
3984int
3985i915_gem_execbuffer2(struct drm_device *dev, void *data,
3986		     struct drm_file *file_priv)
3987{
3988	struct drm_i915_gem_execbuffer2 *args = data;
3989	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3990	int ret;
3991
3992#if WATCH_EXEC
3993	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3994		  (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3995#endif
3996
3997	if (args->buffer_count < 1) {
3998		DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
3999		return -EINVAL;
4000	}
4001
4002	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4003	if (exec2_list == NULL) {
4004		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4005			  args->buffer_count);
4006		return -ENOMEM;
4007	}
4008	ret = copy_from_user(exec2_list,
4009			     (struct drm_i915_gem_exec_object2 __user *)
4010			     (uintptr_t) args->buffers_ptr,
4011			     sizeof(*exec2_list) * args->buffer_count);
4012	if (ret != 0) {
4013		DRM_ERROR("copy %d exec entries failed %d\n",
4014			  args->buffer_count, ret);
4015		drm_free_large(exec2_list);
4016		return -EFAULT;
4017	}
4018
4019	ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
4020	if (!ret) {
4021		/* Copy the new buffer offsets back to the user's exec list. */
4022		ret = copy_to_user((struct drm_i915_gem_exec_object2 __user *)
4023				   (uintptr_t) args->buffers_ptr,
4024				   exec2_list,
4025				   sizeof(*exec2_list) * args->buffer_count);
4026		if (ret) {
4027			DRM_ERROR("failed to copy %d exec entries "
4028				  "back to user (%d bytes not copied)\n",
4029				  args->buffer_count, ret);
4030			ret = -EFAULT;
4031		}
4032	}
4033
4034	drm_free_large(exec2_list);
4035	return ret;
4036}
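
/*
 * Illustrative sketch (not part of the driver): a minimal userspace call
 * into the execbuffer2 ioctl above, assuming libdrm's drmIoctl() and the
 * structures/request codes from i915_drm.h.  The fd, handles, relocation
 * array and batch size are hypothetical; the batch object must be the last
 * entry, since i915_gem_do_execbuffer() treats the final buffer as the
 * batch.
 *
 *	struct drm_i915_gem_exec_object2 objs[2] = {
 *		{ .handle = target_handle,
 *		  .relocation_count = nrelocs,
 *		  .relocs_ptr = (uintptr_t)relocs },
 *		{ .handle = batch_handle },
 *	};
 *	struct drm_i915_gem_execbuffer2 execbuf = {
 *		.buffers_ptr = (uintptr_t)objs,
 *		.buffer_count = 2,
 *		.batch_len = batch_bytes,
 *		.flags = I915_EXEC_RENDER,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 */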
4037
4038int
4039i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
4040{
4041	struct drm_device *dev = obj->dev;
4042	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4043	int ret;
4044
4045	BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4046
4047	i915_verify_inactive(dev, __FILE__, __LINE__);
4048
4049	if (obj_priv->gtt_space != NULL) {
4050		if (alignment == 0)
4051			alignment = i915_gem_get_gtt_alignment(obj);
4052		if (obj_priv->gtt_offset & (alignment - 1)) {
4053			WARN(obj_priv->pin_count,
4054			     "bo is already pinned with incorrect alignment:"
4055			     " offset=%x, req.alignment=%x\n",
4056			     obj_priv->gtt_offset, alignment);
4057			ret = i915_gem_object_unbind(obj);
4058			if (ret)
4059				return ret;
4060		}
4061	}
4062
4063	if (obj_priv->gtt_space == NULL) {
4064		ret = i915_gem_object_bind_to_gtt(obj, alignment);
4065		if (ret)
4066			return ret;
4067	}
4068
4069	obj_priv->pin_count++;
4070
4071	/* If the object is not active and not pending a flush,
4072	 * remove it from the inactive list
4073	 */
4074	if (obj_priv->pin_count == 1) {
4075		atomic_inc(&dev->pin_count);
4076		atomic_add(obj->size, &dev->pin_memory);
4077		if (!obj_priv->active &&
4078		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4079			list_del_init(&obj_priv->list);
4080	}
4081	i915_verify_inactive(dev, __FILE__, __LINE__);
4082
4083	return 0;
4084}
4085
4086void
4087i915_gem_object_unpin(struct drm_gem_object *obj)
4088{
4089	struct drm_device *dev = obj->dev;
4090	drm_i915_private_t *dev_priv = dev->dev_private;
4091	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4092
4093	i915_verify_inactive(dev, __FILE__, __LINE__);
4094	obj_priv->pin_count--;
4095	BUG_ON(obj_priv->pin_count < 0);
4096	BUG_ON(obj_priv->gtt_space == NULL);
4097
4098	/* If the object is no longer pinned, and is
4099	 * neither active nor being flushed, then stick it on
4100	 * the inactive list
4101	 */
4102	if (obj_priv->pin_count == 0) {
4103		if (!obj_priv->active &&
4104		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4105			list_move_tail(&obj_priv->list,
4106				       &dev_priv->mm.inactive_list);
4107		atomic_dec(&dev->pin_count);
4108		atomic_sub(obj->size, &dev->pin_memory);
4109	}
4110	i915_verify_inactive(dev, __FILE__, __LINE__);
4111}
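
/*
 * Illustrative sketch (not part of the driver): the usual kernel-side
 * pairing of i915_gem_object_pin()/unpin() for a driver-internal buffer,
 * mirroring what i915_gem_init_pipe_control() does later in this file.
 * struct_mutex must be held; the hardware programming step is a
 * hypothetical placeholder.
 *
 *	ret = i915_gem_object_pin(obj, 4096);
 *	if (ret == 0) {
 *		uint32_t gtt_offset = to_intel_bo(obj)->gtt_offset;
 *		... point the hardware at gtt_offset ...
 *		i915_gem_object_unpin(obj);
 *	}
 */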
4112
4113int
4114i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4115		   struct drm_file *file_priv)
4116{
4117	struct drm_i915_gem_pin *args = data;
4118	struct drm_gem_object *obj;
4119	struct drm_i915_gem_object *obj_priv;
4120	int ret;
4121
4122	mutex_lock(&dev->struct_mutex);
4123
4124	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4125	if (obj == NULL) {
4126		DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
4127			  args->handle);
4128		mutex_unlock(&dev->struct_mutex);
4129		return -ENOENT;
4130	}
4131	obj_priv = to_intel_bo(obj);
4132
4133	if (obj_priv->madv != I915_MADV_WILLNEED) {
4134		DRM_ERROR("Attempting to pin a purgeable buffer\n");
4135		drm_gem_object_unreference(obj);
4136		mutex_unlock(&dev->struct_mutex);
4137		return -EINVAL;
4138	}
4139
4140	if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
4141		DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4142			  args->handle);
4143		drm_gem_object_unreference(obj);
4144		mutex_unlock(&dev->struct_mutex);
4145		return -EINVAL;
4146	}
4147
4148	if (obj_priv->user_pin_count == 0) {
4149		ret = i915_gem_object_pin(obj, args->alignment);
4150		if (ret != 0) {
4151			drm_gem_object_unreference(obj);
4152			mutex_unlock(&dev->struct_mutex);
4153			return ret;
4154		}
4155	}
4156	obj_priv->user_pin_count++;
4157	obj_priv->pin_filp = file_priv;
4158
4159	i915_gem_object_flush_cpu_write_domain(obj);
4160	args->offset = obj_priv->gtt_offset;
4161	drm_gem_object_unreference(obj);
4162	mutex_unlock(&dev->struct_mutex);
4163
4164	return 0;
4165}
4166
4167int
4168i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4169		     struct drm_file *file_priv)
4170{
4171	struct drm_i915_gem_pin *args = data;
4172	struct drm_gem_object *obj;
4173	struct drm_i915_gem_object *obj_priv;
4174
4175	mutex_lock(&dev->struct_mutex);
4176
4177	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4178	if (obj == NULL) {
4179		DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
4180			  args->handle);
4181		mutex_unlock(&dev->struct_mutex);
4182		return -ENOENT;
4183	}
4184
4185	obj_priv = to_intel_bo(obj);
4186	if (obj_priv->pin_filp != file_priv) {
4187		DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
4188			  args->handle);
4189		drm_gem_object_unreference(obj);
4190		mutex_unlock(&dev->struct_mutex);
4191		return -EINVAL;
4192	}
4193	obj_priv->user_pin_count--;
4194	if (obj_priv->user_pin_count == 0) {
4195		obj_priv->pin_filp = NULL;
4196		i915_gem_object_unpin(obj);
4197	}
4198
4199	drm_gem_object_unreference(obj);
4200	mutex_unlock(&dev->struct_mutex);
4201	return 0;
4202}
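
/*
 * Illustrative sketch (not part of the driver): userspace pairing of the
 * pin/unpin ioctls above, assuming libdrm's drmIoctl() and the structures
 * from i915_drm.h.  These ioctls are normally reserved for privileged
 * clients such as the DDX; fd and handle are hypothetical.
 *
 *	struct drm_i915_gem_pin pin = { .handle = handle, .alignment = 4096 };
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PIN, &pin) == 0) {
 *		... pin.offset now holds the object's fixed GTT address ...
 *		struct drm_i915_gem_unpin unpin = { .handle = handle };
 *		drmIoctl(fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
 *	}
 */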
4203
4204int
4205i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4206		    struct drm_file *file_priv)
4207{
4208	struct drm_i915_gem_busy *args = data;
4209	struct drm_gem_object *obj;
4210	struct drm_i915_gem_object *obj_priv;
4211
4212	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4213	if (obj == NULL) {
4214		DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
4215			  args->handle);
4216		return -ENOENT;
4217	}
4218
4219	mutex_lock(&dev->struct_mutex);
4220
4221	/* Count all active objects as busy, even if they are currently not used
4222	 * by the gpu. Users of this interface expect objects to eventually
4223	 * become non-busy without any further actions, therefore emit any
4224	 * necessary flushes here.
4225	 */
4226	obj_priv = to_intel_bo(obj);
4227	args->busy = obj_priv->active;
4228	if (args->busy) {
4229		/* Unconditionally flush objects, even when the gpu still uses this
4230		 * object. Userspace calling this function indicates that it wants to
4231		 * use this buffer rather sooner than later, so issuing the required
4232		 * flush earlier is beneficial.
4233		 */
4234		if (obj->write_domain) {
4235			i915_gem_flush(dev, 0, obj->write_domain);
4236			(void)i915_add_request(dev, file_priv, obj->write_domain, obj_priv->ring);
4237		}
4238
4239		/* Update the active list for the hardware's current position.
4240		 * Otherwise this only updates on a delayed timer or when irqs
4241		 * are actually unmasked, and our working set ends up being
4242		 * larger than required.
4243		 */
4244		i915_gem_retire_requests_ring(dev, obj_priv->ring);
4245
4246		args->busy = obj_priv->active;
4247	}
4248
4249	drm_gem_object_unreference(obj);
4250	mutex_unlock(&dev->struct_mutex);
4251	return 0;
4252}
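
/*
 * Illustrative sketch (not part of the driver): a userspace caller polling
 * the busy ioctl above until an object goes idle, assuming libdrm's
 * drmIoctl(); fd and handle are hypothetical.  A real client would sleep
 * or do other work between iterations rather than spin.
 *
 *	struct drm_i915_gem_busy busy = { .handle = handle };
 *	do {
 *		drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
 *	} while (busy.busy);
 */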
4253
4254int
4255i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4256			struct drm_file *file_priv)
4257{
4258	return i915_gem_ring_throttle(dev, file_priv);
4259}
4260
4261int
4262i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4263		       struct drm_file *file_priv)
4264{
4265	struct drm_i915_gem_madvise *args = data;
4266	struct drm_gem_object *obj;
4267	struct drm_i915_gem_object *obj_priv;
4268
4269	switch (args->madv) {
4270	case I915_MADV_DONTNEED:
4271	case I915_MADV_WILLNEED:
4272		break;
4273	default:
4274		return -EINVAL;
4275	}
4276
4277	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4278	if (obj == NULL) {
4279		DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
4280			  args->handle);
4281		return -ENOENT;
4282	}
4283
4284	mutex_lock(&dev->struct_mutex);
4285	obj_priv = to_intel_bo(obj);
4286
4287	if (obj_priv->pin_count) {
4288		drm_gem_object_unreference(obj);
4289		mutex_unlock(&dev->struct_mutex);
4290
4291		DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
4292		return -EINVAL;
4293	}
4294
4295	if (obj_priv->madv != __I915_MADV_PURGED)
4296		obj_priv->madv = args->madv;
4297
4298	/* if the object is no longer bound, discard its backing storage */
4299	if (i915_gem_object_is_purgeable(obj_priv) &&
4300	    obj_priv->gtt_space == NULL)
4301		i915_gem_object_truncate(obj);
4302
4303	args->retained = obj_priv->madv != __I915_MADV_PURGED;
4304
4305	drm_gem_object_unreference(obj);
4306	mutex_unlock(&dev->struct_mutex);
4307
4308	return 0;
4309}
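
/*
 * Illustrative sketch (not part of the driver): the intended userspace use
 * of the madvise ioctl above, e.g. a buffer-object cache marking an idle
 * object purgeable when it is parked, then re-marking it needed and
 * checking ->retained when it is taken back out.  fd, handle and
 * reupload() are hypothetical; structures come from i915_drm.h.
 *
 *	struct drm_i915_gem_madvise madv = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *
 *	madv.madv = I915_MADV_WILLNEED;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	if (!madv.retained)
 *		reupload(handle);
 */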
4310
4311struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
4312					      size_t size)
4313{
4314	struct drm_i915_gem_object *obj;
4315
4316	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4317	if (obj == NULL)
4318		return NULL;
4319
4320	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4321		kfree(obj);
4322		return NULL;
4323	}
4324
4325	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4326	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4327
4328	obj->agp_type = AGP_USER_MEMORY;
4329	obj->base.driver_private = NULL;
4330	obj->fence_reg = I915_FENCE_REG_NONE;
4331	INIT_LIST_HEAD(&obj->list);
4332	INIT_LIST_HEAD(&obj->gpu_write_list);
4333	obj->madv = I915_MADV_WILLNEED;
4334
4335	trace_i915_gem_object_create(&obj->base);
4336
4337	return &obj->base;
4338}
4339
4340int i915_gem_init_object(struct drm_gem_object *obj)
4341{
4342	BUG();
4343
4344	return 0;
4345}
4346
4347static void i915_gem_free_object_tail(struct drm_gem_object *obj)
4348{
4349	struct drm_device *dev = obj->dev;
4350	drm_i915_private_t *dev_priv = dev->dev_private;
4351	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4352	int ret;
4353
4354	ret = i915_gem_object_unbind(obj);
4355	if (ret == -ERESTARTSYS) {
4356		list_move(&obj_priv->list,
4357			  &dev_priv->mm.deferred_free_list);
4358		return;
4359	}
4360
4361	if (obj_priv->mmap_offset)
4362		i915_gem_free_mmap_offset(obj);
4363
4364	drm_gem_object_release(obj);
4365
4366	kfree(obj_priv->page_cpu_valid);
4367	kfree(obj_priv->bit_17);
4368	kfree(obj_priv);
4369}
4370
4371void i915_gem_free_object(struct drm_gem_object *obj)
4372{
4373	struct drm_device *dev = obj->dev;
4374	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4375
4376	trace_i915_gem_object_destroy(obj);
4377
4378	while (obj_priv->pin_count > 0)
4379		i915_gem_object_unpin(obj);
4380
4381	if (obj_priv->phys_obj)
4382		i915_gem_detach_phys_object(dev, obj);
4383
4384	i915_gem_free_object_tail(obj);
4385}
4386
4387int
4388i915_gem_idle(struct drm_device *dev)
4389{
4390	drm_i915_private_t *dev_priv = dev->dev_private;
4391	int ret;
4392
4393	mutex_lock(&dev->struct_mutex);
4394
4395	if (dev_priv->mm.suspended ||
4396			(dev_priv->render_ring.gem_object == NULL) ||
4397			(HAS_BSD(dev) &&
4398			 dev_priv->bsd_ring.gem_object == NULL)) {
4399		mutex_unlock(&dev->struct_mutex);
4400		return 0;
4401	}
4402
4403	ret = i915_gpu_idle(dev);
4404	if (ret) {
4405		mutex_unlock(&dev->struct_mutex);
4406		return ret;
4407	}
4408
4409	/* Under UMS, be paranoid and evict. */
4410	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4411		ret = i915_gem_evict_inactive(dev);
4412		if (ret) {
4413			mutex_unlock(&dev->struct_mutex);
4414			return ret;
4415		}
4416	}
4417
4418	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
4419	 * We need to replace this with a semaphore, or something.
4420	 * And not confound mm.suspended!
4421	 */
4422	dev_priv->mm.suspended = 1;
4423	del_timer(&dev_priv->hangcheck_timer);
4424
4425	i915_kernel_lost_context(dev);
4426	i915_gem_cleanup_ringbuffer(dev);
4427
4428	mutex_unlock(&dev->struct_mutex);
4429
4430	/* Cancel the retire work handler, which should be idle now. */
4431	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4432
4433	return 0;
4434}
4435
4436/*
4437 * 965+ support PIPE_CONTROL commands, which provide finer grained control
4438 * over cache flushing.
4439 */
4440static int
4441i915_gem_init_pipe_control(struct drm_device *dev)
4442{
4443	drm_i915_private_t *dev_priv = dev->dev_private;
4444	struct drm_gem_object *obj;
4445	struct drm_i915_gem_object *obj_priv;
4446	int ret;
4447
4448	obj = i915_gem_alloc_object(dev, 4096);
4449	if (obj == NULL) {
4450		DRM_ERROR("Failed to allocate seqno page\n");
4451		ret = -ENOMEM;
4452		goto err;
4453	}
4454	obj_priv = to_intel_bo(obj);
4455	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4456
4457	ret = i915_gem_object_pin(obj, 4096);
4458	if (ret)
4459		goto err_unref;
4460
4461	dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
4462	dev_priv->seqno_page = kmap(obj_priv->pages[0]);
4463	if (dev_priv->seqno_page == NULL) {
4464		ret = -ENOMEM;
4465		goto err_unpin;
4466	}
4465
4466	dev_priv->seqno_obj = obj;
4467	memset(dev_priv->seqno_page, 0, PAGE_SIZE);
4468
4469	return 0;
4470
4471err_unpin:
4472	i915_gem_object_unpin(obj);
4473err_unref:
4474	drm_gem_object_unreference(obj);
4475err:
4476	return ret;
4477}
4478
4479
4480static void
4481i915_gem_cleanup_pipe_control(struct drm_device *dev)
4482{
4483	drm_i915_private_t *dev_priv = dev->dev_private;
4484	struct drm_gem_object *obj;
4485	struct drm_i915_gem_object *obj_priv;
4486
4487	obj = dev_priv->seqno_obj;
4488	obj_priv = to_intel_bo(obj);
4489	kunmap(obj_priv->pages[0]);
4490	i915_gem_object_unpin(obj);
4491	drm_gem_object_unreference(obj);
4492	dev_priv->seqno_obj = NULL;
4493
4494	dev_priv->seqno_page = NULL;
4495}
4496
4497int
4498i915_gem_init_ringbuffer(struct drm_device *dev)
4499{
4500	drm_i915_private_t *dev_priv = dev->dev_private;
4501	int ret;
4502
4503	dev_priv->render_ring = render_ring;
4504
4505	if (!I915_NEED_GFX_HWS(dev)) {
4506		dev_priv->render_ring.status_page.page_addr
4507			= dev_priv->status_page_dmah->vaddr;
4508		memset(dev_priv->render_ring.status_page.page_addr,
4509				0, PAGE_SIZE);
4510	}
4511
4512	if (HAS_PIPE_CONTROL(dev)) {
4513		ret = i915_gem_init_pipe_control(dev);
4514		if (ret)
4515			return ret;
4516	}
4517
4518	ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
4519	if (ret)
4520		goto cleanup_pipe_control;
4521
4522	if (HAS_BSD(dev)) {
4523		dev_priv->bsd_ring = bsd_ring;
4524		ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
4525		if (ret)
4526			goto cleanup_render_ring;
4527	}
4528
4529	dev_priv->next_seqno = 1;
4530
4531	return 0;
4532
4533cleanup_render_ring:
4534	intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4535cleanup_pipe_control:
4536	if (HAS_PIPE_CONTROL(dev))
4537		i915_gem_cleanup_pipe_control(dev);
4538	return ret;
4539}
4540
4541void
4542i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4543{
4544	drm_i915_private_t *dev_priv = dev->dev_private;
4545
4546	intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4547	if (HAS_BSD(dev))
4548		intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
4549	if (HAS_PIPE_CONTROL(dev))
4550		i915_gem_cleanup_pipe_control(dev);
4551}
4552
4553int
4554i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4555		       struct drm_file *file_priv)
4556{
4557	drm_i915_private_t *dev_priv = dev->dev_private;
4558	int ret;
4559
4560	if (drm_core_check_feature(dev, DRIVER_MODESET))
4561		return 0;
4562
4563	if (atomic_read(&dev_priv->mm.wedged)) {
4564		DRM_ERROR("Reenabling wedged hardware, good luck\n");
4565		atomic_set(&dev_priv->mm.wedged, 0);
4566	}
4567
4568	mutex_lock(&dev->struct_mutex);
4569	dev_priv->mm.suspended = 0;
4570
4571	ret = i915_gem_init_ringbuffer(dev);
4572	if (ret != 0) {
4573		mutex_unlock(&dev->struct_mutex);
4574		return ret;
4575	}
4576
4577	spin_lock(&dev_priv->mm.active_list_lock);
4578	BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4579	BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list));
4580	spin_unlock(&dev_priv->mm.active_list_lock);
4581
4582	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4583	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4584	BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4585	BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list));
4586	mutex_unlock(&dev->struct_mutex);
4587
4588	ret = drm_irq_install(dev);
4589	if (ret)
4590		goto cleanup_ringbuffer;
4591
4592	return 0;
4593
4594cleanup_ringbuffer:
4595	mutex_lock(&dev->struct_mutex);
4596	i915_gem_cleanup_ringbuffer(dev);
4597	dev_priv->mm.suspended = 1;
4598	mutex_unlock(&dev->struct_mutex);
4599
4600	return ret;
4601}
4602
4603int
4604i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4605		       struct drm_file *file_priv)
4606{
4607	if (drm_core_check_feature(dev, DRIVER_MODESET))
4608		return 0;
4609
4610	drm_irq_uninstall(dev);
4611	return i915_gem_idle(dev);
4612}
4613
4614void
4615i915_gem_lastclose(struct drm_device *dev)
4616{
4617	int ret;
4618
4619	if (drm_core_check_feature(dev, DRIVER_MODESET))
4620		return;
4621
4622	ret = i915_gem_idle(dev);
4623	if (ret)
4624		DRM_ERROR("failed to idle hardware: %d\n", ret);
4625}
4626
4627void
4628i915_gem_load(struct drm_device *dev)
4629{
4630	int i;
4631	drm_i915_private_t *dev_priv = dev->dev_private;
4632
4633	spin_lock_init(&dev_priv->mm.active_list_lock);
4634	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4635	INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
4636	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4637	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4638	INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4639	INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
4640	INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
4641	if (HAS_BSD(dev)) {
4642		INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
4643		INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
4644	}
4645	for (i = 0; i < 16; i++)
4646		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4647	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4648			  i915_gem_retire_work_handler);
4649	spin_lock(&shrink_list_lock);
4650	list_add(&dev_priv->mm.shrink_list, &shrink_list);
4651	spin_unlock(&shrink_list_lock);
4652
4653	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4654	if (IS_GEN3(dev)) {
4655		u32 tmp = I915_READ(MI_ARB_STATE);
4656		if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4657			/* arb state is a masked write, so set bit + bit in mask */
4658			tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4659			I915_WRITE(MI_ARB_STATE, tmp);
4660		}
4661	}
4662
4663	/* Old X drivers will take 0-2 for front, back, depth buffers */
4664	if (!drm_core_check_feature(dev, DRIVER_MODESET))
4665		dev_priv->fence_reg_start = 3;
4666
4667	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4668		dev_priv->num_fence_regs = 16;
4669	else
4670		dev_priv->num_fence_regs = 8;
4671
4672	/* Initialize fence registers to zero */
4673	if (IS_I965G(dev)) {
4674		for (i = 0; i < 16; i++)
4675			I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4676	} else {
4677		for (i = 0; i < 8; i++)
4678			I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4679		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4680			for (i = 0; i < 8; i++)
4681				I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4682	}
4683	i915_gem_detect_bit_6_swizzle(dev);
4684	init_waitqueue_head(&dev_priv->pending_flip_queue);
4685}
4686
4687/*
4688 * Create a physically contiguous memory object for this object
4689 * e.g. for cursor + overlay regs
4690 */
4691int i915_gem_init_phys_object(struct drm_device *dev,
4692			      int id, int size, int align)
4693{
4694	drm_i915_private_t *dev_priv = dev->dev_private;
4695	struct drm_i915_gem_phys_object *phys_obj;
4696	int ret;
4697
4698	if (dev_priv->mm.phys_objs[id - 1] || !size)
4699		return 0;
4700
4701	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4702	if (!phys_obj)
4703		return -ENOMEM;
4704
4705	phys_obj->id = id;
4706
4707	phys_obj->handle = drm_pci_alloc(dev, size, align);
4708	if (!phys_obj->handle) {
4709		ret = -ENOMEM;
4710		goto kfree_obj;
4711	}
4712#ifdef CONFIG_X86
4713	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4714#endif
4715
4716	dev_priv->mm.phys_objs[id - 1] = phys_obj;
4717
4718	return 0;
4719kfree_obj:
4720	kfree(phys_obj);
4721	return ret;
4722}
4723
4724void i915_gem_free_phys_object(struct drm_device *dev, int id)
4725{
4726	drm_i915_private_t *dev_priv = dev->dev_private;
4727	struct drm_i915_gem_phys_object *phys_obj;
4728
4729	if (!dev_priv->mm.phys_objs[id - 1])
4730		return;
4731
4732	phys_obj = dev_priv->mm.phys_objs[id - 1];
4733	if (phys_obj->cur_obj) {
4734		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4735	}
4736
4737#ifdef CONFIG_X86
4738	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4739#endif
4740	drm_pci_free(dev, phys_obj->handle);
4741	kfree(phys_obj);
4742	dev_priv->mm.phys_objs[id - 1] = NULL;
4743}
4744
4745void i915_gem_free_all_phys_object(struct drm_device *dev)
4746{
4747	int i;
4748
4749	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4750		i915_gem_free_phys_object(dev, i);
4751}
4752
4753void i915_gem_detach_phys_object(struct drm_device *dev,
4754				 struct drm_gem_object *obj)
4755{
4756	struct drm_i915_gem_object *obj_priv;
4757	int i;
4758	int ret;
4759	int page_count;
4760
4761	obj_priv = to_intel_bo(obj);
4762	if (!obj_priv->phys_obj)
4763		return;
4764
4765	ret = i915_gem_object_get_pages(obj, 0);
4766	if (ret)
4767		goto out;
4768
4769	page_count = obj->size / PAGE_SIZE;
4770
4771	for (i = 0; i < page_count; i++) {
4772		char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
4773		char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4774
4775		memcpy(dst, src, PAGE_SIZE);
4776		kunmap_atomic(dst, KM_USER0);
4777	}
4778	drm_clflush_pages(obj_priv->pages, page_count);
4779	drm_agp_chipset_flush(dev);
4780
4781	i915_gem_object_put_pages(obj);
4782out:
4783	obj_priv->phys_obj->cur_obj = NULL;
4784	obj_priv->phys_obj = NULL;
4785}
4786
4787int
4788i915_gem_attach_phys_object(struct drm_device *dev,
4789			    struct drm_gem_object *obj,
4790			    int id,
4791			    int align)
4792{
4793	drm_i915_private_t *dev_priv = dev->dev_private;
4794	struct drm_i915_gem_object *obj_priv;
4795	int ret = 0;
4796	int page_count;
4797	int i;
4798
4799	if (id > I915_MAX_PHYS_OBJECT)
4800		return -EINVAL;
4801
4802	obj_priv = to_intel_bo(obj);
4803
4804	if (obj_priv->phys_obj) {
4805		if (obj_priv->phys_obj->id == id)
4806			return 0;
4807		i915_gem_detach_phys_object(dev, obj);
4808	}
4809
4810	/* create a new object */
4811	if (!dev_priv->mm.phys_objs[id - 1]) {
4812		ret = i915_gem_init_phys_object(dev, id,
4813						obj->size, align);
4814		if (ret) {
4815			DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
4816			goto out;
4817		}
4818	}
4819
4820	/* bind to the object */
4821	obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4822	obj_priv->phys_obj->cur_obj = obj;
4823
4824	ret = i915_gem_object_get_pages(obj, 0);
4825	if (ret) {
4826		DRM_ERROR("failed to get page list\n");
4827		goto out;
4828	}
4829
4830	page_count = obj->size / PAGE_SIZE;
4831
4832	for (i = 0; i < page_count; i++) {
4833		char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
4834		char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4835
4836		memcpy(dst, src, PAGE_SIZE);
4837		kunmap_atomic(src, KM_USER0);
4838	}
4839
4840	i915_gem_object_put_pages(obj);
4841
4842	return 0;
4843out:
4844	return ret;
4845}
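
/*
 * Illustrative sketch (not part of the driver): how a kernel-side caller,
 * e.g. cursor setup on chipsets that need physically contiguous memory,
 * might use the phys-object helpers above.  The id/alignment choice and
 * the register programming are assumptions; the bus address comes from the
 * drm_pci_alloc() handle stored in phys_obj->handle.
 *
 *	ret = i915_gem_attach_phys_object(dev, obj,
 *					  I915_GEM_PHYS_CURSOR_0, PAGE_SIZE);
 *	if (ret == 0) {
 *		addr = to_intel_bo(obj)->phys_obj->handle->busaddr;
 *		... program the cursor base register with addr ...
 *	}
 *	... later, when the object is retired ...
 *	i915_gem_detach_phys_object(dev, obj);
 */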
4846
4847static int
4848i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4849		     struct drm_i915_gem_pwrite *args,
4850		     struct drm_file *file_priv)
4851{
4852	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4853	void *obj_addr;
4854	int ret;
4855	char __user *user_data;
4856
4857	user_data = (char __user *) (uintptr_t) args->data_ptr;
4858	obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4859
4860	DRM_DEBUG_DRIVER("obj_addr %p, %llu\n", obj_addr, args->size);
4861	ret = copy_from_user(obj_addr, user_data, args->size);
4862	if (ret)
4863		return -EFAULT;
4864
4865	drm_agp_chipset_flush(dev);
4866	return 0;
4867}
4868
4869void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4870{
4871	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
4872
4873	/* Clean up our request list when the client is going away, so that
4874	 * later retire_requests won't dereference our soon-to-be-gone
4875	 * file_priv.
4876	 */
4877	mutex_lock(&dev->struct_mutex);
4878	while (!list_empty(&i915_file_priv->mm.request_list))
4879		list_del_init(i915_file_priv->mm.request_list.next);
4880	mutex_unlock(&dev->struct_mutex);
4881}
4882
4883static int
4884i915_gpu_is_active(struct drm_device *dev)
4885{
4886	drm_i915_private_t *dev_priv = dev->dev_private;
4887	int lists_empty;
4888
4889	spin_lock(&dev_priv->mm.active_list_lock);
4890	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
4891		      list_empty(&dev_priv->render_ring.active_list);
4892	if (HAS_BSD(dev))
4893		lists_empty &= list_empty(&dev_priv->bsd_ring.active_list);
4894	spin_unlock(&dev_priv->mm.active_list_lock);
4895
4896	return !lists_empty;
4897}
4898
4899static int
4900i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
4901{
4902	drm_i915_private_t *dev_priv, *next_dev;
4903	struct drm_i915_gem_object *obj_priv, *next_obj;
4904	int cnt = 0;
4905	int would_deadlock = 1;
4906
4907	/* "fast-path" to count number of available objects */
4908	if (nr_to_scan == 0) {
4909		spin_lock(&shrink_list_lock);
4910		list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4911			struct drm_device *dev = dev_priv->dev;
4912
4913			if (mutex_trylock(&dev->struct_mutex)) {
4914				list_for_each_entry(obj_priv,
4915						    &dev_priv->mm.inactive_list,
4916						    list)
4917					cnt++;
4918				mutex_unlock(&dev->struct_mutex);
4919			}
4920		}
4921		spin_unlock(&shrink_list_lock);
4922
4923		return (cnt / 100) * sysctl_vfs_cache_pressure;
4924	}
4925
4926	spin_lock(&shrink_list_lock);
4927
4928rescan:
4929	/* first scan for clean buffers */
4930	list_for_each_entry_safe(dev_priv, next_dev,
4931				 &shrink_list, mm.shrink_list) {
4932		struct drm_device *dev = dev_priv->dev;
4933
4934		if (!mutex_trylock(&dev->struct_mutex))
4935			continue;
4936
4937		spin_unlock(&shrink_list_lock);
4938		i915_gem_retire_requests(dev);
4939
4940		list_for_each_entry_safe(obj_priv, next_obj,
4941					 &dev_priv->mm.inactive_list,
4942					 list) {
4943			if (i915_gem_object_is_purgeable(obj_priv)) {
4944				i915_gem_object_unbind(&obj_priv->base);
4945				if (--nr_to_scan <= 0)
4946					break;
4947			}
4948		}
4949
4950		spin_lock(&shrink_list_lock);
4951		mutex_unlock(&dev->struct_mutex);
4952
4953		would_deadlock = 0;
4954
4955		if (nr_to_scan <= 0)
4956			break;
4957	}
4958
4959	/* second pass, evict/count anything still on the inactive list */
4960	list_for_each_entry_safe(dev_priv, next_dev,
4961				 &shrink_list, mm.shrink_list) {
4962		struct drm_device *dev = dev_priv->dev;
4963
4964		if (!mutex_trylock(&dev->struct_mutex))
4965			continue;
4966
4967		spin_unlock(&shrink_list_lock);
4968
4969		list_for_each_entry_safe(obj_priv, next_obj,
4970					 &dev_priv->mm.inactive_list,
4971					 list) {
4972			if (nr_to_scan > 0) {
4973				i915_gem_object_unbind(&obj_priv->base);
4974				nr_to_scan--;
4975			} else
4976				cnt++;
4977		}
4978
4979		spin_lock(&shrink_list_lock);
4980		mutex_unlock(&dev->struct_mutex);
4981
4982		would_deadlock = 0;
4983	}
4984
4985	if (nr_to_scan) {
4986		int active = 0;
4987
4988		/*
4989		 * We are desperate for pages, so as a last resort, wait
4990		 * for the GPU to finish and discard whatever we can.
4991		 * This dramatically reduces the number of OOM-killer
4992		 * events whilst running the GPU aggressively.
4993		 */
4994		list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4995			struct drm_device *dev = dev_priv->dev;
4996
4997			if (!mutex_trylock(&dev->struct_mutex))
4998				continue;
4999
5000			spin_unlock(&shrink_list_lock);
5001
5002			if (i915_gpu_is_active(dev)) {
5003				i915_gpu_idle(dev);
5004				active++;
5005			}
5006
5007			spin_lock(&shrink_list_lock);
5008			mutex_unlock(&dev->struct_mutex);
5009		}
5010
5011		if (active)
5012			goto rescan;
5013	}
5014
5015	spin_unlock(&shrink_list_lock);
5016
5017	if (would_deadlock)
5018		return -1;
5019	else if (cnt > 0)
5020		return (cnt / 100) * sysctl_vfs_cache_pressure;
5021	else
5022		return 0;
5023}
5024
5025static struct shrinker shrinker = {
5026	.shrink = i915_gem_shrink,
5027	.seeks = DEFAULT_SEEKS,
5028};
5029
5030__init void
5031i915_gem_shrinker_init(void)
5032{
5033	register_shrinker(&shrinker);
5034}
5035
5036__exit void
5037i915_gem_shrinker_exit(void)
5038{
5039	unregister_shrinker(&shrinker);
5040}
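
/*
 * Note on the shrinker contract assumed by i915_gem_shrink() above (a
 * sketch of the 2.6.36-era API, for reference): the VM invokes ->shrink()
 * with nr_to_scan == 0 purely to ask how many objects could be freed, and
 * with a positive nr_to_scan to actually reclaim; returning -1 signals
 * that the required locks could not be taken without risking deadlock, so
 * the VM should try again later.  The count_freeable() and reclaim_some()
 * helpers below are hypothetical, with reclaim_some() returning -1 in the
 * would-deadlock case.
 *
 *	static int example_shrink(struct shrinker *s, int nr_to_scan,
 *				  gfp_t gfp_mask)
 *	{
 *		if (nr_to_scan == 0)
 *			return (count_freeable() / 100) *
 *				sysctl_vfs_cache_pressure;
 *		return reclaim_some(nr_to_scan);
 *	}
 */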
5041