1/* 2 * Copyright �� 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28#include "drmP.h" 29#include "drm.h" 30#include "i915_drm.h" 31#include "i915_drv.h" 32#include "i915_trace.h" 33#include "intel_drv.h" 34#include <linux/slab.h> 35#include <linux/swap.h> 36#include <linux/pci.h> 37#include <linux/intel-gtt.h> 38 39static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj); 40static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj); 41static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj); 42static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj); 43static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, 44 int write); 45static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 46 uint64_t offset, 47 uint64_t size); 48static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj); 49static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); 50static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, 51 unsigned alignment); 52static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); 53static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 54 struct drm_i915_gem_pwrite *args, 55 struct drm_file *file_priv); 56static void i915_gem_free_object_tail(struct drm_gem_object *obj); 57 58static LIST_HEAD(shrink_list); 59static DEFINE_SPINLOCK(shrink_list_lock); 60 61static inline bool 62i915_gem_object_is_inactive(struct drm_i915_gem_object *obj_priv) 63{ 64 return obj_priv->gtt_space && 65 !obj_priv->active && 66 obj_priv->pin_count == 0; 67} 68 69int i915_gem_do_init(struct drm_device *dev, unsigned long start, 70 unsigned long end) 71{ 72 drm_i915_private_t *dev_priv = dev->dev_private; 73 74 if (start >= end || 75 (start & (PAGE_SIZE - 1)) != 0 || 76 (end & (PAGE_SIZE - 1)) != 0) { 77 return -EINVAL; 78 } 79 80 drm_mm_init(&dev_priv->mm.gtt_space, start, 81 end - start); 82 83 dev->gtt_total = (uint32_t) (end - start); 84 85 return 0; 86} 87 88int 89i915_gem_init_ioctl(struct drm_device *dev, void *data, 90 struct drm_file *file_priv) 91{ 92 struct drm_i915_gem_init *args = data; 93 int ret; 94 95 mutex_lock(&dev->struct_mutex); 96 ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end); 97 mutex_unlock(&dev->struct_mutex); 98 99 return ret; 100} 101 102int 103i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 104 struct drm_file *file_priv) 105{ 106 struct drm_i915_gem_get_aperture *args = data; 107 108 if (!(dev->driver->driver_features & DRIVER_GEM)) 109 return -ENODEV; 110 111 args->aper_size = dev->gtt_total; 112 args->aper_available_size = (args->aper_size - 113 atomic_read(&dev->pin_memory)); 114 115 return 0; 116} 117 118 119/** 120 * Creates a new mm object and returns a handle to it. 121 */ 122int 123i915_gem_create_ioctl(struct drm_device *dev, void *data, 124 struct drm_file *file_priv) 125{ 126 struct drm_i915_gem_create *args = data; 127 struct drm_gem_object *obj; 128 int ret; 129 u32 handle; 130 131 args->size = roundup(args->size, PAGE_SIZE); 132 133 /* Allocate the new object */ 134 obj = i915_gem_alloc_object(dev, args->size); 135 if (obj == NULL) 136 return -ENOMEM; 137 138 ret = drm_gem_handle_create(file_priv, obj, &handle); 139 /* drop reference from allocate - handle holds it now */ 140 drm_gem_object_unreference_unlocked(obj); 141 if (ret) { 142 return ret; 143 } 144 145 args->handle = handle; 146 return 0; 147} 148 149static inline int 150fast_shmem_read(struct page **pages, 151 loff_t page_base, int page_offset, 152 char __user *data, 153 int length) 154{ 155 char __iomem *vaddr; 156 int unwritten; 157 158 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); 159 if (vaddr == NULL) 160 return -ENOMEM; 161 unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length); 162 kunmap_atomic(vaddr, KM_USER0); 163 164 if (unwritten) 165 return -EFAULT; 166 167 return 0; 168} 169 170static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj) 171{ 172 drm_i915_private_t *dev_priv = obj->dev->dev_private; 173 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 174 175 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && 176 obj_priv->tiling_mode != I915_TILING_NONE; 177} 178 179static inline void 180slow_shmem_copy(struct page *dst_page, 181 int dst_offset, 182 struct page *src_page, 183 int src_offset, 184 int length) 185{ 186 char *dst_vaddr, *src_vaddr; 187 188 dst_vaddr = kmap(dst_page); 189 src_vaddr = kmap(src_page); 190 191 memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length); 192 193 kunmap(src_page); 194 kunmap(dst_page); 195} 196 197static inline void 198slow_shmem_bit17_copy(struct page *gpu_page, 199 int gpu_offset, 200 struct page *cpu_page, 201 int cpu_offset, 202 int length, 203 int is_read) 204{ 205 char *gpu_vaddr, *cpu_vaddr; 206 207 /* Use the unswizzled path if this page isn't affected. */ 208 if ((page_to_phys(gpu_page) & (1 << 17)) == 0) { 209 if (is_read) 210 return slow_shmem_copy(cpu_page, cpu_offset, 211 gpu_page, gpu_offset, length); 212 else 213 return slow_shmem_copy(gpu_page, gpu_offset, 214 cpu_page, cpu_offset, length); 215 } 216 217 gpu_vaddr = kmap(gpu_page); 218 cpu_vaddr = kmap(cpu_page); 219 220 /* Copy the data, XORing A6 with A17 (1). The user already knows he's 221 * XORing with the other bits (A9 for Y, A9 and A10 for X) 222 */ 223 while (length > 0) { 224 int cacheline_end = ALIGN(gpu_offset + 1, 64); 225 int this_length = min(cacheline_end - gpu_offset, length); 226 int swizzled_gpu_offset = gpu_offset ^ 64; 227 228 if (is_read) { 229 memcpy(cpu_vaddr + cpu_offset, 230 gpu_vaddr + swizzled_gpu_offset, 231 this_length); 232 } else { 233 memcpy(gpu_vaddr + swizzled_gpu_offset, 234 cpu_vaddr + cpu_offset, 235 this_length); 236 } 237 cpu_offset += this_length; 238 gpu_offset += this_length; 239 length -= this_length; 240 } 241 242 kunmap(cpu_page); 243 kunmap(gpu_page); 244} 245 246/** 247 * This is the fast shmem pread path, which attempts to copy_from_user directly 248 * from the backing pages of the object to the user's address space. On a 249 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow(). 250 */ 251static int 252i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj, 253 struct drm_i915_gem_pread *args, 254 struct drm_file *file_priv) 255{ 256 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 257 ssize_t remain; 258 loff_t offset, page_base; 259 char __user *user_data; 260 int page_offset, page_length; 261 int ret; 262 263 user_data = (char __user *) (uintptr_t) args->data_ptr; 264 remain = args->size; 265 266 mutex_lock(&dev->struct_mutex); 267 268 ret = i915_gem_object_get_pages(obj, 0); 269 if (ret != 0) 270 goto fail_unlock; 271 272 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, 273 args->size); 274 if (ret != 0) 275 goto fail_put_pages; 276 277 obj_priv = to_intel_bo(obj); 278 offset = args->offset; 279 280 while (remain > 0) { 281 /* Operation in this page 282 * 283 * page_base = page offset within aperture 284 * page_offset = offset within page 285 * page_length = bytes to copy for this page 286 */ 287 page_base = (offset & ~(PAGE_SIZE-1)); 288 page_offset = offset & (PAGE_SIZE-1); 289 page_length = remain; 290 if ((page_offset + remain) > PAGE_SIZE) 291 page_length = PAGE_SIZE - page_offset; 292 293 ret = fast_shmem_read(obj_priv->pages, 294 page_base, page_offset, 295 user_data, page_length); 296 if (ret) 297 goto fail_put_pages; 298 299 remain -= page_length; 300 user_data += page_length; 301 offset += page_length; 302 } 303 304fail_put_pages: 305 i915_gem_object_put_pages(obj); 306fail_unlock: 307 mutex_unlock(&dev->struct_mutex); 308 309 return ret; 310} 311 312static int 313i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj) 314{ 315 int ret; 316 317 ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN); 318 319 /* If we've insufficient memory to map in the pages, attempt 320 * to make some space by throwing out some old buffers. 321 */ 322 if (ret == -ENOMEM) { 323 struct drm_device *dev = obj->dev; 324 325 ret = i915_gem_evict_something(dev, obj->size, 326 i915_gem_get_gtt_alignment(obj)); 327 if (ret) 328 return ret; 329 330 ret = i915_gem_object_get_pages(obj, 0); 331 } 332 333 return ret; 334} 335 336/** 337 * This is the fallback shmem pread path, which allocates temporary storage 338 * in kernel space to copy_to_user into outside of the struct_mutex, so we 339 * can copy out of the object's backing pages while holding the struct mutex 340 * and not take page faults. 341 */ 342static int 343i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, 344 struct drm_i915_gem_pread *args, 345 struct drm_file *file_priv) 346{ 347 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 348 struct mm_struct *mm = current->mm; 349 struct page **user_pages; 350 ssize_t remain; 351 loff_t offset, pinned_pages, i; 352 loff_t first_data_page, last_data_page, num_pages; 353 int shmem_page_index, shmem_page_offset; 354 int data_page_index, data_page_offset; 355 int page_length; 356 int ret; 357 uint64_t data_ptr = args->data_ptr; 358 int do_bit17_swizzling; 359 360 remain = args->size; 361 362 /* Pin the user pages containing the data. We can't fault while 363 * holding the struct mutex, yet we want to hold it while 364 * dereferencing the user data. 365 */ 366 first_data_page = data_ptr / PAGE_SIZE; 367 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 368 num_pages = last_data_page - first_data_page + 1; 369 370 user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); 371 if (user_pages == NULL) 372 return -ENOMEM; 373 374 down_read(&mm->mmap_sem); 375 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 376 num_pages, 1, 0, user_pages, NULL); 377 up_read(&mm->mmap_sem); 378 if (pinned_pages < num_pages) { 379 ret = -EFAULT; 380 goto fail_put_user_pages; 381 } 382 383 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 384 385 mutex_lock(&dev->struct_mutex); 386 387 ret = i915_gem_object_get_pages_or_evict(obj); 388 if (ret) 389 goto fail_unlock; 390 391 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, 392 args->size); 393 if (ret != 0) 394 goto fail_put_pages; 395 396 obj_priv = to_intel_bo(obj); 397 offset = args->offset; 398 399 while (remain > 0) { 400 /* Operation in this page 401 * 402 * shmem_page_index = page number within shmem file 403 * shmem_page_offset = offset within page in shmem file 404 * data_page_index = page number in get_user_pages return 405 * data_page_offset = offset with data_page_index page. 406 * page_length = bytes to copy for this page 407 */ 408 shmem_page_index = offset / PAGE_SIZE; 409 shmem_page_offset = offset & ~PAGE_MASK; 410 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 411 data_page_offset = data_ptr & ~PAGE_MASK; 412 413 page_length = remain; 414 if ((shmem_page_offset + page_length) > PAGE_SIZE) 415 page_length = PAGE_SIZE - shmem_page_offset; 416 if ((data_page_offset + page_length) > PAGE_SIZE) 417 page_length = PAGE_SIZE - data_page_offset; 418 419 if (do_bit17_swizzling) { 420 slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], 421 shmem_page_offset, 422 user_pages[data_page_index], 423 data_page_offset, 424 page_length, 425 1); 426 } else { 427 slow_shmem_copy(user_pages[data_page_index], 428 data_page_offset, 429 obj_priv->pages[shmem_page_index], 430 shmem_page_offset, 431 page_length); 432 } 433 434 remain -= page_length; 435 data_ptr += page_length; 436 offset += page_length; 437 } 438 439fail_put_pages: 440 i915_gem_object_put_pages(obj); 441fail_unlock: 442 mutex_unlock(&dev->struct_mutex); 443fail_put_user_pages: 444 for (i = 0; i < pinned_pages; i++) { 445 SetPageDirty(user_pages[i]); 446 page_cache_release(user_pages[i]); 447 } 448 drm_free_large(user_pages); 449 450 return ret; 451} 452 453/** 454 * Reads data from the object referenced by handle. 455 * 456 * On error, the contents of *data are undefined. 457 */ 458int 459i915_gem_pread_ioctl(struct drm_device *dev, void *data, 460 struct drm_file *file_priv) 461{ 462 struct drm_i915_gem_pread *args = data; 463 struct drm_gem_object *obj; 464 struct drm_i915_gem_object *obj_priv; 465 int ret; 466 467 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 468 if (obj == NULL) 469 return -ENOENT; 470 obj_priv = to_intel_bo(obj); 471 472 /* Bounds check source. */ 473 if (args->offset > obj->size || args->size > obj->size - args->offset) { 474 ret = -EINVAL; 475 goto err; 476 } 477 478 if (!access_ok(VERIFY_WRITE, 479 (char __user *)(uintptr_t)args->data_ptr, 480 args->size)) { 481 ret = -EFAULT; 482 goto err; 483 } 484 485 if (i915_gem_object_needs_bit17_swizzle(obj)) { 486 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv); 487 } else { 488 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); 489 if (ret != 0) 490 ret = i915_gem_shmem_pread_slow(dev, obj, args, 491 file_priv); 492 } 493 494err: 495 drm_gem_object_unreference_unlocked(obj); 496 return ret; 497} 498 499/* This is the fast write path which cannot handle 500 * page faults in the source data 501 */ 502 503static inline int 504fast_user_write(struct io_mapping *mapping, 505 loff_t page_base, int page_offset, 506 char __user *user_data, 507 int length) 508{ 509 char *vaddr_atomic; 510 unsigned long unwritten; 511 512 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0); 513 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, 514 user_data, length); 515 io_mapping_unmap_atomic(vaddr_atomic, KM_USER0); 516 if (unwritten) 517 return -EFAULT; 518 return 0; 519} 520 521/* Here's the write path which can sleep for 522 * page faults 523 */ 524 525static inline void 526slow_kernel_write(struct io_mapping *mapping, 527 loff_t gtt_base, int gtt_offset, 528 struct page *user_page, int user_offset, 529 int length) 530{ 531 char __iomem *dst_vaddr; 532 char *src_vaddr; 533 534 dst_vaddr = io_mapping_map_wc(mapping, gtt_base); 535 src_vaddr = kmap(user_page); 536 537 memcpy_toio(dst_vaddr + gtt_offset, 538 src_vaddr + user_offset, 539 length); 540 541 kunmap(user_page); 542 io_mapping_unmap(dst_vaddr); 543} 544 545static inline int 546fast_shmem_write(struct page **pages, 547 loff_t page_base, int page_offset, 548 char __user *data, 549 int length) 550{ 551 char __iomem *vaddr; 552 unsigned long unwritten; 553 554 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); 555 if (vaddr == NULL) 556 return -ENOMEM; 557 unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length); 558 kunmap_atomic(vaddr, KM_USER0); 559 560 if (unwritten) 561 return -EFAULT; 562 return 0; 563} 564 565/** 566 * This is the fast pwrite path, where we copy the data directly from the 567 * user into the GTT, uncached. 568 */ 569static int 570i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj, 571 struct drm_i915_gem_pwrite *args, 572 struct drm_file *file_priv) 573{ 574 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 575 drm_i915_private_t *dev_priv = dev->dev_private; 576 ssize_t remain; 577 loff_t offset, page_base; 578 char __user *user_data; 579 int page_offset, page_length; 580 int ret; 581 582 user_data = (char __user *) (uintptr_t) args->data_ptr; 583 remain = args->size; 584 585 586 mutex_lock(&dev->struct_mutex); 587 ret = i915_gem_object_pin(obj, 0); 588 if (ret) { 589 mutex_unlock(&dev->struct_mutex); 590 return ret; 591 } 592 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 593 if (ret) 594 goto fail; 595 596 obj_priv = to_intel_bo(obj); 597 offset = obj_priv->gtt_offset + args->offset; 598 599 while (remain > 0) { 600 /* Operation in this page 601 * 602 * page_base = page offset within aperture 603 * page_offset = offset within page 604 * page_length = bytes to copy for this page 605 */ 606 page_base = (offset & ~(PAGE_SIZE-1)); 607 page_offset = offset & (PAGE_SIZE-1); 608 page_length = remain; 609 if ((page_offset + remain) > PAGE_SIZE) 610 page_length = PAGE_SIZE - page_offset; 611 612 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base, 613 page_offset, user_data, page_length); 614 615 /* If we get a fault while copying data, then (presumably) our 616 * source page isn't available. Return the error and we'll 617 * retry in the slow path. 618 */ 619 if (ret) 620 goto fail; 621 622 remain -= page_length; 623 user_data += page_length; 624 offset += page_length; 625 } 626 627fail: 628 i915_gem_object_unpin(obj); 629 mutex_unlock(&dev->struct_mutex); 630 631 return ret; 632} 633 634/** 635 * This is the fallback GTT pwrite path, which uses get_user_pages to pin 636 * the memory and maps it using kmap_atomic for copying. 637 * 638 * This code resulted in x11perf -rgb10text consuming about 10% more CPU 639 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit). 640 */ 641static int 642i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, 643 struct drm_i915_gem_pwrite *args, 644 struct drm_file *file_priv) 645{ 646 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 647 drm_i915_private_t *dev_priv = dev->dev_private; 648 ssize_t remain; 649 loff_t gtt_page_base, offset; 650 loff_t first_data_page, last_data_page, num_pages; 651 loff_t pinned_pages, i; 652 struct page **user_pages; 653 struct mm_struct *mm = current->mm; 654 int gtt_page_offset, data_page_offset, data_page_index, page_length; 655 int ret; 656 uint64_t data_ptr = args->data_ptr; 657 658 remain = args->size; 659 660 /* Pin the user pages containing the data. We can't fault while 661 * holding the struct mutex, and all of the pwrite implementations 662 * want to hold it while dereferencing the user data. 663 */ 664 first_data_page = data_ptr / PAGE_SIZE; 665 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 666 num_pages = last_data_page - first_data_page + 1; 667 668 user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); 669 if (user_pages == NULL) 670 return -ENOMEM; 671 672 down_read(&mm->mmap_sem); 673 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 674 num_pages, 0, 0, user_pages, NULL); 675 up_read(&mm->mmap_sem); 676 if (pinned_pages < num_pages) { 677 ret = -EFAULT; 678 goto out_unpin_pages; 679 } 680 681 mutex_lock(&dev->struct_mutex); 682 ret = i915_gem_object_pin(obj, 0); 683 if (ret) 684 goto out_unlock; 685 686 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 687 if (ret) 688 goto out_unpin_object; 689 690 obj_priv = to_intel_bo(obj); 691 offset = obj_priv->gtt_offset + args->offset; 692 693 while (remain > 0) { 694 /* Operation in this page 695 * 696 * gtt_page_base = page offset within aperture 697 * gtt_page_offset = offset within page in aperture 698 * data_page_index = page number in get_user_pages return 699 * data_page_offset = offset with data_page_index page. 700 * page_length = bytes to copy for this page 701 */ 702 gtt_page_base = offset & PAGE_MASK; 703 gtt_page_offset = offset & ~PAGE_MASK; 704 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 705 data_page_offset = data_ptr & ~PAGE_MASK; 706 707 page_length = remain; 708 if ((gtt_page_offset + page_length) > PAGE_SIZE) 709 page_length = PAGE_SIZE - gtt_page_offset; 710 if ((data_page_offset + page_length) > PAGE_SIZE) 711 page_length = PAGE_SIZE - data_page_offset; 712 713 slow_kernel_write(dev_priv->mm.gtt_mapping, 714 gtt_page_base, gtt_page_offset, 715 user_pages[data_page_index], 716 data_page_offset, 717 page_length); 718 719 remain -= page_length; 720 offset += page_length; 721 data_ptr += page_length; 722 } 723 724out_unpin_object: 725 i915_gem_object_unpin(obj); 726out_unlock: 727 mutex_unlock(&dev->struct_mutex); 728out_unpin_pages: 729 for (i = 0; i < pinned_pages; i++) 730 page_cache_release(user_pages[i]); 731 drm_free_large(user_pages); 732 733 return ret; 734} 735 736/** 737 * This is the fast shmem pwrite path, which attempts to directly 738 * copy_from_user into the kmapped pages backing the object. 739 */ 740static int 741i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj, 742 struct drm_i915_gem_pwrite *args, 743 struct drm_file *file_priv) 744{ 745 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 746 ssize_t remain; 747 loff_t offset, page_base; 748 char __user *user_data; 749 int page_offset, page_length; 750 int ret; 751 752 user_data = (char __user *) (uintptr_t) args->data_ptr; 753 remain = args->size; 754 755 mutex_lock(&dev->struct_mutex); 756 757 ret = i915_gem_object_get_pages(obj, 0); 758 if (ret != 0) 759 goto fail_unlock; 760 761 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 762 if (ret != 0) 763 goto fail_put_pages; 764 765 obj_priv = to_intel_bo(obj); 766 offset = args->offset; 767 obj_priv->dirty = 1; 768 769 while (remain > 0) { 770 /* Operation in this page 771 * 772 * page_base = page offset within aperture 773 * page_offset = offset within page 774 * page_length = bytes to copy for this page 775 */ 776 page_base = (offset & ~(PAGE_SIZE-1)); 777 page_offset = offset & (PAGE_SIZE-1); 778 page_length = remain; 779 if ((page_offset + remain) > PAGE_SIZE) 780 page_length = PAGE_SIZE - page_offset; 781 782 ret = fast_shmem_write(obj_priv->pages, 783 page_base, page_offset, 784 user_data, page_length); 785 if (ret) 786 goto fail_put_pages; 787 788 remain -= page_length; 789 user_data += page_length; 790 offset += page_length; 791 } 792 793fail_put_pages: 794 i915_gem_object_put_pages(obj); 795fail_unlock: 796 mutex_unlock(&dev->struct_mutex); 797 798 return ret; 799} 800 801/** 802 * This is the fallback shmem pwrite path, which uses get_user_pages to pin 803 * the memory and maps it using kmap_atomic for copying. 804 * 805 * This avoids taking mmap_sem for faulting on the user's address while the 806 * struct_mutex is held. 807 */ 808static int 809i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, 810 struct drm_i915_gem_pwrite *args, 811 struct drm_file *file_priv) 812{ 813 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 814 struct mm_struct *mm = current->mm; 815 struct page **user_pages; 816 ssize_t remain; 817 loff_t offset, pinned_pages, i; 818 loff_t first_data_page, last_data_page, num_pages; 819 int shmem_page_index, shmem_page_offset; 820 int data_page_index, data_page_offset; 821 int page_length; 822 int ret; 823 uint64_t data_ptr = args->data_ptr; 824 int do_bit17_swizzling; 825 826 remain = args->size; 827 828 /* Pin the user pages containing the data. We can't fault while 829 * holding the struct mutex, and all of the pwrite implementations 830 * want to hold it while dereferencing the user data. 831 */ 832 first_data_page = data_ptr / PAGE_SIZE; 833 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 834 num_pages = last_data_page - first_data_page + 1; 835 836 user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); 837 if (user_pages == NULL) 838 return -ENOMEM; 839 840 down_read(&mm->mmap_sem); 841 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 842 num_pages, 0, 0, user_pages, NULL); 843 up_read(&mm->mmap_sem); 844 if (pinned_pages < num_pages) { 845 ret = -EFAULT; 846 goto fail_put_user_pages; 847 } 848 849 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 850 851 mutex_lock(&dev->struct_mutex); 852 853 ret = i915_gem_object_get_pages_or_evict(obj); 854 if (ret) 855 goto fail_unlock; 856 857 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 858 if (ret != 0) 859 goto fail_put_pages; 860 861 obj_priv = to_intel_bo(obj); 862 offset = args->offset; 863 obj_priv->dirty = 1; 864 865 while (remain > 0) { 866 /* Operation in this page 867 * 868 * shmem_page_index = page number within shmem file 869 * shmem_page_offset = offset within page in shmem file 870 * data_page_index = page number in get_user_pages return 871 * data_page_offset = offset with data_page_index page. 872 * page_length = bytes to copy for this page 873 */ 874 shmem_page_index = offset / PAGE_SIZE; 875 shmem_page_offset = offset & ~PAGE_MASK; 876 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 877 data_page_offset = data_ptr & ~PAGE_MASK; 878 879 page_length = remain; 880 if ((shmem_page_offset + page_length) > PAGE_SIZE) 881 page_length = PAGE_SIZE - shmem_page_offset; 882 if ((data_page_offset + page_length) > PAGE_SIZE) 883 page_length = PAGE_SIZE - data_page_offset; 884 885 if (do_bit17_swizzling) { 886 slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], 887 shmem_page_offset, 888 user_pages[data_page_index], 889 data_page_offset, 890 page_length, 891 0); 892 } else { 893 slow_shmem_copy(obj_priv->pages[shmem_page_index], 894 shmem_page_offset, 895 user_pages[data_page_index], 896 data_page_offset, 897 page_length); 898 } 899 900 remain -= page_length; 901 data_ptr += page_length; 902 offset += page_length; 903 } 904 905fail_put_pages: 906 i915_gem_object_put_pages(obj); 907fail_unlock: 908 mutex_unlock(&dev->struct_mutex); 909fail_put_user_pages: 910 for (i = 0; i < pinned_pages; i++) 911 page_cache_release(user_pages[i]); 912 drm_free_large(user_pages); 913 914 return ret; 915} 916 917/** 918 * Writes data to the object referenced by handle. 919 * 920 * On error, the contents of the buffer that were to be modified are undefined. 921 */ 922int 923i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 924 struct drm_file *file_priv) 925{ 926 struct drm_i915_gem_pwrite *args = data; 927 struct drm_gem_object *obj; 928 struct drm_i915_gem_object *obj_priv; 929 int ret = 0; 930 931 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 932 if (obj == NULL) 933 return -ENOENT; 934 obj_priv = to_intel_bo(obj); 935 936 /* Bounds check destination. */ 937 if (args->offset > obj->size || args->size > obj->size - args->offset) { 938 ret = -EINVAL; 939 goto err; 940 } 941 942 if (!access_ok(VERIFY_READ, 943 (char __user *)(uintptr_t)args->data_ptr, 944 args->size)) { 945 ret = -EFAULT; 946 goto err; 947 } 948 949 /* We can only do the GTT pwrite on untiled buffers, as otherwise 950 * it would end up going through the fenced access, and we'll get 951 * different detiling behavior between reading and writing. 952 * pread/pwrite currently are reading and writing from the CPU 953 * perspective, requiring manual detiling by the client. 954 */ 955 if (obj_priv->phys_obj) 956 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv); 957 else if (obj_priv->tiling_mode == I915_TILING_NONE && 958 dev->gtt_total != 0 && 959 obj->write_domain != I915_GEM_DOMAIN_CPU) { 960 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv); 961 if (ret == -EFAULT) { 962 ret = i915_gem_gtt_pwrite_slow(dev, obj, args, 963 file_priv); 964 } 965 } else if (i915_gem_object_needs_bit17_swizzle(obj)) { 966 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv); 967 } else { 968 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv); 969 if (ret == -EFAULT) { 970 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, 971 file_priv); 972 } 973 } 974 975#if WATCH_PWRITE 976 if (ret) 977 DRM_INFO("pwrite failed %d\n", ret); 978#endif 979 980err: 981 drm_gem_object_unreference_unlocked(obj); 982 return ret; 983} 984 985/** 986 * Called when user space prepares to use an object with the CPU, either 987 * through the mmap ioctl's mapping or a GTT mapping. 988 */ 989int 990i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 991 struct drm_file *file_priv) 992{ 993 struct drm_i915_private *dev_priv = dev->dev_private; 994 struct drm_i915_gem_set_domain *args = data; 995 struct drm_gem_object *obj; 996 struct drm_i915_gem_object *obj_priv; 997 uint32_t read_domains = args->read_domains; 998 uint32_t write_domain = args->write_domain; 999 int ret; 1000 1001 if (!(dev->driver->driver_features & DRIVER_GEM)) 1002 return -ENODEV; 1003 1004 /* Only handle setting domains to types used by the CPU. */ 1005 if (write_domain & I915_GEM_GPU_DOMAINS) 1006 return -EINVAL; 1007 1008 if (read_domains & I915_GEM_GPU_DOMAINS) 1009 return -EINVAL; 1010 1011 /* Having something in the write domain implies it's in the read 1012 * domain, and only that read domain. Enforce that in the request. 1013 */ 1014 if (write_domain != 0 && read_domains != write_domain) 1015 return -EINVAL; 1016 1017 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 1018 if (obj == NULL) 1019 return -ENOENT; 1020 obj_priv = to_intel_bo(obj); 1021 1022 mutex_lock(&dev->struct_mutex); 1023 1024 intel_mark_busy(dev, obj); 1025 1026#if WATCH_BUF 1027 DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n", 1028 obj, obj->size, read_domains, write_domain); 1029#endif 1030 if (read_domains & I915_GEM_DOMAIN_GTT) { 1031 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1032 1033 /* Update the LRU on the fence for the CPU access that's 1034 * about to occur. 1035 */ 1036 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { 1037 struct drm_i915_fence_reg *reg = 1038 &dev_priv->fence_regs[obj_priv->fence_reg]; 1039 list_move_tail(®->lru_list, 1040 &dev_priv->mm.fence_list); 1041 } 1042 1043 /* Silently promote "you're not bound, there was nothing to do" 1044 * to success, since the client was just asking us to 1045 * make sure everything was done. 1046 */ 1047 if (ret == -EINVAL) 1048 ret = 0; 1049 } else { 1050 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1051 } 1052 1053 1054 /* Maintain LRU order of "inactive" objects */ 1055 if (ret == 0 && i915_gem_object_is_inactive(obj_priv)) 1056 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); 1057 1058 drm_gem_object_unreference(obj); 1059 mutex_unlock(&dev->struct_mutex); 1060 return ret; 1061} 1062 1063/** 1064 * Called when user space has done writes to this buffer 1065 */ 1066int 1067i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1068 struct drm_file *file_priv) 1069{ 1070 struct drm_i915_gem_sw_finish *args = data; 1071 struct drm_gem_object *obj; 1072 struct drm_i915_gem_object *obj_priv; 1073 int ret = 0; 1074 1075 if (!(dev->driver->driver_features & DRIVER_GEM)) 1076 return -ENODEV; 1077 1078 mutex_lock(&dev->struct_mutex); 1079 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 1080 if (obj == NULL) { 1081 mutex_unlock(&dev->struct_mutex); 1082 return -ENOENT; 1083 } 1084 1085#if WATCH_BUF 1086 DRM_INFO("%s: sw_finish %d (%p %zd)\n", 1087 __func__, args->handle, obj, obj->size); 1088#endif 1089 obj_priv = to_intel_bo(obj); 1090 1091 /* Pinned buffers may be scanout, so flush the cache */ 1092 if (obj_priv->pin_count) 1093 i915_gem_object_flush_cpu_write_domain(obj); 1094 1095 drm_gem_object_unreference(obj); 1096 mutex_unlock(&dev->struct_mutex); 1097 return ret; 1098} 1099 1100/** 1101 * Maps the contents of an object, returning the address it is mapped 1102 * into. 1103 * 1104 * While the mapping holds a reference on the contents of the object, it doesn't 1105 * imply a ref on the object itself. 1106 */ 1107int 1108i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1109 struct drm_file *file_priv) 1110{ 1111 struct drm_i915_gem_mmap *args = data; 1112 struct drm_gem_object *obj; 1113 loff_t offset; 1114 unsigned long addr; 1115 1116 if (!(dev->driver->driver_features & DRIVER_GEM)) 1117 return -ENODEV; 1118 1119 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 1120 if (obj == NULL) 1121 return -ENOENT; 1122 1123 offset = args->offset; 1124 1125 down_write(¤t->mm->mmap_sem); 1126 addr = do_mmap(obj->filp, 0, args->size, 1127 PROT_READ | PROT_WRITE, MAP_SHARED, 1128 args->offset); 1129 up_write(¤t->mm->mmap_sem); 1130 drm_gem_object_unreference_unlocked(obj); 1131 if (IS_ERR((void *)addr)) 1132 return addr; 1133 1134 args->addr_ptr = (uint64_t) addr; 1135 1136 return 0; 1137} 1138 1139/** 1140 * i915_gem_fault - fault a page into the GTT 1141 * vma: VMA in question 1142 * vmf: fault info 1143 * 1144 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1145 * from userspace. The fault handler takes care of binding the object to 1146 * the GTT (if needed), allocating and programming a fence register (again, 1147 * only if needed based on whether the old reg is still valid or the object 1148 * is tiled) and inserting a new PTE into the faulting process. 1149 * 1150 * Note that the faulting process may involve evicting existing objects 1151 * from the GTT and/or fence registers to make room. So performance may 1152 * suffer if the GTT working set is large or there are few fence registers 1153 * left. 1154 */ 1155int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1156{ 1157 struct drm_gem_object *obj = vma->vm_private_data; 1158 struct drm_device *dev = obj->dev; 1159 drm_i915_private_t *dev_priv = dev->dev_private; 1160 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1161 pgoff_t page_offset; 1162 unsigned long pfn; 1163 int ret = 0; 1164 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1165 1166 /* We don't use vmf->pgoff since that has the fake offset */ 1167 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1168 PAGE_SHIFT; 1169 1170 /* Now bind it into the GTT if needed */ 1171 mutex_lock(&dev->struct_mutex); 1172 if (!obj_priv->gtt_space) { 1173 ret = i915_gem_object_bind_to_gtt(obj, 0); 1174 if (ret) 1175 goto unlock; 1176 1177 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1178 if (ret) 1179 goto unlock; 1180 } 1181 1182 /* Need a new fence register? */ 1183 if (obj_priv->tiling_mode != I915_TILING_NONE) { 1184 ret = i915_gem_object_get_fence_reg(obj); 1185 if (ret) 1186 goto unlock; 1187 } 1188 1189 if (i915_gem_object_is_inactive(obj_priv)) 1190 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); 1191 1192 pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + 1193 page_offset; 1194 1195 /* Finally, remap it using the new GTT offset */ 1196 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); 1197unlock: 1198 mutex_unlock(&dev->struct_mutex); 1199 1200 switch (ret) { 1201 case 0: 1202 case -ERESTARTSYS: 1203 return VM_FAULT_NOPAGE; 1204 case -ENOMEM: 1205 case -EAGAIN: 1206 return VM_FAULT_OOM; 1207 default: 1208 return VM_FAULT_SIGBUS; 1209 } 1210} 1211 1212/** 1213 * i915_gem_create_mmap_offset - create a fake mmap offset for an object 1214 * @obj: obj in question 1215 * 1216 * GEM memory mapping works by handing back to userspace a fake mmap offset 1217 * it can use in a subsequent mmap(2) call. The DRM core code then looks 1218 * up the object based on the offset and sets up the various memory mapping 1219 * structures. 1220 * 1221 * This routine allocates and attaches a fake offset for @obj. 1222 */ 1223static int 1224i915_gem_create_mmap_offset(struct drm_gem_object *obj) 1225{ 1226 struct drm_device *dev = obj->dev; 1227 struct drm_gem_mm *mm = dev->mm_private; 1228 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1229 struct drm_map_list *list; 1230 struct drm_local_map *map; 1231 int ret = 0; 1232 1233 /* Set the object up for mmap'ing */ 1234 list = &obj->map_list; 1235 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL); 1236 if (!list->map) 1237 return -ENOMEM; 1238 1239 map = list->map; 1240 map->type = _DRM_GEM; 1241 map->size = obj->size; 1242 map->handle = obj; 1243 1244 /* Get a DRM GEM mmap offset allocated... */ 1245 list->file_offset_node = drm_mm_search_free(&mm->offset_manager, 1246 obj->size / PAGE_SIZE, 0, 0); 1247 if (!list->file_offset_node) { 1248 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name); 1249 ret = -ENOMEM; 1250 goto out_free_list; 1251 } 1252 1253 list->file_offset_node = drm_mm_get_block(list->file_offset_node, 1254 obj->size / PAGE_SIZE, 0); 1255 if (!list->file_offset_node) { 1256 ret = -ENOMEM; 1257 goto out_free_list; 1258 } 1259 1260 list->hash.key = list->file_offset_node->start; 1261 if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) { 1262 DRM_ERROR("failed to add to map hash\n"); 1263 ret = -ENOMEM; 1264 goto out_free_mm; 1265 } 1266 1267 /* By now we should be all set, any drm_mmap request on the offset 1268 * below will get to our mmap & fault handler */ 1269 obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT; 1270 1271 return 0; 1272 1273out_free_mm: 1274 drm_mm_put_block(list->file_offset_node); 1275out_free_list: 1276 kfree(list->map); 1277 1278 return ret; 1279} 1280 1281/** 1282 * i915_gem_release_mmap - remove physical page mappings 1283 * @obj: obj in question 1284 * 1285 * Preserve the reservation of the mmapping with the DRM core code, but 1286 * relinquish ownership of the pages back to the system. 1287 * 1288 * It is vital that we remove the page mapping if we have mapped a tiled 1289 * object through the GTT and then lose the fence register due to 1290 * resource pressure. Similarly if the object has been moved out of the 1291 * aperture, than pages mapped into userspace must be revoked. Removing the 1292 * mapping will then trigger a page fault on the next user access, allowing 1293 * fixup by i915_gem_fault(). 1294 */ 1295void 1296i915_gem_release_mmap(struct drm_gem_object *obj) 1297{ 1298 struct drm_device *dev = obj->dev; 1299 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1300 1301 if (dev->dev_mapping) 1302 unmap_mapping_range(dev->dev_mapping, 1303 obj_priv->mmap_offset, obj->size, 1); 1304} 1305 1306static void 1307i915_gem_free_mmap_offset(struct drm_gem_object *obj) 1308{ 1309 struct drm_device *dev = obj->dev; 1310 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1311 struct drm_gem_mm *mm = dev->mm_private; 1312 struct drm_map_list *list; 1313 1314 list = &obj->map_list; 1315 drm_ht_remove_item(&mm->offset_hash, &list->hash); 1316 1317 if (list->file_offset_node) { 1318 drm_mm_put_block(list->file_offset_node); 1319 list->file_offset_node = NULL; 1320 } 1321 1322 if (list->map) { 1323 kfree(list->map); 1324 list->map = NULL; 1325 } 1326 1327 obj_priv->mmap_offset = 0; 1328} 1329 1330/** 1331 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1332 * @obj: object to check 1333 * 1334 * Return the required GTT alignment for an object, taking into account 1335 * potential fence register mapping if needed. 1336 */ 1337static uint32_t 1338i915_gem_get_gtt_alignment(struct drm_gem_object *obj) 1339{ 1340 struct drm_device *dev = obj->dev; 1341 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1342 int start, i; 1343 1344 /* 1345 * Minimum alignment is 4k (GTT page size), but might be greater 1346 * if a fence register is needed for the object. 1347 */ 1348 if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE) 1349 return 4096; 1350 1351 /* 1352 * Previous chips need to be aligned to the size of the smallest 1353 * fence register that can contain the object. 1354 */ 1355 if (IS_I9XX(dev)) 1356 start = 1024*1024; 1357 else 1358 start = 512*1024; 1359 1360 for (i = start; i < obj->size; i <<= 1) 1361 ; 1362 1363 return i; 1364} 1365 1366/** 1367 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1368 * @dev: DRM device 1369 * @data: GTT mapping ioctl data 1370 * @file_priv: GEM object info 1371 * 1372 * Simply returns the fake offset to userspace so it can mmap it. 1373 * The mmap call will end up in drm_gem_mmap(), which will set things 1374 * up so we can get faults in the handler above. 1375 * 1376 * The fault handler will take care of binding the object into the GTT 1377 * (since it may have been evicted to make room for something), allocating 1378 * a fence register, and mapping the appropriate aperture address into 1379 * userspace. 1380 */ 1381int 1382i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1383 struct drm_file *file_priv) 1384{ 1385 struct drm_i915_gem_mmap_gtt *args = data; 1386 struct drm_gem_object *obj; 1387 struct drm_i915_gem_object *obj_priv; 1388 int ret; 1389 1390 if (!(dev->driver->driver_features & DRIVER_GEM)) 1391 return -ENODEV; 1392 1393 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 1394 if (obj == NULL) 1395 return -ENOENT; 1396 1397 mutex_lock(&dev->struct_mutex); 1398 1399 obj_priv = to_intel_bo(obj); 1400 1401 if (obj_priv->madv != I915_MADV_WILLNEED) { 1402 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1403 drm_gem_object_unreference(obj); 1404 mutex_unlock(&dev->struct_mutex); 1405 return -EINVAL; 1406 } 1407 1408 1409 if (!obj_priv->mmap_offset) { 1410 ret = i915_gem_create_mmap_offset(obj); 1411 if (ret) { 1412 drm_gem_object_unreference(obj); 1413 mutex_unlock(&dev->struct_mutex); 1414 return ret; 1415 } 1416 } 1417 1418 args->offset = obj_priv->mmap_offset; 1419 1420 /* 1421 * Pull it into the GTT so that we have a page list (makes the 1422 * initial fault faster and any subsequent flushing possible). 1423 */ 1424 if (!obj_priv->agp_mem) { 1425 ret = i915_gem_object_bind_to_gtt(obj, 0); 1426 if (ret) { 1427 drm_gem_object_unreference(obj); 1428 mutex_unlock(&dev->struct_mutex); 1429 return ret; 1430 } 1431 } 1432 1433 drm_gem_object_unreference(obj); 1434 mutex_unlock(&dev->struct_mutex); 1435 1436 return 0; 1437} 1438 1439void 1440i915_gem_object_put_pages(struct drm_gem_object *obj) 1441{ 1442 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1443 int page_count = obj->size / PAGE_SIZE; 1444 int i; 1445 1446 BUG_ON(obj_priv->pages_refcount == 0); 1447 BUG_ON(obj_priv->madv == __I915_MADV_PURGED); 1448 1449 if (--obj_priv->pages_refcount != 0) 1450 return; 1451 1452 if (obj_priv->tiling_mode != I915_TILING_NONE) 1453 i915_gem_object_save_bit_17_swizzle(obj); 1454 1455 if (obj_priv->madv == I915_MADV_DONTNEED) 1456 obj_priv->dirty = 0; 1457 1458 for (i = 0; i < page_count; i++) { 1459 if (obj_priv->dirty) 1460 set_page_dirty(obj_priv->pages[i]); 1461 1462 if (obj_priv->madv == I915_MADV_WILLNEED) 1463 mark_page_accessed(obj_priv->pages[i]); 1464 1465 page_cache_release(obj_priv->pages[i]); 1466 } 1467 obj_priv->dirty = 0; 1468 1469 drm_free_large(obj_priv->pages); 1470 obj_priv->pages = NULL; 1471} 1472 1473static void 1474i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno, 1475 struct intel_ring_buffer *ring) 1476{ 1477 struct drm_device *dev = obj->dev; 1478 drm_i915_private_t *dev_priv = dev->dev_private; 1479 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1480 BUG_ON(ring == NULL); 1481 obj_priv->ring = ring; 1482 1483 /* Add a reference if we're newly entering the active list. */ 1484 if (!obj_priv->active) { 1485 drm_gem_object_reference(obj); 1486 obj_priv->active = 1; 1487 } 1488 /* Move from whatever list we were on to the tail of execution. */ 1489 spin_lock(&dev_priv->mm.active_list_lock); 1490 list_move_tail(&obj_priv->list, &ring->active_list); 1491 spin_unlock(&dev_priv->mm.active_list_lock); 1492 obj_priv->last_rendering_seqno = seqno; 1493} 1494 1495static void 1496i915_gem_object_move_to_flushing(struct drm_gem_object *obj) 1497{ 1498 struct drm_device *dev = obj->dev; 1499 drm_i915_private_t *dev_priv = dev->dev_private; 1500 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1501 1502 BUG_ON(!obj_priv->active); 1503 list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list); 1504 obj_priv->last_rendering_seqno = 0; 1505} 1506 1507/* Immediately discard the backing storage */ 1508static void 1509i915_gem_object_truncate(struct drm_gem_object *obj) 1510{ 1511 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1512 struct inode *inode; 1513 1514 /* Our goal here is to return as much of the memory as 1515 * is possible back to the system as we are called from OOM. 1516 * To do this we must instruct the shmfs to drop all of its 1517 * backing pages, *now*. Here we mirror the actions taken 1518 * when by shmem_delete_inode() to release the backing store. 1519 */ 1520 inode = obj->filp->f_path.dentry->d_inode; 1521 truncate_inode_pages(inode->i_mapping, 0); 1522 if (inode->i_op->truncate_range) 1523 inode->i_op->truncate_range(inode, 0, (loff_t)-1); 1524 1525 obj_priv->madv = __I915_MADV_PURGED; 1526} 1527 1528static inline int 1529i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv) 1530{ 1531 return obj_priv->madv == I915_MADV_DONTNEED; 1532} 1533 1534static void 1535i915_gem_object_move_to_inactive(struct drm_gem_object *obj) 1536{ 1537 struct drm_device *dev = obj->dev; 1538 drm_i915_private_t *dev_priv = dev->dev_private; 1539 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1540 1541 i915_verify_inactive(dev, __FILE__, __LINE__); 1542 if (obj_priv->pin_count != 0) 1543 list_del_init(&obj_priv->list); 1544 else 1545 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); 1546 1547 BUG_ON(!list_empty(&obj_priv->gpu_write_list)); 1548 1549 obj_priv->last_rendering_seqno = 0; 1550 obj_priv->ring = NULL; 1551 if (obj_priv->active) { 1552 obj_priv->active = 0; 1553 drm_gem_object_unreference(obj); 1554 } 1555 i915_verify_inactive(dev, __FILE__, __LINE__); 1556} 1557 1558static void 1559i915_gem_process_flushing_list(struct drm_device *dev, 1560 uint32_t flush_domains, uint32_t seqno, 1561 struct intel_ring_buffer *ring) 1562{ 1563 drm_i915_private_t *dev_priv = dev->dev_private; 1564 struct drm_i915_gem_object *obj_priv, *next; 1565 1566 list_for_each_entry_safe(obj_priv, next, 1567 &dev_priv->mm.gpu_write_list, 1568 gpu_write_list) { 1569 struct drm_gem_object *obj = &obj_priv->base; 1570 1571 if ((obj->write_domain & flush_domains) == 1572 obj->write_domain && 1573 obj_priv->ring->ring_flag == ring->ring_flag) { 1574 uint32_t old_write_domain = obj->write_domain; 1575 1576 obj->write_domain = 0; 1577 list_del_init(&obj_priv->gpu_write_list); 1578 i915_gem_object_move_to_active(obj, seqno, ring); 1579 1580 /* update the fence lru list */ 1581 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { 1582 struct drm_i915_fence_reg *reg = 1583 &dev_priv->fence_regs[obj_priv->fence_reg]; 1584 list_move_tail(®->lru_list, 1585 &dev_priv->mm.fence_list); 1586 } 1587 1588 trace_i915_gem_object_change_domain(obj, 1589 obj->read_domains, 1590 old_write_domain); 1591 } 1592 } 1593} 1594 1595uint32_t 1596i915_add_request(struct drm_device *dev, struct drm_file *file_priv, 1597 uint32_t flush_domains, struct intel_ring_buffer *ring) 1598{ 1599 drm_i915_private_t *dev_priv = dev->dev_private; 1600 struct drm_i915_file_private *i915_file_priv = NULL; 1601 struct drm_i915_gem_request *request; 1602 uint32_t seqno; 1603 int was_empty; 1604 1605 if (file_priv != NULL) 1606 i915_file_priv = file_priv->driver_priv; 1607 1608 request = kzalloc(sizeof(*request), GFP_KERNEL); 1609 if (request == NULL) 1610 return 0; 1611 1612 seqno = ring->add_request(dev, ring, file_priv, flush_domains); 1613 1614 request->seqno = seqno; 1615 request->ring = ring; 1616 request->emitted_jiffies = jiffies; 1617 was_empty = list_empty(&ring->request_list); 1618 list_add_tail(&request->list, &ring->request_list); 1619 1620 if (i915_file_priv) { 1621 list_add_tail(&request->client_list, 1622 &i915_file_priv->mm.request_list); 1623 } else { 1624 INIT_LIST_HEAD(&request->client_list); 1625 } 1626 1627 /* Associate any objects on the flushing list matching the write 1628 * domain we're flushing with our flush. 1629 */ 1630 if (flush_domains != 0) 1631 i915_gem_process_flushing_list(dev, flush_domains, seqno, ring); 1632 1633 if (!dev_priv->mm.suspended) { 1634 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); 1635 if (was_empty) 1636 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 1637 } 1638 return seqno; 1639} 1640 1641/** 1642 * Command execution barrier 1643 * 1644 * Ensures that all commands in the ring are finished 1645 * before signalling the CPU 1646 */ 1647static uint32_t 1648i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring) 1649{ 1650 uint32_t flush_domains = 0; 1651 1652 /* The sampler always gets flushed on i965 (sigh) */ 1653 if (IS_I965G(dev)) 1654 flush_domains |= I915_GEM_DOMAIN_SAMPLER; 1655 1656 ring->flush(dev, ring, 1657 I915_GEM_DOMAIN_COMMAND, flush_domains); 1658 return flush_domains; 1659} 1660 1661/** 1662 * Moves buffers associated only with the given active seqno from the active 1663 * to inactive list, potentially freeing them. 1664 */ 1665static void 1666i915_gem_retire_request(struct drm_device *dev, 1667 struct drm_i915_gem_request *request) 1668{ 1669 drm_i915_private_t *dev_priv = dev->dev_private; 1670 1671 trace_i915_gem_request_retire(dev, request->seqno); 1672 1673 /* Move any buffers on the active list that are no longer referenced 1674 * by the ringbuffer to the flushing/inactive lists as appropriate. 1675 */ 1676 spin_lock(&dev_priv->mm.active_list_lock); 1677 while (!list_empty(&request->ring->active_list)) { 1678 struct drm_gem_object *obj; 1679 struct drm_i915_gem_object *obj_priv; 1680 1681 obj_priv = list_first_entry(&request->ring->active_list, 1682 struct drm_i915_gem_object, 1683 list); 1684 obj = &obj_priv->base; 1685 1686 /* If the seqno being retired doesn't match the oldest in the 1687 * list, then the oldest in the list must still be newer than 1688 * this seqno. 1689 */ 1690 if (obj_priv->last_rendering_seqno != request->seqno) 1691 goto out; 1692 1693#if WATCH_LRU 1694 DRM_INFO("%s: retire %d moves to inactive list %p\n", 1695 __func__, request->seqno, obj); 1696#endif 1697 1698 if (obj->write_domain != 0) 1699 i915_gem_object_move_to_flushing(obj); 1700 else { 1701 /* Take a reference on the object so it won't be 1702 * freed while the spinlock is held. The list 1703 * protection for this spinlock is safe when breaking 1704 * the lock like this since the next thing we do 1705 * is just get the head of the list again. 1706 */ 1707 drm_gem_object_reference(obj); 1708 i915_gem_object_move_to_inactive(obj); 1709 spin_unlock(&dev_priv->mm.active_list_lock); 1710 drm_gem_object_unreference(obj); 1711 spin_lock(&dev_priv->mm.active_list_lock); 1712 } 1713 } 1714out: 1715 spin_unlock(&dev_priv->mm.active_list_lock); 1716} 1717 1718/** 1719 * Returns true if seq1 is later than seq2. 1720 */ 1721bool 1722i915_seqno_passed(uint32_t seq1, uint32_t seq2) 1723{ 1724 return (int32_t)(seq1 - seq2) >= 0; 1725} 1726 1727uint32_t 1728i915_get_gem_seqno(struct drm_device *dev, 1729 struct intel_ring_buffer *ring) 1730{ 1731 return ring->get_gem_seqno(dev, ring); 1732} 1733 1734/** 1735 * This function clears the request list as sequence numbers are passed. 1736 */ 1737static void 1738i915_gem_retire_requests_ring(struct drm_device *dev, 1739 struct intel_ring_buffer *ring) 1740{ 1741 drm_i915_private_t *dev_priv = dev->dev_private; 1742 uint32_t seqno; 1743 1744 if (!ring->status_page.page_addr 1745 || list_empty(&ring->request_list)) 1746 return; 1747 1748 seqno = i915_get_gem_seqno(dev, ring); 1749 1750 while (!list_empty(&ring->request_list)) { 1751 struct drm_i915_gem_request *request; 1752 uint32_t retiring_seqno; 1753 1754 request = list_first_entry(&ring->request_list, 1755 struct drm_i915_gem_request, 1756 list); 1757 retiring_seqno = request->seqno; 1758 1759 if (i915_seqno_passed(seqno, retiring_seqno) || 1760 atomic_read(&dev_priv->mm.wedged)) { 1761 i915_gem_retire_request(dev, request); 1762 1763 list_del(&request->list); 1764 list_del(&request->client_list); 1765 kfree(request); 1766 } else 1767 break; 1768 } 1769 1770 if (unlikely (dev_priv->trace_irq_seqno && 1771 i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) { 1772 1773 ring->user_irq_put(dev, ring); 1774 dev_priv->trace_irq_seqno = 0; 1775 } 1776} 1777 1778void 1779i915_gem_retire_requests(struct drm_device *dev) 1780{ 1781 drm_i915_private_t *dev_priv = dev->dev_private; 1782 1783 if (!list_empty(&dev_priv->mm.deferred_free_list)) { 1784 struct drm_i915_gem_object *obj_priv, *tmp; 1785 1786 /* We must be careful that during unbind() we do not 1787 * accidentally infinitely recurse into retire requests. 1788 * Currently: 1789 * retire -> free -> unbind -> wait -> retire_ring 1790 */ 1791 list_for_each_entry_safe(obj_priv, tmp, 1792 &dev_priv->mm.deferred_free_list, 1793 list) 1794 i915_gem_free_object_tail(&obj_priv->base); 1795 } 1796 1797 i915_gem_retire_requests_ring(dev, &dev_priv->render_ring); 1798 if (HAS_BSD(dev)) 1799 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring); 1800} 1801 1802void 1803i915_gem_retire_work_handler(struct work_struct *work) 1804{ 1805 drm_i915_private_t *dev_priv; 1806 struct drm_device *dev; 1807 1808 dev_priv = container_of(work, drm_i915_private_t, 1809 mm.retire_work.work); 1810 dev = dev_priv->dev; 1811 1812 mutex_lock(&dev->struct_mutex); 1813 i915_gem_retire_requests(dev); 1814 1815 if (!dev_priv->mm.suspended && 1816 (!list_empty(&dev_priv->render_ring.request_list) || 1817 (HAS_BSD(dev) && 1818 !list_empty(&dev_priv->bsd_ring.request_list)))) 1819 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 1820 mutex_unlock(&dev->struct_mutex); 1821} 1822 1823int 1824i915_do_wait_request(struct drm_device *dev, uint32_t seqno, 1825 int interruptible, struct intel_ring_buffer *ring) 1826{ 1827 drm_i915_private_t *dev_priv = dev->dev_private; 1828 u32 ier; 1829 int ret = 0; 1830 1831 BUG_ON(seqno == 0); 1832 1833 if (atomic_read(&dev_priv->mm.wedged)) 1834 return -EIO; 1835 1836 if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) { 1837 if (HAS_PCH_SPLIT(dev)) 1838 ier = I915_READ(DEIER) | I915_READ(GTIER); 1839 else 1840 ier = I915_READ(IER); 1841 if (!ier) { 1842 DRM_ERROR("something (likely vbetool) disabled " 1843 "interrupts, re-enabling\n"); 1844 i915_driver_irq_preinstall(dev); 1845 i915_driver_irq_postinstall(dev); 1846 } 1847 1848 trace_i915_gem_request_wait_begin(dev, seqno); 1849 1850 ring->waiting_gem_seqno = seqno; 1851 ring->user_irq_get(dev, ring); 1852 if (interruptible) 1853 ret = wait_event_interruptible(ring->irq_queue, 1854 i915_seqno_passed( 1855 ring->get_gem_seqno(dev, ring), seqno) 1856 || atomic_read(&dev_priv->mm.wedged)); 1857 else 1858 wait_event(ring->irq_queue, 1859 i915_seqno_passed( 1860 ring->get_gem_seqno(dev, ring), seqno) 1861 || atomic_read(&dev_priv->mm.wedged)); 1862 1863 ring->user_irq_put(dev, ring); 1864 ring->waiting_gem_seqno = 0; 1865 1866 trace_i915_gem_request_wait_end(dev, seqno); 1867 } 1868 if (atomic_read(&dev_priv->mm.wedged)) 1869 ret = -EIO; 1870 1871 if (ret && ret != -ERESTARTSYS) 1872 DRM_ERROR("%s returns %d (awaiting %d at %d)\n", 1873 __func__, ret, seqno, ring->get_gem_seqno(dev, ring)); 1874 1875 /* Directly dispatch request retiring. While we have the work queue 1876 * to handle this, the waiter on a request often wants an associated 1877 * buffer to have made it to the inactive list, and we would need 1878 * a separate wait queue to handle that. 1879 */ 1880 if (ret == 0) 1881 i915_gem_retire_requests_ring(dev, ring); 1882 1883 return ret; 1884} 1885 1886/** 1887 * Waits for a sequence number to be signaled, and cleans up the 1888 * request and object lists appropriately for that event. 1889 */ 1890static int 1891i915_wait_request(struct drm_device *dev, uint32_t seqno, 1892 struct intel_ring_buffer *ring) 1893{ 1894 return i915_do_wait_request(dev, seqno, 1, ring); 1895} 1896 1897static void 1898i915_gem_flush(struct drm_device *dev, 1899 uint32_t invalidate_domains, 1900 uint32_t flush_domains) 1901{ 1902 drm_i915_private_t *dev_priv = dev->dev_private; 1903 if (flush_domains & I915_GEM_DOMAIN_CPU) 1904 drm_agp_chipset_flush(dev); 1905 dev_priv->render_ring.flush(dev, &dev_priv->render_ring, 1906 invalidate_domains, 1907 flush_domains); 1908 1909 if (HAS_BSD(dev)) 1910 dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring, 1911 invalidate_domains, 1912 flush_domains); 1913} 1914 1915/** 1916 * Ensures that all rendering to the object has completed and the object is 1917 * safe to unbind from the GTT or access from the CPU. 1918 */ 1919static int 1920i915_gem_object_wait_rendering(struct drm_gem_object *obj) 1921{ 1922 struct drm_device *dev = obj->dev; 1923 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1924 int ret; 1925 1926 /* This function only exists to support waiting for existing rendering, 1927 * not for emitting required flushes. 1928 */ 1929 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0); 1930 1931 /* If there is rendering queued on the buffer being evicted, wait for 1932 * it. 1933 */ 1934 if (obj_priv->active) { 1935#if WATCH_BUF 1936 DRM_INFO("%s: object %p wait for seqno %08x\n", 1937 __func__, obj, obj_priv->last_rendering_seqno); 1938#endif 1939 ret = i915_wait_request(dev, 1940 obj_priv->last_rendering_seqno, obj_priv->ring); 1941 if (ret != 0) 1942 return ret; 1943 } 1944 1945 return 0; 1946} 1947 1948/** 1949 * Unbinds an object from the GTT aperture. 1950 */ 1951int 1952i915_gem_object_unbind(struct drm_gem_object *obj) 1953{ 1954 struct drm_device *dev = obj->dev; 1955 drm_i915_private_t *dev_priv = dev->dev_private; 1956 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1957 int ret = 0; 1958 1959#if WATCH_BUF 1960 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj); 1961 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space); 1962#endif 1963 if (obj_priv->gtt_space == NULL) 1964 return 0; 1965 1966 if (obj_priv->pin_count != 0) { 1967 DRM_ERROR("Attempting to unbind pinned buffer\n"); 1968 return -EINVAL; 1969 } 1970 1971 /* blow away mappings if mapped through GTT */ 1972 i915_gem_release_mmap(obj); 1973 1974 /* Move the object to the CPU domain to ensure that 1975 * any possible CPU writes while it's not in the GTT 1976 * are flushed when we go to remap it. This will 1977 * also ensure that all pending GPU writes are finished 1978 * before we unbind. 1979 */ 1980 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 1981 if (ret == -ERESTARTSYS) 1982 return ret; 1983 /* Continue on if we fail due to EIO, the GPU is hung so we 1984 * should be safe and we need to cleanup or else we might 1985 * cause memory corruption through use-after-free. 1986 */ 1987 1988 /* release the fence reg _after_ flushing */ 1989 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) 1990 i915_gem_clear_fence_reg(obj); 1991 1992 if (obj_priv->agp_mem != NULL) { 1993 drm_unbind_agp(obj_priv->agp_mem); 1994 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); 1995 obj_priv->agp_mem = NULL; 1996 } 1997 1998 i915_gem_object_put_pages(obj); 1999 BUG_ON(obj_priv->pages_refcount); 2000 2001 if (obj_priv->gtt_space) { 2002 atomic_dec(&dev->gtt_count); 2003 atomic_sub(obj->size, &dev->gtt_memory); 2004 2005 drm_mm_put_block(obj_priv->gtt_space); 2006 obj_priv->gtt_space = NULL; 2007 } 2008 2009 /* Remove ourselves from the LRU list if present. */ 2010 spin_lock(&dev_priv->mm.active_list_lock); 2011 if (!list_empty(&obj_priv->list)) 2012 list_del_init(&obj_priv->list); 2013 spin_unlock(&dev_priv->mm.active_list_lock); 2014 2015 if (i915_gem_object_is_purgeable(obj_priv)) 2016 i915_gem_object_truncate(obj); 2017 2018 trace_i915_gem_object_unbind(obj); 2019 2020 return ret; 2021} 2022 2023int 2024i915_gpu_idle(struct drm_device *dev) 2025{ 2026 drm_i915_private_t *dev_priv = dev->dev_private; 2027 bool lists_empty; 2028 uint32_t seqno1, seqno2; 2029 int ret; 2030 2031 spin_lock(&dev_priv->mm.active_list_lock); 2032 lists_empty = (list_empty(&dev_priv->mm.flushing_list) && 2033 list_empty(&dev_priv->render_ring.active_list) && 2034 (!HAS_BSD(dev) || 2035 list_empty(&dev_priv->bsd_ring.active_list))); 2036 spin_unlock(&dev_priv->mm.active_list_lock); 2037 2038 if (lists_empty) 2039 return 0; 2040 2041 /* Flush everything onto the inactive list. */ 2042 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2043 seqno1 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS, 2044 &dev_priv->render_ring); 2045 if (seqno1 == 0) 2046 return -ENOMEM; 2047 ret = i915_wait_request(dev, seqno1, &dev_priv->render_ring); 2048 2049 if (HAS_BSD(dev)) { 2050 seqno2 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS, 2051 &dev_priv->bsd_ring); 2052 if (seqno2 == 0) 2053 return -ENOMEM; 2054 2055 ret = i915_wait_request(dev, seqno2, &dev_priv->bsd_ring); 2056 if (ret) 2057 return ret; 2058 } 2059 2060 2061 return ret; 2062} 2063 2064int 2065i915_gem_object_get_pages(struct drm_gem_object *obj, 2066 gfp_t gfpmask) 2067{ 2068 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2069 int page_count, i; 2070 struct address_space *mapping; 2071 struct inode *inode; 2072 struct page *page; 2073 2074 BUG_ON(obj_priv->pages_refcount 2075 == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT); 2076 2077 if (obj_priv->pages_refcount++ != 0) 2078 return 0; 2079 2080 /* Get the list of pages out of our struct file. They'll be pinned 2081 * at this point until we release them. 2082 */ 2083 page_count = obj->size / PAGE_SIZE; 2084 BUG_ON(obj_priv->pages != NULL); 2085 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); 2086 if (obj_priv->pages == NULL) { 2087 obj_priv->pages_refcount--; 2088 return -ENOMEM; 2089 } 2090 2091 inode = obj->filp->f_path.dentry->d_inode; 2092 mapping = inode->i_mapping; 2093 for (i = 0; i < page_count; i++) { 2094 page = read_cache_page_gfp(mapping, i, 2095 GFP_HIGHUSER | 2096 __GFP_COLD | 2097 __GFP_RECLAIMABLE | 2098 gfpmask); 2099 if (IS_ERR(page)) 2100 goto err_pages; 2101 2102 obj_priv->pages[i] = page; 2103 } 2104 2105 if (obj_priv->tiling_mode != I915_TILING_NONE) 2106 i915_gem_object_do_bit_17_swizzle(obj); 2107 2108 return 0; 2109 2110err_pages: 2111 while (i--) 2112 page_cache_release(obj_priv->pages[i]); 2113 2114 drm_free_large(obj_priv->pages); 2115 obj_priv->pages = NULL; 2116 obj_priv->pages_refcount--; 2117 return PTR_ERR(page); 2118} 2119 2120static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg) 2121{ 2122 struct drm_gem_object *obj = reg->obj; 2123 struct drm_device *dev = obj->dev; 2124 drm_i915_private_t *dev_priv = dev->dev_private; 2125 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2126 int regnum = obj_priv->fence_reg; 2127 uint64_t val; 2128 2129 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 2130 0xfffff000) << 32; 2131 val |= obj_priv->gtt_offset & 0xfffff000; 2132 val |= (uint64_t)((obj_priv->stride / 128) - 1) << 2133 SANDYBRIDGE_FENCE_PITCH_SHIFT; 2134 2135 if (obj_priv->tiling_mode == I915_TILING_Y) 2136 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2137 val |= I965_FENCE_REG_VALID; 2138 2139 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val); 2140} 2141 2142static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) 2143{ 2144 struct drm_gem_object *obj = reg->obj; 2145 struct drm_device *dev = obj->dev; 2146 drm_i915_private_t *dev_priv = dev->dev_private; 2147 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2148 int regnum = obj_priv->fence_reg; 2149 uint64_t val; 2150 2151 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 2152 0xfffff000) << 32; 2153 val |= obj_priv->gtt_offset & 0xfffff000; 2154 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2155 if (obj_priv->tiling_mode == I915_TILING_Y) 2156 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2157 val |= I965_FENCE_REG_VALID; 2158 2159 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); 2160} 2161 2162static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) 2163{ 2164 struct drm_gem_object *obj = reg->obj; 2165 struct drm_device *dev = obj->dev; 2166 drm_i915_private_t *dev_priv = dev->dev_private; 2167 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2168 int regnum = obj_priv->fence_reg; 2169 int tile_width; 2170 uint32_t fence_reg, val; 2171 uint32_t pitch_val; 2172 2173 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 2174 (obj_priv->gtt_offset & (obj->size - 1))) { 2175 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n", 2176 __func__, obj_priv->gtt_offset, obj->size); 2177 return; 2178 } 2179 2180 if (obj_priv->tiling_mode == I915_TILING_Y && 2181 HAS_128_BYTE_Y_TILING(dev)) 2182 tile_width = 128; 2183 else 2184 tile_width = 512; 2185 2186 /* Note: pitch better be a power of two tile widths */ 2187 pitch_val = obj_priv->stride / tile_width; 2188 pitch_val = ffs(pitch_val) - 1; 2189 2190 if (obj_priv->tiling_mode == I915_TILING_Y && 2191 HAS_128_BYTE_Y_TILING(dev)) 2192 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); 2193 else 2194 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL); 2195 2196 val = obj_priv->gtt_offset; 2197 if (obj_priv->tiling_mode == I915_TILING_Y) 2198 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2199 val |= I915_FENCE_SIZE_BITS(obj->size); 2200 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2201 val |= I830_FENCE_REG_VALID; 2202 2203 if (regnum < 8) 2204 fence_reg = FENCE_REG_830_0 + (regnum * 4); 2205 else 2206 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); 2207 I915_WRITE(fence_reg, val); 2208} 2209 2210static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) 2211{ 2212 struct drm_gem_object *obj = reg->obj; 2213 struct drm_device *dev = obj->dev; 2214 drm_i915_private_t *dev_priv = dev->dev_private; 2215 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2216 int regnum = obj_priv->fence_reg; 2217 uint32_t val; 2218 uint32_t pitch_val; 2219 uint32_t fence_size_bits; 2220 2221 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) || 2222 (obj_priv->gtt_offset & (obj->size - 1))) { 2223 WARN(1, "%s: object 0x%08x not 512K or size aligned\n", 2224 __func__, obj_priv->gtt_offset); 2225 return; 2226 } 2227 2228 pitch_val = obj_priv->stride / 128; 2229 pitch_val = ffs(pitch_val) - 1; 2230 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); 2231 2232 val = obj_priv->gtt_offset; 2233 if (obj_priv->tiling_mode == I915_TILING_Y) 2234 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2235 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size); 2236 WARN_ON(fence_size_bits & ~0x00000f00); 2237 val |= fence_size_bits; 2238 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2239 val |= I830_FENCE_REG_VALID; 2240 2241 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); 2242} 2243 2244static int i915_find_fence_reg(struct drm_device *dev) 2245{ 2246 struct drm_i915_fence_reg *reg = NULL; 2247 struct drm_i915_gem_object *obj_priv = NULL; 2248 struct drm_i915_private *dev_priv = dev->dev_private; 2249 struct drm_gem_object *obj = NULL; 2250 int i, avail, ret; 2251 2252 /* First try to find a free reg */ 2253 avail = 0; 2254 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2255 reg = &dev_priv->fence_regs[i]; 2256 if (!reg->obj) 2257 return i; 2258 2259 obj_priv = to_intel_bo(reg->obj); 2260 if (!obj_priv->pin_count) 2261 avail++; 2262 } 2263 2264 if (avail == 0) 2265 return -ENOSPC; 2266 2267 /* None available, try to steal one or wait for a user to finish */ 2268 i = I915_FENCE_REG_NONE; 2269 list_for_each_entry(reg, &dev_priv->mm.fence_list, 2270 lru_list) { 2271 obj = reg->obj; 2272 obj_priv = to_intel_bo(obj); 2273 2274 if (obj_priv->pin_count) 2275 continue; 2276 2277 /* found one! */ 2278 i = obj_priv->fence_reg; 2279 break; 2280 } 2281 2282 BUG_ON(i == I915_FENCE_REG_NONE); 2283 2284 /* We only have a reference on obj from the active list. put_fence_reg 2285 * might drop that one, causing a use-after-free in it. So hold a 2286 * private reference to obj like the other callers of put_fence_reg 2287 * (set_tiling ioctl) do. */ 2288 drm_gem_object_reference(obj); 2289 ret = i915_gem_object_put_fence_reg(obj); 2290 drm_gem_object_unreference(obj); 2291 if (ret != 0) 2292 return ret; 2293 2294 return i; 2295} 2296 2297/** 2298 * i915_gem_object_get_fence_reg - set up a fence reg for an object 2299 * @obj: object to map through a fence reg 2300 * 2301 * When mapping objects through the GTT, userspace wants to be able to write 2302 * to them without having to worry about swizzling if the object is tiled. 2303 * 2304 * This function walks the fence regs looking for a free one for @obj, 2305 * stealing one if it can't find any. 2306 * 2307 * It then sets up the reg based on the object's properties: address, pitch 2308 * and tiling format. 2309 */ 2310int 2311i915_gem_object_get_fence_reg(struct drm_gem_object *obj) 2312{ 2313 struct drm_device *dev = obj->dev; 2314 struct drm_i915_private *dev_priv = dev->dev_private; 2315 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2316 struct drm_i915_fence_reg *reg = NULL; 2317 int ret; 2318 2319 /* Just update our place in the LRU if our fence is getting used. */ 2320 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { 2321 reg = &dev_priv->fence_regs[obj_priv->fence_reg]; 2322 list_move_tail(®->lru_list, &dev_priv->mm.fence_list); 2323 return 0; 2324 } 2325 2326 switch (obj_priv->tiling_mode) { 2327 case I915_TILING_NONE: 2328 WARN(1, "allocating a fence for non-tiled object?\n"); 2329 break; 2330 case I915_TILING_X: 2331 if (!obj_priv->stride) 2332 return -EINVAL; 2333 WARN((obj_priv->stride & (512 - 1)), 2334 "object 0x%08x is X tiled but has non-512B pitch\n", 2335 obj_priv->gtt_offset); 2336 break; 2337 case I915_TILING_Y: 2338 if (!obj_priv->stride) 2339 return -EINVAL; 2340 WARN((obj_priv->stride & (128 - 1)), 2341 "object 0x%08x is Y tiled but has non-128B pitch\n", 2342 obj_priv->gtt_offset); 2343 break; 2344 } 2345 2346 ret = i915_find_fence_reg(dev); 2347 if (ret < 0) 2348 return ret; 2349 2350 obj_priv->fence_reg = ret; 2351 reg = &dev_priv->fence_regs[obj_priv->fence_reg]; 2352 list_add_tail(®->lru_list, &dev_priv->mm.fence_list); 2353 2354 reg->obj = obj; 2355 2356 switch (INTEL_INFO(dev)->gen) { 2357 case 6: 2358 sandybridge_write_fence_reg(reg); 2359 break; 2360 case 5: 2361 case 4: 2362 i965_write_fence_reg(reg); 2363 break; 2364 case 3: 2365 i915_write_fence_reg(reg); 2366 break; 2367 case 2: 2368 i830_write_fence_reg(reg); 2369 break; 2370 } 2371 2372 trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg, 2373 obj_priv->tiling_mode); 2374 2375 return 0; 2376} 2377 2378/** 2379 * i915_gem_clear_fence_reg - clear out fence register info 2380 * @obj: object to clear 2381 * 2382 * Zeroes out the fence register itself and clears out the associated 2383 * data structures in dev_priv and obj_priv. 2384 */ 2385static void 2386i915_gem_clear_fence_reg(struct drm_gem_object *obj) 2387{ 2388 struct drm_device *dev = obj->dev; 2389 drm_i915_private_t *dev_priv = dev->dev_private; 2390 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2391 struct drm_i915_fence_reg *reg = 2392 &dev_priv->fence_regs[obj_priv->fence_reg]; 2393 uint32_t fence_reg; 2394 2395 switch (INTEL_INFO(dev)->gen) { 2396 case 6: 2397 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + 2398 (obj_priv->fence_reg * 8), 0); 2399 break; 2400 case 5: 2401 case 4: 2402 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); 2403 break; 2404 case 3: 2405 if (obj_priv->fence_reg >= 8) 2406 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4; 2407 else 2408 case 2: 2409 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; 2410 2411 I915_WRITE(fence_reg, 0); 2412 break; 2413 } 2414 2415 reg->obj = NULL; 2416 obj_priv->fence_reg = I915_FENCE_REG_NONE; 2417 list_del_init(®->lru_list); 2418} 2419 2420/** 2421 * i915_gem_object_put_fence_reg - waits on outstanding fenced access 2422 * to the buffer to finish, and then resets the fence register. 2423 * @obj: tiled object holding a fence register. 2424 * 2425 * Zeroes out the fence register itself and clears out the associated 2426 * data structures in dev_priv and obj_priv. 2427 */ 2428int 2429i915_gem_object_put_fence_reg(struct drm_gem_object *obj) 2430{ 2431 struct drm_device *dev = obj->dev; 2432 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2433 2434 if (obj_priv->fence_reg == I915_FENCE_REG_NONE) 2435 return 0; 2436 2437 /* If we've changed tiling, GTT-mappings of the object 2438 * need to re-fault to ensure that the correct fence register 2439 * setup is in place. 2440 */ 2441 i915_gem_release_mmap(obj); 2442 2443 /* On the i915, GPU access to tiled buffers is via a fence, 2444 * therefore we must wait for any outstanding access to complete 2445 * before clearing the fence. 2446 */ 2447 if (!IS_I965G(dev)) { 2448 int ret; 2449 2450 ret = i915_gem_object_flush_gpu_write_domain(obj); 2451 if (ret != 0) 2452 return ret; 2453 2454 ret = i915_gem_object_wait_rendering(obj); 2455 if (ret != 0) 2456 return ret; 2457 } 2458 2459 i915_gem_object_flush_gtt_write_domain(obj); 2460 i915_gem_clear_fence_reg (obj); 2461 2462 return 0; 2463} 2464 2465/** 2466 * Finds free space in the GTT aperture and binds the object there. 2467 */ 2468static int 2469i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) 2470{ 2471 struct drm_device *dev = obj->dev; 2472 drm_i915_private_t *dev_priv = dev->dev_private; 2473 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2474 struct drm_mm_node *free_space; 2475 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN; 2476 int ret; 2477 2478 if (obj_priv->madv != I915_MADV_WILLNEED) { 2479 DRM_ERROR("Attempting to bind a purgeable object\n"); 2480 return -EINVAL; 2481 } 2482 2483 if (alignment == 0) 2484 alignment = i915_gem_get_gtt_alignment(obj); 2485 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { 2486 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2487 return -EINVAL; 2488 } 2489 2490 /* If the object is bigger than the entire aperture, reject it early 2491 * before evicting everything in a vain attempt to find space. 2492 */ 2493 if (obj->size > dev->gtt_total) { 2494 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 2495 return -E2BIG; 2496 } 2497 2498 search_free: 2499 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2500 obj->size, alignment, 0); 2501 if (free_space != NULL) { 2502 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size, 2503 alignment); 2504 if (obj_priv->gtt_space != NULL) 2505 obj_priv->gtt_offset = obj_priv->gtt_space->start; 2506 } 2507 if (obj_priv->gtt_space == NULL) { 2508 /* If the gtt is empty and we're still having trouble 2509 * fitting our object in, we're out of memory. 2510 */ 2511#if WATCH_LRU 2512 DRM_INFO("%s: GTT full, evicting something\n", __func__); 2513#endif 2514 ret = i915_gem_evict_something(dev, obj->size, alignment); 2515 if (ret) 2516 return ret; 2517 2518 goto search_free; 2519 } 2520 2521#if WATCH_BUF 2522 DRM_INFO("Binding object of size %zd at 0x%08x\n", 2523 obj->size, obj_priv->gtt_offset); 2524#endif 2525 ret = i915_gem_object_get_pages(obj, gfpmask); 2526 if (ret) { 2527 drm_mm_put_block(obj_priv->gtt_space); 2528 obj_priv->gtt_space = NULL; 2529 2530 if (ret == -ENOMEM) { 2531 /* first try to clear up some space from the GTT */ 2532 ret = i915_gem_evict_something(dev, obj->size, 2533 alignment); 2534 if (ret) { 2535 /* now try to shrink everyone else */ 2536 if (gfpmask) { 2537 gfpmask = 0; 2538 goto search_free; 2539 } 2540 2541 return ret; 2542 } 2543 2544 goto search_free; 2545 } 2546 2547 return ret; 2548 } 2549 2550 /* Create an AGP memory structure pointing at our pages, and bind it 2551 * into the GTT. 2552 */ 2553 obj_priv->agp_mem = drm_agp_bind_pages(dev, 2554 obj_priv->pages, 2555 obj->size >> PAGE_SHIFT, 2556 obj_priv->gtt_offset, 2557 obj_priv->agp_type); 2558 if (obj_priv->agp_mem == NULL) { 2559 i915_gem_object_put_pages(obj); 2560 drm_mm_put_block(obj_priv->gtt_space); 2561 obj_priv->gtt_space = NULL; 2562 2563 ret = i915_gem_evict_something(dev, obj->size, alignment); 2564 if (ret) 2565 return ret; 2566 2567 goto search_free; 2568 } 2569 atomic_inc(&dev->gtt_count); 2570 atomic_add(obj->size, &dev->gtt_memory); 2571 2572 /* keep track of bounds object by adding it to the inactive list */ 2573 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); 2574 2575 /* Assert that the object is not currently in any GPU domain. As it 2576 * wasn't in the GTT, there shouldn't be any way it could have been in 2577 * a GPU cache 2578 */ 2579 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2580 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2581 2582 trace_i915_gem_object_bind(obj, obj_priv->gtt_offset); 2583 2584 return 0; 2585} 2586 2587void 2588i915_gem_clflush_object(struct drm_gem_object *obj) 2589{ 2590 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2591 2592 /* If we don't have a page list set up, then we're not pinned 2593 * to GPU, and we can ignore the cache flush because it'll happen 2594 * again at bind time. 2595 */ 2596 if (obj_priv->pages == NULL) 2597 return; 2598 2599 trace_i915_gem_object_clflush(obj); 2600 2601 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); 2602} 2603 2604/** Flushes any GPU write domain for the object if it's dirty. */ 2605static int 2606i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) 2607{ 2608 struct drm_device *dev = obj->dev; 2609 uint32_t old_write_domain; 2610 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2611 2612 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 2613 return 0; 2614 2615 /* Queue the GPU write cache flushing we need. */ 2616 old_write_domain = obj->write_domain; 2617 i915_gem_flush(dev, 0, obj->write_domain); 2618 if (i915_add_request(dev, NULL, obj->write_domain, obj_priv->ring) == 0) 2619 return -ENOMEM; 2620 2621 trace_i915_gem_object_change_domain(obj, 2622 obj->read_domains, 2623 old_write_domain); 2624 return 0; 2625} 2626 2627/** Flushes the GTT write domain for the object if it's dirty. */ 2628static void 2629i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 2630{ 2631 uint32_t old_write_domain; 2632 2633 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 2634 return; 2635 2636 /* No actual flushing is required for the GTT write domain. Writes 2637 * to it immediately go to main memory as far as we know, so there's 2638 * no chipset flush. It also doesn't land in render cache. 2639 */ 2640 old_write_domain = obj->write_domain; 2641 obj->write_domain = 0; 2642 2643 trace_i915_gem_object_change_domain(obj, 2644 obj->read_domains, 2645 old_write_domain); 2646} 2647 2648/** Flushes the CPU write domain for the object if it's dirty. */ 2649static void 2650i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 2651{ 2652 struct drm_device *dev = obj->dev; 2653 uint32_t old_write_domain; 2654 2655 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 2656 return; 2657 2658 i915_gem_clflush_object(obj); 2659 drm_agp_chipset_flush(dev); 2660 old_write_domain = obj->write_domain; 2661 obj->write_domain = 0; 2662 2663 trace_i915_gem_object_change_domain(obj, 2664 obj->read_domains, 2665 old_write_domain); 2666} 2667 2668int 2669i915_gem_object_flush_write_domain(struct drm_gem_object *obj) 2670{ 2671 int ret = 0; 2672 2673 switch (obj->write_domain) { 2674 case I915_GEM_DOMAIN_GTT: 2675 i915_gem_object_flush_gtt_write_domain(obj); 2676 break; 2677 case I915_GEM_DOMAIN_CPU: 2678 i915_gem_object_flush_cpu_write_domain(obj); 2679 break; 2680 default: 2681 ret = i915_gem_object_flush_gpu_write_domain(obj); 2682 break; 2683 } 2684 2685 return ret; 2686} 2687 2688/** 2689 * Moves a single object to the GTT read, and possibly write domain. 2690 * 2691 * This function returns when the move is complete, including waiting on 2692 * flushes to occur. 2693 */ 2694int 2695i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 2696{ 2697 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2698 uint32_t old_write_domain, old_read_domains; 2699 int ret; 2700 2701 /* Not valid to be called on unbound objects. */ 2702 if (obj_priv->gtt_space == NULL) 2703 return -EINVAL; 2704 2705 ret = i915_gem_object_flush_gpu_write_domain(obj); 2706 if (ret != 0) 2707 return ret; 2708 2709 /* Wait on any GPU rendering and flushing to occur. */ 2710 ret = i915_gem_object_wait_rendering(obj); 2711 if (ret != 0) 2712 return ret; 2713 2714 old_write_domain = obj->write_domain; 2715 old_read_domains = obj->read_domains; 2716 2717 /* If we're writing through the GTT domain, then CPU and GPU caches 2718 * will need to be invalidated at next use. 2719 */ 2720 if (write) 2721 obj->read_domains &= I915_GEM_DOMAIN_GTT; 2722 2723 i915_gem_object_flush_cpu_write_domain(obj); 2724 2725 /* It should now be out of any other write domains, and we can update 2726 * the domain values for our changes. 2727 */ 2728 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2729 obj->read_domains |= I915_GEM_DOMAIN_GTT; 2730 if (write) { 2731 obj->write_domain = I915_GEM_DOMAIN_GTT; 2732 obj_priv->dirty = 1; 2733 } 2734 2735 trace_i915_gem_object_change_domain(obj, 2736 old_read_domains, 2737 old_write_domain); 2738 2739 return 0; 2740} 2741 2742/* 2743 * Prepare buffer for display plane. Use uninterruptible for possible flush 2744 * wait, as in modesetting process we're not supposed to be interrupted. 2745 */ 2746int 2747i915_gem_object_set_to_display_plane(struct drm_gem_object *obj) 2748{ 2749 struct drm_device *dev = obj->dev; 2750 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2751 uint32_t old_write_domain, old_read_domains; 2752 int ret; 2753 2754 /* Not valid to be called on unbound objects. */ 2755 if (obj_priv->gtt_space == NULL) 2756 return -EINVAL; 2757 2758 ret = i915_gem_object_flush_gpu_write_domain(obj); 2759 if (ret) 2760 return ret; 2761 2762 /* Wait on any GPU rendering and flushing to occur. */ 2763 if (obj_priv->active) { 2764#if WATCH_BUF 2765 DRM_INFO("%s: object %p wait for seqno %08x\n", 2766 __func__, obj, obj_priv->last_rendering_seqno); 2767#endif 2768 ret = i915_do_wait_request(dev, 2769 obj_priv->last_rendering_seqno, 2770 0, 2771 obj_priv->ring); 2772 if (ret != 0) 2773 return ret; 2774 } 2775 2776 i915_gem_object_flush_cpu_write_domain(obj); 2777 2778 old_write_domain = obj->write_domain; 2779 old_read_domains = obj->read_domains; 2780 2781 /* It should now be out of any other write domains, and we can update 2782 * the domain values for our changes. 2783 */ 2784 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2785 obj->read_domains = I915_GEM_DOMAIN_GTT; 2786 obj->write_domain = I915_GEM_DOMAIN_GTT; 2787 obj_priv->dirty = 1; 2788 2789 trace_i915_gem_object_change_domain(obj, 2790 old_read_domains, 2791 old_write_domain); 2792 2793 return 0; 2794} 2795 2796/** 2797 * Moves a single object to the CPU read, and possibly write domain. 2798 * 2799 * This function returns when the move is complete, including waiting on 2800 * flushes to occur. 2801 */ 2802static int 2803i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 2804{ 2805 uint32_t old_write_domain, old_read_domains; 2806 int ret; 2807 2808 ret = i915_gem_object_flush_gpu_write_domain(obj); 2809 if (ret) 2810 return ret; 2811 2812 /* Wait on any GPU rendering and flushing to occur. */ 2813 ret = i915_gem_object_wait_rendering(obj); 2814 if (ret != 0) 2815 return ret; 2816 2817 i915_gem_object_flush_gtt_write_domain(obj); 2818 2819 /* If we have a partially-valid cache of the object in the CPU, 2820 * finish invalidating it and free the per-page flags. 2821 */ 2822 i915_gem_object_set_to_full_cpu_read_domain(obj); 2823 2824 old_write_domain = obj->write_domain; 2825 old_read_domains = obj->read_domains; 2826 2827 /* Flush the CPU cache if it's still invalid. */ 2828 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2829 i915_gem_clflush_object(obj); 2830 2831 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2832 } 2833 2834 /* It should now be out of any other write domains, and we can update 2835 * the domain values for our changes. 2836 */ 2837 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2838 2839 /* If we're writing through the CPU, then the GPU read domains will 2840 * need to be invalidated at next use. 2841 */ 2842 if (write) { 2843 obj->read_domains &= I915_GEM_DOMAIN_CPU; 2844 obj->write_domain = I915_GEM_DOMAIN_CPU; 2845 } 2846 2847 trace_i915_gem_object_change_domain(obj, 2848 old_read_domains, 2849 old_write_domain); 2850 2851 return 0; 2852} 2853 2854/* 2855 * Set the next domain for the specified object. This 2856 * may not actually perform the necessary flushing/invaliding though, 2857 * as that may want to be batched with other set_domain operations 2858 * 2859 * This is (we hope) the only really tricky part of gem. The goal 2860 * is fairly simple -- track which caches hold bits of the object 2861 * and make sure they remain coherent. A few concrete examples may 2862 * help to explain how it works. For shorthand, we use the notation 2863 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the 2864 * a pair of read and write domain masks. 2865 * 2866 * Case 1: the batch buffer 2867 * 2868 * 1. Allocated 2869 * 2. Written by CPU 2870 * 3. Mapped to GTT 2871 * 4. Read by GPU 2872 * 5. Unmapped from GTT 2873 * 6. Freed 2874 * 2875 * Let's take these a step at a time 2876 * 2877 * 1. Allocated 2878 * Pages allocated from the kernel may still have 2879 * cache contents, so we set them to (CPU, CPU) always. 2880 * 2. Written by CPU (using pwrite) 2881 * The pwrite function calls set_domain (CPU, CPU) and 2882 * this function does nothing (as nothing changes) 2883 * 3. Mapped by GTT 2884 * This function asserts that the object is not 2885 * currently in any GPU-based read or write domains 2886 * 4. Read by GPU 2887 * i915_gem_execbuffer calls set_domain (COMMAND, 0). 2888 * As write_domain is zero, this function adds in the 2889 * current read domains (CPU+COMMAND, 0). 2890 * flush_domains is set to CPU. 2891 * invalidate_domains is set to COMMAND 2892 * clflush is run to get data out of the CPU caches 2893 * then i915_dev_set_domain calls i915_gem_flush to 2894 * emit an MI_FLUSH and drm_agp_chipset_flush 2895 * 5. Unmapped from GTT 2896 * i915_gem_object_unbind calls set_domain (CPU, CPU) 2897 * flush_domains and invalidate_domains end up both zero 2898 * so no flushing/invalidating happens 2899 * 6. Freed 2900 * yay, done 2901 * 2902 * Case 2: The shared render buffer 2903 * 2904 * 1. Allocated 2905 * 2. Mapped to GTT 2906 * 3. Read/written by GPU 2907 * 4. set_domain to (CPU,CPU) 2908 * 5. Read/written by CPU 2909 * 6. Read/written by GPU 2910 * 2911 * 1. Allocated 2912 * Same as last example, (CPU, CPU) 2913 * 2. Mapped to GTT 2914 * Nothing changes (assertions find that it is not in the GPU) 2915 * 3. Read/written by GPU 2916 * execbuffer calls set_domain (RENDER, RENDER) 2917 * flush_domains gets CPU 2918 * invalidate_domains gets GPU 2919 * clflush (obj) 2920 * MI_FLUSH and drm_agp_chipset_flush 2921 * 4. set_domain (CPU, CPU) 2922 * flush_domains gets GPU 2923 * invalidate_domains gets CPU 2924 * wait_rendering (obj) to make sure all drawing is complete. 2925 * This will include an MI_FLUSH to get the data from GPU 2926 * to memory 2927 * clflush (obj) to invalidate the CPU cache 2928 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) 2929 * 5. Read/written by CPU 2930 * cache lines are loaded and dirtied 2931 * 6. Read written by GPU 2932 * Same as last GPU access 2933 * 2934 * Case 3: The constant buffer 2935 * 2936 * 1. Allocated 2937 * 2. Written by CPU 2938 * 3. Read by GPU 2939 * 4. Updated (written) by CPU again 2940 * 5. Read by GPU 2941 * 2942 * 1. Allocated 2943 * (CPU, CPU) 2944 * 2. Written by CPU 2945 * (CPU, CPU) 2946 * 3. Read by GPU 2947 * (CPU+RENDER, 0) 2948 * flush_domains = CPU 2949 * invalidate_domains = RENDER 2950 * clflush (obj) 2951 * MI_FLUSH 2952 * drm_agp_chipset_flush 2953 * 4. Updated (written) by CPU again 2954 * (CPU, CPU) 2955 * flush_domains = 0 (no previous write domain) 2956 * invalidate_domains = 0 (no new read domains) 2957 * 5. Read by GPU 2958 * (CPU+RENDER, 0) 2959 * flush_domains = CPU 2960 * invalidate_domains = RENDER 2961 * clflush (obj) 2962 * MI_FLUSH 2963 * drm_agp_chipset_flush 2964 */ 2965static void 2966i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) 2967{ 2968 struct drm_device *dev = obj->dev; 2969 drm_i915_private_t *dev_priv = dev->dev_private; 2970 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2971 uint32_t invalidate_domains = 0; 2972 uint32_t flush_domains = 0; 2973 uint32_t old_read_domains; 2974 2975 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); 2976 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); 2977 2978 intel_mark_busy(dev, obj); 2979 2980#if WATCH_BUF 2981 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n", 2982 __func__, obj, 2983 obj->read_domains, obj->pending_read_domains, 2984 obj->write_domain, obj->pending_write_domain); 2985#endif 2986 /* 2987 * If the object isn't moving to a new write domain, 2988 * let the object stay in multiple read domains 2989 */ 2990 if (obj->pending_write_domain == 0) 2991 obj->pending_read_domains |= obj->read_domains; 2992 else 2993 obj_priv->dirty = 1; 2994 2995 /* 2996 * Flush the current write domain if 2997 * the new read domains don't match. Invalidate 2998 * any read domains which differ from the old 2999 * write domain 3000 */ 3001 if (obj->write_domain && 3002 obj->write_domain != obj->pending_read_domains) { 3003 flush_domains |= obj->write_domain; 3004 invalidate_domains |= 3005 obj->pending_read_domains & ~obj->write_domain; 3006 } 3007 /* 3008 * Invalidate any read caches which may have 3009 * stale data. That is, any new read domains. 3010 */ 3011 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains; 3012 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { 3013#if WATCH_BUF 3014 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n", 3015 __func__, flush_domains, invalidate_domains); 3016#endif 3017 i915_gem_clflush_object(obj); 3018 } 3019 3020 old_read_domains = obj->read_domains; 3021 3022 /* The actual obj->write_domain will be updated with 3023 * pending_write_domain after we emit the accumulated flush for all 3024 * of our domain changes in execbuffers (which clears objects' 3025 * write_domains). So if we have a current write domain that we 3026 * aren't changing, set pending_write_domain to that. 3027 */ 3028 if (flush_domains == 0 && obj->pending_write_domain == 0) 3029 obj->pending_write_domain = obj->write_domain; 3030 obj->read_domains = obj->pending_read_domains; 3031 3032 if (flush_domains & I915_GEM_GPU_DOMAINS) { 3033 if (obj_priv->ring == &dev_priv->render_ring) 3034 dev_priv->flush_rings |= FLUSH_RENDER_RING; 3035 else if (obj_priv->ring == &dev_priv->bsd_ring) 3036 dev_priv->flush_rings |= FLUSH_BSD_RING; 3037 } 3038 3039 dev->invalidate_domains |= invalidate_domains; 3040 dev->flush_domains |= flush_domains; 3041#if WATCH_BUF 3042 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n", 3043 __func__, 3044 obj->read_domains, obj->write_domain, 3045 dev->invalidate_domains, dev->flush_domains); 3046#endif 3047 3048 trace_i915_gem_object_change_domain(obj, 3049 old_read_domains, 3050 obj->write_domain); 3051} 3052 3053/** 3054 * Moves the object from a partially CPU read to a full one. 3055 * 3056 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 3057 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 3058 */ 3059static void 3060i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) 3061{ 3062 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 3063 3064 if (!obj_priv->page_cpu_valid) 3065 return; 3066 3067 /* If we're partially in the CPU read domain, finish moving it in. 3068 */ 3069 if (obj->read_domains & I915_GEM_DOMAIN_CPU) { 3070 int i; 3071 3072 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { 3073 if (obj_priv->page_cpu_valid[i]) 3074 continue; 3075 drm_clflush_pages(obj_priv->pages + i, 1); 3076 } 3077 } 3078 3079 /* Free the page_cpu_valid mappings which are now stale, whether 3080 * or not we've got I915_GEM_DOMAIN_CPU. 3081 */ 3082 kfree(obj_priv->page_cpu_valid); 3083 obj_priv->page_cpu_valid = NULL; 3084} 3085 3086/** 3087 * Set the CPU read domain on a range of the object. 3088 * 3089 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 3090 * not entirely valid. The page_cpu_valid member of the object flags which 3091 * pages have been flushed, and will be respected by 3092 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 3093 * of the whole object. 3094 * 3095 * This function returns when the move is complete, including waiting on 3096 * flushes to occur. 3097 */ 3098static int 3099i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 3100 uint64_t offset, uint64_t size) 3101{ 3102 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 3103 uint32_t old_read_domains; 3104 int i, ret; 3105 3106 if (offset == 0 && size == obj->size) 3107 return i915_gem_object_set_to_cpu_domain(obj, 0); 3108 3109 ret = i915_gem_object_flush_gpu_write_domain(obj); 3110 if (ret) 3111 return ret; 3112 3113 /* Wait on any GPU rendering and flushing to occur. */ 3114 ret = i915_gem_object_wait_rendering(obj); 3115 if (ret != 0) 3116 return ret; 3117 i915_gem_object_flush_gtt_write_domain(obj); 3118 3119 /* If we're already fully in the CPU read domain, we're done. */ 3120 if (obj_priv->page_cpu_valid == NULL && 3121 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0) 3122 return 0; 3123 3124 /* Otherwise, create/clear the per-page CPU read domain flag if we're 3125 * newly adding I915_GEM_DOMAIN_CPU 3126 */ 3127 if (obj_priv->page_cpu_valid == NULL) { 3128 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE, 3129 GFP_KERNEL); 3130 if (obj_priv->page_cpu_valid == NULL) 3131 return -ENOMEM; 3132 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) 3133 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); 3134 3135 /* Flush the cache on any pages that are still invalid from the CPU's 3136 * perspective. 3137 */ 3138 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 3139 i++) { 3140 if (obj_priv->page_cpu_valid[i]) 3141 continue; 3142 3143 drm_clflush_pages(obj_priv->pages + i, 1); 3144 3145 obj_priv->page_cpu_valid[i] = 1; 3146 } 3147 3148 /* It should now be out of any other write domains, and we can update 3149 * the domain values for our changes. 3150 */ 3151 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3152 3153 old_read_domains = obj->read_domains; 3154 obj->read_domains |= I915_GEM_DOMAIN_CPU; 3155 3156 trace_i915_gem_object_change_domain(obj, 3157 old_read_domains, 3158 obj->write_domain); 3159 3160 return 0; 3161} 3162 3163/** 3164 * Pin an object to the GTT and evaluate the relocations landing in it. 3165 */ 3166static int 3167i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 3168 struct drm_file *file_priv, 3169 struct drm_i915_gem_exec_object2 *entry, 3170 struct drm_i915_gem_relocation_entry *relocs) 3171{ 3172 struct drm_device *dev = obj->dev; 3173 drm_i915_private_t *dev_priv = dev->dev_private; 3174 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 3175 int i, ret; 3176 void __iomem *reloc_page; 3177 bool need_fence; 3178 3179 need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && 3180 obj_priv->tiling_mode != I915_TILING_NONE; 3181 3182 /* Check fence reg constraints and rebind if necessary */ 3183 if (need_fence && 3184 !i915_gem_object_fence_offset_ok(obj, 3185 obj_priv->tiling_mode)) { 3186 ret = i915_gem_object_unbind(obj); 3187 if (ret) 3188 return ret; 3189 } 3190 3191 /* Choose the GTT offset for our buffer and put it there. */ 3192 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 3193 if (ret) 3194 return ret; 3195 3196 /* 3197 * Pre-965 chips need a fence register set up in order to 3198 * properly handle blits to/from tiled surfaces. 3199 */ 3200 if (need_fence) { 3201 ret = i915_gem_object_get_fence_reg(obj); 3202 if (ret != 0) { 3203 i915_gem_object_unpin(obj); 3204 return ret; 3205 } 3206 } 3207 3208 entry->offset = obj_priv->gtt_offset; 3209 3210 /* Apply the relocations, using the GTT aperture to avoid cache 3211 * flushing requirements. 3212 */ 3213 for (i = 0; i < entry->relocation_count; i++) { 3214 struct drm_i915_gem_relocation_entry *reloc= &relocs[i]; 3215 struct drm_gem_object *target_obj; 3216 struct drm_i915_gem_object *target_obj_priv; 3217 uint32_t reloc_val, reloc_offset; 3218 uint32_t __iomem *reloc_entry; 3219 3220 target_obj = drm_gem_object_lookup(obj->dev, file_priv, 3221 reloc->target_handle); 3222 if (target_obj == NULL) { 3223 i915_gem_object_unpin(obj); 3224 return -ENOENT; 3225 } 3226 target_obj_priv = to_intel_bo(target_obj); 3227 3228#if WATCH_RELOC 3229 DRM_INFO("%s: obj %p offset %08x target %d " 3230 "read %08x write %08x gtt %08x " 3231 "presumed %08x delta %08x\n", 3232 __func__, 3233 obj, 3234 (int) reloc->offset, 3235 (int) reloc->target_handle, 3236 (int) reloc->read_domains, 3237 (int) reloc->write_domain, 3238 (int) target_obj_priv->gtt_offset, 3239 (int) reloc->presumed_offset, 3240 reloc->delta); 3241#endif 3242 3243 /* The target buffer should have appeared before us in the 3244 * exec_object list, so it should have a GTT space bound by now. 3245 */ 3246 if (target_obj_priv->gtt_space == NULL) { 3247 DRM_ERROR("No GTT space found for object %d\n", 3248 reloc->target_handle); 3249 drm_gem_object_unreference(target_obj); 3250 i915_gem_object_unpin(obj); 3251 return -EINVAL; 3252 } 3253 3254 /* Validate that the target is in a valid r/w GPU domain */ 3255 if (reloc->write_domain & (reloc->write_domain - 1)) { 3256 DRM_ERROR("reloc with multiple write domains: " 3257 "obj %p target %d offset %d " 3258 "read %08x write %08x", 3259 obj, reloc->target_handle, 3260 (int) reloc->offset, 3261 reloc->read_domains, 3262 reloc->write_domain); 3263 drm_gem_object_unreference(target_obj); 3264 i915_gem_object_unpin(obj); 3265 return -EINVAL; 3266 } 3267 if (reloc->write_domain & I915_GEM_DOMAIN_CPU || 3268 reloc->read_domains & I915_GEM_DOMAIN_CPU) { 3269 DRM_ERROR("reloc with read/write CPU domains: " 3270 "obj %p target %d offset %d " 3271 "read %08x write %08x", 3272 obj, reloc->target_handle, 3273 (int) reloc->offset, 3274 reloc->read_domains, 3275 reloc->write_domain); 3276 drm_gem_object_unreference(target_obj); 3277 i915_gem_object_unpin(obj); 3278 return -EINVAL; 3279 } 3280 if (reloc->write_domain && target_obj->pending_write_domain && 3281 reloc->write_domain != target_obj->pending_write_domain) { 3282 DRM_ERROR("Write domain conflict: " 3283 "obj %p target %d offset %d " 3284 "new %08x old %08x\n", 3285 obj, reloc->target_handle, 3286 (int) reloc->offset, 3287 reloc->write_domain, 3288 target_obj->pending_write_domain); 3289 drm_gem_object_unreference(target_obj); 3290 i915_gem_object_unpin(obj); 3291 return -EINVAL; 3292 } 3293 3294 target_obj->pending_read_domains |= reloc->read_domains; 3295 target_obj->pending_write_domain |= reloc->write_domain; 3296 3297 /* If the relocation already has the right value in it, no 3298 * more work needs to be done. 3299 */ 3300 if (target_obj_priv->gtt_offset == reloc->presumed_offset) { 3301 drm_gem_object_unreference(target_obj); 3302 continue; 3303 } 3304 3305 /* Check that the relocation address is valid... */ 3306 if (reloc->offset > obj->size - 4) { 3307 DRM_ERROR("Relocation beyond object bounds: " 3308 "obj %p target %d offset %d size %d.\n", 3309 obj, reloc->target_handle, 3310 (int) reloc->offset, (int) obj->size); 3311 drm_gem_object_unreference(target_obj); 3312 i915_gem_object_unpin(obj); 3313 return -EINVAL; 3314 } 3315 if (reloc->offset & 3) { 3316 DRM_ERROR("Relocation not 4-byte aligned: " 3317 "obj %p target %d offset %d.\n", 3318 obj, reloc->target_handle, 3319 (int) reloc->offset); 3320 drm_gem_object_unreference(target_obj); 3321 i915_gem_object_unpin(obj); 3322 return -EINVAL; 3323 } 3324 3325 /* and points to somewhere within the target object. */ 3326 if (reloc->delta >= target_obj->size) { 3327 DRM_ERROR("Relocation beyond target object bounds: " 3328 "obj %p target %d delta %d size %d.\n", 3329 obj, reloc->target_handle, 3330 (int) reloc->delta, (int) target_obj->size); 3331 drm_gem_object_unreference(target_obj); 3332 i915_gem_object_unpin(obj); 3333 return -EINVAL; 3334 } 3335 3336 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 3337 if (ret != 0) { 3338 drm_gem_object_unreference(target_obj); 3339 i915_gem_object_unpin(obj); 3340 return -EINVAL; 3341 } 3342 3343 /* Map the page containing the relocation we're going to 3344 * perform. 3345 */ 3346 reloc_offset = obj_priv->gtt_offset + reloc->offset; 3347 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, 3348 (reloc_offset & 3349 ~(PAGE_SIZE - 1)), 3350 KM_USER0); 3351 reloc_entry = (uint32_t __iomem *)(reloc_page + 3352 (reloc_offset & (PAGE_SIZE - 1))); 3353 reloc_val = target_obj_priv->gtt_offset + reloc->delta; 3354 3355#if WATCH_BUF 3356 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", 3357 obj, (unsigned int) reloc->offset, 3358 readl(reloc_entry), reloc_val); 3359#endif 3360 writel(reloc_val, reloc_entry); 3361 io_mapping_unmap_atomic(reloc_page, KM_USER0); 3362 3363 /* The updated presumed offset for this entry will be 3364 * copied back out to the user. 3365 */ 3366 reloc->presumed_offset = target_obj_priv->gtt_offset; 3367 3368 drm_gem_object_unreference(target_obj); 3369 } 3370 3371#if WATCH_BUF 3372 if (0) 3373 i915_gem_dump_object(obj, 128, __func__, ~0); 3374#endif 3375 return 0; 3376} 3377 3378/* Throttle our rendering by waiting until the ring has completed our requests 3379 * emitted over 20 msec ago. 3380 * 3381 * Note that if we were to use the current jiffies each time around the loop, 3382 * we wouldn't escape the function with any frames outstanding if the time to 3383 * render a frame was over 20ms. 3384 * 3385 * This should get us reasonable parallelism between CPU and GPU but also 3386 * relatively low latency when blocking on a particular request to finish. 3387 */ 3388static int 3389i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 3390{ 3391 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 3392 int ret = 0; 3393 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3394 3395 mutex_lock(&dev->struct_mutex); 3396 while (!list_empty(&i915_file_priv->mm.request_list)) { 3397 struct drm_i915_gem_request *request; 3398 3399 request = list_first_entry(&i915_file_priv->mm.request_list, 3400 struct drm_i915_gem_request, 3401 client_list); 3402 3403 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3404 break; 3405 3406 ret = i915_wait_request(dev, request->seqno, request->ring); 3407 if (ret != 0) 3408 break; 3409 } 3410 mutex_unlock(&dev->struct_mutex); 3411 3412 return ret; 3413} 3414 3415static int 3416i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, 3417 uint32_t buffer_count, 3418 struct drm_i915_gem_relocation_entry **relocs) 3419{ 3420 uint32_t reloc_count = 0, reloc_index = 0, i; 3421 int ret; 3422 3423 *relocs = NULL; 3424 for (i = 0; i < buffer_count; i++) { 3425 if (reloc_count + exec_list[i].relocation_count < reloc_count) 3426 return -EINVAL; 3427 reloc_count += exec_list[i].relocation_count; 3428 } 3429 3430 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); 3431 if (*relocs == NULL) { 3432 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count); 3433 return -ENOMEM; 3434 } 3435 3436 for (i = 0; i < buffer_count; i++) { 3437 struct drm_i915_gem_relocation_entry __user *user_relocs; 3438 3439 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3440 3441 ret = copy_from_user(&(*relocs)[reloc_index], 3442 user_relocs, 3443 exec_list[i].relocation_count * 3444 sizeof(**relocs)); 3445 if (ret != 0) { 3446 drm_free_large(*relocs); 3447 *relocs = NULL; 3448 return -EFAULT; 3449 } 3450 3451 reloc_index += exec_list[i].relocation_count; 3452 } 3453 3454 return 0; 3455} 3456 3457static int 3458i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, 3459 uint32_t buffer_count, 3460 struct drm_i915_gem_relocation_entry *relocs) 3461{ 3462 uint32_t reloc_count = 0, i; 3463 int ret = 0; 3464 3465 if (relocs == NULL) 3466 return 0; 3467 3468 for (i = 0; i < buffer_count; i++) { 3469 struct drm_i915_gem_relocation_entry __user *user_relocs; 3470 int unwritten; 3471 3472 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3473 3474 unwritten = copy_to_user(user_relocs, 3475 &relocs[reloc_count], 3476 exec_list[i].relocation_count * 3477 sizeof(*relocs)); 3478 3479 if (unwritten) { 3480 ret = -EFAULT; 3481 goto err; 3482 } 3483 3484 reloc_count += exec_list[i].relocation_count; 3485 } 3486 3487err: 3488 drm_free_large(relocs); 3489 3490 return ret; 3491} 3492 3493static int 3494i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, 3495 uint64_t exec_offset) 3496{ 3497 uint32_t exec_start, exec_len; 3498 3499 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3500 exec_len = (uint32_t) exec->batch_len; 3501 3502 if ((exec_start | exec_len) & 0x7) 3503 return -EINVAL; 3504 3505 if (!exec_start) 3506 return -EINVAL; 3507 3508 return 0; 3509} 3510 3511static int 3512i915_gem_wait_for_pending_flip(struct drm_device *dev, 3513 struct drm_gem_object **object_list, 3514 int count) 3515{ 3516 drm_i915_private_t *dev_priv = dev->dev_private; 3517 struct drm_i915_gem_object *obj_priv; 3518 DEFINE_WAIT(wait); 3519 int i, ret = 0; 3520 3521 for (;;) { 3522 prepare_to_wait(&dev_priv->pending_flip_queue, 3523 &wait, TASK_INTERRUPTIBLE); 3524 for (i = 0; i < count; i++) { 3525 obj_priv = to_intel_bo(object_list[i]); 3526 if (atomic_read(&obj_priv->pending_flip) > 0) 3527 break; 3528 } 3529 if (i == count) 3530 break; 3531 3532 if (!signal_pending(current)) { 3533 mutex_unlock(&dev->struct_mutex); 3534 schedule(); 3535 mutex_lock(&dev->struct_mutex); 3536 continue; 3537 } 3538 ret = -ERESTARTSYS; 3539 break; 3540 } 3541 finish_wait(&dev_priv->pending_flip_queue, &wait); 3542 3543 return ret; 3544} 3545 3546 3547int 3548i915_gem_do_execbuffer(struct drm_device *dev, void *data, 3549 struct drm_file *file_priv, 3550 struct drm_i915_gem_execbuffer2 *args, 3551 struct drm_i915_gem_exec_object2 *exec_list) 3552{ 3553 drm_i915_private_t *dev_priv = dev->dev_private; 3554 struct drm_gem_object **object_list = NULL; 3555 struct drm_gem_object *batch_obj; 3556 struct drm_i915_gem_object *obj_priv; 3557 struct drm_clip_rect *cliprects = NULL; 3558 struct drm_i915_gem_relocation_entry *relocs = NULL; 3559 int ret = 0, ret2, i, pinned = 0; 3560 uint64_t exec_offset; 3561 uint32_t seqno, flush_domains, reloc_index; 3562 int pin_tries, flips; 3563 3564 struct intel_ring_buffer *ring = NULL; 3565 3566#if WATCH_EXEC 3567 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3568 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3569#endif 3570 if (args->flags & I915_EXEC_BSD) { 3571 if (!HAS_BSD(dev)) { 3572 DRM_ERROR("execbuf with wrong flag\n"); 3573 return -EINVAL; 3574 } 3575 ring = &dev_priv->bsd_ring; 3576 } else { 3577 ring = &dev_priv->render_ring; 3578 } 3579 3580 if (args->buffer_count < 1) { 3581 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); 3582 return -EINVAL; 3583 } 3584 object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count); 3585 if (object_list == NULL) { 3586 DRM_ERROR("Failed to allocate object list for %d buffers\n", 3587 args->buffer_count); 3588 ret = -ENOMEM; 3589 goto pre_mutex_err; 3590 } 3591 3592 if (args->num_cliprects != 0) { 3593 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), 3594 GFP_KERNEL); 3595 if (cliprects == NULL) { 3596 ret = -ENOMEM; 3597 goto pre_mutex_err; 3598 } 3599 3600 ret = copy_from_user(cliprects, 3601 (struct drm_clip_rect __user *) 3602 (uintptr_t) args->cliprects_ptr, 3603 sizeof(*cliprects) * args->num_cliprects); 3604 if (ret != 0) { 3605 DRM_ERROR("copy %d cliprects failed: %d\n", 3606 args->num_cliprects, ret); 3607 ret = -EFAULT; 3608 goto pre_mutex_err; 3609 } 3610 } 3611 3612 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, 3613 &relocs); 3614 if (ret != 0) 3615 goto pre_mutex_err; 3616 3617 mutex_lock(&dev->struct_mutex); 3618 3619 i915_verify_inactive(dev, __FILE__, __LINE__); 3620 3621 if (atomic_read(&dev_priv->mm.wedged)) { 3622 mutex_unlock(&dev->struct_mutex); 3623 ret = -EIO; 3624 goto pre_mutex_err; 3625 } 3626 3627 if (dev_priv->mm.suspended) { 3628 mutex_unlock(&dev->struct_mutex); 3629 ret = -EBUSY; 3630 goto pre_mutex_err; 3631 } 3632 3633 /* Look up object handles */ 3634 flips = 0; 3635 for (i = 0; i < args->buffer_count; i++) { 3636 object_list[i] = drm_gem_object_lookup(dev, file_priv, 3637 exec_list[i].handle); 3638 if (object_list[i] == NULL) { 3639 DRM_ERROR("Invalid object handle %d at index %d\n", 3640 exec_list[i].handle, i); 3641 /* prevent error path from reading uninitialized data */ 3642 args->buffer_count = i + 1; 3643 ret = -ENOENT; 3644 goto err; 3645 } 3646 3647 obj_priv = to_intel_bo(object_list[i]); 3648 if (obj_priv->in_execbuffer) { 3649 DRM_ERROR("Object %p appears more than once in object list\n", 3650 object_list[i]); 3651 /* prevent error path from reading uninitialized data */ 3652 args->buffer_count = i + 1; 3653 ret = -EINVAL; 3654 goto err; 3655 } 3656 obj_priv->in_execbuffer = true; 3657 flips += atomic_read(&obj_priv->pending_flip); 3658 } 3659 3660 if (flips > 0) { 3661 ret = i915_gem_wait_for_pending_flip(dev, object_list, 3662 args->buffer_count); 3663 if (ret) 3664 goto err; 3665 } 3666 3667 /* Pin and relocate */ 3668 for (pin_tries = 0; ; pin_tries++) { 3669 ret = 0; 3670 reloc_index = 0; 3671 3672 for (i = 0; i < args->buffer_count; i++) { 3673 object_list[i]->pending_read_domains = 0; 3674 object_list[i]->pending_write_domain = 0; 3675 ret = i915_gem_object_pin_and_relocate(object_list[i], 3676 file_priv, 3677 &exec_list[i], 3678 &relocs[reloc_index]); 3679 if (ret) 3680 break; 3681 pinned = i + 1; 3682 reloc_index += exec_list[i].relocation_count; 3683 } 3684 /* success */ 3685 if (ret == 0) 3686 break; 3687 3688 /* error other than GTT full, or we've already tried again */ 3689 if (ret != -ENOSPC || pin_tries >= 1) { 3690 if (ret != -ERESTARTSYS) { 3691 unsigned long long total_size = 0; 3692 int num_fences = 0; 3693 for (i = 0; i < args->buffer_count; i++) { 3694 obj_priv = to_intel_bo(object_list[i]); 3695 3696 total_size += object_list[i]->size; 3697 num_fences += 3698 exec_list[i].flags & EXEC_OBJECT_NEEDS_FENCE && 3699 obj_priv->tiling_mode != I915_TILING_NONE; 3700 } 3701 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes, %d fences: %d\n", 3702 pinned+1, args->buffer_count, 3703 total_size, num_fences, 3704 ret); 3705 DRM_ERROR("%d objects [%d pinned], " 3706 "%d object bytes [%d pinned], " 3707 "%d/%d gtt bytes\n", 3708 atomic_read(&dev->object_count), 3709 atomic_read(&dev->pin_count), 3710 atomic_read(&dev->object_memory), 3711 atomic_read(&dev->pin_memory), 3712 atomic_read(&dev->gtt_memory), 3713 dev->gtt_total); 3714 } 3715 goto err; 3716 } 3717 3718 /* unpin all of our buffers */ 3719 for (i = 0; i < pinned; i++) 3720 i915_gem_object_unpin(object_list[i]); 3721 pinned = 0; 3722 3723 /* evict everyone we can from the aperture */ 3724 ret = i915_gem_evict_everything(dev); 3725 if (ret && ret != -ENOSPC) 3726 goto err; 3727 } 3728 3729 /* Set the pending read domains for the batch buffer to COMMAND */ 3730 batch_obj = object_list[args->buffer_count-1]; 3731 if (batch_obj->pending_write_domain) { 3732 DRM_ERROR("Attempting to use self-modifying batch buffer\n"); 3733 ret = -EINVAL; 3734 goto err; 3735 } 3736 batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND; 3737 3738 /* Sanity check the batch buffer, prior to moving objects */ 3739 exec_offset = exec_list[args->buffer_count - 1].offset; 3740 ret = i915_gem_check_execbuffer (args, exec_offset); 3741 if (ret != 0) { 3742 DRM_ERROR("execbuf with invalid offset/length\n"); 3743 goto err; 3744 } 3745 3746 i915_verify_inactive(dev, __FILE__, __LINE__); 3747 3748 /* Zero the global flush/invalidate flags. These 3749 * will be modified as new domains are computed 3750 * for each object 3751 */ 3752 dev->invalidate_domains = 0; 3753 dev->flush_domains = 0; 3754 dev_priv->flush_rings = 0; 3755 3756 for (i = 0; i < args->buffer_count; i++) { 3757 struct drm_gem_object *obj = object_list[i]; 3758 3759 /* Compute new gpu domains and update invalidate/flush */ 3760 i915_gem_object_set_to_gpu_domain(obj); 3761 } 3762 3763 i915_verify_inactive(dev, __FILE__, __LINE__); 3764 3765 if (dev->invalidate_domains | dev->flush_domains) { 3766#if WATCH_EXEC 3767 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", 3768 __func__, 3769 dev->invalidate_domains, 3770 dev->flush_domains); 3771#endif 3772 i915_gem_flush(dev, 3773 dev->invalidate_domains, 3774 dev->flush_domains); 3775 if (dev_priv->flush_rings & FLUSH_RENDER_RING) 3776 (void)i915_add_request(dev, file_priv, 3777 dev->flush_domains, 3778 &dev_priv->render_ring); 3779 if (dev_priv->flush_rings & FLUSH_BSD_RING) 3780 (void)i915_add_request(dev, file_priv, 3781 dev->flush_domains, 3782 &dev_priv->bsd_ring); 3783 } 3784 3785 for (i = 0; i < args->buffer_count; i++) { 3786 struct drm_gem_object *obj = object_list[i]; 3787 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 3788 uint32_t old_write_domain = obj->write_domain; 3789 3790 obj->write_domain = obj->pending_write_domain; 3791 if (obj->write_domain) 3792 list_move_tail(&obj_priv->gpu_write_list, 3793 &dev_priv->mm.gpu_write_list); 3794 else 3795 list_del_init(&obj_priv->gpu_write_list); 3796 3797 trace_i915_gem_object_change_domain(obj, 3798 obj->read_domains, 3799 old_write_domain); 3800 } 3801 3802 i915_verify_inactive(dev, __FILE__, __LINE__); 3803 3804#if WATCH_COHERENCY 3805 for (i = 0; i < args->buffer_count; i++) { 3806 i915_gem_object_check_coherency(object_list[i], 3807 exec_list[i].handle); 3808 } 3809#endif 3810 3811#if WATCH_EXEC 3812 i915_gem_dump_object(batch_obj, 3813 args->batch_len, 3814 __func__, 3815 ~0); 3816#endif 3817 3818 /* Exec the batchbuffer */ 3819 ret = ring->dispatch_gem_execbuffer(dev, ring, args, 3820 cliprects, exec_offset); 3821 if (ret) { 3822 DRM_ERROR("dispatch failed %d\n", ret); 3823 goto err; 3824 } 3825 3826 /* 3827 * Ensure that the commands in the batch buffer are 3828 * finished before the interrupt fires 3829 */ 3830 flush_domains = i915_retire_commands(dev, ring); 3831 3832 i915_verify_inactive(dev, __FILE__, __LINE__); 3833 3834 /* 3835 * Get a seqno representing the execution of the current buffer, 3836 * which we can wait on. We would like to mitigate these interrupts, 3837 * likely by only creating seqnos occasionally (so that we have 3838 * *some* interrupts representing completion of buffers that we can 3839 * wait on when trying to clear up gtt space). 3840 */ 3841 seqno = i915_add_request(dev, file_priv, flush_domains, ring); 3842 BUG_ON(seqno == 0); 3843 for (i = 0; i < args->buffer_count; i++) { 3844 struct drm_gem_object *obj = object_list[i]; 3845 obj_priv = to_intel_bo(obj); 3846 3847 i915_gem_object_move_to_active(obj, seqno, ring); 3848#if WATCH_LRU 3849 DRM_INFO("%s: move to exec list %p\n", __func__, obj); 3850#endif 3851 } 3852#if WATCH_LRU 3853 i915_dump_lru(dev, __func__); 3854#endif 3855 3856 i915_verify_inactive(dev, __FILE__, __LINE__); 3857 3858err: 3859 for (i = 0; i < pinned; i++) 3860 i915_gem_object_unpin(object_list[i]); 3861 3862 for (i = 0; i < args->buffer_count; i++) { 3863 if (object_list[i]) { 3864 obj_priv = to_intel_bo(object_list[i]); 3865 obj_priv->in_execbuffer = false; 3866 } 3867 drm_gem_object_unreference(object_list[i]); 3868 } 3869 3870 mutex_unlock(&dev->struct_mutex); 3871 3872pre_mutex_err: 3873 /* Copy the updated relocations out regardless of current error 3874 * state. Failure to update the relocs would mean that the next 3875 * time userland calls execbuf, it would do so with presumed offset 3876 * state that didn't match the actual object state. 3877 */ 3878 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count, 3879 relocs); 3880 if (ret2 != 0) { 3881 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2); 3882 3883 if (ret == 0) 3884 ret = ret2; 3885 } 3886 3887 drm_free_large(object_list); 3888 kfree(cliprects); 3889 3890 return ret; 3891} 3892 3893/* 3894 * Legacy execbuffer just creates an exec2 list from the original exec object 3895 * list array and passes it to the real function. 3896 */ 3897int 3898i915_gem_execbuffer(struct drm_device *dev, void *data, 3899 struct drm_file *file_priv) 3900{ 3901 struct drm_i915_gem_execbuffer *args = data; 3902 struct drm_i915_gem_execbuffer2 exec2; 3903 struct drm_i915_gem_exec_object *exec_list = NULL; 3904 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 3905 int ret, i; 3906 3907#if WATCH_EXEC 3908 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3909 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3910#endif 3911 3912 if (args->buffer_count < 1) { 3913 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); 3914 return -EINVAL; 3915 } 3916 3917 /* Copy in the exec list from userland */ 3918 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); 3919 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); 3920 if (exec_list == NULL || exec2_list == NULL) { 3921 DRM_ERROR("Failed to allocate exec list for %d buffers\n", 3922 args->buffer_count); 3923 drm_free_large(exec_list); 3924 drm_free_large(exec2_list); 3925 return -ENOMEM; 3926 } 3927 ret = copy_from_user(exec_list, 3928 (struct drm_i915_relocation_entry __user *) 3929 (uintptr_t) args->buffers_ptr, 3930 sizeof(*exec_list) * args->buffer_count); 3931 if (ret != 0) { 3932 DRM_ERROR("copy %d exec entries failed %d\n", 3933 args->buffer_count, ret); 3934 drm_free_large(exec_list); 3935 drm_free_large(exec2_list); 3936 return -EFAULT; 3937 } 3938 3939 for (i = 0; i < args->buffer_count; i++) { 3940 exec2_list[i].handle = exec_list[i].handle; 3941 exec2_list[i].relocation_count = exec_list[i].relocation_count; 3942 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; 3943 exec2_list[i].alignment = exec_list[i].alignment; 3944 exec2_list[i].offset = exec_list[i].offset; 3945 if (!IS_I965G(dev)) 3946 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; 3947 else 3948 exec2_list[i].flags = 0; 3949 } 3950 3951 exec2.buffers_ptr = args->buffers_ptr; 3952 exec2.buffer_count = args->buffer_count; 3953 exec2.batch_start_offset = args->batch_start_offset; 3954 exec2.batch_len = args->batch_len; 3955 exec2.DR1 = args->DR1; 3956 exec2.DR4 = args->DR4; 3957 exec2.num_cliprects = args->num_cliprects; 3958 exec2.cliprects_ptr = args->cliprects_ptr; 3959 exec2.flags = I915_EXEC_RENDER; 3960 3961 ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list); 3962 if (!ret) { 3963 /* Copy the new buffer offsets back to the user's exec list. */ 3964 for (i = 0; i < args->buffer_count; i++) 3965 exec_list[i].offset = exec2_list[i].offset; 3966 /* ... and back out to userspace */ 3967 ret = copy_to_user((struct drm_i915_relocation_entry __user *) 3968 (uintptr_t) args->buffers_ptr, 3969 exec_list, 3970 sizeof(*exec_list) * args->buffer_count); 3971 if (ret) { 3972 ret = -EFAULT; 3973 DRM_ERROR("failed to copy %d exec entries " 3974 "back to user (%d)\n", 3975 args->buffer_count, ret); 3976 } 3977 } 3978 3979 drm_free_large(exec_list); 3980 drm_free_large(exec2_list); 3981 return ret; 3982} 3983 3984int 3985i915_gem_execbuffer2(struct drm_device *dev, void *data, 3986 struct drm_file *file_priv) 3987{ 3988 struct drm_i915_gem_execbuffer2 *args = data; 3989 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 3990 int ret; 3991 3992#if WATCH_EXEC 3993 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3994 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3995#endif 3996 3997 if (args->buffer_count < 1) { 3998 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count); 3999 return -EINVAL; 4000 } 4001 4002 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); 4003 if (exec2_list == NULL) { 4004 DRM_ERROR("Failed to allocate exec list for %d buffers\n", 4005 args->buffer_count); 4006 return -ENOMEM; 4007 } 4008 ret = copy_from_user(exec2_list, 4009 (struct drm_i915_relocation_entry __user *) 4010 (uintptr_t) args->buffers_ptr, 4011 sizeof(*exec2_list) * args->buffer_count); 4012 if (ret != 0) { 4013 DRM_ERROR("copy %d exec entries failed %d\n", 4014 args->buffer_count, ret); 4015 drm_free_large(exec2_list); 4016 return -EFAULT; 4017 } 4018 4019 ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list); 4020 if (!ret) { 4021 /* Copy the new buffer offsets back to the user's exec list. */ 4022 ret = copy_to_user((struct drm_i915_relocation_entry __user *) 4023 (uintptr_t) args->buffers_ptr, 4024 exec2_list, 4025 sizeof(*exec2_list) * args->buffer_count); 4026 if (ret) { 4027 ret = -EFAULT; 4028 DRM_ERROR("failed to copy %d exec entries " 4029 "back to user (%d)\n", 4030 args->buffer_count, ret); 4031 } 4032 } 4033 4034 drm_free_large(exec2_list); 4035 return ret; 4036} 4037 4038int 4039i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) 4040{ 4041 struct drm_device *dev = obj->dev; 4042 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 4043 int ret; 4044 4045 BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); 4046 4047 i915_verify_inactive(dev, __FILE__, __LINE__); 4048 4049 if (obj_priv->gtt_space != NULL) { 4050 if (alignment == 0) 4051 alignment = i915_gem_get_gtt_alignment(obj); 4052 if (obj_priv->gtt_offset & (alignment - 1)) { 4053 WARN(obj_priv->pin_count, 4054 "bo is already pinned with incorrect alignment:" 4055 " offset=%x, req.alignment=%x\n", 4056 obj_priv->gtt_offset, alignment); 4057 ret = i915_gem_object_unbind(obj); 4058 if (ret) 4059 return ret; 4060 } 4061 } 4062 4063 if (obj_priv->gtt_space == NULL) { 4064 ret = i915_gem_object_bind_to_gtt(obj, alignment); 4065 if (ret) 4066 return ret; 4067 } 4068 4069 obj_priv->pin_count++; 4070 4071 /* If the object is not active and not pending a flush, 4072 * remove it from the inactive list 4073 */ 4074 if (obj_priv->pin_count == 1) { 4075 atomic_inc(&dev->pin_count); 4076 atomic_add(obj->size, &dev->pin_memory); 4077 if (!obj_priv->active && 4078 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 4079 list_del_init(&obj_priv->list); 4080 } 4081 i915_verify_inactive(dev, __FILE__, __LINE__); 4082 4083 return 0; 4084} 4085 4086void 4087i915_gem_object_unpin(struct drm_gem_object *obj) 4088{ 4089 struct drm_device *dev = obj->dev; 4090 drm_i915_private_t *dev_priv = dev->dev_private; 4091 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 4092 4093 i915_verify_inactive(dev, __FILE__, __LINE__); 4094 obj_priv->pin_count--; 4095 BUG_ON(obj_priv->pin_count < 0); 4096 BUG_ON(obj_priv->gtt_space == NULL); 4097 4098 /* If the object is no longer pinned, and is 4099 * neither active nor being flushed, then stick it on 4100 * the inactive list 4101 */ 4102 if (obj_priv->pin_count == 0) { 4103 if (!obj_priv->active && 4104 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 4105 list_move_tail(&obj_priv->list, 4106 &dev_priv->mm.inactive_list); 4107 atomic_dec(&dev->pin_count); 4108 atomic_sub(obj->size, &dev->pin_memory); 4109 } 4110 i915_verify_inactive(dev, __FILE__, __LINE__); 4111} 4112 4113int 4114i915_gem_pin_ioctl(struct drm_device *dev, void *data, 4115 struct drm_file *file_priv) 4116{ 4117 struct drm_i915_gem_pin *args = data; 4118 struct drm_gem_object *obj; 4119 struct drm_i915_gem_object *obj_priv; 4120 int ret; 4121 4122 mutex_lock(&dev->struct_mutex); 4123 4124 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4125 if (obj == NULL) { 4126 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", 4127 args->handle); 4128 mutex_unlock(&dev->struct_mutex); 4129 return -ENOENT; 4130 } 4131 obj_priv = to_intel_bo(obj); 4132 4133 if (obj_priv->madv != I915_MADV_WILLNEED) { 4134 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 4135 drm_gem_object_unreference(obj); 4136 mutex_unlock(&dev->struct_mutex); 4137 return -EINVAL; 4138 } 4139 4140 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { 4141 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 4142 args->handle); 4143 drm_gem_object_unreference(obj); 4144 mutex_unlock(&dev->struct_mutex); 4145 return -EINVAL; 4146 } 4147 4148 obj_priv->user_pin_count++; 4149 obj_priv->pin_filp = file_priv; 4150 if (obj_priv->user_pin_count == 1) { 4151 ret = i915_gem_object_pin(obj, args->alignment); 4152 if (ret != 0) { 4153 drm_gem_object_unreference(obj); 4154 mutex_unlock(&dev->struct_mutex); 4155 return ret; 4156 } 4157 } 4158 4159 i915_gem_object_flush_cpu_write_domain(obj); 4160 args->offset = obj_priv->gtt_offset; 4161 drm_gem_object_unreference(obj); 4162 mutex_unlock(&dev->struct_mutex); 4163 4164 return 0; 4165} 4166 4167int 4168i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4169 struct drm_file *file_priv) 4170{ 4171 struct drm_i915_gem_pin *args = data; 4172 struct drm_gem_object *obj; 4173 struct drm_i915_gem_object *obj_priv; 4174 4175 mutex_lock(&dev->struct_mutex); 4176 4177 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4178 if (obj == NULL) { 4179 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", 4180 args->handle); 4181 mutex_unlock(&dev->struct_mutex); 4182 return -ENOENT; 4183 } 4184 4185 obj_priv = to_intel_bo(obj); 4186 if (obj_priv->pin_filp != file_priv) { 4187 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 4188 args->handle); 4189 drm_gem_object_unreference(obj); 4190 mutex_unlock(&dev->struct_mutex); 4191 return -EINVAL; 4192 } 4193 obj_priv->user_pin_count--; 4194 if (obj_priv->user_pin_count == 0) { 4195 obj_priv->pin_filp = NULL; 4196 i915_gem_object_unpin(obj); 4197 } 4198 4199 drm_gem_object_unreference(obj); 4200 mutex_unlock(&dev->struct_mutex); 4201 return 0; 4202} 4203 4204int 4205i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4206 struct drm_file *file_priv) 4207{ 4208 struct drm_i915_gem_busy *args = data; 4209 struct drm_gem_object *obj; 4210 struct drm_i915_gem_object *obj_priv; 4211 4212 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4213 if (obj == NULL) { 4214 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", 4215 args->handle); 4216 return -ENOENT; 4217 } 4218 4219 mutex_lock(&dev->struct_mutex); 4220 4221 /* Count all active objects as busy, even if they are currently not used 4222 * by the gpu. Users of this interface expect objects to eventually 4223 * become non-busy without any further actions, therefore emit any 4224 * necessary flushes here. 4225 */ 4226 obj_priv = to_intel_bo(obj); 4227 args->busy = obj_priv->active; 4228 if (args->busy) { 4229 /* Unconditionally flush objects, even when the gpu still uses this 4230 * object. Userspace calling this function indicates that it wants to 4231 * use this buffer rather sooner than later, so issuing the required 4232 * flush earlier is beneficial. 4233 */ 4234 if (obj->write_domain) { 4235 i915_gem_flush(dev, 0, obj->write_domain); 4236 (void)i915_add_request(dev, file_priv, obj->write_domain, obj_priv->ring); 4237 } 4238 4239 /* Update the active list for the hardware's current position. 4240 * Otherwise this only updates on a delayed timer or when irqs 4241 * are actually unmasked, and our working set ends up being 4242 * larger than required. 4243 */ 4244 i915_gem_retire_requests_ring(dev, obj_priv->ring); 4245 4246 args->busy = obj_priv->active; 4247 } 4248 4249 drm_gem_object_unreference(obj); 4250 mutex_unlock(&dev->struct_mutex); 4251 return 0; 4252} 4253 4254int 4255i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4256 struct drm_file *file_priv) 4257{ 4258 return i915_gem_ring_throttle(dev, file_priv); 4259} 4260 4261int 4262i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4263 struct drm_file *file_priv) 4264{ 4265 struct drm_i915_gem_madvise *args = data; 4266 struct drm_gem_object *obj; 4267 struct drm_i915_gem_object *obj_priv; 4268 4269 switch (args->madv) { 4270 case I915_MADV_DONTNEED: 4271 case I915_MADV_WILLNEED: 4272 break; 4273 default: 4274 return -EINVAL; 4275 } 4276 4277 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4278 if (obj == NULL) { 4279 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n", 4280 args->handle); 4281 return -ENOENT; 4282 } 4283 4284 mutex_lock(&dev->struct_mutex); 4285 obj_priv = to_intel_bo(obj); 4286 4287 if (obj_priv->pin_count) { 4288 drm_gem_object_unreference(obj); 4289 mutex_unlock(&dev->struct_mutex); 4290 4291 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n"); 4292 return -EINVAL; 4293 } 4294 4295 if (obj_priv->madv != __I915_MADV_PURGED) 4296 obj_priv->madv = args->madv; 4297 4298 /* if the object is no longer bound, discard its backing storage */ 4299 if (i915_gem_object_is_purgeable(obj_priv) && 4300 obj_priv->gtt_space == NULL) 4301 i915_gem_object_truncate(obj); 4302 4303 args->retained = obj_priv->madv != __I915_MADV_PURGED; 4304 4305 drm_gem_object_unreference(obj); 4306 mutex_unlock(&dev->struct_mutex); 4307 4308 return 0; 4309} 4310 4311struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev, 4312 size_t size) 4313{ 4314 struct drm_i915_gem_object *obj; 4315 4316 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 4317 if (obj == NULL) 4318 return NULL; 4319 4320 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4321 kfree(obj); 4322 return NULL; 4323 } 4324 4325 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4326 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4327 4328 obj->agp_type = AGP_USER_MEMORY; 4329 obj->base.driver_private = NULL; 4330 obj->fence_reg = I915_FENCE_REG_NONE; 4331 INIT_LIST_HEAD(&obj->list); 4332 INIT_LIST_HEAD(&obj->gpu_write_list); 4333 obj->madv = I915_MADV_WILLNEED; 4334 4335 trace_i915_gem_object_create(&obj->base); 4336 4337 return &obj->base; 4338} 4339 4340int i915_gem_init_object(struct drm_gem_object *obj) 4341{ 4342 BUG(); 4343 4344 return 0; 4345} 4346 4347static void i915_gem_free_object_tail(struct drm_gem_object *obj) 4348{ 4349 struct drm_device *dev = obj->dev; 4350 drm_i915_private_t *dev_priv = dev->dev_private; 4351 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 4352 int ret; 4353 4354 ret = i915_gem_object_unbind(obj); 4355 if (ret == -ERESTARTSYS) { 4356 list_move(&obj_priv->list, 4357 &dev_priv->mm.deferred_free_list); 4358 return; 4359 } 4360 4361 if (obj_priv->mmap_offset) 4362 i915_gem_free_mmap_offset(obj); 4363 4364 drm_gem_object_release(obj); 4365 4366 kfree(obj_priv->page_cpu_valid); 4367 kfree(obj_priv->bit_17); 4368 kfree(obj_priv); 4369} 4370 4371void i915_gem_free_object(struct drm_gem_object *obj) 4372{ 4373 struct drm_device *dev = obj->dev; 4374 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 4375 4376 trace_i915_gem_object_destroy(obj); 4377 4378 while (obj_priv->pin_count > 0) 4379 i915_gem_object_unpin(obj); 4380 4381 if (obj_priv->phys_obj) 4382 i915_gem_detach_phys_object(dev, obj); 4383 4384 i915_gem_free_object_tail(obj); 4385} 4386 4387int 4388i915_gem_idle(struct drm_device *dev) 4389{ 4390 drm_i915_private_t *dev_priv = dev->dev_private; 4391 int ret; 4392 4393 mutex_lock(&dev->struct_mutex); 4394 4395 if (dev_priv->mm.suspended || 4396 (dev_priv->render_ring.gem_object == NULL) || 4397 (HAS_BSD(dev) && 4398 dev_priv->bsd_ring.gem_object == NULL)) { 4399 mutex_unlock(&dev->struct_mutex); 4400 return 0; 4401 } 4402 4403 ret = i915_gpu_idle(dev); 4404 if (ret) { 4405 mutex_unlock(&dev->struct_mutex); 4406 return ret; 4407 } 4408 4409 /* Under UMS, be paranoid and evict. */ 4410 if (!drm_core_check_feature(dev, DRIVER_MODESET)) { 4411 ret = i915_gem_evict_inactive(dev); 4412 if (ret) { 4413 mutex_unlock(&dev->struct_mutex); 4414 return ret; 4415 } 4416 } 4417 4418 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4419 * We need to replace this with a semaphore, or something. 4420 * And not confound mm.suspended! 4421 */ 4422 dev_priv->mm.suspended = 1; 4423 del_timer(&dev_priv->hangcheck_timer); 4424 4425 i915_kernel_lost_context(dev); 4426 i915_gem_cleanup_ringbuffer(dev); 4427 4428 mutex_unlock(&dev->struct_mutex); 4429 4430 /* Cancel the retire work handler, which should be idle now. */ 4431 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4432 4433 return 0; 4434} 4435 4436/* 4437 * 965+ support PIPE_CONTROL commands, which provide finer grained control 4438 * over cache flushing. 4439 */ 4440static int 4441i915_gem_init_pipe_control(struct drm_device *dev) 4442{ 4443 drm_i915_private_t *dev_priv = dev->dev_private; 4444 struct drm_gem_object *obj; 4445 struct drm_i915_gem_object *obj_priv; 4446 int ret; 4447 4448 obj = i915_gem_alloc_object(dev, 4096); 4449 if (obj == NULL) { 4450 DRM_ERROR("Failed to allocate seqno page\n"); 4451 ret = -ENOMEM; 4452 goto err; 4453 } 4454 obj_priv = to_intel_bo(obj); 4455 obj_priv->agp_type = AGP_USER_CACHED_MEMORY; 4456 4457 ret = i915_gem_object_pin(obj, 4096); 4458 if (ret) 4459 goto err_unref; 4460 4461 dev_priv->seqno_gfx_addr = obj_priv->gtt_offset; 4462 dev_priv->seqno_page = kmap(obj_priv->pages[0]); 4463 if (dev_priv->seqno_page == NULL) 4464 goto err_unpin; 4465 4466 dev_priv->seqno_obj = obj; 4467 memset(dev_priv->seqno_page, 0, PAGE_SIZE); 4468 4469 return 0; 4470 4471err_unpin: 4472 i915_gem_object_unpin(obj); 4473err_unref: 4474 drm_gem_object_unreference(obj); 4475err: 4476 return ret; 4477} 4478 4479 4480static void 4481i915_gem_cleanup_pipe_control(struct drm_device *dev) 4482{ 4483 drm_i915_private_t *dev_priv = dev->dev_private; 4484 struct drm_gem_object *obj; 4485 struct drm_i915_gem_object *obj_priv; 4486 4487 obj = dev_priv->seqno_obj; 4488 obj_priv = to_intel_bo(obj); 4489 kunmap(obj_priv->pages[0]); 4490 i915_gem_object_unpin(obj); 4491 drm_gem_object_unreference(obj); 4492 dev_priv->seqno_obj = NULL; 4493 4494 dev_priv->seqno_page = NULL; 4495} 4496 4497int 4498i915_gem_init_ringbuffer(struct drm_device *dev) 4499{ 4500 drm_i915_private_t *dev_priv = dev->dev_private; 4501 int ret; 4502 4503 dev_priv->render_ring = render_ring; 4504 4505 if (!I915_NEED_GFX_HWS(dev)) { 4506 dev_priv->render_ring.status_page.page_addr 4507 = dev_priv->status_page_dmah->vaddr; 4508 memset(dev_priv->render_ring.status_page.page_addr, 4509 0, PAGE_SIZE); 4510 } 4511 4512 if (HAS_PIPE_CONTROL(dev)) { 4513 ret = i915_gem_init_pipe_control(dev); 4514 if (ret) 4515 return ret; 4516 } 4517 4518 ret = intel_init_ring_buffer(dev, &dev_priv->render_ring); 4519 if (ret) 4520 goto cleanup_pipe_control; 4521 4522 if (HAS_BSD(dev)) { 4523 dev_priv->bsd_ring = bsd_ring; 4524 ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring); 4525 if (ret) 4526 goto cleanup_render_ring; 4527 } 4528 4529 dev_priv->next_seqno = 1; 4530 4531 return 0; 4532 4533cleanup_render_ring: 4534 intel_cleanup_ring_buffer(dev, &dev_priv->render_ring); 4535cleanup_pipe_control: 4536 if (HAS_PIPE_CONTROL(dev)) 4537 i915_gem_cleanup_pipe_control(dev); 4538 return ret; 4539} 4540 4541void 4542i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4543{ 4544 drm_i915_private_t *dev_priv = dev->dev_private; 4545 4546 intel_cleanup_ring_buffer(dev, &dev_priv->render_ring); 4547 if (HAS_BSD(dev)) 4548 intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring); 4549 if (HAS_PIPE_CONTROL(dev)) 4550 i915_gem_cleanup_pipe_control(dev); 4551} 4552 4553int 4554i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4555 struct drm_file *file_priv) 4556{ 4557 drm_i915_private_t *dev_priv = dev->dev_private; 4558 int ret; 4559 4560 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4561 return 0; 4562 4563 if (atomic_read(&dev_priv->mm.wedged)) { 4564 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4565 atomic_set(&dev_priv->mm.wedged, 0); 4566 } 4567 4568 mutex_lock(&dev->struct_mutex); 4569 dev_priv->mm.suspended = 0; 4570 4571 ret = i915_gem_init_ringbuffer(dev); 4572 if (ret != 0) { 4573 mutex_unlock(&dev->struct_mutex); 4574 return ret; 4575 } 4576 4577 spin_lock(&dev_priv->mm.active_list_lock); 4578 BUG_ON(!list_empty(&dev_priv->render_ring.active_list)); 4579 BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list)); 4580 spin_unlock(&dev_priv->mm.active_list_lock); 4581 4582 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 4583 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 4584 BUG_ON(!list_empty(&dev_priv->render_ring.request_list)); 4585 BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list)); 4586 mutex_unlock(&dev->struct_mutex); 4587 4588 ret = drm_irq_install(dev); 4589 if (ret) 4590 goto cleanup_ringbuffer; 4591 4592 return 0; 4593 4594cleanup_ringbuffer: 4595 mutex_lock(&dev->struct_mutex); 4596 i915_gem_cleanup_ringbuffer(dev); 4597 dev_priv->mm.suspended = 1; 4598 mutex_unlock(&dev->struct_mutex); 4599 4600 return ret; 4601} 4602 4603int 4604i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4605 struct drm_file *file_priv) 4606{ 4607 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4608 return 0; 4609 4610 drm_irq_uninstall(dev); 4611 return i915_gem_idle(dev); 4612} 4613 4614void 4615i915_gem_lastclose(struct drm_device *dev) 4616{ 4617 int ret; 4618 4619 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4620 return; 4621 4622 ret = i915_gem_idle(dev); 4623 if (ret) 4624 DRM_ERROR("failed to idle hardware: %d\n", ret); 4625} 4626 4627void 4628i915_gem_load(struct drm_device *dev) 4629{ 4630 int i; 4631 drm_i915_private_t *dev_priv = dev->dev_private; 4632 4633 spin_lock_init(&dev_priv->mm.active_list_lock); 4634 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 4635 INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list); 4636 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 4637 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4638 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list); 4639 INIT_LIST_HEAD(&dev_priv->render_ring.active_list); 4640 INIT_LIST_HEAD(&dev_priv->render_ring.request_list); 4641 if (HAS_BSD(dev)) { 4642 INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list); 4643 INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list); 4644 } 4645 for (i = 0; i < 16; i++) 4646 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 4647 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 4648 i915_gem_retire_work_handler); 4649 spin_lock(&shrink_list_lock); 4650 list_add(&dev_priv->mm.shrink_list, &shrink_list); 4651 spin_unlock(&shrink_list_lock); 4652 4653 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 4654 if (IS_GEN3(dev)) { 4655 u32 tmp = I915_READ(MI_ARB_STATE); 4656 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) { 4657 /* arb state is a masked write, so set bit + bit in mask */ 4658 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT); 4659 I915_WRITE(MI_ARB_STATE, tmp); 4660 } 4661 } 4662 4663 /* Old X drivers will take 0-2 for front, back, depth buffers */ 4664 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4665 dev_priv->fence_reg_start = 3; 4666 4667 if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4668 dev_priv->num_fence_regs = 16; 4669 else 4670 dev_priv->num_fence_regs = 8; 4671 4672 /* Initialize fence registers to zero */ 4673 if (IS_I965G(dev)) { 4674 for (i = 0; i < 16; i++) 4675 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0); 4676 } else { 4677 for (i = 0; i < 8; i++) 4678 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0); 4679 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4680 for (i = 0; i < 8; i++) 4681 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0); 4682 } 4683 i915_gem_detect_bit_6_swizzle(dev); 4684 init_waitqueue_head(&dev_priv->pending_flip_queue); 4685} 4686 4687/* 4688 * Create a physically contiguous memory object for this object 4689 * e.g. for cursor + overlay regs 4690 */ 4691int i915_gem_init_phys_object(struct drm_device *dev, 4692 int id, int size, int align) 4693{ 4694 drm_i915_private_t *dev_priv = dev->dev_private; 4695 struct drm_i915_gem_phys_object *phys_obj; 4696 int ret; 4697 4698 if (dev_priv->mm.phys_objs[id - 1] || !size) 4699 return 0; 4700 4701 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 4702 if (!phys_obj) 4703 return -ENOMEM; 4704 4705 phys_obj->id = id; 4706 4707 phys_obj->handle = drm_pci_alloc(dev, size, align); 4708 if (!phys_obj->handle) { 4709 ret = -ENOMEM; 4710 goto kfree_obj; 4711 } 4712#ifdef CONFIG_X86 4713 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4714#endif 4715 4716 dev_priv->mm.phys_objs[id - 1] = phys_obj; 4717 4718 return 0; 4719kfree_obj: 4720 kfree(phys_obj); 4721 return ret; 4722} 4723 4724void i915_gem_free_phys_object(struct drm_device *dev, int id) 4725{ 4726 drm_i915_private_t *dev_priv = dev->dev_private; 4727 struct drm_i915_gem_phys_object *phys_obj; 4728 4729 if (!dev_priv->mm.phys_objs[id - 1]) 4730 return; 4731 4732 phys_obj = dev_priv->mm.phys_objs[id - 1]; 4733 if (phys_obj->cur_obj) { 4734 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 4735 } 4736 4737#ifdef CONFIG_X86 4738 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4739#endif 4740 drm_pci_free(dev, phys_obj->handle); 4741 kfree(phys_obj); 4742 dev_priv->mm.phys_objs[id - 1] = NULL; 4743} 4744 4745void i915_gem_free_all_phys_object(struct drm_device *dev) 4746{ 4747 int i; 4748 4749 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 4750 i915_gem_free_phys_object(dev, i); 4751} 4752 4753void i915_gem_detach_phys_object(struct drm_device *dev, 4754 struct drm_gem_object *obj) 4755{ 4756 struct drm_i915_gem_object *obj_priv; 4757 int i; 4758 int ret; 4759 int page_count; 4760 4761 obj_priv = to_intel_bo(obj); 4762 if (!obj_priv->phys_obj) 4763 return; 4764 4765 ret = i915_gem_object_get_pages(obj, 0); 4766 if (ret) 4767 goto out; 4768 4769 page_count = obj->size / PAGE_SIZE; 4770 4771 for (i = 0; i < page_count; i++) { 4772 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0); 4773 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4774 4775 memcpy(dst, src, PAGE_SIZE); 4776 kunmap_atomic(dst, KM_USER0); 4777 } 4778 drm_clflush_pages(obj_priv->pages, page_count); 4779 drm_agp_chipset_flush(dev); 4780 4781 i915_gem_object_put_pages(obj); 4782out: 4783 obj_priv->phys_obj->cur_obj = NULL; 4784 obj_priv->phys_obj = NULL; 4785} 4786 4787int 4788i915_gem_attach_phys_object(struct drm_device *dev, 4789 struct drm_gem_object *obj, 4790 int id, 4791 int align) 4792{ 4793 drm_i915_private_t *dev_priv = dev->dev_private; 4794 struct drm_i915_gem_object *obj_priv; 4795 int ret = 0; 4796 int page_count; 4797 int i; 4798 4799 if (id > I915_MAX_PHYS_OBJECT) 4800 return -EINVAL; 4801 4802 obj_priv = to_intel_bo(obj); 4803 4804 if (obj_priv->phys_obj) { 4805 if (obj_priv->phys_obj->id == id) 4806 return 0; 4807 i915_gem_detach_phys_object(dev, obj); 4808 } 4809 4810 /* create a new object */ 4811 if (!dev_priv->mm.phys_objs[id - 1]) { 4812 ret = i915_gem_init_phys_object(dev, id, 4813 obj->size, align); 4814 if (ret) { 4815 DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size); 4816 goto out; 4817 } 4818 } 4819 4820 /* bind to the object */ 4821 obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1]; 4822 obj_priv->phys_obj->cur_obj = obj; 4823 4824 ret = i915_gem_object_get_pages(obj, 0); 4825 if (ret) { 4826 DRM_ERROR("failed to get page list\n"); 4827 goto out; 4828 } 4829 4830 page_count = obj->size / PAGE_SIZE; 4831 4832 for (i = 0; i < page_count; i++) { 4833 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0); 4834 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4835 4836 memcpy(dst, src, PAGE_SIZE); 4837 kunmap_atomic(src, KM_USER0); 4838 } 4839 4840 i915_gem_object_put_pages(obj); 4841 4842 return 0; 4843out: 4844 return ret; 4845} 4846 4847static int 4848i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 4849 struct drm_i915_gem_pwrite *args, 4850 struct drm_file *file_priv) 4851{ 4852 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 4853 void *obj_addr; 4854 int ret; 4855 char __user *user_data; 4856 4857 user_data = (char __user *) (uintptr_t) args->data_ptr; 4858 obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset; 4859 4860 DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size); 4861 ret = copy_from_user(obj_addr, user_data, args->size); 4862 if (ret) 4863 return -EFAULT; 4864 4865 drm_agp_chipset_flush(dev); 4866 return 0; 4867} 4868 4869void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv) 4870{ 4871 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 4872 4873 /* Clean up our request list when the client is going away, so that 4874 * later retire_requests won't dereference our soon-to-be-gone 4875 * file_priv. 4876 */ 4877 mutex_lock(&dev->struct_mutex); 4878 while (!list_empty(&i915_file_priv->mm.request_list)) 4879 list_del_init(i915_file_priv->mm.request_list.next); 4880 mutex_unlock(&dev->struct_mutex); 4881} 4882 4883static int 4884i915_gpu_is_active(struct drm_device *dev) 4885{ 4886 drm_i915_private_t *dev_priv = dev->dev_private; 4887 int lists_empty; 4888 4889 spin_lock(&dev_priv->mm.active_list_lock); 4890 lists_empty = list_empty(&dev_priv->mm.flushing_list) && 4891 list_empty(&dev_priv->render_ring.active_list); 4892 if (HAS_BSD(dev)) 4893 lists_empty &= list_empty(&dev_priv->bsd_ring.active_list); 4894 spin_unlock(&dev_priv->mm.active_list_lock); 4895 4896 return !lists_empty; 4897} 4898 4899static int 4900i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 4901{ 4902 drm_i915_private_t *dev_priv, *next_dev; 4903 struct drm_i915_gem_object *obj_priv, *next_obj; 4904 int cnt = 0; 4905 int would_deadlock = 1; 4906 4907 /* "fast-path" to count number of available objects */ 4908 if (nr_to_scan == 0) { 4909 spin_lock(&shrink_list_lock); 4910 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) { 4911 struct drm_device *dev = dev_priv->dev; 4912 4913 if (mutex_trylock(&dev->struct_mutex)) { 4914 list_for_each_entry(obj_priv, 4915 &dev_priv->mm.inactive_list, 4916 list) 4917 cnt++; 4918 mutex_unlock(&dev->struct_mutex); 4919 } 4920 } 4921 spin_unlock(&shrink_list_lock); 4922 4923 return (cnt / 100) * sysctl_vfs_cache_pressure; 4924 } 4925 4926 spin_lock(&shrink_list_lock); 4927 4928rescan: 4929 /* first scan for clean buffers */ 4930 list_for_each_entry_safe(dev_priv, next_dev, 4931 &shrink_list, mm.shrink_list) { 4932 struct drm_device *dev = dev_priv->dev; 4933 4934 if (! mutex_trylock(&dev->struct_mutex)) 4935 continue; 4936 4937 spin_unlock(&shrink_list_lock); 4938 i915_gem_retire_requests(dev); 4939 4940 list_for_each_entry_safe(obj_priv, next_obj, 4941 &dev_priv->mm.inactive_list, 4942 list) { 4943 if (i915_gem_object_is_purgeable(obj_priv)) { 4944 i915_gem_object_unbind(&obj_priv->base); 4945 if (--nr_to_scan <= 0) 4946 break; 4947 } 4948 } 4949 4950 spin_lock(&shrink_list_lock); 4951 mutex_unlock(&dev->struct_mutex); 4952 4953 would_deadlock = 0; 4954 4955 if (nr_to_scan <= 0) 4956 break; 4957 } 4958 4959 /* second pass, evict/count anything still on the inactive list */ 4960 list_for_each_entry_safe(dev_priv, next_dev, 4961 &shrink_list, mm.shrink_list) { 4962 struct drm_device *dev = dev_priv->dev; 4963 4964 if (! mutex_trylock(&dev->struct_mutex)) 4965 continue; 4966 4967 spin_unlock(&shrink_list_lock); 4968 4969 list_for_each_entry_safe(obj_priv, next_obj, 4970 &dev_priv->mm.inactive_list, 4971 list) { 4972 if (nr_to_scan > 0) { 4973 i915_gem_object_unbind(&obj_priv->base); 4974 nr_to_scan--; 4975 } else 4976 cnt++; 4977 } 4978 4979 spin_lock(&shrink_list_lock); 4980 mutex_unlock(&dev->struct_mutex); 4981 4982 would_deadlock = 0; 4983 } 4984 4985 if (nr_to_scan) { 4986 int active = 0; 4987 4988 /* 4989 * We are desperate for pages, so as a last resort, wait 4990 * for the GPU to finish and discard whatever we can. 4991 * This has a dramatic impact to reduce the number of 4992 * OOM-killer events whilst running the GPU aggressively. 4993 */ 4994 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) { 4995 struct drm_device *dev = dev_priv->dev; 4996 4997 if (!mutex_trylock(&dev->struct_mutex)) 4998 continue; 4999 5000 spin_unlock(&shrink_list_lock); 5001 5002 if (i915_gpu_is_active(dev)) { 5003 i915_gpu_idle(dev); 5004 active++; 5005 } 5006 5007 spin_lock(&shrink_list_lock); 5008 mutex_unlock(&dev->struct_mutex); 5009 } 5010 5011 if (active) 5012 goto rescan; 5013 } 5014 5015 spin_unlock(&shrink_list_lock); 5016 5017 if (would_deadlock) 5018 return -1; 5019 else if (cnt > 0) 5020 return (cnt / 100) * sysctl_vfs_cache_pressure; 5021 else 5022 return 0; 5023} 5024 5025static struct shrinker shrinker = { 5026 .shrink = i915_gem_shrink, 5027 .seeks = DEFAULT_SEEKS, 5028}; 5029 5030__init void 5031i915_gem_shrinker_init(void) 5032{ 5033 register_shrinker(&shrinker); 5034} 5035 5036__exit void 5037i915_gem_shrinker_exit(void) 5038{ 5039 unregister_shrinker(&shrinker); 5040} 5041