/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <dev/drm2/drmP.h>
#include <dev/drm2/i915/i915_drm.h>
#include <dev/drm2/i915/i915_drv.h>
#include <dev/drm2/i915/intel_drv.h>

#include <sys/limits.h>
#include <sys/sf_buf.h>

struct eb_objects {
	int and;
	struct hlist_head buckets[0];
};

static struct eb_objects *
eb_create(int size)
{
	struct eb_objects *eb;
	int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
	BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
	while (count > size)
		count >>= 1;
	eb = malloc(count*sizeof(struct hlist_head) +
	    sizeof(struct eb_objects),
	    DRM_I915_GEM, M_WAITOK | M_ZERO);
	if (eb == NULL)
		return eb;

	eb->and = count - 1;
	return eb;
}

static void
eb_reset(struct eb_objects *eb)
{
	memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{
	hlist_add_head(&obj->exec_node,
	    &eb->buckets[obj->exec_handle & eb->and]);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct drm_i915_gem_object *obj;

	head = &eb->buckets[handle & eb->and];
	hlist_for_each(node, head) {
		obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
		if (obj->exec_handle == handle)
			return obj;
	}

	return NULL;
}

static void
eb_destroy(struct eb_objects *eb)
{
	free(eb, DRM_I915_GEM);
}

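/*
 * Decide how a relocation should be written: through a CPU (sf_buf) mapping
 * when the object is already in the CPU write domain, cannot be mapped
 * through the GTT aperture, or uses a cache level other than NONE; otherwise
 * the write goes through a GTT mapping of the page.
 */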
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
	    !obj->map_and_fenceable ||
	    obj->cache_level != I915_CACHE_NONE);
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
    struct eb_objects *eb,
    struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
		    target_i915_obj->cache_level);
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
		    "obj %p target %d offset %d "
		    "read %08x write %08x",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    reloc->read_domains,
		    reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
	    & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
		    "obj %p target %d offset %d "
		    "read %08x write %08x",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    reloc->read_domains,
		    reloc->write_domain);
		return ret;
	}
	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
	    reloc->write_domain != target_obj->pending_write_domain)) {
		DRM_DEBUG("Write domain conflict: "
		    "obj %p target %d offset %d "
		    "new %08x old %08x\n",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    reloc->write_domain,
		    target_obj->pending_write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
		    "obj %p target %d offset %d size %d.\n",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
		    "obj %p target %d offset %d.\n",
		    obj, reloc->target_handle,
		    (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
		return -EFAULT;

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;
		struct sf_buf *sf;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
		    SFB_NOWAIT);
		if (sf == NULL)
			return -ENOMEM;
		vaddr = (void *)sf_buf_kva(sf);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		sf_buf_free(sf);
	} else {
		struct drm_i915_private *dev_priv = dev->dev_private;
		uint32_t __iomem *reloc_entry;
		char __iomem *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/* Map the page containing the relocation we're going
		 * to perform.
		 */
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev_priv->mm.gtt_base_addr +
		    (reloc->offset & ~PAGE_MASK), PAGE_SIZE,
		    PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t __iomem *)
		    (reloc_page + (reloc->offset & PAGE_MASK));
		*(volatile uint32_t *)reloc_entry = reloc->delta;
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

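/*
 * Fast-path relocation for a single object: relocation entries are copied in
 * from user space in chunks that fit a 512-byte on-stack buffer and applied
 * one at a time.  This path runs with pagefaults disabled, so the copies use
 * the "inatomic" variants and return -EFAULT on a fault, letting the caller
 * fall back to the slow path.
 */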
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
			return -EFAULT;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
			    &r->presumed_offset,
			    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
    struct eb_objects *eb,
    struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
    struct eb_objects *eb,
    struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	int ret = 0, pflags;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. For in such a case, the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the struct mutex again. Obviously this is bad and so
	 * lockdep complains vehemently.
	 */
	pflags = vm_fault_disable_pagefaults();
	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret)
			break;
	}
	vm_fault_enable_pagefaults(pflags);

	return ret;
}

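/*
 * Private flags kept in the high bits of the exec entry flags while the
 * objects are reserved.  They record that this execbuffer took a pin or a
 * fence reference, so i915_gem_execbuffer_unreserve_object() can release
 * exactly what was acquired, including on error paths.
 */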
322 */ 323 pflags = vm_fault_disable_pagefaults(); 324 list_for_each_entry(obj, objects, exec_list) { 325 ret = i915_gem_execbuffer_relocate_object(obj, eb); 326 if (ret) 327 break; 328 } 329 vm_fault_enable_pagefaults(pflags); 330 331 return ret; 332} 333 334#define __EXEC_OBJECT_HAS_PIN (1<<31) 335#define __EXEC_OBJECT_HAS_FENCE (1<<30) 336 337static int 338need_reloc_mappable(struct drm_i915_gem_object *obj) 339{ 340 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 341 return entry->relocation_count && !use_cpu_reloc(obj); 342} 343 344static int 345i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, 346 struct intel_ring_buffer *ring) 347{ 348 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 349 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 350 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; 351 bool need_fence, need_mappable; 352 int ret; 353 354 need_fence = 355 has_fenced_gpu_access && 356 entry->flags & EXEC_OBJECT_NEEDS_FENCE && 357 obj->tiling_mode != I915_TILING_NONE; 358 need_mappable = need_fence || need_reloc_mappable(obj); 359 360 ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false); 361 if (ret) 362 return ret; 363 364 entry->flags |= __EXEC_OBJECT_HAS_PIN; 365 366 if (has_fenced_gpu_access) { 367 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { 368 ret = i915_gem_object_get_fence(obj); 369 if (ret) 370 return ret; 371 372 if (i915_gem_object_pin_fence(obj)) 373 entry->flags |= __EXEC_OBJECT_HAS_FENCE; 374 375 obj->pending_fenced_gpu_access = true; 376 } 377 } 378 379 /* Ensure ppgtt mapping exists if needed */ 380 if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { 381 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 382 obj, obj->cache_level); 383 384 obj->has_aliasing_ppgtt_mapping = 1; 385 } 386 387 entry->offset = obj->gtt_offset; 388 return 0; 389} 390 391static void 392i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) 393{ 394 struct drm_i915_gem_exec_object2 *entry; 395 396 if (!obj->gtt_space) 397 return; 398 399 entry = obj->exec_entry; 400 401 if (entry->flags & __EXEC_OBJECT_HAS_FENCE) 402 i915_gem_object_unpin_fence(obj); 403 404 if (entry->flags & __EXEC_OBJECT_HAS_PIN) 405 i915_gem_object_unpin(obj); 406 407 entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); 408} 409 410static int 411i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, 412 struct drm_file *file, 413 struct list_head *objects) 414{ 415 struct drm_i915_gem_object *obj; 416 struct list_head ordered_objects; 417 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; 418 int retry; 419 420 INIT_LIST_HEAD(&ordered_objects); 421 while (!list_empty(objects)) { 422 struct drm_i915_gem_exec_object2 *entry; 423 bool need_fence, need_mappable; 424 425 obj = list_first_entry(objects, 426 struct drm_i915_gem_object, 427 exec_list); 428 entry = obj->exec_entry; 429 430 need_fence = 431 has_fenced_gpu_access && 432 entry->flags & EXEC_OBJECT_NEEDS_FENCE && 433 obj->tiling_mode != I915_TILING_NONE; 434 need_mappable = need_fence || need_reloc_mappable(obj); 435 436 if (need_mappable) 437 list_move(&obj->exec_list, &ordered_objects); 438 else 439 list_move_tail(&obj->exec_list, &ordered_objects); 440 441 obj->base.pending_read_domains = 0; 442 obj->base.pending_write_domain = 0; 443 obj->pending_fenced_gpu_access = false; 444 } 445 list_splice(&ordered_objects, objects); 446 447 /* Attempt to pin all of the buffers into the GTT. 
static int
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
    struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

	need_fence =
	    has_fenced_gpu_access &&
	    entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
	    obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(obj);

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
				return ret;

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	/* Ensure ppgtt mapping exists if needed */
	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
		    obj, obj->cache_level);

		obj->has_aliasing_ppgtt_mapping = 1;
	}

	entry->offset = obj->gtt_offset;
	return 0;
}

static void
i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry;

	if (!obj->gtt_space)
		return;

	entry = obj->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		i915_gem_object_unpin(obj);

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}

static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
    struct drm_file *file,
    struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	struct list_head ordered_objects;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
		    struct drm_i915_gem_object,
		    exec_list);
		entry = obj->exec_entry;

		need_fence =
		    has_fenced_gpu_access &&
		    entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		    obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(obj);

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
		obj->pending_fenced_gpu_access = false;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
			    has_fenced_gpu_access &&
			    entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			    obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = i915_gem_execbuffer_reserve_object(obj, ring);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = i915_gem_execbuffer_reserve_object(obj, ring);
			if (ret)
				goto err;
		}

err:		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list)
			i915_gem_execbuffer_unreserve_object(obj);

		if (ret != -ENOSPC || retry++)
			return ret;

		ret = i915_gem_evict_everything(ring->dev);
		if (ret)
			return ret;
	} while (1);
}

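/*
 * Slow-path relocation, used after the fast path hit a fault.  The objects
 * are released and the struct mutex dropped so the full relocation lists can
 * be copied from user space with pagefaults enabled; the user's presumed
 * offsets are invalidated, the lock is retaken, the objects are looked up
 * and reserved again, and the relocations are applied from the kernel copy.
 */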
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
    struct drm_file *file,
    struct intel_ring_buffer *ring,
    struct list_head *objects,
    struct eb_objects *eb,
    struct drm_i915_gem_exec_object2 *exec,
    int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	int *reloc_offset;
	int i, total, ret;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(objects)) {
		obj = list_first_entry(objects,
		    struct drm_i915_gem_object,
		    exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		DRM_LOCK(dev);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;

		if (copy_from_user(reloc+total, user_relocs,
		    exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			DRM_LOCK(dev);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (copy_to_user(&user_relocs[j].presumed_offset,
			    &invalid_offset,
			    sizeof(invalid_offset))) {
				ret = -EFAULT;
				DRM_LOCK(dev);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		DRM_LOCK(dev);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	for (i = 0; i < count; i++) {
		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
		    exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
			    exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		list_add_tail(&obj->exec_list, objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	ret = i915_gem_execbuffer_reserve(ring, file, objects);
	if (ret)
		goto err;

	list_for_each_entry(obj, objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
		    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are, this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
	u32 plane, flip_mask;
	int ret;

	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */

	for (plane = 0; flips >> plane; plane++) {
		if (((flips >> plane) & 1) == 0)
			continue;

		if (plane)
			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
		else
			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	}

	return 0;
}

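/*
 * Prepare all objects for the GPU: synchronise with any ring still using
 * them, clflush objects dirty in the CPU domain, wait for pending page flips
 * on objects about to be written, and invalidate the GPU caches before the
 * batch is dispatched.
 */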
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
    struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	uint32_t flush_domains = 0;
	uint32_t flips = 0;
	int ret;

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			i915_gem_clflush_object(obj);

		if (obj->base.pending_write_domain)
			flips |= atomic_read(&obj->pending_flip);

		flush_domains |= obj->base.write_domain;
	}

	if (flips) {
		ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
		if (ret)
			return ret;
	}

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

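/*
 * Validate the exec list and wire down the user pages backing each object's
 * relocation entries, so the fast relocation path can copy them with
 * pagefaults disabled.  The held pages are returned through *map / *maplen
 * and released by the caller once the execbuffer completes or fails.
 */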
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
    int count, vm_page_t ***map, int **maplen)
{
	int i;
	int relocs_total = 0;
	int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
	vm_page_t *ma;

	/* XXXKIB various limits checking is missing there */
	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
	*maplen = malloc(count * sizeof(*maplen), DRM_I915_GEM, M_WAITOK |
	    M_ZERO);

	for (i = 0; i < count; i++) {
		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
		int length; /* limited by fault_in_pages_readable() */

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
		    sizeof(struct drm_i915_gem_relocation_entry);
		if (length == 0) {
			(*map)[i] = NULL;
			continue;
		}

		/*
		 * Since both start and end of the relocation region
		 * may not be aligned on the page boundary, be
		 * conservative and request a page slot for each
		 * partial page.  Thus +2.
		 */
		int page_count;

		page_count = howmany(length, PAGE_SIZE) + 2;
		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		(*maplen)[i] = vm_fault_quick_hold_pages(
		    &curproc->p_vmspace->vm_map, (vm_offset_t)ptr, length,
		    VM_PROT_READ | VM_PROT_WRITE, ma, page_count);
		if ((*maplen)[i] == -1) {
			free(ma, DRM_I915_GEM);
			(*map)[i] = NULL;
			return -EFAULT;
		}
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
    struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, objects, exec_list) {
#if defined(KTR)
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;
#endif

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->last_write_seqno = intel_ring_get_seqno(ring);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_fb_busy(obj);
		}

		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
		    obj, old_read, old_write);
	}
}

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
    struct drm_file *file,
    struct intel_ring_buffer *ring)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)i915_add_request(ring, file, NULL);
}

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

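/*
 * Common implementation behind both execbuffer ioctls: validate the request,
 * select the target ring, look up and reserve every object, apply
 * relocations (falling back to the slow path on a fault), flush objects into
 * the GPU domain, emit any required ring state (constants mode, SOL resets,
 * clip rectangles), then dispatch the batch and queue a request to retire it.
 */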
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
    struct drm_file *file,
    struct drm_i915_gem_execbuffer2 *args,
    struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct list_head objects;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 mask;
	u32 flags;
	int ret, mode, i;
	vm_page_t **relocs_ma;
	int *relocs_len;

	if (!i915_gem_check_execbuffer(args)) {
		DRM_DEBUG("execbuf with invalid offset/length\n");
		return -EINVAL;
	}

	ret = validate_exec_list(exec, args->buffer_count,
	    &relocs_ma, &relocs_len);
	if (ret)
		goto pre_mutex_err;

	flags = 0;
	if (args->flags & I915_EXEC_SECURE) {
		if (!file->is_master || !capable(CAP_SYS_ADMIN)) {
			ret = -EPERM;
			goto pre_mutex_err;
		}

		flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->ring[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->ring[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
			    ring->name);
			ret = -EPERM;
			goto pre_mutex_err;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->ring[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
			    ring->name);
			ret = -EPERM;
			goto pre_mutex_err;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
		    (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_mutex_err;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
		    (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_mutex_err;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->ring[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				ret = -EINVAL;
				goto pre_mutex_err;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				ret = -EINVAL;
				goto pre_mutex_err;
			}

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		ret = -EINVAL;
		goto pre_mutex_err;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		ret = -EINVAL;
		goto pre_mutex_err;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			ret = -EINVAL;
			goto pre_mutex_err;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			ret = -EINVAL;
			goto pre_mutex_err;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
			    args->num_cliprects);
			ret = -EINVAL;
			goto pre_mutex_err;
		}

		cliprects = malloc(args->num_cliprects * sizeof(*cliprects),
		    DRM_I915_GEM, M_WAITOK);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		if (copy_from_user(cliprects,
		    (struct drm_clip_rect __user *)(uintptr_t)
		    args->cliprects_ptr,
		    sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->mm.suspended) {
		DRM_UNLOCK(dev);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	eb = eb_create(args->buffer_count);
	if (eb == NULL) {
		DRM_UNLOCK(dev);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	INIT_LIST_HEAD(&objects);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
		    exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
			    exec[i].handle, i);
			/* prevent error path from reading uninitialized data */
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->exec_list)) {
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
			    obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		list_add_tail(&obj->exec_list, &objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(objects.prev,
	    struct drm_i915_gem_object,
	    exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
			    &objects, eb,
			    exec,
			    args->buffer_count);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but let's be paranoid and do it
	 * unconditionally for now. */
	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);

	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	if (ring == &dev_priv->ring[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
			    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
			    exec_start, exec_len,
			    flags);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
		    exec_start, exec_len,
		    flags);
		if (ret)
			goto err;
	}

	CTR3(KTR_DRM, "ring_dispatch ring=%s seqno=%d flags=%u", ring->name,
	    intel_ring_get_seqno(ring), flags);

	i915_gem_execbuffer_move_to_active(&objects, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);
	while (!list_empty(&objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&objects,
		    struct drm_i915_gem_object,
		    exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

pre_mutex_err:
	for (i = 0; i < args->buffer_count; i++) {
		if (relocs_ma[i] != NULL) {
			vm_page_unhold_pages(relocs_ma[i], relocs_len[i]);
			free(relocs_ma[i], DRM_I915_GEM);
		}
	}
	free(relocs_len, DRM_I915_GEM);
	free(relocs_ma, DRM_I915_GEM);
	free(cliprects, DRM_I915_GEM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	/* XXXKIB user-controlled malloc size */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
		    args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
	    (void __user *)(uintptr_t)args->buffers_ptr,
	    sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
		    args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
		    exec_list,
		    sizeof(*exec_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
			    "back to user (%d)\n",
			    args->buffer_count, ret);
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* XXXKIB user-controllable malloc size */
	exec2_list = malloc(sizeof(*exec2_list)*args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
		    args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
	    (struct drm_i915_relocation_entry __user *)
	    (uintptr_t) args->buffers_ptr,
	    sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
		    args->buffer_count, ret);
		free(exec2_list, DRM_I915_GEM);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
		    exec2_list,
		    sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
			    "back to user (%d)\n",
			    args->buffer_count, ret);
		}
	}

	free(exec2_list, DRM_I915_GEM);
	return ret;
}