i915_gem_execbuffer.c revision 289719
/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/drm2/i915/i915_gem_execbuffer.c 289719 2015-10-21 20:49:45Z jhb $");

#include <dev/drm2/drmP.h>
#include <dev/drm2/drm.h>
#include <dev/drm2/i915/i915_drm.h>
#include <dev/drm2/i915/i915_drv.h>
#include <dev/drm2/i915/intel_drv.h>
#include <sys/limits.h>
#include <sys/sf_buf.h>

struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
	uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped by GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
				  struct intel_ring_buffer *ring,
				  struct change_domains *cd)
{
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (obj->base.pending_write_domain == 0)
		obj->base.pending_read_domains |= obj->base.read_domains;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->base.write_domain &&
	    (((obj->base.write_domain != obj->base.pending_read_domains ||
	       obj->ring != ring)) ||
	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
		flush_domains |= obj->base.write_domain;
		invalidate_domains |=
			obj->base.pending_read_domains & ~obj->base.write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
		i915_gem_clflush_object(obj);

	if (obj->base.pending_write_domain)
		cd->flips |= atomic_load_acq_int(&obj->pending_flip);

	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains).  So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
		obj->base.pending_write_domain = obj->base.write_domain;

	cd->invalidate_domains |= invalidate_domains;
	cd->flush_domains |= flush_domains;
	if (flush_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(obj->ring);
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(ring);
}

struct eb_objects {
	u_long hashmask;
	LIST_HEAD(, drm_i915_gem_object) *buckets;
};

static struct eb_objects *
eb_create(int size)
{
	struct eb_objects *eb;

	eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
	return (eb);
}

static void
eb_reset(struct eb_objects *eb)
{
	int i;

	for (i = 0; i <= eb->hashmask; i++)
		LIST_INIT(&eb->buckets[i]);
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{

	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
	    obj, exec_node);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	struct drm_i915_gem_object *obj;

	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
		if (obj->exec_handle == handle)
			return (obj);
	}
	return (NULL);
}

static void
eb_destroy(struct eb_objects *eb)
{

	free(eb->buckets, DRM_I915_GEM);
	free(eb, DRM_I915_GEM);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
	    obj->cache_level != I915_CACHE_NONE);
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

#if WATCH_RELOC
	DRM_INFO("%s: obj %p offset %08x target %d "
		 "read %08x write %08x gtt %08x "
		 "presumed %08x delta %08x\n",
		 __func__,
		 obj,
		 (int) reloc->offset,
		 (int) reloc->target_handle,
		 (int) reloc->read_domains,
		 (int) reloc->write_domain,
		 (int) target_offset,
		 (int) reloc->presumed_offset,
		 reloc->delta);
#endif

	/* The target buffer should have appeared before us in the
	 * exec_object list, so it should have a GTT space bound by now.
	 */
	if (unlikely(target_offset == 0)) {
		DRM_DEBUG("No GTT space found for object %d\n",
			  reloc->target_handle);
		return ret;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
		     reloc->write_domain != target_obj->pending_write_domain)) {
		DRM_DEBUG("Write domain conflict: "
			  "obj %p target %d offset %d "
			  "new %08x old %08x\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->write_domain,
			  target_obj->pending_write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
		return (-EFAULT);

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;
		struct sf_buf *sf;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
		    SFB_NOWAIT);
		if (sf == NULL)
			return (-ENOMEM);
		vaddr = (void *)sf_buf_kva(sf);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		sf_buf_free(sf);
	} else {
		uint32_t *reloc_entry;
		char *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/* Map the page containing the relocation we're going to perform. */
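		/*
		 * GTT path: the relocation offset becomes an offset into the
		 * GTT aperture.  The single page that holds the target dword
		 * is given a transient write-combining kernel mapping via the
		 * aperture (dev->agp->base), the dword is patched, and the
		 * mapping is dropped again immediately afterwards.
		 */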
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
		    PAGE_MASK));
		*(volatile uint32_t *)reloc_entry = reloc->delta;
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > DRM_ARRAY_SIZE(stack_reloc))
			count = DRM_ARRAY_SIZE(stack_reloc);
		remain -= count;

		ret = -copyin_nofault(user_relocs, r, count*sizeof(r[0]));
		if (ret != 0)
			return (ret);

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    copyout_nofault(&r->presumed_offset,
					    &user_relocs->presumed_offset,
					    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
					 struct eb_objects *eb,
					 struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb,
			     struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	int ret, pflags;

	/* Try to move as many of the relocation targets off the active list
	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
	 * for the retirement with pagefaults disabled.
	 */
	i915_gem_retire_requests(dev);

	ret = 0;
	pflags = vm_fault_disable_pagefaults();
	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the device lock lest the user pass in the relocations
	 * contained within a mmaped bo.  For in such a case the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the device lock again.  Obviously this is bad.
518 */ 519 520 list_for_each_entry(obj, objects, exec_list) { 521 ret = i915_gem_execbuffer_relocate_object(obj, eb); 522 if (ret) 523 break; 524 } 525 vm_fault_enable_pagefaults(pflags); 526 527 return ret; 528} 529 530#define __EXEC_OBJECT_HAS_FENCE (1<<31) 531 532static int 533need_reloc_mappable(struct drm_i915_gem_object *obj) 534{ 535 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 536 return entry->relocation_count && !use_cpu_reloc(obj); 537} 538 539static int 540pin_and_fence_object(struct drm_i915_gem_object *obj, 541 struct intel_ring_buffer *ring) 542{ 543 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 544 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; 545 bool need_fence, need_mappable; 546 int ret; 547 548 need_fence = 549 has_fenced_gpu_access && 550 entry->flags & EXEC_OBJECT_NEEDS_FENCE && 551 obj->tiling_mode != I915_TILING_NONE; 552 need_mappable = need_fence || need_reloc_mappable(obj); 553 554 ret = i915_gem_object_pin(obj, entry->alignment, need_mappable); 555 if (ret) 556 return ret; 557 558 if (has_fenced_gpu_access) { 559 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { 560 ret = i915_gem_object_get_fence(obj); 561 if (ret) 562 goto err_unpin; 563 564 if (i915_gem_object_pin_fence(obj)) 565 entry->flags |= __EXEC_OBJECT_HAS_FENCE; 566 567 obj->pending_fenced_gpu_access = true; 568 } 569 } 570 571 entry->offset = obj->gtt_offset; 572 return 0; 573 574err_unpin: 575 i915_gem_object_unpin(obj); 576 return ret; 577} 578 579static int 580i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, 581 struct drm_file *file, 582 struct list_head *objects) 583{ 584 drm_i915_private_t *dev_priv; 585 struct drm_i915_gem_object *obj; 586 struct list_head ordered_objects; 587 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; 588 int ret, retry; 589 590 dev_priv = ring->dev->dev_private; 591 INIT_LIST_HEAD(&ordered_objects); 592 while (!list_empty(objects)) { 593 struct drm_i915_gem_exec_object2 *entry; 594 bool need_fence, need_mappable; 595 596 obj = list_first_entry(objects, 597 struct drm_i915_gem_object, 598 exec_list); 599 entry = obj->exec_entry; 600 601 need_fence = 602 has_fenced_gpu_access && 603 entry->flags & EXEC_OBJECT_NEEDS_FENCE && 604 obj->tiling_mode != I915_TILING_NONE; 605 need_mappable = need_fence || need_reloc_mappable(obj); 606 607 if (need_mappable) 608 list_move(&obj->exec_list, &ordered_objects); 609 else 610 list_move_tail(&obj->exec_list, &ordered_objects); 611 612 obj->base.pending_read_domains = 0; 613 obj->base.pending_write_domain = 0; 614 } 615 list_splice(&ordered_objects, objects); 616 617 /* Attempt to pin all of the buffers into the GTT. 618 * This is done in 3 phases: 619 * 620 * 1a. Unbind all objects that do not match the GTT constraints for 621 * the execbuffer (fenceable, mappable, alignment etc). 622 * 1b. Increment pin count for already bound objects. 623 * 2. Bind new objects. 624 * 3. Decrement pin count. 625 * 626 * This avoid unnecessary unbinding of later objects in order to make 627 * room for the earlier objects *unless* we need to defragment. 628 */ 629 retry = 0; 630 do { 631 ret = 0; 632 633 /* Unbind any ill-fitting objects or pin. 
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = pin_and_fence_object(obj, ring);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = pin_and_fence_object(obj, ring);
			if (ret) {
				int ret_ignore;

				/* This can potentially raise a harmless
				 * -EINVAL if we failed to bind in the above
				 * call. It cannot raise -EINTR since we know
				 * that the bo is freshly bound and so will
				 * not need to be flushed or waited upon.
				 */
				ret_ignore = i915_gem_object_unbind(obj);
				(void)ret_ignore;
				if (obj->gtt_space != NULL)
					printf("%s: gtt_space\n", __func__);
				break;
			}
		}

		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry;

			if (!obj->gtt_space)
				continue;

			entry = obj->exec_entry;
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
				i915_gem_object_unpin_fence(obj);
				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
			}

			i915_gem_object_unpin(obj);

			/* ... and ensure ppgtt mapping exist if needed. */
			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
						       obj, obj->cache_level);

				obj->has_aliasing_ppgtt_mapping = 1;
			}
		}

		if (ret != -ENOSPC || retry > 1)
			return ret;

		/* First attempt, just clear anything that is purgeable.
		 * Second attempt, clear the entire GTT.
		 */
		ret = i915_gem_evict_everything(ring->dev, retry == 0);
		if (ret)
			return ret;

		retry++;
	} while (1);

err:
	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
		struct drm_i915_gem_exec_object2 *entry;

		if (!obj->gtt_space)
			continue;

		entry = obj->exec_entry;
		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
			i915_gem_object_unpin_fence(obj);
			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
		}

		i915_gem_object_unpin(obj);
	}

	return ret;
}

static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_file *file,
				  struct intel_ring_buffer *ring,
				  struct list_head *objects,
				  struct eb_objects *eb,
				  struct drm_i915_gem_exec_object2 *exec,
				  int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	int *reloc_offset;
	int i, total, ret;

	/* We may process another execbuffer during the unlock... */
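	/*
	 * Drop all object references (and, below, the device lock) so the
	 * relocation entries can be copied in with page faults enabled;
	 * the objects are looked up and reserved again once the lock has
	 * been reacquired.
	 */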
	while (!list_empty(objects)) {
		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
	    M_WAITOK | M_ZERO);
	reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry *user_relocs;

		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
		ret = -copyin(user_relocs, reloc + total,
		    exec[i].relocation_count * sizeof(*reloc));
		if (ret != 0) {
			DRM_LOCK(dev);
			goto err;
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		DRM_LOCK(dev);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		list_add_tail(&obj->exec_list, objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	ret = i915_gem_execbuffer_reserve(ring, file, objects);
	if (ret)
		goto err;

	list_for_each_entry(obj, objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
		    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	free(reloc, DRM_I915_GEM);
	free(reloc_offset, DRM_I915_GEM);
	return ret;
}

static int
i915_gem_execbuffer_flush(struct drm_device *dev,
			  uint32_t invalidate_domains,
			  uint32_t flush_domains,
			  uint32_t flush_rings)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i, ret;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		for (i = 0; i < I915_NUM_RINGS; i++)
			if (flush_rings & (1 << i)) {
				ret = i915_gem_flush_ring(&dev_priv->rings[i],
				    invalidate_domains, flush_domains);
				if (ret)
					return ret;
			}
	}

	return 0;
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
	u32 plane, flip_mask;
	int ret;

	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */

	for (plane = 0; flips >> plane; plane++) {
		if (((flips >> plane) & 1) == 0)
			continue;

		if (plane)
			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
		else
			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	}

	return 0;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	struct change_domains cd;
	int ret;

	memset(&cd, 0, sizeof(cd));
	list_for_each_entry(obj, objects, exec_list)
		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

	if (cd.invalidate_domains | cd.flush_domains) {
#if WATCH_EXEC
		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
			 __func__,
			 cd.invalidate_domains,
			 cd.flush_domains);
#endif
		ret = i915_gem_execbuffer_flush(ring->dev,
						cd.invalidate_domains,
						cd.flush_domains,
						cd.flush_rings);
		if (ret)
			return ret;
	}

	if (cd.flips) {
		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
		if (ret)
			return ret;
	}

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;
	}

	return 0;
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
		   vm_page_t ***map, int **maplen)
{
	vm_page_t *ma;
	int i, length, page_count;

	/* XXXKIB various limits checking is missing there */
	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
	*maplen = malloc(count * sizeof(*maplen), DRM_I915_GEM, M_WAITOK |
	    M_ZERO);
	for (i = 0; i < count; i++) {
		/* First check for malicious input causing overflow */
		if (exec[i].relocation_count >
		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
			return -EINVAL;

		length = exec[i].relocation_count *
		    sizeof(struct drm_i915_gem_relocation_entry);
		if (length == 0) {
			(*map)[i] = NULL;
			continue;
		}
		/*
		 * Since both start and end of the relocation region
		 * may not be aligned on the page boundary, be
		 * conservative and request a page slot for each
		 * partial page.  Thus +2.
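		 * For example, length = 5000 with PAGE_SIZE = 4096 gives
		 * howmany(5000, 4096) + 2 = 4 page slots, enough for the
		 * worst case in which that range actually touches 3 pages.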
		 */
		page_count = howmany(length, PAGE_SIZE) + 2;
		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		(*maplen)[i] = vm_fault_quick_hold_pages(
		    &curproc->p_vmspace->vm_map, exec[i].relocs_ptr, length,
		    VM_PROT_READ | VM_PROT_WRITE, ma, page_count);
		if ((*maplen)[i] == -1) {
			free(ma, DRM_I915_GEM);
			(*map)[i] = NULL;
			return (-EFAULT);
		}
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring,
				   u32 seqno)
{
	struct drm_i915_gem_object *obj;
	uint32_t old_read, old_write;

	list_for_each_entry(obj, objects, exec_list) {
		old_read = obj->base.read_domains;
		old_write = obj->base.write_domain;

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring, seqno);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->pending_gpu_write = true;
			list_move_tail(&obj->gpu_write_list,
				       &ring->gpu_write_list);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_busy(ring->dev, obj);
		}
		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
		    obj, old_read, old_write);
	}

	intel_mark_busy(ring->dev, NULL);
}

int i915_gem_sync_exec_requests;

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_request *request;
	u32 invalidate;

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires.
	 *
	 * The sampler always gets flushed on i965 (sigh).
	 */
	invalidate = I915_GEM_DOMAIN_COMMAND;
	if (INTEL_INFO(dev)->gen >= 4)
		invalidate |= I915_GEM_DOMAIN_SAMPLER;
	if (ring->flush(ring, invalidate, 0)) {
		i915_gem_next_request_seqno(ring);
		return;
	}

	/* Add a breadcrumb for the completion of the batch buffer */
	request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
	if (request == NULL || i915_add_request(ring, file, request)) {
		i915_gem_next_request_seqno(ring);
		free(request, DRM_I915_GEM);
	} else if (i915_gem_sync_exec_requests) {
		i915_wait_request(ring, request->seqno);
		i915_gem_retire_requests(dev);
	}
}

static void
i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
    uint32_t batch_start_offset, uint32_t batch_len)
{
	char *mkva;
	uint64_t po_r, po_w;
	uint32_t cmd;

	po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
	    batch_start_offset + batch_len;
	if (batch_len > 0)
		po_r -= 4;
	mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
	    PAT_WRITE_COMBINING);
	po_r &= PAGE_MASK;
	cmd = *(uint32_t *)(mkva + po_r);

	if (cmd != MI_BATCH_BUFFER_END) {
		/*
		 * batch_len != 0 due to the check at the start of
		 * i915_gem_do_execbuffer
		 */
		if (batch_obj->base.size > batch_start_offset + batch_len) {
			po_w = po_r + 4;
			/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
		} else {
			po_w = po_r;
			DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
		}
		*(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
	}

	pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
}

int i915_fix_mi_batchbuffer_end = 0;

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct list_head objects;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	vm_page_t **relocs_ma;
	int *relocs_len;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 seqno;
	u32 mask;
	int ret, mode, i;

	if (!i915_gem_check_execbuffer(args)) {
		DRM_DEBUG("execbuf with invalid offset/length\n");
		return -EINVAL;
	}

	if (args->batch_len == 0)
		return (0);

	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma,
	    &relocs_len);
	if (ret != 0)
		goto pre_struct_lock_err;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->rings[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->rings[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_struct_lock_err;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->rings[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_struct_lock_err;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->rings[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->rings[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}
		cliprects = malloc(sizeof(*cliprects) * args->num_cliprects,
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
		    sizeof(*cliprects) * args->num_cliprects);
		if (ret != 0)
			goto pre_struct_lock_err;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_struct_lock_err;

	if (dev_priv->mm.suspended) {
		DRM_UNLOCK(dev);
		ret = -EBUSY;
		goto pre_struct_lock_err;
	}

	eb = eb_create(args->buffer_count);
	if (eb == NULL) {
		DRM_UNLOCK(dev);
		ret = -ENOMEM;
		goto pre_struct_lock_err;
	}

	/* Look up object handles */
	INIT_LIST_HEAD(&objects);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			/* prevent error path from reading uninitialized data */
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->exec_list)) {
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				  obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		list_add_tail(&obj->exec_list, &objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
								&objects, eb,
								exec,
								args->buffer_count);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	seqno = i915_gem_next_request_seqno(ring);
	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
		if (seqno < ring->sync_seqno[i]) {
			/* The GPU can not handle its semaphore value wrapping,
			 * so every billion or so execbuffers, we need to stall
			 * the GPU in order to reset the counters.
			 */
			ret = i915_gpu_idle(dev);
			if (ret)
				goto err;
			i915_gem_retire_requests(dev);

			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
		}
	}

	if (ring == &dev_priv->rings[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;

	if (i915_fix_mi_batchbuffer_end) {
		i915_gem_fix_mi_batchbuffer_end(batch_obj,
		    args->batch_start_offset, args->batch_len);
	}

	CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
	    exec_start, exec_len);

	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len);
		if (ret)
			goto err;
	}

	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);
	while (!list_empty(&objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
	DRM_UNLOCK(dev);

pre_struct_lock_err:
	for (i = 0; i < args->buffer_count; i++) {
		if (relocs_ma[i] != NULL) {
			vm_page_unhold_pages(relocs_ma[i], relocs_len[i]);
			free(relocs_ma[i], DRM_I915_GEM);
		}
	}
	free(relocs_len, DRM_I915_GEM);
	free(relocs_ma, DRM_I915_GEM);
	free(cliprects, DRM_I915_GEM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
		  (int) args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	/* XXXKIB user-controlled malloc size */
	exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
	    sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		free(exec_list, DRM_I915_GEM);
		free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec_list) * args->buffer_count);
		if (ret != 0) {
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	free(exec_list, DRM_I915_GEM);
	free(exec2_list, DRM_I915_GEM);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
		  (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* XXXKIB user-controllable malloc size */
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
	    sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		free(exec2_list, DRM_I915_GEM);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	free(exec2_list, DRM_I915_GEM);
	return ret;
}