i915_gem_execbuffer.c revision 255013
/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/drm2/i915/i915_gem_execbuffer.c 255013 2013-08-28 23:59:38Z jkim $");

#include <dev/drm2/drmP.h>
#include <dev/drm2/drm.h>
#include <dev/drm2/i915/i915_drm.h>
#include <dev/drm2/i915/i915_drv.h>
#include <dev/drm2/i915/intel_drv.h>
#include <sys/limits.h>
#include <sys/sf_buf.h>

struct change_domains {
    uint32_t invalidate_domains;
    uint32_t flush_domains;
    uint32_t flush_rings;
    uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped by GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
    struct intel_ring_buffer *ring, struct change_domains *cd)
{
    uint32_t invalidate_domains = 0, flush_domains = 0;

    /*
     * If the object isn't moving to a new write domain,
     * let the object stay in multiple read domains
     */
    if (obj->base.pending_write_domain == 0)
        obj->base.pending_read_domains |= obj->base.read_domains;

    /*
     * Flush the current write domain if
     * the new read domains don't match. Invalidate
     * any read domains which differ from the old
     * write domain
     */
    if (obj->base.write_domain &&
        (((obj->base.write_domain != obj->base.pending_read_domains ||
           obj->ring != ring)) ||
         (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
        flush_domains |= obj->base.write_domain;
        invalidate_domains |=
            obj->base.pending_read_domains & ~obj->base.write_domain;
    }
    /*
     * Invalidate any read caches which may have
     * stale data. That is, any new read domains.
     */
    invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
    if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
        i915_gem_clflush_object(obj);

    if (obj->base.pending_write_domain)
        cd->flips |= atomic_load_acq_int(&obj->pending_flip);

    /* The actual obj->write_domain will be updated with
     * pending_write_domain after we emit the accumulated flush for all
     * of our domain changes in execbuffers (which clears objects'
     * write_domains). So if we have a current write domain that we
     * aren't changing, set pending_write_domain to that.
     */
    if (flush_domains == 0 && obj->base.pending_write_domain == 0)
        obj->base.pending_write_domain = obj->base.write_domain;

    cd->invalidate_domains |= invalidate_domains;
    cd->flush_domains |= flush_domains;
    if (flush_domains & I915_GEM_GPU_DOMAINS)
        cd->flush_rings |= intel_ring_flag(obj->ring);
    if (invalidate_domains & I915_GEM_GPU_DOMAINS)
        cd->flush_rings |= intel_ring_flag(ring);
}

struct eb_objects {
    u_long hashmask;
    LIST_HEAD(, drm_i915_gem_object) *buckets;
};

static struct eb_objects *
eb_create(int size)
{
    struct eb_objects *eb;

    eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
    eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
    return (eb);
}

static void
eb_reset(struct eb_objects *eb)
{
    int i;

    for (i = 0; i <= eb->hashmask; i++)
        LIST_INIT(&eb->buckets[i]);
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{

    LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
        obj, exec_node);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
    struct drm_i915_gem_object *obj;

    LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
        if (obj->exec_handle == handle)
            return (obj);
    }
    return (NULL);
}

static void
eb_destroy(struct eb_objects *eb)
{

    free(eb->buckets, DRM_I915_GEM);
    free(eb, DRM_I915_GEM);
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *reloc)
{
    struct drm_device *dev = obj->base.dev;
    struct drm_gem_object *target_obj;
    uint32_t target_offset;
    int ret = -EINVAL;

    /* we already hold a reference to all valid objects */
    target_obj = &eb_get_object(eb, reloc->target_handle)->base;
    if (unlikely(target_obj == NULL))
        return -ENOENT;

    target_offset = to_intel_bo(target_obj)->gtt_offset;

#if WATCH_RELOC
    DRM_INFO("%s: obj %p offset %08x target %d "
        "read %08x write %08x gtt %08x "
        "presumed %08x delta %08x\n",
        __func__,
        obj,
        (int) reloc->offset,
        (int) reloc->target_handle,
        (int) reloc->read_domains,
        (int) reloc->write_domain,
        (int) target_offset,
        (int) reloc->presumed_offset,
        reloc->delta);
#endif

    /* The target buffer should have appeared before us in the
     * exec_object list, so it should have a GTT space bound by now.
     */
    if (unlikely(target_offset == 0)) {
        DRM_DEBUG("No GTT space found for object %d\n",
            reloc->target_handle);
        return ret;
    }

    /* Validate that the target is in a valid r/w GPU domain */
    if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
        DRM_DEBUG("reloc with multiple write domains: "
            "obj %p target %d offset %d "
            "read %08x write %08x",
            obj, reloc->target_handle,
            (int) reloc->offset,
            reloc->read_domains,
            reloc->write_domain);
        return ret;
    }
    if (unlikely((reloc->write_domain | reloc->read_domains)
        & ~I915_GEM_GPU_DOMAINS)) {
        DRM_DEBUG("reloc with read/write non-GPU domains: "
            "obj %p target %d offset %d "
            "read %08x write %08x",
            obj, reloc->target_handle,
            (int) reloc->offset,
            reloc->read_domains,
            reloc->write_domain);
        return ret;
    }
    if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
        reloc->write_domain != target_obj->pending_write_domain)) {
        DRM_DEBUG("Write domain conflict: "
            "obj %p target %d offset %d "
            "new %08x old %08x\n",
            obj, reloc->target_handle,
            (int) reloc->offset,
            reloc->write_domain,
            target_obj->pending_write_domain);
        return ret;
    }

    target_obj->pending_read_domains |= reloc->read_domains;
    target_obj->pending_write_domain |= reloc->write_domain;

    /* If the relocation already has the right value in it, no
     * more work needs to be done.
     */
    if (target_offset == reloc->presumed_offset)
        return 0;

    /* Check that the relocation address is valid... */
    if (unlikely(reloc->offset > obj->base.size - 4)) {
        DRM_DEBUG("Relocation beyond object bounds: "
            "obj %p target %d offset %d size %d.\n",
            obj, reloc->target_handle,
            (int) reloc->offset,
            (int) obj->base.size);
        return ret;
    }
    if (unlikely(reloc->offset & 3)) {
        DRM_DEBUG("Relocation not 4-byte aligned: "
            "obj %p target %d offset %d.\n",
            obj, reloc->target_handle,
            (int) reloc->offset);
        return ret;
    }

    reloc->delta += target_offset;
    if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
        uint32_t page_offset = reloc->offset & PAGE_MASK;
        char *vaddr;
        struct sf_buf *sf;

        sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
            SFB_NOWAIT);
        if (sf == NULL)
            return (-ENOMEM);
        vaddr = (void *)sf_buf_kva(sf);
        *(uint32_t *)(vaddr + page_offset) = reloc->delta;
        sf_buf_free(sf);
    } else {
        uint32_t *reloc_entry;
        char *reloc_page;

        /* We can't wait for rendering with pagefaults disabled */
        if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
            return (-EFAULT);
        ret = i915_gem_object_set_to_gtt_domain(obj, 1);
        if (ret)
            return ret;

        /*
         * Map the page containing the relocation we're going
         * to perform.
         */
        reloc->offset += obj->gtt_offset;
        reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
            ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
        reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
            PAGE_MASK));
        *(volatile uint32_t *)reloc_entry = reloc->delta;
        pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
    }

    /* and update the user's relocation entry */
    reloc->presumed_offset = target_offset;

    return 0;
}

static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
    struct eb_objects *eb)
{
    struct drm_i915_gem_relocation_entry *user_relocs;
    struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
    struct drm_i915_gem_relocation_entry reloc;
    int i, ret;

    user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
    for (i = 0; i < entry->relocation_count; i++) {
        ret = -copyin_nofault(user_relocs + i, &reloc, sizeof(reloc));
        if (ret != 0)
            return (ret);

        ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
        if (ret != 0)
            return (ret);

        ret = -copyout_nofault(&reloc.presumed_offset,
            &user_relocs[i].presumed_offset,
            sizeof(reloc.presumed_offset));
        if (ret != 0)
            return (ret);
    }

    return (0);
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
{
    const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
    int i, ret;

    for (i = 0; i < entry->relocation_count; i++) {
        ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
        if (ret)
            return ret;
    }

    return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
    struct eb_objects *eb, struct list_head *objects)
{
    struct drm_i915_gem_object *obj;
    int ret, pflags;

    /* Try to move as many of the relocation targets off the active list
     * to avoid unnecessary fallbacks to the slow path, as we cannot wait
     * for the retirement with pagefaults disabled.
     */
    i915_gem_retire_requests(dev);

    ret = 0;
    pflags = vm_fault_disable_pagefaults();
    /* This is the fast path and we cannot handle a pagefault whilst
     * holding the device lock lest the user pass in the relocations
     * contained within a mmaped bo. In such a case the page
     * fault handler would call i915_gem_fault() and we would try to
     * acquire the device lock again. Obviously this is bad.
     */

    list_for_each_entry(obj, objects, exec_list) {
        ret = i915_gem_execbuffer_relocate_object(obj, eb);
        if (ret != 0)
            break;
    }
    vm_fault_enable_pagefaults(pflags);
    return (ret);
}

#define __EXEC_OBJECT_HAS_FENCE (1<<31)

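/*
 * Pin the object into the GTT (mappable if it still has relocations to
 * process or needs a fence), and on pre-gen4 hardware acquire or clear a
 * fence register as requested by the exec entry flags.
 */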
static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
    struct intel_ring_buffer *ring)
{
    struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
    bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
    bool need_fence, need_mappable;
    int ret;

    need_fence =
        has_fenced_gpu_access &&
        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
        obj->tiling_mode != I915_TILING_NONE;
    need_mappable =
        entry->relocation_count ? true : need_fence;

    ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
    if (ret)
        return ret;

    if (has_fenced_gpu_access) {
        if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
            if (obj->tiling_mode) {
                ret = i915_gem_object_get_fence(obj, ring);
                if (ret)
                    goto err_unpin;

                entry->flags |= __EXEC_OBJECT_HAS_FENCE;
                i915_gem_object_pin_fence(obj);
            } else {
                ret = i915_gem_object_put_fence(obj);
                if (ret)
                    goto err_unpin;
            }
            obj->pending_fenced_gpu_access = true;
        }
    }

    entry->offset = obj->gtt_offset;
    return 0;

err_unpin:
    i915_gem_object_unpin(obj);
    return ret;
}

static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
    struct drm_file *file, struct list_head *objects)
{
    drm_i915_private_t *dev_priv;
    struct drm_i915_gem_object *obj;
    int ret, retry;
    bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
    struct list_head ordered_objects;

    dev_priv = ring->dev->dev_private;
    INIT_LIST_HEAD(&ordered_objects);
    while (!list_empty(objects)) {
        struct drm_i915_gem_exec_object2 *entry;
        bool need_fence, need_mappable;

        obj = list_first_entry(objects,
            struct drm_i915_gem_object,
            exec_list);
        entry = obj->exec_entry;

        need_fence =
            has_fenced_gpu_access &&
            entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
            obj->tiling_mode != I915_TILING_NONE;
        need_mappable =
            entry->relocation_count ? true : need_fence;

        if (need_mappable)
            list_move(&obj->exec_list, &ordered_objects);
        else
            list_move_tail(&obj->exec_list, &ordered_objects);

        obj->base.pending_read_domains = 0;
        obj->base.pending_write_domain = 0;
    }
    list_splice(&ordered_objects, objects);

    /* Attempt to pin all of the buffers into the GTT.
     * This is done in 3 phases:
     *
     * 1a. Unbind all objects that do not match the GTT constraints for
     *     the execbuffer (fenceable, mappable, alignment etc).
     * 1b. Increment pin count for already bound objects and obtain
     *     a fence register if required.
     * 2.  Bind new objects.
     * 3.  Decrement pin count.
     *
     * This avoids unnecessary unbinding of later objects in order to make
     * room for the earlier objects *unless* we need to defragment.
     */
    retry = 0;
    do {
        ret = 0;

        /* Unbind any ill-fitting objects or pin. */
        list_for_each_entry(obj, objects, exec_list) {
            struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
            bool need_fence, need_mappable;

            if (!obj->gtt_space)
                continue;

            need_fence =
                has_fenced_gpu_access &&
                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
                obj->tiling_mode != I915_TILING_NONE;
            need_mappable =
                entry->relocation_count ? true : need_fence;

            if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
                (need_mappable && !obj->map_and_fenceable))
                ret = i915_gem_object_unbind(obj);
            else
                ret = pin_and_fence_object(obj, ring);
            if (ret)
                goto err;
        }

        /* Bind fresh objects */
        list_for_each_entry(obj, objects, exec_list) {
            if (obj->gtt_space)
                continue;

            ret = pin_and_fence_object(obj, ring);
            if (ret) {
                int ret_ignore;

                /* This can potentially raise a harmless
                 * -EINVAL if we failed to bind in the above
                 * call. It cannot raise -EINTR since we know
                 * that the bo is freshly bound and so will
                 * not need to be flushed or waited upon.
                 */
                ret_ignore = i915_gem_object_unbind(obj);
                (void)ret_ignore;
                if (obj->gtt_space != NULL)
                    printf("%s: gtt_space\n", __func__);
                break;
            }
        }

        /* Decrement pin count for bound objects */
        list_for_each_entry(obj, objects, exec_list) {
            struct drm_i915_gem_exec_object2 *entry;

            if (!obj->gtt_space)
                continue;

            entry = obj->exec_entry;
            if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
                i915_gem_object_unpin_fence(obj);
                entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
            }

            i915_gem_object_unpin(obj);

            /* ... and ensure ppgtt mapping exists if needed. */
            if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
                i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
                    obj, obj->cache_level);

                obj->has_aliasing_ppgtt_mapping = 1;
            }
        }

        if (ret != -ENOSPC || retry > 1)
            return ret;

        /* First attempt, just clear anything that is purgeable.
         * Second attempt, clear the entire GTT.
         */
        ret = i915_gem_evict_everything(ring->dev, retry == 0);
        if (ret)
            return ret;

        retry++;
    } while (1);

err:
    list_for_each_entry_continue_reverse(obj, objects, exec_list) {
        struct drm_i915_gem_exec_object2 *entry;

        if (!obj->gtt_space)
            continue;

        entry = obj->exec_entry;
        if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
            i915_gem_object_unpin_fence(obj);
            entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
        }

        i915_gem_object_unpin(obj);
    }

    return ret;
}

static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
    struct drm_file *file, struct intel_ring_buffer *ring,
    struct list_head *objects, struct eb_objects *eb,
    struct drm_i915_gem_exec_object2 *exec, int count)
{
    struct drm_i915_gem_relocation_entry *reloc;
    struct drm_i915_gem_object *obj;
    int *reloc_offset;
    int i, total, ret;

    /* We may process another execbuffer during the unlock... */
    while (!list_empty(objects)) {
        obj = list_first_entry(objects,
            struct drm_i915_gem_object,
            exec_list);
        list_del_init(&obj->exec_list);
        drm_gem_object_unreference(&obj->base);
    }

    DRM_UNLOCK(dev);

    total = 0;
    for (i = 0; i < count; i++)
        total += exec[i].relocation_count;

    reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
        M_WAITOK | M_ZERO);
    reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);

    total = 0;
    for (i = 0; i < count; i++) {
        struct drm_i915_gem_relocation_entry *user_relocs;

        user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
        ret = -copyin(user_relocs, reloc + total,
            exec[i].relocation_count * sizeof(*reloc));
        if (ret != 0) {
            DRM_LOCK(dev);
            goto err;
        }

        reloc_offset[i] = total;
        total += exec[i].relocation_count;
    }

    ret = i915_mutex_lock_interruptible(dev);
    if (ret) {
        DRM_LOCK(dev);
        goto err;
    }

    /* reacquire the objects */
    eb_reset(eb);
    for (i = 0; i < count; i++) {
        struct drm_i915_gem_object *obj;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file,
            exec[i].handle));
        if (&obj->base == NULL) {
            DRM_DEBUG("Invalid object handle %d at index %d\n",
                exec[i].handle, i);
            ret = -ENOENT;
            goto err;
        }

        list_add_tail(&obj->exec_list, objects);
        obj->exec_handle = exec[i].handle;
        obj->exec_entry = &exec[i];
        eb_add_object(eb, obj);
    }

    ret = i915_gem_execbuffer_reserve(ring, file, objects);
    if (ret)
        goto err;

    list_for_each_entry(obj, objects, exec_list) {
        int offset = obj->exec_entry - exec;
        ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
            reloc + reloc_offset[offset]);
        if (ret)
            goto err;
    }

    /* Leave the user relocations as they are, this is the painfully slow
     * path, and we want to avoid the complication of dropping the lock
     * whilst having buffers reserved in the aperture and so causing
     * spurious ENOSPC for random operations.
     */

err:
    free(reloc, DRM_I915_GEM);
    free(reloc_offset, DRM_I915_GEM);
    return ret;
}

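/*
 * Emit the flushes accumulated in struct change_domains: a chipset flush
 * for CPU writes, a write barrier for GTT writes, and a ring flush on
 * every ring marked in flush_rings.
 */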
static int
i915_gem_execbuffer_flush(struct drm_device *dev,
    uint32_t invalidate_domains, uint32_t flush_domains,
    uint32_t flush_rings)
{
    drm_i915_private_t *dev_priv = dev->dev_private;
    int i, ret;

    if (flush_domains & I915_GEM_DOMAIN_CPU)
        intel_gtt_chipset_flush();

    if (flush_domains & I915_GEM_DOMAIN_GTT)
        wmb();

    if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
        for (i = 0; i < I915_NUM_RINGS; i++)
            if (flush_rings & (1 << i)) {
                ret = i915_gem_flush_ring(&dev_priv->rings[i],
                    invalidate_domains, flush_domains);
                if (ret)
                    return ret;
            }
    }

    return 0;
}

static bool
intel_enable_semaphores(struct drm_device *dev)
{
    if (INTEL_INFO(dev)->gen < 6)
        return 0;

    if (i915_semaphores >= 0)
        return i915_semaphores;

    /* Enable semaphores on SNB when IO remapping is off */
    if (INTEL_INFO(dev)->gen == 6)
        return !intel_iommu_enabled;

    return 1;
}

static int
i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
    struct intel_ring_buffer *to)
{
    struct intel_ring_buffer *from = obj->ring;
    u32 seqno;
    int ret, idx;

    if (from == NULL || to == from)
        return 0;

    /* XXX gpu semaphores are implicated in various hard hangs on SNB */
    if (!intel_enable_semaphores(obj->base.dev))
        return i915_gem_object_wait_rendering(obj);

    idx = intel_ring_sync_index(from, to);

    seqno = obj->last_rendering_seqno;
    if (seqno <= from->sync_seqno[idx])
        return 0;

    if (seqno == from->outstanding_lazy_request) {
        struct drm_i915_gem_request *request;

        request = malloc(sizeof(*request), DRM_I915_GEM,
            M_WAITOK | M_ZERO);
        ret = i915_add_request(from, NULL, request);
        if (ret) {
            free(request, DRM_I915_GEM);
            return ret;
        }

        seqno = request->seqno;
    }

    from->sync_seqno[idx] = seqno;

    return to->sync_to(to, from, seqno - 1);
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
    u32 plane, flip_mask;
    int ret;

    /* Check for any pending flips. As we only maintain a flip queue depth
     * of 1, we can simply insert a WAIT for the next display flip prior
     * to executing the batch and avoid stalling the CPU.
     */

    for (plane = 0; flips >> plane; plane++) {
        if (((flips >> plane) & 1) == 0)
            continue;

        if (plane)
            flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
        else
            flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

        ret = intel_ring_begin(ring, 2);
        if (ret)
            return ret;

        intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);
    }

    return 0;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
    struct list_head *objects)
{
    struct drm_i915_gem_object *obj;
    struct change_domains cd;
    int ret;

    memset(&cd, 0, sizeof(cd));
    list_for_each_entry(obj, objects, exec_list)
        i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

    if (cd.invalidate_domains | cd.flush_domains) {
#if WATCH_EXEC
        DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
            __func__,
            cd.invalidate_domains,
            cd.flush_domains);
#endif
        ret = i915_gem_execbuffer_flush(ring->dev,
            cd.invalidate_domains,
            cd.flush_domains,
            cd.flush_rings);
        if (ret)
            return ret;
    }

    if (cd.flips) {
        ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
        if (ret)
            return ret;
    }

    list_for_each_entry(obj, objects, exec_list) {
        ret = i915_gem_execbuffer_sync_rings(obj, ring);
        if (ret)
            return ret;
    }

    return 0;
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
    return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
    vm_page_t ***map)
{
    vm_page_t *ma;
    int i, length, page_count;

    /* XXXKIB various limits checking is missing there */
    *map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
    for (i = 0; i < count; i++) {
        /* First check for malicious input causing overflow */
        if (exec[i].relocation_count >
            INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
            return -EINVAL;

        length = exec[i].relocation_count *
            sizeof(struct drm_i915_gem_relocation_entry);
        if (length == 0) {
            (*map)[i] = NULL;
            continue;
        }
        /*
         * Since both the start and the end of the relocation region
         * may not be aligned on a page boundary, be
         * conservative and request a page slot for each
         * partial page. Thus +2.
         */
        page_count = howmany(length, PAGE_SIZE) + 2;
        ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
            DRM_I915_GEM, M_WAITOK | M_ZERO);
        if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
            exec[i].relocs_ptr, length, VM_PROT_READ | VM_PROT_WRITE,
            ma, page_count) == -1) {
            free(ma, DRM_I915_GEM);
            (*map)[i] = NULL;
            return (-EFAULT);
        }
    }

    return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
    struct intel_ring_buffer *ring, u32 seqno)
{
    struct drm_i915_gem_object *obj;
    uint32_t old_read, old_write;

    list_for_each_entry(obj, objects, exec_list) {
        old_read = obj->base.read_domains;
        old_write = obj->base.write_domain;

        obj->base.read_domains = obj->base.pending_read_domains;
        obj->base.write_domain = obj->base.pending_write_domain;
        obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

        i915_gem_object_move_to_active(obj, ring, seqno);
        if (obj->base.write_domain) {
            obj->dirty = 1;
            obj->pending_gpu_write = true;
            list_move_tail(&obj->gpu_write_list,
                &ring->gpu_write_list);
            intel_mark_busy(ring->dev, obj);
        }
        CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
            obj, old_read, old_write);
    }
}

int i915_gem_sync_exec_requests;

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
    struct drm_file *file, struct intel_ring_buffer *ring)
{
    struct drm_i915_gem_request *request;
    u32 invalidate;

    /*
     * Ensure that the commands in the batch buffer are
     * finished before the interrupt fires.
     *
     * The sampler always gets flushed on i965 (sigh).
     */
    invalidate = I915_GEM_DOMAIN_COMMAND;
    if (INTEL_INFO(dev)->gen >= 4)
        invalidate |= I915_GEM_DOMAIN_SAMPLER;
    if (ring->flush(ring, invalidate, 0)) {
        i915_gem_next_request_seqno(ring);
        return;
    }

    /* Add a breadcrumb for the completion of the batch buffer */
    request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
    if (request == NULL || i915_add_request(ring, file, request)) {
        i915_gem_next_request_seqno(ring);
        free(request, DRM_I915_GEM);
    } else if (i915_gem_sync_exec_requests)
        i915_wait_request(ring, request->seqno, true);
}

static void
i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
    uint32_t batch_start_offset, uint32_t batch_len)
{
    char *mkva;
    uint64_t po_r, po_w;
    uint32_t cmd;

    po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
        batch_start_offset + batch_len;
    if (batch_len > 0)
        po_r -= 4;
    mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
        PAT_WRITE_COMBINING);
    po_r &= PAGE_MASK;
    cmd = *(uint32_t *)(mkva + po_r);

    if (cmd != MI_BATCH_BUFFER_END) {
        /*
         * batch_len != 0 due to the check at the start of
         * i915_gem_do_execbuffer
         */
        if (batch_obj->base.size > batch_start_offset + batch_len) {
            po_w = po_r + 4;
            /* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
        } else {
            po_w = po_r;
            DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
        }
        *(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
    }

    pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
}

int i915_fix_mi_batchbuffer_end = 0;

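/*
 * For Gen7 render batches that request I915_EXEC_GEN7_SOL_RESET, zero the
 * four GEN7_SO_WRITE_OFFSET registers before dispatching the batch.
 */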
static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
    struct intel_ring_buffer *ring)
{
    drm_i915_private_t *dev_priv = dev->dev_private;
    int ret, i;

    if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
        return 0;

    ret = intel_ring_begin(ring, 4 * 3);
    if (ret)
        return ret;

    for (i = 0; i < 4; i++) {
        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
        intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
        intel_ring_emit(ring, 0);
    }

    intel_ring_advance(ring);

    return 0;
}

static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
    struct drm_file *file,
    struct drm_i915_gem_execbuffer2 *args,
    struct drm_i915_gem_exec_object2 *exec)
{
    drm_i915_private_t *dev_priv = dev->dev_private;
    struct list_head objects;
    struct eb_objects *eb;
    struct drm_i915_gem_object *batch_obj;
    struct drm_clip_rect *cliprects = NULL;
    struct intel_ring_buffer *ring;
    vm_page_t **relocs_ma;
    u32 exec_start, exec_len;
    u32 seqno;
    u32 mask;
    int ret, mode, i;

    if (!i915_gem_check_execbuffer(args)) {
        DRM_DEBUG("execbuf with invalid offset/length\n");
        return -EINVAL;
    }

    if (args->batch_len == 0)
        return (0);

    ret = validate_exec_list(exec, args->buffer_count, &relocs_ma);
    if (ret != 0)
        goto pre_struct_lock_err;

    switch (args->flags & I915_EXEC_RING_MASK) {
    case I915_EXEC_DEFAULT:
    case I915_EXEC_RENDER:
        ring = &dev_priv->rings[RCS];
        break;
    case I915_EXEC_BSD:
        if (!HAS_BSD(dev)) {
            DRM_DEBUG("execbuf with invalid ring (BSD)\n");
            return -EINVAL;
        }
        ring = &dev_priv->rings[VCS];
        break;
    case I915_EXEC_BLT:
        if (!HAS_BLT(dev)) {
            DRM_DEBUG("execbuf with invalid ring (BLT)\n");
            return -EINVAL;
        }
        ring = &dev_priv->rings[BCS];
        break;
    default:
        DRM_DEBUG("execbuf with unknown ring: %d\n",
            (int)(args->flags & I915_EXEC_RING_MASK));
        ret = -EINVAL;
        goto pre_struct_lock_err;
    }

    mode = args->flags & I915_EXEC_CONSTANTS_MASK;
    mask = I915_EXEC_CONSTANTS_MASK;
    switch (mode) {
    case I915_EXEC_CONSTANTS_REL_GENERAL:
    case I915_EXEC_CONSTANTS_ABSOLUTE:
    case I915_EXEC_CONSTANTS_REL_SURFACE:
        if (ring == &dev_priv->rings[RCS] &&
            mode != dev_priv->relative_constants_mode) {
            if (INTEL_INFO(dev)->gen < 4) {
                ret = -EINVAL;
                goto pre_struct_lock_err;
            }

            if (INTEL_INFO(dev)->gen > 5 &&
                mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
                ret = -EINVAL;
                goto pre_struct_lock_err;
            }

            /* The HW changed the meaning on this bit on gen6 */
            if (INTEL_INFO(dev)->gen >= 6)
                mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
        }
        break;
    default:
        DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
        ret = -EINVAL;
        goto pre_struct_lock_err;
    }

    if (args->buffer_count < 1) {
        DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
        ret = -EINVAL;
        goto pre_struct_lock_err;
    }

    if (args->num_cliprects != 0) {
        if (ring != &dev_priv->rings[RCS]) {
            DRM_DEBUG("clip rectangles are only valid with the render ring\n");
            ret = -EINVAL;
            goto pre_struct_lock_err;
        }

        if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
            DRM_DEBUG("execbuf with %u cliprects\n",
                args->num_cliprects);
            ret = -EINVAL;
            goto pre_struct_lock_err;
        }
        cliprects = malloc(sizeof(*cliprects) * args->num_cliprects,
            DRM_I915_GEM, M_WAITOK | M_ZERO);
        ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
            sizeof(*cliprects) * args->num_cliprects);
        if (ret != 0)
            goto pre_struct_lock_err;
    }

    ret = i915_mutex_lock_interruptible(dev);
    if (ret)
        goto pre_struct_lock_err;

    if (dev_priv->mm.suspended) {
        ret = -EBUSY;
        goto struct_lock_err;
    }

    eb = eb_create(args->buffer_count);
    if (eb == NULL) {
        ret = -ENOMEM;
        goto struct_lock_err;
    }

    /* Look up object handles */
    INIT_LIST_HEAD(&objects);
    for (i = 0; i < args->buffer_count; i++) {
        struct drm_i915_gem_object *obj;
        obj = to_intel_bo(drm_gem_object_lookup(dev, file,
            exec[i].handle));
        if (&obj->base == NULL) {
            DRM_DEBUG("Invalid object handle %d at index %d\n",
                exec[i].handle, i);
            /* prevent error path from reading uninitialized data */
            ret = -ENOENT;
            goto err;
        }

        if (!list_empty(&obj->exec_list)) {
            DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
                obj, exec[i].handle, i);
            ret = -EINVAL;
            goto err;
        }

        list_add_tail(&obj->exec_list, &objects);
        obj->exec_handle = exec[i].handle;
        obj->exec_entry = &exec[i];
        eb_add_object(eb, obj);
    }

    /* take note of the batch buffer before we might reorder the lists */
    batch_obj = list_entry(objects.prev,
        struct drm_i915_gem_object,
        exec_list);

    /* Move the objects en-masse into the GTT, evicting if necessary. */
    ret = i915_gem_execbuffer_reserve(ring, file, &objects);
    if (ret)
        goto err;

    /* The objects are in their final locations, apply the relocations. */
    ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
    if (ret) {
        if (ret == -EFAULT) {
            ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
                &objects, eb, exec, args->buffer_count);
            DRM_LOCK_ASSERT(dev);
        }
        if (ret)
            goto err;
    }

    /* Set the pending read domains for the batch buffer to COMMAND */
    if (batch_obj->base.pending_write_domain) {
        DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
        ret = -EINVAL;
        goto err;
    }
    batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

    ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
    if (ret)
        goto err;

    seqno = i915_gem_next_request_seqno(ring);
    for (i = 0; i < I915_NUM_RINGS - 1; i++) {
        if (seqno < ring->sync_seqno[i]) {
            /* The GPU can not handle its semaphore value wrapping,
             * so every billion or so execbuffers, we need to stall
             * the GPU in order to reset the counters.
             */
            ret = i915_gpu_idle(dev, true);
            if (ret)
                goto err;

            KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
        }
    }

    if (ring == &dev_priv->rings[RCS] &&
        mode != dev_priv->relative_constants_mode) {
        ret = intel_ring_begin(ring, 4);
        if (ret)
            goto err;

        intel_ring_emit(ring, MI_NOOP);
        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
        intel_ring_emit(ring, INSTPM);
        intel_ring_emit(ring, mask << 16 | mode);
        intel_ring_advance(ring);

        dev_priv->relative_constants_mode = mode;
    }

    if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
        ret = i915_reset_gen7_sol_offsets(dev, ring);
        if (ret)
            goto err;
    }

    exec_start = batch_obj->gtt_offset + args->batch_start_offset;
    exec_len = args->batch_len;

    if (i915_fix_mi_batchbuffer_end) {
        i915_gem_fix_mi_batchbuffer_end(batch_obj,
            args->batch_start_offset, args->batch_len);
    }

    CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
        exec_start, exec_len);

    if (cliprects) {
        for (i = 0; i < args->num_cliprects; i++) {
            ret = i915_emit_box_p(dev, &cliprects[i],
                args->DR1, args->DR4);
            if (ret)
                goto err;

            ret = ring->dispatch_execbuffer(ring, exec_start,
                exec_len);
            if (ret)
                goto err;
        }
    } else {
        ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
        if (ret)
            goto err;
    }

    i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
    i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
    eb_destroy(eb);
    while (!list_empty(&objects)) {
        struct drm_i915_gem_object *obj;

        obj = list_first_entry(&objects, struct drm_i915_gem_object,
            exec_list);
        list_del_init(&obj->exec_list);
        drm_gem_object_unreference(&obj->base);
    }
struct_lock_err:
    DRM_UNLOCK(dev);

pre_struct_lock_err:
    for (i = 0; i < args->buffer_count; i++) {
        if (relocs_ma[i] != NULL) {
            vm_page_unhold_pages(relocs_ma[i], howmany(
                exec[i].relocation_count *
                sizeof(struct drm_i915_gem_relocation_entry),
                PAGE_SIZE));
            free(relocs_ma[i], DRM_I915_GEM);
        }
    }
    free(relocs_ma, DRM_I915_GEM);
    free(cliprects, DRM_I915_GEM);
    return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
1408 */ 1409int 1410i915_gem_execbuffer(struct drm_device *dev, void *data, 1411 struct drm_file *file) 1412{ 1413 struct drm_i915_gem_execbuffer *args = data; 1414 struct drm_i915_gem_execbuffer2 exec2; 1415 struct drm_i915_gem_exec_object *exec_list = NULL; 1416 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 1417 int ret, i; 1418 1419 DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n", 1420 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 1421 1422 if (args->buffer_count < 1) { 1423 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); 1424 return -EINVAL; 1425 } 1426 1427 /* Copy in the exec list from userland */ 1428 /* XXXKIB user-controlled malloc size */ 1429 exec_list = malloc(sizeof(*exec_list) * args->buffer_count, 1430 DRM_I915_GEM, M_WAITOK); 1431 exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count, 1432 DRM_I915_GEM, M_WAITOK); 1433 ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list, 1434 sizeof(*exec_list) * args->buffer_count); 1435 if (ret != 0) { 1436 DRM_DEBUG("copy %d exec entries failed %d\n", 1437 args->buffer_count, ret); 1438 free(exec_list, DRM_I915_GEM); 1439 free(exec2_list, DRM_I915_GEM); 1440 return (ret); 1441 } 1442 1443 for (i = 0; i < args->buffer_count; i++) { 1444 exec2_list[i].handle = exec_list[i].handle; 1445 exec2_list[i].relocation_count = exec_list[i].relocation_count; 1446 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; 1447 exec2_list[i].alignment = exec_list[i].alignment; 1448 exec2_list[i].offset = exec_list[i].offset; 1449 if (INTEL_INFO(dev)->gen < 4) 1450 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; 1451 else 1452 exec2_list[i].flags = 0; 1453 } 1454 1455 exec2.buffers_ptr = args->buffers_ptr; 1456 exec2.buffer_count = args->buffer_count; 1457 exec2.batch_start_offset = args->batch_start_offset; 1458 exec2.batch_len = args->batch_len; 1459 exec2.DR1 = args->DR1; 1460 exec2.DR4 = args->DR4; 1461 exec2.num_cliprects = args->num_cliprects; 1462 exec2.cliprects_ptr = args->cliprects_ptr; 1463 exec2.flags = I915_EXEC_RENDER; 1464 1465 ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); 1466 if (!ret) { 1467 /* Copy the new buffer offsets back to the user's exec list. */ 1468 for (i = 0; i < args->buffer_count; i++) 1469 exec_list[i].offset = exec2_list[i].offset; 1470 /* ... 
        ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
            sizeof(*exec_list) * args->buffer_count);
        if (ret != 0) {
            DRM_DEBUG("failed to copy %d exec entries "
                "back to user (%d)\n",
                args->buffer_count, ret);
        }
    }

    free(exec_list, DRM_I915_GEM);
    free(exec2_list, DRM_I915_GEM);
    return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
    struct drm_file *file)
{
    struct drm_i915_gem_execbuffer2 *args = data;
    struct drm_i915_gem_exec_object2 *exec2_list = NULL;
    int ret;

    DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
        (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);

    if (args->buffer_count < 1 ||
        args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
        DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
        return -EINVAL;
    }

    /* XXXKIB user-controllable malloc size */
    exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
        DRM_I915_GEM, M_WAITOK);
    ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
        sizeof(*exec2_list) * args->buffer_count);
    if (ret != 0) {
        DRM_DEBUG("copy %d exec entries failed %d\n",
            args->buffer_count, ret);
        free(exec2_list, DRM_I915_GEM);
        return (ret);
    }

    ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
    if (!ret) {
        /* Copy the new buffer offsets back to the user's exec list. */
        ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
            sizeof(*exec2_list) * args->buffer_count);
        if (ret) {
            DRM_DEBUG("failed to copy %d exec entries "
                "back to user (%d)\n",
                args->buffer_count, ret);
        }
    }

    free(exec2_list, DRM_I915_GEM);
    return ret;
}