/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"

#include "i915_drm.h"

#define DBG(...) do {                       \
    if (bufmgr_gem->bufmgr.debug)           \
        fprintf(stderr, __VA_ARGS__);       \
} while (0)

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
    drmMMListHead head;

    /**
     * Limit on the number of entries in this bucket.
     *
     * 0 means that caching at this bucket size is disabled.
     * -1 means that there is no limit to caching at this size.
     */
    int max_entries;
    int num_entries;
};

/* Only cache objects up to 64MB.  Bigger than that, and the rounding of the
 * size makes many operations fail that wouldn't otherwise.
 */
#define DRM_INTEL_GEM_BO_BUCKETS    14
typedef struct _drm_intel_bufmgr_gem {
    drm_intel_bufmgr bufmgr;

    int fd;

    int max_relocs;

    pthread_mutex_t lock;

    struct drm_i915_gem_exec_object *exec_objects;
    drm_intel_bo **exec_bos;
    int exec_size;
    int exec_count;

    /** Array of lists of cached gem objects of power-of-two sizes */
    struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];

    uint64_t gtt_size;
    int available_fences;
    int pci_device;
} drm_intel_bufmgr_gem;

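/*
 * Cache layout sketch: bucket i holds free buffer objects of exactly
 * (4096 << i) bytes, so the 14 buckets cover power-of-two sizes upwards
 * from a single 4kB page, for example:
 *
 *    bucket 0:  4096 bytes (1 page)
 *    bucket 1:  8192 bytes (2 pages)
 *    bucket 4: 65536 bytes (16 pages)
 *
 * drm_intel_gem_bo_bucket_for_size() below implements this mapping with
 * ffs(size / 4096) - 1.
 */
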
struct _drm_intel_bo_gem {
    drm_intel_bo bo;

    int refcount;
    /** Boolean whether the mmap ioctl has been called for this buffer yet. */
    uint32_t gem_handle;
    const char *name;

    /**
     * Kernel-assigned global name for this object
     */
    unsigned int global_name;

    /**
     * Index of the buffer within the validation list while preparing a
     * batchbuffer execution.
     */
    int validate_index;

    /**
     * Boolean of whether we've started swrast access.
     * Set when the buffer has been mapped, cleared when the buffer is
     * unmapped.
     */
    int swrast;

    /**
     * Current tiling mode
     */
    uint32_t tiling_mode;
    uint32_t swizzle_mode;

    /** Array passed to the DRM containing relocation information. */
    struct drm_i915_gem_relocation_entry *relocs;
    /** Array of bos corresponding to relocs[i].target_handle */
    drm_intel_bo **reloc_target_bo;
    /** Number of entries in relocs */
    int reloc_count;
    /** Mapped address for the buffer, saved across map/unmap cycles */
    void *mem_virtual;
    /** GTT virtual address for the buffer, saved across map/unmap cycles */
    void *gtt_virtual;

    /** BO cache list */
    drmMMListHead head;

    /**
     * Boolean of whether this BO and its children have been included in
     * the current drm_intel_bufmgr_check_aperture_space() total.
     */
    char included_in_check_aperture;

    /**
     * Boolean of whether this buffer has been used as a relocation
     * target and had its size accounted for, and thus can't have any
     * further relocations added to it.
     */
    char used_as_reloc_target;

    /**
     * Boolean of whether this buffer can be re-used
     */
    char reusable;

    /**
     * Size in bytes of this buffer and its relocation descendents.
     *
     * Used to avoid costly tree walking in drm_intel_bufmgr_check_aperture
     * in the common case.
     */
    int reloc_tree_size;
    /**
     * Number of potential fence registers required by this buffer and its
     * relocations.
     */
    int reloc_tree_fences;
};

static void drm_intel_gem_bo_reference_locked(drm_intel_bo *bo);

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
                            uint32_t *swizzle_mode);

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
                            uint32_t stride);

static void
drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static int
logbase2(int n)
{
    int i = 1;
    int log2 = 0;

    while (n > i) {
        i *= 2;
        log2++;
    }

    return log2;
}

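/*
 * logbase2() returns the smallest exponent e such that (1 << e) >= n,
 * i.e. a ceiling log2: logbase2(4096) == 12, logbase2(5000) == 13.  The
 * allocator below uses "1 << logbase2(size)" to round a request up to the
 * next power-of-two size before bucket lookup.
 */
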
static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
                                 unsigned long size)
{
    int i;

    /* We only do buckets in power of two increments */
    if ((size & (size - 1)) != 0)
        return NULL;

    /* We should only see sizes rounded to pages. */
    assert((size % 4096) == 0);

    /* We always allocate in units of pages */
    i = ffs(size / 4096) - 1;
    if (i >= DRM_INTEL_GEM_BO_BUCKETS)
        return NULL;

    return &bufmgr_gem->cache_bucket[i];
}

static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
    int i, j;

    for (i = 0; i < bufmgr_gem->exec_count; i++) {
        drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

        if (bo_gem->relocs == NULL) {
            DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, bo_gem->name);
            continue;
        }

        for (j = 0; j < bo_gem->reloc_count; j++) {
            drm_intel_bo *target_bo = bo_gem->reloc_target_bo[j];
            drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *)target_bo;

            DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n",
                i,
                bo_gem->gem_handle, bo_gem->name,
                (unsigned long long)bo_gem->relocs[j].offset,
                target_gem->gem_handle, target_gem->name,
                target_bo->offset,
                bo_gem->relocs[j].delta);
        }
    }
}

/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int index;

    if (bo_gem->validate_index != -1)
        return;

    /* Extend the array of validation entries as necessary. */
    if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
        int new_size = bufmgr_gem->exec_size * 2;

        if (new_size == 0)
            new_size = 5;

        bufmgr_gem->exec_objects =
            realloc(bufmgr_gem->exec_objects,
                    sizeof(*bufmgr_gem->exec_objects) * new_size);
        bufmgr_gem->exec_bos =
            realloc(bufmgr_gem->exec_bos,
                    sizeof(*bufmgr_gem->exec_bos) * new_size);
        bufmgr_gem->exec_size = new_size;
    }

    index = bufmgr_gem->exec_count;
    bo_gem->validate_index = index;
    /* Fill in array entry */
    bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
    bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
    bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
    bufmgr_gem->exec_objects[index].alignment = 0;
    bufmgr_gem->exec_objects[index].offset = 0;
    bufmgr_gem->exec_bos[index] = bo;
    drm_intel_gem_bo_reference_locked(bo);
    bufmgr_gem->exec_count++;
}

#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
    sizeof(uint32_t))

static int
drm_intel_setup_reloc_list(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;

    bo_gem->relocs = malloc(bufmgr_gem->max_relocs *
                            sizeof(struct drm_i915_gem_relocation_entry));
    bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs *
                                     sizeof(drm_intel_bo *));

    return 0;
}

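/*
 * Worked example: drm_intel_bufmgr_gem_init() at the bottom of this file
 * sizes max_relocs as batch_size / sizeof(uint32_t) / 2 - 2, so a driver
 * passing a 16384-byte batch gets 16384 / 4 / 2 - 2 = 2046 relocation
 * slots, and the two arrays above are allocated for that worst case up
 * front.
 */
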
static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
                                unsigned long size, unsigned int alignment,
                                int for_render)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    drm_intel_bo_gem *bo_gem;
    unsigned int page_size = getpagesize();
    int ret;
    struct drm_intel_gem_bo_bucket *bucket;
    int alloc_from_cache = 0;
    unsigned long bo_size;

    /* Round the allocated size up to a power of two number of pages. */
    bo_size = 1 << logbase2(size);
    if (bo_size < page_size)
        bo_size = page_size;
    bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo_size);

    /* If we don't have caching at this size, don't actually round the
     * allocation up.
     */
    if (bucket == NULL || bucket->max_entries == 0) {
        bo_size = size;
        if (bo_size < page_size)
            bo_size = page_size;
    }

    pthread_mutex_lock(&bufmgr_gem->lock);
    /* Get a buffer out of the cache if available */
    if (bucket != NULL && bucket->num_entries > 0) {
        struct drm_i915_gem_busy busy;

        if (for_render) {
            /* Allocate new render-target BOs from the tail (MRU)
             * of the list, as it will likely be hot in the GPU cache
             * and in the aperture for us.
             */
            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.prev, head);
            DRMLISTDEL(&bo_gem->head);
            bucket->num_entries--;
            alloc_from_cache = 1;
        } else {
            /* For non-render-target BOs (where we're probably going to map
             * it first thing in order to fill it with data), check if the
             * last BO in the cache is unbusy, and only reuse in that case.
             * Otherwise, allocating a new buffer is probably faster than
             * waiting for the GPU to finish.
             */
            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);

            memset(&busy, 0, sizeof(busy));
            busy.handle = bo_gem->gem_handle;

            ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
            alloc_from_cache = (ret == 0 && busy.busy == 0);

            if (alloc_from_cache) {
                DRMLISTDEL(&bo_gem->head);
                bucket->num_entries--;
            }
        }
    }
    pthread_mutex_unlock(&bufmgr_gem->lock);

    if (!alloc_from_cache) {
        struct drm_i915_gem_create create;

        bo_gem = calloc(1, sizeof(*bo_gem));
        if (!bo_gem)
            return NULL;

        bo_gem->bo.size = bo_size;
        memset(&create, 0, sizeof(create));
        create.size = bo_size;

        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
        bo_gem->gem_handle = create.handle;
        bo_gem->bo.handle = bo_gem->gem_handle;
        if (ret != 0) {
            free(bo_gem);
            return NULL;
        }
        bo_gem->bo.bufmgr = bufmgr;
    }

    bo_gem->name = name;
    bo_gem->refcount = 1;
    bo_gem->validate_index = -1;
    bo_gem->reloc_tree_size = bo_gem->bo.size;
    bo_gem->reloc_tree_fences = 0;
    bo_gem->used_as_reloc_target = 0;
    bo_gem->tiling_mode = I915_TILING_NONE;
    bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
    bo_gem->reusable = 1;

    DBG("bo_create: buf %d (%s) %ldb\n",
        bo_gem->gem_handle, bo_gem->name, size);

    return &bo_gem->bo;
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
                                  unsigned long size, unsigned int alignment)
{
    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 1);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
                       unsigned long size, unsigned int alignment)
{
    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 0);
}

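/*
 * Minimal allocation sketch through the public entry points, assuming a
 * bufmgr obtained from drm_intel_bufmgr_gem_init():
 *
 *    drm_intel_bo *bo;
 *
 *    bo = drm_intel_bo_alloc(bufmgr, "vertex data", 8192, 4096);
 *    if (bo == NULL)
 *        return;
 *    ...
 *    drm_intel_bo_unreference(bo);
 *
 * drm_intel_bo_alloc() dispatches to drm_intel_gem_bo_alloc() above
 * through the vtable filled in by drm_intel_bufmgr_gem_init().
 */
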
/**
 * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_intel_bo *
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, const char *name,
                                  unsigned int handle)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    drm_intel_bo_gem *bo_gem;
    int ret;
    struct drm_gem_open open_arg;
    struct drm_i915_gem_get_tiling get_tiling;

    bo_gem = calloc(1, sizeof(*bo_gem));
    if (!bo_gem)
        return NULL;

    memset(&open_arg, 0, sizeof(open_arg));
    open_arg.name = handle;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
    if (ret != 0) {
        fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
                name, handle, strerror(errno));
        free(bo_gem);
        return NULL;
    }
    bo_gem->bo.size = open_arg.size;
    bo_gem->bo.offset = 0;
    bo_gem->bo.virtual = NULL;
    bo_gem->bo.bufmgr = bufmgr;
    bo_gem->name = name;
    bo_gem->refcount = 1;
    bo_gem->validate_index = -1;
    bo_gem->gem_handle = open_arg.handle;
    bo_gem->global_name = handle;
    bo_gem->reusable = 0;

    memset(&get_tiling, 0, sizeof(get_tiling));
    get_tiling.handle = bo_gem->gem_handle;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
    if (ret != 0) {
        drm_intel_gem_bo_unreference(&bo_gem->bo);
        return NULL;
    }
    bo_gem->tiling_mode = get_tiling.tiling_mode;
    bo_gem->swizzle_mode = get_tiling.swizzle_mode;
    if (bo_gem->tiling_mode == I915_TILING_NONE)
        bo_gem->reloc_tree_fences = 0;
    else
        bo_gem->reloc_tree_fences = 1;

    DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

    return &bo_gem->bo;
}

static void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    assert(bo_gem->refcount > 0);
    pthread_mutex_lock(&bufmgr_gem->lock);
    bo_gem->refcount++;
    pthread_mutex_unlock(&bufmgr_gem->lock);
}

static void
drm_intel_gem_bo_reference_locked(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    assert(bo_gem->refcount > 0);
    bo_gem->refcount++;
}

static void
drm_intel_gem_bo_free(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_gem_close close;
    int ret;

    if (bo_gem->mem_virtual)
        munmap(bo_gem->mem_virtual, bo_gem->bo.size);
    if (bo_gem->gtt_virtual)
        munmap(bo_gem->gtt_virtual, bo_gem->bo.size);

    /* Close this object */
    memset(&close, 0, sizeof(close));
    close.handle = bo_gem->gem_handle;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
    if (ret != 0) {
        fprintf(stderr,
                "DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
                bo_gem->gem_handle, bo_gem->name, strerror(errno));
    }
    free(bo);
}

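/*
 * Sharing sketch: pairing drm_intel_bo_gem_create_from_name() above with
 * drm_intel_gem_bo_flink() below lets two processes reference one buffer,
 * assuming both have opened the same DRM device:
 *
 *    // exporting process: pass `name' to the peer over IPC
 *    uint32_t name;
 *    drm_intel_bo_flink(bo, &name);
 *
 *    // importing process
 *    drm_intel_bo *shared =
 *        drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 *
 * Both directions mark the buffer non-reusable, so shared objects bypass
 * the bucket cache on their final unreference.
 */
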
static void
drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    assert(bo_gem->refcount > 0);
    if (--bo_gem->refcount == 0) {
        struct drm_intel_gem_bo_bucket *bucket;
        uint32_t tiling_mode;

        if (bo_gem->relocs != NULL) {
            int i;

            /* Unreference all the target buffers */
            for (i = 0; i < bo_gem->reloc_count; i++)
                drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_bo[i]);
            free(bo_gem->reloc_target_bo);
            free(bo_gem->relocs);
        }

        DBG("bo_unreference final: %d (%s)\n",
            bo_gem->gem_handle, bo_gem->name);

        bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
        /* Put the buffer into our internal cache for reuse if we can. */
        tiling_mode = I915_TILING_NONE;
        if (bo_gem->reusable &&
            bucket != NULL &&
            (bucket->max_entries == -1 ||
             (bucket->max_entries > 0 &&
              bucket->num_entries < bucket->max_entries)) &&
            drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0)
        {
            bo_gem->name = NULL;
            bo_gem->validate_index = -1;
            bo_gem->relocs = NULL;
            bo_gem->reloc_target_bo = NULL;
            bo_gem->reloc_count = 0;

            DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
            bucket->num_entries++;
        } else {
            drm_intel_gem_bo_free(bo);
        }
    }
}

static void
drm_intel_gem_bo_unreference(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;

    pthread_mutex_lock(&bufmgr_gem->lock);
    drm_intel_gem_bo_unreference_locked(bo);
    pthread_mutex_unlock(&bufmgr_gem->lock);
}

static int
drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_set_domain set_domain;
    int ret;

    pthread_mutex_lock(&bufmgr_gem->lock);

    /* Allow recursive mapping.  Mesa may recursively map buffers with
     * nested display loops.
     */
    if (!bo_gem->mem_virtual) {
        struct drm_i915_gem_mmap mmap_arg;

        DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);

        memset(&mmap_arg, 0, sizeof(mmap_arg));
        mmap_arg.handle = bo_gem->gem_handle;
        mmap_arg.offset = 0;
        mmap_arg.size = bo->size;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
        if (ret != 0) {
            fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n",
                    __FILE__, __LINE__,
                    bo_gem->gem_handle, bo_gem->name, strerror(errno));
            pthread_mutex_unlock(&bufmgr_gem->lock);
            return ret;
        }
        bo_gem->mem_virtual = (void *)(uintptr_t)mmap_arg.addr_ptr;
        bo_gem->swrast = 0;
    }
    DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
        bo_gem->mem_virtual);
    bo->virtual = bo_gem->mem_virtual;

    if (bo_gem->global_name != 0 || !bo_gem->swrast) {
        set_domain.handle = bo_gem->gem_handle;
        set_domain.read_domains = I915_GEM_DOMAIN_CPU;
        if (write_enable)
            set_domain.write_domain = I915_GEM_DOMAIN_CPU;
        else
            set_domain.write_domain = 0;
        do {
            ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN,
                        &set_domain);
        } while (ret == -1 && errno == EINTR);
        if (ret != 0) {
            fprintf(stderr, "%s:%d: Error setting swrast %d: %s\n",
                    __FILE__, __LINE__, bo_gem->gem_handle, strerror(errno));
            pthread_mutex_unlock(&bufmgr_gem->lock);
            return ret;
        }
        bo_gem->swrast = 1;
    }

    pthread_mutex_unlock(&bufmgr_gem->lock);

    return 0;
}

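/*
 * CPU mapping sketch via the public wrappers, assuming `bo' came from
 * drm_intel_bo_alloc():
 *
 *    if (drm_intel_bo_map(bo, 1) == 0) {    // 1 = write_enable
 *        memset(bo->virtual, 0, bo->size);
 *        drm_intel_bo_unmap(bo);
 *    }
 *
 * The mapping is cached in mem_virtual, so repeated map/unmap cycles only
 * pay for the set_domain ioctl rather than a fresh mmap each time.
 */
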
int
drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_set_domain set_domain;
    int ret;

    pthread_mutex_lock(&bufmgr_gem->lock);

    /* Get a mapping of the buffer if we haven't before. */
    if (bo_gem->gtt_virtual == NULL) {
        struct drm_i915_gem_mmap_gtt mmap_arg;

        DBG("bo_map_gtt: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);

        memset(&mmap_arg, 0, sizeof(mmap_arg));
        mmap_arg.handle = bo_gem->gem_handle;

        /* Get the fake offset back... */
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
        if (ret != 0) {
            fprintf(stderr,
                    "%s:%d: Error preparing buffer map %d (%s): %s .\n",
                    __FILE__, __LINE__,
                    bo_gem->gem_handle, bo_gem->name,
                    strerror(errno));
            pthread_mutex_unlock(&bufmgr_gem->lock);
            return ret;
        }

        /* and mmap it */
        bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
                                   MAP_SHARED, bufmgr_gem->fd,
                                   mmap_arg.offset);
        if (bo_gem->gtt_virtual == MAP_FAILED) {
            fprintf(stderr,
                    "%s:%d: Error mapping buffer %d (%s): %s .\n",
                    __FILE__, __LINE__,
                    bo_gem->gem_handle, bo_gem->name,
                    strerror(errno));
            pthread_mutex_unlock(&bufmgr_gem->lock);
            return errno;
        }
    }

    bo->virtual = bo_gem->gtt_virtual;

    DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
        bo_gem->gtt_virtual);

    /* Now move it to the GTT domain so that the CPU caches are flushed */
    set_domain.handle = bo_gem->gem_handle;
    set_domain.read_domains = I915_GEM_DOMAIN_GTT;
    set_domain.write_domain = I915_GEM_DOMAIN_GTT;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN,
                    &set_domain);
    } while (ret == -1 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "%s:%d: Error setting domain %d: %s\n",
                __FILE__, __LINE__, bo_gem->gem_handle, strerror(errno));
    }

    pthread_mutex_unlock(&bufmgr_gem->lock);

    return 0;
}

int
drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int ret = 0;

    if (bo == NULL)
        return 0;

    assert(bo_gem->gtt_virtual != NULL);

    pthread_mutex_lock(&bufmgr_gem->lock);
    bo->virtual = NULL;
    pthread_mutex_unlock(&bufmgr_gem->lock);

    return ret;
}

static int
drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_sw_finish sw_finish;
    int ret;

    if (bo == NULL)
        return 0;

    assert(bo_gem->mem_virtual != NULL);

    pthread_mutex_lock(&bufmgr_gem->lock);
    if (bo_gem->swrast) {
        sw_finish.handle = bo_gem->gem_handle;
        do {
            ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SW_FINISH,
                        &sw_finish);
        } while (ret == -1 && errno == EINTR);
        bo_gem->swrast = 0;
    }
    bo->virtual = NULL;
    pthread_mutex_unlock(&bufmgr_gem->lock);
    return 0;
}

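/*
 * GTT mapping sketch, assuming a tiled buffer: CPU access goes through the
 * aperture, which applies the tiled layout in hardware, so the caller can
 * write pixels linearly without detiling by hand:
 *
 *    if (drm_intel_gem_bo_map_gtt(bo) == 0) {
 *        memcpy(bo->virtual, pixels, bo->size);
 *        drm_intel_gem_bo_unmap_gtt(bo);
 *    }
 */
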
static int
drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
                         unsigned long size, const void *data)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_pwrite pwrite;
    int ret;

    memset(&pwrite, 0, sizeof(pwrite));
    pwrite.handle = bo_gem->gem_handle;
    pwrite.offset = offset;
    pwrite.size = size;
    pwrite.data_ptr = (uint64_t)(uintptr_t)data;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
    } while (ret == -1 && errno == EINTR);
    if (ret != 0) {
        fprintf(stderr,
                "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
                __FILE__, __LINE__,
                bo_gem->gem_handle, (int)offset, (int)size,
                strerror(errno));
    }
    return 0;
}

static int
drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
    int ret;

    get_pipe_from_crtc_id.crtc_id = crtc_id;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
                &get_pipe_from_crtc_id);
    if (ret != 0) {
        /* We return -1 here to signal that we don't
         * know which pipe is associated with this crtc.
         * This lets the caller know that this information
         * isn't available; using the wrong pipe for
         * vblank waiting can cause the chipset to lock up.
         */
        return -1;
    }

    return get_pipe_from_crtc_id.pipe;
}

static int
drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
                             unsigned long size, void *data)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_pread pread;
    int ret;

    memset(&pread, 0, sizeof(pread));
    pread.handle = bo_gem->gem_handle;
    pread.offset = offset;
    pread.size = size;
    pread.data_ptr = (uint64_t)(uintptr_t)data;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
    } while (ret == -1 && errno == EINTR);
    if (ret != 0) {
        fprintf(stderr,
                "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
                __FILE__, __LINE__,
                bo_gem->gem_handle, (int)offset, (int)size,
                strerror(errno));
    }
    return 0;
}

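/*
 * Upload/readback sketch with the pwrite/pread helpers above, which avoid
 * creating a long-lived mapping for a one-shot copy (assuming `indices' is
 * a small CPU-side array):
 *
 *    drm_intel_bo_subdata(bo, 0, sizeof(indices), indices);
 *    ...
 *    drm_intel_bo_get_subdata(bo, 0, sizeof(indices), indices);
 */
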
/** Waits for all GPU rendering to the object to have completed. */
static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
{
    drm_intel_gem_bo_start_gtt_access(bo, 0);
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_set_domain set_domain;
    int ret;

    set_domain.handle = bo_gem->gem_handle;
    set_domain.read_domains = I915_GEM_DOMAIN_GTT;
    set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN,
                    &set_domain);
    } while (ret == -1 && errno == EINTR);
    if (ret != 0) {
        fprintf(stderr,
                "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
                __FILE__, __LINE__,
                bo_gem->gem_handle, set_domain.read_domains,
                set_domain.write_domain, strerror(errno));
    }
}

static void
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    int i;

    free(bufmgr_gem->exec_objects);
    free(bufmgr_gem->exec_bos);

    pthread_mutex_destroy(&bufmgr_gem->lock);

    /* Free any cached buffer objects we were going to reuse */
    for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
        struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
        drm_intel_bo_gem *bo_gem;

        while (!DRMLISTEMPTY(&bucket->head)) {
            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
            DRMLISTDEL(&bo_gem->head);
            bucket->num_entries--;

            drm_intel_gem_bo_free(&bo_gem->bo);
        }
    }

    free(bufmgr);
}

/**
 * Adds the target buffer to the validation list and adds the relocation
 * to the reloc_buffer's relocation list.
 *
 * The relocation entry at the given offset must already contain the
 * precomputed relocation value, because the kernel will optimize out
 * the relocation entry write when the buffer hasn't moved from the
 * last known offset in target_bo.
 */
static int
drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
                            drm_intel_bo *target_bo, uint32_t target_offset,
                            uint32_t read_domains, uint32_t write_domain)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;

    pthread_mutex_lock(&bufmgr_gem->lock);

    /* Create a new relocation list if needed */
    if (bo_gem->relocs == NULL)
        drm_intel_setup_reloc_list(bo);

    /* Check overflow */
    assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);

    /* Check args */
    assert(offset <= bo->size - 4);
    assert((write_domain & (write_domain - 1)) == 0);

    /* Make sure that we're not adding a reloc to something whose size has
     * already been accounted for.
     */
    assert(!bo_gem->used_as_reloc_target);
    bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
    bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;

    /* Flag the target to disallow further relocations in it. */
    target_bo_gem->used_as_reloc_target = 1;

    bo_gem->relocs[bo_gem->reloc_count].offset = offset;
    bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
    bo_gem->relocs[bo_gem->reloc_count].target_handle =
        target_bo_gem->gem_handle;
    bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
    bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
    bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;

    bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo;
    drm_intel_gem_bo_reference_locked(target_bo);

    bo_gem->reloc_count++;

    pthread_mutex_unlock(&bufmgr_gem->lock);

    return 0;
}

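/*
 * Relocation sketch: the driver writes the presumed address into the batch
 * first, then records the relocation so the kernel can patch it if the
 * target moves.  A minimal sketch, assuming `batch' and `vbo' are live BOs
 * and `dword' is the byte offset of the pointer within the batch:
 *
 *    ((uint32_t *)batch->virtual)[dword / 4] = vbo->offset;
 *    drm_intel_bo_emit_reloc(batch, dword, vbo, 0,
 *                            I915_GEM_DOMAIN_VERTEX, 0);
 */
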
/**
 * Walk the tree of relocations rooted at BO and accumulate the list of
 * validations to be performed and update the relocation buffers with
 * index values into the validation list.
 */
static void
drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int i;

    if (bo_gem->relocs == NULL)
        return;

    for (i = 0; i < bo_gem->reloc_count; i++) {
        drm_intel_bo *target_bo = bo_gem->reloc_target_bo[i];

        /* Continue walking the tree depth-first. */
        drm_intel_gem_bo_process_reloc(target_bo);

        /* Add the target to the validate list */
        drm_intel_add_validate_buffer(target_bo);
    }
}

static void
drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
{
    int i;

    for (i = 0; i < bufmgr_gem->exec_count; i++) {
        drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

        /* Update the buffer offset */
        if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
            DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
                bo_gem->gem_handle, bo_gem->name, bo->offset,
                (unsigned long long)bufmgr_gem->exec_objects[i].offset);
            bo->offset = bufmgr_gem->exec_objects[i].offset;
        }
    }
}

static int
drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
                      drm_clip_rect_t *cliprects, int num_cliprects,
                      int DR4)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    struct drm_i915_gem_execbuffer execbuf;
    int ret, i;

    pthread_mutex_lock(&bufmgr_gem->lock);
    /* Update indices and set up the validate list. */
    drm_intel_gem_bo_process_reloc(bo);

    /* Add the batch buffer to the validation list.  There are no relocations
     * pointing to it.
     */
    drm_intel_add_validate_buffer(bo);

    execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec_objects;
    execbuf.buffer_count = bufmgr_gem->exec_count;
    execbuf.batch_start_offset = 0;
    execbuf.batch_len = used;
    execbuf.cliprects_ptr = (uintptr_t)cliprects;
    execbuf.num_cliprects = num_cliprects;
    execbuf.DR1 = 0;
    execbuf.DR4 = DR4;

    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER, &execbuf);
    } while (ret != 0 && errno == EAGAIN);

    if (ret != 0 && errno == ENOMEM) {
        fprintf(stderr,
                "Execbuffer fails to pin. Estimate: %u. Actual: %u. Available: %u\n",
                drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
                                                   bufmgr_gem->exec_count),
                drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
                                                  bufmgr_gem->exec_count),
                (unsigned int)bufmgr_gem->gtt_size);
    }
    drm_intel_update_buffer_offsets(bufmgr_gem);

    if (bufmgr_gem->bufmgr.debug)
        drm_intel_gem_dump_validation_list(bufmgr_gem);

    for (i = 0; i < bufmgr_gem->exec_count; i++) {
        drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

        /* Need to call swrast on next bo_map */
        bo_gem->swrast = 0;

        /* Disconnect the buffer from the validate list */
        bo_gem->validate_index = -1;
        drm_intel_gem_bo_unreference_locked(bo);
        bufmgr_gem->exec_bos[i] = NULL;
    }
    bufmgr_gem->exec_count = 0;
    pthread_mutex_unlock(&bufmgr_gem->lock);

    return 0;
}

static int
drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_pin pin;
    int ret;

    memset(&pin, 0, sizeof(pin));
    pin.handle = bo_gem->gem_handle;
    pin.alignment = alignment;

    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PIN, &pin);
    } while (ret == -1 && errno == EINTR);

    if (ret != 0)
        return -errno;

    bo->offset = pin.offset;
    return 0;
}

static int
drm_intel_gem_bo_unpin(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_unpin unpin;
    int ret;

    memset(&unpin, 0, sizeof(unpin));
    unpin.handle = bo_gem->gem_handle;

    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
    if (ret != 0)
        return -errno;

    return 0;
}

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
                            uint32_t stride)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_set_tiling set_tiling;
    int ret;

    if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
        return 0;

    /* If we're going from non-tiling to tiling, bump fence count */
    if (bo_gem->tiling_mode == I915_TILING_NONE)
        bo_gem->reloc_tree_fences++;

    memset(&set_tiling, 0, sizeof(set_tiling));
    set_tiling.handle = bo_gem->gem_handle;
    set_tiling.tiling_mode = *tiling_mode;
    set_tiling.stride = stride;

    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
    if (ret != 0) {
        *tiling_mode = bo_gem->tiling_mode;
        return -errno;
    }
    bo_gem->tiling_mode = set_tiling.tiling_mode;
    bo_gem->swizzle_mode = set_tiling.swizzle_mode;

    /* If we're going from tiling to non-tiling, drop fence count */
    if (bo_gem->tiling_mode == I915_TILING_NONE)
        bo_gem->reloc_tree_fences--;

    *tiling_mode = bo_gem->tiling_mode;
    return 0;
}

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
                            uint32_t *swizzle_mode)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    *tiling_mode = bo_gem->tiling_mode;
    *swizzle_mode = bo_gem->swizzle_mode;
    return 0;
}

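/*
 * Tiling sketch: the kernel may adjust or refuse the requested mode, so
 * callers re-read *tiling_mode afterwards (assuming `pitch' is the row
 * stride in bytes):
 *
 *    uint32_t tiling = I915_TILING_X;
 *
 *    drm_intel_bo_set_tiling(bo, &tiling, pitch);
 *    if (tiling != I915_TILING_X) {
 *        // kernel refused; fall back to a linear layout
 *    }
 */
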
static int
drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t *name)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_gem_flink flink;
    int ret;

    if (!bo_gem->global_name) {
        memset(&flink, 0, sizeof(flink));
        flink.handle = bo_gem->gem_handle;

        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
        if (ret != 0)
            return -errno;
        bo_gem->global_name = flink.name;
        bo_gem->reusable = 0;
    }

    *name = bo_gem->global_name;
    return 0;
}

/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 */
void
drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    int i;

    for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
        bufmgr_gem->cache_bucket[i].max_entries = -1;
    }
}

/**
 * Return the additional aperture space required by the tree of buffer objects
 * rooted at bo.
 */
static int
drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int i;
    int total = 0;

    if (bo == NULL || bo_gem->included_in_check_aperture)
        return 0;

    total += bo->size;
    bo_gem->included_in_check_aperture = 1;

    for (i = 0; i < bo_gem->reloc_count; i++)
        total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_bo[i]);

    return total;
}

/**
 * Count the number of buffers in this list that need a fence reg
 *
 * If the count is greater than the number of available regs, we'll have
 * to ask the caller to resubmit a batch with fewer tiled buffers.
 *
 * This function over-counts if the same buffer is used multiple times.
 */
static unsigned int
drm_intel_gem_total_fences(drm_intel_bo **bo_array, int count)
{
    int i;
    unsigned int total = 0;

    for (i = 0; i < count; i++) {
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];

        if (bo_gem == NULL)
            continue;

        total += bo_gem->reloc_tree_fences;
    }
    return total;
}

/**
 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
 * for the next drm_intel_bufmgr_check_aperture_space() call.
 */
static void
drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int i;

    if (bo == NULL || !bo_gem->included_in_check_aperture)
        return;

    bo_gem->included_in_check_aperture = 0;

    for (i = 0; i < bo_gem->reloc_count; i++)
        drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_bo[i]);
}

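/*
 * Worked example for the two accounting modes that follow: if a batch's
 * relocation tree reaches textures A and B, and both reference one shared
 * vertex buffer, the estimate path (summing cached reloc_tree_size values)
 * counts the vertex buffer twice, while the compute path walks the tree
 * with the included_in_check_aperture flag above and counts it once.
 */
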
/**
 * Return a conservative estimate for the amount of aperture required
 * for a collection of buffers.  This may double-count some buffers.
 */
static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
{
    int i;
    unsigned int total = 0;

    for (i = 0; i < count; i++) {
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
        if (bo_gem != NULL)
            total += bo_gem->reloc_tree_size;
    }
    return total;
}

/**
 * Return the amount of aperture needed for a collection of buffers.
 * This avoids double counting any buffers, at the cost of looking
 * at every buffer in the set.
 */
static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
{
    int i;
    unsigned int total = 0;

    for (i = 0; i < count; i++) {
        total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
        /* For the first buffer object in the array, we get an accurate count
         * back for its reloc_tree size (since nothing had been flagged as
         * being counted yet).  We can save that value out as a more
         * conservative reloc_tree_size that avoids double-counting target
         * buffers.  Since the first buffer happens to usually be the batch
         * buffer in our callers, this can pull us back from doing the tree
         * walk on every new batch emit.
         */
        if (i == 0) {
            drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
            bo_gem->reloc_tree_size = total;
        }
    }

    for (i = 0; i < count; i++)
        drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
    return total;
}

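/*
 * Driver-side sketch of the intended use of the check below, assuming the
 * batchbuffer is bo_array[0] as drm_intel_gem_compute_batch_space() above
 * expects:
 *
 *    drm_intel_bo *bos[2] = { batch_bo, texture_bo };
 *
 *    if (drm_intel_bufmgr_check_aperture_space(bos, 2) != 0) {
 *        // flush the current batch and retry against a fresh one
 *    }
 */
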
/**
 * Return -1 if the batchbuffer should be flushed before attempting to
 * emit rendering referencing the buffers pointed to by bo_array.
 *
 * This is required because if we try to emit a batchbuffer with relocations
 * to a tree of buffers that won't simultaneously fit in the aperture,
 * the rendering will return an error at a point where the software is not
 * prepared to recover from it.
 *
 * However, we also want to emit the batchbuffer significantly before we reach
 * the limit, as a series of batchbuffers each of which references buffers
 * covering almost all of the aperture means that at each emit we end up
 * waiting to evict a buffer from the last rendering, and we get synchronous
 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
 * get better parallelism.
 */
static int
drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo_array[0]->bufmgr;
    unsigned int total = 0;
    unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
    int total_fences;

    /* Check for fence reg constraints if necessary */
    if (bufmgr_gem->available_fences) {
        total_fences = drm_intel_gem_total_fences(bo_array, count);
        if (total_fences > bufmgr_gem->available_fences)
            return -1;
    }

    total = drm_intel_gem_estimate_batch_space(bo_array, count);

    if (total > threshold)
        total = drm_intel_gem_compute_batch_space(bo_array, count);

    if (total > threshold) {
        DBG("check_space: overflowed available aperture, %dkb vs %dkb\n",
            total / 1024, (int)bufmgr_gem->gtt_size / 1024);
        return -1;
    } else {
        DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
            (int)bufmgr_gem->gtt_size / 1024);
        return 0;
    }
}

/*
 * Disable buffer reuse for objects which are shared with the kernel
 * as scanout buffers
 */
static int
drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    bo_gem->reusable = 0;
    return 0;
}

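/*
 * Scanout sketch: a buffer handed to the kernel as a framebuffer must not
 * return to the allocation cache, since the display engine may still be
 * scanning out of it (assuming `fb_bo' was just attached to a CRTC):
 *
 *    drm_intel_bo_disable_reuse(fb_bo);
 */
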
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
    drm_intel_bufmgr_gem *bufmgr_gem;
    struct drm_i915_gem_get_aperture aperture;
    drm_i915_getparam_t gp;
    int ret, i;

    bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
    if (bufmgr_gem == NULL)
        return NULL;
    bufmgr_gem->fd = fd;

    if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
        free(bufmgr_gem);
        return NULL;
    }

    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);

    if (ret == 0)
        bufmgr_gem->gtt_size = aperture.aper_available_size;
    else {
        fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
                strerror(errno));
        bufmgr_gem->gtt_size = 128 * 1024 * 1024;
        fprintf(stderr, "Assuming %dkB available aperture size.\n"
                "May lead to reduced performance or incorrect rendering.\n",
                (int)bufmgr_gem->gtt_size / 1024);
    }

    gp.param = I915_PARAM_CHIPSET_ID;
    gp.value = &bufmgr_gem->pci_device;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
    if (ret) {
        fprintf(stderr, "get chip id failed: %d\n", ret);
        fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
    }

    if (!IS_I965G(bufmgr_gem)) {
        gp.param = I915_PARAM_NUM_FENCES_AVAIL;
        gp.value = &bufmgr_gem->available_fences;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
        if (ret) {
            fprintf(stderr, "get fences failed: %d\n", ret);
            fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
            bufmgr_gem->available_fences = 0;
        }
    }

    /* Let's go with one relocation per every 2 dwords (but round down a bit
     * since a power of two will mean an extra page allocation for the reloc
     * buffer).
     *
     * Every 4 was too few for the blender benchmark.
     */
    bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

    bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
    bufmgr_gem->bufmgr.bo_alloc_for_render = drm_intel_gem_bo_alloc_for_render;
    bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
    bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
    bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
    bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
    bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
    bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
    bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
    bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
    bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
    bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
    bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
    bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
    bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
    bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
    bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
    bufmgr_gem->bufmgr.debug = 0;
    bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
    bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
    bufmgr_gem->bufmgr.get_pipe_from_crtc_id = drm_intel_gem_get_pipe_from_crtc_id;
    /* Initialize the linked lists for BO reuse cache. */
    for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++)
        DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);

    return &bufmgr_gem->bufmgr;
}
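
/*
 * End-to-end sketch of bringing up the buffer manager, assuming `fd' is an
 * open DRM device node (e.g. /dev/dri/card0) and a 16kB batch size:
 *
 *    drm_intel_bufmgr *bufmgr;
 *
 *    bufmgr = drm_intel_bufmgr_gem_init(fd, 16384);
 *    if (bufmgr == NULL)
 *        return;
 *    drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *    ...
 *    drm_intel_bufmgr_destroy(bufmgr);
 */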