/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <dev/drm2/drmP.h>
#include <dev/drm2/drm.h>
#include <dev/drm2/i915/i915_drm.h>
#include <dev/drm2/i915/i915_drv.h>
#include <dev/drm2/i915/intel_drv.h>
#include <sys/limits.h>
#include <sys/sf_buf.h>

struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
	uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped to GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
				  struct intel_ring_buffer *ring,
				  struct change_domains *cd)
{
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (obj->base.pending_write_domain == 0)
		obj->base.pending_read_domains |= obj->base.read_domains;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->base.write_domain &&
	    (((obj->base.write_domain != obj->base.pending_read_domains ||
	       obj->ring != ring)) ||
	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
		flush_domains |= obj->base.write_domain;
		invalidate_domains |=
			obj->base.pending_read_domains & ~obj->base.write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
		i915_gem_clflush_object(obj);

	if (obj->base.pending_write_domain)
		cd->flips |= atomic_load_acq_int(&obj->pending_flip);

	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains). So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
		obj->base.pending_write_domain = obj->base.write_domain;

	cd->invalidate_domains |= invalidate_domains;
	cd->flush_domains |= flush_domains;
	if (flush_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(obj->ring);
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(ring);
}

struct eb_objects {
	u_long hashmask;
	LIST_HEAD(, drm_i915_gem_object) *buckets;
};

static struct eb_objects *
eb_create(int size)
{
	struct eb_objects *eb;

	eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
	return (eb);
}

static void
eb_reset(struct eb_objects *eb)
{
	int i;

	for (i = 0; i <= eb->hashmask; i++)
		LIST_INIT(&eb->buckets[i]);
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{

	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
	    obj, exec_node);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	struct drm_i915_gem_object *obj;

	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
		if (obj->exec_handle == handle)
			return (obj);
	}
	return (NULL);
}

static void
eb_destroy(struct eb_objects *eb)
{

	free(eb->buckets, DRM_I915_GEM);
	free(eb, DRM_I915_GEM);
}

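/*
 * A condensed sketch (illustrative only, not driver code) of how
 * execbuffer drives the helpers above: one eb_objects table is created
 * per execbuffer call, populated from the handle list, and consulted
 * while applying relocations.
 *
 *	struct eb_objects *eb = eb_create(args->buffer_count);
 *	for each exec entry i:
 *		obj->exec_handle = exec[i].handle;
 *		eb_add_object(eb, obj);
 *	...
 *	target = eb_get_object(eb, reloc->target_handle);
 *	...
 *	eb_reset(eb);	(before re-populating on the slow path)
 *	eb_destroy(eb);
 */
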
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
    struct eb_objects *eb,
    struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_offset = to_intel_bo(target_obj)->gtt_offset;

#if WATCH_RELOC
	DRM_INFO("%s: obj %p offset %08x target %d "
		 "read %08x write %08x gtt %08x "
		 "presumed %08x delta %08x\n",
		 __func__,
		 obj,
		 (int) reloc->offset,
		 (int) reloc->target_handle,
		 (int) reloc->read_domains,
		 (int) reloc->write_domain,
		 (int) target_offset,
		 (int) reloc->presumed_offset,
		 reloc->delta);
#endif

	/* The target buffer should have appeared before us in the
	 * exec_object list, so it should have a GTT space bound by now.
	 */
	if (unlikely(target_offset == 0)) {
		DRM_DEBUG("No GTT space found for object %d\n",
		    reloc->target_handle);
		return ret;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
		    "obj %p target %d offset %d "
		    "read %08x write %08x",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    reloc->read_domains,
		    reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
	    & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
		    "obj %p target %d offset %d "
		    "read %08x write %08x",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    reloc->read_domains,
		    reloc->write_domain);
		return ret;
	}
	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
	    reloc->write_domain != target_obj->pending_write_domain)) {
		DRM_DEBUG("Write domain conflict: "
		    "obj %p target %d offset %d "
		    "new %08x old %08x\n",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    reloc->write_domain,
		    target_obj->pending_write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
		    "obj %p target %d offset %d size %d.\n",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
		    "obj %p target %d offset %d.\n",
		    obj, reloc->target_handle,
		    (int) reloc->offset);
		return ret;
	}

	reloc->delta += target_offset;
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;
		struct sf_buf *sf;

		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
		    SFB_NOWAIT);
		if (sf == NULL)
			return (-ENOMEM);
		vaddr = (void *)sf_buf_kva(sf);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		sf_buf_free(sf);
	} else {
		uint32_t *reloc_entry;
		char *reloc_page;

		/* We can't wait for rendering with pagefaults disabled */
		if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
			return (-EFAULT);
		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
		if (ret)
			return ret;

		/*
		 * Map the page containing the relocation we're going
		 * to perform.
		 */
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
		    PAGE_MASK));
		*(volatile uint32_t *)reloc_entry = reloc->delta;
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

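/*
 * For reference, the shape of a relocation as userspace submits it
 * (struct drm_i915_gem_relocation_entry from i915_drm.h); the function
 * above patches delta + target_offset into the batch at `offset'.
 * A minimal, hypothetical entry (handle and offsets are placeholders):
 *
 *	struct drm_i915_gem_relocation_entry reloc = {
 *		.target_handle = target_bo_handle,
 *		.offset = reloc_location_in_batch,	(4-byte aligned)
 *		.delta = byte_offset_into_target,
 *		.presumed_offset = 0,	(kernel writes the real offset back)
 *		.read_domains = I915_GEM_DOMAIN_RENDER,
 *		.write_domain = 0,
 *	};
 */
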
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
    struct eb_objects *eb)
{
	struct drm_i915_gem_relocation_entry *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	struct drm_i915_gem_relocation_entry reloc;
	int i, ret;

	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
	for (i = 0; i < entry->relocation_count; i++) {
		ret = -copyin_nofault(user_relocs + i, &reloc, sizeof(reloc));
		if (ret != 0)
			return (ret);

		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
		if (ret != 0)
			return (ret);

		ret = -copyout_nofault(&reloc.presumed_offset,
		    &user_relocs[i].presumed_offset,
		    sizeof(reloc.presumed_offset));
		if (ret != 0)
			return (ret);
	}

	return (0);
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb,
			     struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	int ret, pflags;

	/* Try to move as many of the relocation targets off the active list
	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
	 * for the retirement with pagefaults disabled.
	 */
	i915_gem_retire_requests(dev);

	ret = 0;
	pflags = vm_fault_disable_pagefaults();
	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the device lock lest the user pass in the relocations
	 * contained within a mmaped bo. In such a case the page fault
	 * handler would call i915_gem_fault() and we would try to
	 * acquire the device lock again. Obviously this is bad.
	 */

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret != 0)
			break;
	}
	vm_fault_enable_pagefaults(pflags);
	return (ret);
}

#define __EXEC_OBJECT_HAS_FENCE (1<<31)

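/*
 * Pin an object into the GTT for execbuffer and, on pre-gen4 hardware
 * where the GPU reaches tiled buffers through fence registers, obtain
 * and pin a fence for it as well. On failure the object is left
 * unpinned.
 */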
static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
		     struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable =
		entry->relocation_count ? true : need_fence;

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
	if (ret)
		return ret;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			if (obj->tiling_mode) {
				ret = i915_gem_object_get_fence(obj, ring);
				if (ret)
					goto err_unpin;

				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
				i915_gem_object_pin_fence(obj);
			} else {
				ret = i915_gem_object_put_fence(obj);
				if (ret)
					goto err_unpin;
			}
			obj->pending_fenced_gpu_access = true;
		}
	}

	entry->offset = obj->gtt_offset;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
	return ret;
}

static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct drm_file *file,
			    struct list_head *objects)
{
	drm_i915_private_t *dev_priv;
	struct drm_i915_gem_object *obj;
	int ret, retry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	struct list_head ordered_objects;

	dev_priv = ring->dev->dev_private;
	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable =
			entry->relocation_count ? true : need_fence;

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects and obtain
	 *     a fence register if required.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable =
				entry->relocation_count ? true : need_fence;

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = pin_and_fence_object(obj, ring);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = pin_and_fence_object(obj, ring);
			if (ret) {
				int ret_ignore;

				/* This can potentially raise a harmless
				 * -EINVAL if we failed to bind in the above
				 * call. It cannot raise -EINTR since we know
				 * that the bo is freshly bound and so will
				 * not need to be flushed or waited upon.
				 */
				ret_ignore = i915_gem_object_unbind(obj);
				(void)ret_ignore;
				if (obj->gtt_space != NULL)
					printf("%s: gtt_space\n", __func__);
				break;
			}
		}

		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry;

			if (!obj->gtt_space)
				continue;

			entry = obj->exec_entry;
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
				i915_gem_object_unpin_fence(obj);
				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
			}

			i915_gem_object_unpin(obj);

			/* ... and ensure ppgtt mapping exists if needed. */
			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
				    obj, obj->cache_level);

				obj->has_aliasing_ppgtt_mapping = 1;
			}
		}

		if (ret != -ENOSPC || retry > 1)
			return ret;

		/* First attempt, just clear anything that is purgeable.
		 * Second attempt, clear the entire GTT.
		 */
667235783Skib */ 668235783Skib ret = i915_gem_evict_everything(ring->dev, retry == 0); 669235783Skib if (ret) 670235783Skib return ret; 671235783Skib 672235783Skib retry++; 673235783Skib } while (1); 674235783Skib 675235783Skiberr: 676235783Skib list_for_each_entry_continue_reverse(obj, objects, exec_list) { 677235783Skib struct drm_i915_gem_exec_object2 *entry; 678235783Skib 679235783Skib if (!obj->gtt_space) 680235783Skib continue; 681235783Skib 682235783Skib entry = obj->exec_entry; 683235783Skib if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { 684235783Skib i915_gem_object_unpin_fence(obj); 685235783Skib entry->flags &= ~__EXEC_OBJECT_HAS_FENCE; 686235783Skib } 687235783Skib 688235783Skib i915_gem_object_unpin(obj); 689235783Skib } 690235783Skib 691235783Skib return ret; 692235783Skib} 693235783Skib 694235783Skibstatic int 695235783Skibi915_gem_execbuffer_relocate_slow(struct drm_device *dev, 696235783Skib struct drm_file *file, struct intel_ring_buffer *ring, 697235783Skib struct list_head *objects, struct eb_objects *eb, 698235783Skib struct drm_i915_gem_exec_object2 *exec, int count) 699235783Skib{ 700235783Skib struct drm_i915_gem_relocation_entry *reloc; 701235783Skib struct drm_i915_gem_object *obj; 702235783Skib int *reloc_offset; 703235783Skib int i, total, ret; 704235783Skib 705235783Skib /* We may process another execbuffer during the unlock... */ 706235783Skib while (!list_empty(objects)) { 707235783Skib obj = list_first_entry(objects, 708235783Skib struct drm_i915_gem_object, 709235783Skib exec_list); 710235783Skib list_del_init(&obj->exec_list); 711235783Skib drm_gem_object_unreference(&obj->base); 712235783Skib } 713235783Skib 714235783Skib DRM_UNLOCK(dev); 715235783Skib 716235783Skib total = 0; 717235783Skib for (i = 0; i < count; i++) 718235783Skib total += exec[i].relocation_count; 719235783Skib 720235783Skib reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM, 721235783Skib M_WAITOK | M_ZERO); 722235783Skib reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO); 723235783Skib 724235783Skib total = 0; 725235783Skib for (i = 0; i < count; i++) { 726235783Skib struct drm_i915_gem_relocation_entry *user_relocs; 727235783Skib 728235783Skib user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr; 729235783Skib ret = -copyin(user_relocs, reloc + total, 730235783Skib exec[i].relocation_count * sizeof(*reloc)); 731235783Skib if (ret != 0) { 732235783Skib DRM_LOCK(dev); 733235783Skib goto err; 734235783Skib } 735235783Skib 736235783Skib reloc_offset[i] = total; 737235783Skib total += exec[i].relocation_count; 738235783Skib } 739235783Skib 740235783Skib ret = i915_mutex_lock_interruptible(dev); 741235783Skib if (ret) { 742235783Skib DRM_LOCK(dev); 743235783Skib goto err; 744235783Skib } 745235783Skib 746235783Skib /* reacquire the objects */ 747235783Skib eb_reset(eb); 748235783Skib for (i = 0; i < count; i++) { 749235783Skib struct drm_i915_gem_object *obj; 750235783Skib 751235783Skib obj = to_intel_bo(drm_gem_object_lookup(dev, file, 752235783Skib exec[i].handle)); 753235783Skib if (&obj->base == NULL) { 754235783Skib DRM_DEBUG("Invalid object handle %d at index %d\n", 755235783Skib exec[i].handle, i); 756235783Skib ret = -ENOENT; 757235783Skib goto err; 758235783Skib } 759235783Skib 760235783Skib list_add_tail(&obj->exec_list, objects); 761235783Skib obj->exec_handle = exec[i].handle; 762235783Skib obj->exec_entry = &exec[i]; 763235783Skib eb_add_object(eb, obj); 764235783Skib } 765235783Skib 766235783Skib ret = i915_gem_execbuffer_reserve(ring, file, 
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
    struct drm_file *file, struct intel_ring_buffer *ring,
    struct list_head *objects, struct eb_objects *eb,
    struct drm_i915_gem_exec_object2 *exec, int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	int *reloc_offset;
	int i, total, ret;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(objects)) {
		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
	    M_WAITOK | M_ZERO);
	reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry *user_relocs;

		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
		ret = -copyin(user_relocs, reloc + total,
		    exec[i].relocation_count * sizeof(*reloc));
		if (ret != 0) {
			DRM_LOCK(dev);
			goto err;
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		DRM_LOCK(dev);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
		    exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
			    exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		list_add_tail(&obj->exec_list, objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	ret = i915_gem_execbuffer_reserve(ring, file, objects);
	if (ret)
		goto err;

	list_for_each_entry(obj, objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
		    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	free(reloc, DRM_I915_GEM);
	free(reloc_offset, DRM_I915_GEM);
	return ret;
}

static int
i915_gem_execbuffer_flush(struct drm_device *dev,
			  uint32_t invalidate_domains,
			  uint32_t flush_domains,
			  uint32_t flush_rings)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i, ret;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		for (i = 0; i < I915_NUM_RINGS; i++)
			if (flush_rings & (1 << i)) {
				ret = i915_gem_flush_ring(&dev_priv->rings[i],
				    invalidate_domains, flush_domains);
				if (ret)
					return ret;
			}
	}

	return 0;
}

static bool
intel_enable_semaphores(struct drm_device *dev)
{
	if (INTEL_INFO(dev)->gen < 6)
		return 0;

	if (i915_semaphores >= 0)
		return i915_semaphores;

	/* Enable semaphores on SNB when IO remapping is off */
	if (INTEL_INFO(dev)->gen == 6)
		return !intel_iommu_enabled;

	return 1;
}

static int
i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *to)
{
	struct intel_ring_buffer *from = obj->ring;
	u32 seqno;
	int ret, idx;

	if (from == NULL || to == from)
		return 0;

	/* XXX gpu semaphores are implicated in various hard hangs on SNB */
	if (!intel_enable_semaphores(obj->base.dev))
		return i915_gem_object_wait_rendering(obj);

	idx = intel_ring_sync_index(from, to);

	seqno = obj->last_rendering_seqno;
	if (seqno <= from->sync_seqno[idx])
		return 0;

	if (seqno == from->outstanding_lazy_request) {
		struct drm_i915_gem_request *request;

		request = malloc(sizeof(*request), DRM_I915_GEM,
		    M_WAITOK | M_ZERO);
		ret = i915_add_request(from, NULL, request);
		if (ret) {
			free(request, DRM_I915_GEM);
			return ret;
		}

		seqno = request->seqno;
867235783Skib } 868235783Skib 869235783Skib from->sync_seqno[idx] = seqno; 870235783Skib 871235783Skib return to->sync_to(to, from, seqno - 1); 872235783Skib} 873235783Skib 874235783Skibstatic int 875235783Skibi915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) 876235783Skib{ 877235783Skib u32 plane, flip_mask; 878235783Skib int ret; 879235783Skib 880235783Skib /* Check for any pending flips. As we only maintain a flip queue depth 881235783Skib * of 1, we can simply insert a WAIT for the next display flip prior 882235783Skib * to executing the batch and avoid stalling the CPU. 883235783Skib */ 884235783Skib 885235783Skib for (plane = 0; flips >> plane; plane++) { 886235783Skib if (((flips >> plane) & 1) == 0) 887235783Skib continue; 888235783Skib 889235783Skib if (plane) 890235783Skib flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; 891235783Skib else 892235783Skib flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; 893235783Skib 894235783Skib ret = intel_ring_begin(ring, 2); 895235783Skib if (ret) 896235783Skib return ret; 897235783Skib 898235783Skib intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); 899235783Skib intel_ring_emit(ring, MI_NOOP); 900235783Skib intel_ring_advance(ring); 901235783Skib } 902235783Skib 903235783Skib return 0; 904235783Skib} 905235783Skib 906235783Skibstatic int 907235783Skibi915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, 908235783Skib struct list_head *objects) 909235783Skib{ 910235783Skib struct drm_i915_gem_object *obj; 911235783Skib struct change_domains cd; 912235783Skib int ret; 913235783Skib 914235783Skib memset(&cd, 0, sizeof(cd)); 915235783Skib list_for_each_entry(obj, objects, exec_list) 916235783Skib i915_gem_object_set_to_gpu_domain(obj, ring, &cd); 917235783Skib 918235783Skib if (cd.invalidate_domains | cd.flush_domains) { 919235783Skib#if WATCH_EXEC 920235783Skib DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", 921235783Skib __func__, 922235783Skib cd.invalidate_domains, 923235783Skib cd.flush_domains); 924235783Skib#endif 925235783Skib ret = i915_gem_execbuffer_flush(ring->dev, 926235783Skib cd.invalidate_domains, 927235783Skib cd.flush_domains, 928235783Skib cd.flush_rings); 929235783Skib if (ret) 930235783Skib return ret; 931235783Skib } 932235783Skib 933235783Skib if (cd.flips) { 934235783Skib ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips); 935235783Skib if (ret) 936235783Skib return ret; 937235783Skib } 938235783Skib 939235783Skib list_for_each_entry(obj, objects, exec_list) { 940235783Skib ret = i915_gem_execbuffer_sync_rings(obj, ring); 941235783Skib if (ret) 942235783Skib return ret; 943235783Skib } 944235783Skib 945235783Skib return 0; 946235783Skib} 947235783Skib 948235783Skibstatic bool 949235783Skibi915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) 950235783Skib{ 951235783Skib return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0; 952235783Skib} 953235783Skib 954235783Skibstatic int 955235783Skibvalidate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count, 956235783Skib vm_page_t ***map) 957235783Skib{ 958235783Skib vm_page_t *ma; 959235783Skib int i, length, page_count; 960235783Skib 961235783Skib /* XXXKIB various limits checking is missing there */ 962235783Skib *map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO); 963235783Skib for (i = 0; i < count; i++) { 964235783Skib /* First check for malicious input causing overflow */ 965235783Skib if (exec[i].relocation_count > 966235783Skib INT_MAX / sizeof(struct drm_i915_gem_relocation_entry)) 
967235783Skib return -EINVAL; 968235783Skib 969235783Skib length = exec[i].relocation_count * 970235783Skib sizeof(struct drm_i915_gem_relocation_entry); 971235783Skib if (length == 0) { 972235783Skib (*map)[i] = NULL; 973235783Skib continue; 974235783Skib } 975235783Skib /* 976235783Skib * Since both start and end of the relocation region 977235783Skib * may be not aligned on the page boundary, be 978235783Skib * conservative and request a page slot for each 979235783Skib * partial page. Thus +2. 980235783Skib */ 981235783Skib page_count = howmany(length, PAGE_SIZE) + 2; 982235783Skib ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t), 983235783Skib DRM_I915_GEM, M_WAITOK | M_ZERO); 984235783Skib if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, 985235783Skib exec[i].relocs_ptr, length, VM_PROT_READ | VM_PROT_WRITE, 986235783Skib ma, page_count) == -1) { 987235783Skib free(ma, DRM_I915_GEM); 988235783Skib (*map)[i] = NULL; 989235783Skib return (-EFAULT); 990235783Skib } 991235783Skib } 992235783Skib 993235783Skib return 0; 994235783Skib} 995235783Skib 996235783Skibstatic void 997235783Skibi915_gem_execbuffer_move_to_active(struct list_head *objects, 998235783Skib struct intel_ring_buffer *ring, 999235783Skib u32 seqno) 1000235783Skib{ 1001235783Skib struct drm_i915_gem_object *obj; 1002235783Skib uint32_t old_read, old_write; 1003235783Skib 1004235783Skib list_for_each_entry(obj, objects, exec_list) { 1005235783Skib old_read = obj->base.read_domains; 1006235783Skib old_write = obj->base.write_domain; 1007235783Skib 1008235783Skib obj->base.read_domains = obj->base.pending_read_domains; 1009235783Skib obj->base.write_domain = obj->base.pending_write_domain; 1010235783Skib obj->fenced_gpu_access = obj->pending_fenced_gpu_access; 1011235783Skib 1012235783Skib i915_gem_object_move_to_active(obj, ring, seqno); 1013235783Skib if (obj->base.write_domain) { 1014235783Skib obj->dirty = 1; 1015235783Skib obj->pending_gpu_write = true; 1016235783Skib list_move_tail(&obj->gpu_write_list, 1017235783Skib &ring->gpu_write_list); 1018235783Skib intel_mark_busy(ring->dev, obj); 1019235783Skib } 1020235783Skib CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x", 1021235783Skib obj, old_read, old_write); 1022235783Skib } 1023235783Skib} 1024235783Skib 1025235783Skibint i915_gem_sync_exec_requests; 1026235783Skib 1027235783Skibstatic void 1028235783Skibi915_gem_execbuffer_retire_commands(struct drm_device *dev, 1029235783Skib struct drm_file *file, 1030235783Skib struct intel_ring_buffer *ring) 1031235783Skib{ 1032235783Skib struct drm_i915_gem_request *request; 1033235783Skib u32 invalidate; 1034235783Skib 1035235783Skib /* 1036235783Skib * Ensure that the commands in the batch buffer are 1037235783Skib * finished before the interrupt fires. 1038235783Skib * 1039235783Skib * The sampler always gets flushed on i965 (sigh). 
1040235783Skib */ 1041235783Skib invalidate = I915_GEM_DOMAIN_COMMAND; 1042235783Skib if (INTEL_INFO(dev)->gen >= 4) 1043235783Skib invalidate |= I915_GEM_DOMAIN_SAMPLER; 1044235783Skib if (ring->flush(ring, invalidate, 0)) { 1045235783Skib i915_gem_next_request_seqno(ring); 1046235783Skib return; 1047235783Skib } 1048235783Skib 1049235783Skib /* Add a breadcrumb for the completion of the batch buffer */ 1050235783Skib request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO); 1051235783Skib if (request == NULL || i915_add_request(ring, file, request)) { 1052235783Skib i915_gem_next_request_seqno(ring); 1053235783Skib free(request, DRM_I915_GEM); 1054235783Skib } else if (i915_gem_sync_exec_requests) 1055235783Skib i915_wait_request(ring, request->seqno, true); 1056235783Skib} 1057235783Skib 1058235783Skibstatic void 1059235783Skibi915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj, 1060235783Skib uint32_t batch_start_offset, uint32_t batch_len) 1061235783Skib{ 1062235783Skib char *mkva; 1063235783Skib uint64_t po_r, po_w; 1064235783Skib uint32_t cmd; 1065235783Skib 1066235783Skib po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset + 1067235783Skib batch_start_offset + batch_len; 1068235783Skib if (batch_len > 0) 1069235783Skib po_r -= 4; 1070235783Skib mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE, 1071235783Skib PAT_WRITE_COMBINING); 1072236182Skib po_r &= PAGE_MASK; 1073236182Skib cmd = *(uint32_t *)(mkva + po_r); 1074235783Skib 1075235783Skib if (cmd != MI_BATCH_BUFFER_END) { 1076235783Skib /* 1077235783Skib * batch_len != 0 due to the check at the start of 1078235783Skib * i915_gem_do_execbuffer 1079235783Skib */ 1080235783Skib if (batch_obj->base.size > batch_start_offset + batch_len) { 1081235783Skib po_w = po_r + 4; 1082235783Skib/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */ 1083235783Skib } else { 1084235783Skib po_w = po_r; 1085235783SkibDRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n"); 1086235783Skib } 1087236182Skib *(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END; 1088235783Skib } 1089235783Skib 1090235783Skib pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE); 1091235783Skib} 1092235783Skib 1093236183Skibint i915_fix_mi_batchbuffer_end = 0; 1094235783Skib 1095235783Skib static int 1096235783Skibi915_reset_gen7_sol_offsets(struct drm_device *dev, 1097235783Skib struct intel_ring_buffer *ring) 1098235783Skib{ 1099235783Skib drm_i915_private_t *dev_priv = dev->dev_private; 1100235783Skib int ret, i; 1101235783Skib 1102235783Skib if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS]) 1103235783Skib return 0; 1104235783Skib 1105235783Skib ret = intel_ring_begin(ring, 4 * 3); 1106235783Skib if (ret) 1107235783Skib return ret; 1108235783Skib 1109235783Skib for (i = 0; i < 4; i++) { 1110235783Skib intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 1111235783Skib intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i)); 1112235783Skib intel_ring_emit(ring, 0); 1113235783Skib } 1114235783Skib 1115235783Skib intel_ring_advance(ring); 1116235783Skib 1117235783Skib return 0; 1118235783Skib} 1119235783Skib 1120235783Skibstatic int 1121235783Skibi915_gem_do_execbuffer(struct drm_device *dev, void *data, 1122235783Skib struct drm_file *file, 1123235783Skib struct drm_i915_gem_execbuffer2 *args, 1124235783Skib struct drm_i915_gem_exec_object2 *exec) 1125235783Skib{ 1126235783Skib drm_i915_private_t *dev_priv = dev->dev_private; 1127235783Skib struct list_head objects; 1128235783Skib struct 
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct list_head objects;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	vm_page_t **relocs_ma;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 seqno;
	u32 mask;
	int ret, mode, i;

	if (!i915_gem_check_execbuffer(args)) {
		DRM_DEBUG("execbuf with invalid offset/length\n");
		return -EINVAL;
	}

	if (args->batch_len == 0)
		return (0);

	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma);
	if (ret != 0)
		goto pre_struct_lock_err;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->rings[RCS];
		break;
	case I915_EXEC_BSD:
		if (!HAS_BSD(dev)) {
			DRM_DEBUG("execbuf with invalid ring (BSD)\n");
			return -EINVAL;
		}
		ring = &dev_priv->rings[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
			    ring->name);
			return -EPERM;
		}
		break;
	case I915_EXEC_BLT:
		if (!HAS_BLT(dev)) {
			DRM_DEBUG("execbuf with invalid ring (BLT)\n");
			return -EINVAL;
		}
		ring = &dev_priv->rings[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
			    ring->name);
			return -EPERM;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
		    (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->rings[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->rings[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
			    args->num_cliprects);
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}
		cliprects = malloc(sizeof(*cliprects) * args->num_cliprects,
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
		    sizeof(*cliprects) * args->num_cliprects);
		if (ret != 0)
			goto pre_struct_lock_err;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_struct_lock_err;

	if (dev_priv->mm.suspended) {
		ret = -EBUSY;
		goto struct_lock_err;
	}

	eb = eb_create(args->buffer_count);
	if (eb == NULL) {
		ret = -ENOMEM;
		goto struct_lock_err;
	}

	/* Look up object handles */
	INIT_LIST_HEAD(&objects);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
		    exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
			    exec[i].handle, i);
			/* prevent error path from reading uninitialized data */
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->exec_list)) {
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
			    obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		list_add_tail(&obj->exec_list, &objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
			    &objects, eb, exec, args->buffer_count);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	seqno = i915_gem_next_request_seqno(ring);
	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
		if (seqno < ring->sync_seqno[i]) {
			/* The GPU can not handle its semaphore value wrapping,
			 * so every billion or so execbuffers, we need to stall
			 * the GPU in order to reset the counters.
			 */
			ret = i915_gpu_idle(dev, true);
			if (ret)
				goto err;

			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
		}
	}

	if (ring == &dev_priv->rings[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;

	if (i915_fix_mi_batchbuffer_end) {
		i915_gem_fix_mi_batchbuffer_end(batch_obj,
		    args->batch_start_offset, args->batch_len);
	}

	CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
	    exec_start, exec_len);

	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box_p(dev, &cliprects[i],
			    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring, exec_start,
			    exec_len);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
		if (ret)
			goto err;
	}

	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);
	while (!list_empty(&objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&objects, struct drm_i915_gem_object,
		    exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
struct_lock_err:
	DRM_UNLOCK(dev);

pre_struct_lock_err:
	for (i = 0; i < args->buffer_count; i++) {
		if (relocs_ma[i] != NULL) {
			vm_page_unhold_pages(relocs_ma[i], howmany(
			    exec[i].relocation_count *
			    sizeof(struct drm_i915_gem_relocation_entry),
			    PAGE_SIZE));
			free(relocs_ma[i], DRM_I915_GEM);
		}
	}
	free(relocs_ma, DRM_I915_GEM);
	free(cliprects, DRM_I915_GEM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
	    (int) args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	/* XXXKIB user-controlled malloc size */
	exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
	    sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
		    args->buffer_count, ret);
		free(exec_list, DRM_I915_GEM);
		free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec_list) * args->buffer_count);
		if (ret != 0) {
			DRM_DEBUG("failed to copy %d exec entries "
			    "back to user (%d)\n",
			    args->buffer_count, ret);
		}
	}

	free(exec_list, DRM_I915_GEM);
	free(exec2_list, DRM_I915_GEM);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
	    (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* XXXKIB user-controllable malloc size */
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
	    sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
		    args->buffer_count, ret);
		free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			DRM_DEBUG("failed to copy %d exec entries "
			    "back to user (%d)\n",
			    args->buffer_count, ret);
		}
	}

	free(exec2_list, DRM_I915_GEM);
	return ret;
}