/* i915_gem_execbuffer.c revision 287177 */
/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/drm2/i915/i915_gem_execbuffer.c 287177 2015-08-26 22:19:53Z bapt $");

#include <dev/drm2/drmP.h>
#include <dev/drm2/drm.h>
#include <dev/drm2/i915/i915_drm.h>
#include <dev/drm2/i915/i915_drv.h>
#include <dev/drm2/i915/intel_drv.h>
#include <sys/limits.h>
#include <sys/sf_buf.h>

struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
	uint32_t flips;
};
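
/*
 * A change_domains structure accumulates, across every object in an
 * execbuffer, the cache domains that must be invalidated or flushed and
 * the rings that need flushing before the batch may run.  As a rough
 * sketch of the loop in i915_gem_execbuffer_move_to_gpu() below:
 *
 *	memset(&cd, 0, sizeof(cd));
 *	for each obj on the execbuffer list:
 *		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
 *	then flush/invalidate once, using the accumulated cd masks.
 *
 * Batching the domain transitions this way lets a single flush cover
 * every object in the request.
 */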

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped to GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
				  struct intel_ring_buffer *ring,
				  struct change_domains *cd)
{
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (obj->base.pending_write_domain == 0)
		obj->base.pending_read_domains |= obj->base.read_domains;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->base.write_domain &&
	    (((obj->base.write_domain != obj->base.pending_read_domains ||
	       obj->ring != ring)) ||
	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
		flush_domains |= obj->base.write_domain;
		invalidate_domains |=
		    obj->base.pending_read_domains & ~obj->base.write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
		i915_gem_clflush_object(obj);

	if (obj->base.pending_write_domain)
		cd->flips |= atomic_load_acq_int(&obj->pending_flip);

	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains). So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
		obj->base.pending_write_domain = obj->base.write_domain;

	cd->invalidate_domains |= invalidate_domains;
	cd->flush_domains |= flush_domains;
	if (flush_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(obj->ring);
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(ring);
}
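
/*
 * The eb_objects table maps execbuffer handles to objects so that
 * relocation targets can be resolved without a full handle lookup per
 * relocation entry.  It is a simple chained hash table keyed on the low
 * bits of the handle; resolving a target is, in effect:
 *
 *	target = eb_get_object(eb, reloc->target_handle);
 *
 * which walks one hash bucket.  Every object was referenced when it was
 * added to the execbuffer list, so pointers returned from the table
 * stay valid for the duration of the operation.
 */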
struct eb_objects {
	u_long hashmask;
	LIST_HEAD(, drm_i915_gem_object) *buckets;
};

static struct eb_objects *
eb_create(int size)
{
	struct eb_objects *eb;

	eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
	return (eb);
}

static void
eb_reset(struct eb_objects *eb)
{
	int i;

	for (i = 0; i <= eb->hashmask; i++)
		LIST_INIT(&eb->buckets[i]);
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{

	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
	    obj, exec_node);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	struct drm_i915_gem_object *obj;

	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
		if (obj->exec_handle == handle)
			return (obj);
	}
	return (NULL);
}

static void
eb_destroy(struct eb_objects *eb)
{

	free(eb->buckets, DRM_I915_GEM);
	free(eb, DRM_I915_GEM);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
	    obj->cache_level != I915_CACHE_NONE);
}
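
/*
 * A relocation entry asks us to patch one 32-bit word inside a buffer
 * with the final GTT address of another buffer.  Once the target's
 * offset is known, the patch performed below amounts to:
 *
 *	*(uint32_t *)(mapping_of_obj + reloc->offset) =
 *	    target->gtt_offset + reloc->delta;
 *
 * The write goes either through an sf_buf CPU mapping (when the object
 * is CPU-coherent, see use_cpu_reloc()) or through a write-combining
 * GTT mapping.  If the target already sits where userspace guessed
 * (presumed_offset), no write is needed at all.
 */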
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

#if WATCH_RELOC
	DRM_INFO("%s: obj %p offset %08x target %d "
		 "read %08x write %08x gtt %08x "
		 "presumed %08x delta %08x\n",
		 __func__,
		 obj,
		 (int) reloc->offset,
		 (int) reloc->target_handle,
		 (int) reloc->read_domains,
		 (int) reloc->write_domain,
		 (int) target_offset,
		 (int) reloc->presumed_offset,
		 reloc->delta);
#endif

	/* The target buffer should have appeared before us in the
	 * exec_object list, so it should have a GTT space bound by now.
	 */
	if (unlikely(target_offset == 0)) {
		DRM_DEBUG("No GTT space found for object %d\n",
			  reloc->target_handle);
		return ret;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
		     reloc->write_domain != target_obj->pending_write_domain)) {
		DRM_DEBUG("Write domain conflict: "
			  "obj %p target %d offset %d "
			  "new %08x old %08x\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->write_domain,
			  target_obj->pending_write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
		return (-EFAULT);

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;
		struct sf_buf *sf;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
		    SFB_NOWAIT);
		if (sf == NULL)
			return (-ENOMEM);
		vaddr = (void *)sf_buf_kva(sf);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		sf_buf_free(sf);
	} else {
		uint32_t *reloc_entry;
		char *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/* Map the page containing the relocation we're going to perform. */
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
		    PAGE_MASK));
		*(volatile uint32_t *)reloc_entry = reloc->delta;
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers.
	 */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}
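
/*
 * Fast-path relocation processing: user relocation entries are copied
 * in bounded chunks through a small on-stack buffer (512 bytes' worth
 * of entries at a time) using the fault-disabled copyin/copyout
 * variants.  If the user memory is not resident, copyin_nofault()
 * fails and the caller falls back to the slow path, which may sleep.
 * Updated presumed_offset values are written back to userspace only
 * when they actually changed.
 */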
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > DRM_ARRAY_SIZE(stack_reloc))
			count = DRM_ARRAY_SIZE(stack_reloc);
		remain -= count;

		ret = -copyin_nofault(user_relocs, r, count * sizeof(r[0]));
		if (ret != 0)
			return (ret);

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    copyout_nofault(&r->presumed_offset,
					    &user_relocs->presumed_offset,
					    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}
static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
					 struct eb_objects *eb,
					 struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb,
			     struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	int ret, pflags;

	/* Try to move as many of the relocation targets off the active list
	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
	 * for the retirement with pagefaults disabled.
	 */
	i915_gem_retire_requests(dev);

	ret = 0;
	pflags = vm_fault_disable_pagefaults();
	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the device lock lest the user pass in the relocations
	 * contained within a mmaped bo. For in such a case the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the device lock again. Obviously this is bad.
	 */

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret)
			break;
	}
	vm_fault_enable_pagefaults(pflags);

	return ret;
}
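
/*
 * Reservation pins every object into the GTT and, on pre-gen4 hardware
 * (where the GPU accesses tiled buffers through fence registers), may
 * also claim a fence register.  __EXEC_OBJECT_HAS_FENCE is a private
 * flag stored in the exec entry so that the unwind paths know whether
 * this execbuffer holds the fence pin and therefore must release it.
 */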
#define __EXEC_OBJECT_HAS_FENCE (1<<31)

static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(obj);
}

static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
		     struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(obj);

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
	if (ret)
		return ret;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
				goto err_unpin;

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	entry->offset = obj->gtt_offset;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
	return ret;
}
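
/*
 * Before binding, the object list is partitioned so that buffers which
 * must live in the mappable (CPU-visible) part of the aperture come
 * first.  Placing the constrained objects first makes it less likely
 * that an unconstrained buffer grabs the aperture space they need,
 * which would otherwise force an unbind/rebind cycle or an -ENOSPC
 * eviction pass.
 */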
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct drm_file *file,
			    struct list_head *objects)
{
	drm_i915_private_t *dev_priv;
	struct drm_i915_gem_object *obj;
	struct list_head ordered_objects;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int ret, retry;

	dev_priv = ring->dev->dev_private;
	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(obj);

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = pin_and_fence_object(obj, ring);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = pin_and_fence_object(obj, ring);
			if (ret) {
				int ret_ignore;

				/* This can potentially raise a harmless
				 * -EINVAL if we failed to bind in the above
				 * call. It cannot raise -EINTR since we know
				 * that the bo is freshly bound and so will
				 * not need to be flushed or waited upon.
				 */
				ret_ignore = i915_gem_object_unbind(obj);
				(void)ret_ignore;
				if (obj->gtt_space != NULL)
					printf("%s: gtt_space\n", __func__);
				break;
			}
		}

		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry;

			if (!obj->gtt_space)
				continue;

			entry = obj->exec_entry;
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
				i915_gem_object_unpin_fence(obj);
				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
			}

			i915_gem_object_unpin(obj);

			/* ... and ensure ppgtt mapping exists if needed. */
			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
						       obj, obj->cache_level);

				obj->has_aliasing_ppgtt_mapping = 1;
			}
		}

		if (ret != -ENOSPC || retry > 1)
			return ret;

		/* First attempt, just clear anything that is purgeable.
		 * Second attempt, clear the entire GTT.
		 */
		ret = i915_gem_evict_everything(ring->dev, retry == 0);
		if (ret)
			return ret;

		retry++;
	} while (1);

err:
	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
		struct drm_i915_gem_exec_object2 *entry;

		if (!obj->gtt_space)
			continue;

		entry = obj->exec_entry;
		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
			i915_gem_object_unpin_fence(obj);
			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
		}

		i915_gem_object_unpin(obj);
	}

	return ret;
}
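
/*
 * Slow relocation path, taken when the fast path hit a fault or had to
 * wait.  All object references are dropped and the device lock is
 * released so the relocation arrays can be copied in with page faults
 * enabled (and possibly sleeping).  The lock is then retaken, the
 * objects are looked up and reserved again, and the relocations are
 * applied from the kernel copy.
 */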
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_file *file,
				  struct intel_ring_buffer *ring,
				  struct list_head *objects,
				  struct eb_objects *eb,
				  struct drm_i915_gem_exec_object2 *exec,
				  int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	int *reloc_offset;
	int i, total, ret;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(objects)) {
		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
	    M_WAITOK | M_ZERO);
	reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry *user_relocs;

		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
		ret = -copyin(user_relocs, reloc + total,
		    exec[i].relocation_count * sizeof(*reloc));
		if (ret != 0) {
			DRM_LOCK(dev);
			goto err;
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		DRM_LOCK(dev);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		list_add_tail(&obj->exec_list, objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	ret = i915_gem_execbuffer_reserve(ring, file, objects);
	if (ret)
		goto err;

	list_for_each_entry(obj, objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
		    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are, this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	free(reloc, DRM_I915_GEM);
	free(reloc_offset, DRM_I915_GEM);
	return ret;
}
static int
i915_gem_execbuffer_flush(struct drm_device *dev,
			  uint32_t invalidate_domains,
			  uint32_t flush_domains,
			  uint32_t flush_rings)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i, ret;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		for (i = 0; i < I915_NUM_RINGS; i++)
			if (flush_rings & (1 << i)) {
				ret = i915_gem_flush_ring(&dev_priv->rings[i],
				    invalidate_domains, flush_domains);
				if (ret)
					return ret;
			}
	}

	return 0;
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
	u32 plane, flip_mask;
	int ret;

	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */

	for (plane = 0; flips >> plane; plane++) {
		if (((flips >> plane) & 1) == 0)
			continue;

		if (plane)
			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
		else
			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	}

	return 0;
}
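
/*
 * Moving the object set to the GPU is done in stages: the domain
 * transitions for every object are first accumulated into a single
 * change_domains summary (see i915_gem_object_set_to_gpu_domain()
 * above), then one combined flush/invalidate is emitted, any pending
 * page flips are waited for on the ring, and finally each object is
 * synchronized against rendering still outstanding on other rings.
 */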
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	struct change_domains cd;
	int ret;

	memset(&cd, 0, sizeof(cd));
	list_for_each_entry(obj, objects, exec_list)
		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

	if (cd.invalidate_domains | cd.flush_domains) {
#if WATCH_EXEC
		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
			 __func__,
			 cd.invalidate_domains,
			 cd.flush_domains);
#endif
		ret = i915_gem_execbuffer_flush(ring->dev,
						cd.invalidate_domains,
						cd.flush_domains,
						cd.flush_rings);
		if (ret)
			return ret;
	}

	if (cd.flips) {
		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
		if (ret)
			return ret;
	}

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;
	}

	return 0;
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}
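
/*
 * validate_exec_list() wires down the userspace relocation arrays so
 * that the fault-disabled fast path can read them safely.  Because a
 * relocation array may start and end in the middle of a page, the page
 * count is computed conservatively:
 *
 *	page_count = howmany(length, PAGE_SIZE) + 2;
 *
 * i.e. one slot per full page plus one extra slot for a partial page
 * at each end of the range.
 */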
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
    vm_page_t ***map)
{
	vm_page_t *ma;
	int i, length, page_count;

	/* XXXKIB various limits checking is missing there */
	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
	for (i = 0; i < count; i++) {
		/* First check for malicious input causing overflow */
		if (exec[i].relocation_count >
		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
			return -EINVAL;

		length = exec[i].relocation_count *
		    sizeof(struct drm_i915_gem_relocation_entry);
		if (length == 0) {
			(*map)[i] = NULL;
			continue;
		}
		/*
		 * Since both start and end of the relocation region
		 * may not be aligned on the page boundary, be
		 * conservative and request a page slot for each
		 * partial page. Thus +2.
		 */
		page_count = howmany(length, PAGE_SIZE) + 2;
		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
		    exec[i].relocs_ptr, length, VM_PROT_READ | VM_PROT_WRITE,
		    ma, page_count) == -1) {
			free(ma, DRM_I915_GEM);
			(*map)[i] = NULL;
			return (-EFAULT);
		}
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring,
				   u32 seqno)
{
	struct drm_i915_gem_object *obj;
	uint32_t old_read, old_write;

	list_for_each_entry(obj, objects, exec_list) {
		old_read = obj->base.read_domains;
		old_write = obj->base.write_domain;

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring, seqno);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->pending_gpu_write = true;
			list_move_tail(&obj->gpu_write_list,
				       &ring->gpu_write_list);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_busy(ring->dev, obj);
		}
		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
		    obj, old_read, old_write);
	}

	intel_mark_busy(ring->dev, NULL);
}

int i915_gem_sync_exec_requests;

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_request *request;
	u32 invalidate;

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires.
	 *
	 * The sampler always gets flushed on i965 (sigh).
	 */
	invalidate = I915_GEM_DOMAIN_COMMAND;
	if (INTEL_INFO(dev)->gen >= 4)
		invalidate |= I915_GEM_DOMAIN_SAMPLER;
	if (ring->flush(ring, invalidate, 0)) {
		i915_gem_next_request_seqno(ring);
		return;
	}

	/* Add a breadcrumb for the completion of the batch buffer */
	request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
	if (request == NULL || i915_add_request(ring, file, request)) {
		i915_gem_next_request_seqno(ring);
		free(request, DRM_I915_GEM);
	} else if (i915_gem_sync_exec_requests) {
		i915_wait_request(ring, request->seqno);
		i915_gem_retire_requests(dev);
	}
}
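
/*
 * Debugging aid, enabled via the i915_fix_mi_batchbuffer_end knob
 * below: map the tail of the batch through a write-combining window
 * and verify that the last command word is MI_BATCH_BUFFER_END,
 * appending one (or overwriting the last word when the object has no
 * room left) if userspace failed to terminate the batch.  Two pages
 * are mapped because the last word may sit right at a page boundary.
 */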
static void
i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
    uint32_t batch_start_offset, uint32_t batch_len)
{
	char *mkva;
	uint64_t po_r, po_w;
	uint32_t cmd;

	po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
	    batch_start_offset + batch_len;
	if (batch_len > 0)
		po_r -= 4;
	mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
	    PAT_WRITE_COMBINING);
	po_r &= PAGE_MASK;
	cmd = *(uint32_t *)(mkva + po_r);

	if (cmd != MI_BATCH_BUFFER_END) {
		/*
		 * batch_len != 0 due to the check at the start of
		 * i915_gem_do_execbuffer
		 */
		if (batch_obj->base.size > batch_start_offset + batch_len) {
			po_w = po_r + 4;
			/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
		} else {
			po_w = po_r;
			DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
		}
		*(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
	}

	pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
}

int i915_fix_mi_batchbuffer_end = 0;

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}
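
/*
 * i915_gem_do_execbuffer() is the core of the execbuffer path.  In
 * outline it:
 *
 *	1. validates the arguments and wires down the relocation arrays;
 *	2. selects the target ring and constant-buffer addressing mode;
 *	3. looks up every buffer and reserves it in the GTT;
 *	4. applies relocations, falling back to the slow path on -EFAULT;
 *	5. flushes/invalidates caches and synchronizes the rings;
 *	6. emits any state changes plus the batch dispatch itself;
 *	7. marks the objects active and queues a retirement request.
 */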
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct list_head objects;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	vm_page_t **relocs_ma;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 seqno;
	u32 mask;
	int ret, mode, i;

	if (!i915_gem_check_execbuffer(args)) {
		DRM_DEBUG("execbuf with invalid offset/length\n");
		return -EINVAL;
	}

	if (args->batch_len == 0)
		return (0);

	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma);
	if (ret != 0)
		goto pre_struct_lock_err;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->rings[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->rings[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_struct_lock_err;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->rings[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_struct_lock_err;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->rings[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			/* The HW changed the meaning of this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->rings[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}
		cliprects = malloc(sizeof(*cliprects) * args->num_cliprects,
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
		    sizeof(*cliprects) * args->num_cliprects);
		if (ret != 0)
			goto pre_struct_lock_err;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_struct_lock_err;

	if (dev_priv->mm.suspended) {
		DRM_UNLOCK(dev);
		ret = -EBUSY;
		goto pre_struct_lock_err;
	}

	eb = eb_create(args->buffer_count);
	if (eb == NULL) {
		DRM_UNLOCK(dev);
		ret = -ENOMEM;
		goto pre_struct_lock_err;
	}
	/* Look up object handles */
	INIT_LIST_HEAD(&objects);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			/* prevent error path from reading uninitialized data */
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->exec_list)) {
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				  obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		list_add_tail(&obj->exec_list, &objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
								&objects, eb,
								exec,
								args->buffer_count);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	seqno = i915_gem_next_request_seqno(ring);
	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
		if (seqno < ring->sync_seqno[i]) {
			/* The GPU can not handle its semaphore value wrapping,
			 * so every billion or so execbuffers, we need to stall
			 * the GPU in order to reset the counters.
			 */
			ret = i915_gpu_idle(dev);
			if (ret)
				goto err;
			i915_gem_retire_requests(dev);

			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
		}
	}
	if (ring == &dev_priv->rings[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;

	if (i915_fix_mi_batchbuffer_end) {
		i915_gem_fix_mi_batchbuffer_end(batch_obj,
		    args->batch_start_offset, args->batch_len);
	}

	CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
	    exec_start, exec_len);

	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len);
		if (ret)
			goto err;
	}

	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);
	while (!list_empty(&objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
	DRM_UNLOCK(dev);

pre_struct_lock_err:
	for (i = 0; i < args->buffer_count; i++) {
		if (relocs_ma[i] != NULL) {
			vm_page_unhold_pages(relocs_ma[i], howmany(
			    exec[i].relocation_count *
			    sizeof(struct drm_i915_gem_relocation_entry),
			    PAGE_SIZE));
			free(relocs_ma[i], DRM_I915_GEM);
		}
	}
	free(relocs_ma, DRM_I915_GEM);
	free(cliprects, DRM_I915_GEM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
		  (int) args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	/* XXXKIB user-controlled malloc size */
	exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
	    sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		free(exec_list, DRM_I915_GEM);
		free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec_list) * args->buffer_count);
		if (ret != 0) {
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	free(exec_list, DRM_I915_GEM);
	free(exec2_list, DRM_I915_GEM);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
	    (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* XXXKIB user-controllable malloc size */
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
	    sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		free(exec2_list, DRM_I915_GEM);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	free(exec2_list, DRM_I915_GEM);
	return ret;
}