/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <dev/drm2/drmP.h>
#include <dev/drm2/drm.h>
#include <dev/drm2/i915/i915_drm.h>
#include <dev/drm2/i915/i915_drv.h>
#include <dev/drm2/i915/intel_drv.h>
#include <sys/limits.h>
#include <sys/sf_buf.h>

struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
	uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations.
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped to GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
				  struct intel_ring_buffer *ring,
				  struct change_domains *cd)
{
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (obj->base.pending_write_domain == 0)
		obj->base.pending_read_domains |= obj->base.read_domains;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->base.write_domain &&
	    (((obj->base.write_domain != obj->base.pending_read_domains ||
	       obj->ring != ring)) ||
	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
		flush_domains |= obj->base.write_domain;
		invalidate_domains |=
			obj->base.pending_read_domains & ~obj->base.write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
		i915_gem_clflush_object(obj);

	if (obj->base.pending_write_domain)
		cd->flips |= atomic_load_acq_int(&obj->pending_flip);

	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains). So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
		obj->base.pending_write_domain = obj->base.write_domain;

	cd->invalidate_domains |= invalidate_domains;
	cd->flush_domains |= flush_domains;
	if (flush_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(obj->ring);
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(ring);
}

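/*
 * Small handle->object hash table built for each execbuffer call, so that
 * relocation processing can find target objects by handle without repeating
 * the full GEM handle lookup for every relocation entry.
 */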
struct eb_objects {
	u_long hashmask;
	LIST_HEAD(, drm_i915_gem_object) *buckets;
};

static struct eb_objects *
eb_create(int size)
{
	struct eb_objects *eb;

	eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
	return (eb);
}

static void
eb_reset(struct eb_objects *eb)
{
	int i;

	for (i = 0; i <= eb->hashmask; i++)
		LIST_INIT(&eb->buckets[i]);
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{

	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
	    obj, exec_node);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	struct drm_i915_gem_object *obj;

	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
		if (obj->exec_handle == handle)
			return (obj);
	}
	return (NULL);
}

static void
eb_destroy(struct eb_objects *eb)
{

	free(eb->buckets, DRM_I915_GEM);
	free(eb, DRM_I915_GEM);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
	    obj->cache_level != I915_CACHE_NONE);
}

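/*
 * Apply a single relocation: validate the requested read/write domains,
 * then patch the object with the target's current GTT offset, either
 * through a CPU (sf_buf) mapping or through a write-combining GTT mapping,
 * depending on the object's current write domain and cache level.
 */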
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

#if WATCH_RELOC
	DRM_INFO("%s: obj %p offset %08x target %d "
		 "read %08x write %08x gtt %08x "
		 "presumed %08x delta %08x\n",
		 __func__,
		 obj,
		 (int) reloc->offset,
		 (int) reloc->target_handle,
		 (int) reloc->read_domains,
		 (int) reloc->write_domain,
		 (int) target_offset,
		 (int) reloc->presumed_offset,
		 reloc->delta);
#endif

	/* The target buffer should have appeared before us in the
	 * exec_object list, so it should have a GTT space bound by now.
	 */
	if (unlikely(target_offset == 0)) {
		DRM_DEBUG("No GTT space found for object %d\n",
			  reloc->target_handle);
		return ret;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
		     reloc->write_domain != target_obj->pending_write_domain)) {
		DRM_DEBUG("Write domain conflict: "
			  "obj %p target %d offset %d "
			  "new %08x old %08x\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->write_domain,
			  target_obj->pending_write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
		return (-EFAULT);

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;
		struct sf_buf *sf;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
		    SFB_NOWAIT);
		if (sf == NULL)
			return (-ENOMEM);
		vaddr = (void *)sf_buf_kva(sf);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		sf_buf_free(sf);
	} else {
		uint32_t *reloc_entry;
		char *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/*
		 * Map the page containing the relocation we're going
		 * to perform.
		 */
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
		    PAGE_MASK));
		*(volatile uint32_t *)reloc_entry = reloc->delta;
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > DRM_ARRAY_SIZE(stack_reloc))
			count = DRM_ARRAY_SIZE(stack_reloc);
		remain -= count;

		ret = -copyin_nofault(user_relocs, r, count * sizeof(r[0]));
		if (ret != 0)
			return (ret);

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    copyout_nofault(&r->presumed_offset,
					    &user_relocs->presumed_offset,
					    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}
#undef N_RELOC
	return (0);
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb,
			     struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	int ret, pflags;

	/* Try to move as many of the relocation targets off the active list
	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
	 * for the retirement with pagefaults disabled.
	 */
	i915_gem_retire_requests(dev);

	ret = 0;
	pflags = vm_fault_disable_pagefaults();
	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the device lock lest the user pass in the relocations
	 * contained within a mmapped bo. For in such a case the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the device lock again. Obviously this is bad.
	 */

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret != 0)
			break;
	}
	vm_fault_enable_pagefaults(pflags);
	return (ret);
}

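/*
 * Private flag (bit 31) ORed into the exec entry's flags to record that we
 * pinned a fence register for this object, so the error and cleanup paths
 * know to release it again.
 */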
#define __EXEC_OBJECT_HAS_FENCE (1<<31)

static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(obj);
}

static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
		     struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(obj);

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
	if (ret)
		return ret;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
				goto err_unpin;

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	entry->offset = obj->gtt_offset;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
	return ret;
}

static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct drm_file *file,
			    struct list_head *objects)
{
	drm_i915_private_t *dev_priv;
	struct drm_i915_gem_object *obj;
	int ret, retry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	struct list_head ordered_objects;

	dev_priv = ring->dev->dev_private;
	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(obj);

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects and obtain
	 *     a fence register if required.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = pin_and_fence_object(obj, ring);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = pin_and_fence_object(obj, ring);
			if (ret) {
				int ret_ignore;

				/* This can potentially raise a harmless
				 * -EINVAL if we failed to bind in the above
				 * call. It cannot raise -EINTR since we know
				 * that the bo is freshly bound and so will
				 * not need to be flushed or waited upon.
				 */
				ret_ignore = i915_gem_object_unbind(obj);
				(void)ret_ignore;
				if (obj->gtt_space != NULL)
					printf("%s: gtt_space\n", __func__);
				break;
			}
		}

		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry;

			if (!obj->gtt_space)
				continue;

			entry = obj->exec_entry;
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
				i915_gem_object_unpin_fence(obj);
				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
			}

			i915_gem_object_unpin(obj);

			/* ... and ensure ppgtt mapping exists if needed. */
			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
						       obj, obj->cache_level);

				obj->has_aliasing_ppgtt_mapping = 1;
			}
		}

		if (ret != -ENOSPC || retry > 1)
			return ret;

		/* First attempt, just clear anything that is purgeable.
		 * Second attempt, clear the entire GTT.
		 */
		ret = i915_gem_evict_everything(ring->dev, retry == 0);
		if (ret)
			return ret;

		retry++;
	} while (1);

err:
	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
		struct drm_i915_gem_exec_object2 *entry;

		if (!obj->gtt_space)
			continue;

		entry = obj->exec_entry;
		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
			i915_gem_object_unpin_fence(obj);
			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
		}

		i915_gem_object_unpin(obj);
	}

	return ret;
}

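/*
 * Slow path for relocation processing: drop the object references and the
 * device lock, copy every relocation list in from userspace with an
 * ordinary (faulting) copyin, then reacquire the lock and the objects and
 * apply the relocations from the kernel copy.
 */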
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
    struct drm_file *file, struct intel_ring_buffer *ring,
    struct list_head *objects, struct eb_objects *eb,
    struct drm_i915_gem_exec_object2 *exec, int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	int *reloc_offset;
	int i, total, ret;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(objects)) {
		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
	    M_WAITOK | M_ZERO);
	reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry *user_relocs;

		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
		ret = -copyin(user_relocs, reloc + total,
		    exec[i].relocation_count * sizeof(*reloc));
		if (ret != 0) {
			DRM_LOCK(dev);
			goto err;
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		DRM_LOCK(dev);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
		    exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		list_add_tail(&obj->exec_list, objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	ret = i915_gem_execbuffer_reserve(ring, file, objects);
	if (ret)
		goto err;

	list_for_each_entry(obj, objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
		    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	free(reloc, DRM_I915_GEM);
	free(reloc_offset, DRM_I915_GEM);
	return ret;
}

static int
i915_gem_execbuffer_flush(struct drm_device *dev,
			  uint32_t invalidate_domains,
			  uint32_t flush_domains,
			  uint32_t flush_rings)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i, ret;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		for (i = 0; i < I915_NUM_RINGS; i++)
			if (flush_rings & (1 << i)) {
				ret = i915_gem_flush_ring(&dev_priv->rings[i],
				    invalidate_domains, flush_domains);
				if (ret)
					return ret;
			}
	}

	return 0;
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
	u32 plane, flip_mask;
	int ret;

	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */

	for (plane = 0; flips >> plane; plane++) {
		if (((flips >> plane) & 1) == 0)
			continue;

		if (plane)
			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
		else
			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	}

	return 0;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	struct change_domains cd;
	int ret;

	memset(&cd, 0, sizeof(cd));
	list_for_each_entry(obj, objects, exec_list)
		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

	if (cd.invalidate_domains | cd.flush_domains) {
#if WATCH_EXEC
		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
			 __func__,
			 cd.invalidate_domains,
			 cd.flush_domains);
#endif
		ret = i915_gem_execbuffer_flush(ring->dev,
						cd.invalidate_domains,
						cd.flush_domains,
						cd.flush_rings);
		if (ret)
			return ret;
	}

	if (cd.flips) {
		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
		if (ret)
			return ret;
	}

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;
	}

	return 0;
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

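/*
 * Check the exec object list for relocation counts that would overflow and
 * wire the userspace relocation arrays into memory with
 * vm_fault_quick_hold_pages(), so that the fast relocation path can use
 * copyin_nofault/copyout_nofault while pagefaults are disabled.
 */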
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
    vm_page_t ***map, int **maplen)
{
	vm_page_t *ma;
	int i, length, page_count;

	/* XXXKIB various limits checking is missing there */
	*map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
	*maplen = malloc(count * sizeof(*maplen), DRM_I915_GEM, M_WAITOK |
	    M_ZERO);
	for (i = 0; i < count; i++) {
		/* First check for malicious input causing overflow */
		if (exec[i].relocation_count >
		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
			return -EINVAL;

		length = exec[i].relocation_count *
		    sizeof(struct drm_i915_gem_relocation_entry);
		if (length == 0) {
			(*map)[i] = NULL;
			continue;
		}
		/*
		 * Since both start and end of the relocation region
		 * may not be aligned on the page boundary, be
		 * conservative and request a page slot for each
		 * partial page.  Thus +2.
		 */
		page_count = howmany(length, PAGE_SIZE) + 2;
		ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		(*maplen)[i] = vm_fault_quick_hold_pages(
		    &curproc->p_vmspace->vm_map, exec[i].relocs_ptr, length,
		    VM_PROT_READ | VM_PROT_WRITE, ma, page_count);
		if ((*maplen)[i] == -1) {
			free(ma, DRM_I915_GEM);
			(*map)[i] = NULL;
			return (-EFAULT);
		}
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring,
				   u32 seqno)
{
	struct drm_i915_gem_object *obj;
	uint32_t old_read, old_write;

	list_for_each_entry(obj, objects, exec_list) {
		old_read = obj->base.read_domains;
		old_write = obj->base.write_domain;

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring, seqno);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->pending_gpu_write = true;
			list_move_tail(&obj->gpu_write_list,
				       &ring->gpu_write_list);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_busy(ring->dev, obj);
		}
		CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
		    obj, old_read, old_write);
	}

	intel_mark_busy(ring->dev, NULL);
}

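/*
 * When non-zero, each execbuffer waits for its request to complete and
 * retires requests before returning, forcing synchronous execution
 * (mainly useful for debugging).
 */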
int i915_gem_sync_exec_requests;

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_request *request;
	u32 invalidate;

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires.
	 *
	 * The sampler always gets flushed on i965 (sigh).
	 */
	invalidate = I915_GEM_DOMAIN_COMMAND;
	if (INTEL_INFO(dev)->gen >= 4)
		invalidate |= I915_GEM_DOMAIN_SAMPLER;
	if (ring->flush(ring, invalidate, 0)) {
		i915_gem_next_request_seqno(ring);
		return;
	}

	/* Add a breadcrumb for the completion of the batch buffer */
	request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
	if (request == NULL || i915_add_request(ring, file, request)) {
		i915_gem_next_request_seqno(ring);
		free(request, DRM_I915_GEM);
	} else if (i915_gem_sync_exec_requests) {
		i915_wait_request(ring, request->seqno);
		i915_gem_retire_requests(dev);
	}
}

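/*
 * Workaround helper, enabled via i915_fix_mi_batchbuffer_end: map the tail
 * of the batch through the GTT and, if the final dword is not
 * MI_BATCH_BUFFER_END, write one either just past the batch (if the object
 * has room) or over the last dword, so the ring does not run off the end
 * of the buffer.
 */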
static void
i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
    uint32_t batch_start_offset, uint32_t batch_len)
{
	char *mkva;
	uint64_t po_r, po_w;
	uint32_t cmd;

	po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
	    batch_start_offset + batch_len;
	if (batch_len > 0)
		po_r -= 4;
	mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
	    PAT_WRITE_COMBINING);
	po_r &= PAGE_MASK;
	cmd = *(uint32_t *)(mkva + po_r);

	if (cmd != MI_BATCH_BUFFER_END) {
		/*
		 * batch_len != 0 due to the check at the start of
		 * i915_gem_do_execbuffer
		 */
		if (batch_obj->base.size > batch_start_offset + batch_len) {
			po_w = po_r + 4;
			/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
		} else {
			po_w = po_r;
			DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
		}
		*(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
	}

	pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
}

int i915_fix_mi_batchbuffer_end = 0;

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

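/*
 * Main execbuffer implementation: validate the request, look up and pin all
 * objects, apply relocations, flush and invalidate the required domains,
 * switch context, emit the batch on the selected ring and queue a request
 * so the work can be retired.
 */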
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct list_head objects;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	vm_page_t **relocs_ma;
	int *relocs_len;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 seqno;
	u32 mask;
	int ret, mode, i;

	if (!i915_gem_check_execbuffer(args)) {
		DRM_DEBUG("execbuf with invalid offset/length\n");
		return -EINVAL;
	}

	if (args->batch_len == 0)
		return (0);

	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma,
	    &relocs_len);
	if (ret != 0)
		goto pre_struct_lock_err;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->rings[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->rings[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_struct_lock_err;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->rings[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			ret = -EPERM;
			goto pre_struct_lock_err;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->rings[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->rings[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}
		cliprects = malloc(sizeof(*cliprects) * args->num_cliprects,
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
		    sizeof(*cliprects) * args->num_cliprects);
		if (ret != 0)
			goto pre_struct_lock_err;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_struct_lock_err;

	if (dev_priv->mm.suspended) {
		DRM_UNLOCK(dev);
		ret = -EBUSY;
		goto pre_struct_lock_err;
	}

	eb = eb_create(args->buffer_count);
	if (eb == NULL) {
		DRM_UNLOCK(dev);
		ret = -ENOMEM;
		goto pre_struct_lock_err;
	}

	/* Look up object handles */
	INIT_LIST_HEAD(&objects);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;
		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
		    exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			/* prevent error path from reading uninitialized data */
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->exec_list)) {
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		list_add_tail(&obj->exec_list, &objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
			    &objects, eb, exec, args->buffer_count);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	seqno = i915_gem_next_request_seqno(ring);
	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
		if (seqno < ring->sync_seqno[i]) {
			/* The GPU can not handle its semaphore value wrapping,
			 * so every billion or so execbuffers, we need to stall
			 * the GPU in order to reset the counters.
			 */
			ret = i915_gpu_idle(dev);
			if (ret)
				goto err;
			i915_gem_retire_requests(dev);

			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
		}
	}

	if (ring == &dev_priv->rings[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;

	if (i915_fix_mi_batchbuffer_end) {
		i915_gem_fix_mi_batchbuffer_end(batch_obj,
		    args->batch_start_offset, args->batch_len);
	}

	CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
	    exec_start, exec_len);

	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box_p(dev, &cliprects[i],
			    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring, exec_start,
			    exec_len);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
		if (ret)
			goto err;
	}

	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);
	while (!list_empty(&objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&objects, struct drm_i915_gem_object,
		    exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
	DRM_UNLOCK(dev);

pre_struct_lock_err:
	for (i = 0; i < args->buffer_count; i++) {
		if (relocs_ma[i] != NULL) {
			vm_page_unhold_pages(relocs_ma[i], relocs_len[i]);
			free(relocs_ma[i], DRM_I915_GEM);
		}
	}
	free(relocs_len, DRM_I915_GEM);
	free(relocs_ma, DRM_I915_GEM);
	free(cliprects, DRM_I915_GEM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
	    (int) args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	/* XXXKIB user-controlled malloc size */
	exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
	    sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
		    args->buffer_count, ret);
		free(exec_list, DRM_I915_GEM);
		free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec_list) * args->buffer_count);
		if (ret != 0) {
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	free(exec_list, DRM_I915_GEM);
	free(exec2_list, DRM_I915_GEM);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
	    (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* XXXKIB user-controllable malloc size */
	exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
	    sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
		    args->buffer_count, ret);
		free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	free(exec2_list, DRM_I915_GEM);
	return ret;
}