/*	$NetBSD: intel_ggtt.c,v 1.16 2023/07/09 20:24:06 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_ggtt.c,v 1.16 2023/07/09 20:24:06 riastradh Exp $");

#include <linux/stop_machine.h>

#include <asm/set_memory.h>
#include <asm/smp.h>

#ifdef __NetBSD__
#include <drm/io-mapping.h>
#endif

#include "intel_gt.h"
#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"

#include <linux/nbsd-namespace.h>

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
				   unsigned long color,
				   u64 *start,
				   u64 *end)
{
	if (i915_node_color_differs(node, color))
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

	ggtt->vm.is_ggtt = true;

	/* Only VLV supports read-only GGTT mappings */
	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

	if (ggtt->mappable_end) {
#ifdef __NetBSD__
		if (!drm_io_mapping_init_wc(&i915->drm, &ggtt->iomap,
			ggtt->gmadr.start, ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}
		/*
		 * Note: mappable_end is the size, not end paddr, of
		 * the aperture.
		 */
		pmap_pv_track(ggtt->gmadr.start, ggtt->mappable_end);
#else
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}
#endif

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
	}

	i915_ggtt_init_fences(ggtt);

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
	int ret;

#ifndef __NetBSD__
	stash_init(&i915->mm.wc_stash);
#endif

	/*
	 * Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(&i915->ggtt);
	if (ret)
		return ret;

	return 0;
}

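/*
 * A usage sketch of the guard-page colouring (illustration only, not
 * driver code): when i915_ggtt_color_adjust() is installed, a hole
 * that abuts a node of a different colour is shrunk by one GTT page
 * on that side, e.g.
 *
 *	u64 start = hole_start, end = hole_end;
 *	i915_ggtt_color_adjust(node, color, &start, &end);
 *	// start and/or end have each moved inward by I915_GTT_PAGE_SIZE
 *	// where the neighbouring colour differs, leaving an unmapped
 *	// page that the CS prefetcher can harmlessly run into.
 *
 * The hole_start/hole_end/node names above are hypothetical; drm_mm
 * invokes the hook internally during allocation.
 */
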
/*
 * Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_i915_private *i915)
{
	/*
	 * Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
}

static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	/*
	 * Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_GEN(i915) < 6)
		return;

	intel_gt_check_and_clear_faults(ggtt->vm.gt);

	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);

	ggtt->invalidate(ggtt);
}

void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
{
	ggtt_suspend_mappings(&i915->ggtt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
	struct drm_i915_private *i915 = ggtt->vm.i915;

	gen8_ggtt_invalidate(ggtt);

	if (INTEL_GEN(i915) >= 12)
		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
	else
		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	intel_gtt_chipset_flush();
}

#ifdef __NetBSD__
static inline void
gen8_set_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i,
    gen8_pte_t pte)
{
	CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
	CTASSERT(sizeof(gen8_pte_t) == 8);
#ifdef _LP64			/* XXX How to detect bus_space_write_8?  */
	bus_space_write_8(bst, bsh, 8*i, pte);
#else
	bus_space_write_4(bst, bsh, 8*i, (uint32_t)pte);
	bus_space_write_4(bst, bsh, 8*i + 4, (uint32_t)(pte >> 32));
#endif
}
#else
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}
#endif

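/*
 * A note on the ILP32 fallback in gen8_set_pte() above (editorial
 * observation, not from hardware documentation): the two 4-byte
 * bus_space writes are not a single atomic 64-bit store, so a
 * concurrent GPU lookup could in principle observe a half-written
 * PTE.  In practice callers rewrite whole ranges and only advertise
 * them via ggtt->invalidate() once every PTE has been written, which
 * is what keeps the window harmless.
 */
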
static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
#ifndef __NetBSD__
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
#endif

#ifdef __NetBSD__
	gen8_set_pte(ggtt->gsmt, ggtt->gsmh, offset / I915_GTT_PAGE_SIZE,
	    gen8_pte_encode(addr, level, 0));
#else
	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
#endif

	ggtt->invalidate(ggtt);
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
#ifdef __NetBSD__
	bus_dmamap_t map = vma->pages->sgl[0].sg_dmamap;
	unsigned seg;
	unsigned pgno;
#else
	struct sgt_iter sgt_iter;
	gen8_pte_t __iomem *gtt_entries;
#endif
	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
	 */

#ifdef __NetBSD__
	pgno = vma->node.start / I915_GTT_PAGE_SIZE;
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		addr = map->dm_segs[seg].ds_addr;
		bus_size_t len = map->dm_segs[seg].ds_len;
		KASSERT((addr % I915_GTT_PAGE_SIZE) == 0);
		KASSERT((len % I915_GTT_PAGE_SIZE) == 0);
		for (;
		     len >= I915_GTT_PAGE_SIZE;
		     addr += I915_GTT_PAGE_SIZE, len -= I915_GTT_PAGE_SIZE) {
			gen8_set_pte(ggtt->gsmt, ggtt->gsmh, pgno++,
			    pte_encode | addr);
		}
		KASSERT(len == 0);
	}
#else
	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
	gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
	for_each_sgt_daddr(addr, sgt_iter, vma->pages)
		gen8_set_pte(gtt_entries++, pte_encode | addr);
#endif

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
#ifndef __NetBSD__
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
#endif

#ifdef __NetBSD__
	/* bus_space offsets are in bytes, hence the sizeof scaling.  */
	bus_space_write_4(ggtt->gsmt, ggtt->gsmh,
	    sizeof(gen6_pte_t) * (offset / I915_GTT_PAGE_SIZE),
	    vm->pte_encode(addr, level, flags));
#else
	iowrite32(vm->pte_encode(addr, level, flags), pte);
#endif

	ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global gtt with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 */

static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
#ifdef __NetBSD__
	bus_dmamap_t map = vma->pages->sgl[0].sg_dmamap;
	unsigned seg;
	unsigned pgno;
#else
	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
	unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
	struct sgt_iter iter;
#endif
	dma_addr_t addr;

#ifdef __NetBSD__
	pgno = vma->node.start >> PAGE_SHIFT;
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		addr = map->dm_segs[seg].ds_addr;
		bus_size_t len = map->dm_segs[seg].ds_len;
		KASSERT((addr % I915_GTT_PAGE_SIZE) == 0);
		KASSERT((len % I915_GTT_PAGE_SIZE) == 0);
		for (;
		     len >= I915_GTT_PAGE_SIZE;
		     addr += I915_GTT_PAGE_SIZE, len -= I915_GTT_PAGE_SIZE) {
			/* XXX KASSERT(pgno < ...)?  */
			CTASSERT(sizeof(gen6_pte_t) == 4);
			bus_space_write_4(ggtt->gsmt, ggtt->gsmh,
			    sizeof(gen6_pte_t) * pgno++,
			    vm->pte_encode(addr, level, flags));
		}
		KASSERT(len == 0);
		/* XXX KASSERT(pgno <= ...)?  */
	}
#else
	for_each_sgt_daddr(addr, iter, vma->pages)
		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
#endif

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
#ifndef __NetBSD__
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
#endif
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

#ifdef __NetBSD__
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(ggtt->gsmt, ggtt->gsmh, first_entry + i,
		    scratch_pte);
#else
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
#endif
}

static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
	/*
	 * Make sure the internal GAM fifo has been cleared of all GTT
	 * writes before exiting stop_machine(). This guarantees that
	 * any aperture accesses waiting to start in another process
	 * cannot back up behind the GTT writes causing a hang.
	 * The register can be any arbitrary GAM register.
	 */
	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}

struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	enum i915_cache_level level;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  enum i915_cache_level level,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, level };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma *vma;
	enum i915_cache_level level;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma *vma,
					     enum i915_cache_level level,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma, level, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

struct clear_range {
	struct i915_address_space *vm;
	u64 start;
	u64 length;
};

static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
{
	struct clear_range *arg = _arg;

	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
					  u64 start,
					  u64 length)
{
	struct clear_range arg = { vm, start, length };

	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
}

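/*
 * All three __BKL wrappers above share one shape: marshal the
 * arguments into a stack struct, then run the real PTE update inside
 * stop_machine() so that no other CPU can touch the aperture while
 * the update and the GAM fifo flush complete.  A sketch of adding a
 * further serialized operation in the same style (names hypothetical,
 * for illustration only):
 *
 *	struct frob_range { struct i915_address_space *vm; u64 start; };
 *
 *	static int bxt_vtd_ggtt_frob_range__cb(void *_arg)
 *	{
 *		struct frob_range *arg = _arg;
 *
 *		... perform the GTT update for arg->vm ...
 *		bxt_vtd_ggtt_wa(arg->vm);
 *		return 0;
 *	}
 *
 * followed by a stop_machine(bxt_vtd_ggtt_frob_range__cb, &arg, NULL)
 * call from the corresponding vm hook.
 */
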
495{ 496 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 497 unsigned int first_entry = start / I915_GTT_PAGE_SIZE; 498 unsigned int num_entries = length / I915_GTT_PAGE_SIZE; 499#ifdef __NetBSD__ 500 gen6_pte_t scratch_pte; 501#else 502 gen6_pte_t scratch_pte, __iomem *gtt_base = 503 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 504#endif 505 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 506 int i; 507 508 if (WARN(num_entries > max_entries, 509 "First entry = %d; Num entries = %d (max=%d)\n", 510 first_entry, num_entries, max_entries)) 511 num_entries = max_entries; 512 513 scratch_pte = vm->scratch[0].encode; 514#ifdef __NetBSD__ 515 CTASSERT(sizeof(gen6_pte_t) == 4); 516 for (i = 0; i < num_entries; i++) 517 bus_space_write_4(ggtt->gsmt, ggtt->gsmh, 518 sizeof(gen6_pte_t) * (first_entry + i), 519 scratch_pte); 520#else 521 for (i = 0; i < num_entries; i++) 522 iowrite32(scratch_pte, >t_base[i]); 523#endif 524} 525 526static void i915_ggtt_insert_page(struct i915_address_space *vm, 527 dma_addr_t addr, 528 u64 offset, 529 enum i915_cache_level cache_level, 530 u32 unused) 531{ 532 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 533 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 534 535 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 536} 537 538static void i915_ggtt_insert_entries(struct i915_address_space *vm, 539 struct i915_vma *vma, 540 enum i915_cache_level cache_level, 541 u32 unused) 542{ 543 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 544 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 545 546 intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, 547 flags); 548} 549 550static void i915_ggtt_clear_range(struct i915_address_space *vm, 551 u64 start, u64 length) 552{ 553 intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); 554} 555 556static int ggtt_bind_vma(struct i915_vma *vma, 557 enum i915_cache_level cache_level, 558 u32 flags) 559{ 560 struct drm_i915_gem_object *obj = vma->obj; 561 u32 pte_flags; 562 563 /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ 564 pte_flags = 0; 565 if (i915_gem_object_is_readonly(obj)) 566 pte_flags |= PTE_READ_ONLY; 567 568 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); 569 570 vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; 571 572 /* 573 * Without aliasing PPGTT there's no difference between 574 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 575 * upgrade to both bound if we bind either to avoid double-binding. 
static int ggtt_bind_vma(struct i915_vma *vma,
			 enum i915_cache_level cache_level,
			 u32 flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	u32 pte_flags;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(obj))
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;

	/*
	 * Without aliasing PPGTT there's no difference between
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
	 * upgrade to both bound if we bind either to avoid double-binding.
	 */
	atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);

	return 0;
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 size;
	int ret;

	if (!USES_GUC(ggtt->vm.i915))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
	size = ggtt->vm.total - GUC_GGTT_TOP;

	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
				   PIN_NOEVICT);
	if (ret)
		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");

	return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
}

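/*
 * Worked example for ggtt_reserve_guc_top() above (the GUC_GGTT_TOP
 * value is an assumption for illustration): with a 4 GiB GGTT
 * (vm.total = 1ULL << 32) and GUC_GGTT_TOP at 0xFEE00000, the
 * reserved node covers
 *
 *	size = 0x100000000 - 0xFEE00000 = 0x1200000 (18 MiB)
 *
 * pinned at offset GUC_GGTT_TOP, so nothing the GuC must address is
 * ever allocated in the range it cannot reach.
 */
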
static int init_ggtt(struct i915_ggtt *ggtt)
{
	/*
	 * Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	mutex_init(&ggtt->error_mutex);
	if (ggtt->mappable_end) {
		/* Reserve a mappable slot for our lockless error capture */
		ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
						  &ggtt->error_capture,
						  PAGE_SIZE, 0,
						  I915_COLOR_UNEVICTABLE,
						  0, ggtt->mappable_end,
						  DRM_MM_INSERT_LOW);
		if (ret)
			return ret;
	}

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}

static int aliasing_gtt_bind_vma(struct i915_vma *vma,
				 enum i915_cache_level cache_level,
				 u32 flags)
{
	u32 pte_flags;
	int ret;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(vma->obj))
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND) {
		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;

		if (flags & I915_VMA_ALLOC) {
			ret = alias->vm.allocate_va_range(&alias->vm,
							  vma->node.start,
							  vma->size);
			if (ret)
				return ret;

			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
		}

		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
				     __i915_vma_flags(vma)));
		alias->vm.insert_entries(&alias->vm, vma,
					 cache_level, pte_flags);
	}

	if (flags & I915_VMA_GLOBAL_BIND)
		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);

	return 0;
}

static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
{
	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
		struct i915_address_space *vm = vma->vm;

		vm->clear_range(vm, vma->node.start, vma->size);
	}

	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
		struct i915_address_space *vm =
			&i915_vm_to_ggtt(vma->vm)->alias->vm;

		vm->clear_range(vm, vma->node.start, vma->size);
	}
}

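/*
 * With an aliasing PPGTT, the same GGTT offset is backed by two sets
 * of PTEs: the global GTT entries (I915_VMA_GLOBAL_BIND) and the
 * alias's PPGTT entries (I915_VMA_LOCAL_BIND).  The bind/unbind hooks
 * above keep the two in step, and init_aliasing_ppgtt() below swaps
 * them in for the plain ggtt_bind_vma()/ggtt_unbind_vma() pair.
 */
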
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.gt);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	ggtt->alias = ppgtt;
	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	return 0;

err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}

static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		return;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
}

int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(&i915->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(&i915->ggtt);
		if (ret)
			cleanup_init_ggtt(&i915->ggtt);
	}

	return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;

	atomic_set(&ggtt->vm.open, 0);

	rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
	flush_workqueue(ggtt->vm.i915->wq);

	mutex_lock(&ggtt->vm.mutex);

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
		WARN_ON(__i915_vma_unbind(vma));

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);

	ggtt_release_guc_top(ggtt);
	intel_vgt_deballoon(ggtt);

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

#ifdef __NetBSD__
	if (ggtt->mappable_end)
		pmap_pv_untrack(ggtt->gmadr.start, ggtt->mappable_end);
#endif

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
#ifndef __NetBSD__
	struct pagevec *pvec;
#endif

	fini_aliasing_ppgtt(&i915->ggtt);

	ggtt_cleanup_hw(&i915->ggtt);

#ifndef __NetBSD__
	pvec = &i915->mm.wc_stash.pvec;
	if (pvec->nr) {
		set_pages_array_wb(pvec->pages, pvec->nr);
		__pagevec_release(pvec);
	}
#endif
}

static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

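/*
 * Worked example for the GGMS decoders above (values illustrative):
 * on gen6/7 the field is the PTE-array size in MiB directly, so a
 * field value of 2 yields 2 << 20 = 2 MiB of PTEs; at 4 bytes per
 * gen6 PTE that is 512Ki entries, i.e. 512Ki * 4 KiB = 2 GiB of GGTT
 * address space.  On gen8+ the field is log2-encoded, so the same
 * value 2 decodes to (1 << 2) << 20 = 4 MiB of PTEs; at 8 bytes per
 * gen8 PTE that is again 512Ki entries = 2 GiB of address space.
 */
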
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	phys_addr_t phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;

	/*
	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
#ifdef __NetBSD__
	{
		int flags;
		if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
			flags = 0;
		else
			flags = BUS_SPACE_MAP_PREFETCHABLE;
		ggtt->gsmt = i915->drm.pdev->pd_pa.pa_memt;
		/* XXX errno NetBSD->Linux */
		ret = -bus_space_map(ggtt->gsmt, phys_addr, size, flags,
		    &ggtt->gsmh);
		if (ret) {
			DRM_ERROR("Failed to map the ggtt page table: %d\n",
			    ret);
			return ret;
		}
		ggtt->gsmsz = size;
	}
#else
	if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
		ggtt->gsm = ioremap(phys_addr, size);
	else
		ggtt->gsm = ioremap_wc(phys_addr, size);
	if (!ggtt->gsm) {
		DRM_ERROR("Failed to map the ggtt page table\n");
		return -ENOMEM;
	}
#endif

	ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
#ifdef __NetBSD__
		KASSERT(ggtt->gsmsz == size);
		bus_space_unmap(ggtt->gsmt, ggtt->gsmh, ggtt->gsmsz);
		ggtt->gsmsz = 0;
#else
		iounmap(ggtt->gsm);
#endif
		return ret;
	}

	ggtt->vm.scratch[0].encode =
		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
				    I915_CACHE_NONE, 0);

	return 0;
}

int ggtt_set_pages(struct i915_vma *vma)
{
	int ret;

	GEM_BUG_ON(vma->pages);

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;

	vma->page_sizes = vma->obj->mm.page_sizes;

	return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

#ifdef __NetBSD__
	if (ggtt->gsmsz) {
		bus_space_unmap(ggtt->gsmt, ggtt->gsmh, ggtt->gsmsz);
		ggtt->gsmsz = 0;
	}
#else
	iounmap(ggtt->gsm);
#endif
	cleanup_scratch_page(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
					       pci_resource_len(pdev, bar));
}

static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;
	int err;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	if (!IS_DGFX(i915)) {
		ggtt->gmadr = pci_resource(pdev, 2);
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
	}

#ifdef __NetBSD__
	__USE(err);
	ggtt->max_paddr = DMA_BIT_MASK(39);
#else
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
	if (!err)
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
	if (err)
		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
#endif

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	if (intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
	if (intel_ggtt_update_needs_vtd_wa(i915) ||
	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
		if (ggtt->vm.clear_range != nop_clear_range)
			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
	}

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	ggtt->vm.pte_encode = gen8_pte_encode;

	setup_private_pat(ggtt->vm.gt->uncore);

	return ggtt_probe_common(ggtt, size);
}

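/*
 * The helpers below pack a DMA address and caching policy into a
 * 32-bit gen6-style PTE.  gen6_gmch_probe() further down picks one as
 * vm.pte_encode: iris_pte_encode on parts with EDRAM, hsw_pte_encode
 * on Haswell, byt_pte_encode on Valleyview, ivb_pte_encode on other
 * gen7, and snb_pte_encode on Sandybridge.
 */
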
static u64 snb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   enum i915_cache_level level,
			   u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;
	int err;

	ggtt->gmadr = pci_resource(pdev, 2);
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/*
	 * 64/512MB is the current min/max we actually know of, but this is
	 * just a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
		DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
		return -ENXIO;
	}

#ifdef __NetBSD__
	__USE(err);
	ggtt->max_paddr = DMA_BIT_MASK(40);
#else
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
	if (!err)
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
	if (err)
		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
#endif
	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(i915))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(i915))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(i915))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (INTEL_GEN(i915) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	return ggtt_probe_common(ggtt, size);
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

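/*
 * A note on the DMA masks used in these probe routines (just the
 * arithmetic, nothing platform-specific): DMA_BIT_MASK(n) is
 * (1ULL << n) - 1, so DMA_BIT_MASK(39) caps DMA addresses at
 * 0x7fffffffff (512 GiB) on gen8+ and DMA_BIT_MASK(40) at
 * 0xffffffffff (1 TiB) on gen6/7.  On NetBSD the equivalent limit is
 * recorded in ggtt->max_paddr and applied through
 * drm_limit_dma_space() in ggtt_probe_hw() below, rather than via the
 * Linux PCI DMA mask API.
 */
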
static int i915_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	phys_addr_t gmadr_base;
	int ret;

	ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);

	ggtt->gmadr =
		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);

#ifdef __NetBSD__
	/* Based on i915_drv.c, i915_driver_hw_probe.  */
	if (IS_GEN(i915, 2))
		ggtt->max_paddr = DMA_BIT_MASK(30);
	else if (IS_I965G(i915) || IS_I965GM(i915))
		ggtt->max_paddr = DMA_BIT_MASK(32);
	else
		ggtt->max_paddr = DMA_BIT_MASK(40);
#endif

	ggtt->do_idle_maps = needs_idle_maps(i915);
	ggtt->vm.insert_page = i915_ggtt_insert_page;
	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
	ggtt->vm.clear_range = i915_ggtt_clear_range;
	ggtt->vm.cleanup = i915_gmch_remove;

	ggtt->invalidate = gmch_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	if (unlikely(ggtt->do_idle_maps))
		dev_notice(i915->drm.dev,
			   "Applying Ironlake quirks for intel_iommu\n");

	return 0;
}

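/*
 * ggtt_probe_hw() below dispatches on generation: gen5 and older go
 * through the GMCH/AGP layer (i915_gmch_probe), gen6/7 through
 * gen6_gmch_probe, and gen8+ through gen8_gmch_probe.  All three
 * leave the ggtt->vm hooks fully populated, so callers never need to
 * know which path was taken.
 */
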
static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
#ifdef __NetBSD__
	ggtt->vm.dmat = i915->drm.dmat;
#else
	ggtt->vm.dma = &i915->drm.pdev->dev;
#endif

	if (INTEL_GEN(i915) <= 5)
		ret = i915_gmch_probe(ggtt);
	else if (INTEL_GEN(i915) < 8)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = gen8_gmch_probe(ggtt);
	if (ret)
		return ret;

#ifdef __NetBSD__
	ggtt->pgfl = x86_select_freelist(ggtt->max_paddr);
	ret = drm_limit_dma_space(&i915->drm, 0, ggtt->max_paddr);
	if (ret) {
		DRM_ERROR("Unable to limit DMA paddr allocations: %d\n", ret);
		i915_ggtt_driver_release(i915);
		return ret;
	}
#endif

	if ((ggtt->vm.total - 1) >> 32) {
		DRM_ERROR("We never expected a Global GTT with more than 32bits"
			  " of address space! Found %"PRId64"M!\n",
			  ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		DRM_ERROR("mappable aperture extends past end of GGTT,"
			  " aperture=%pa, total=%"PRIx64"\n",
			  &ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_DEBUG_DRIVER("GGTT size = %"PRIu64"M\n", ggtt->vm.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %"PRIu64"M\n",
	    (u64)ggtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("DSM size = %"PRIu64"M\n",
	    (u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	int ret;

	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
	if (ret)
		return ret;

	if (intel_vtd_active())
		dev_info(i915->drm.dev, "VT-d active for gfx access\n");

	return 0;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
		return -EIO;

	return 0;
}

void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
{
	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);

	ggtt->invalidate = guc_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
{
	/* XXX Temporary pardon for error unload */
	if (ggtt->invalidate == gen8_ggtt_invalidate)
		return;

	/* We should only be called after i915_ggtt_enable_guc() */
	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma;
	bool flush = false;
	int open;

	intel_gt_check_and_clear_faults(ggtt->vm.gt);

	mutex_lock(&ggtt->vm.mutex);

	/* First fill our portion of the GTT with scratch pages */
	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);

	/* Skip rewriting PTE on VMA unbind. */
	open = atomic_xchg(&ggtt->vm.open, 0);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
			continue;

		clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
		WARN_ON(i915_vma_bind(vma,
				      obj ? obj->cache_level : 0,
				      PIN_GLOBAL, NULL));
		if (obj) { /* only used during resume => exclusive access */
			flush |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	atomic_set(&ggtt->vm.open, open);
	ggtt->invalidate(ggtt);

	mutex_unlock(&ggtt->vm.mutex);

	if (flush)
		wbinvd_on_all_cpus();
}

void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;

	ggtt_restore_mappings(ggtt);

	if (INTEL_GEN(i915) >= 8)
		setup_private_pat(ggtt->vm.gt->uncore);
}

#ifndef __NetBSD__

static struct scatterlist *
rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	     unsigned int width, unsigned int height,
	     unsigned int stride,
	     struct sg_table *st, struct scatterlist *sg)
{
	unsigned int column, row;
	unsigned int src_idx;

	for (column = 0; column < width; column++) {
		src_idx = stride * (height - 1) + column + offset;
		for (row = 0; row < height; row++) {
			st->nents++;
			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */
			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
			sg_dma_address(sg) =
				i915_gem_object_get_dma_address(obj, src_idx);
			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= stride;
		}
	}

	return sg;
}

static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
		   struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_rotation_info_size(rot_info);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
		sg = rotate_pages(obj, rot_info->plane[i].offset,
				  rot_info->plane[i].width,
				  rot_info->plane[i].height,
				  rot_info->plane[i].stride, st, sg);
	}

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
			 obj->base.size, rot_info->plane[0].width,
			 rot_info->plane[0].height, size);

	return ERR_PTR(ret);
}

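/*
 * Worked example for rotate_pages() above (numbers illustrative):
 * with offset = 0, width = 2, height = 3 and stride = 4, the source
 * page indices are emitted column by column starting from the bottom
 * row:
 *
 *	column 0: 8, 4, 0
 *	column 1: 9, 5, 1
 *
 * i.e. src_idx starts at stride * (height - 1) + column and steps by
 * -stride per row, which is what produces a 90 degree rotated view of
 * the stride-pitched source layout.
 */
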
static struct scatterlist *
remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	    unsigned int width, unsigned int height,
	    unsigned int stride,
	    struct sg_table *st, struct scatterlist *sg)
{
	unsigned int row;

	for (row = 0; row < height; row++) {
		unsigned int left = width * I915_GTT_PAGE_SIZE;

		while (left) {
			dma_addr_t addr;
			unsigned int length;

			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */

			addr = i915_gem_object_get_dma_address_len(obj,
								   offset,
								   &length);

			length = min(left, length);

			st->nents++;

			sg_set_page(sg, NULL, length, 0);
			sg_dma_address(sg) = addr;
			sg_dma_len(sg) = length;
			sg = sg_next(sg);

			offset += length / I915_GTT_PAGE_SIZE;
			left -= length;
		}

		offset += stride - width;
	}

	return sg;
}

static noinline struct sg_table *
intel_remap_pages(struct intel_remapped_info *rem_info,
		  struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_remapped_info_size(rem_info);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
		sg = remap_pages(obj, rem_info->plane[i].offset,
				 rem_info->plane[i].width,
				 rem_info->plane[i].height,
				 rem_info->plane[i].stride, st, sg);
	}

	i915_sg_trim(st);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
			 obj->base.size, rem_info->plane[0].width,
			 rem_info->plane[0].height, size);

	return ERR_PTR(ret);
}

#endif /* __NetBSD__ */

static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
#ifdef __NetBSD__
	struct sg_table *st = NULL;
	int ret = -ENOMEM;

	KASSERTMSG(view->partial.offset <= obj->base.size >> PAGE_SHIFT,
	    "obj=%p size=0x%zx; view offset=0x%zx size=0x%zx",
	    obj,
	    (size_t)obj->base.size >> PAGE_SHIFT,
	    (size_t)view->partial.offset,
	    (size_t)view->partial.size);
	KASSERTMSG((view->partial.size <=
		(obj->base.size >> PAGE_SHIFT) - view->partial.offset),
	    "obj=%p size=0x%zx; view offset=0x%zx size=0x%zx",
	    obj,
	    (size_t)obj->base.size >> PAGE_SHIFT,
	    (size_t)view->partial.offset,
	    (size_t)view->partial.size);
	KASSERTMSG(view->partial.size <= INT_MAX, "view size=0x%zx",
	    (size_t)view->partial.size);

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		goto fail;
	ret = sg_alloc_table(st, view->partial.size, GFP_KERNEL);
	if (ret) {
		kfree(st);
		st = NULL;
		goto fail;
	}

	/* XXX errno NetBSD->Linux */
	if (obj->mm.pages->sgl->sg_dmamap) {	/* XXX KASSERT?  */
		ret = -bus_dmamap_create(obj->base.dev->dmat,
		    (bus_size_t)view->partial.size << PAGE_SHIFT,
		    view->partial.size, PAGE_SIZE, 0, BUS_DMA_NOWAIT,
		    &st->sgl->sg_dmamap);
		if (ret) {
			st->sgl->sg_dmamap = NULL;
			goto fail;
		}
		st->sgl->sg_dmat = obj->base.dev->dmat;
	}

	/*
	 * Copy over the pages.  The view's offset and size are in
	 * units of pages already.
	 */
	KASSERT(st->sgl->sg_npgs == view->partial.size);
	memcpy(st->sgl->sg_pgs,
	    obj->mm.pages->sgl->sg_pgs + view->partial.offset,
	    sizeof(st->sgl->sg_pgs[0]) * view->partial.size);

	/*
	 * Copy over the DMA addresses.  For simplicity, we don't do
	 * anything to compress contiguous pages into larger segments.
	 */
	if (obj->mm.pages->sgl->sg_dmamap) {
		bus_size_t offset = (bus_size_t)view->partial.offset
		    << PAGE_SHIFT;
		unsigned i, j, k;

		st->sgl->sg_dmamap->dm_nsegs = view->partial.size;
		for (i = j = 0; i < view->partial.size; j++) {
			KASSERT(j < obj->mm.pages->sgl->sg_dmamap->dm_nsegs);
			const bus_dma_segment_t *iseg =
			    &obj->mm.pages->sgl->sg_dmamap->dm_segs[j];

			KASSERT(iseg->ds_len % PAGE_SIZE == 0);

			/* Skip segments prior to the start offset.  */
			if (offset >= iseg->ds_len) {
				offset -= iseg->ds_len;
				continue;
			}
			/*
			 * Bound the walk by the pages remaining in this
			 * segment past the intra-segment offset, so we
			 * cannot step past the end of iseg.
			 */
			for (k = 0;
			     (i < view->partial.size &&
				 k < (iseg->ds_len - offset) >> PAGE_SHIFT);
			     k++) {
				KASSERT(i < view->partial.size);
				bus_dma_segment_t *oseg =
				    &st->sgl->sg_dmamap->dm_segs[i++];
				oseg->ds_addr = iseg->ds_addr + offset +
				    k*PAGE_SIZE;
				oseg->ds_len = PAGE_SIZE;
			}

			/*
			 * After the first segment which we possibly
			 * use only a suffix of, the remainder we will
			 * take from the beginning.
			 */
			offset = 0;
		}
	}

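	/*
	 * Worked example for the copy loop above (numbers illustrative):
	 * with a source dmamap of two segments of 4 pages and 2 pages,
	 * and a view of offset = 4, size = 2, the first segment is
	 * consumed entirely by the offset-skip branch (offset drops from
	 * 4 pages' worth to 0), and the second segment then yields two
	 * single-page output segments at ds_addr and ds_addr + PAGE_SIZE.
	 */
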
	/* Success!  */
	return st;

fail:	if (st) {
		sg_free_table(st);
		kfree(st);
	}
	return ERR_PTR(ret);
#else
	struct sg_table *st;
	struct scatterlist *sg, *iter;
	unsigned int count = view->partial.size;
	unsigned int offset;
	int ret;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st) {
		ret = -ENOMEM;
		goto err_st_alloc;
	}

	ret = sg_alloc_table(st, count, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
	GEM_BUG_ON(!iter);

	sg = st->sgl;
	st->nents = 0;
	do {
		unsigned int len;

		len = min(iter->length - (offset << PAGE_SHIFT),
			  count << PAGE_SHIFT);
		sg_set_page(sg, NULL, len, 0);
		sg_dma_address(sg) =
			sg_dma_address(iter) + (offset << PAGE_SHIFT);
		sg_dma_len(sg) = len;

		st->nents++;
		count -= len >> PAGE_SHIFT;
		if (count == 0) {
			sg_mark_end(sg);
			i915_sg_trim(st); /* Drop any unused tail entries. */

			return st;
		}

		sg = __sg_next(sg);
		iter = __sg_next(iter);
		offset = 0;
	} while (1);

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
#endif	/* __NetBSD__ */
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret;

	/*
	 * The vma->pages are only valid within the lifespan of the borrowed
	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
	 * must be the vma->pages. A simple rule is that vma->pages must only
	 * be accessed when the obj->mm.pages are pinned.
	 */
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

	switch (vma->ggtt_view.type) {
	default:
		GEM_BUG_ON(vma->ggtt_view.type);
		/* fall through */
	case I915_GGTT_VIEW_NORMAL:
		vma->pages = vma->obj->mm.pages;
		return 0;

	case I915_GGTT_VIEW_ROTATED:
#ifdef __NetBSD__
		vma->pages = ERR_PTR(-ENODEV);
#else
		vma->pages =
			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
#endif
		break;

	case I915_GGTT_VIEW_REMAPPED:
#ifdef __NetBSD__
		vma->pages = ERR_PTR(-ENODEV);
#else
		vma->pages =
			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
#endif
		break;

	case I915_GGTT_VIEW_PARTIAL:
		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
		break;
	}

	ret = 0;
	if (IS_ERR(vma->pages)) {
		ret = PTR_ERR(vma->pages);
		vma->pages = NULL;
		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
			  vma->ggtt_view.type, ret);
	}
	return ret;
}