/* pci_sun4v.c: SUN4V specific PCI controller support.
 *
 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/log2.h>

#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/upa.h>
#include <asm/pstate.h>
#include <asm/oplib.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>

#include "pci_impl.h"
#include "iommu_common.h"

#include "pci_sun4v.h"

static unsigned long vpci_major = 1;
static unsigned long vpci_minor = 1;

#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))

struct iommu_batch {
	struct pci_dev	*pdev;		/* Device mapping is for.	*/
	unsigned long	prot;		/* IOMMU page protections	*/
	unsigned long	entry;		/* Index into IOTSB.		*/
	u64		*pglist;	/* List of physical pages	*/
	unsigned long	npages;		/* Number of pages in list.	*/
};

static DEFINE_PER_CPU(struct iommu_batch, pci_iommu_batch);

/* Interrupts must be disabled.  */
static inline void pci_iommu_batch_start(struct pci_dev *pdev, unsigned long prot, unsigned long entry)
{
	struct iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

	p->pdev = pdev;
	p->prot = prot;
	p->entry = entry;
	p->npages = 0;
}

/* Interrupts must be disabled.  */
static long pci_iommu_batch_flush(struct iommu_batch *p)
{
	struct pci_pbm_info *pbm = p->pdev->dev.archdata.host_controller;
	unsigned long devhandle = pbm->devhandle;
	unsigned long prot = p->prot;
	unsigned long entry = p->entry;
	u64 *pglist = p->pglist;
	unsigned long npages = p->npages;

	while (npages != 0) {
		long num;

		num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
					  npages, prot, __pa(pglist));
		if (unlikely(num < 0)) {
			if (printk_ratelimit())
				printk("pci_iommu_batch_flush: IOMMU map of "
				       "[%08lx:%08lx:%lx:%lx:%lx] failed with "
				       "status %ld\n",
				       devhandle, HV_PCI_TSBID(0, entry),
				       npages, prot, __pa(pglist), num);
			return -1;
		}

		entry += num;
		npages -= num;
		pglist += num;
	}

	p->entry = entry;
	p->npages = 0;

	return 0;
}

/* Interrupts must be disabled.  */
static inline long pci_iommu_batch_add(u64 phys_page)
{
	struct iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	p->pglist[p->npages++] = phys_page;
	if (p->npages == PGLIST_NENTS)
		return pci_iommu_batch_flush(p);

	return 0;
}

/* Interrupts must be disabled.  */
static inline long pci_iommu_batch_end(void)
{
	struct iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	return pci_iommu_batch_flush(p);
}

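/* Allocate a run of 'npages' consecutive entries from the IOTSB free-area
 * bitmap.  The search starts at the arena's hint; if the tail of the map
 * is exhausted, it wraps around once from the beginning before giving up.
 * Returns the index of the first entry, or -1 on failure.
 */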
static long pci_arena_alloc(struct iommu_arena *arena, unsigned long npages)
{
	unsigned long n, i, start, end, limit;
	int pass;

	limit = arena->limit;
	start = arena->hint;
	pass = 0;

again:
	n = find_next_zero_bit(arena->map, limit, start);
	end = n + npages;
	if (unlikely(end >= limit)) {
		if (likely(pass < 1)) {
			limit = start;
			start = 0;
			pass++;
			goto again;
		} else {
			/* Scanned the whole thing, give up. */
			return -1;
		}
	}

	for (i = n; i < end; i++) {
		if (test_bit(i, arena->map)) {
			start = i + 1;
			goto again;
		}
	}

	for (i = n; i < end; i++)
		__set_bit(i, arena->map);

	arena->hint = end;

	return n;
}

static void pci_arena_free(struct iommu_arena *arena, unsigned long base, unsigned long npages)
{
	unsigned long i;

	for (i = base; i < (base + npages); i++)
		__clear_bit(i, arena->map);
}

static void *pci_4v_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp)
{
	struct iommu *iommu;
	unsigned long flags, order, first_page, npages, n;
	void *ret;
	long entry;

	size = IO_PAGE_ALIGN(size);
	order = get_order(size);
	if (unlikely(order >= MAX_ORDER))
		return NULL;

	npages = size >> IO_PAGE_SHIFT;

	first_page = __get_free_pages(gfp, order);
	if (unlikely(first_page == 0UL))
		return NULL;

	memset((char *)first_page, 0, PAGE_SIZE << order);

	iommu = pdev->dev.archdata.iommu;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = pci_arena_alloc(&iommu->arena, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry < 0L))
		goto arena_alloc_fail;

	*dma_addrp = (iommu->page_table_map_base +
		      (entry << IO_PAGE_SHIFT));
	ret = (void *) first_page;
	first_page = __pa(first_page);

	local_irq_save(flags);

	pci_iommu_batch_start(pdev,
			      (HV_PCI_MAP_ATTR_READ |
			       HV_PCI_MAP_ATTR_WRITE),
			      entry);

	for (n = 0; n < npages; n++) {
		long err = pci_iommu_batch_add(first_page + (n * PAGE_SIZE));
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}

	if (unlikely(pci_iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	pci_arena_free(&iommu->arena, entry, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

arena_alloc_fail:
	free_pages(first_page, order);
	return NULL;
}

static void pci_4v_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, order, npages, entry;
	u32 devhandle;

	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
	iommu = pdev->dev.archdata.iommu;
	pbm = pdev->dev.archdata.host_controller;
	devhandle = pbm->devhandle;
	entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	pci_arena_free(&iommu->arena, entry, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	order = get_order(size);
	if (order < 10)
		free_pages((unsigned long)cpu, order);
}

static dma_addr_t pci_4v_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
{
	struct iommu *iommu;
	unsigned long flags, npages, oaddr;
	unsigned long i, base_paddr;
	u32 bus_addr, ret;
	unsigned long prot;
	long entry;

	iommu = pdev->dev.archdata.iommu;

	if (unlikely(direction == PCI_DMA_NONE))
		goto bad;

	oaddr = (unsigned long)ptr;
	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = pci_arena_alloc(&iommu->arena, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry < 0L))
		goto bad;

	bus_addr = (iommu->page_table_map_base +
		    (entry << IO_PAGE_SHIFT));
	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
	base_paddr = __pa(oaddr & IO_PAGE_MASK);
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != PCI_DMA_TODEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	local_irq_save(flags);

	pci_iommu_batch_start(pdev, prot, entry);

	for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
		long err = pci_iommu_batch_add(base_paddr);
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}
	if (unlikely(pci_iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return PCI_DMA_ERROR_CODE;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	pci_arena_free(&iommu->arena, entry, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return PCI_DMA_ERROR_CODE;
}

static void pci_4v_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, npages;
	long entry;
	u32 devhandle;

	if (unlikely(direction == PCI_DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return;
	}

	iommu = pdev->dev.archdata.iommu;
	pbm = pdev->dev.archdata.host_controller;
	devhandle = pbm->devhandle;

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	bus_addr &= IO_PAGE_MASK;

	spin_lock_irqsave(&iommu->lock, flags);

	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
	pci_arena_free(&iommu->arena, entry, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}

#define SG_ENT_PHYS_ADDRESS(SG)	\
	(__pa(page_address((SG)->page)) + (SG)->offset)

static inline long fill_sg(long entry, struct pci_dev *pdev,
			   struct scatterlist *sg,
			   int nused, int nelems, unsigned long prot)
{
	struct scatterlist *dma_sg = sg;
	struct scatterlist *sg_end = sg + nelems;
	unsigned long flags;
	int i;

	local_irq_save(flags);

	pci_iommu_batch_start(pdev, prot, entry);

	for (i = 0; i < nused; i++) {
		unsigned long pteval = ~0UL;
		u32 dma_npages;

		dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
			      dma_sg->dma_length +
			      ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
		do {
			unsigned long offset;
			signed int len;

			/* If we are here, we know we have at least one
			 * more page to map.  So walk forward until we
			 * hit a page crossing, and begin creating new
			 * mappings from that spot.
			 */
			for (;;) {
				unsigned long tmp;

				tmp = SG_ENT_PHYS_ADDRESS(sg);
				len = sg->length;
				if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
					pteval = tmp & IO_PAGE_MASK;
					offset = tmp & (IO_PAGE_SIZE - 1UL);
					break;
				}
				if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
					pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
					offset = 0UL;
					len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
					break;
				}
				sg++;
			}

			pteval = (pteval & IOPTE_PAGE);
			while (len > 0) {
				long err;

				err = pci_iommu_batch_add(pteval);
				if (unlikely(err < 0L))
					goto iommu_map_failed;

				pteval += IO_PAGE_SIZE;
				len -= (IO_PAGE_SIZE - offset);
				offset = 0;
				dma_npages--;
			}

			pteval = (pteval & IOPTE_PAGE) + len;
			sg++;

			/* Skip over any tail mappings we've fully mapped,
			 * adjusting pteval along the way.  Stop when we
			 * detect a page crossing event.
			 */
			while (sg < sg_end &&
			       (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
			       (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
			       ((pteval ^
				 (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
				pteval += sg->length;
				sg++;
			}
			if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
				pteval = ~0UL;
		} while (dma_npages != 0);
		dma_sg++;
	}

	if (unlikely(pci_iommu_batch_end() < 0L))
		goto iommu_map_failed;

	local_irq_restore(flags);
	return 0;

iommu_map_failed:
	local_irq_restore(flags);
	return -1L;
}

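/* Map a scatterlist for DMA.  prepare_sg() coalesces the entries and
 * records page-relative dma_address/dma_length values, a block of IOTSB
 * entries is reserved from the arena, the addresses are rebased onto
 * that block, and fill_sg() programs the hypervisor IOMMU mappings in
 * batches.
 */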
static int pci_4v_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	struct iommu *iommu;
	unsigned long flags, npages, prot;
	u32 dma_base;
	struct scatterlist *sgtmp;
	long entry, err;
	int used;

	/* Fast path single entry scatterlists. */
	if (nelems == 1) {
		sglist->dma_address =
			pci_4v_map_single(pdev,
					  (page_address(sglist->page) + sglist->offset),
					  sglist->length, direction);
		if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
			return 0;
		sglist->dma_length = sglist->length;
		return 1;
	}

	iommu = pdev->dev.archdata.iommu;

	if (unlikely(direction == PCI_DMA_NONE))
		goto bad;

	/* Step 1: Prepare scatter list. */
	npages = prepare_sg(sglist, nelems);

	/* Step 2: Allocate a cluster and context, if necessary. */
	spin_lock_irqsave(&iommu->lock, flags);
	entry = pci_arena_alloc(&iommu->arena, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry < 0L))
		goto bad;

	dma_base = iommu->page_table_map_base +
		(entry << IO_PAGE_SHIFT);

	/* Step 3: Normalize DMA addresses. */
	used = nelems;

	sgtmp = sglist;
	while (used && sgtmp->dma_length) {
		sgtmp->dma_address += dma_base;
		sgtmp++;
		used--;
	}
	used = nelems - used;

	/* Step 4: Create the mappings. */
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != PCI_DMA_TODEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	err = fill_sg(entry, pdev, sglist, used, nelems, prot);
	if (unlikely(err < 0L))
		goto iommu_map_failed;

	return used;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return 0;

iommu_map_failed:
	spin_lock_irqsave(&iommu->lock, flags);
	pci_arena_free(&iommu->arena, entry, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void pci_4v_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, i, npages;
	long entry;
	u32 devhandle, bus_addr;

	if (unlikely(direction == PCI_DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
	}

	iommu = pdev->dev.archdata.iommu;
	pbm = pdev->dev.archdata.host_controller;
	devhandle = pbm->devhandle;

	bus_addr = sglist->dma_address & IO_PAGE_MASK;

	for (i = 1; i < nelems; i++)
		if (sglist[i].dma_length == 0)
			break;
	i--;
	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
		  bus_addr) >> IO_PAGE_SHIFT;

	entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	pci_arena_free(&iommu->arena, entry, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}

static void pci_4v_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
	/* Nothing to do... */
}

static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	/* Nothing to do... */
}

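/* DMA operation vector for sun4v PCI controllers.  The map/unmap paths
 * above program the IOMMU through the pci_sun4v_iommu_map() and
 * pci_sun4v_iommu_demap() hypervisor calls, and the sync operations are
 * no-ops.
 */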
const struct pci_iommu_ops pci_sun4v_iommu_ops = {
	.alloc_consistent		= pci_4v_alloc_consistent,
	.free_consistent		= pci_4v_free_consistent,
	.map_single			= pci_4v_map_single,
	.unmap_single			= pci_4v_unmap_single,
	.map_sg				= pci_4v_map_sg,
	.unmap_sg			= pci_4v_unmap_sg,
	.dma_sync_single_for_cpu	= pci_4v_dma_sync_single_for_cpu,
	.dma_sync_sg_for_cpu		= pci_4v_dma_sync_sg_for_cpu,
};

static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm)
{
	struct property *prop;
	struct device_node *dp;

	dp = pbm->prom_node;
	prop = of_find_property(dp, "66mhz-capable", NULL);
	pbm->is_66mhz_capable = (prop != NULL);
	pbm->pci_bus = pci_scan_one_pbm(pbm);
}

static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
					    struct iommu *iommu)
{
	struct iommu_arena *arena = &iommu->arena;
	unsigned long i, cnt = 0;
	u32 devhandle;

	devhandle = pbm->devhandle;
	for (i = 0; i < arena->limit; i++) {
		unsigned long ret, io_attrs, ra;

		ret = pci_sun4v_iommu_getmap(devhandle,
					     HV_PCI_TSBID(0, i),
					     &io_attrs, &ra);
		if (ret == HV_EOK) {
			if (page_in_phys_avail(ra)) {
				pci_sun4v_iommu_demap(devhandle,
						      HV_PCI_TSBID(0, i), 1);
			} else {
				cnt++;
				__set_bit(i, arena->map);
			}
		}
	}

	return cnt;
}

static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
	struct iommu *iommu = pbm->iommu;
	struct property *prop;
	unsigned long num_tsb_entries, sz, tsbsize;
	u32 vdma[2], dma_mask, dma_offset;

	prop = of_find_property(pbm->prom_node, "virtual-dma", NULL);
	if (prop) {
		u32 *val = prop->value;

		vdma[0] = val[0];
		vdma[1] = val[1];
	} else {
		/* No property, use default values. */
		vdma[0] = 0x80000000;
		vdma[1] = 0x80000000;
	}

	if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
		prom_printf("PCI-SUN4V: strange virtual-dma[%08x:%08x].\n",
			    vdma[0], vdma[1]);
		prom_halt();
	}

	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
	num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
	tsbsize = num_tsb_entries * sizeof(iopte_t);

	dma_offset = vdma[0];

	/* Setup initial software IOMMU state. */
	spin_lock_init(&iommu->lock);
	iommu->ctx_lowest_free = 1;
	iommu->page_table_map_base = dma_offset;
	iommu->dma_addr_mask = dma_mask;

	/* Allocate and initialize the free area map. */
	sz = (num_tsb_entries + 7) / 8;
	sz = (sz + 7UL) & ~7UL;
	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
	if (!iommu->arena.map) {
		prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
		prom_halt();
	}
	iommu->arena.limit = num_tsb_entries;

	sz = probe_existing_entries(pbm, iommu);
	if (sz)
		printk("%s: Imported %lu TSB entries from OBP\n",
		       pbm->name, sz);
}

#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
	u64	version_type;
#define MSIQ_VERSION_MASK		0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT		32
#define MSIQ_TYPE_MASK			0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT			0
#define MSIQ_TYPE_NONE			0x00
#define MSIQ_TYPE_MSG			0x01
#define MSIQ_TYPE_MSI32			0x02
#define MSIQ_TYPE_MSI64			0x03
#define MSIQ_TYPE_INTX			0x08
#define MSIQ_TYPE_NONE2			0xff

	u64	intx_sysino;
	u64	reserved1;
	u64	stick;
	u64	req_id;  /* bus/device/func */
#define MSIQ_REQID_BUS_MASK		0xff00UL
#define MSIQ_REQID_BUS_SHIFT		8
#define MSIQ_REQID_DEVICE_MASK		0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT		3
#define MSIQ_REQID_FUNC_MASK		0x0007UL
#define MSIQ_REQID_FUNC_SHIFT		0

	u64	msi_address;

	/* The format of this value is message type dependent.
	 * For MSI bits 15:0 are the data from the MSI packet.
	 * For MSI-X bits 31:0 are the data from the MSI packet.
	 * For MSG, the message code and message routing code where:
	 *	bits 39:32 is the bus/device/fn of the msg target-id
	 *	bits 18:16 is the message routing code
	 *	bits 7:0 is the message code
	 * For INTx the low order 2-bits are:
	 *	00 - INTA
	 *	01 - INTB
	 *	10 - INTC
	 *	11 - INTD
	 */
	u64	msi_data;

	u64	reserved2;
};

/* For now this just runs as a pre-handler for the real interrupt handler.
 * So we just walk through the queue and ACK all the entries, update the
 * head pointer, and return.
 *
 * In the longer term it would be nice to do something more integrated
 * wherein we can pass in some of this MSI info to the drivers.  This
 * would be most useful for PCIe fabric error messages, although we could
 * invoke those directly from the loop here in order to pass the info around.
 */
static void pci_sun4v_msi_prehandler(unsigned int ino, void *data1, void *data2)
{
	struct pci_pbm_info *pbm = data1;
	struct pci_sun4v_msiq_entry *base, *ep;
	unsigned long msiqid, orig_head, head, type, err;

	msiqid = (unsigned long) data2;

	head = 0xdeadbeef;
	err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, &head);
	if (unlikely(err))
		goto hv_error_get;

	if (unlikely(head >= (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry))))
		goto bad_offset;

	head /= sizeof(struct pci_sun4v_msiq_entry);
	orig_head = head;
	base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
				   (pbm->msiq_ent_count *
				    sizeof(struct pci_sun4v_msiq_entry))));
	ep = &base[head];
	while ((ep->version_type & MSIQ_TYPE_MASK) != 0) {
		type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
		if (unlikely(type != MSIQ_TYPE_MSI32 &&
			     type != MSIQ_TYPE_MSI64))
			goto bad_type;

		pci_sun4v_msi_setstate(pbm->devhandle,
				       ep->msi_data /* msi_num */,
				       HV_MSISTATE_IDLE);

		/* Clear the entry. */
		ep->version_type &= ~MSIQ_TYPE_MASK;

		/* Go to next entry in ring. */
		head++;
		if (head >= pbm->msiq_ent_count)
			head = 0;
		ep = &base[head];
	}

	if (likely(head != orig_head)) {
		/* ACK entries by updating head pointer. */
		head *= sizeof(struct pci_sun4v_msiq_entry);
		err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
		if (unlikely(err))
			goto hv_error_set;
	}
	return;

hv_error_set:
	printk(KERN_EMERG "MSI: Hypervisor set head gives error %lu\n", err);
	goto hv_error_cont;

hv_error_get:
	printk(KERN_EMERG "MSI: Hypervisor get head gives error %lu\n", err);

hv_error_cont:
	printk(KERN_EMERG "MSI: devhandle[%x] msiqid[%lx] head[%lu]\n",
	       pbm->devhandle, msiqid, head);
	return;

bad_offset:
	printk(KERN_EMERG "MSI: Hypervisor gives bad offset %lx max(%lx)\n",
	       head, pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry));
	return;

bad_type:
	printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type);
	return;
}

static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
{
	unsigned long size, bits_per_ulong;

	bits_per_ulong = sizeof(unsigned long) * 8;
	size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
	size /= 8;
	BUG_ON(size % sizeof(unsigned long));

	pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
	if (!pbm->msi_bitmap)
		return -ENOMEM;

	return 0;
}

static void msi_bitmap_free(struct pci_pbm_info *pbm)
{
	kfree(pbm->msi_bitmap);
	pbm->msi_bitmap = NULL;
}

static int msi_queue_alloc(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);
	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
	if (pages == 0UL) {
		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
		       order);
		return -ENOMEM;
	}
	memset((char *)pages, 0, PAGE_SIZE << order);
	pbm->msi_queues = (void *) pages;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long err, base = __pa(pages + (i * q_size));
		unsigned long ret1, ret2;

		err = pci_sun4v_msiq_conf(pbm->devhandle,
					  pbm->msiq_first + i,
					  base, pbm->msiq_ent_count);
		if (err) {
			printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
			       err);
			goto h_error;
		}

		err = pci_sun4v_msiq_info(pbm->devhandle,
					  pbm->msiq_first + i,
					  &ret1, &ret2);
		if (err) {
			printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
			       err);
			goto h_error;
		}
		if (ret1 != base || ret2 != pbm->msiq_ent_count) {
			printk(KERN_ERR "MSI: Bogus qconf "
			       "expected[%lx:%x] got[%lx:%lx]\n",
			       base, pbm->msiq_ent_count,
			       ret1, ret2);
			goto h_error;
		}
	}

	return 0;

h_error:
	free_pages(pages, order);
	return -EINVAL;
}

static int alloc_msi(struct pci_pbm_info *pbm)
{
	int i;

	for (i = 0; i < pbm->msi_num; i++) {
		if (!test_and_set_bit(i, pbm->msi_bitmap))
			return i + pbm->msi_first;
	}

	return -ENOENT;
}

static void free_msi(struct pci_pbm_info *pbm, int msi_num)
{
	msi_num -= pbm->msi_first;
	clear_bit(msi_num, pbm->msi_bitmap);
}

static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p,
				   struct pci_dev *pdev,
				   struct msi_desc *entry)
{
	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
	unsigned long devino, msiqid;
	struct msi_msg msg;
	int msi_num, err;

	*virt_irq_p = 0;

	msi_num = alloc_msi(pbm);
	if (msi_num < 0)
		return msi_num;

	devino = sun4v_build_msi(pbm->devhandle, virt_irq_p,
				 pbm->msiq_first_devino,
				 (pbm->msiq_first_devino +
				  pbm->msiq_num));
	err = -ENOMEM;
	if (!devino)
		goto out_err;

	msiqid = ((devino - pbm->msiq_first_devino) +
		  pbm->msiq_first);

	err = -EINVAL;
	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
		goto out_err;

	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
		goto out_err;

	if (pci_sun4v_msi_setmsiq(pbm->devhandle,
				  msi_num, msiqid,
				  (entry->msi_attrib.is_64 ?
				   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
		goto out_err;

	if (pci_sun4v_msi_setstate(pbm->devhandle, msi_num, HV_MSISTATE_IDLE))
		goto out_err;

	if (pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_VALID))
		goto out_err;

	pdev->dev.archdata.msi_num = msi_num;

	if (entry->msi_attrib.is_64) {
		msg.address_hi = pbm->msi64_start >> 32;
		msg.address_lo = pbm->msi64_start & 0xffffffff;
	} else {
		msg.address_hi = 0;
		msg.address_lo = pbm->msi32_start;
	}
	msg.data = msi_num;

	set_irq_msi(*virt_irq_p, entry);
	write_msi_msg(*virt_irq_p, &msg);

	irq_install_pre_handler(*virt_irq_p,
				pci_sun4v_msi_prehandler,
				pbm, (void *) msiqid);

	return 0;

out_err:
	free_msi(pbm, msi_num);
	sun4v_destroy_msi(*virt_irq_p);
	*virt_irq_p = 0;
	return err;
}

static void pci_sun4v_teardown_msi_irq(unsigned int virt_irq,
				       struct pci_dev *pdev)
{
	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
	unsigned long msiqid, err;
	unsigned int msi_num;

	msi_num = pdev->dev.archdata.msi_num;
	err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi_num, &msiqid);
	if (err) {
		printk(KERN_ERR "%s: getmsiq gives error %lu\n",
		       pbm->name, err);
		return;
	}

	pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_INVALID);
	pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_INVALID);

	free_msi(pbm, msi_num);

	/* The sun4v_destroy_msi() will liberate the devino and thus the
	 * MSIQ allocation.
	 */
	sun4v_destroy_msi(virt_irq);
}

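/* Read the MSI/MSI-X properties from the OBP device tree ("#msi-eqs",
 * "msi-eq-size", "msi-eq-to-devino", "#msi", "msi-ranges",
 * "msi-data-mask", "msix-data-width" and "msi-address-ranges"), then
 * allocate the MSI bitmap and event queues.  If any property is missing
 * or malformed, MSI support is disabled for this PBM.
 */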
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
	const u32 *val;
	int len;

	val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
	if (!val || len != 4)
		goto no_msi;
	pbm->msiq_num = *val;
	if (pbm->msiq_num) {
		const struct msiq_prop {
			u32 first_msiq;
			u32 num_msiq;
			u32 first_devino;
		} *mqp;
		const struct msi_range_prop {
			u32 first_msi;
			u32 num_msi;
		} *mrng;
		const struct addr_range_prop {
			u32 msi32_high;
			u32 msi32_low;
			u32 msi32_len;
			u32 msi64_high;
			u32 msi64_low;
			u32 msi64_len;
		} *arng;

		val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
		if (!val || len != 4)
			goto no_msi;

		pbm->msiq_ent_count = *val;

		mqp = of_get_property(pbm->prom_node,
				      "msi-eq-to-devino", &len);
		if (!mqp || len != sizeof(struct msiq_prop))
			goto no_msi;

		pbm->msiq_first = mqp->first_msiq;
		pbm->msiq_first_devino = mqp->first_devino;

		val = of_get_property(pbm->prom_node, "#msi", &len);
		if (!val || len != 4)
			goto no_msi;
		pbm->msi_num = *val;

		mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
		if (!mrng || len != sizeof(struct msi_range_prop))
			goto no_msi;
		pbm->msi_first = mrng->first_msi;

		val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
		if (!val || len != 4)
			goto no_msi;
		pbm->msi_data_mask = *val;

		val = of_get_property(pbm->prom_node, "msix-data-width", &len);
		if (!val || len != 4)
			goto no_msi;
		pbm->msix_data_width = *val;

		arng = of_get_property(pbm->prom_node, "msi-address-ranges",
				       &len);
		if (!arng || len != sizeof(struct addr_range_prop))
			goto no_msi;
		pbm->msi32_start = ((u64)arng->msi32_high << 32) |
			(u64) arng->msi32_low;
		pbm->msi64_start = ((u64)arng->msi64_high << 32) |
			(u64) arng->msi64_low;
		pbm->msi32_len = arng->msi32_len;
		pbm->msi64_len = arng->msi64_len;

		if (msi_bitmap_alloc(pbm))
			goto no_msi;

		if (msi_queue_alloc(pbm)) {
			msi_bitmap_free(pbm);
			goto no_msi;
		}

		printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
		       "devino[0x%x]\n",
		       pbm->name,
		       pbm->msiq_first, pbm->msiq_num,
		       pbm->msiq_ent_count,
		       pbm->msiq_first_devino);
		printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
		       "width[%u]\n",
		       pbm->name,
		       pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
		       pbm->msix_data_width);
		printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
		       "addr64[0x%lx:0x%x]\n",
		       pbm->name,
		       pbm->msi32_start, pbm->msi32_len,
		       pbm->msi64_start, pbm->msi64_len);
		printk(KERN_INFO "%s: MSI queues at RA [%p]\n",
		       pbm->name,
		       pbm->msi_queues);
	}
	pbm->setup_msi_irq = pci_sun4v_setup_msi_irq;
	pbm->teardown_msi_irq = pci_sun4v_teardown_msi_irq;

	return;

no_msi:
	pbm->msiq_num = 0;
	printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
}
#else /* CONFIG_PCI_MSI */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */

static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle)
{
	struct pci_pbm_info *pbm;

	if (devhandle & 0x40)
		pbm = &p->pbm_B;
	else
		pbm = &p->pbm_A;

	pbm->next = pci_pbm_root;
	pci_pbm_root = pbm;

	pbm->scan_bus = pci_sun4v_scan_bus;
	pbm->pci_ops = &sun4v_pci_ops;
	pbm->config_space_reg_bits = 12;

	pbm->index = pci_num_pbms++;

	pbm->parent = p;
	pbm->prom_node = dp;

	pbm->devhandle = devhandle;

	pbm->name = dp->full_name;

	printk("%s: SUN4V PCI Bus Module\n", pbm->name);

	pci_determine_mem_io_space(pbm);

	pci_get_pbm_props(pbm);
	pci_sun4v_iommu_init(pbm);
	pci_sun4v_msi_init(pbm);
}

void sun4v_pci_init(struct device_node *dp, char *model_name)
{
	static int hvapi_negotiated = 0;
	struct pci_controller_info *p;
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	struct property *prop;
	struct linux_prom64_registers *regs;
	u32 devhandle;
	int i;

	if (!hvapi_negotiated++) {
		int err = sun4v_hvapi_register(HV_GRP_PCI,
					       vpci_major,
					       &vpci_minor);

		if (err) {
			prom_printf("SUN4V_PCI: Could not register hvapi, "
				    "err=%d\n", err);
			prom_halt();
		}
		printk("SUN4V_PCI: Registered hvapi major[%lu] minor[%lu]\n",
		       vpci_major, vpci_minor);
	}

	prop = of_find_property(dp, "reg", NULL);
	regs = prop->value;

	devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;

	for (pbm = pci_pbm_root; pbm; pbm = pbm->next) {
		if (pbm->devhandle == (devhandle ^ 0x40)) {
			pci_sun4v_pbm_init(pbm->parent, dp, devhandle);
			return;
		}
	}

	for_each_possible_cpu(i) {
		unsigned long page = get_zeroed_page(GFP_ATOMIC);

		if (!page)
			goto fatal_memory_error;

		per_cpu(pci_iommu_batch, i).pglist = (u64 *) page;
	}

	p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
	if (!p)
		goto fatal_memory_error;

	iommu = kzalloc(sizeof(struct iommu), GFP_ATOMIC);
	if (!iommu)
		goto fatal_memory_error;

	p->pbm_A.iommu = iommu;

	iommu = kzalloc(sizeof(struct iommu), GFP_ATOMIC);
	if (!iommu)
		goto fatal_memory_error;

	p->pbm_B.iommu = iommu;

	/* Like PSYCHO and SCHIZO we have a 2GB aligned area
	 * for memory space.
	 */
	pci_memspace_mask = 0x7fffffffUL;

	pci_sun4v_pbm_init(p, dp, devhandle);
	return;

fatal_memory_error:
	prom_printf("SUN4V_PCI: Fatal memory allocation error.\n");
	prom_halt();
}