#define	MOEA_PVO_CHECK(pvo)

struct ofw_map {
	vm_offset_t	om_va;
	vm_size_t	om_len;
	vm_offset_t	om_pa;
	u_int		om_mode;
};

/*
 * Map of physical memory regions.
 */
static struct	mem_region *regions;
static struct	mem_region *pregions;
static u_int	phys_avail_count;
static int	regions_sz, pregions_sz;
static struct	ofw_map *translations;

/*
 * Lock for the pteg and pvo tables.
 */
struct mtx	moea_table_mutex;
struct mtx	moea_vsid_mutex;

/* tlbie instruction synchronization */
static struct mtx tlbie_mtx;

/*
 * PTEG data.
 */
static struct	pteg *moea_pteg_table;
u_int		moea_pteg_count;
u_int		moea_pteg_mask;

/*
 * PVO data.
 */
struct	pvo_head *moea_pvo_table;	/* pvo entries by pteg index */
struct	pvo_head moea_pvo_kunmanaged =
    LIST_HEAD_INITIALIZER(moea_pvo_kunmanaged);	/* list of unmanaged pages */

uma_zone_t	moea_upvo_zone;	/* zone for pvo entries for unmanaged pages */
uma_zone_t	moea_mpvo_zone;	/* zone for pvo entries for managed pages */

#define	BPVO_POOL_SIZE	32768
static struct	pvo_entry *moea_bpvo_pool;
static int	moea_bpvo_pool_index = 0;

#define	VSID_NBPW	(sizeof(u_int32_t) * 8)
static u_int	moea_vsid_bitmap[NPMAPS / VSID_NBPW];

static boolean_t moea_initialized = FALSE;

/*
 * Statistics.
 */
u_int	moea_pte_valid = 0;
u_int	moea_pte_overflow = 0;
u_int	moea_pte_replacements = 0;
u_int	moea_pvo_entries = 0;
u_int	moea_pvo_enter_calls = 0;
u_int	moea_pvo_remove_calls = 0;
u_int	moea_pte_spills = 0;
SYSCTL_INT(_machdep, OID_AUTO, moea_pte_valid, CTLFLAG_RD, &moea_pte_valid,
    0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea_pte_overflow, CTLFLAG_RD,
    &moea_pte_overflow, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea_pte_replacements, CTLFLAG_RD,
    &moea_pte_replacements, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_entries, CTLFLAG_RD, &moea_pvo_entries,
    0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_enter_calls, CTLFLAG_RD,
    &moea_pvo_enter_calls, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_remove_calls, CTLFLAG_RD,
    &moea_pvo_remove_calls, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea_pte_spills, CTLFLAG_RD,
    &moea_pte_spills, 0, "");
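/*
 * The counters above are exported read-only under the machdep sysctl tree,
 * so they can be inspected on a running system, e.g. with
 * "sysctl machdep.moea_pte_spills".
 */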
/*
 * Allocate physical memory for use in moea_bootstrap.
 */
static vm_offset_t	moea_bootstrap_alloc(vm_size_t, u_int);

/*
 * PTE calls.
 */
static int	moea_pte_insert(u_int, struct pte *);

/*
 * PVO calls.
 */
static int	moea_pvo_enter(pmap_t, uma_zone_t, struct pvo_head *,
		    vm_offset_t, vm_offset_t, u_int, int);
static void	moea_pvo_remove(struct pvo_entry *, int);
static struct	pvo_entry *moea_pvo_find_va(pmap_t, vm_offset_t, int *);
static struct	pte *moea_pvo_to_pte(const struct pvo_entry *, int);

/*
 * Utility routines.
 */
static void		moea_enter_locked(pmap_t, vm_offset_t, vm_page_t,
			    vm_prot_t, boolean_t);
static void		moea_syncicache(vm_offset_t, vm_size_t);
static boolean_t	moea_query_bit(vm_page_t, int);
static u_int		moea_clear_bit(vm_page_t, int);
static void		moea_kremove(mmu_t, vm_offset_t);
int		moea_pte_spill(vm_offset_t);

/*
 * Kernel MMU interface
 */
void moea_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t);
void moea_clear_modify(mmu_t, vm_page_t);
void moea_clear_reference(mmu_t, vm_page_t);
void moea_copy_page(mmu_t, vm_page_t, vm_page_t);
void moea_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t);
void moea_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t,
    vm_prot_t);
void moea_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t);
vm_paddr_t moea_extract(mmu_t, pmap_t, vm_offset_t);
vm_page_t moea_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t);
void moea_init(mmu_t);
boolean_t moea_is_modified(mmu_t, vm_page_t);
boolean_t moea_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
boolean_t moea_is_referenced(mmu_t, vm_page_t);
boolean_t moea_ts_referenced(mmu_t, vm_page_t);
vm_offset_t moea_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int);
boolean_t moea_page_exists_quick(mmu_t, pmap_t, vm_page_t);
int moea_page_wired_mappings(mmu_t, vm_page_t);
void moea_pinit(mmu_t, pmap_t);
void moea_pinit0(mmu_t, pmap_t);
void moea_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
void moea_qenter(mmu_t, vm_offset_t, vm_page_t *, int);
void moea_qremove(mmu_t, vm_offset_t, int);
void moea_release(mmu_t, pmap_t);
void moea_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
void moea_remove_all(mmu_t, vm_page_t);
void moea_remove_write(mmu_t, vm_page_t);
void moea_zero_page(mmu_t, vm_page_t);
void moea_zero_page_area(mmu_t, vm_page_t, int, int);
void moea_zero_page_idle(mmu_t, vm_page_t);
void moea_activate(mmu_t, struct thread *);
void moea_deactivate(mmu_t, struct thread *);
void moea_cpu_bootstrap(mmu_t, int);
void moea_bootstrap(mmu_t, vm_offset_t, vm_offset_t);
void *moea_mapdev(mmu_t, vm_offset_t, vm_size_t);
void *moea_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t);
void moea_unmapdev(mmu_t, vm_offset_t, vm_size_t);
vm_offset_t moea_kextract(mmu_t, vm_offset_t);
void moea_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t);
void moea_kenter(mmu_t, vm_offset_t, vm_offset_t);
void moea_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma);
boolean_t moea_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t);
static void moea_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t);

static mmu_method_t moea_methods[] = {
	MMUMETHOD(mmu_change_wiring,	moea_change_wiring),
	MMUMETHOD(mmu_clear_modify,	moea_clear_modify),
	MMUMETHOD(mmu_clear_reference,	moea_clear_reference),
	MMUMETHOD(mmu_copy_page,	moea_copy_page),
	MMUMETHOD(mmu_enter,		moea_enter),
	MMUMETHOD(mmu_enter_object,	moea_enter_object),
	MMUMETHOD(mmu_enter_quick,	moea_enter_quick),
	MMUMETHOD(mmu_extract,		moea_extract),
	MMUMETHOD(mmu_extract_and_hold,	moea_extract_and_hold),
	MMUMETHOD(mmu_init,		moea_init),
	MMUMETHOD(mmu_is_modified,	moea_is_modified),
	MMUMETHOD(mmu_is_prefaultable,	moea_is_prefaultable),
	MMUMETHOD(mmu_is_referenced,	moea_is_referenced),
	MMUMETHOD(mmu_ts_referenced,	moea_ts_referenced),
	MMUMETHOD(mmu_map,		moea_map),
	MMUMETHOD(mmu_page_exists_quick,moea_page_exists_quick),
	MMUMETHOD(mmu_page_wired_mappings,moea_page_wired_mappings),
	MMUMETHOD(mmu_pinit,		moea_pinit),
	MMUMETHOD(mmu_pinit0,		moea_pinit0),
	MMUMETHOD(mmu_protect,		moea_protect),
	MMUMETHOD(mmu_qenter,		moea_qenter),
	MMUMETHOD(mmu_qremove,		moea_qremove),
	MMUMETHOD(mmu_release,		moea_release),
	MMUMETHOD(mmu_remove,		moea_remove),
	MMUMETHOD(mmu_remove_all,	moea_remove_all),
	MMUMETHOD(mmu_remove_write,	moea_remove_write),
	MMUMETHOD(mmu_sync_icache,	moea_sync_icache),
	MMUMETHOD(mmu_zero_page,	moea_zero_page),
	MMUMETHOD(mmu_zero_page_area,	moea_zero_page_area),
	MMUMETHOD(mmu_zero_page_idle,	moea_zero_page_idle),
	MMUMETHOD(mmu_activate,		moea_activate),
	MMUMETHOD(mmu_deactivate,	moea_deactivate),
	MMUMETHOD(mmu_page_set_memattr,	moea_page_set_memattr),

	/* Internal interfaces */
	MMUMETHOD(mmu_bootstrap,	moea_bootstrap),
	MMUMETHOD(mmu_cpu_bootstrap,	moea_cpu_bootstrap),
	MMUMETHOD(mmu_mapdev_attr,	moea_mapdev_attr),
	MMUMETHOD(mmu_mapdev,		moea_mapdev),
	MMUMETHOD(mmu_unmapdev,		moea_unmapdev),
	MMUMETHOD(mmu_kextract,		moea_kextract),
	MMUMETHOD(mmu_kenter,		moea_kenter),
	MMUMETHOD(mmu_kenter_attr,	moea_kenter_attr),
	MMUMETHOD(mmu_dev_direct_mapped,moea_dev_direct_mapped),

	{ 0, 0 }
};

MMU_DEF(oea_mmu, MMU_TYPE_OEA, moea_methods, 0);
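/*
 * MMU_DEF() above registers this method table with the kernel's kobj-style
 * MMU dispatch layer; the machine-independent pmap entry points are expected
 * to reach the moea_* implementations through that table rather than by
 * calling them directly.
 */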
static __inline uint32_t
moea_calc_wimg(vm_offset_t pa, vm_memattr_t ma)
{
	uint32_t pte_lo;
	int i;

	if (ma != VM_MEMATTR_DEFAULT) {
		switch (ma) {
		case VM_MEMATTR_UNCACHEABLE:
			return (PTE_I | PTE_G);
		case VM_MEMATTR_WRITE_COMBINING:
		case VM_MEMATTR_WRITE_BACK:
		case VM_MEMATTR_PREFETCHABLE:
			return (PTE_I);
		case VM_MEMATTR_WRITE_THROUGH:
			return (PTE_W | PTE_M);
		}
	}

	/*
	 * Assume the page is cache inhibited and access is guarded unless
	 * it's in our available memory array.
	 */
	pte_lo = PTE_I | PTE_G;
	for (i = 0; i < pregions_sz; i++) {
		if ((pa >= pregions[i].mr_start) &&
		    (pa < (pregions[i].mr_start + pregions[i].mr_size))) {
			pte_lo = PTE_M;
			break;
		}
	}

	return pte_lo;
}
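/*
 * For example (illustrative, assuming the usual layout where the 0x80000000
 * PCI window is not listed in pregions[]): moea_calc_wimg(0x80001000,
 * VM_MEMATTR_DEFAULT) falls through to PTE_I | PTE_G, while an address inside
 * one of the physical memory regions yields PTE_M only.
 */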
static void
tlbie(vm_offset_t va)
{

	mtx_lock_spin(&tlbie_mtx);
	__asm __volatile("ptesync");
	__asm __volatile("tlbie %0" :: "r"(va));
	__asm __volatile("eieio; tlbsync; ptesync");
	mtx_unlock_spin(&tlbie_mtx);
}

static void
tlbia(void)
{
	vm_offset_t va;

	for (va = 0; va < 0x00040000; va += 0x00001000) {
		__asm __volatile("tlbie %0" :: "r"(va));
		powerpc_sync();
	}
	__asm __volatile("tlbsync");
	powerpc_sync();
}

static __inline int
va_to_sr(u_int *sr, vm_offset_t va)
{
	return (sr[(uintptr_t)va >> ADDR_SR_SHFT]);
}

static __inline u_int
va_to_pteg(u_int sr, vm_offset_t addr)
{
	u_int hash;

	hash = (sr & SR_VSID_MASK) ^ (((u_int)addr & ADDR_PIDX) >>
	    ADDR_PIDX_SHFT);
	return (hash & moea_pteg_mask);
}

static __inline struct pvo_head *
vm_page_to_pvoh(vm_page_t m)
{

	return (&m->md.mdpg_pvoh);
}

static __inline void
moea_attr_clear(vm_page_t m, int ptebit)
{

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	m->md.mdpg_attrs &= ~ptebit;
}

static __inline int
moea_attr_fetch(vm_page_t m)
{

	return (m->md.mdpg_attrs);
}

static __inline void
moea_attr_save(vm_page_t m, int ptebit)
{

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	m->md.mdpg_attrs |= ptebit;
}

static __inline int
moea_pte_compare(const struct pte *pt, const struct pte *pvo_pt)
{
	if (pt->pte_hi == pvo_pt->pte_hi)
		return (1);

	return (0);
}

static __inline int
moea_pte_match(struct pte *pt, u_int sr, vm_offset_t va, int which)
{
	return (pt->pte_hi & ~PTE_VALID) ==
	    (((sr & SR_VSID_MASK) << PTE_VSID_SHFT) |
	    ((va >> ADDR_API_SHFT) & PTE_API) | which);
}

static __inline void
moea_pte_create(struct pte *pt, u_int sr, vm_offset_t va, u_int pte_lo)
{

	mtx_assert(&moea_table_mutex, MA_OWNED);

	/*
	 * Construct a PTE.  Default to IMB initially.  Valid bit only gets
	 * set when the real pte is set in memory.
	 *
	 * Note: Don't set the valid bit for correct operation of tlb update.
	 */
	pt->pte_hi = ((sr & SR_VSID_MASK) << PTE_VSID_SHFT) |
	    (((va & ADDR_PIDX) >> ADDR_API_SHFT) & PTE_API);
	pt->pte_lo = pte_lo;
}

static __inline void
moea_pte_synch(struct pte *pt, struct pte *pvo_pt)
{

	mtx_assert(&moea_table_mutex, MA_OWNED);
	pvo_pt->pte_lo |= pt->pte_lo & (PTE_REF | PTE_CHG);
}

static __inline void
moea_pte_clear(struct pte *pt, vm_offset_t va, int ptebit)
{

	mtx_assert(&moea_table_mutex, MA_OWNED);

	/*
	 * As shown in Section 7.6.3.2.3
	 */
	pt->pte_lo &= ~ptebit;
	tlbie(va);
}

static __inline void
moea_pte_set(struct pte *pt, struct pte *pvo_pt)
{

	mtx_assert(&moea_table_mutex, MA_OWNED);
	pvo_pt->pte_hi |= PTE_VALID;

	/*
	 * Update the PTE as defined in section 7.6.3.1.
	 * Note that the REF/CHG bits are from pvo_pt and thus should have
	 * been saved so this routine can restore them (if desired).
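	 *
	 * pte_lo is written before pte_hi (with a sync between the two
	 * stores) so the hardware never sees a valid PTE whose low word
	 * has not been written yet.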
	 */
	pt->pte_lo = pvo_pt->pte_lo;
	powerpc_sync();
	pt->pte_hi = pvo_pt->pte_hi;
	powerpc_sync();
	moea_pte_valid++;
}

static __inline void
moea_pte_unset(struct pte *pt, struct pte *pvo_pt, vm_offset_t va)
{

	mtx_assert(&moea_table_mutex, MA_OWNED);
	pvo_pt->pte_hi &= ~PTE_VALID;

	/*
	 * Force the ref & chg bits back into the PTEs.
	 */
	powerpc_sync();

	/*
	 * Invalidate the pte.
	 */
	pt->pte_hi &= ~PTE_VALID;

	tlbie(va);

	/*
	 * Save the ref & chg bits.
	 */
	moea_pte_synch(pt, pvo_pt);
	moea_pte_valid--;
}

static __inline void
moea_pte_change(struct pte *pt, struct pte *pvo_pt, vm_offset_t va)
{

	/*
	 * Invalidate the PTE
	 */
	moea_pte_unset(pt, pvo_pt, va);
	moea_pte_set(pt, pvo_pt);
}

/*
 * Quick sort callout for comparing memory regions.
 */
static int	mr_cmp(const void *a, const void *b);
static int	om_cmp(const void *a, const void *b);

static int
mr_cmp(const void *a, const void *b)
{
	const struct	mem_region *regiona;
	const struct	mem_region *regionb;

	regiona = a;
	regionb = b;
	if (regiona->mr_start < regionb->mr_start)
		return (-1);
	else if (regiona->mr_start > regionb->mr_start)
		return (1);
	else
		return (0);
}

static int
om_cmp(const void *a, const void *b)
{
	const struct	ofw_map *mapa;
	const struct	ofw_map *mapb;

	mapa = a;
	mapb = b;
	if (mapa->om_pa < mapb->om_pa)
		return (-1);
	else if (mapa->om_pa > mapb->om_pa)
		return (1);
	else
		return (0);
}

void
moea_cpu_bootstrap(mmu_t mmup, int ap)
{
	u_int sdr;
	int i;

	if (ap) {
		powerpc_sync();
		__asm __volatile("mtdbatu 0,%0" :: "r"(battable[0].batu));
		__asm __volatile("mtdbatl 0,%0" :: "r"(battable[0].batl));
		isync();
		__asm __volatile("mtibatu 0,%0" :: "r"(battable[0].batu));
		__asm __volatile("mtibatl 0,%0" :: "r"(battable[0].batl));
		isync();
	}

	__asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu));
	__asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl));
	isync();

	__asm __volatile("mtibatu 1,%0" :: "r"(0));
	__asm __volatile("mtdbatu 2,%0" :: "r"(0));
	__asm __volatile("mtibatu 2,%0" :: "r"(0));
	__asm __volatile("mtdbatu 3,%0" :: "r"(0));
	__asm __volatile("mtibatu 3,%0" :: "r"(0));
	isync();

	for (i = 0; i < 16; i++)
		mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]);
	powerpc_sync();

	sdr = (u_int)moea_pteg_table | (moea_pteg_mask >> 10);
	__asm __volatile("mtsdr1 %0" :: "r"(sdr));
	isync();

	tlbia();
}

void
moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
{
	ihandle_t	mmui;
	phandle_t	chosen, mmu;
	int		sz;
	int		i, j;
	int		ofw_mappings;
	vm_size_t	size, physsz, hwphyssz;
	vm_offset_t	pa, va, off;
	void		*dpcpu;
	register_t	msr;

	/*
	 * Set up BAT0 to map the lowest 256 MB area
	 */
	battable[0x0].batl = BATL(0x00000000, BAT_M, BAT_PP_RW);
	battable[0x0].batu = BATU(0x00000000, BAT_BL_256M, BAT_Vs);

	/*
	 * Map PCI memory space.
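	 * Each BAT entry below covers a 256MB block (BAT_BL_256M) and is
	 * marked cache-inhibited and guarded (BAT_I|BAT_G), as is
	 * appropriate for device memory.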
	 */
	battable[0x8].batl = BATL(0x80000000, BAT_I|BAT_G, BAT_PP_RW);
	battable[0x8].batu = BATU(0x80000000, BAT_BL_256M, BAT_Vs);

	battable[0x9].batl = BATL(0x90000000, BAT_I|BAT_G, BAT_PP_RW);
	battable[0x9].batu = BATU(0x90000000, BAT_BL_256M, BAT_Vs);

	battable[0xa].batl = BATL(0xa0000000, BAT_I|BAT_G, BAT_PP_RW);
	battable[0xa].batu = BATU(0xa0000000, BAT_BL_256M, BAT_Vs);

	battable[0xb].batl = BATL(0xb0000000, BAT_I|BAT_G, BAT_PP_RW);
	battable[0xb].batu = BATU(0xb0000000, BAT_BL_256M, BAT_Vs);

	/*
	 * Map obio devices.
	 */
	battable[0xf].batl = BATL(0xf0000000, BAT_I|BAT_G, BAT_PP_RW);
	battable[0xf].batu = BATU(0xf0000000, BAT_BL_256M, BAT_Vs);

	/*
	 * Use an IBAT and a DBAT to map the bottom segment of memory
	 * where we are. Turn off instruction relocation temporarily
	 * to prevent faults while reprogramming the IBAT.
	 */
	msr = mfmsr();
	mtmsr(msr & ~PSL_IR);
	__asm (".balign 32; \n"
	       "mtibatu 0,%0; mtibatl 0,%1; isync; \n"
	       "mtdbatu 0,%0; mtdbatl 0,%1; isync"
	    :: "r"(battable[0].batu), "r"(battable[0].batl));
	mtmsr(msr);

	/* map pci space */
	__asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu));
	__asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl));
	isync();

	/* set global direct map flag */
	hw_direct_map = 1;

	mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
	CTR0(KTR_PMAP, "moea_bootstrap: physical memory");

	qsort(pregions, pregions_sz, sizeof(*pregions), mr_cmp);
	for (i = 0; i < pregions_sz; i++) {
		vm_offset_t pa;
		vm_offset_t end;

		CTR3(KTR_PMAP, "physregion: %#x - %#x (%#x)",
			pregions[i].mr_start,
			pregions[i].mr_start + pregions[i].mr_size,
			pregions[i].mr_size);
		/*
		 * Install entries into the BAT table to allow all
		 * of physmem to be covered by on-demand BAT entries.
		 * The loop will sometimes set the same battable element
		 * twice, but that's fine since they won't be used for
		 * a while yet.
		 */
		pa = pregions[i].mr_start & 0xf0000000;
		end = pregions[i].mr_start + pregions[i].mr_size;
		do {
			u_int n = pa >> ADDR_SR_SHFT;

			battable[n].batl = BATL(pa, BAT_M, BAT_PP_RW);
			battable[n].batu = BATU(pa, BAT_BL_256M, BAT_Vs);
			pa += SEGMENT_LENGTH;
		} while (pa < end);
	}

	if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz)
		panic("moea_bootstrap: phys_avail too small");
	qsort(regions, regions_sz, sizeof(*regions), mr_cmp);
	phys_avail_count = 0;
	physsz = 0;
	hwphyssz = 0;
	TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
	for (i = 0, j = 0; i < regions_sz; i++, j += 2) {
		CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start,
		    regions[i].mr_start + regions[i].mr_size,
		    regions[i].mr_size);
		if (hwphyssz != 0 &&
		    (physsz + regions[i].mr_size) >= hwphyssz) {
			if (physsz < hwphyssz) {
				phys_avail[j] = regions[i].mr_start;
				phys_avail[j + 1] = regions[i].mr_start +
				    hwphyssz - physsz;
				physsz = hwphyssz;
				phys_avail_count++;
			}
			break;
		}
		phys_avail[j] = regions[i].mr_start;
		phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size;
		phys_avail_count++;
		physsz += regions[i].mr_size;
	}
	physmem = btoc(physsz);

	/*
	 * Allocate PTEG table.
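	 *
	 * Sizing example (illustrative, for the !PTEGCOUNT case below): with
	 * 128MB of RAM physmem is 32768 pages, so moea_pteg_count is doubled
	 * from 0x1000 until it reaches 32768 and then halved to 16384 PTEGs,
	 * i.e. a 1MB table at 64 bytes per 8-slot PTEG.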
802 */ 803#ifdef PTEGCOUNT 804 moea_pteg_count = PTEGCOUNT; 805#else 806 moea_pteg_count = 0x1000; 807 808 while (moea_pteg_count < physmem) 809 moea_pteg_count <<= 1; 810 811 moea_pteg_count >>= 1; 812#endif /* PTEGCOUNT */ 813 814 size = moea_pteg_count * sizeof(struct pteg); 815 CTR2(KTR_PMAP, "moea_bootstrap: %d PTEGs, %d bytes", moea_pteg_count, 816 size); 817 moea_pteg_table = (struct pteg *)moea_bootstrap_alloc(size, size); 818 CTR1(KTR_PMAP, "moea_bootstrap: PTEG table at %p", moea_pteg_table); 819 bzero((void *)moea_pteg_table, moea_pteg_count * sizeof(struct pteg)); 820 moea_pteg_mask = moea_pteg_count - 1; 821 822 /* 823 * Allocate pv/overflow lists. 824 */ 825 size = sizeof(struct pvo_head) * moea_pteg_count; 826 moea_pvo_table = (struct pvo_head *)moea_bootstrap_alloc(size, 827 PAGE_SIZE); 828 CTR1(KTR_PMAP, "moea_bootstrap: PVO table at %p", moea_pvo_table); 829 for (i = 0; i < moea_pteg_count; i++) 830 LIST_INIT(&moea_pvo_table[i]); 831 832 /* 833 * Initialize the lock that synchronizes access to the pteg and pvo 834 * tables. 835 */ 836 mtx_init(&moea_table_mutex, "pmap table", NULL, MTX_DEF | 837 MTX_RECURSE); 838 mtx_init(&moea_vsid_mutex, "VSID table", NULL, MTX_DEF); 839 840 mtx_init(&tlbie_mtx, "tlbie", NULL, MTX_SPIN); 841 842 /* 843 * Initialise the unmanaged pvo pool. 844 */ 845 moea_bpvo_pool = (struct pvo_entry *)moea_bootstrap_alloc( 846 BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0); 847 moea_bpvo_pool_index = 0; 848 849 /* 850 * Make sure kernel vsid is allocated as well as VSID 0. 851 */ 852 moea_vsid_bitmap[(KERNEL_VSIDBITS & (NPMAPS - 1)) / VSID_NBPW] 853 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); 854 moea_vsid_bitmap[0] |= 1; 855 856 /* 857 * Initialize the kernel pmap (which is statically allocated). 858 */ 859 PMAP_LOCK_INIT(kernel_pmap); 860 for (i = 0; i < 16; i++) 861 kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; 862 kernel_pmap->pm_active = ~0; 863 864 /* 865 * Set up the Open Firmware mappings 866 */ 867 if ((chosen = OF_finddevice("/chosen")) == -1) 868 panic("moea_bootstrap: can't find /chosen"); 869 OF_getprop(chosen, "mmu", &mmui, 4); 870 if ((mmu = OF_instance_to_package(mmui)) == -1) 871 panic("moea_bootstrap: can't get mmu package"); 872 if ((sz = OF_getproplen(mmu, "translations")) == -1) 873 panic("moea_bootstrap: can't get ofw translation count"); 874 translations = NULL; 875 for (i = 0; phys_avail[i] != 0; i += 2) { 876 if (phys_avail[i + 1] >= sz) { 877 translations = (struct ofw_map *)phys_avail[i]; 878 break; 879 } 880 } 881 if (translations == NULL) 882 panic("moea_bootstrap: no space to copy translations"); 883 bzero(translations, sz); 884 if (OF_getprop(mmu, "translations", translations, sz) == -1) 885 panic("moea_bootstrap: can't get ofw translations"); 886 CTR0(KTR_PMAP, "moea_bootstrap: translations"); 887 sz /= sizeof(*translations); 888 qsort(translations, sz, sizeof (*translations), om_cmp); 889 for (i = 0, ofw_mappings = 0; i < sz; i++) { 890 CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x", 891 translations[i].om_pa, translations[i].om_va, 892 translations[i].om_len); 893 894 /* 895 * If the mapping is 1:1, let the RAM and device on-demand 896 * BAT tables take care of the translation. 
897 */ 898 if (translations[i].om_va == translations[i].om_pa) 899 continue; 900 901 /* Enter the pages */ 902 for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { 903 moea_kenter(mmup, translations[i].om_va + off, 904 translations[i].om_pa + off); 905 ofw_mappings++; 906 } 907 } 908 909 /* 910 * Calculate the last available physical address. 911 */ 912 for (i = 0; phys_avail[i + 2] != 0; i += 2) 913 ; 914 Maxmem = powerpc_btop(phys_avail[i + 1]); 915 916 moea_cpu_bootstrap(mmup,0); 917 918 pmap_bootstrapped++; 919 920 /* 921 * Set the start and end of kva. 922 */ 923 virtual_avail = VM_MIN_KERNEL_ADDRESS; 924 virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; 925 926 /* 927 * Allocate a kernel stack with a guard page for thread0 and map it 928 * into the kernel page map. 929 */ 930 pa = moea_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE); 931 va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 932 virtual_avail = va + KSTACK_PAGES * PAGE_SIZE; 933 CTR2(KTR_PMAP, "moea_bootstrap: kstack0 at %#x (%#x)", pa, va); 934 thread0.td_kstack = va; 935 thread0.td_kstack_pages = KSTACK_PAGES; 936 for (i = 0; i < KSTACK_PAGES; i++) { 937 moea_kenter(mmup, va, pa); 938 pa += PAGE_SIZE; 939 va += PAGE_SIZE; 940 } 941 942 /* 943 * Allocate virtual address space for the message buffer. 944 */ 945 pa = msgbuf_phys = moea_bootstrap_alloc(MSGBUF_SIZE, PAGE_SIZE); 946 msgbufp = (struct msgbuf *)virtual_avail; 947 va = virtual_avail; 948 virtual_avail += round_page(MSGBUF_SIZE); 949 while (va < virtual_avail) { 950 moea_kenter(mmup, va, pa); 951 pa += PAGE_SIZE; 952 va += PAGE_SIZE; 953 } 954 955 /* 956 * Allocate virtual address space for the dynamic percpu area. 957 */ 958 pa = moea_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); 959 dpcpu = (void *)virtual_avail; 960 va = virtual_avail; 961 virtual_avail += DPCPU_SIZE; 962 while (va < virtual_avail) { 963 moea_kenter(mmup, va, pa); 964 pa += PAGE_SIZE; 965 va += PAGE_SIZE; 966 } 967 dpcpu_init(dpcpu, 0); 968} 969 970/* 971 * Activate a user pmap. The pmap must be activated before it's address 972 * space can be accessed in any way. 973 */ 974void 975moea_activate(mmu_t mmu, struct thread *td) 976{ 977 pmap_t pm, pmr; 978 979 /* 980 * Load all the data we need up front to encourage the compiler to 981 * not issue any loads while we have interrupts disabled below. 
982 */ 983 pm = &td->td_proc->p_vmspace->vm_pmap; 984 pmr = pm->pmap_phys; 985 986 pm->pm_active |= PCPU_GET(cpumask); 987 PCPU_SET(curpmap, pmr); 988} 989 990void 991moea_deactivate(mmu_t mmu, struct thread *td) 992{ 993 pmap_t pm; 994 995 pm = &td->td_proc->p_vmspace->vm_pmap; 996 pm->pm_active &= ~PCPU_GET(cpumask); 997 PCPU_SET(curpmap, NULL); 998} 999 1000void 1001moea_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) 1002{ 1003 struct pvo_entry *pvo; 1004 1005 PMAP_LOCK(pm); 1006 pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); 1007 1008 if (pvo != NULL) { 1009 if (wired) { 1010 if ((pvo->pvo_vaddr & PVO_WIRED) == 0) 1011 pm->pm_stats.wired_count++; 1012 pvo->pvo_vaddr |= PVO_WIRED; 1013 } else { 1014 if ((pvo->pvo_vaddr & PVO_WIRED) != 0) 1015 pm->pm_stats.wired_count--; 1016 pvo->pvo_vaddr &= ~PVO_WIRED; 1017 } 1018 } 1019 PMAP_UNLOCK(pm); 1020} 1021 1022void 1023moea_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) 1024{ 1025 vm_offset_t dst; 1026 vm_offset_t src; 1027 1028 dst = VM_PAGE_TO_PHYS(mdst); 1029 src = VM_PAGE_TO_PHYS(msrc); 1030 1031 kcopy((void *)src, (void *)dst, PAGE_SIZE); 1032} 1033 1034/* 1035 * Zero a page of physical memory by temporarily mapping it into the tlb. 1036 */ 1037void 1038moea_zero_page(mmu_t mmu, vm_page_t m) 1039{ 1040 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1041 void *va = (void *)pa; 1042 1043 bzero(va, PAGE_SIZE); 1044} 1045 1046void 1047moea_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 1048{ 1049 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1050 void *va = (void *)(pa + off); 1051 1052 bzero(va, size); 1053} 1054 1055void 1056moea_zero_page_idle(mmu_t mmu, vm_page_t m) 1057{ 1058 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1059 void *va = (void *)pa; 1060 1061 bzero(va, PAGE_SIZE); 1062} 1063 1064/* 1065 * Map the given physical page at the specified virtual address in the 1066 * target pmap with the protection requested. If specified the page 1067 * will be wired down. 1068 */ 1069void 1070moea_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1071 boolean_t wired) 1072{ 1073 1074 vm_page_lock_queues(); 1075 PMAP_LOCK(pmap); 1076 moea_enter_locked(pmap, va, m, prot, wired); 1077 vm_page_unlock_queues(); 1078 PMAP_UNLOCK(pmap); 1079} 1080 1081/* 1082 * Map the given physical page at the specified virtual address in the 1083 * target pmap with the protection requested. If specified the page 1084 * will be wired down. 1085 * 1086 * The page queues and pmap must be locked. 
1087 */ 1088static void 1089moea_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1090 boolean_t wired) 1091{ 1092 struct pvo_head *pvo_head; 1093 uma_zone_t zone; 1094 vm_page_t pg; 1095 u_int pte_lo, pvo_flags, was_exec; 1096 int error; 1097 1098 if (!moea_initialized) { 1099 pvo_head = &moea_pvo_kunmanaged; 1100 zone = moea_upvo_zone; 1101 pvo_flags = 0; 1102 pg = NULL; 1103 was_exec = PTE_EXEC; 1104 } else { 1105 pvo_head = vm_page_to_pvoh(m); 1106 pg = m; 1107 zone = moea_mpvo_zone; 1108 pvo_flags = PVO_MANAGED; 1109 was_exec = 0; 1110 } 1111 if (pmap_bootstrapped) 1112 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1113 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1114 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || 1115 (m->oflags & VPO_BUSY) != 0 || VM_OBJECT_LOCKED(m->object), 1116 ("moea_enter_locked: page %p is not busy", m)); 1117 1118 /* XXX change the pvo head for fake pages */ 1119 if ((m->flags & PG_FICTITIOUS) == PG_FICTITIOUS) { 1120 pvo_flags &= ~PVO_MANAGED; 1121 pvo_head = &moea_pvo_kunmanaged; 1122 zone = moea_upvo_zone; 1123 } 1124 1125 /* 1126 * If this is a managed page, and it's the first reference to the page, 1127 * clear the execness of the page. Otherwise fetch the execness. 1128 */ 1129 if ((pg != NULL) && ((m->flags & PG_FICTITIOUS) == 0)) { 1130 if (LIST_EMPTY(pvo_head)) { 1131 moea_attr_clear(pg, PTE_EXEC); 1132 } else { 1133 was_exec = moea_attr_fetch(pg) & PTE_EXEC; 1134 } 1135 } 1136 1137 pte_lo = moea_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); 1138 1139 if (prot & VM_PROT_WRITE) { 1140 pte_lo |= PTE_BW; 1141 if (pmap_bootstrapped && 1142 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) 1143 vm_page_flag_set(m, PG_WRITEABLE); 1144 } else 1145 pte_lo |= PTE_BR; 1146 1147 if (prot & VM_PROT_EXECUTE) 1148 pvo_flags |= PVO_EXECUTABLE; 1149 1150 if (wired) 1151 pvo_flags |= PVO_WIRED; 1152 1153 if ((m->flags & PG_FICTITIOUS) != 0) 1154 pvo_flags |= PVO_FAKE; 1155 1156 error = moea_pvo_enter(pmap, zone, pvo_head, va, VM_PAGE_TO_PHYS(m), 1157 pte_lo, pvo_flags); 1158 1159 /* 1160 * Flush the real page from the instruction cache if this page is 1161 * mapped executable and cacheable and was not previously mapped (or 1162 * was not mapped executable). 1163 */ 1164 if (error == 0 && (pvo_flags & PVO_EXECUTABLE) && 1165 (pte_lo & PTE_I) == 0 && was_exec == 0) { 1166 /* 1167 * Flush the real memory from the cache. 1168 */ 1169 moea_syncicache(VM_PAGE_TO_PHYS(m), PAGE_SIZE); 1170 if (pg != NULL) 1171 moea_attr_save(pg, PTE_EXEC); 1172 } 1173 1174 /* XXX syncicache always until problems are sorted */ 1175 moea_syncicache(VM_PAGE_TO_PHYS(m), PAGE_SIZE); 1176} 1177 1178/* 1179 * Maps a sequence of resident pages belonging to the same object. 1180 * The sequence begins with the given page m_start. This page is 1181 * mapped at the given virtual address start. Each subsequent page is 1182 * mapped at a virtual address that is offset from start by the same 1183 * amount as the page is offset from m_start within the object. The 1184 * last page in the sequence is the page with the largest offset from 1185 * m_start that can be mapped at a virtual address less than the given 1186 * virtual address end. Not every virtual page between start and end 1187 * is mapped; only those for which a resident page exists with the 1188 * corresponding offset from m_start are mapped. 
1189 */ 1190void 1191moea_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, 1192 vm_page_t m_start, vm_prot_t prot) 1193{ 1194 vm_page_t m; 1195 vm_pindex_t diff, psize; 1196 1197 psize = atop(end - start); 1198 m = m_start; 1199 vm_page_lock_queues(); 1200 PMAP_LOCK(pm); 1201 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 1202 moea_enter_locked(pm, start + ptoa(diff), m, prot & 1203 (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); 1204 m = TAILQ_NEXT(m, listq); 1205 } 1206 vm_page_unlock_queues(); 1207 PMAP_UNLOCK(pm); 1208} 1209 1210void 1211moea_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, 1212 vm_prot_t prot) 1213{ 1214 1215 vm_page_lock_queues(); 1216 PMAP_LOCK(pm); 1217 moea_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), 1218 FALSE); 1219 vm_page_unlock_queues(); 1220 PMAP_UNLOCK(pm); 1221} 1222 1223vm_paddr_t 1224moea_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) 1225{ 1226 struct pvo_entry *pvo; 1227 vm_paddr_t pa; 1228 1229 PMAP_LOCK(pm); 1230 pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); 1231 if (pvo == NULL) 1232 pa = 0; 1233 else 1234 pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); 1235 PMAP_UNLOCK(pm); 1236 return (pa); 1237} 1238 1239/* 1240 * Atomically extract and hold the physical page with the given 1241 * pmap and virtual address pair if that mapping permits the given 1242 * protection. 1243 */ 1244vm_page_t 1245moea_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1246{ 1247 struct pvo_entry *pvo; 1248 vm_page_t m; 1249 vm_paddr_t pa; 1250 1251 m = NULL; 1252 pa = 0; 1253 PMAP_LOCK(pmap); 1254retry: 1255 pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); 1256 if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && 1257 ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || 1258 (prot & VM_PROT_WRITE) == 0)) { 1259 if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) 1260 goto retry; 1261 m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); 1262 vm_page_hold(m); 1263 } 1264 PA_UNLOCK_COND(pa); 1265 PMAP_UNLOCK(pmap); 1266 return (m); 1267} 1268 1269void 1270moea_init(mmu_t mmu) 1271{ 1272 1273 moea_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), 1274 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1275 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1276 moea_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry), 1277 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1278 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1279 moea_initialized = TRUE; 1280} 1281 1282boolean_t 1283moea_is_referenced(mmu_t mmu, vm_page_t m) 1284{ 1285 1286 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1287 ("moea_is_referenced: page %p is not managed", m)); 1288 return (moea_query_bit(m, PTE_REF)); 1289} 1290 1291boolean_t 1292moea_is_modified(mmu_t mmu, vm_page_t m) 1293{ 1294 1295 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1296 ("moea_is_modified: page %p is not managed", m)); 1297 1298 /* 1299 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be 1300 * concurrently set while the object is locked. Thus, if PG_WRITEABLE 1301 * is clear, no PTEs can have PTE_CHG set. 
1302 */ 1303 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1304 if ((m->oflags & VPO_BUSY) == 0 && 1305 (m->flags & PG_WRITEABLE) == 0) 1306 return (FALSE); 1307 return (moea_query_bit(m, PTE_CHG)); 1308} 1309 1310boolean_t 1311moea_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1312{ 1313 struct pvo_entry *pvo; 1314 boolean_t rv; 1315 1316 PMAP_LOCK(pmap); 1317 pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); 1318 rv = pvo == NULL || (pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0; 1319 PMAP_UNLOCK(pmap); 1320 return (rv); 1321} 1322 1323void 1324moea_clear_reference(mmu_t mmu, vm_page_t m) 1325{ 1326 1327 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1328 ("moea_clear_reference: page %p is not managed", m)); 1329 moea_clear_bit(m, PTE_REF); 1330} 1331 1332void 1333moea_clear_modify(mmu_t mmu, vm_page_t m) 1334{ 1335 1336 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1337 ("moea_clear_modify: page %p is not managed", m)); 1338 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1339 KASSERT((m->oflags & VPO_BUSY) == 0, 1340 ("moea_clear_modify: page %p is busy", m)); 1341 1342 /* 1343 * If the page is not PG_WRITEABLE, then no PTEs can have PTE_CHG 1344 * set. If the object containing the page is locked and the page is 1345 * not VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. 1346 */ 1347 if ((m->flags & PG_WRITEABLE) == 0) 1348 return; 1349 moea_clear_bit(m, PTE_CHG); 1350} 1351 1352/* 1353 * Clear the write and modified bits in each of the given page's mappings. 1354 */ 1355void 1356moea_remove_write(mmu_t mmu, vm_page_t m) 1357{ 1358 struct pvo_entry *pvo; 1359 struct pte *pt; 1360 pmap_t pmap; 1361 u_int lo; 1362 1363 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1364 ("moea_remove_write: page %p is not managed", m)); 1365 1366 /* 1367 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by 1368 * another thread while the object is locked. Thus, if PG_WRITEABLE 1369 * is clear, no page table entries need updating. 1370 */ 1371 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1372 if ((m->oflags & VPO_BUSY) == 0 && 1373 (m->flags & PG_WRITEABLE) == 0) 1374 return; 1375 vm_page_lock_queues(); 1376 lo = moea_attr_fetch(m); 1377 powerpc_sync(); 1378 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1379 pmap = pvo->pvo_pmap; 1380 PMAP_LOCK(pmap); 1381 if ((pvo->pvo_pte.pte.pte_lo & PTE_PP) != PTE_BR) { 1382 pt = moea_pvo_to_pte(pvo, -1); 1383 pvo->pvo_pte.pte.pte_lo &= ~PTE_PP; 1384 pvo->pvo_pte.pte.pte_lo |= PTE_BR; 1385 if (pt != NULL) { 1386 moea_pte_synch(pt, &pvo->pvo_pte.pte); 1387 lo |= pvo->pvo_pte.pte.pte_lo; 1388 pvo->pvo_pte.pte.pte_lo &= ~PTE_CHG; 1389 moea_pte_change(pt, &pvo->pvo_pte.pte, 1390 pvo->pvo_vaddr); 1391 mtx_unlock(&moea_table_mutex); 1392 } 1393 } 1394 PMAP_UNLOCK(pmap); 1395 } 1396 if ((lo & PTE_CHG) != 0) { 1397 moea_attr_clear(m, PTE_CHG); 1398 vm_page_dirty(m); 1399 } 1400 vm_page_flag_clear(m, PG_WRITEABLE); 1401 vm_page_unlock_queues(); 1402} 1403 1404/* 1405 * moea_ts_referenced: 1406 * 1407 * Return a count of reference bits for a page, clearing those bits. 1408 * It is not necessary for every reference bit to be cleared, but it 1409 * is necessary that 0 only be returned when there are truly no 1410 * reference bits set. 1411 * 1412 * XXX: The exact number of bits to check and clear is a matter that 1413 * should be tested and standardized at some point in the future for 1414 * optimal aging of shared pages. 
1415 */ 1416boolean_t 1417moea_ts_referenced(mmu_t mmu, vm_page_t m) 1418{ 1419 1420 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1421 ("moea_ts_referenced: page %p is not managed", m)); 1422 return (moea_clear_bit(m, PTE_REF)); 1423} 1424 1425/* 1426 * Modify the WIMG settings of all mappings for a page. 1427 */ 1428void 1429moea_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) 1430{ 1431 struct pvo_entry *pvo; 1432 struct pvo_head *pvo_head; 1433 struct pte *pt; 1434 pmap_t pmap; 1435 u_int lo; 1436 1437 if (m->flags & PG_FICTITIOUS) { 1438 m->md.mdpg_cache_attrs = ma; 1439 return; 1440 } 1441 1442 vm_page_lock_queues(); 1443 pvo_head = vm_page_to_pvoh(m); 1444 lo = moea_calc_wimg(VM_PAGE_TO_PHYS(m), ma); 1445 1446 LIST_FOREACH(pvo, pvo_head, pvo_vlink) { 1447 pmap = pvo->pvo_pmap; 1448 PMAP_LOCK(pmap); 1449 pt = moea_pvo_to_pte(pvo, -1); 1450 pvo->pvo_pte.pte.pte_lo &= ~PTE_WIMG; 1451 pvo->pvo_pte.pte.pte_lo |= lo; 1452 if (pt != NULL) { 1453 moea_pte_change(pt, &pvo->pvo_pte.pte, 1454 pvo->pvo_vaddr); 1455 if (pvo->pvo_pmap == kernel_pmap) 1456 isync(); 1457 } 1458 mtx_unlock(&moea_table_mutex); 1459 PMAP_UNLOCK(pmap); 1460 } 1461 m->md.mdpg_cache_attrs = ma; 1462 vm_page_unlock_queues(); 1463} 1464 1465/* 1466 * Map a wired page into kernel virtual address space. 1467 */ 1468void 1469moea_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa) 1470{ 1471 1472 moea_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 1473} 1474 1475void 1476moea_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) 1477{ 1478 u_int pte_lo; 1479 int error; 1480 1481#if 0 1482 if (va < VM_MIN_KERNEL_ADDRESS) 1483 panic("moea_kenter: attempt to enter non-kernel address %#x", 1484 va); 1485#endif 1486 1487 pte_lo = moea_calc_wimg(pa, ma); 1488 1489 PMAP_LOCK(kernel_pmap); 1490 error = moea_pvo_enter(kernel_pmap, moea_upvo_zone, 1491 &moea_pvo_kunmanaged, va, pa, pte_lo, PVO_WIRED); 1492 1493 if (error != 0 && error != ENOENT) 1494 panic("moea_kenter: failed to enter va %#x pa %#x: %d", va, 1495 pa, error); 1496 1497 /* 1498 * Flush the real memory from the instruction cache. 1499 */ 1500 if ((pte_lo & (PTE_I | PTE_G)) == 0) { 1501 moea_syncicache(pa, PAGE_SIZE); 1502 } 1503 PMAP_UNLOCK(kernel_pmap); 1504} 1505 1506/* 1507 * Extract the physical page address associated with the given kernel virtual 1508 * address. 1509 */ 1510vm_offset_t 1511moea_kextract(mmu_t mmu, vm_offset_t va) 1512{ 1513 struct pvo_entry *pvo; 1514 vm_paddr_t pa; 1515 1516 /* 1517 * Allow direct mappings on 32-bit OEA 1518 */ 1519 if (va < VM_MIN_KERNEL_ADDRESS) { 1520 return (va); 1521 } 1522 1523 PMAP_LOCK(kernel_pmap); 1524 pvo = moea_pvo_find_va(kernel_pmap, va & ~ADDR_POFF, NULL); 1525 KASSERT(pvo != NULL, ("moea_kextract: no addr found")); 1526 pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); 1527 PMAP_UNLOCK(kernel_pmap); 1528 return (pa); 1529} 1530 1531/* 1532 * Remove a wired page from kernel virtual address space. 1533 */ 1534void 1535moea_kremove(mmu_t mmu, vm_offset_t va) 1536{ 1537 1538 moea_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); 1539} 1540 1541/* 1542 * Map a range of physical addresses into kernel virtual address space. 1543 * 1544 * The value passed in *virt is a suggested virtual address for the mapping. 1545 * Architectures which can support a direct-mapped physical to virtual region 1546 * can return the appropriate address within that region, leaving '*virt' 1547 * unchanged. 
 * unchanged.  We cannot and therefore do not; *virt is updated with the
 * first usable address after the mapped region.
 */
vm_offset_t
moea_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start,
    vm_offset_t pa_end, int prot)
{
	vm_offset_t	sva, va;

	sva = *virt;
	va = sva;
	for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE)
		moea_kenter(mmu, va, pa_start);
	*virt = va;
	return (sva);
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
moea_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m)
{
	int loops;
	struct pvo_entry *pvo;
	boolean_t rv;

	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
	    ("moea_page_exists_quick: page %p is not managed", m));
	loops = 0;
	rv = FALSE;
	vm_page_lock_queues();
	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
		if (pvo->pvo_pmap == pmap) {
			rv = TRUE;
			break;
		}
		if (++loops >= 16)
			break;
	}
	vm_page_unlock_queues();
	return (rv);
}

/*
 * Return the number of managed mappings to the given physical page
 * that are wired.
 */
int
moea_page_wired_mappings(mmu_t mmu, vm_page_t m)
{
	struct pvo_entry *pvo;
	int count;

	count = 0;
	if ((m->flags & PG_FICTITIOUS) != 0)
		return (count);
	vm_page_lock_queues();
	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
		if ((pvo->pvo_vaddr & PVO_WIRED) != 0)
			count++;
	vm_page_unlock_queues();
	return (count);
}

static u_int	moea_vsidcontext;

void
moea_pinit(mmu_t mmu, pmap_t pmap)
{
	int	i, mask;
	u_int	entropy;

	KASSERT((int)pmap < VM_MIN_KERNEL_ADDRESS, ("moea_pinit: virt pmap"));
	PMAP_LOCK_INIT(pmap);

	entropy = 0;
	__asm __volatile("mftb %0" : "=r"(entropy));

	if ((pmap->pmap_phys = (pmap_t)moea_kextract(mmu, (vm_offset_t)pmap))
	    == NULL) {
		pmap->pmap_phys = pmap;
	}

	mtx_lock(&moea_vsid_mutex);
	/*
	 * Allocate some segment registers for this pmap.
	 */
	for (i = 0; i < NPMAPS; i += VSID_NBPW) {
		u_int	hash, n;

		/*
		 * Create a new value by multiplying by a prime and adding in
		 * entropy from the timebase register.  This is to make the
		 * VSID more random so that the PT hash function collides
		 * less often.  (Note that the prime causes gcc to do shifts
		 * instead of a multiply.)
		 */
		moea_vsidcontext = (moea_vsidcontext * 0x1105) + entropy;
		hash = moea_vsidcontext & (NPMAPS - 1);
		if (hash == 0)		/* 0 is special, avoid it */
			continue;
		n = hash >> 5;
		mask = 1 << (hash & (VSID_NBPW - 1));
		hash = (moea_vsidcontext & 0xfffff);
		if (moea_vsid_bitmap[n] & mask) {	/* collision? */
			/* anything free in this bucket?
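			 * Each word of moea_vsid_bitmap covers VSID_NBPW (32)
			 * hash buckets; if this word is completely full we
			 * retry with fresh entropy, otherwise ffs() below
			 * picks the first free slot in it.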
*/ 1659 if (moea_vsid_bitmap[n] == 0xffffffff) { 1660 entropy = (moea_vsidcontext >> 20); 1661 continue; 1662 } 1663 i = ffs(~moea_vsid_bitmap[n]) - 1; 1664 mask = 1 << i; 1665 hash &= 0xfffff & ~(VSID_NBPW - 1); 1666 hash |= i; 1667 } 1668 moea_vsid_bitmap[n] |= mask; 1669 for (i = 0; i < 16; i++) 1670 pmap->pm_sr[i] = VSID_MAKE(i, hash); 1671 mtx_unlock(&moea_vsid_mutex); 1672 return; 1673 } 1674 1675 mtx_unlock(&moea_vsid_mutex); 1676 panic("moea_pinit: out of segments"); 1677} 1678 1679/* 1680 * Initialize the pmap associated with process 0. 1681 */ 1682void 1683moea_pinit0(mmu_t mmu, pmap_t pm) 1684{ 1685 1686 moea_pinit(mmu, pm); 1687 bzero(&pm->pm_stats, sizeof(pm->pm_stats)); 1688} 1689 1690/* 1691 * Set the physical protection on the specified range of this map as requested. 1692 */ 1693void 1694moea_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, 1695 vm_prot_t prot) 1696{ 1697 struct pvo_entry *pvo; 1698 struct pte *pt; 1699 int pteidx; 1700 1701 KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, 1702 ("moea_protect: non current pmap")); 1703 1704 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1705 moea_remove(mmu, pm, sva, eva); 1706 return; 1707 } 1708 1709 vm_page_lock_queues(); 1710 PMAP_LOCK(pm); 1711 for (; sva < eva; sva += PAGE_SIZE) { 1712 pvo = moea_pvo_find_va(pm, sva, &pteidx); 1713 if (pvo == NULL) 1714 continue; 1715 1716 if ((prot & VM_PROT_EXECUTE) == 0) 1717 pvo->pvo_vaddr &= ~PVO_EXECUTABLE; 1718 1719 /* 1720 * Grab the PTE pointer before we diddle with the cached PTE 1721 * copy. 1722 */ 1723 pt = moea_pvo_to_pte(pvo, pteidx); 1724 /* 1725 * Change the protection of the page. 1726 */ 1727 pvo->pvo_pte.pte.pte_lo &= ~PTE_PP; 1728 pvo->pvo_pte.pte.pte_lo |= PTE_BR; 1729 1730 /* 1731 * If the PVO is in the page table, update that pte as well. 1732 */ 1733 if (pt != NULL) { 1734 moea_pte_change(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); 1735 mtx_unlock(&moea_table_mutex); 1736 } 1737 } 1738 vm_page_unlock_queues(); 1739 PMAP_UNLOCK(pm); 1740} 1741 1742/* 1743 * Map a list of wired pages into kernel virtual address space. This is 1744 * intended for temporary mappings which do not need page modification or 1745 * references recorded. Existing mappings in the region are overwritten. 1746 */ 1747void 1748moea_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) 1749{ 1750 vm_offset_t va; 1751 1752 va = sva; 1753 while (count-- > 0) { 1754 moea_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 1755 va += PAGE_SIZE; 1756 m++; 1757 } 1758} 1759 1760/* 1761 * Remove page mappings from kernel virtual address space. Intended for 1762 * temporary mappings entered by moea_qenter. 1763 */ 1764void 1765moea_qremove(mmu_t mmu, vm_offset_t sva, int count) 1766{ 1767 vm_offset_t va; 1768 1769 va = sva; 1770 while (count-- > 0) { 1771 moea_kremove(mmu, va); 1772 va += PAGE_SIZE; 1773 } 1774} 1775 1776void 1777moea_release(mmu_t mmu, pmap_t pmap) 1778{ 1779 int idx, mask; 1780 1781 /* 1782 * Free segment register's VSID 1783 */ 1784 if (pmap->pm_sr[0] == 0) 1785 panic("moea_release"); 1786 1787 mtx_lock(&moea_vsid_mutex); 1788 idx = VSID_TO_HASH(pmap->pm_sr[0]) & (NPMAPS-1); 1789 mask = 1 << (idx % VSID_NBPW); 1790 idx /= VSID_NBPW; 1791 moea_vsid_bitmap[idx] &= ~mask; 1792 mtx_unlock(&moea_vsid_mutex); 1793 PMAP_LOCK_DESTROY(pmap); 1794} 1795 1796/* 1797 * Remove the given range of addresses from the specified map. 
1798 */ 1799void 1800moea_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) 1801{ 1802 struct pvo_entry *pvo; 1803 int pteidx; 1804 1805 vm_page_lock_queues(); 1806 PMAP_LOCK(pm); 1807 for (; sva < eva; sva += PAGE_SIZE) { 1808 pvo = moea_pvo_find_va(pm, sva, &pteidx); 1809 if (pvo != NULL) { 1810 moea_pvo_remove(pvo, pteidx); 1811 } 1812 } 1813 PMAP_UNLOCK(pm); 1814 vm_page_unlock_queues(); 1815} 1816 1817/* 1818 * Remove physical page from all pmaps in which it resides. moea_pvo_remove() 1819 * will reflect changes in pte's back to the vm_page. 1820 */ 1821void 1822moea_remove_all(mmu_t mmu, vm_page_t m) 1823{ 1824 struct pvo_head *pvo_head; 1825 struct pvo_entry *pvo, *next_pvo; 1826 pmap_t pmap; 1827 1828 vm_page_lock_queues(); 1829 pvo_head = vm_page_to_pvoh(m); 1830 for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) { 1831 next_pvo = LIST_NEXT(pvo, pvo_vlink); 1832 1833 MOEA_PVO_CHECK(pvo); /* sanity check */ 1834 pmap = pvo->pvo_pmap; 1835 PMAP_LOCK(pmap); 1836 moea_pvo_remove(pvo, -1); 1837 PMAP_UNLOCK(pmap); 1838 } 1839 if ((m->flags & PG_WRITEABLE) && moea_is_modified(mmu, m)) { 1840 moea_attr_clear(m, PTE_CHG); 1841 vm_page_dirty(m); 1842 } 1843 vm_page_flag_clear(m, PG_WRITEABLE); 1844 vm_page_unlock_queues(); 1845} 1846 1847/* 1848 * Allocate a physical page of memory directly from the phys_avail map. 1849 * Can only be called from moea_bootstrap before avail start and end are 1850 * calculated. 1851 */ 1852static vm_offset_t 1853moea_bootstrap_alloc(vm_size_t size, u_int align) 1854{ 1855 vm_offset_t s, e; 1856 int i, j; 1857 1858 size = round_page(size); 1859 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 1860 if (align != 0) 1861 s = (phys_avail[i] + align - 1) & ~(align - 1); 1862 else 1863 s = phys_avail[i]; 1864 e = s + size; 1865 1866 if (s < phys_avail[i] || e > phys_avail[i + 1]) 1867 continue; 1868 1869 if (s == phys_avail[i]) { 1870 phys_avail[i] += size; 1871 } else if (e == phys_avail[i + 1]) { 1872 phys_avail[i + 1] -= size; 1873 } else { 1874 for (j = phys_avail_count * 2; j > i; j -= 2) { 1875 phys_avail[j] = phys_avail[j - 2]; 1876 phys_avail[j + 1] = phys_avail[j - 1]; 1877 } 1878 1879 phys_avail[i + 3] = phys_avail[i + 1]; 1880 phys_avail[i + 1] = s; 1881 phys_avail[i + 2] = e; 1882 phys_avail_count++; 1883 } 1884 1885 return (s); 1886 } 1887 panic("moea_bootstrap_alloc: could not allocate memory"); 1888} 1889 1890static void 1891moea_syncicache(vm_offset_t pa, vm_size_t len) 1892{ 1893 __syncicache((void *)pa, len); 1894} 1895 1896static int 1897moea_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, 1898 vm_offset_t va, vm_offset_t pa, u_int pte_lo, int flags) 1899{ 1900 struct pvo_entry *pvo; 1901 u_int sr; 1902 int first; 1903 u_int ptegidx; 1904 int i; 1905 int bootstrap; 1906 1907 moea_pvo_enter_calls++; 1908 first = 0; 1909 bootstrap = 0; 1910 1911 /* 1912 * Compute the PTE Group index. 1913 */ 1914 va &= ~ADDR_POFF; 1915 sr = va_to_sr(pm->pm_sr, va); 1916 ptegidx = va_to_pteg(sr, va); 1917 1918 /* 1919 * Remove any existing mapping for this page. Reuse the pvo entry if 1920 * there is a mapping. 
1921 */ 1922 mtx_lock(&moea_table_mutex); 1923 LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { 1924 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { 1925 if ((pvo->pvo_pte.pte.pte_lo & PTE_RPGN) == pa && 1926 (pvo->pvo_pte.pte.pte_lo & PTE_PP) == 1927 (pte_lo & PTE_PP)) { 1928 mtx_unlock(&moea_table_mutex); 1929 return (0); 1930 } 1931 moea_pvo_remove(pvo, -1); 1932 break; 1933 } 1934 } 1935 1936 /* 1937 * If we aren't overwriting a mapping, try to allocate. 1938 */ 1939 if (moea_initialized) { 1940 pvo = uma_zalloc(zone, M_NOWAIT); 1941 } else { 1942 if (moea_bpvo_pool_index >= BPVO_POOL_SIZE) { 1943 panic("moea_enter: bpvo pool exhausted, %d, %d, %d", 1944 moea_bpvo_pool_index, BPVO_POOL_SIZE, 1945 BPVO_POOL_SIZE * sizeof(struct pvo_entry)); 1946 } 1947 pvo = &moea_bpvo_pool[moea_bpvo_pool_index]; 1948 moea_bpvo_pool_index++; 1949 bootstrap = 1; 1950 } 1951 1952 if (pvo == NULL) { 1953 mtx_unlock(&moea_table_mutex); 1954 return (ENOMEM); 1955 } 1956 1957 moea_pvo_entries++; 1958 pvo->pvo_vaddr = va; 1959 pvo->pvo_pmap = pm; 1960 LIST_INSERT_HEAD(&moea_pvo_table[ptegidx], pvo, pvo_olink); 1961 pvo->pvo_vaddr &= ~ADDR_POFF; 1962 if (flags & VM_PROT_EXECUTE) 1963 pvo->pvo_vaddr |= PVO_EXECUTABLE; 1964 if (flags & PVO_WIRED) 1965 pvo->pvo_vaddr |= PVO_WIRED; 1966 if (pvo_head != &moea_pvo_kunmanaged) 1967 pvo->pvo_vaddr |= PVO_MANAGED; 1968 if (bootstrap) 1969 pvo->pvo_vaddr |= PVO_BOOTSTRAP; 1970 if (flags & PVO_FAKE) 1971 pvo->pvo_vaddr |= PVO_FAKE; 1972 1973 moea_pte_create(&pvo->pvo_pte.pte, sr, va, pa | pte_lo); 1974 1975 /* 1976 * Remember if the list was empty and therefore will be the first 1977 * item. 1978 */ 1979 if (LIST_FIRST(pvo_head) == NULL) 1980 first = 1; 1981 LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); 1982 1983 if (pvo->pvo_pte.pte.pte_lo & PVO_WIRED) 1984 pm->pm_stats.wired_count++; 1985 pm->pm_stats.resident_count++; 1986 1987 /* 1988 * We hope this succeeds but it isn't required. 1989 */ 1990 i = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte); 1991 if (i >= 0) { 1992 PVO_PTEGIDX_SET(pvo, i); 1993 } else { 1994 panic("moea_pvo_enter: overflow"); 1995 moea_pte_overflow++; 1996 } 1997 mtx_unlock(&moea_table_mutex); 1998 1999 return (first ? ENOENT : 0); 2000} 2001 2002static void 2003moea_pvo_remove(struct pvo_entry *pvo, int pteidx) 2004{ 2005 struct pte *pt; 2006 2007 /* 2008 * If there is an active pte entry, we need to deactivate it (and 2009 * save the ref & cfg bits). 2010 */ 2011 pt = moea_pvo_to_pte(pvo, pteidx); 2012 if (pt != NULL) { 2013 moea_pte_unset(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); 2014 mtx_unlock(&moea_table_mutex); 2015 PVO_PTEGIDX_CLR(pvo); 2016 } else { 2017 moea_pte_overflow--; 2018 } 2019 2020 /* 2021 * Update our statistics. 2022 */ 2023 pvo->pvo_pmap->pm_stats.resident_count--; 2024 if (pvo->pvo_pte.pte.pte_lo & PVO_WIRED) 2025 pvo->pvo_pmap->pm_stats.wired_count--; 2026 2027 /* 2028 * Save the REF/CHG bits into their cache if the page is managed. 2029 */ 2030 if ((pvo->pvo_vaddr & (PVO_MANAGED|PVO_FAKE)) == PVO_MANAGED) { 2031 struct vm_page *pg; 2032 2033 pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); 2034 if (pg != NULL) { 2035 moea_attr_save(pg, pvo->pvo_pte.pte.pte_lo & 2036 (PTE_REF | PTE_CHG)); 2037 } 2038 } 2039 2040 /* 2041 * Remove this PVO from the PV list. 2042 */ 2043 LIST_REMOVE(pvo, pvo_vlink); 2044 2045 /* 2046 * Remove this from the overflow list and return it to the pool 2047 * if we aren't going to reuse it. 
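	 * (PVOs that were carved out of the static moea_bpvo_pool during
	 * bootstrap carry PVO_BOOTSTRAP and are never returned to UMA.)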
2048 */ 2049 LIST_REMOVE(pvo, pvo_olink); 2050 if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) 2051 uma_zfree(pvo->pvo_vaddr & PVO_MANAGED ? moea_mpvo_zone : 2052 moea_upvo_zone, pvo); 2053 moea_pvo_entries--; 2054 moea_pvo_remove_calls++; 2055} 2056 2057static __inline int 2058moea_pvo_pte_index(const struct pvo_entry *pvo, int ptegidx) 2059{ 2060 int pteidx; 2061 2062 /* 2063 * We can find the actual pte entry without searching by grabbing 2064 * the PTEG index from 3 unused bits in pte_lo[11:9] and by 2065 * noticing the HID bit. 2066 */ 2067 pteidx = ptegidx * 8 + PVO_PTEGIDX_GET(pvo); 2068 if (pvo->pvo_pte.pte.pte_hi & PTE_HID) 2069 pteidx ^= moea_pteg_mask * 8; 2070 2071 return (pteidx); 2072} 2073 2074static struct pvo_entry * 2075moea_pvo_find_va(pmap_t pm, vm_offset_t va, int *pteidx_p) 2076{ 2077 struct pvo_entry *pvo; 2078 int ptegidx; 2079 u_int sr; 2080 2081 va &= ~ADDR_POFF; 2082 sr = va_to_sr(pm->pm_sr, va); 2083 ptegidx = va_to_pteg(sr, va); 2084 2085 mtx_lock(&moea_table_mutex); 2086 LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { 2087 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { 2088 if (pteidx_p) 2089 *pteidx_p = moea_pvo_pte_index(pvo, ptegidx); 2090 break; 2091 } 2092 } 2093 mtx_unlock(&moea_table_mutex); 2094 2095 return (pvo); 2096} 2097 2098static struct pte * 2099moea_pvo_to_pte(const struct pvo_entry *pvo, int pteidx) 2100{ 2101 struct pte *pt; 2102 2103 /* 2104 * If we haven't been supplied the ptegidx, calculate it. 2105 */ 2106 if (pteidx == -1) { 2107 int ptegidx; 2108 u_int sr; 2109 2110 sr = va_to_sr(pvo->pvo_pmap->pm_sr, pvo->pvo_vaddr); 2111 ptegidx = va_to_pteg(sr, pvo->pvo_vaddr); 2112 pteidx = moea_pvo_pte_index(pvo, ptegidx); 2113 } 2114 2115 pt = &moea_pteg_table[pteidx >> 3].pt[pteidx & 7]; 2116 mtx_lock(&moea_table_mutex); 2117 2118 if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) { 2119 panic("moea_pvo_to_pte: pvo %p has valid pte in pvo but no " 2120 "valid pte index", pvo); 2121 } 2122 2123 if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) { 2124 panic("moea_pvo_to_pte: pvo %p has valid pte index in pvo " 2125 "pvo but no valid pte", pvo); 2126 } 2127 2128 if ((pt->pte_hi ^ (pvo->pvo_pte.pte.pte_hi & ~PTE_VALID)) == PTE_VALID) { 2129 if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0) { 2130 panic("moea_pvo_to_pte: pvo %p has valid pte in " 2131 "moea_pteg_table %p but invalid in pvo", pvo, pt); 2132 } 2133 2134 if (((pt->pte_lo ^ pvo->pvo_pte.pte.pte_lo) & ~(PTE_CHG|PTE_REF)) 2135 != 0) { 2136 panic("moea_pvo_to_pte: pvo %p pte does not match " 2137 "pte %p in moea_pteg_table", pvo, pt); 2138 } 2139 2140 mtx_assert(&moea_table_mutex, MA_OWNED); 2141 return (pt); 2142 } 2143 2144 if (pvo->pvo_pte.pte.pte_hi & PTE_VALID) { 2145 panic("moea_pvo_to_pte: pvo %p has invalid pte %p in " 2146 "moea_pteg_table but valid in pvo", pvo, pt); 2147 } 2148 2149 mtx_unlock(&moea_table_mutex); 2150 return (NULL); 2151} 2152 2153/* 2154 * XXX: THIS STUFF SHOULD BE IN pte.c? 2155 */ 2156int 2157moea_pte_spill(vm_offset_t addr) 2158{ 2159 struct pvo_entry *source_pvo, *victim_pvo; 2160 struct pvo_entry *pvo; 2161 int ptegidx, i, j; 2162 u_int sr; 2163 struct pteg *pteg; 2164 struct pte *pt; 2165 2166 moea_pte_spills++; 2167 2168 sr = mfsrin(addr); 2169 ptegidx = va_to_pteg(sr, addr); 2170 2171 /* 2172 * Have to substitute some entry. Use the primary hash for this. 2173 * Use low bits of timebase as random generator. 
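	 * (The mftb value read below is masked to 0..7, selecting one of the
	 * eight PTE slots in the primary PTEG as the potential victim.)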
2174 */ 2175 pteg = &moea_pteg_table[ptegidx]; 2176 mtx_lock(&moea_table_mutex); 2177 __asm __volatile("mftb %0" : "=r"(i)); 2178 i &= 7; 2179 pt = &pteg->pt[i]; 2180 2181 source_pvo = NULL; 2182 victim_pvo = NULL; 2183 LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { 2184 /* 2185 * We need to find a pvo entry for this address. 2186 */ 2187 MOEA_PVO_CHECK(pvo); 2188 if (source_pvo == NULL && 2189 moea_pte_match(&pvo->pvo_pte.pte, sr, addr, 2190 pvo->pvo_pte.pte.pte_hi & PTE_HID)) { 2191 /* 2192 * Now found an entry to be spilled into the pteg. 2193 * The PTE is now valid, so we know it's active. 2194 */ 2195 j = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte); 2196 2197 if (j >= 0) { 2198 PVO_PTEGIDX_SET(pvo, j); 2199 moea_pte_overflow--; 2200 MOEA_PVO_CHECK(pvo); 2201 mtx_unlock(&moea_table_mutex); 2202 return (1); 2203 } 2204 2205 source_pvo = pvo; 2206 2207 if (victim_pvo != NULL) 2208 break; 2209 } 2210 2211 /* 2212 * We also need the pvo entry of the victim we are replacing 2213 * so save the R & C bits of the PTE. 2214 */ 2215 if ((pt->pte_hi & PTE_HID) == 0 && victim_pvo == NULL && 2216 moea_pte_compare(pt, &pvo->pvo_pte.pte)) { 2217 victim_pvo = pvo; 2218 if (source_pvo != NULL) 2219 break; 2220 } 2221 } 2222 2223 if (source_pvo == NULL) { 2224 mtx_unlock(&moea_table_mutex); 2225 return (0); 2226 } 2227 2228 if (victim_pvo == NULL) { 2229 if ((pt->pte_hi & PTE_HID) == 0) 2230 panic("moea_pte_spill: victim p-pte (%p) has no pvo" 2231 "entry", pt); 2232 2233 /* 2234 * If this is a secondary PTE, we need to search it's primary 2235 * pvo bucket for the matching PVO. 2236 */ 2237 LIST_FOREACH(pvo, &moea_pvo_table[ptegidx ^ moea_pteg_mask], 2238 pvo_olink) { 2239 MOEA_PVO_CHECK(pvo); 2240 /* 2241 * We also need the pvo entry of the victim we are 2242 * replacing so save the R & C bits of the PTE. 2243 */ 2244 if (moea_pte_compare(pt, &pvo->pvo_pte.pte)) { 2245 victim_pvo = pvo; 2246 break; 2247 } 2248 } 2249 2250 if (victim_pvo == NULL) 2251 panic("moea_pte_spill: victim s-pte (%p) has no pvo" 2252 "entry", pt); 2253 } 2254 2255 /* 2256 * We are invalidating the TLB entry for the EA we are replacing even 2257 * though it's valid. If we don't, we lose any ref/chg bit changes 2258 * contained in the TLB entry. 2259 */ 2260 source_pvo->pvo_pte.pte.pte_hi &= ~PTE_HID; 2261 2262 moea_pte_unset(pt, &victim_pvo->pvo_pte.pte, victim_pvo->pvo_vaddr); 2263 moea_pte_set(pt, &source_pvo->pvo_pte.pte); 2264 2265 PVO_PTEGIDX_CLR(victim_pvo); 2266 PVO_PTEGIDX_SET(source_pvo, i); 2267 moea_pte_replacements++; 2268 2269 MOEA_PVO_CHECK(victim_pvo); 2270 MOEA_PVO_CHECK(source_pvo); 2271 2272 mtx_unlock(&moea_table_mutex); 2273 return (1); 2274} 2275 2276static int 2277moea_pte_insert(u_int ptegidx, struct pte *pvo_pt) 2278{ 2279 struct pte *pt; 2280 int i; 2281 2282 mtx_assert(&moea_table_mutex, MA_OWNED); 2283 2284 /* 2285 * First try primary hash. 2286 */ 2287 for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) { 2288 if ((pt->pte_hi & PTE_VALID) == 0) { 2289 pvo_pt->pte_hi &= ~PTE_HID; 2290 moea_pte_set(pt, pvo_pt); 2291 return (i); 2292 } 2293 } 2294 2295 /* 2296 * Now try secondary hash. 
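	 * The secondary PTEG index is the primary index XORed with
	 * moea_pteg_mask (the complement hash); a PTE placed there has
	 * PTE_HID set so later lookups know which hash function to use.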
2297 */ 2298 ptegidx ^= moea_pteg_mask; 2299 2300 for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) { 2301 if ((pt->pte_hi & PTE_VALID) == 0) { 2302 pvo_pt->pte_hi |= PTE_HID; 2303 moea_pte_set(pt, pvo_pt); 2304 return (i); 2305 } 2306 } 2307 2308 panic("moea_pte_insert: overflow"); 2309 return (-1); 2310} 2311 2312static boolean_t 2313moea_query_bit(vm_page_t m, int ptebit) 2314{ 2315 struct pvo_entry *pvo; 2316 struct pte *pt; 2317 2318 if (moea_attr_fetch(m) & ptebit) 2319 return (TRUE); 2320 2321 vm_page_lock_queues(); 2322 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 2323 MOEA_PVO_CHECK(pvo); /* sanity check */ 2324 2325 /* 2326 * See if we saved the bit off. If so, cache it and return 2327 * success. 2328 */ 2329 if (pvo->pvo_pte.pte.pte_lo & ptebit) { 2330 moea_attr_save(m, ptebit); 2331 MOEA_PVO_CHECK(pvo); /* sanity check */ 2332 vm_page_unlock_queues(); 2333 return (TRUE); 2334 } 2335 } 2336 2337 /* 2338 * No luck, now go through the hard part of looking at the PTEs 2339 * themselves. Sync so that any pending REF/CHG bits are flushed to 2340 * the PTEs. 2341 */ 2342 powerpc_sync(); 2343 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 2344 MOEA_PVO_CHECK(pvo); /* sanity check */ 2345 2346 /* 2347 * See if this pvo has a valid PTE. if so, fetch the 2348 * REF/CHG bits from the valid PTE. If the appropriate 2349 * ptebit is set, cache it and return success. 2350 */ 2351 pt = moea_pvo_to_pte(pvo, -1); 2352 if (pt != NULL) { 2353 moea_pte_synch(pt, &pvo->pvo_pte.pte); 2354 mtx_unlock(&moea_table_mutex); 2355 if (pvo->pvo_pte.pte.pte_lo & ptebit) { 2356 moea_attr_save(m, ptebit); 2357 MOEA_PVO_CHECK(pvo); /* sanity check */ 2358 vm_page_unlock_queues(); 2359 return (TRUE); 2360 } 2361 } 2362 } 2363 2364 vm_page_unlock_queues(); 2365 return (FALSE); 2366} 2367 2368static u_int 2369moea_clear_bit(vm_page_t m, int ptebit) 2370{ 2371 u_int count; 2372 struct pvo_entry *pvo; 2373 struct pte *pt; 2374 2375 vm_page_lock_queues(); 2376 2377 /* 2378 * Clear the cached value. 2379 */ 2380 moea_attr_clear(m, ptebit); 2381 2382 /* 2383 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so 2384 * we can reset the right ones). note that since the pvo entries and 2385 * list heads are accessed via BAT0 and are never placed in the page 2386 * table, we don't have to worry about further accesses setting the 2387 * REF/CHG bits. 2388 */ 2389 powerpc_sync(); 2390 2391 /* 2392 * For each pvo entry, clear the pvo's ptebit. If this pvo has a 2393 * valid pte clear the ptebit from the valid pte. 
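	 * The number of mappings that actually had the bit set is returned;
	 * moea_ts_referenced() relies on this count when aging pages.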
2394 */ 2395 count = 0; 2396 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 2397 MOEA_PVO_CHECK(pvo); /* sanity check */ 2398 pt = moea_pvo_to_pte(pvo, -1); 2399 if (pt != NULL) { 2400 moea_pte_synch(pt, &pvo->pvo_pte.pte); 2401 if (pvo->pvo_pte.pte.pte_lo & ptebit) { 2402 count++; 2403 moea_pte_clear(pt, PVO_VADDR(pvo), ptebit); 2404 } 2405 mtx_unlock(&moea_table_mutex); 2406 } 2407 pvo->pvo_pte.pte.pte_lo &= ~ptebit; 2408 MOEA_PVO_CHECK(pvo); /* sanity check */ 2409 } 2410 2411 vm_page_unlock_queues(); 2412 return (count); 2413} 2414 2415/* 2416 * Return true if the physical range is encompassed by the battable[idx] 2417 */ 2418static int 2419moea_bat_mapped(int idx, vm_offset_t pa, vm_size_t size) 2420{ 2421 u_int prot; 2422 u_int32_t start; 2423 u_int32_t end; 2424 u_int32_t bat_ble; 2425 2426 /* 2427 * Return immediately if not a valid mapping 2428 */ 2429 if (!(battable[idx].batu & BAT_Vs)) 2430 return (EINVAL); 2431 2432 /* 2433 * The BAT entry must be cache-inhibited, guarded, and r/w 2434 * so it can function as an i/o page 2435 */ 2436 prot = battable[idx].batl & (BAT_I|BAT_G|BAT_PP_RW); 2437 if (prot != (BAT_I|BAT_G|BAT_PP_RW)) 2438 return (EPERM); 2439 2440 /* 2441 * The address should be within the BAT range. Assume that the 2442 * start address in the BAT has the correct alignment (thus 2443 * not requiring masking) 2444 */ 2445 start = battable[idx].batl & BAT_PBS; 2446 bat_ble = (battable[idx].batu & ~(BAT_EBS)) | 0x03; 2447 end = start | (bat_ble << 15) | 0x7fff; 2448 2449 if ((pa < start) || ((pa + size) > end)) 2450 return (ERANGE); 2451 2452 return (0); 2453} 2454 2455boolean_t 2456moea_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size) 2457{ 2458 int i; 2459 2460 /* 2461 * This currently does not work for entries that 2462 * overlap 256M BAT segments. 2463 */ 2464 2465 for(i = 0; i < 16; i++) 2466 if (moea_bat_mapped(i, pa, size) == 0) 2467 return (0); 2468 2469 return (EFAULT); 2470} 2471 2472/* 2473 * Map a set of physical memory pages into the kernel virtual 2474 * address space. Return a pointer to where it is mapped. This 2475 * routine is intended to be used for mapping device memory, 2476 * NOT real memory. 2477 */ 2478void * 2479moea_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size) 2480{ 2481 2482 return (moea_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); 2483} 2484 2485void * 2486moea_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma) 2487{ 2488 vm_offset_t va, tmpva, ppa, offset; 2489 int i; 2490 2491 ppa = trunc_page(pa); 2492 offset = pa & PAGE_MASK; 2493 size = roundup(offset + size, PAGE_SIZE); 2494 2495 /* 2496 * If the physical address lies within a valid BAT table entry, 2497 * return the 1:1 mapping. This currently doesn't work 2498 * for regions that overlap 256M BAT segments. 
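	 * Returning the BAT-covered physical address avoids consuming kernel
	 * virtual address space and PTEs for device ranges that are already
	 * reachable 1:1 through a DBAT.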
2499 */ 2500 for (i = 0; i < 16; i++) { 2501 if (moea_bat_mapped(i, pa, size) == 0) 2502 return ((void *) pa); 2503 } 2504 2505 va = kmem_alloc_nofault(kernel_map, size); 2506 if (!va) 2507 panic("moea_mapdev: Couldn't alloc kernel virtual memory"); 2508 2509 for (tmpva = va; size > 0;) { 2510 moea_kenter_attr(mmu, tmpva, ppa, ma); 2511 tlbie(tmpva); 2512 size -= PAGE_SIZE; 2513 tmpva += PAGE_SIZE; 2514 ppa += PAGE_SIZE; 2515 } 2516 2517 return ((void *)(va + offset)); 2518} 2519 2520void 2521moea_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 2522{ 2523 vm_offset_t base, offset; 2524 2525 /* 2526 * If this is outside kernel virtual space, then it's a 2527 * battable entry and doesn't require unmapping 2528 */ 2529 if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= virtual_end)) { 2530 base = trunc_page(va); 2531 offset = va & PAGE_MASK; 2532 size = roundup(offset + size, PAGE_SIZE); 2533 kmem_free(kernel_map, base, size); 2534 } 2535} 2536 2537static void 2538moea_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2539{ 2540 struct pvo_entry *pvo; 2541 vm_offset_t lim; 2542 vm_paddr_t pa; 2543 vm_size_t len; 2544 2545 PMAP_LOCK(pm); 2546 while (sz > 0) { 2547 lim = round_page(va); 2548 len = MIN(lim - va, sz); 2549 pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); 2550 if (pvo != NULL) { 2551 pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | 2552 (va & ADDR_POFF); 2553 moea_syncicache(pa, len); 2554 } 2555 va += len; 2556 sz -= len; 2557 } 2558 PMAP_UNLOCK(pm); 2559}
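/*
 * moea_sync_icache() above walks its range one page at a time because each
 * virtual page may resolve to a different physical page; pages that have no
 * mapping in the pmap are simply skipped.
 */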
MMUMETHOD(mmu_page_exists_quick,moea_page_exists_quick), 336 MMUMETHOD(mmu_page_wired_mappings,moea_page_wired_mappings), 337 MMUMETHOD(mmu_pinit, moea_pinit), 338 MMUMETHOD(mmu_pinit0, moea_pinit0), 339 MMUMETHOD(mmu_protect, moea_protect), 340 MMUMETHOD(mmu_qenter, moea_qenter), 341 MMUMETHOD(mmu_qremove, moea_qremove), 342 MMUMETHOD(mmu_release, moea_release), 343 MMUMETHOD(mmu_remove, moea_remove), 344 MMUMETHOD(mmu_remove_all, moea_remove_all), 345 MMUMETHOD(mmu_remove_write, moea_remove_write), 346 MMUMETHOD(mmu_sync_icache, moea_sync_icache), 347 MMUMETHOD(mmu_zero_page, moea_zero_page), 348 MMUMETHOD(mmu_zero_page_area, moea_zero_page_area), 349 MMUMETHOD(mmu_zero_page_idle, moea_zero_page_idle), 350 MMUMETHOD(mmu_activate, moea_activate), 351 MMUMETHOD(mmu_deactivate, moea_deactivate), 352 MMUMETHOD(mmu_page_set_memattr, moea_page_set_memattr), 353 354 /* Internal interfaces */ 355 MMUMETHOD(mmu_bootstrap, moea_bootstrap), 356 MMUMETHOD(mmu_cpu_bootstrap, moea_cpu_bootstrap), 357 MMUMETHOD(mmu_mapdev_attr, moea_mapdev_attr), 358 MMUMETHOD(mmu_mapdev, moea_mapdev), 359 MMUMETHOD(mmu_unmapdev, moea_unmapdev), 360 MMUMETHOD(mmu_kextract, moea_kextract), 361 MMUMETHOD(mmu_kenter, moea_kenter), 362 MMUMETHOD(mmu_kenter_attr, moea_kenter_attr), 363 MMUMETHOD(mmu_dev_direct_mapped,moea_dev_direct_mapped), 364 365 { 0, 0 } 366}; 367 368MMU_DEF(oea_mmu, MMU_TYPE_OEA, moea_methods, 0); 369 370static __inline uint32_t 371moea_calc_wimg(vm_offset_t pa, vm_memattr_t ma) 372{ 373 uint32_t pte_lo; 374 int i; 375 376 if (ma != VM_MEMATTR_DEFAULT) { 377 switch (ma) { 378 case VM_MEMATTR_UNCACHEABLE: 379 return (PTE_I | PTE_G); 380 case VM_MEMATTR_WRITE_COMBINING: 381 case VM_MEMATTR_WRITE_BACK: 382 case VM_MEMATTR_PREFETCHABLE: 383 return (PTE_I); 384 case VM_MEMATTR_WRITE_THROUGH: 385 return (PTE_W | PTE_M); 386 } 387 } 388 389 /* 390 * Assume the page is cache inhibited and access is guarded unless 391 * it's in our available memory array. 
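	 * Addresses that fall inside a known physical memory region get
	 * PTE_M (memory coherent) instead, so ordinary RAM stays cacheable.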
392 */ 393 pte_lo = PTE_I | PTE_G; 394 for (i = 0; i < pregions_sz; i++) { 395 if ((pa >= pregions[i].mr_start) && 396 (pa < (pregions[i].mr_start + pregions[i].mr_size))) { 397 pte_lo = PTE_M; 398 break; 399 } 400 } 401 402 return pte_lo; 403} 404 405static void 406tlbie(vm_offset_t va) 407{ 408 409 mtx_lock_spin(&tlbie_mtx); 410 __asm __volatile("ptesync"); 411 __asm __volatile("tlbie %0" :: "r"(va)); 412 __asm __volatile("eieio; tlbsync; ptesync"); 413 mtx_unlock_spin(&tlbie_mtx); 414} 415 416static void 417tlbia(void) 418{ 419 vm_offset_t va; 420 421 for (va = 0; va < 0x00040000; va += 0x00001000) { 422 __asm __volatile("tlbie %0" :: "r"(va)); 423 powerpc_sync(); 424 } 425 __asm __volatile("tlbsync"); 426 powerpc_sync(); 427} 428 429static __inline int 430va_to_sr(u_int *sr, vm_offset_t va) 431{ 432 return (sr[(uintptr_t)va >> ADDR_SR_SHFT]); 433} 434 435static __inline u_int 436va_to_pteg(u_int sr, vm_offset_t addr) 437{ 438 u_int hash; 439 440 hash = (sr & SR_VSID_MASK) ^ (((u_int)addr & ADDR_PIDX) >> 441 ADDR_PIDX_SHFT); 442 return (hash & moea_pteg_mask); 443} 444 445static __inline struct pvo_head * 446vm_page_to_pvoh(vm_page_t m) 447{ 448 449 return (&m->md.mdpg_pvoh); 450} 451 452static __inline void 453moea_attr_clear(vm_page_t m, int ptebit) 454{ 455 456 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 457 m->md.mdpg_attrs &= ~ptebit; 458} 459 460static __inline int 461moea_attr_fetch(vm_page_t m) 462{ 463 464 return (m->md.mdpg_attrs); 465} 466 467static __inline void 468moea_attr_save(vm_page_t m, int ptebit) 469{ 470 471 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 472 m->md.mdpg_attrs |= ptebit; 473} 474 475static __inline int 476moea_pte_compare(const struct pte *pt, const struct pte *pvo_pt) 477{ 478 if (pt->pte_hi == pvo_pt->pte_hi) 479 return (1); 480 481 return (0); 482} 483 484static __inline int 485moea_pte_match(struct pte *pt, u_int sr, vm_offset_t va, int which) 486{ 487 return (pt->pte_hi & ~PTE_VALID) == 488 (((sr & SR_VSID_MASK) << PTE_VSID_SHFT) | 489 ((va >> ADDR_API_SHFT) & PTE_API) | which); 490} 491 492static __inline void 493moea_pte_create(struct pte *pt, u_int sr, vm_offset_t va, u_int pte_lo) 494{ 495 496 mtx_assert(&moea_table_mutex, MA_OWNED); 497 498 /* 499 * Construct a PTE. Default to IMB initially. Valid bit only gets 500 * set when the real pte is set in memory. 501 * 502 * Note: Don't set the valid bit for correct operation of tlb update. 503 */ 504 pt->pte_hi = ((sr & SR_VSID_MASK) << PTE_VSID_SHFT) | 505 (((va & ADDR_PIDX) >> ADDR_API_SHFT) & PTE_API); 506 pt->pte_lo = pte_lo; 507} 508 509static __inline void 510moea_pte_synch(struct pte *pt, struct pte *pvo_pt) 511{ 512 513 mtx_assert(&moea_table_mutex, MA_OWNED); 514 pvo_pt->pte_lo |= pt->pte_lo & (PTE_REF | PTE_CHG); 515} 516 517static __inline void 518moea_pte_clear(struct pte *pt, vm_offset_t va, int ptebit) 519{ 520 521 mtx_assert(&moea_table_mutex, MA_OWNED); 522 523 /* 524 * As shown in Section 7.6.3.2.3 525 */ 526 pt->pte_lo &= ~ptebit; 527 tlbie(va); 528} 529 530static __inline void 531moea_pte_set(struct pte *pt, struct pte *pvo_pt) 532{ 533 534 mtx_assert(&moea_table_mutex, MA_OWNED); 535 pvo_pt->pte_hi |= PTE_VALID; 536 537 /* 538 * Update the PTE as defined in section 7.6.3.1. 539 * Note that the REF/CHG bits are from pvo_pt and thus should havce 540 * been saved so this routine can restore them (if desired). 
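	 * pte_lo is written before pte_hi, with a sync between the stores,
	 * so the entry never appears valid while only half of it is in place.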
541 */ 542 pt->pte_lo = pvo_pt->pte_lo; 543 powerpc_sync(); 544 pt->pte_hi = pvo_pt->pte_hi; 545 powerpc_sync(); 546 moea_pte_valid++; 547} 548 549static __inline void 550moea_pte_unset(struct pte *pt, struct pte *pvo_pt, vm_offset_t va) 551{ 552 553 mtx_assert(&moea_table_mutex, MA_OWNED); 554 pvo_pt->pte_hi &= ~PTE_VALID; 555 556 /* 557 * Force the reg & chg bits back into the PTEs. 558 */ 559 powerpc_sync(); 560 561 /* 562 * Invalidate the pte. 563 */ 564 pt->pte_hi &= ~PTE_VALID; 565 566 tlbie(va); 567 568 /* 569 * Save the reg & chg bits. 570 */ 571 moea_pte_synch(pt, pvo_pt); 572 moea_pte_valid--; 573} 574 575static __inline void 576moea_pte_change(struct pte *pt, struct pte *pvo_pt, vm_offset_t va) 577{ 578 579 /* 580 * Invalidate the PTE 581 */ 582 moea_pte_unset(pt, pvo_pt, va); 583 moea_pte_set(pt, pvo_pt); 584} 585 586/* 587 * Quick sort callout for comparing memory regions. 588 */ 589static int mr_cmp(const void *a, const void *b); 590static int om_cmp(const void *a, const void *b); 591 592static int 593mr_cmp(const void *a, const void *b) 594{ 595 const struct mem_region *regiona; 596 const struct mem_region *regionb; 597 598 regiona = a; 599 regionb = b; 600 if (regiona->mr_start < regionb->mr_start) 601 return (-1); 602 else if (regiona->mr_start > regionb->mr_start) 603 return (1); 604 else 605 return (0); 606} 607 608static int 609om_cmp(const void *a, const void *b) 610{ 611 const struct ofw_map *mapa; 612 const struct ofw_map *mapb; 613 614 mapa = a; 615 mapb = b; 616 if (mapa->om_pa < mapb->om_pa) 617 return (-1); 618 else if (mapa->om_pa > mapb->om_pa) 619 return (1); 620 else 621 return (0); 622} 623 624void 625moea_cpu_bootstrap(mmu_t mmup, int ap) 626{ 627 u_int sdr; 628 int i; 629 630 if (ap) { 631 powerpc_sync(); 632 __asm __volatile("mtdbatu 0,%0" :: "r"(battable[0].batu)); 633 __asm __volatile("mtdbatl 0,%0" :: "r"(battable[0].batl)); 634 isync(); 635 __asm __volatile("mtibatu 0,%0" :: "r"(battable[0].batu)); 636 __asm __volatile("mtibatl 0,%0" :: "r"(battable[0].batl)); 637 isync(); 638 } 639 640 __asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu)); 641 __asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl)); 642 isync(); 643 644 __asm __volatile("mtibatu 1,%0" :: "r"(0)); 645 __asm __volatile("mtdbatu 2,%0" :: "r"(0)); 646 __asm __volatile("mtibatu 2,%0" :: "r"(0)); 647 __asm __volatile("mtdbatu 3,%0" :: "r"(0)); 648 __asm __volatile("mtibatu 3,%0" :: "r"(0)); 649 isync(); 650 651 for (i = 0; i < 16; i++) 652 mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); 653 powerpc_sync(); 654 655 sdr = (u_int)moea_pteg_table | (moea_pteg_mask >> 10); 656 __asm __volatile("mtsdr1 %0" :: "r"(sdr)); 657 isync(); 658 659 tlbia(); 660} 661 662void 663moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) 664{ 665 ihandle_t mmui; 666 phandle_t chosen, mmu; 667 int sz; 668 int i, j; 669 int ofw_mappings; 670 vm_size_t size, physsz, hwphyssz; 671 vm_offset_t pa, va, off; 672 void *dpcpu; 673 register_t msr; 674 675 /* 676 * Set up BAT0 to map the lowest 256 MB area 677 */ 678 battable[0x0].batl = BATL(0x00000000, BAT_M, BAT_PP_RW); 679 battable[0x0].batu = BATU(0x00000000, BAT_BL_256M, BAT_Vs); 680 681 /* 682 * Map PCI memory space. 
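	 * Each of the following entries covers one 256MB segment between
	 * 0x80000000 and 0xbfffffff, cache-inhibited and guarded so the
	 * range is safe for device access.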
683 */ 684 battable[0x8].batl = BATL(0x80000000, BAT_I|BAT_G, BAT_PP_RW); 685 battable[0x8].batu = BATU(0x80000000, BAT_BL_256M, BAT_Vs); 686 687 battable[0x9].batl = BATL(0x90000000, BAT_I|BAT_G, BAT_PP_RW); 688 battable[0x9].batu = BATU(0x90000000, BAT_BL_256M, BAT_Vs); 689 690 battable[0xa].batl = BATL(0xa0000000, BAT_I|BAT_G, BAT_PP_RW); 691 battable[0xa].batu = BATU(0xa0000000, BAT_BL_256M, BAT_Vs); 692 693 battable[0xb].batl = BATL(0xb0000000, BAT_I|BAT_G, BAT_PP_RW); 694 battable[0xb].batu = BATU(0xb0000000, BAT_BL_256M, BAT_Vs); 695 696 /* 697 * Map obio devices. 698 */ 699 battable[0xf].batl = BATL(0xf0000000, BAT_I|BAT_G, BAT_PP_RW); 700 battable[0xf].batu = BATU(0xf0000000, BAT_BL_256M, BAT_Vs); 701 702 /* 703 * Use an IBAT and a DBAT to map the bottom segment of memory 704 * where we are. Turn off instruction relocation temporarily 705 * to prevent faults while reprogramming the IBAT. 706 */ 707 msr = mfmsr(); 708 mtmsr(msr & ~PSL_IR); 709 __asm (".balign 32; \n" 710 "mtibatu 0,%0; mtibatl 0,%1; isync; \n" 711 "mtdbatu 0,%0; mtdbatl 0,%1; isync" 712 :: "r"(battable[0].batu), "r"(battable[0].batl)); 713 mtmsr(msr); 714 715 /* map pci space */ 716 __asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu)); 717 __asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl)); 718 isync(); 719 720 /* set global direct map flag */ 721 hw_direct_map = 1; 722 723 mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); 724 CTR0(KTR_PMAP, "moea_bootstrap: physical memory"); 725 726 qsort(pregions, pregions_sz, sizeof(*pregions), mr_cmp); 727 for (i = 0; i < pregions_sz; i++) { 728 vm_offset_t pa; 729 vm_offset_t end; 730 731 CTR3(KTR_PMAP, "physregion: %#x - %#x (%#x)", 732 pregions[i].mr_start, 733 pregions[i].mr_start + pregions[i].mr_size, 734 pregions[i].mr_size); 735 /* 736 * Install entries into the BAT table to allow all 737 * of physmem to be convered by on-demand BAT entries. 738 * The loop will sometimes set the same battable element 739 * twice, but that's fine since they won't be used for 740 * a while yet. 741 */ 742 pa = pregions[i].mr_start & 0xf0000000; 743 end = pregions[i].mr_start + pregions[i].mr_size; 744 do { 745 u_int n = pa >> ADDR_SR_SHFT; 746 747 battable[n].batl = BATL(pa, BAT_M, BAT_PP_RW); 748 battable[n].batu = BATU(pa, BAT_BL_256M, BAT_Vs); 749 pa += SEGMENT_LENGTH; 750 } while (pa < end); 751 } 752 753 if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) 754 panic("moea_bootstrap: phys_avail too small"); 755 qsort(regions, regions_sz, sizeof(*regions), mr_cmp); 756 phys_avail_count = 0; 757 physsz = 0; 758 hwphyssz = 0; 759 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 760 for (i = 0, j = 0; i < regions_sz; i++, j += 2) { 761 CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start, 762 regions[i].mr_start + regions[i].mr_size, 763 regions[i].mr_size); 764 if (hwphyssz != 0 && 765 (physsz + regions[i].mr_size) >= hwphyssz) { 766 if (physsz < hwphyssz) { 767 phys_avail[j] = regions[i].mr_start; 768 phys_avail[j + 1] = regions[i].mr_start + 769 hwphyssz - physsz; 770 physsz = hwphyssz; 771 phys_avail_count++; 772 } 773 break; 774 } 775 phys_avail[j] = regions[i].mr_start; 776 phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; 777 phys_avail_count++; 778 physsz += regions[i].mr_size; 779 } 780 physmem = btoc(physsz); 781 782 /* 783 * Allocate PTEG table. 
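	 * The PTEG count is kept a power of two (scaled against physmem
	 * unless PTEGCOUNT overrides it below) so that moea_pteg_mask can
	 * reduce the hash value to a table index.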
784 */ 785#ifdef PTEGCOUNT 786 moea_pteg_count = PTEGCOUNT; 787#else 788 moea_pteg_count = 0x1000; 789 790 while (moea_pteg_count < physmem) 791 moea_pteg_count <<= 1; 792 793 moea_pteg_count >>= 1; 794#endif /* PTEGCOUNT */ 795 796 size = moea_pteg_count * sizeof(struct pteg); 797 CTR2(KTR_PMAP, "moea_bootstrap: %d PTEGs, %d bytes", moea_pteg_count, 798 size); 799 moea_pteg_table = (struct pteg *)moea_bootstrap_alloc(size, size); 800 CTR1(KTR_PMAP, "moea_bootstrap: PTEG table at %p", moea_pteg_table); 801 bzero((void *)moea_pteg_table, moea_pteg_count * sizeof(struct pteg)); 802 moea_pteg_mask = moea_pteg_count - 1; 803 804 /* 805 * Allocate pv/overflow lists. 806 */ 807 size = sizeof(struct pvo_head) * moea_pteg_count; 808 moea_pvo_table = (struct pvo_head *)moea_bootstrap_alloc(size, 809 PAGE_SIZE); 810 CTR1(KTR_PMAP, "moea_bootstrap: PVO table at %p", moea_pvo_table); 811 for (i = 0; i < moea_pteg_count; i++) 812 LIST_INIT(&moea_pvo_table[i]); 813 814 /* 815 * Initialize the lock that synchronizes access to the pteg and pvo 816 * tables. 817 */ 818 mtx_init(&moea_table_mutex, "pmap table", NULL, MTX_DEF | 819 MTX_RECURSE); 820 mtx_init(&moea_vsid_mutex, "VSID table", NULL, MTX_DEF); 821 822 mtx_init(&tlbie_mtx, "tlbie", NULL, MTX_SPIN); 823 824 /* 825 * Initialise the unmanaged pvo pool. 826 */ 827 moea_bpvo_pool = (struct pvo_entry *)moea_bootstrap_alloc( 828 BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0); 829 moea_bpvo_pool_index = 0; 830 831 /* 832 * Make sure kernel vsid is allocated as well as VSID 0. 833 */ 834 moea_vsid_bitmap[(KERNEL_VSIDBITS & (NPMAPS - 1)) / VSID_NBPW] 835 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); 836 moea_vsid_bitmap[0] |= 1; 837 838 /* 839 * Initialize the kernel pmap (which is statically allocated). 840 */ 841 PMAP_LOCK_INIT(kernel_pmap); 842 for (i = 0; i < 16; i++) 843 kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; 844 kernel_pmap->pm_active = ~0; 845 846 /* 847 * Set up the Open Firmware mappings 848 */ 849 if ((chosen = OF_finddevice("/chosen")) == -1) 850 panic("moea_bootstrap: can't find /chosen"); 851 OF_getprop(chosen, "mmu", &mmui, 4); 852 if ((mmu = OF_instance_to_package(mmui)) == -1) 853 panic("moea_bootstrap: can't get mmu package"); 854 if ((sz = OF_getproplen(mmu, "translations")) == -1) 855 panic("moea_bootstrap: can't get ofw translation count"); 856 translations = NULL; 857 for (i = 0; phys_avail[i] != 0; i += 2) { 858 if (phys_avail[i + 1] >= sz) { 859 translations = (struct ofw_map *)phys_avail[i]; 860 break; 861 } 862 } 863 if (translations == NULL) 864 panic("moea_bootstrap: no space to copy translations"); 865 bzero(translations, sz); 866 if (OF_getprop(mmu, "translations", translations, sz) == -1) 867 panic("moea_bootstrap: can't get ofw translations"); 868 CTR0(KTR_PMAP, "moea_bootstrap: translations"); 869 sz /= sizeof(*translations); 870 qsort(translations, sz, sizeof (*translations), om_cmp); 871 for (i = 0, ofw_mappings = 0; i < sz; i++) { 872 CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x", 873 translations[i].om_pa, translations[i].om_va, 874 translations[i].om_len); 875 876 /* 877 * If the mapping is 1:1, let the RAM and device on-demand 878 * BAT tables take care of the translation. 
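		 * Only the remaining, non-identity translations are entered
		 * below, one page at a time, through moea_kenter().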
879 */ 880 if (translations[i].om_va == translations[i].om_pa) 881 continue; 882 883 /* Enter the pages */ 884 for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { 885 moea_kenter(mmup, translations[i].om_va + off, 886 translations[i].om_pa + off); 887 ofw_mappings++; 888 } 889 } 890 891 /* 892 * Calculate the last available physical address. 893 */ 894 for (i = 0; phys_avail[i + 2] != 0; i += 2) 895 ; 896 Maxmem = powerpc_btop(phys_avail[i + 1]); 897 898 moea_cpu_bootstrap(mmup,0); 899 900 pmap_bootstrapped++; 901 902 /* 903 * Set the start and end of kva. 904 */ 905 virtual_avail = VM_MIN_KERNEL_ADDRESS; 906 virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; 907 908 /* 909 * Allocate a kernel stack with a guard page for thread0 and map it 910 * into the kernel page map. 911 */ 912 pa = moea_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE); 913 va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 914 virtual_avail = va + KSTACK_PAGES * PAGE_SIZE; 915 CTR2(KTR_PMAP, "moea_bootstrap: kstack0 at %#x (%#x)", pa, va); 916 thread0.td_kstack = va; 917 thread0.td_kstack_pages = KSTACK_PAGES; 918 for (i = 0; i < KSTACK_PAGES; i++) { 919 moea_kenter(mmup, va, pa); 920 pa += PAGE_SIZE; 921 va += PAGE_SIZE; 922 } 923 924 /* 925 * Allocate virtual address space for the message buffer. 926 */ 927 pa = msgbuf_phys = moea_bootstrap_alloc(MSGBUF_SIZE, PAGE_SIZE); 928 msgbufp = (struct msgbuf *)virtual_avail; 929 va = virtual_avail; 930 virtual_avail += round_page(MSGBUF_SIZE); 931 while (va < virtual_avail) { 932 moea_kenter(mmup, va, pa); 933 pa += PAGE_SIZE; 934 va += PAGE_SIZE; 935 } 936 937 /* 938 * Allocate virtual address space for the dynamic percpu area. 939 */ 940 pa = moea_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); 941 dpcpu = (void *)virtual_avail; 942 va = virtual_avail; 943 virtual_avail += DPCPU_SIZE; 944 while (va < virtual_avail) { 945 moea_kenter(mmup, va, pa); 946 pa += PAGE_SIZE; 947 va += PAGE_SIZE; 948 } 949 dpcpu_init(dpcpu, 0); 950} 951 952/* 953 * Activate a user pmap. The pmap must be activated before it's address 954 * space can be accessed in any way. 955 */ 956void 957moea_activate(mmu_t mmu, struct thread *td) 958{ 959 pmap_t pm, pmr; 960 961 /* 962 * Load all the data we need up front to encourage the compiler to 963 * not issue any loads while we have interrupts disabled below. 964 */ 965 pm = &td->td_proc->p_vmspace->vm_pmap; 966 pmr = pm->pmap_phys; 967 968 pm->pm_active |= PCPU_GET(cpumask); 969 PCPU_SET(curpmap, pmr); 970} 971 972void 973moea_deactivate(mmu_t mmu, struct thread *td) 974{ 975 pmap_t pm; 976 977 pm = &td->td_proc->p_vmspace->vm_pmap; 978 pm->pm_active &= ~PCPU_GET(cpumask); 979 PCPU_SET(curpmap, NULL); 980} 981 982void 983moea_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) 984{ 985 struct pvo_entry *pvo; 986 987 PMAP_LOCK(pm); 988 pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); 989 990 if (pvo != NULL) { 991 if (wired) { 992 if ((pvo->pvo_vaddr & PVO_WIRED) == 0) 993 pm->pm_stats.wired_count++; 994 pvo->pvo_vaddr |= PVO_WIRED; 995 } else { 996 if ((pvo->pvo_vaddr & PVO_WIRED) != 0) 997 pm->pm_stats.wired_count--; 998 pvo->pvo_vaddr &= ~PVO_WIRED; 999 } 1000 } 1001 PMAP_UNLOCK(pm); 1002} 1003 1004void 1005moea_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) 1006{ 1007 vm_offset_t dst; 1008 vm_offset_t src; 1009 1010 dst = VM_PAGE_TO_PHYS(mdst); 1011 src = VM_PAGE_TO_PHYS(msrc); 1012 1013 kcopy((void *)src, (void *)dst, PAGE_SIZE); 1014} 1015 1016/* 1017 * Zero a page of physical memory by temporarily mapping it into the tlb. 
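 * With hw_direct_map set during bootstrap (which also programs BAT entries
 * covering physical memory), the physical address is usable directly here,
 * so no temporary mapping is actually created.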
1018 */ 1019void 1020moea_zero_page(mmu_t mmu, vm_page_t m) 1021{ 1022 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1023 void *va = (void *)pa; 1024 1025 bzero(va, PAGE_SIZE); 1026} 1027 1028void 1029moea_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 1030{ 1031 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1032 void *va = (void *)(pa + off); 1033 1034 bzero(va, size); 1035} 1036 1037void 1038moea_zero_page_idle(mmu_t mmu, vm_page_t m) 1039{ 1040 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1041 void *va = (void *)pa; 1042 1043 bzero(va, PAGE_SIZE); 1044} 1045 1046/* 1047 * Map the given physical page at the specified virtual address in the 1048 * target pmap with the protection requested. If specified the page 1049 * will be wired down. 1050 */ 1051void 1052moea_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1053 boolean_t wired) 1054{ 1055 1056 vm_page_lock_queues(); 1057 PMAP_LOCK(pmap); 1058 moea_enter_locked(pmap, va, m, prot, wired); 1059 vm_page_unlock_queues(); 1060 PMAP_UNLOCK(pmap); 1061} 1062 1063/* 1064 * Map the given physical page at the specified virtual address in the 1065 * target pmap with the protection requested. If specified the page 1066 * will be wired down. 1067 * 1068 * The page queues and pmap must be locked. 1069 */ 1070static void 1071moea_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1072 boolean_t wired) 1073{ 1074 struct pvo_head *pvo_head; 1075 uma_zone_t zone; 1076 vm_page_t pg; 1077 u_int pte_lo, pvo_flags, was_exec; 1078 int error; 1079 1080 if (!moea_initialized) { 1081 pvo_head = &moea_pvo_kunmanaged; 1082 zone = moea_upvo_zone; 1083 pvo_flags = 0; 1084 pg = NULL; 1085 was_exec = PTE_EXEC; 1086 } else { 1087 pvo_head = vm_page_to_pvoh(m); 1088 pg = m; 1089 zone = moea_mpvo_zone; 1090 pvo_flags = PVO_MANAGED; 1091 was_exec = 0; 1092 } 1093 if (pmap_bootstrapped) 1094 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1095 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1096 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || 1097 (m->oflags & VPO_BUSY) != 0 || VM_OBJECT_LOCKED(m->object), 1098 ("moea_enter_locked: page %p is not busy", m)); 1099 1100 /* XXX change the pvo head for fake pages */ 1101 if ((m->flags & PG_FICTITIOUS) == PG_FICTITIOUS) { 1102 pvo_flags &= ~PVO_MANAGED; 1103 pvo_head = &moea_pvo_kunmanaged; 1104 zone = moea_upvo_zone; 1105 } 1106 1107 /* 1108 * If this is a managed page, and it's the first reference to the page, 1109 * clear the execness of the page. Otherwise fetch the execness. 1110 */ 1111 if ((pg != NULL) && ((m->flags & PG_FICTITIOUS) == 0)) { 1112 if (LIST_EMPTY(pvo_head)) { 1113 moea_attr_clear(pg, PTE_EXEC); 1114 } else { 1115 was_exec = moea_attr_fetch(pg) & PTE_EXEC; 1116 } 1117 } 1118 1119 pte_lo = moea_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); 1120 1121 if (prot & VM_PROT_WRITE) { 1122 pte_lo |= PTE_BW; 1123 if (pmap_bootstrapped && 1124 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) 1125 vm_page_flag_set(m, PG_WRITEABLE); 1126 } else 1127 pte_lo |= PTE_BR; 1128 1129 if (prot & VM_PROT_EXECUTE) 1130 pvo_flags |= PVO_EXECUTABLE; 1131 1132 if (wired) 1133 pvo_flags |= PVO_WIRED; 1134 1135 if ((m->flags & PG_FICTITIOUS) != 0) 1136 pvo_flags |= PVO_FAKE; 1137 1138 error = moea_pvo_enter(pmap, zone, pvo_head, va, VM_PAGE_TO_PHYS(m), 1139 pte_lo, pvo_flags); 1140 1141 /* 1142 * Flush the real page from the instruction cache if this page is 1143 * mapped executable and cacheable and was not previously mapped (or 1144 * was not mapped executable). 
1145 */ 1146 if (error == 0 && (pvo_flags & PVO_EXECUTABLE) && 1147 (pte_lo & PTE_I) == 0 && was_exec == 0) { 1148 /* 1149 * Flush the real memory from the cache. 1150 */ 1151 moea_syncicache(VM_PAGE_TO_PHYS(m), PAGE_SIZE); 1152 if (pg != NULL) 1153 moea_attr_save(pg, PTE_EXEC); 1154 } 1155 1156 /* XXX syncicache always until problems are sorted */ 1157 moea_syncicache(VM_PAGE_TO_PHYS(m), PAGE_SIZE); 1158} 1159 1160/* 1161 * Maps a sequence of resident pages belonging to the same object. 1162 * The sequence begins with the given page m_start. This page is 1163 * mapped at the given virtual address start. Each subsequent page is 1164 * mapped at a virtual address that is offset from start by the same 1165 * amount as the page is offset from m_start within the object. The 1166 * last page in the sequence is the page with the largest offset from 1167 * m_start that can be mapped at a virtual address less than the given 1168 * virtual address end. Not every virtual page between start and end 1169 * is mapped; only those for which a resident page exists with the 1170 * corresponding offset from m_start are mapped. 1171 */ 1172void 1173moea_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, 1174 vm_page_t m_start, vm_prot_t prot) 1175{ 1176 vm_page_t m; 1177 vm_pindex_t diff, psize; 1178 1179 psize = atop(end - start); 1180 m = m_start; 1181 vm_page_lock_queues(); 1182 PMAP_LOCK(pm); 1183 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 1184 moea_enter_locked(pm, start + ptoa(diff), m, prot & 1185 (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); 1186 m = TAILQ_NEXT(m, listq); 1187 } 1188 vm_page_unlock_queues(); 1189 PMAP_UNLOCK(pm); 1190} 1191 1192void 1193moea_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, 1194 vm_prot_t prot) 1195{ 1196 1197 vm_page_lock_queues(); 1198 PMAP_LOCK(pm); 1199 moea_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), 1200 FALSE); 1201 vm_page_unlock_queues(); 1202 PMAP_UNLOCK(pm); 1203} 1204 1205vm_paddr_t 1206moea_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) 1207{ 1208 struct pvo_entry *pvo; 1209 vm_paddr_t pa; 1210 1211 PMAP_LOCK(pm); 1212 pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); 1213 if (pvo == NULL) 1214 pa = 0; 1215 else 1216 pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); 1217 PMAP_UNLOCK(pm); 1218 return (pa); 1219} 1220 1221/* 1222 * Atomically extract and hold the physical page with the given 1223 * pmap and virtual address pair if that mapping permits the given 1224 * protection. 
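 * The page is held under vm_page_pa_tryrelock(); if the physical address
 * changes while the page lock is being acquired, the lookup is retried.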
1225 */ 1226vm_page_t 1227moea_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1228{ 1229 struct pvo_entry *pvo; 1230 vm_page_t m; 1231 vm_paddr_t pa; 1232 1233 m = NULL; 1234 pa = 0; 1235 PMAP_LOCK(pmap); 1236retry: 1237 pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); 1238 if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && 1239 ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || 1240 (prot & VM_PROT_WRITE) == 0)) { 1241 if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) 1242 goto retry; 1243 m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); 1244 vm_page_hold(m); 1245 } 1246 PA_UNLOCK_COND(pa); 1247 PMAP_UNLOCK(pmap); 1248 return (m); 1249} 1250 1251void 1252moea_init(mmu_t mmu) 1253{ 1254 1255 moea_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), 1256 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1257 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1258 moea_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry), 1259 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1260 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1261 moea_initialized = TRUE; 1262} 1263 1264boolean_t 1265moea_is_referenced(mmu_t mmu, vm_page_t m) 1266{ 1267 1268 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1269 ("moea_is_referenced: page %p is not managed", m)); 1270 return (moea_query_bit(m, PTE_REF)); 1271} 1272 1273boolean_t 1274moea_is_modified(mmu_t mmu, vm_page_t m) 1275{ 1276 1277 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1278 ("moea_is_modified: page %p is not managed", m)); 1279 1280 /* 1281 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be 1282 * concurrently set while the object is locked. Thus, if PG_WRITEABLE 1283 * is clear, no PTEs can have PTE_CHG set. 1284 */ 1285 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1286 if ((m->oflags & VPO_BUSY) == 0 && 1287 (m->flags & PG_WRITEABLE) == 0) 1288 return (FALSE); 1289 return (moea_query_bit(m, PTE_CHG)); 1290} 1291 1292boolean_t 1293moea_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1294{ 1295 struct pvo_entry *pvo; 1296 boolean_t rv; 1297 1298 PMAP_LOCK(pmap); 1299 pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); 1300 rv = pvo == NULL || (pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0; 1301 PMAP_UNLOCK(pmap); 1302 return (rv); 1303} 1304 1305void 1306moea_clear_reference(mmu_t mmu, vm_page_t m) 1307{ 1308 1309 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1310 ("moea_clear_reference: page %p is not managed", m)); 1311 moea_clear_bit(m, PTE_REF); 1312} 1313 1314void 1315moea_clear_modify(mmu_t mmu, vm_page_t m) 1316{ 1317 1318 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1319 ("moea_clear_modify: page %p is not managed", m)); 1320 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1321 KASSERT((m->oflags & VPO_BUSY) == 0, 1322 ("moea_clear_modify: page %p is busy", m)); 1323 1324 /* 1325 * If the page is not PG_WRITEABLE, then no PTEs can have PTE_CHG 1326 * set. If the object containing the page is locked and the page is 1327 * not VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. 1328 */ 1329 if ((m->flags & PG_WRITEABLE) == 0) 1330 return; 1331 moea_clear_bit(m, PTE_CHG); 1332} 1333 1334/* 1335 * Clear the write and modified bits in each of the given page's mappings. 
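 * Each mapping is downgraded to read-only (PTE_BR) and its PTE_CHG state is
 * gathered, so the page can be dirtied once before PG_WRITEABLE is cleared.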
1336 */ 1337void 1338moea_remove_write(mmu_t mmu, vm_page_t m) 1339{ 1340 struct pvo_entry *pvo; 1341 struct pte *pt; 1342 pmap_t pmap; 1343 u_int lo; 1344 1345 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1346 ("moea_remove_write: page %p is not managed", m)); 1347 1348 /* 1349 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by 1350 * another thread while the object is locked. Thus, if PG_WRITEABLE 1351 * is clear, no page table entries need updating. 1352 */ 1353 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1354 if ((m->oflags & VPO_BUSY) == 0 && 1355 (m->flags & PG_WRITEABLE) == 0) 1356 return; 1357 vm_page_lock_queues(); 1358 lo = moea_attr_fetch(m); 1359 powerpc_sync(); 1360 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1361 pmap = pvo->pvo_pmap; 1362 PMAP_LOCK(pmap); 1363 if ((pvo->pvo_pte.pte.pte_lo & PTE_PP) != PTE_BR) { 1364 pt = moea_pvo_to_pte(pvo, -1); 1365 pvo->pvo_pte.pte.pte_lo &= ~PTE_PP; 1366 pvo->pvo_pte.pte.pte_lo |= PTE_BR; 1367 if (pt != NULL) { 1368 moea_pte_synch(pt, &pvo->pvo_pte.pte); 1369 lo |= pvo->pvo_pte.pte.pte_lo; 1370 pvo->pvo_pte.pte.pte_lo &= ~PTE_CHG; 1371 moea_pte_change(pt, &pvo->pvo_pte.pte, 1372 pvo->pvo_vaddr); 1373 mtx_unlock(&moea_table_mutex); 1374 } 1375 } 1376 PMAP_UNLOCK(pmap); 1377 } 1378 if ((lo & PTE_CHG) != 0) { 1379 moea_attr_clear(m, PTE_CHG); 1380 vm_page_dirty(m); 1381 } 1382 vm_page_flag_clear(m, PG_WRITEABLE); 1383 vm_page_unlock_queues(); 1384} 1385 1386/* 1387 * moea_ts_referenced: 1388 * 1389 * Return a count of reference bits for a page, clearing those bits. 1390 * It is not necessary for every reference bit to be cleared, but it 1391 * is necessary that 0 only be returned when there are truly no 1392 * reference bits set. 1393 * 1394 * XXX: The exact number of bits to check and clear is a matter that 1395 * should be tested and standardized at some point in the future for 1396 * optimal aging of shared pages. 1397 */ 1398boolean_t 1399moea_ts_referenced(mmu_t mmu, vm_page_t m) 1400{ 1401 1402 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1403 ("moea_ts_referenced: page %p is not managed", m)); 1404 return (moea_clear_bit(m, PTE_REF)); 1405} 1406 1407/* 1408 * Modify the WIMG settings of all mappings for a page. 1409 */ 1410void 1411moea_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) 1412{ 1413 struct pvo_entry *pvo; 1414 struct pvo_head *pvo_head; 1415 struct pte *pt; 1416 pmap_t pmap; 1417 u_int lo; 1418 1419 if (m->flags & PG_FICTITIOUS) { 1420 m->md.mdpg_cache_attrs = ma; 1421 return; 1422 } 1423 1424 vm_page_lock_queues(); 1425 pvo_head = vm_page_to_pvoh(m); 1426 lo = moea_calc_wimg(VM_PAGE_TO_PHYS(m), ma); 1427 1428 LIST_FOREACH(pvo, pvo_head, pvo_vlink) { 1429 pmap = pvo->pvo_pmap; 1430 PMAP_LOCK(pmap); 1431 pt = moea_pvo_to_pte(pvo, -1); 1432 pvo->pvo_pte.pte.pte_lo &= ~PTE_WIMG; 1433 pvo->pvo_pte.pte.pte_lo |= lo; 1434 if (pt != NULL) { 1435 moea_pte_change(pt, &pvo->pvo_pte.pte, 1436 pvo->pvo_vaddr); 1437 if (pvo->pvo_pmap == kernel_pmap) 1438 isync(); 1439 } 1440 mtx_unlock(&moea_table_mutex); 1441 PMAP_UNLOCK(pmap); 1442 } 1443 m->md.mdpg_cache_attrs = ma; 1444 vm_page_unlock_queues(); 1445} 1446 1447/* 1448 * Map a wired page into kernel virtual address space. 
1449 */ 1450void 1451moea_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa) 1452{ 1453 1454 moea_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 1455} 1456 1457void 1458moea_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) 1459{ 1460 u_int pte_lo; 1461 int error; 1462 1463#if 0 1464 if (va < VM_MIN_KERNEL_ADDRESS) 1465 panic("moea_kenter: attempt to enter non-kernel address %#x", 1466 va); 1467#endif 1468 1469 pte_lo = moea_calc_wimg(pa, ma); 1470 1471 PMAP_LOCK(kernel_pmap); 1472 error = moea_pvo_enter(kernel_pmap, moea_upvo_zone, 1473 &moea_pvo_kunmanaged, va, pa, pte_lo, PVO_WIRED); 1474 1475 if (error != 0 && error != ENOENT) 1476 panic("moea_kenter: failed to enter va %#x pa %#x: %d", va, 1477 pa, error); 1478 1479 /* 1480 * Flush the real memory from the instruction cache. 1481 */ 1482 if ((pte_lo & (PTE_I | PTE_G)) == 0) { 1483 moea_syncicache(pa, PAGE_SIZE); 1484 } 1485 PMAP_UNLOCK(kernel_pmap); 1486} 1487 1488/* 1489 * Extract the physical page address associated with the given kernel virtual 1490 * address. 1491 */ 1492vm_offset_t 1493moea_kextract(mmu_t mmu, vm_offset_t va) 1494{ 1495 struct pvo_entry *pvo; 1496 vm_paddr_t pa; 1497 1498 /* 1499 * Allow direct mappings on 32-bit OEA 1500 */ 1501 if (va < VM_MIN_KERNEL_ADDRESS) { 1502 return (va); 1503 } 1504 1505 PMAP_LOCK(kernel_pmap); 1506 pvo = moea_pvo_find_va(kernel_pmap, va & ~ADDR_POFF, NULL); 1507 KASSERT(pvo != NULL, ("moea_kextract: no addr found")); 1508 pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); 1509 PMAP_UNLOCK(kernel_pmap); 1510 return (pa); 1511} 1512 1513/* 1514 * Remove a wired page from kernel virtual address space. 1515 */ 1516void 1517moea_kremove(mmu_t mmu, vm_offset_t va) 1518{ 1519 1520 moea_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); 1521} 1522 1523/* 1524 * Map a range of physical addresses into kernel virtual address space. 1525 * 1526 * The value passed in *virt is a suggested virtual address for the mapping. 1527 * Architectures which can support a direct-mapped physical to virtual region 1528 * can return the appropriate address within that region, leaving '*virt' 1529 * unchanged. We cannot and therefore do not; *virt is updated with the 1530 * first usable address after the mapped region. 1531 */ 1532vm_offset_t 1533moea_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start, 1534 vm_offset_t pa_end, int prot) 1535{ 1536 vm_offset_t sva, va; 1537 1538 sva = *virt; 1539 va = sva; 1540 for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) 1541 moea_kenter(mmu, va, pa_start); 1542 *virt = va; 1543 return (sva); 1544} 1545 1546/* 1547 * Returns true if the pmap's pv is one of the first 1548 * 16 pvs linked to from this page. This count may 1549 * be changed upwards or downwards in the future; it 1550 * is only necessary that true be returned for a small 1551 * subset of pmaps for proper page aging. 
1552 */ 1553boolean_t 1554moea_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 1555{ 1556 int loops; 1557 struct pvo_entry *pvo; 1558 boolean_t rv; 1559 1560 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1561 ("moea_page_exists_quick: page %p is not managed", m)); 1562 loops = 0; 1563 rv = FALSE; 1564 vm_page_lock_queues(); 1565 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1566 if (pvo->pvo_pmap == pmap) { 1567 rv = TRUE; 1568 break; 1569 } 1570 if (++loops >= 16) 1571 break; 1572 } 1573 vm_page_unlock_queues(); 1574 return (rv); 1575} 1576 1577/* 1578 * Return the number of managed mappings to the given physical page 1579 * that are wired. 1580 */ 1581int 1582moea_page_wired_mappings(mmu_t mmu, vm_page_t m) 1583{ 1584 struct pvo_entry *pvo; 1585 int count; 1586 1587 count = 0; 1588 if ((m->flags & PG_FICTITIOUS) != 0) 1589 return (count); 1590 vm_page_lock_queues(); 1591 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) 1592 if ((pvo->pvo_vaddr & PVO_WIRED) != 0) 1593 count++; 1594 vm_page_unlock_queues(); 1595 return (count); 1596} 1597 1598static u_int moea_vsidcontext; 1599 1600void 1601moea_pinit(mmu_t mmu, pmap_t pmap) 1602{ 1603 int i, mask; 1604 u_int entropy; 1605 1606 KASSERT((int)pmap < VM_MIN_KERNEL_ADDRESS, ("moea_pinit: virt pmap")); 1607 PMAP_LOCK_INIT(pmap); 1608 1609 entropy = 0; 1610 __asm __volatile("mftb %0" : "=r"(entropy)); 1611 1612 if ((pmap->pmap_phys = (pmap_t)moea_kextract(mmu, (vm_offset_t)pmap)) 1613 == NULL) { 1614 pmap->pmap_phys = pmap; 1615 } 1616 1617 1618 mtx_lock(&moea_vsid_mutex); 1619 /* 1620 * Allocate some segment registers for this pmap. 1621 */ 1622 for (i = 0; i < NPMAPS; i += VSID_NBPW) { 1623 u_int hash, n; 1624 1625 /* 1626 * Create a new value by mutiplying by a prime and adding in 1627 * entropy from the timebase register. This is to make the 1628 * VSID more random so that the PT hash function collides 1629 * less often. (Note that the prime casues gcc to do shifts 1630 * instead of a multiply.) 1631 */ 1632 moea_vsidcontext = (moea_vsidcontext * 0x1105) + entropy; 1633 hash = moea_vsidcontext & (NPMAPS - 1); 1634 if (hash == 0) /* 0 is special, avoid it */ 1635 continue; 1636 n = hash >> 5; 1637 mask = 1 << (hash & (VSID_NBPW - 1)); 1638 hash = (moea_vsidcontext & 0xfffff); 1639 if (moea_vsid_bitmap[n] & mask) { /* collision? */ 1640 /* anything free in this bucket? */ 1641 if (moea_vsid_bitmap[n] == 0xffffffff) { 1642 entropy = (moea_vsidcontext >> 20); 1643 continue; 1644 } 1645 i = ffs(~moea_vsid_bitmap[n]) - 1; 1646 mask = 1 << i; 1647 hash &= 0xfffff & ~(VSID_NBPW - 1); 1648 hash |= i; 1649 } 1650 moea_vsid_bitmap[n] |= mask; 1651 for (i = 0; i < 16; i++) 1652 pmap->pm_sr[i] = VSID_MAKE(i, hash); 1653 mtx_unlock(&moea_vsid_mutex); 1654 return; 1655 } 1656 1657 mtx_unlock(&moea_vsid_mutex); 1658 panic("moea_pinit: out of segments"); 1659} 1660 1661/* 1662 * Initialize the pmap associated with process 0. 1663 */ 1664void 1665moea_pinit0(mmu_t mmu, pmap_t pm) 1666{ 1667 1668 moea_pinit(mmu, pm); 1669 bzero(&pm->pm_stats, sizeof(pm->pm_stats)); 1670} 1671 1672/* 1673 * Set the physical protection on the specified range of this map as requested. 
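 * Removing read permission removes the mappings outright; otherwise each
 * existing mapping is downgraded to read-only (PTE_BR) and any page table
 * copy is updated in place.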
1674 */ 1675void 1676moea_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, 1677 vm_prot_t prot) 1678{ 1679 struct pvo_entry *pvo; 1680 struct pte *pt; 1681 int pteidx; 1682 1683 KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, 1684 ("moea_protect: non current pmap")); 1685 1686 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1687 moea_remove(mmu, pm, sva, eva); 1688 return; 1689 } 1690 1691 vm_page_lock_queues(); 1692 PMAP_LOCK(pm); 1693 for (; sva < eva; sva += PAGE_SIZE) { 1694 pvo = moea_pvo_find_va(pm, sva, &pteidx); 1695 if (pvo == NULL) 1696 continue; 1697 1698 if ((prot & VM_PROT_EXECUTE) == 0) 1699 pvo->pvo_vaddr &= ~PVO_EXECUTABLE; 1700 1701 /* 1702 * Grab the PTE pointer before we diddle with the cached PTE 1703 * copy. 1704 */ 1705 pt = moea_pvo_to_pte(pvo, pteidx); 1706 /* 1707 * Change the protection of the page. 1708 */ 1709 pvo->pvo_pte.pte.pte_lo &= ~PTE_PP; 1710 pvo->pvo_pte.pte.pte_lo |= PTE_BR; 1711 1712 /* 1713 * If the PVO is in the page table, update that pte as well. 1714 */ 1715 if (pt != NULL) { 1716 moea_pte_change(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); 1717 mtx_unlock(&moea_table_mutex); 1718 } 1719 } 1720 vm_page_unlock_queues(); 1721 PMAP_UNLOCK(pm); 1722} 1723 1724/* 1725 * Map a list of wired pages into kernel virtual address space. This is 1726 * intended for temporary mappings which do not need page modification or 1727 * references recorded. Existing mappings in the region are overwritten. 1728 */ 1729void 1730moea_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) 1731{ 1732 vm_offset_t va; 1733 1734 va = sva; 1735 while (count-- > 0) { 1736 moea_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 1737 va += PAGE_SIZE; 1738 m++; 1739 } 1740} 1741 1742/* 1743 * Remove page mappings from kernel virtual address space. Intended for 1744 * temporary mappings entered by moea_qenter. 1745 */ 1746void 1747moea_qremove(mmu_t mmu, vm_offset_t sva, int count) 1748{ 1749 vm_offset_t va; 1750 1751 va = sva; 1752 while (count-- > 0) { 1753 moea_kremove(mmu, va); 1754 va += PAGE_SIZE; 1755 } 1756} 1757 1758void 1759moea_release(mmu_t mmu, pmap_t pmap) 1760{ 1761 int idx, mask; 1762 1763 /* 1764 * Free segment register's VSID 1765 */ 1766 if (pmap->pm_sr[0] == 0) 1767 panic("moea_release"); 1768 1769 mtx_lock(&moea_vsid_mutex); 1770 idx = VSID_TO_HASH(pmap->pm_sr[0]) & (NPMAPS-1); 1771 mask = 1 << (idx % VSID_NBPW); 1772 idx /= VSID_NBPW; 1773 moea_vsid_bitmap[idx] &= ~mask; 1774 mtx_unlock(&moea_vsid_mutex); 1775 PMAP_LOCK_DESTROY(pmap); 1776} 1777 1778/* 1779 * Remove the given range of addresses from the specified map. 1780 */ 1781void 1782moea_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) 1783{ 1784 struct pvo_entry *pvo; 1785 int pteidx; 1786 1787 vm_page_lock_queues(); 1788 PMAP_LOCK(pm); 1789 for (; sva < eva; sva += PAGE_SIZE) { 1790 pvo = moea_pvo_find_va(pm, sva, &pteidx); 1791 if (pvo != NULL) { 1792 moea_pvo_remove(pvo, pteidx); 1793 } 1794 } 1795 PMAP_UNLOCK(pm); 1796 vm_page_unlock_queues(); 1797} 1798 1799/* 1800 * Remove physical page from all pmaps in which it resides. moea_pvo_remove() 1801 * will reflect changes in pte's back to the vm_page. 
1802 */ 1803void 1804moea_remove_all(mmu_t mmu, vm_page_t m) 1805{ 1806 struct pvo_head *pvo_head; 1807 struct pvo_entry *pvo, *next_pvo; 1808 pmap_t pmap; 1809 1810 vm_page_lock_queues(); 1811 pvo_head = vm_page_to_pvoh(m); 1812 for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) { 1813 next_pvo = LIST_NEXT(pvo, pvo_vlink); 1814 1815 MOEA_PVO_CHECK(pvo); /* sanity check */ 1816 pmap = pvo->pvo_pmap; 1817 PMAP_LOCK(pmap); 1818 moea_pvo_remove(pvo, -1); 1819 PMAP_UNLOCK(pmap); 1820 } 1821 if ((m->flags & PG_WRITEABLE) && moea_is_modified(mmu, m)) { 1822 moea_attr_clear(m, PTE_CHG); 1823 vm_page_dirty(m); 1824 } 1825 vm_page_flag_clear(m, PG_WRITEABLE); 1826 vm_page_unlock_queues(); 1827} 1828 1829/* 1830 * Allocate a physical page of memory directly from the phys_avail map. 1831 * Can only be called from moea_bootstrap before avail start and end are 1832 * calculated. 1833 */ 1834static vm_offset_t 1835moea_bootstrap_alloc(vm_size_t size, u_int align) 1836{ 1837 vm_offset_t s, e; 1838 int i, j; 1839 1840 size = round_page(size); 1841 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 1842 if (align != 0) 1843 s = (phys_avail[i] + align - 1) & ~(align - 1); 1844 else 1845 s = phys_avail[i]; 1846 e = s + size; 1847 1848 if (s < phys_avail[i] || e > phys_avail[i + 1]) 1849 continue; 1850 1851 if (s == phys_avail[i]) { 1852 phys_avail[i] += size; 1853 } else if (e == phys_avail[i + 1]) { 1854 phys_avail[i + 1] -= size; 1855 } else { 1856 for (j = phys_avail_count * 2; j > i; j -= 2) { 1857 phys_avail[j] = phys_avail[j - 2]; 1858 phys_avail[j + 1] = phys_avail[j - 1]; 1859 } 1860 1861 phys_avail[i + 3] = phys_avail[i + 1]; 1862 phys_avail[i + 1] = s; 1863 phys_avail[i + 2] = e; 1864 phys_avail_count++; 1865 } 1866 1867 return (s); 1868 } 1869 panic("moea_bootstrap_alloc: could not allocate memory"); 1870} 1871 1872static void 1873moea_syncicache(vm_offset_t pa, vm_size_t len) 1874{ 1875 __syncicache((void *)pa, len); 1876} 1877 1878static int 1879moea_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, 1880 vm_offset_t va, vm_offset_t pa, u_int pte_lo, int flags) 1881{ 1882 struct pvo_entry *pvo; 1883 u_int sr; 1884 int first; 1885 u_int ptegidx; 1886 int i; 1887 int bootstrap; 1888 1889 moea_pvo_enter_calls++; 1890 first = 0; 1891 bootstrap = 0; 1892 1893 /* 1894 * Compute the PTE Group index. 1895 */ 1896 va &= ~ADDR_POFF; 1897 sr = va_to_sr(pm->pm_sr, va); 1898 ptegidx = va_to_pteg(sr, va); 1899 1900 /* 1901 * Remove any existing mapping for this page. Reuse the pvo entry if 1902 * there is a mapping. 1903 */ 1904 mtx_lock(&moea_table_mutex); 1905 LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { 1906 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { 1907 if ((pvo->pvo_pte.pte.pte_lo & PTE_RPGN) == pa && 1908 (pvo->pvo_pte.pte.pte_lo & PTE_PP) == 1909 (pte_lo & PTE_PP)) { 1910 mtx_unlock(&moea_table_mutex); 1911 return (0); 1912 } 1913 moea_pvo_remove(pvo, -1); 1914 break; 1915 } 1916 } 1917 1918 /* 1919 * If we aren't overwriting a mapping, try to allocate. 
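	 * Once moea_initialized is set, the PVO comes from the UMA zone
	 * passed in by the caller; before that it is taken from the static
	 * bootstrap pool, which panics if its BPVO_POOL_SIZE entries are
	 * exhausted.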
1920 */ 1921 if (moea_initialized) { 1922 pvo = uma_zalloc(zone, M_NOWAIT); 1923 } else { 1924 if (moea_bpvo_pool_index >= BPVO_POOL_SIZE) { 1925 panic("moea_enter: bpvo pool exhausted, %d, %d, %d", 1926 moea_bpvo_pool_index, BPVO_POOL_SIZE, 1927 BPVO_POOL_SIZE * sizeof(struct pvo_entry)); 1928 } 1929 pvo = &moea_bpvo_pool[moea_bpvo_pool_index]; 1930 moea_bpvo_pool_index++; 1931 bootstrap = 1; 1932 } 1933 1934 if (pvo == NULL) { 1935 mtx_unlock(&moea_table_mutex); 1936 return (ENOMEM); 1937 } 1938 1939 moea_pvo_entries++; 1940 pvo->pvo_vaddr = va; 1941 pvo->pvo_pmap = pm; 1942 LIST_INSERT_HEAD(&moea_pvo_table[ptegidx], pvo, pvo_olink); 1943 pvo->pvo_vaddr &= ~ADDR_POFF; 1944 if (flags & VM_PROT_EXECUTE) 1945 pvo->pvo_vaddr |= PVO_EXECUTABLE; 1946 if (flags & PVO_WIRED) 1947 pvo->pvo_vaddr |= PVO_WIRED; 1948 if (pvo_head != &moea_pvo_kunmanaged) 1949 pvo->pvo_vaddr |= PVO_MANAGED; 1950 if (bootstrap) 1951 pvo->pvo_vaddr |= PVO_BOOTSTRAP; 1952 if (flags & PVO_FAKE) 1953 pvo->pvo_vaddr |= PVO_FAKE; 1954 1955 moea_pte_create(&pvo->pvo_pte.pte, sr, va, pa | pte_lo); 1956 1957 /* 1958 * Remember if the list was empty and therefore will be the first 1959 * item. 1960 */ 1961 if (LIST_FIRST(pvo_head) == NULL) 1962 first = 1; 1963 LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); 1964 1965 if (pvo->pvo_pte.pte.pte_lo & PVO_WIRED) 1966 pm->pm_stats.wired_count++; 1967 pm->pm_stats.resident_count++; 1968 1969 /* 1970 * We hope this succeeds but it isn't required. 1971 */ 1972 i = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte); 1973 if (i >= 0) { 1974 PVO_PTEGIDX_SET(pvo, i); 1975 } else { 1976 panic("moea_pvo_enter: overflow"); 1977 moea_pte_overflow++; 1978 } 1979 mtx_unlock(&moea_table_mutex); 1980 1981 return (first ? ENOENT : 0); 1982} 1983 1984static void 1985moea_pvo_remove(struct pvo_entry *pvo, int pteidx) 1986{ 1987 struct pte *pt; 1988 1989 /* 1990 * If there is an active pte entry, we need to deactivate it (and 1991 * save the ref & cfg bits). 1992 */ 1993 pt = moea_pvo_to_pte(pvo, pteidx); 1994 if (pt != NULL) { 1995 moea_pte_unset(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); 1996 mtx_unlock(&moea_table_mutex); 1997 PVO_PTEGIDX_CLR(pvo); 1998 } else { 1999 moea_pte_overflow--; 2000 } 2001 2002 /* 2003 * Update our statistics. 2004 */ 2005 pvo->pvo_pmap->pm_stats.resident_count--; 2006 if (pvo->pvo_pte.pte.pte_lo & PVO_WIRED) 2007 pvo->pvo_pmap->pm_stats.wired_count--; 2008 2009 /* 2010 * Save the REF/CHG bits into their cache if the page is managed. 2011 */ 2012 if ((pvo->pvo_vaddr & (PVO_MANAGED|PVO_FAKE)) == PVO_MANAGED) { 2013 struct vm_page *pg; 2014 2015 pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); 2016 if (pg != NULL) { 2017 moea_attr_save(pg, pvo->pvo_pte.pte.pte_lo & 2018 (PTE_REF | PTE_CHG)); 2019 } 2020 } 2021 2022 /* 2023 * Remove this PVO from the PV list. 2024 */ 2025 LIST_REMOVE(pvo, pvo_vlink); 2026 2027 /* 2028 * Remove this from the overflow list and return it to the pool 2029 * if we aren't going to reuse it. 2030 */ 2031 LIST_REMOVE(pvo, pvo_olink); 2032 if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) 2033 uma_zfree(pvo->pvo_vaddr & PVO_MANAGED ? moea_mpvo_zone : 2034 moea_upvo_zone, pvo); 2035 moea_pvo_entries--; 2036 moea_pvo_remove_calls++; 2037} 2038 2039static __inline int 2040moea_pvo_pte_index(const struct pvo_entry *pvo, int ptegidx) 2041{ 2042 int pteidx; 2043 2044 /* 2045 * We can find the actual pte entry without searching by grabbing 2046 * the PTEG index from 3 unused bits in pte_lo[11:9] and by 2047 * noticing the HID bit. 
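 *
 * Each group holds eight PTEs, so the flat index is ptegidx * 8 plus the
 * slot recorded by PVO_PTEGIDX_SET().  For a secondary-hash entry (PTE_HID
 * set) the group is the complement of the primary one; for example, with
 * moea_pteg_mask = 1023, primary group 5 and slot 3 give 5 * 8 + 3 = 43,
 * and the xor with 1023 * 8 relocates that to (5 ^ 1023) * 8 + 3 = 8147.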
2048 */ 2049 pteidx = ptegidx * 8 + PVO_PTEGIDX_GET(pvo); 2050 if (pvo->pvo_pte.pte.pte_hi & PTE_HID) 2051 pteidx ^= moea_pteg_mask * 8; 2052 2053 return (pteidx); 2054} 2055 2056static struct pvo_entry * 2057moea_pvo_find_va(pmap_t pm, vm_offset_t va, int *pteidx_p) 2058{ 2059 struct pvo_entry *pvo; 2060 int ptegidx; 2061 u_int sr; 2062 2063 va &= ~ADDR_POFF; 2064 sr = va_to_sr(pm->pm_sr, va); 2065 ptegidx = va_to_pteg(sr, va); 2066 2067 mtx_lock(&moea_table_mutex); 2068 LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { 2069 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { 2070 if (pteidx_p) 2071 *pteidx_p = moea_pvo_pte_index(pvo, ptegidx); 2072 break; 2073 } 2074 } 2075 mtx_unlock(&moea_table_mutex); 2076 2077 return (pvo); 2078} 2079 2080static struct pte * 2081moea_pvo_to_pte(const struct pvo_entry *pvo, int pteidx) 2082{ 2083 struct pte *pt; 2084 2085 /* 2086 * If we haven't been supplied the ptegidx, calculate it. 2087 */ 2088 if (pteidx == -1) { 2089 int ptegidx; 2090 u_int sr; 2091 2092 sr = va_to_sr(pvo->pvo_pmap->pm_sr, pvo->pvo_vaddr); 2093 ptegidx = va_to_pteg(sr, pvo->pvo_vaddr); 2094 pteidx = moea_pvo_pte_index(pvo, ptegidx); 2095 } 2096 2097 pt = &moea_pteg_table[pteidx >> 3].pt[pteidx & 7]; 2098 mtx_lock(&moea_table_mutex); 2099 2100 if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) { 2101 panic("moea_pvo_to_pte: pvo %p has valid pte in pvo but no " 2102 "valid pte index", pvo); 2103 } 2104 2105 if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) { 2106 panic("moea_pvo_to_pte: pvo %p has valid pte index in pvo " 2107 "pvo but no valid pte", pvo); 2108 } 2109 2110 if ((pt->pte_hi ^ (pvo->pvo_pte.pte.pte_hi & ~PTE_VALID)) == PTE_VALID) { 2111 if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0) { 2112 panic("moea_pvo_to_pte: pvo %p has valid pte in " 2113 "moea_pteg_table %p but invalid in pvo", pvo, pt); 2114 } 2115 2116 if (((pt->pte_lo ^ pvo->pvo_pte.pte.pte_lo) & ~(PTE_CHG|PTE_REF)) 2117 != 0) { 2118 panic("moea_pvo_to_pte: pvo %p pte does not match " 2119 "pte %p in moea_pteg_table", pvo, pt); 2120 } 2121 2122 mtx_assert(&moea_table_mutex, MA_OWNED); 2123 return (pt); 2124 } 2125 2126 if (pvo->pvo_pte.pte.pte_hi & PTE_VALID) { 2127 panic("moea_pvo_to_pte: pvo %p has invalid pte %p in " 2128 "moea_pteg_table but valid in pvo", pvo, pt); 2129 } 2130 2131 mtx_unlock(&moea_table_mutex); 2132 return (NULL); 2133} 2134 2135/* 2136 * XXX: THIS STUFF SHOULD BE IN pte.c? 2137 */ 2138int 2139moea_pte_spill(vm_offset_t addr) 2140{ 2141 struct pvo_entry *source_pvo, *victim_pvo; 2142 struct pvo_entry *pvo; 2143 int ptegidx, i, j; 2144 u_int sr; 2145 struct pteg *pteg; 2146 struct pte *pt; 2147 2148 moea_pte_spills++; 2149 2150 sr = mfsrin(addr); 2151 ptegidx = va_to_pteg(sr, addr); 2152 2153 /* 2154 * Have to substitute some entry. Use the primary hash for this. 2155 * Use low bits of timebase as random generator. 2156 */ 2157 pteg = &moea_pteg_table[ptegidx]; 2158 mtx_lock(&moea_table_mutex); 2159 __asm __volatile("mftb %0" : "=r"(i)); 2160 i &= 7; 2161 pt = &pteg->pt[i]; 2162 2163 source_pvo = NULL; 2164 victim_pvo = NULL; 2165 LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { 2166 /* 2167 * We need to find a pvo entry for this address. 2168 */ 2169 MOEA_PVO_CHECK(pvo); 2170 if (source_pvo == NULL && 2171 moea_pte_match(&pvo->pvo_pte.pte, sr, addr, 2172 pvo->pvo_pte.pte.pte_hi & PTE_HID)) { 2173 /* 2174 * Now found an entry to be spilled into the pteg. 2175 * The PTE is now valid, so we know it's active. 
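 *
 * If moea_pte_insert() finds a free slot in either hash the entry goes
 * straight in, no victim needs to be displaced, and the spill is done.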
2176 */ 2177 j = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte); 2178 2179 if (j >= 0) { 2180 PVO_PTEGIDX_SET(pvo, j); 2181 moea_pte_overflow--; 2182 MOEA_PVO_CHECK(pvo); 2183 mtx_unlock(&moea_table_mutex); 2184 return (1); 2185 } 2186 2187 source_pvo = pvo; 2188 2189 if (victim_pvo != NULL) 2190 break; 2191 } 2192 2193 /* 2194 * We also need the pvo entry of the victim we are replacing 2195 * so save the R & C bits of the PTE. 2196 */ 2197 if ((pt->pte_hi & PTE_HID) == 0 && victim_pvo == NULL && 2198 moea_pte_compare(pt, &pvo->pvo_pte.pte)) { 2199 victim_pvo = pvo; 2200 if (source_pvo != NULL) 2201 break; 2202 } 2203 } 2204 2205 if (source_pvo == NULL) { 2206 mtx_unlock(&moea_table_mutex); 2207 return (0); 2208 } 2209 2210 if (victim_pvo == NULL) { 2211 if ((pt->pte_hi & PTE_HID) == 0) 2212 panic("moea_pte_spill: victim p-pte (%p) has no pvo " 2213 "entry", pt); 2214 2215 /* 2216 * If this is a secondary PTE, we need to search its primary 2217 * pvo bucket for the matching PVO. 2218 */ 2219 LIST_FOREACH(pvo, &moea_pvo_table[ptegidx ^ moea_pteg_mask], 2220 pvo_olink) { 2221 MOEA_PVO_CHECK(pvo); 2222 /* 2223 * We also need the pvo entry of the victim we are 2224 * replacing so save the R & C bits of the PTE. 2225 */ 2226 if (moea_pte_compare(pt, &pvo->pvo_pte.pte)) { 2227 victim_pvo = pvo; 2228 break; 2229 } 2230 } 2231 2232 if (victim_pvo == NULL) 2233 panic("moea_pte_spill: victim s-pte (%p) has no pvo " 2234 "entry", pt); 2235 } 2236 2237 /* 2238 * We are invalidating the TLB entry for the EA we are replacing even 2239 * though it's valid. If we don't, we lose any ref/chg bit changes 2240 * contained in the TLB entry. 2241 */ 2242 source_pvo->pvo_pte.pte.pte_hi &= ~PTE_HID; 2243 2244 moea_pte_unset(pt, &victim_pvo->pvo_pte.pte, victim_pvo->pvo_vaddr); 2245 moea_pte_set(pt, &source_pvo->pvo_pte.pte); 2246 2247 PVO_PTEGIDX_CLR(victim_pvo); 2248 PVO_PTEGIDX_SET(source_pvo, i); 2249 moea_pte_replacements++; 2250 2251 MOEA_PVO_CHECK(victim_pvo); 2252 MOEA_PVO_CHECK(source_pvo); 2253 2254 mtx_unlock(&moea_table_mutex); 2255 return (1); 2256} 2257 2258static int 2259moea_pte_insert(u_int ptegidx, struct pte *pvo_pt) 2260{ 2261 struct pte *pt; 2262 int i; 2263 2264 mtx_assert(&moea_table_mutex, MA_OWNED); 2265 2266 /* 2267 * First try primary hash. 2268 */ 2269 for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) { 2270 if ((pt->pte_hi & PTE_VALID) == 0) { 2271 pvo_pt->pte_hi &= ~PTE_HID; 2272 moea_pte_set(pt, pvo_pt); 2273 return (i); 2274 } 2275 } 2276 2277 /* 2278 * Now try secondary hash. 2279 */ 2280 ptegidx ^= moea_pteg_mask; 2281 2282 for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) { 2283 if ((pt->pte_hi & PTE_VALID) == 0) { 2284 pvo_pt->pte_hi |= PTE_HID; 2285 moea_pte_set(pt, pvo_pt); 2286 return (i); 2287 } 2288 } 2289 2290 panic("moea_pte_insert: overflow"); 2291 return (-1); 2292} 2293 2294static boolean_t 2295moea_query_bit(vm_page_t m, int ptebit) 2296{ 2297 struct pvo_entry *pvo; 2298 struct pte *pt; 2299 2300 if (moea_attr_fetch(m) & ptebit) 2301 return (TRUE); 2302 2303 vm_page_lock_queues(); 2304 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 2305 MOEA_PVO_CHECK(pvo); /* sanity check */ 2306 2307 /* 2308 * See if we saved the bit off. If so, cache it and return 2309 * success.
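 *
 * This first pass consults only the cached PTE image held in each PVO and
 * so never takes the pteg table mutex; the loop below resorts to the real
 * PTEs only when no cached copy already has the bit set.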
2310 */ 2311 if (pvo->pvo_pte.pte.pte_lo & ptebit) { 2312 moea_attr_save(m, ptebit); 2313 MOEA_PVO_CHECK(pvo); /* sanity check */ 2314 vm_page_unlock_queues(); 2315 return (TRUE); 2316 } 2317 } 2318 2319 /* 2320 * No luck, now go through the hard part of looking at the PTEs 2321 * themselves. Sync so that any pending REF/CHG bits are flushed to 2322 * the PTEs. 2323 */ 2324 powerpc_sync(); 2325 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 2326 MOEA_PVO_CHECK(pvo); /* sanity check */ 2327 2328 /* 2329 * See if this pvo has a valid PTE. if so, fetch the 2330 * REF/CHG bits from the valid PTE. If the appropriate 2331 * ptebit is set, cache it and return success. 2332 */ 2333 pt = moea_pvo_to_pte(pvo, -1); 2334 if (pt != NULL) { 2335 moea_pte_synch(pt, &pvo->pvo_pte.pte); 2336 mtx_unlock(&moea_table_mutex); 2337 if (pvo->pvo_pte.pte.pte_lo & ptebit) { 2338 moea_attr_save(m, ptebit); 2339 MOEA_PVO_CHECK(pvo); /* sanity check */ 2340 vm_page_unlock_queues(); 2341 return (TRUE); 2342 } 2343 } 2344 } 2345 2346 vm_page_unlock_queues(); 2347 return (FALSE); 2348} 2349 2350static u_int 2351moea_clear_bit(vm_page_t m, int ptebit) 2352{ 2353 u_int count; 2354 struct pvo_entry *pvo; 2355 struct pte *pt; 2356 2357 vm_page_lock_queues(); 2358 2359 /* 2360 * Clear the cached value. 2361 */ 2362 moea_attr_clear(m, ptebit); 2363 2364 /* 2365 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so 2366 * we can reset the right ones). note that since the pvo entries and 2367 * list heads are accessed via BAT0 and are never placed in the page 2368 * table, we don't have to worry about further accesses setting the 2369 * REF/CHG bits. 2370 */ 2371 powerpc_sync(); 2372 2373 /* 2374 * For each pvo entry, clear the pvo's ptebit. If this pvo has a 2375 * valid pte clear the ptebit from the valid pte. 2376 */ 2377 count = 0; 2378 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 2379 MOEA_PVO_CHECK(pvo); /* sanity check */ 2380 pt = moea_pvo_to_pte(pvo, -1); 2381 if (pt != NULL) { 2382 moea_pte_synch(pt, &pvo->pvo_pte.pte); 2383 if (pvo->pvo_pte.pte.pte_lo & ptebit) { 2384 count++; 2385 moea_pte_clear(pt, PVO_VADDR(pvo), ptebit); 2386 } 2387 mtx_unlock(&moea_table_mutex); 2388 } 2389 pvo->pvo_pte.pte.pte_lo &= ~ptebit; 2390 MOEA_PVO_CHECK(pvo); /* sanity check */ 2391 } 2392 2393 vm_page_unlock_queues(); 2394 return (count); 2395} 2396 2397/* 2398 * Return true if the physical range is encompassed by the battable[idx] 2399 */ 2400static int 2401moea_bat_mapped(int idx, vm_offset_t pa, vm_size_t size) 2402{ 2403 u_int prot; 2404 u_int32_t start; 2405 u_int32_t end; 2406 u_int32_t bat_ble; 2407 2408 /* 2409 * Return immediately if not a valid mapping 2410 */ 2411 if (!(battable[idx].batu & BAT_Vs)) 2412 return (EINVAL); 2413 2414 /* 2415 * The BAT entry must be cache-inhibited, guarded, and r/w 2416 * so it can function as an i/o page 2417 */ 2418 prot = battable[idx].batl & (BAT_I|BAT_G|BAT_PP_RW); 2419 if (prot != (BAT_I|BAT_G|BAT_PP_RW)) 2420 return (EPERM); 2421 2422 /* 2423 * The address should be within the BAT range. 
Assume that the 2424 * start address in the BAT has the correct alignment (thus 2425 * not requiring masking) 2426 */ 2427 start = battable[idx].batl & BAT_PBS; 2428 bat_ble = (battable[idx].batu & ~(BAT_EBS)) | 0x03; 2429 end = start | (bat_ble << 15) | 0x7fff; 2430 2431 if ((pa < start) || ((pa + size) > end)) 2432 return (ERANGE); 2433 2434 return (0); 2435} 2436 2437boolean_t 2438moea_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size) 2439{ 2440 int i; 2441 2442 /* 2443 * This currently does not work for entries that 2444 * overlap 256M BAT segments. 2445 */ 2446 2447 for(i = 0; i < 16; i++) 2448 if (moea_bat_mapped(i, pa, size) == 0) 2449 return (0); 2450 2451 return (EFAULT); 2452} 2453 2454/* 2455 * Map a set of physical memory pages into the kernel virtual 2456 * address space. Return a pointer to where it is mapped. This 2457 * routine is intended to be used for mapping device memory, 2458 * NOT real memory. 2459 */ 2460void * 2461moea_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size) 2462{ 2463 2464 return (moea_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); 2465} 2466 2467void * 2468moea_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma) 2469{ 2470 vm_offset_t va, tmpva, ppa, offset; 2471 int i; 2472 2473 ppa = trunc_page(pa); 2474 offset = pa & PAGE_MASK; 2475 size = roundup(offset + size, PAGE_SIZE); 2476 2477 /* 2478 * If the physical address lies within a valid BAT table entry, 2479 * return the 1:1 mapping. This currently doesn't work 2480 * for regions that overlap 256M BAT segments. 2481 */ 2482 for (i = 0; i < 16; i++) { 2483 if (moea_bat_mapped(i, pa, size) == 0) 2484 return ((void *) pa); 2485 } 2486 2487 va = kmem_alloc_nofault(kernel_map, size); 2488 if (!va) 2489 panic("moea_mapdev: Couldn't alloc kernel virtual memory"); 2490 2491 for (tmpva = va; size > 0;) { 2492 moea_kenter_attr(mmu, tmpva, ppa, ma); 2493 tlbie(tmpva); 2494 size -= PAGE_SIZE; 2495 tmpva += PAGE_SIZE; 2496 ppa += PAGE_SIZE; 2497 } 2498 2499 return ((void *)(va + offset)); 2500} 2501 2502void 2503moea_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 2504{ 2505 vm_offset_t base, offset; 2506 2507 /* 2508 * If this is outside kernel virtual space, then it's a 2509 * battable entry and doesn't require unmapping 2510 */ 2511 if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= virtual_end)) { 2512 base = trunc_page(va); 2513 offset = va & PAGE_MASK; 2514 size = roundup(offset + size, PAGE_SIZE); 2515 kmem_free(kernel_map, base, size); 2516 } 2517} 2518 2519static void 2520moea_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2521{ 2522 struct pvo_entry *pvo; 2523 vm_offset_t lim; 2524 vm_paddr_t pa; 2525 vm_size_t len; 2526 2527 PMAP_LOCK(pm); 2528 while (sz > 0) { 2529 lim = round_page(va); 2530 len = MIN(lim - va, sz); 2531 pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); 2532 if (pvo != NULL) { 2533 pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | 2534 (va & ADDR_POFF); 2535 moea_syncicache(pa, len); 2536 } 2537 va += len; 2538 sz -= len; 2539 } 2540 PMAP_UNLOCK(pm); 2541}
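/*
 * Editorial illustration (not part of the pmap): a minimal, self-contained
 * sketch of the region-carving logic used by moea_bootstrap_alloc() above.
 * It shows the three cases (trim the front, trim the back, or split the
 * region) on a made-up avail[] array.  The array layout, the carve_range()
 * name, and the sample numbers are assumptions for the example only, and
 * the page rounding done by the real allocator is omitted.
 */
#include <stdio.h>
#include <stdint.h>

/* avail[] holds {start, end} pairs, terminated by an entry whose end is 0. */
static uintmax_t avail[16] = {
	0x3000, 0x100000,		/* one free region: 12 KB .. 1 MB */
};
static int avail_count = 1;

/* Carve an aligned block of 'size' bytes out of the first region that fits. */
static uintmax_t
carve_range(uintmax_t size, uintmax_t align)
{
	uintmax_t s, e;
	int i, j;

	for (i = 0; avail[i + 1] != 0; i += 2) {
		if (align != 0)
			s = (avail[i] + align - 1) & ~(align - 1);
		else
			s = avail[i];
		e = s + size;

		if (s < avail[i] || e > avail[i + 1])
			continue;

		if (s == avail[i]) {
			avail[i] += size;		/* trim the front */
		} else if (e == avail[i + 1]) {
			avail[i + 1] -= size;		/* trim the back */
		} else {
			/* Split: shift the later pairs up and insert the tail. */
			for (j = avail_count * 2; j > i; j -= 2) {
				avail[j] = avail[j - 2];
				avail[j + 1] = avail[j - 1];
			}
			avail[i + 3] = avail[i + 1];
			avail[i + 1] = s;
			avail[i + 2] = e;
			avail_count++;
		}
		return (s);
	}
	return (0);			/* the kernel panics here instead */
}

int
main(void)
{
	uintmax_t s;

	/*
	 * A 64 KB block aligned to 64 KB lands at 0x10000, splitting the
	 * region into [0x3000, 0x10000) and [0x20000, 0x100000).
	 */
	s = carve_range(64 * 1024, 64 * 1024);
	printf("carved at %#jx\n", s);
	printf("region 0: %#jx-%#jx\n", avail[0], avail[1]);
	printf("region 1: %#jx-%#jx\n", avail[2], avail[3]);
	return (0);
}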