mmu_oea64.c revision 233011
1/*- 2 * Copyright (c) 2001 The NetBSD Foundation, Inc. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to The NetBSD Foundation 6 * by Matt Thomas <matt@3am-software.com> of Allegro Networks, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the NetBSD 19 * Foundation, Inc. and its contributors. 20 * 4. Neither the name of The NetBSD Foundation nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36/*- 37 * Copyright (C) 1995, 1996 Wolfgang Solfrank. 38 * Copyright (C) 1995, 1996 TooLs GmbH. 39 * All rights reserved. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. All advertising materials mentioning features or use of this software 50 * must display the following acknowledgement: 51 * This product includes software developed by TooLs GmbH. 52 * 4. The name of TooLs GmbH may not be used to endorse or promote products 53 * derived from this software without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR 56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
58 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 59 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 61 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 62 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 63 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 64 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 * 66 * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ 67 */ 68/*- 69 * Copyright (C) 2001 Benno Rice. 70 * All rights reserved. 71 * 72 * Redistribution and use in source and binary forms, with or without 73 * modification, are permitted provided that the following conditions 74 * are met: 75 * 1. Redistributions of source code must retain the above copyright 76 * notice, this list of conditions and the following disclaimer. 77 * 2. Redistributions in binary form must reproduce the above copyright 78 * notice, this list of conditions and the following disclaimer in the 79 * documentation and/or other materials provided with the distribution. 80 * 81 * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR 82 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 83 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 84 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 85 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 86 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 87 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 88 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 89 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 90 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
91 */ 92 93#include <sys/cdefs.h> 94__FBSDID("$FreeBSD: head/sys/powerpc/aim/mmu_oea64.c 233011 2012-03-15 19:36:52Z nwhitehorn $"); 95 96/* 97 * Manages physical address maps. 98 * 99 * In addition to hardware address maps, this module is called upon to 100 * provide software-use-only maps which may or may not be stored in the 101 * same form as hardware maps. These pseudo-maps are used to store 102 * intermediate results from copy operations to and from address spaces. 103 * 104 * Since the information managed by this module is also stored by the 105 * logical address mapping module, this module may throw away valid virtual 106 * to physical mappings at almost any time. However, invalidations of 107 * mappings must be done as requested. 108 * 109 * In order to cope with hardware architectures which make virtual to 110 * physical map invalidates expensive, this module may delay invalidate 111 * reduced protection operations until such time as they are actually 112 * necessary. This module is given full information as to which processors 113 * are currently using which maps, and to when physical maps must be made 114 * correct. 
115 */ 116 117#include "opt_compat.h" 118#include "opt_kstack_pages.h" 119 120#include <sys/param.h> 121#include <sys/kernel.h> 122#include <sys/queue.h> 123#include <sys/cpuset.h> 124#include <sys/ktr.h> 125#include <sys/lock.h> 126#include <sys/msgbuf.h> 127#include <sys/mutex.h> 128#include <sys/proc.h> 129#include <sys/sched.h> 130#include <sys/sysctl.h> 131#include <sys/systm.h> 132#include <sys/vmmeter.h> 133 134#include <sys/kdb.h> 135 136#include <dev/ofw/openfirm.h> 137 138#include <vm/vm.h> 139#include <vm/vm_param.h> 140#include <vm/vm_kern.h> 141#include <vm/vm_page.h> 142#include <vm/vm_map.h> 143#include <vm/vm_object.h> 144#include <vm/vm_extern.h> 145#include <vm/vm_pageout.h> 146#include <vm/vm_pager.h> 147#include <vm/uma.h> 148 149#include <machine/_inttypes.h> 150#include <machine/cpu.h> 151#include <machine/platform.h> 152#include <machine/frame.h> 153#include <machine/md_var.h> 154#include <machine/psl.h> 155#include <machine/bat.h> 156#include <machine/hid.h> 157#include <machine/pte.h> 158#include <machine/sr.h> 159#include <machine/trap.h> 160#include <machine/mmuvar.h> 161 162#include "mmu_oea64.h" 163#include "mmu_if.h" 164#include "moea64_if.h" 165 166void moea64_release_vsid(uint64_t vsid); 167uintptr_t moea64_get_unique_vsid(void); 168 169#define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) 170#define ENABLE_TRANS(msr) mtmsr(msr) 171 172#define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) 173#define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) 174#define VSID_HASH_MASK 0x0000007fffffffffULL 175 176#define LOCK_TABLE() mtx_lock(&moea64_table_mutex) 177#define UNLOCK_TABLE() mtx_unlock(&moea64_table_mutex); 178#define ASSERT_TABLE_LOCK() mtx_assert(&moea64_table_mutex, MA_OWNED) 179 180struct ofw_map { 181 cell_t om_va; 182 cell_t om_len; 183 cell_t om_pa_hi; 184 cell_t om_pa_lo; 185 cell_t om_mode; 186}; 187 188/* 189 * Map of physical memory regions. 
 */
static struct	mem_region *regions;		/* OF /memory "reg" */
static struct	mem_region *pregions;		/* OF /memory "available" */
static u_int	phys_avail_count;
static int	regions_sz, pregions_sz;

extern void bs_remap_earlyboot(void);

/*
 * Lock for the pteg and pvo tables.
 */
struct mtx	moea64_table_mutex;
struct mtx	moea64_slb_mutex;

/*
 * PTEG data.
 */
u_int		moea64_pteg_count;
u_int		moea64_pteg_mask;

/*
 * PVO data.
 */
struct	pvo_head *moea64_pvo_table;		/* pvo entries by pteg index */
struct	pvo_head moea64_pvo_kunmanaged =	/* list of unmanaged pages */
    LIST_HEAD_INITIALIZER(moea64_pvo_kunmanaged);

uma_zone_t	moea64_upvo_zone; /* zone for pvo entries for unmanaged pages */
uma_zone_t	moea64_mpvo_zone; /* zone for pvo entries for managed pages */

/*
 * Bootstrap pool of PVO entries, used before the UMA zones are usable.
 */
#define	BPVO_POOL_SIZE	327680
static struct	pvo_entry *moea64_bpvo_pool;
static int	moea64_bpvo_pool_index = 0;

/* VSID allocation bitmap: one bit per VSID, VSID_NBPW bits per word. */
#define	VSID_NBPW	(sizeof(u_int32_t) * 8)
#ifdef __powerpc64__
#define	NVSIDS		(NPMAPS * 16)
#define	VSID_HASHMASK	0xffffffffUL
#else
#define	NVSIDS		NPMAPS
#define	VSID_HASHMASK	0xfffffUL
#endif
static u_int	moea64_vsid_bitmap[NVSIDS / VSID_NBPW];

static boolean_t moea64_initialized = FALSE;

/*
 * Statistics.
 */
u_int	moea64_pte_valid = 0;
u_int	moea64_pte_overflow = 0;
u_int	moea64_pvo_entries = 0;
u_int	moea64_pvo_enter_calls = 0;
u_int	moea64_pvo_remove_calls = 0;
SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD,
    &moea64_pte_valid, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD,
    &moea64_pte_overflow, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD,
    &moea64_pvo_entries, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD,
    &moea64_pvo_enter_calls, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD,
    &moea64_pvo_remove_calls, 0, "");

/*
 * Scratch pages used for copying/zeroing when there is no direct map
 * (two VAs with permanently-locked PTEs; see moea64_late_bootstrap).
 */
vm_offset_t	moea64_scratchpage_va[2];
struct pvo_entry *moea64_scratchpage_pvo[2];
uintptr_t	moea64_scratchpage_pte[2];
struct	mtx	moea64_scratchpage_mtx;

/* Large-page parameters; zero when large pages are unsupported. */
uint64_t	moea64_large_page_mask = 0;
int		moea64_large_page_size = 0;
int		moea64_large_page_shift = 0;

/*
 * PVO calls.
 */
static int	moea64_pvo_enter(mmu_t, pmap_t, uma_zone_t, struct pvo_head *,
		    vm_offset_t, vm_offset_t, uint64_t, int);
static void	moea64_pvo_remove(mmu_t, struct pvo_entry *);
static struct	pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t);

/*
 * Utility routines.
 */
static void		moea64_enter_locked(mmu_t, pmap_t, vm_offset_t,
			    vm_page_t, vm_prot_t, boolean_t);
static boolean_t	moea64_query_bit(mmu_t, vm_page_t, u_int64_t);
static u_int		moea64_clear_bit(mmu_t, vm_page_t, u_int64_t);
static void		moea64_kremove(mmu_t, vm_offset_t);
static void		moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va,
			    vm_offset_t pa, vm_size_t sz);

/*
 * Kernel MMU interface
 */
void moea64_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t);
void moea64_clear_modify(mmu_t, vm_page_t);
void moea64_clear_reference(mmu_t, vm_page_t);
void moea64_copy_page(mmu_t, vm_page_t, vm_page_t);
void moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t);
void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t,
    vm_prot_t);
void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t);
vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t);
vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t);
void moea64_init(mmu_t);
boolean_t moea64_is_modified(mmu_t, vm_page_t);
boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
boolean_t moea64_is_referenced(mmu_t, vm_page_t);
boolean_t moea64_ts_referenced(mmu_t, vm_page_t);
vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int);
boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t);
int moea64_page_wired_mappings(mmu_t, vm_page_t);
void moea64_pinit(mmu_t, pmap_t);
void moea64_pinit0(mmu_t, pmap_t);
void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int);
void moea64_qremove(mmu_t, vm_offset_t, int);
void moea64_release(mmu_t, pmap_t);
void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
void moea64_remove_all(mmu_t, vm_page_t);
void moea64_remove_write(mmu_t, vm_page_t);
void moea64_zero_page(mmu_t, vm_page_t);
void moea64_zero_page_area(mmu_t, vm_page_t, int, int);
void moea64_zero_page_idle(mmu_t, vm_page_t);
void moea64_activate(mmu_t, struct thread *);
void moea64_deactivate(mmu_t, struct thread *);
void *moea64_mapdev(mmu_t, vm_offset_t, vm_size_t);
void *moea64_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t);
void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t);
vm_offset_t moea64_kextract(mmu_t, vm_offset_t);
void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma);
void moea64_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t ma);
void moea64_kenter(mmu_t, vm_offset_t, vm_offset_t);
boolean_t moea64_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t);
static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t);

/* Dispatch table binding the mmu_if interface to this implementation. */
static mmu_method_t moea64_methods[] = {
	MMUMETHOD(mmu_change_wiring,	moea64_change_wiring),
	MMUMETHOD(mmu_clear_modify,	moea64_clear_modify),
	MMUMETHOD(mmu_clear_reference,	moea64_clear_reference),
	MMUMETHOD(mmu_copy_page,	moea64_copy_page),
	MMUMETHOD(mmu_enter,		moea64_enter),
	MMUMETHOD(mmu_enter_object,	moea64_enter_object),
	MMUMETHOD(mmu_enter_quick,	moea64_enter_quick),
	MMUMETHOD(mmu_extract,		moea64_extract),
	MMUMETHOD(mmu_extract_and_hold,	moea64_extract_and_hold),
	MMUMETHOD(mmu_init,		moea64_init),
	MMUMETHOD(mmu_is_modified,	moea64_is_modified),
	MMUMETHOD(mmu_is_prefaultable,	moea64_is_prefaultable),
	MMUMETHOD(mmu_is_referenced,	moea64_is_referenced),
	MMUMETHOD(mmu_ts_referenced,	moea64_ts_referenced),
	MMUMETHOD(mmu_map,		moea64_map),
	MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick),
	MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings),
	MMUMETHOD(mmu_pinit,		moea64_pinit),
	MMUMETHOD(mmu_pinit0,		moea64_pinit0),
	MMUMETHOD(mmu_protect,		moea64_protect),
	MMUMETHOD(mmu_qenter,		moea64_qenter),
	MMUMETHOD(mmu_qremove,		moea64_qremove),
	MMUMETHOD(mmu_release,		moea64_release),
	MMUMETHOD(mmu_remove,		moea64_remove),
	MMUMETHOD(mmu_remove_all,	moea64_remove_all),
	MMUMETHOD(mmu_remove_write,	moea64_remove_write),
	MMUMETHOD(mmu_sync_icache,	moea64_sync_icache),
	MMUMETHOD(mmu_zero_page,	moea64_zero_page),
	MMUMETHOD(mmu_zero_page_area,	moea64_zero_page_area),
	MMUMETHOD(mmu_zero_page_idle,	moea64_zero_page_idle),
	MMUMETHOD(mmu_activate,		moea64_activate),
	MMUMETHOD(mmu_deactivate,	moea64_deactivate),
	MMUMETHOD(mmu_page_set_memattr,	moea64_page_set_memattr),

	/* Internal interfaces */
	MMUMETHOD(mmu_mapdev,		moea64_mapdev),
	MMUMETHOD(mmu_mapdev_attr,	moea64_mapdev_attr),
	MMUMETHOD(mmu_unmapdev,		moea64_unmapdev),
	MMUMETHOD(mmu_kextract,		moea64_kextract),
	MMUMETHOD(mmu_kenter,		moea64_kenter),
	MMUMETHOD(mmu_kenter_attr,	moea64_kenter_attr),
	MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped),

	{ 0, 0 }
};

MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0);

/*
 * Compute the primary PTEG index for (vsid, addr).  For large-page
 * mappings the page-index portion of the EA starts at the large page
 * shift rather than ADDR_PIDX_SHFT.
 */
static __inline u_int
va_to_pteg(uint64_t vsid, vm_offset_t addr, int large)
{
	uint64_t hash;
	int shift;

	shift = large ? moea64_large_page_shift : ADDR_PIDX_SHFT;
	hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >>
	    shift);
	return (hash & moea64_pteg_mask);
}

/* Return the head of the PVO list hanging off a managed page. */
static __inline struct pvo_head *
vm_page_to_pvoh(vm_page_t m)
{

	return (&m->md.mdpg_pvoh);
}

/* Clear attribute bits (REF/CHG) cached in the page's md field. */
static __inline void
moea64_attr_clear(vm_page_t m, u_int64_t ptebit)
{

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	m->md.mdpg_attrs &= ~ptebit;
}

/* Fetch the attribute bits cached in the page's md field. */
static __inline u_int64_t
moea64_attr_fetch(vm_page_t m)
{

	return (m->md.mdpg_attrs);
}

/* Record attribute bits (REF/CHG) into the page's md field. */
static __inline void
moea64_attr_save(vm_page_t m, u_int64_t ptebit)
{

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	m->md.mdpg_attrs |= ptebit;
}

static __inline void
moea64_pte_create(struct lpte *pt, uint64_t vsid, vm_offset_t va,
    uint64_t pte_lo, int flags)
{

	ASSERT_TABLE_LOCK();

	/*
	 * Construct a PTE.  Default to IMB initially.  Valid bit only gets
	 * set when the real pte is set in memory.
	 *
	 * Note: Don't set the valid bit for correct operation of tlb update.
	 */
	pt->pte_hi = (vsid << LPTE_VSID_SHIFT) |
	    (((uint64_t)(va & ADDR_PIDX) >> ADDR_API_SHFT64) & LPTE_API);

	if (flags & PVO_LARGE)
		pt->pte_hi |= LPTE_BIG;

	pt->pte_lo = pte_lo;
}

/*
 * Compute the WIMG storage-control bits for a physical address.  An
 * explicit memattr overrides; otherwise the address is treated as
 * cache-inhibited/guarded unless it lies inside known physical memory.
 */
static __inline uint64_t
moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma)
{
	uint64_t pte_lo;
	int i;

	if (ma != VM_MEMATTR_DEFAULT) {
		switch (ma) {
		case VM_MEMATTR_UNCACHEABLE:
			return (LPTE_I | LPTE_G);
		case VM_MEMATTR_WRITE_COMBINING:
		case VM_MEMATTR_WRITE_BACK:
		case VM_MEMATTR_PREFETCHABLE:
			return (LPTE_I);
		case VM_MEMATTR_WRITE_THROUGH:
			return (LPTE_W | LPTE_M);
		}
		/* Unhandled attributes fall through to the region scan. */
	}

	/*
	 * Assume the page is cache inhibited and access is guarded unless
	 * it's in our available memory array.
	 */
	pte_lo = LPTE_I | LPTE_G;
	for (i = 0; i < pregions_sz; i++) {
		if ((pa >= pregions[i].mr_start) &&
		    (pa < (pregions[i].mr_start + pregions[i].mr_size))) {
			pte_lo &= ~(LPTE_I | LPTE_G);
			pte_lo |= LPTE_M;
			break;
		}
	}

	return pte_lo;
}

/*
 * Quick sort callout for comparing memory regions.
 */
static int	om_cmp(const void *a, const void *b);

static int
om_cmp(const void *a, const void *b)
{
	const struct	ofw_map *mapa;
	const struct	ofw_map *mapb;

	mapa = a;
	mapb = b;
	/* Order by 64-bit physical address: hi cell first, then lo. */
	if (mapa->om_pa_hi < mapb->om_pa_hi)
		return (-1);
	else if (mapa->om_pa_hi > mapb->om_pa_hi)
		return (1);
	else if (mapa->om_pa_lo < mapb->om_pa_lo)
		return (-1);
	else if (mapa->om_pa_lo > mapb->om_pa_lo)
		return (1);
	else
		return (0);
}

/*
 * Read the firmware's "translations" property and re-enter each of its
 * page mappings into the kernel pmap (skipping any VA already mapped).
 */
static void
moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz)
{
	struct ofw_map	translations[sz/sizeof(struct ofw_map)];
	register_t	msr;
	vm_offset_t	off;
	vm_paddr_t	pa_base;
	int		i;

	bzero(translations, sz);
	if (OF_getprop(mmu, "translations", translations, sz) == -1)
		panic("moea64_bootstrap: can't get ofw translations");

	CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations");
	sz /= sizeof(*translations);
	qsort(translations, sz, sizeof (*translations), om_cmp);

	for (i = 0; i < sz; i++) {
		CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x",
		    (uint32_t)(translations[i].om_pa_lo), translations[i].om_va,
		    translations[i].om_len);

		if (translations[i].om_pa_lo % PAGE_SIZE)
			panic("OFW translation not page-aligned!");

		pa_base = translations[i].om_pa_lo;

	      #ifdef __powerpc64__
		pa_base += (vm_offset_t)translations[i].om_pa_hi << 32;
	      #else
		if (translations[i].om_pa_hi)
			panic("OFW translations above 32-bit boundary!");
	      #endif

		/* Now enter the pages for this mapping */

		DISABLE_TRANS(msr);
		for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) {
			if (moea64_pvo_find_va(kernel_pmap,
			    translations[i].om_va + off) != NULL)
				continue;

			moea64_kenter(mmup, translations[i].om_va + off,
			    pa_base + off);
		}
		ENABLE_TRANS(msr);
	}
}

#ifdef __powerpc64__
/*
 * Detect 16 MB large-page support from the CPU version; on 970-class
 * CPUs large pages must first be enabled in HID4.
 */
static void
moea64_probe_large_page(void)
{
	uint16_t pvr = mfpvr() >> 16;

	switch (pvr) {
	case IBM970:
	case IBM970FX:
	case IBM970MP:
		powerpc_sync(); isync();
		mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG);
		powerpc_sync(); isync();

		/* FALLTHROUGH */
	case IBMCELLBE:
		moea64_large_page_size = 0x1000000; /* 16 MB */
		moea64_large_page_shift = 24;
		break;
	default:
		moea64_large_page_size = 0;
	}

	moea64_large_page_mask = moea64_large_page_size - 1;
}

/*
 * Pre-insert a kernel SLB entry covering 'va' so no SLB fault can occur
 * there during bootstrap; a no-op if the per-CPU SLB cache already
 * holds a matching entry.
 */
static void
moea64_bootstrap_slb_prefault(vm_offset_t va, int large)
{
	struct slb *cache;
	struct slb entry;
	uint64_t esid, slbe;
	uint64_t i;

	cache = PCPU_GET(slb);
	esid = va >> ADDR_SR_SHFT;
	slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;

	for (i = 0; i < 64; i++) {
		if (cache[i].slbe == (slbe | i))
			return;
	}

	entry.slbe = slbe;
	entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT;
	if (large)
		entry.slbv |= SLBV_L;

	slb_insert_kernel(entry.slbe, entry.slbv);
}
#endif

/*
 * Establish the kernel's 1:1 physical mappings: with large-page support,
 * wire every physical region with large pages; otherwise identity-map
 * just the PVO table, the bootstrap PVO pool, and the kernel image.
 */
static void
moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart,
    vm_offset_t kernelend)
{
	register_t msr;
	vm_paddr_t pa;
	vm_offset_t size, off;
	uint64_t pte_lo;
	int i;

	if (moea64_large_page_size == 0)
		hw_direct_map = 0;

	DISABLE_TRANS(msr);
	if (hw_direct_map) {
		PMAP_LOCK(kernel_pmap);
		for (i = 0; i < pregions_sz; i++) {
		  for (pa = pregions[i].mr_start; pa < pregions[i].mr_start +
		     pregions[i].mr_size; pa += moea64_large_page_size) {
			pte_lo = LPTE_M;

			/*
			 * Set memory access as guarded if prefetch within
			 * the page could exit the available physmem area.
			 */
			if (pa & moea64_large_page_mask) {
				pa &= moea64_large_page_mask;
				pte_lo |= LPTE_G;
			}
			if (pa + moea64_large_page_size >
			    pregions[i].mr_start + pregions[i].mr_size)
				pte_lo |= LPTE_G;

			moea64_pvo_enter(mmup, kernel_pmap, moea64_upvo_zone,
			    &moea64_pvo_kunmanaged, pa, pa,
			    pte_lo, PVO_WIRED | PVO_LARGE);
		  }
		}
		PMAP_UNLOCK(kernel_pmap);
	} else {
		size = sizeof(struct pvo_head) * moea64_pteg_count;
		off = (vm_offset_t)(moea64_pvo_table);
		for (pa = off; pa < off + size; pa += PAGE_SIZE)
			moea64_kenter(mmup, pa, pa);
		size = BPVO_POOL_SIZE*sizeof(struct pvo_entry);
		off = (vm_offset_t)(moea64_bpvo_pool);
		for (pa = off; pa < off + size; pa += PAGE_SIZE)
			moea64_kenter(mmup, pa, pa);

		/*
		 * Map certain important things, like ourselves.
		 *
		 * NOTE: We do not map the exception vector space. That code is
		 * used only in real mode, and leaving it unmapped allows us to
		 * catch NULL pointer dereferences, instead of making NULL a
		 * valid address.
		 */

		for (pa = kernelstart & ~PAGE_MASK; pa < kernelend;
		    pa += PAGE_SIZE)
			moea64_kenter(mmup, pa, pa);
	}
	ENABLE_TRANS(msr);
}

/*
 * First bootstrap phase: size physical memory from firmware, build the
 * phys_avail[] array (respecting hw.physmem and carving out the kernel
 * image and exception vectors), and size the page table.
 */
void
moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
{
	int		i, j;
	vm_size_t	physsz, hwphyssz;

#ifndef __powerpc64__
	/* We don't have a direct map since there is no BAT */
	hw_direct_map = 0;

	/* Make sure battable is zero, since we have no BAT */
	for (i = 0; i < 16; i++) {
		battable[i].batu = 0;
		battable[i].batl = 0;
	}
#else
	moea64_probe_large_page();

	/* Use a direct map if we have large page support */
	if (moea64_large_page_size > 0)
		hw_direct_map = 1;
	else
		hw_direct_map = 0;
#endif

	/* Get physical memory regions from firmware */
	mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
	CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");

	if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz)
		panic("moea64_bootstrap: phys_avail too small");

	phys_avail_count = 0;
	physsz = 0;
	hwphyssz = 0;
	TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
	for (i = 0, j = 0; i < regions_sz; i++, j += 2) {
		CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start,
		    regions[i].mr_start + regions[i].mr_size,
		    regions[i].mr_size);
		/* Clamp to the hw.physmem limit, if one was set. */
		if (hwphyssz != 0 &&
		    (physsz + regions[i].mr_size) >= hwphyssz) {
			if (physsz < hwphyssz) {
				phys_avail[j] = regions[i].mr_start;
				phys_avail[j + 1] = regions[i].mr_start +
				    hwphyssz - physsz;
				physsz = hwphyssz;
				phys_avail_count++;
			}
			break;
		}
		phys_avail[j] = regions[i].mr_start;
		phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size;
		phys_avail_count++;
		physsz += regions[i].mr_size;
	}

	/* Check for overlap with the kernel and exception vectors */
	for (j = 0; j < 2*phys_avail_count; j+=2) {
		if (phys_avail[j] < EXC_LAST)
			phys_avail[j] += EXC_LAST;

		if (kernelstart >= phys_avail[j] &&
		    kernelstart < phys_avail[j+1]) {
			/* Kernel start splits this range; keep the tail. */
			if (kernelend < phys_avail[j+1]) {
				phys_avail[2*phys_avail_count] =
				    (kernelend & ~PAGE_MASK) + PAGE_SIZE;
				phys_avail[2*phys_avail_count + 1] =
				    phys_avail[j+1];
				phys_avail_count++;
			}

			phys_avail[j+1] = kernelstart & ~PAGE_MASK;
		}

		if (kernelend >= phys_avail[j] &&
		    kernelend < phys_avail[j+1]) {
			/* Kernel end splits this range; keep the head. */
			if (kernelstart > phys_avail[j]) {
				phys_avail[2*phys_avail_count] = phys_avail[j];
				phys_avail[2*phys_avail_count + 1] =
				    kernelstart & ~PAGE_MASK;
				phys_avail_count++;
			}

			phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE;
		}
	}

	physmem = btoc(physsz);

#ifdef PTEGCOUNT
	moea64_pteg_count = PTEGCOUNT;
#else
	/* Size the PTEG count to the largest power of two <= physmem. */
	moea64_pteg_count = 0x1000;

	while (moea64_pteg_count < physmem)
		moea64_pteg_count <<= 1;

	moea64_pteg_count >>= 1;
#endif /* PTEGCOUNT */
}

/*
 * Second bootstrap phase: allocate and initialize the PVO table and
 * bootstrap PVO pool, set up the pmap locks, reserve the kernel VSIDs,
 * and initialize the statically-allocated kernel pmap.
 */
void
moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
{
	vm_size_t	size;
	register_t	msr;
	int		i;

	/*
	 * Set PTEG mask
	 */
	moea64_pteg_mask = moea64_pteg_count - 1;

	/*
	 * Allocate pv/overflow lists.
	 */
	size = sizeof(struct pvo_head) * moea64_pteg_count;

	moea64_pvo_table = (struct pvo_head *)moea64_bootstrap_alloc(size,
	    PAGE_SIZE);
	CTR1(KTR_PMAP, "moea64_bootstrap: PVO table at %p", moea64_pvo_table);

	DISABLE_TRANS(msr);
	for (i = 0; i < moea64_pteg_count; i++)
		LIST_INIT(&moea64_pvo_table[i]);
	ENABLE_TRANS(msr);

	/*
	 * Initialize the lock that synchronizes access to the pteg and pvo
	 * tables.
	 */
	mtx_init(&moea64_table_mutex, "pmap table", NULL, MTX_DEF |
	    MTX_RECURSE);
	mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF);

	/*
	 * Initialise the unmanaged pvo pool.
	 */
	moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc(
		BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0);
	moea64_bpvo_pool_index = 0;

	/*
	 * Make sure kernel vsid is allocated as well as VSID 0.
	 */
	#ifndef __powerpc64__
	moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW]
		|= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
	moea64_vsid_bitmap[0] |= 1;
	#endif

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	#ifdef __powerpc64__
	for (i = 0; i < 64; i++) {
		pcpup->pc_slb[i].slbv = 0;
		pcpup->pc_slb[i].slbe = 0;
	}
	#else
	for (i = 0; i < 16; i++)
		kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i;
	#endif

	kernel_pmap->pmap_phys = kernel_pmap;
	CPU_FILL(&kernel_pmap->pm_active);
	LIST_INIT(&kernel_pmap->pmap_pvo);

	PMAP_LOCK_INIT(kernel_pmap);

	/*
	 * Now map in all the other buffers we allocated earlier
	 */

	moea64_setup_direct_map(mmup, kernelstart, kernelend);
}

/*
 * Final bootstrap phase: import OFW mappings, turn translation on, set
 * up the KVA range, thread0's stack, the message buffer, the dynamic
 * per-CPU area, and (without a direct map) the two scratch pages.
 */
void
moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
{
	ihandle_t	mmui;
	phandle_t	chosen;
	phandle_t	mmu;
	size_t		sz;
	int		i;
	vm_offset_t	pa, va;
	void		*dpcpu;

	/*
	 * Set up the Open Firmware pmap and add its mappings if not in real
	 * mode.
	 */

	chosen = OF_finddevice("/chosen");
	if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1) {
	    mmu = OF_instance_to_package(mmui);
	    if (mmu == -1 || (sz = OF_getproplen(mmu, "translations")) == -1)
		sz = 0;
	    if (sz > 6144 /* tmpstksz - 2 KB headroom */)
		panic("moea64_bootstrap: too many ofw translations");

	    if (sz > 0)
		moea64_add_ofw_mappings(mmup, mmu, sz);
	}

	/*
	 * Calculate the last available physical address.
	 */
	for (i = 0; phys_avail[i + 2] != 0; i += 2)
		;
	Maxmem = powerpc_btop(phys_avail[i + 1]);

	/*
	 * Initialize MMU and remap early physical mappings
	 */
	MMU_CPU_BOOTSTRAP(mmup,0);
	mtmsr(mfmsr() | PSL_DR | PSL_IR);
	pmap_bootstrapped++;
	bs_remap_earlyboot();

	/*
	 * Set the start and end of kva.
	 */
	virtual_avail = VM_MIN_KERNEL_ADDRESS;
	virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS;

	/*
	 * Map the entire KVA range into the SLB. We must not fault there.
	 */
	#ifdef __powerpc64__
	for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH)
		moea64_bootstrap_slb_prefault(va, 0);
	#endif

	/*
	 * Figure out how far we can extend virtual_end into segment 16
	 * without running into existing mappings. Segment 16 is guaranteed
	 * to contain neither RAM nor devices (at least on Apple hardware),
	 * but will generally contain some OFW mappings we should not
	 * step on.
	 */

	#ifndef __powerpc64__	/* KVA is in high memory on PPC64 */
	PMAP_LOCK(kernel_pmap);
	while (virtual_end < VM_MAX_KERNEL_ADDRESS &&
	    moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL)
		virtual_end += PAGE_SIZE;
	PMAP_UNLOCK(kernel_pmap);
	#endif

	/*
	 * Allocate a kernel stack with a guard page for thread0 and map it
	 * into the kernel page map.
	 */
	pa = moea64_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE);
	va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
	virtual_avail = va + KSTACK_PAGES * PAGE_SIZE;
	CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va);
	thread0.td_kstack = va;
	thread0.td_kstack_pages = KSTACK_PAGES;
	for (i = 0; i < KSTACK_PAGES; i++) {
		moea64_kenter(mmup, va, pa);
		pa += PAGE_SIZE;
		va += PAGE_SIZE;
	}

	/*
	 * Allocate virtual address space for the message buffer.
	 */
	pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE);
	msgbufp = (struct msgbuf *)virtual_avail;
	va = virtual_avail;
	virtual_avail += round_page(msgbufsize);
	while (va < virtual_avail) {
		moea64_kenter(mmup, va, pa);
		pa += PAGE_SIZE;
		va += PAGE_SIZE;
	}

	/*
	 * Allocate virtual address space for the dynamic percpu area.
	 */
	pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE);
	dpcpu = (void *)virtual_avail;
	va = virtual_avail;
	virtual_avail += DPCPU_SIZE;
	while (va < virtual_avail) {
		moea64_kenter(mmup, va, pa);
		pa += PAGE_SIZE;
		va += PAGE_SIZE;
	}
	dpcpu_init(dpcpu, 0);

	/*
	 * Allocate some things for page zeroing. We put this directly
	 * in the page table, marked with LPTE_LOCKED, to avoid any
	 * of the PVO book-keeping or other parts of the VM system
	 * from even knowing that this hack exists.
	 */

	if (!hw_direct_map) {
		mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL,
		    MTX_DEF);
		for (i = 0; i < 2; i++) {
			/* Steal the top page of KVA for each scratch slot. */
			moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE;
			virtual_end -= PAGE_SIZE;

			moea64_kenter(mmup, moea64_scratchpage_va[i], 0);

			moea64_scratchpage_pvo[i] = moea64_pvo_find_va(
			    kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]);
			LOCK_TABLE();
			moea64_scratchpage_pte[i] = MOEA64_PVO_TO_PTE(
			    mmup, moea64_scratchpage_pvo[i]);
			moea64_scratchpage_pvo[i]->pvo_pte.lpte.pte_hi
			    |= LPTE_LOCKED;
			MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[i],
			    &moea64_scratchpage_pvo[i]->pvo_pte.lpte,
			    moea64_scratchpage_pvo[i]->pvo_vpn);
			UNLOCK_TABLE();
		}
	}
}

/*
 * Activate a user pmap.  The pmap must be activated before its address
 * space can be accessed in any way.
996 */ 997void 998moea64_activate(mmu_t mmu, struct thread *td) 999{ 1000 pmap_t pm; 1001 1002 pm = &td->td_proc->p_vmspace->vm_pmap; 1003 CPU_SET(PCPU_GET(cpuid), &pm->pm_active); 1004 1005 #ifdef __powerpc64__ 1006 PCPU_SET(userslb, pm->pm_slb); 1007 #else 1008 PCPU_SET(curpmap, pm->pmap_phys); 1009 #endif 1010} 1011 1012void 1013moea64_deactivate(mmu_t mmu, struct thread *td) 1014{ 1015 pmap_t pm; 1016 1017 pm = &td->td_proc->p_vmspace->vm_pmap; 1018 CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); 1019 #ifdef __powerpc64__ 1020 PCPU_SET(userslb, NULL); 1021 #else 1022 PCPU_SET(curpmap, NULL); 1023 #endif 1024} 1025 1026void 1027moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) 1028{ 1029 struct pvo_entry *pvo; 1030 uintptr_t pt; 1031 uint64_t vsid; 1032 int i, ptegidx; 1033 1034 PMAP_LOCK(pm); 1035 pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); 1036 1037 if (pvo != NULL) { 1038 LOCK_TABLE(); 1039 pt = MOEA64_PVO_TO_PTE(mmu, pvo); 1040 1041 if (wired) { 1042 if ((pvo->pvo_vaddr & PVO_WIRED) == 0) 1043 pm->pm_stats.wired_count++; 1044 pvo->pvo_vaddr |= PVO_WIRED; 1045 pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED; 1046 } else { 1047 if ((pvo->pvo_vaddr & PVO_WIRED) != 0) 1048 pm->pm_stats.wired_count--; 1049 pvo->pvo_vaddr &= ~PVO_WIRED; 1050 pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED; 1051 } 1052 1053 if (pt != -1) { 1054 /* Update wiring flag in page table. */ 1055 MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, 1056 pvo->pvo_vpn); 1057 } else if (wired) { 1058 /* 1059 * If we are wiring the page, and it wasn't in the 1060 * page table before, add it. 
1061 */ 1062 vsid = PVO_VSID(pvo); 1063 ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), 1064 pvo->pvo_vaddr & PVO_LARGE); 1065 1066 i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); 1067 1068 if (i >= 0) { 1069 PVO_PTEGIDX_CLR(pvo); 1070 PVO_PTEGIDX_SET(pvo, i); 1071 } 1072 } 1073 1074 UNLOCK_TABLE(); 1075 } 1076 PMAP_UNLOCK(pm); 1077} 1078 1079/* 1080 * This goes through and sets the physical address of our 1081 * special scratch PTE to the PA we want to zero or copy. Because 1082 * of locking issues (this can get called in pvo_enter() by 1083 * the UMA allocator), we can't use most other utility functions here 1084 */ 1085 1086static __inline 1087void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_offset_t pa) { 1088 1089 KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); 1090 mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); 1091 1092 moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo &= 1093 ~(LPTE_WIMG | LPTE_RPGN); 1094 moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo |= 1095 moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; 1096 MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[which], 1097 &moea64_scratchpage_pvo[which]->pvo_pte.lpte, 1098 moea64_scratchpage_pvo[which]->pvo_vpn); 1099 isync(); 1100} 1101 1102void 1103moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) 1104{ 1105 vm_offset_t dst; 1106 vm_offset_t src; 1107 1108 dst = VM_PAGE_TO_PHYS(mdst); 1109 src = VM_PAGE_TO_PHYS(msrc); 1110 1111 if (hw_direct_map) { 1112 kcopy((void *)src, (void *)dst, PAGE_SIZE); 1113 } else { 1114 mtx_lock(&moea64_scratchpage_mtx); 1115 1116 moea64_set_scratchpage_pa(mmu, 0, src); 1117 moea64_set_scratchpage_pa(mmu, 1, dst); 1118 1119 kcopy((void *)moea64_scratchpage_va[0], 1120 (void *)moea64_scratchpage_va[1], PAGE_SIZE); 1121 1122 mtx_unlock(&moea64_scratchpage_mtx); 1123 } 1124} 1125 1126void 1127moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 1128{ 1129 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1130 1131 if (size 
+ off > PAGE_SIZE) 1132 panic("moea64_zero_page: size + off > PAGE_SIZE"); 1133 1134 if (hw_direct_map) { 1135 bzero((caddr_t)pa + off, size); 1136 } else { 1137 mtx_lock(&moea64_scratchpage_mtx); 1138 moea64_set_scratchpage_pa(mmu, 0, pa); 1139 bzero((caddr_t)moea64_scratchpage_va[0] + off, size); 1140 mtx_unlock(&moea64_scratchpage_mtx); 1141 } 1142} 1143 1144/* 1145 * Zero a page of physical memory by temporarily mapping it 1146 */ 1147void 1148moea64_zero_page(mmu_t mmu, vm_page_t m) 1149{ 1150 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1151 vm_offset_t va, off; 1152 1153 if (!hw_direct_map) { 1154 mtx_lock(&moea64_scratchpage_mtx); 1155 1156 moea64_set_scratchpage_pa(mmu, 0, pa); 1157 va = moea64_scratchpage_va[0]; 1158 } else { 1159 va = pa; 1160 } 1161 1162 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 1163 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 1164 1165 if (!hw_direct_map) 1166 mtx_unlock(&moea64_scratchpage_mtx); 1167} 1168 1169void 1170moea64_zero_page_idle(mmu_t mmu, vm_page_t m) 1171{ 1172 1173 moea64_zero_page(mmu, m); 1174} 1175 1176/* 1177 * Map the given physical page at the specified virtual address in the 1178 * target pmap with the protection requested. If specified the page 1179 * will be wired down. 1180 */ 1181void 1182moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 1183 vm_prot_t prot, boolean_t wired) 1184{ 1185 1186 vm_page_lock_queues(); 1187 PMAP_LOCK(pmap); 1188 moea64_enter_locked(mmu, pmap, va, m, prot, wired); 1189 vm_page_unlock_queues(); 1190 PMAP_UNLOCK(pmap); 1191} 1192 1193/* 1194 * Map the given physical page at the specified virtual address in the 1195 * target pmap with the protection requested. If specified the page 1196 * will be wired down. 1197 * 1198 * The page queues and pmap must be locked. 
/*
 * Map the given physical page at the specified virtual address in the
 * target pmap with the protection requested.  If specified the page
 * will be wired down.
 *
 * The page queues and pmap must be locked.
 */
static void
moea64_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, boolean_t wired)
{
	struct pvo_head *pvo_head;
	uma_zone_t zone;
	vm_page_t pg;	/* NOTE(review): set below but never read — dead? */
	uint64_t pte_lo;
	u_int pvo_flags;
	int error;	/* NOTE(review): moea64_pvo_enter() result is ignored */

	/*
	 * Before the VM system is fully initialized, every mapping goes on
	 * the unmanaged list and PVOs come from the bootstrap-capable zone.
	 */
	if (!moea64_initialized) {
		pvo_head = &moea64_pvo_kunmanaged;
		pg = NULL;
		zone = moea64_upvo_zone;
		pvo_flags = 0;
	} else {
		pvo_head = vm_page_to_pvoh(m);
		pg = m;
		zone = moea64_mpvo_zone;
		pvo_flags = PVO_MANAGED;
	}

	/* Caller must hold the page queues (post-bootstrap) and pmap locks. */
	if (pmap_bootstrapped)
		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
	    VM_OBJECT_LOCKED(m->object),
	    ("moea64_enter_locked: page %p is not busy", m));

	/* XXX change the pvo head for fake pages */
	if ((m->oflags & VPO_UNMANAGED) != 0) {
		pvo_flags &= ~PVO_MANAGED;
		pvo_head = &moea64_pvo_kunmanaged;
		zone = moea64_upvo_zone;
	}

	/* Compute cacheability (WIMG) bits from the page's memory attribute. */
	pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m));

	if (prot & VM_PROT_WRITE) {
		pte_lo |= LPTE_BW;
		/*
		 * Flag the page writeable so modified-bit tracking knows to
		 * look at its PTEs later.
		 */
		if (pmap_bootstrapped &&
		    (m->oflags & VPO_UNMANAGED) == 0)
			vm_page_aflag_set(m, PGA_WRITEABLE);
	} else
		pte_lo |= LPTE_BR;

	if ((prot & VM_PROT_EXECUTE) == 0)
		pte_lo |= LPTE_NOEXEC;

	if (wired)
		pvo_flags |= PVO_WIRED;

	error = moea64_pvo_enter(mmu, pmap, zone, pvo_head, va,
	    VM_PAGE_TO_PHYS(m), pte_lo, pvo_flags);

	/*
	 * Flush the page from the instruction cache if this page is
	 * mapped executable and cacheable.
	 */
	if ((pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0)
		moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE);
}

/*
 * Synchronize the instruction cache for a just-written range.  Picks the
 * cheapest address under which the physical range is reachable; falls back
 * to a temporary scratch-page mapping when nothing else works.
 */
static void
moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t pa,
    vm_size_t sz)
{

	/*
	 * This is much trickier than on older systems because
	 * we can't sync the icache on physical addresses directly
	 * without a direct map. Instead we check a couple of cases
	 * where the memory is already mapped in and, failing that,
	 * use the same trick we use for page zeroing to create
	 * a temporary mapping for this physical address.
	 */

	if (!pmap_bootstrapped) {
		/*
		 * If PMAP is not bootstrapped, we are likely to be
		 * in real mode.
		 */
		__syncicache((void *)pa, sz);
	} else if (pmap == kernel_pmap) {
		/* Kernel VAs are already mapped; use the VA directly. */
		__syncicache((void *)va, sz);
	} else if (hw_direct_map) {
		/* Physical addresses are addressable via the direct map. */
		__syncicache((void *)pa, sz);
	} else {
		/* Use the scratch page to set up a temp mapping */

		mtx_lock(&moea64_scratchpage_mtx);

		/* Keep the sub-page offset when building the scratch VA. */
		moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF);
		__syncicache((void *)(moea64_scratchpage_va[1] +
		    (va & ADDR_POFF)), sz);

		mtx_unlock(&moea64_scratchpage_mtx);
	}
}
1313 */ 1314void 1315moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, 1316 vm_page_t m_start, vm_prot_t prot) 1317{ 1318 vm_page_t m; 1319 vm_pindex_t diff, psize; 1320 1321 psize = atop(end - start); 1322 m = m_start; 1323 vm_page_lock_queues(); 1324 PMAP_LOCK(pm); 1325 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 1326 moea64_enter_locked(mmu, pm, start + ptoa(diff), m, prot & 1327 (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); 1328 m = TAILQ_NEXT(m, listq); 1329 } 1330 vm_page_unlock_queues(); 1331 PMAP_UNLOCK(pm); 1332} 1333 1334void 1335moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, 1336 vm_prot_t prot) 1337{ 1338 1339 vm_page_lock_queues(); 1340 PMAP_LOCK(pm); 1341 moea64_enter_locked(mmu, pm, va, m, 1342 prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); 1343 vm_page_unlock_queues(); 1344 PMAP_UNLOCK(pm); 1345} 1346 1347vm_paddr_t 1348moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) 1349{ 1350 struct pvo_entry *pvo; 1351 vm_paddr_t pa; 1352 1353 PMAP_LOCK(pm); 1354 pvo = moea64_pvo_find_va(pm, va); 1355 if (pvo == NULL) 1356 pa = 0; 1357 else 1358 pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | 1359 (va - PVO_VADDR(pvo)); 1360 PMAP_UNLOCK(pm); 1361 return (pa); 1362} 1363 1364/* 1365 * Atomically extract and hold the physical page with the given 1366 * pmap and virtual address pair if that mapping permits the given 1367 * protection. 
/*
 * Atomically extract and hold the physical page with the given
 * pmap and virtual address pair if that mapping permits the given
 * protection.
 */
vm_page_t
moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	struct pvo_entry *pvo;
	vm_page_t m;
	vm_paddr_t pa;

	m = NULL;
	pa = 0;
	PMAP_LOCK(pmap);
retry:
	pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
	/*
	 * The mapping must be valid and, when write access is requested,
	 * must grant read/write permission.
	 */
	if (pvo != NULL && (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) &&
	    ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) == LPTE_RW ||
	     (prot & VM_PROT_WRITE) == 0)) {
		/*
		 * Take the page lock for the backing physical address;
		 * if the lock had to be dropped/reacquired, the mapping
		 * may have changed, so look it up again.
		 */
		if (vm_page_pa_tryrelock(pmap,
		    pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, &pa))
			goto retry;
		m = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN);
		vm_page_hold(m);
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}
1405 */ 1406 vm_offset_t va; 1407 1408 vm_page_t m; 1409 int pflags, needed_lock; 1410 1411 *flags = UMA_SLAB_PRIV; 1412 needed_lock = !PMAP_LOCKED(kernel_pmap); 1413 1414 if (needed_lock) 1415 PMAP_LOCK(kernel_pmap); 1416 1417 if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT) 1418 pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED; 1419 else 1420 pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED; 1421 if (wait & M_ZERO) 1422 pflags |= VM_ALLOC_ZERO; 1423 1424 for (;;) { 1425 m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); 1426 if (m == NULL) { 1427 if (wait & M_NOWAIT) 1428 return (NULL); 1429 VM_WAIT; 1430 } else 1431 break; 1432 } 1433 1434 va = VM_PAGE_TO_PHYS(m); 1435 1436 moea64_pvo_enter(installed_mmu, kernel_pmap, moea64_upvo_zone, 1437 &moea64_pvo_kunmanaged, va, VM_PAGE_TO_PHYS(m), LPTE_M, 1438 PVO_WIRED | PVO_BOOTSTRAP); 1439 1440 if (needed_lock) 1441 PMAP_UNLOCK(kernel_pmap); 1442 1443 if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) 1444 bzero((void *)va, PAGE_SIZE); 1445 1446 return (void *)va; 1447} 1448 1449extern int elf32_nxstack; 1450 1451void 1452moea64_init(mmu_t mmu) 1453{ 1454 1455 CTR0(KTR_PMAP, "moea64_init"); 1456 1457 moea64_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), 1458 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1459 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1460 moea64_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry), 1461 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1462 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1463 1464 if (!hw_direct_map) { 1465 installed_mmu = mmu; 1466 uma_zone_set_allocf(moea64_upvo_zone,moea64_uma_page_alloc); 1467 uma_zone_set_allocf(moea64_mpvo_zone,moea64_uma_page_alloc); 1468 } 1469 1470#ifdef COMPAT_FREEBSD32 1471 elf32_nxstack = 1; 1472#endif 1473 1474 moea64_initialized = TRUE; 1475} 1476 1477boolean_t 1478moea64_is_referenced(mmu_t mmu, vm_page_t m) 1479{ 1480 1481 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1482 ("moea64_is_referenced: page %p is not managed", m)); 1483 return (moea64_query_bit(mmu, m, 
PTE_REF)); 1484} 1485 1486boolean_t 1487moea64_is_modified(mmu_t mmu, vm_page_t m) 1488{ 1489 1490 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1491 ("moea64_is_modified: page %p is not managed", m)); 1492 1493 /* 1494 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be 1495 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 1496 * is clear, no PTEs can have LPTE_CHG set. 1497 */ 1498 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1499 if ((m->oflags & VPO_BUSY) == 0 && 1500 (m->aflags & PGA_WRITEABLE) == 0) 1501 return (FALSE); 1502 return (moea64_query_bit(mmu, m, LPTE_CHG)); 1503} 1504 1505boolean_t 1506moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1507{ 1508 struct pvo_entry *pvo; 1509 boolean_t rv; 1510 1511 PMAP_LOCK(pmap); 1512 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); 1513 rv = pvo == NULL || (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0; 1514 PMAP_UNLOCK(pmap); 1515 return (rv); 1516} 1517 1518void 1519moea64_clear_reference(mmu_t mmu, vm_page_t m) 1520{ 1521 1522 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1523 ("moea64_clear_reference: page %p is not managed", m)); 1524 moea64_clear_bit(mmu, m, LPTE_REF); 1525} 1526 1527void 1528moea64_clear_modify(mmu_t mmu, vm_page_t m) 1529{ 1530 1531 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1532 ("moea64_clear_modify: page %p is not managed", m)); 1533 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1534 KASSERT((m->oflags & VPO_BUSY) == 0, 1535 ("moea64_clear_modify: page %p is busy", m)); 1536 1537 /* 1538 * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG 1539 * set. If the object containing the page is locked and the page is 1540 * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. 1541 */ 1542 if ((m->aflags & PGA_WRITEABLE) == 0) 1543 return; 1544 moea64_clear_bit(mmu, m, LPTE_CHG); 1545} 1546 1547/* 1548 * Clear the write and modified bits in each of the given page's mappings. 
/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
moea64_remove_write(mmu_t mmu, vm_page_t m)
{
	struct pvo_entry *pvo;
	uintptr_t pt;
	pmap_t pmap;
	uint64_t lo;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("moea64_remove_write: page %p is not managed", m));

	/*
	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
	 * another thread while the object is locked. Thus, if PGA_WRITEABLE
	 * is clear, no page table entries need updating.
	 */
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((m->oflags & VPO_BUSY) == 0 &&
	    (m->aflags & PGA_WRITEABLE) == 0)
		return;
	vm_page_lock_queues();
	/* Start from the attribute bits already cached on the page. */
	lo = moea64_attr_fetch(m);
	powerpc_sync();
	/* Walk every mapping of this page and downgrade it to read-only. */
	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
		pmap = pvo->pvo_pmap;
		PMAP_LOCK(pmap);
		LOCK_TABLE();
		if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) {
			pt = MOEA64_PVO_TO_PTE(mmu, pvo);
			/* Downgrade the cached PTE copy to read-only. */
			pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP;
			pvo->pvo_pte.lpte.pte_lo |= LPTE_BR;
			if (pt != -1) {
				/*
				 * Pull the hardware R/C bits into the cached
				 * copy, accumulate CHG before clearing it,
				 * then push the read-only PTE back out.
				 */
				MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte);
				lo |= pvo->pvo_pte.lpte.pte_lo;
				pvo->pvo_pte.lpte.pte_lo &= ~LPTE_CHG;
				MOEA64_PTE_CHANGE(mmu, pt,
				    &pvo->pvo_pte.lpte, pvo->pvo_vpn);
				if (pvo->pvo_pmap == kernel_pmap)
					isync();
			}
		}
		UNLOCK_TABLE();
		PMAP_UNLOCK(pmap);
	}
	/* If any mapping had been modified, dirty the page. */
	if ((lo & LPTE_CHG) != 0) {
		moea64_attr_clear(m, LPTE_CHG);
		vm_page_dirty(m);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	vm_page_unlock_queues();
}
/*
 * Modify the WIMG settings of all mappings for a page.
 */
void
moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma)
{
	struct pvo_entry *pvo;
	struct pvo_head *pvo_head;
	uintptr_t pt;
	pmap_t pmap;
	uint64_t lo;

	/* Unmanaged pages have no PVO list; just record the attribute. */
	if ((m->oflags & VPO_UNMANAGED) != 0) {
		m->md.mdpg_cache_attrs = ma;
		return;
	}

	vm_page_lock_queues();
	pvo_head = vm_page_to_pvoh(m);
	/* Translate the memory attribute into WIMG bits once, up front. */
	lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma);
	/* Rewrite the WIMG bits in every mapping of this page. */
	LIST_FOREACH(pvo, pvo_head, pvo_vlink) {
		pmap = pvo->pvo_pmap;
		PMAP_LOCK(pmap);
		LOCK_TABLE();
		pt = MOEA64_PVO_TO_PTE(mmu, pvo);
		pvo->pvo_pte.lpte.pte_lo &= ~LPTE_WIMG;
		pvo->pvo_pte.lpte.pte_lo |= lo;
		/* Push the change to the hardware PTE if one is resident. */
		if (pt != -1) {
			MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte,
			    pvo->pvo_vpn);
			if (pvo->pvo_pmap == kernel_pmap)
				isync();
		}
		UNLOCK_TABLE();
		PMAP_UNLOCK(pmap);
	}
	m->md.mdpg_cache_attrs = ma;
	vm_page_unlock_queues();
}
1665 */ 1666void 1667moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) 1668{ 1669 uint64_t pte_lo; 1670 int error; 1671 1672 pte_lo = moea64_calc_wimg(pa, ma); 1673 1674 PMAP_LOCK(kernel_pmap); 1675 error = moea64_pvo_enter(mmu, kernel_pmap, moea64_upvo_zone, 1676 &moea64_pvo_kunmanaged, va, pa, pte_lo, PVO_WIRED); 1677 1678 if (error != 0 && error != ENOENT) 1679 panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va, 1680 pa, error); 1681 1682 /* 1683 * Flush the memory from the instruction cache. 1684 */ 1685 if ((pte_lo & (LPTE_I | LPTE_G)) == 0) 1686 __syncicache((void *)va, PAGE_SIZE); 1687 PMAP_UNLOCK(kernel_pmap); 1688} 1689 1690void 1691moea64_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa) 1692{ 1693 1694 moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 1695} 1696 1697/* 1698 * Extract the physical page address associated with the given kernel virtual 1699 * address. 1700 */ 1701vm_offset_t 1702moea64_kextract(mmu_t mmu, vm_offset_t va) 1703{ 1704 struct pvo_entry *pvo; 1705 vm_paddr_t pa; 1706 1707 /* 1708 * Shortcut the direct-mapped case when applicable. We never put 1709 * anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS. 1710 */ 1711 if (va < VM_MIN_KERNEL_ADDRESS) 1712 return (va); 1713 1714 PMAP_LOCK(kernel_pmap); 1715 pvo = moea64_pvo_find_va(kernel_pmap, va); 1716 KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, 1717 va)); 1718 pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va - PVO_VADDR(pvo)); 1719 PMAP_UNLOCK(kernel_pmap); 1720 return (pa); 1721} 1722 1723/* 1724 * Remove a wired page from kernel virtual address space. 1725 */ 1726void 1727moea64_kremove(mmu_t mmu, vm_offset_t va) 1728{ 1729 moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); 1730} 1731 1732/* 1733 * Map a range of physical addresses into kernel virtual address space. 1734 * 1735 * The value passed in *virt is a suggested virtual address for the mapping. 
1736 * Architectures which can support a direct-mapped physical to virtual region 1737 * can return the appropriate address within that region, leaving '*virt' 1738 * unchanged. We cannot and therefore do not; *virt is updated with the 1739 * first usable address after the mapped region. 1740 */ 1741vm_offset_t 1742moea64_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start, 1743 vm_offset_t pa_end, int prot) 1744{ 1745 vm_offset_t sva, va; 1746 1747 sva = *virt; 1748 va = sva; 1749 for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) 1750 moea64_kenter(mmu, va, pa_start); 1751 *virt = va; 1752 1753 return (sva); 1754} 1755 1756/* 1757 * Returns true if the pmap's pv is one of the first 1758 * 16 pvs linked to from this page. This count may 1759 * be changed upwards or downwards in the future; it 1760 * is only necessary that true be returned for a small 1761 * subset of pmaps for proper page aging. 1762 */ 1763boolean_t 1764moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 1765{ 1766 int loops; 1767 struct pvo_entry *pvo; 1768 boolean_t rv; 1769 1770 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1771 ("moea64_page_exists_quick: page %p is not managed", m)); 1772 loops = 0; 1773 rv = FALSE; 1774 vm_page_lock_queues(); 1775 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1776 if (pvo->pvo_pmap == pmap) { 1777 rv = TRUE; 1778 break; 1779 } 1780 if (++loops >= 16) 1781 break; 1782 } 1783 vm_page_unlock_queues(); 1784 return (rv); 1785} 1786 1787/* 1788 * Return the number of managed mappings to the given physical page 1789 * that are wired. 
static uintptr_t moea64_vsidcontext;

/*
 * Allocate a VSID (segment identifier) that is not currently in use,
 * marking it allocated in moea64_vsid_bitmap.  Panics if the space is
 * exhausted.
 */
uintptr_t
moea64_get_unique_vsid(void) {
	u_int entropy;
	register_t hash;
	uint32_t mask;
	int i;

	entropy = 0;
	/* Read the timebase register for a cheap source of entropy. */
	__asm __volatile("mftb %0" : "=r"(entropy));

	mtx_lock(&moea64_slb_mutex);
	for (i = 0; i < NVSIDS; i += VSID_NBPW) {
		u_int n;

		/*
		 * Create a new value by multiplying by a prime and adding in
		 * entropy from the timebase register. This is to make the
		 * VSID more random so that the PT hash function collides
		 * less often. (Note that the prime causes gcc to do shifts
		 * instead of a multiply.)
		 */
		moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy;
		hash = moea64_vsidcontext & (NVSIDS - 1);
		if (hash == 0)		/* 0 is special, avoid it */
			continue;
		/* n indexes the 32-bit bitmap word; mask selects the bit. */
		n = hash >> 5;
		mask = 1 << (hash & (VSID_NBPW - 1));
		hash = (moea64_vsidcontext & VSID_HASHMASK);
		if (moea64_vsid_bitmap[n] & mask) {	/* collision? */
			/* anything free in this bucket? */
			if (moea64_vsid_bitmap[n] == 0xffffffff) {
				/* Bucket full; perturb and try again. */
				entropy = (moea64_vsidcontext >> 20);
				continue;
			}
			/* Claim the first free bit in this bucket instead. */
			i = ffs(~moea64_vsid_bitmap[n]) - 1;
			mask = 1 << i;
			hash &= VSID_HASHMASK & ~(VSID_NBPW - 1);
			hash |= i;
		}
		KASSERT(!(moea64_vsid_bitmap[n] & mask),
		    ("Allocating in-use VSID %#zx\n", hash));
		moea64_vsid_bitmap[n] |= mask;
		mtx_unlock(&moea64_slb_mutex);
		return (hash);
	}

	mtx_unlock(&moea64_slb_mutex);
	panic("%s: out of segments",__func__);
}
/*
 * Set the physical protection on the specified range of this map as
 * requested.  Removing read permission removes the mappings entirely.
 */
void
moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva,
    vm_prot_t prot)
{
	struct pvo_entry *pvo, *tpvo;

	CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
	    sva, eva, prot);

	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
	    ("moea64_protect: non current pmap"));

	/* No read access means the mappings must go away entirely. */
	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		moea64_remove(mmu, pm, sva, eva);
		return;
	}

	vm_page_lock_queues();
	PMAP_LOCK(pm);
	/*
	 * Pick the cheaper traversal: per-page lookups when the range is
	 * small relative to the pmap's population, otherwise one pass over
	 * the pmap's whole PVO list.
	 */
	if ((eva - sva)/PAGE_SIZE < pm->pm_stats.resident_count) {
		for (; sva < eva; sva += PAGE_SIZE) {
			pvo = moea64_pvo_find_va(pm, sva);
			if (pvo != NULL)
				moea64_pvo_protect(mmu, pm, pvo, prot);
		}
	} else {
		LIST_FOREACH_SAFE(pvo, &pm->pmap_pvo, pvo_plink, tpvo) {
			if (PVO_VADDR(pvo) < sva || PVO_VADDR(pvo) >= eva)
				continue;
			moea64_pvo_protect(mmu, pm, pvo, prot);
		}
	}
	vm_page_unlock_queues();
	PMAP_UNLOCK(pm);
}
Existing mappings in the region are overwritten. 1989 */ 1990void 1991moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count) 1992{ 1993 while (count-- > 0) { 1994 moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 1995 va += PAGE_SIZE; 1996 m++; 1997 } 1998} 1999 2000/* 2001 * Remove page mappings from kernel virtual address space. Intended for 2002 * temporary mappings entered by moea64_qenter. 2003 */ 2004void 2005moea64_qremove(mmu_t mmu, vm_offset_t va, int count) 2006{ 2007 while (count-- > 0) { 2008 moea64_kremove(mmu, va); 2009 va += PAGE_SIZE; 2010 } 2011} 2012 2013void 2014moea64_release_vsid(uint64_t vsid) 2015{ 2016 int idx, mask; 2017 2018 mtx_lock(&moea64_slb_mutex); 2019 idx = vsid & (NVSIDS-1); 2020 mask = 1 << (idx % VSID_NBPW); 2021 idx /= VSID_NBPW; 2022 KASSERT(moea64_vsid_bitmap[idx] & mask, 2023 ("Freeing unallocated VSID %#jx", vsid)); 2024 moea64_vsid_bitmap[idx] &= ~mask; 2025 mtx_unlock(&moea64_slb_mutex); 2026} 2027 2028 2029void 2030moea64_release(mmu_t mmu, pmap_t pmap) 2031{ 2032 2033 /* 2034 * Free segment registers' VSIDs 2035 */ 2036 #ifdef __powerpc64__ 2037 slb_free_tree(pmap); 2038 slb_free_user_cache(pmap->pm_slb); 2039 #else 2040 KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0")); 2041 2042 moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0])); 2043 #endif 2044 2045 PMAP_LOCK_DESTROY(pmap); 2046} 2047 2048/* 2049 * Remove the given range of addresses from the specified map. 2050 */ 2051void 2052moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) 2053{ 2054 struct pvo_entry *pvo, *tpvo; 2055 2056 /* 2057 * Perform an unsynchronized read. This is, however, safe. 
/*
 * Remove physical page from all pmaps in which it resides. moea64_pvo_remove()
 * will reflect changes in pte's back to the vm_page.
 */
void
moea64_remove_all(mmu_t mmu, vm_page_t m)
{
	struct pvo_head *pvo_head;
	struct pvo_entry *pvo, *next_pvo;
	pmap_t pmap;

	vm_page_lock_queues();
	pvo_head = vm_page_to_pvoh(m);
	/*
	 * Manual LIST walk (not LIST_FOREACH) because moea64_pvo_remove()
	 * unlinks the current entry; next_pvo is captured first.
	 */
	for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) {
		next_pvo = LIST_NEXT(pvo, pvo_vlink);

		pmap = pvo->pvo_pmap;
		PMAP_LOCK(pmap);
		moea64_pvo_remove(mmu, pvo);
		PMAP_UNLOCK(pmap);
	}
	/* Propagate any pending modified state before it is lost. */
	if ((m->aflags & PGA_WRITEABLE) && moea64_is_modified(mmu, m)) {
		moea64_attr_clear(m, LPTE_CHG);
		vm_page_dirty(m);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	vm_page_unlock_queues();
}
/*
 * Allocate a physical page of memory directly from the phys_avail map.
 * Can only be called from moea64_bootstrap before avail start and end are
 * calculated.
 */
vm_offset_t
moea64_bootstrap_alloc(vm_size_t size, u_int align)
{
	vm_offset_t s, e;
	int i, j;

	size = round_page(size);
	/* Scan each [start, end) region in the phys_avail pair array. */
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		/* Round the candidate start up to the requested alignment. */
		if (align != 0)
			s = (phys_avail[i] + align - 1) & ~(align - 1);
		else
			s = phys_avail[i];
		e = s + size;

		/* Alignment padding may have pushed us out of the region. */
		if (s < phys_avail[i] || e > phys_avail[i + 1])
			continue;

		/* Must stay within real-mode addressable memory. */
		if (s + size > platform_real_maxaddr())
			continue;

		/*
		 * Carve the allocation out of the region: shrink it from
		 * whichever edge we touched, or split it in two if the
		 * allocation landed in the middle.
		 */
		if (s == phys_avail[i]) {
			phys_avail[i] += size;
		} else if (e == phys_avail[i + 1]) {
			phys_avail[i + 1] -= size;
		} else {
			/* Shift later entries down to open a slot. */
			for (j = phys_avail_count * 2; j > i; j -= 2) {
				phys_avail[j] = phys_avail[j - 2];
				phys_avail[j + 1] = phys_avail[j - 1];
			}

			phys_avail[i + 3] = phys_avail[i + 1];
			phys_avail[i + 1] = s;
			phys_avail[i + 2] = e;
			phys_avail_count++;
		}

		return (s);
	}
	panic("moea64_bootstrap_alloc: could not allocate memory");
}
	 * Reuse the pvo entry if there is a mapping.
	 */
	LOCK_TABLE();

	moea64_pvo_enter_calls++;

	/*
	 * Scan the PTEG's overflow list for an existing mapping of this va.
	 * An identical mapping (same pa and protection) is simply revalidated
	 * in the page table; a different one is torn down and replaced.
	 */
	LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) {
		if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
			if ((pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) == pa &&
			    (pvo->pvo_pte.lpte.pte_lo & (LPTE_NOEXEC | LPTE_PP))
			    == (pte_lo & (LPTE_NOEXEC | LPTE_PP))) {
				if (!(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID)) {
					/* Re-insert if spilled */
					i = MOEA64_PTE_INSERT(mmu, ptegidx,
					    &pvo->pvo_pte.lpte);
					if (i >= 0)
						PVO_PTEGIDX_SET(pvo, i);
					moea64_pte_overflow--;
				}
				UNLOCK_TABLE();
				return (0);
			}
			moea64_pvo_remove(mmu, pvo);
			break;
		}
	}

	/*
	 * If we aren't overwriting a mapping, try to allocate.
	 */
	if (bootstrap) {
		if (moea64_bpvo_pool_index >= BPVO_POOL_SIZE) {
			panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd",
			    moea64_bpvo_pool_index, BPVO_POOL_SIZE,
			    BPVO_POOL_SIZE * sizeof(struct pvo_entry));
		}
		pvo = &moea64_bpvo_pool[moea64_bpvo_pool_index];
		moea64_bpvo_pool_index++;
		/* NOTE(review): redundant -- bootstrap is already nonzero. */
		bootstrap = 1;
	} else {
		/*
		 * Note: drop the table lock around the UMA allocation in
		 * case the UMA allocator needs to manipulate the page
		 * table. The mapping we are working with is already
		 * protected by the PMAP lock.
		 */
		UNLOCK_TABLE();
		pvo = uma_zalloc(zone, M_NOWAIT);
		LOCK_TABLE();
	}

	if (pvo == NULL) {
		UNLOCK_TABLE();
		return (ENOMEM);
	}

	/* Initialize the new PVO and link it onto the overflow table. */
	moea64_pvo_entries++;
	pvo->pvo_vaddr = va;
	pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)
	    | (vsid << 16);
	pvo->pvo_pmap = pm;
	LIST_INSERT_HEAD(&moea64_pvo_table[ptegidx], pvo, pvo_olink);
	pvo->pvo_vaddr &= ~ADDR_POFF;

	if (flags & PVO_WIRED)
		pvo->pvo_vaddr |= PVO_WIRED;
	if (pvo_head != &moea64_pvo_kunmanaged)
		pvo->pvo_vaddr |= PVO_MANAGED;
	if (bootstrap)
		pvo->pvo_vaddr |= PVO_BOOTSTRAP;
	if (flags & PVO_LARGE)
		pvo->pvo_vaddr |= PVO_LARGE;

	moea64_pte_create(&pvo->pvo_pte.lpte, vsid, va,
	    (uint64_t)(pa) | pte_lo, flags);

	/*
	 * Add to pmap list
	 */
	LIST_INSERT_HEAD(&pm->pmap_pvo, pvo, pvo_plink);

	/*
	 * Remember if the list was empty and therefore will be the first
	 * item.
	 */
	if (LIST_FIRST(pvo_head) == NULL)
		first = 1;
	LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);

	if (pvo->pvo_vaddr & PVO_WIRED) {
		pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED;
		pm->pm_stats.wired_count++;
	}
	pm->pm_stats.resident_count++;

	/*
	 * We hope this succeeds but it isn't required.
	 */
	i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte);
	if (i >= 0) {
		PVO_PTEGIDX_SET(pvo, i);
	} else {
		panic("moea64_pvo_enter: overflow");
		/*
		 * NOTE(review): unreachable -- panic() does not return, so
		 * the overflow counter is never incremented. Either the
		 * panic (debug aid?) or the increment should be removed.
		 */
		moea64_pte_overflow++;
	}

	/* Kernel mappings may be used immediately; synchronize. */
	if (pm == kernel_pmap)
		isync();

	UNLOCK_TABLE();

#ifdef __powerpc64__
	/*
	 * Make sure all our bootstrap mappings are in the SLB as soon
	 * as virtual memory is switched on.
	 */
	if (!pmap_bootstrapped)
		moea64_bootstrap_slb_prefault(va, flags & PVO_LARGE);
#endif

	return (first ?
	    ENOENT : 0);
}

/*
 * Tear down the mapping described by pvo: invalidate its PTE (if resident),
 * fold its REF/CHG bits back into the vm_page, unlink it from all lists,
 * and return it to its UMA zone unless it came from the bootstrap pool.
 */
static void
moea64_pvo_remove(mmu_t mmu, struct pvo_entry *pvo)
{
	uintptr_t pt;

	/*
	 * If there is an active pte entry, we need to deactivate it (and
	 * save the ref & cfg bits).
	 */
	LOCK_TABLE();
	pt = MOEA64_PVO_TO_PTE(mmu, pvo);
	if (pt != -1) {
		MOEA64_PTE_UNSET(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn);
		PVO_PTEGIDX_CLR(pvo);
	} else {
		/* No PTE slot: this mapping had been counted as spilled. */
		moea64_pte_overflow--;
	}

	/*
	 * Update our statistics.
	 */
	pvo->pvo_pmap->pm_stats.resident_count--;
	if (pvo->pvo_vaddr & PVO_WIRED)
		pvo->pvo_pmap->pm_stats.wired_count--;

	/*
	 * Save the REF/CHG bits into their cache if the page is managed.
	 */
	if ((pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED) {
		struct vm_page *pg;

		pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN);
		if (pg != NULL) {
			moea64_attr_save(pg, pvo->pvo_pte.lpte.pte_lo &
			    (LPTE_REF | LPTE_CHG));
		}
	}

	/*
	 * Remove this PVO from the PV and pmap lists.
	 */
	LIST_REMOVE(pvo, pvo_vlink);
	LIST_REMOVE(pvo, pvo_plink);

	/*
	 * Remove this from the overflow list and return it to the pool
	 * if we aren't going to reuse it.
	 */
	LIST_REMOVE(pvo, pvo_olink);

	moea64_pvo_entries--;
	moea64_pvo_remove_calls++;

	UNLOCK_TABLE();

	/* Bootstrap-pool entries were never UMA-allocated; don't free them. */
	if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP))
		uma_zfree((pvo->pvo_vaddr & PVO_MANAGED) ?
		    moea64_mpvo_zone : moea64_upvo_zone, pvo);
}

/*
 * Look up the PVO mapping va in pmap pm, or NULL if none exists.
 * On 64-bit, the SLB entry determines the VSID and page size used to
 * locate the PTEG; on 32-bit the VSID comes straight from the pmap.
 */
static struct pvo_entry *
moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
{
	struct pvo_entry *pvo;
	int ptegidx;
	uint64_t vsid;
#ifdef __powerpc64__
	uint64_t slbv;

	if (pm == kernel_pmap) {
		slbv = kernel_va_to_slbv(va);
	} else {
		struct slb *slb;
		slb = user_va_to_slb_entry(pm, va);
		/* The page is not mapped if the segment isn't */
		if (slb == NULL)
			return NULL;
		slbv = slb->slbv;
	}

	vsid = (slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT;
	/* Truncate va to the base of its (large or normal) page. */
	if (slbv & SLBV_L)
		va &= ~moea64_large_page_mask;
	else
		va &= ~ADDR_POFF;
	ptegidx = va_to_pteg(vsid, va, slbv & SLBV_L);
#else
	va &= ~ADDR_POFF;
	vsid = va_to_vsid(pm, va);
	ptegidx = va_to_pteg(vsid, va, 0);
#endif

	LOCK_TABLE();
	LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) {
		if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va)
			break;
	}
	UNLOCK_TABLE();

	/* NULL if the loop ran off the end of the list. */
	return (pvo);
}

/*
 * Return TRUE if any mapping of page m has the REF/CHG bit ptebit set,
 * caching a positive result on the page's attribute cache.
 */
static boolean_t
moea64_query_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit)
{
	struct pvo_entry *pvo;
	uintptr_t pt;

	/* Fast path: the bit is already cached on the page. */
	if (moea64_attr_fetch(m) & ptebit)
		return (TRUE);

	vm_page_lock_queues();

	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {

		/*
		 * See if we saved the bit off.  If so, cache it and return
		 * success.
		 */
		if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
			moea64_attr_save(m, ptebit);
			vm_page_unlock_queues();
			return (TRUE);
		}
	}

	/*
	 * No luck, now go through the hard part of looking at the PTEs
	 * themselves.  Sync so that any pending REF/CHG bits are flushed to
	 * the PTEs.
	 */
	powerpc_sync();
	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {

		/*
		 * See if this pvo has a valid PTE.  if so, fetch the
		 * REF/CHG bits from the valid PTE.
		 * If the appropriate ptebit is set, cache it and return
		 * success.
		 */
		LOCK_TABLE();
		pt = MOEA64_PVO_TO_PTE(mmu, pvo);
		if (pt != -1) {
			/* Pull current REF/CHG bits out of the live PTE. */
			MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte);
			if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
				UNLOCK_TABLE();

				moea64_attr_save(m, ptebit);
				vm_page_unlock_queues();
				return (TRUE);
			}
		}
		UNLOCK_TABLE();
	}

	vm_page_unlock_queues();
	return (FALSE);
}

/*
 * Clear the REF/CHG bit ptebit in every mapping of page m (and in the
 * page's attribute cache); return how many mappings had it set.
 */
static u_int
moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit)
{
	u_int count;
	struct pvo_entry *pvo;
	uintptr_t pt;

	vm_page_lock_queues();

	/*
	 * Clear the cached value.
	 */
	moea64_attr_clear(m, ptebit);

	/*
	 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so
	 * we can reset the right ones).  note that since the pvo entries and
	 * list heads are accessed via BAT0 and are never placed in the page
	 * table, we don't have to worry about further accesses setting the
	 * REF/CHG bits.
	 */
	powerpc_sync();

	/*
	 * For each pvo entry, clear the pvo's ptebit.  If this pvo has a
	 * valid pte clear the ptebit from the valid pte.
	 */
	count = 0;
	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {

		LOCK_TABLE();
		pt = MOEA64_PVO_TO_PTE(mmu, pvo);
		if (pt != -1) {
			MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte);
			if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
				count++;
				MOEA64_PTE_CLEAR(mmu, pt, &pvo->pvo_pte.lpte,
				    pvo->pvo_vpn, ptebit);
			}
		}
		/* Clear the cached copy in the PVO as well. */
		pvo->pvo_pte.lpte.pte_lo &= ~ptebit;
		UNLOCK_TABLE();
	}

	vm_page_unlock_queues();
	return (count);
}

/*
 * Check whether [pa, pa + size) is identity-mapped (va == pa) in the
 * kernel pmap.  Returns 0 if every page is, EFAULT otherwise -- despite
 * the boolean_t return type, the result is an errno-style value.
 */
boolean_t
moea64_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size)
{
	struct pvo_entry *pvo;
	vm_offset_t ppa;
	int error = 0;

	PMAP_LOCK(kernel_pmap);
	for (ppa = pa & ~ADDR_POFF; ppa < pa + size; ppa += PAGE_SIZE) {
		pvo = moea64_pvo_find_va(kernel_pmap, ppa);
		/* Mapping must exist and point at the same physical page. */
		if (pvo == NULL ||
		    (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) != ppa) {
			error = EFAULT;
			break;
		}
	}
	PMAP_UNLOCK(kernel_pmap);

	return (error);
}

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
2547 */ 2548void * 2549moea64_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma) 2550{ 2551 vm_offset_t va, tmpva, ppa, offset; 2552 2553 ppa = trunc_page(pa); 2554 offset = pa & PAGE_MASK; 2555 size = roundup(offset + size, PAGE_SIZE); 2556 2557 va = kmem_alloc_nofault(kernel_map, size); 2558 2559 if (!va) 2560 panic("moea64_mapdev: Couldn't alloc kernel virtual memory"); 2561 2562 for (tmpva = va; size > 0;) { 2563 moea64_kenter_attr(mmu, tmpva, ppa, ma); 2564 size -= PAGE_SIZE; 2565 tmpva += PAGE_SIZE; 2566 ppa += PAGE_SIZE; 2567 } 2568 2569 return ((void *)(va + offset)); 2570} 2571 2572void * 2573moea64_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size) 2574{ 2575 2576 return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT); 2577} 2578 2579void 2580moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 2581{ 2582 vm_offset_t base, offset; 2583 2584 base = trunc_page(va); 2585 offset = va & PAGE_MASK; 2586 size = roundup(offset + size, PAGE_SIZE); 2587 2588 kmem_free(kernel_map, base, size); 2589} 2590 2591void 2592moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2593{ 2594 struct pvo_entry *pvo; 2595 vm_offset_t lim; 2596 vm_paddr_t pa; 2597 vm_size_t len; 2598 2599 PMAP_LOCK(pm); 2600 while (sz > 0) { 2601 lim = round_page(va); 2602 len = MIN(lim - va, sz); 2603 pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); 2604 if (pvo != NULL && !(pvo->pvo_pte.lpte.pte_lo & LPTE_I)) { 2605 pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | 2606 (va & ADDR_POFF); 2607 moea64_syncicache(mmu, pm, va, pa, len); 2608 } 2609 va += len; 2610 sz -= len; 2611 } 2612 PMAP_UNLOCK(pm); 2613} 2614