1/* $OpenBSD: pmap.c,v 1.184 2024/05/22 05:51:49 jsg Exp $ */ 2 3/* 4 * Copyright (c) 2015 Martin Pieuchot 5 * Copyright (c) 2001, 2002, 2007 Dale Rahn. 6 * All rights reserved. 7 * 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * 29 * Effort sponsored in part by the Defense Advanced Research Projects 30 * Agency (DARPA) and Air Force Research Laboratory, Air Force 31 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 32 */ 33 34/* 35 * powerpc lazy icache management. 36 * The icache does not snoop dcache accesses. The icache also will not load 37 * modified data from the dcache, but the unmodified data in ram. 38 * Before the icache is loaded, the dcache must be synced to ram to prevent 39 * the icache from loading stale data. 40 * pg->pg_flags PG_PMAP_EXE bit is used to track if the dcache is clean 41 * and the icache may have valid data in it. 42 * if the PG_PMAP_EXE bit is set (and the page is not currently RWX) 43 * the icache will only have valid code in it. If the bit is clear 44 * memory may not match the dcache contents or the icache may contain 45 * data from a previous page. 46 * 47 * pmap enter 48 * !E NONE -> R no action 49 * !E NONE|R -> RW no action 50 * !E NONE|R -> RX flush dcache, inval icache (that page only), set E 51 * !E NONE|R -> RWX flush dcache, inval icache (that page only), set E 52 * !E NONE|RW -> RWX flush dcache, inval icache (that page only), set E 53 * E NONE -> R no action 54 * E NONE|R -> RW clear PG_PMAP_EXE bit 55 * E NONE|R -> RX no action 56 * E NONE|R -> RWX no action 57 * E NONE|RW -> RWX -invalid source state 58 * 59 * pamp_protect 60 * E RW -> R - invalid source state 61 * !E RW -> R - no action 62 * * RX -> R - no action 63 * * RWX -> R - sync dcache, inval icache 64 * * RWX -> RW - clear PG_PMAP_EXE 65 * * RWX -> RX - sync dcache, inval icache 66 * * * -> NONE - no action 67 * 68 * pmap_page_protect (called with arg PROT_NONE if page is to be reused) 69 * * RW -> R - as pmap_protect 70 * * RX -> R - as pmap_protect 71 * * RWX -> R - as pmap_protect 72 * * RWX -> RW - as pmap_protect 73 * * RWX -> RX - as pmap_protect 74 * * * -> NONE - clear PG_PMAP_EXE 75 * 76 */ 77 78#include <sys/param.h> 79#include <sys/systm.h> 80#include <sys/proc.h> 81#include <sys/queue.h> 82#include <sys/pool.h> 83#include <sys/atomic.h> 84#include <sys/user.h> 85 86#include <uvm/uvm_extern.h> 87 88#include <machine/pcb.h> 89#include <powerpc/powerpc.h> 90#include <powerpc/bat.h> 91#include <machine/pmap.h> 92 93struct bat battable[16]; 94 95struct dumpmem dumpmem[VM_PHYSSEG_MAX]; 96u_int ndumpmem; 97 98struct pmap kernel_pmap_; 99static struct mem_region *pmap_mem, *pmap_avail; 100struct mem_region pmap_allocated[10]; 101int pmap_cnt_avail; 102int pmap_cnt_allocated; 103 104struct pte_64 *pmap_ptable64; 105struct pte_32 *pmap_ptable32; 106int pmap_ptab_cnt; 107u_int pmap_ptab_mask; 108 109#define HTABSIZE_32 (pmap_ptab_cnt * 64) 110#define HTABMEMSZ_64 (pmap_ptab_cnt * 8 * sizeof(struct pte_64)) 111#define HTABSIZE_64 (ffs(pmap_ptab_cnt) - 12) 112 113static u_int usedsr[NPMAPS / sizeof(u_int) / 8]; 114 115struct pte_desc { 116 /* Linked list of phys -> virt entries */ 117 LIST_ENTRY(pte_desc) pted_pv_list; 118 union { 119 struct pte_32 pted_pte32; 120 struct pte_64 pted_pte64; 121 } p; 122 pmap_t pted_pmap; 123 vaddr_t pted_va; 124}; 125 126void pmap_attr_save(paddr_t pa, u_int32_t bits); 127void pmap_pted_ro(struct pte_desc *, vm_prot_t); 128void pmap_pted_ro64(struct pte_desc *, vm_prot_t); 129void pmap_pted_ro32(struct pte_desc *, vm_prot_t); 130 131/* 132 * Some functions are called in real mode and cannot be profiled. 133 */ 134#define __noprof __attribute__((__no_instrument_function__)) 135 136/* VP routines */ 137int pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags); 138struct pte_desc *pmap_vp_remove(pmap_t pm, vaddr_t va); 139void pmap_vp_destroy(pmap_t pm); 140struct pte_desc *pmap_vp_lookup(pmap_t pm, vaddr_t va) __noprof; 141 142/* PV routines */ 143void pmap_enter_pv(struct pte_desc *pted, struct vm_page *); 144void pmap_remove_pv(struct pte_desc *pted); 145 146 147/* pte hash table routines */ 148static inline void *pmap_ptedinhash(struct pte_desc *); 149void pte_insert32(struct pte_desc *) __noprof; 150void pte_insert64(struct pte_desc *) __noprof; 151void pmap_fill_pte64(pmap_t, vaddr_t, paddr_t, struct pte_desc *, vm_prot_t, 152 int) __noprof; 153void pmap_fill_pte32(pmap_t, vaddr_t, paddr_t, struct pte_desc *, vm_prot_t, 154 int) __noprof; 155 156void pmap_syncicache_user_virt(pmap_t pm, vaddr_t va); 157 158void pmap_remove_pted(pmap_t, struct pte_desc *); 159 160/* setup/initialization functions */ 161void pmap_avail_setup(void); 162void pmap_avail_fixup(void); 163void pmap_remove_avail(paddr_t base, paddr_t end); 164void *pmap_steal_avail(size_t size, int align); 165 166/* asm interface */ 167int pte_spill_r(u_int32_t, u_int32_t, u_int32_t, int) __noprof; 168int pte_spill_v(pmap_t, u_int32_t, u_int32_t, int) __noprof; 169 170u_int32_t pmap_setusr(pmap_t pm, vaddr_t va); 171void pmap_popusr(u_int32_t oldsr); 172 173/* pte invalidation */ 174void pte_del(void *, vaddr_t); 175void pte_zap(void *, struct pte_desc *); 176 177/* XXX - panic on pool get failures? */ 178struct pool pmap_pmap_pool; 179struct pool pmap_vp_pool; 180struct pool pmap_pted_pool; 181 182int pmap_initialized = 0; 183int physmem; 184int physmaxaddr; 185 186#ifdef MULTIPROCESSOR 187struct __ppc_lock pmap_hash_lock = PPC_LOCK_INITIALIZER; 188 189#define PMAP_HASH_LOCK(s) \ 190do { \ 191 s = ppc_intr_disable(); \ 192 __ppc_lock(&pmap_hash_lock); \ 193} while (0) 194 195#define PMAP_HASH_UNLOCK(s) \ 196do { \ 197 __ppc_unlock(&pmap_hash_lock); \ 198 ppc_intr_enable(s); \ 199} while (0) 200 201#define PMAP_VP_LOCK_INIT(pm) mtx_init(&pm->pm_mtx, IPL_VM) 202 203#define PMAP_VP_LOCK(pm) \ 204do { \ 205 if (pm != pmap_kernel()) \ 206 mtx_enter(&pm->pm_mtx); \ 207} while (0) 208 209#define PMAP_VP_UNLOCK(pm) \ 210do { \ 211 if (pm != pmap_kernel()) \ 212 mtx_leave(&pm->pm_mtx); \ 213} while (0) 214 215#define PMAP_VP_ASSERT_LOCKED(pm) \ 216do { \ 217 if (pm != pmap_kernel()) \ 218 MUTEX_ASSERT_LOCKED(&pm->pm_mtx); \ 219} while (0) 220 221#else /* ! MULTIPROCESSOR */ 222 223#define PMAP_HASH_LOCK(s) (void)s 224#define PMAP_HASH_UNLOCK(s) /* nothing */ 225 226#define PMAP_VP_LOCK_INIT(pm) /* nothing */ 227#define PMAP_VP_LOCK(pm) /* nothing */ 228#define PMAP_VP_UNLOCK(pm) /* nothing */ 229#define PMAP_VP_ASSERT_LOCKED(pm) /* nothing */ 230#endif /* MULTIPROCESSOR */ 231 232/* virtual to physical helpers */ 233static inline int 234VP_SR(vaddr_t va) 235{ 236 return (va >>VP_SR_POS) & VP_SR_MASK; 237} 238 239static inline int 240VP_IDX1(vaddr_t va) 241{ 242 return (va >> VP_IDX1_POS) & VP_IDX1_MASK; 243} 244 245static inline int 246VP_IDX2(vaddr_t va) 247{ 248 return (va >> VP_IDX2_POS) & VP_IDX2_MASK; 249} 250 251#if VP_IDX1_SIZE != VP_IDX2_SIZE 252#error pmap allocation code expects IDX1 and IDX2 size to be same 253#endif 254struct pmapvp { 255 void *vp[VP_IDX1_SIZE]; 256}; 257 258 259/* 260 * VP routines, virtual to physical translation information. 261 * These data structures are based off of the pmap, per process. 262 */ 263 264/* 265 * This is used for pmap_kernel() mappings, they are not to be removed 266 * from the vp table because they were statically initialized at the 267 * initial pmap initialization. This is so that memory allocation 268 * is not necessary in the pmap_kernel() mappings. 269 * Otherwise bad race conditions can appear. 270 */ 271struct pte_desc * 272pmap_vp_lookup(pmap_t pm, vaddr_t va) 273{ 274 struct pmapvp *vp1; 275 struct pmapvp *vp2; 276 struct pte_desc *pted; 277 278 PMAP_VP_ASSERT_LOCKED(pm); 279 280 vp1 = pm->pm_vp[VP_SR(va)]; 281 if (vp1 == NULL) { 282 return NULL; 283 } 284 285 vp2 = vp1->vp[VP_IDX1(va)]; 286 if (vp2 == NULL) { 287 return NULL; 288 } 289 290 pted = vp2->vp[VP_IDX2(va)]; 291 292 return pted; 293} 294 295/* 296 * Remove, and return, pted at specified address, NULL if not present 297 */ 298struct pte_desc * 299pmap_vp_remove(pmap_t pm, vaddr_t va) 300{ 301 struct pmapvp *vp1; 302 struct pmapvp *vp2; 303 struct pte_desc *pted; 304 305 PMAP_VP_ASSERT_LOCKED(pm); 306 307 vp1 = pm->pm_vp[VP_SR(va)]; 308 if (vp1 == NULL) { 309 return NULL; 310 } 311 312 vp2 = vp1->vp[VP_IDX1(va)]; 313 if (vp2 == NULL) { 314 return NULL; 315 } 316 317 pted = vp2->vp[VP_IDX2(va)]; 318 vp2->vp[VP_IDX2(va)] = NULL; 319 320 return pted; 321} 322 323/* 324 * Create a V -> P mapping for the given pmap and virtual address 325 * with reference to the pte descriptor that is used to map the page. 326 * This code should track allocations of vp table allocations 327 * so they can be freed efficiently. 328 */ 329int 330pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags) 331{ 332 struct pmapvp *vp1; 333 struct pmapvp *vp2; 334 335 PMAP_VP_ASSERT_LOCKED(pm); 336 337 vp1 = pm->pm_vp[VP_SR(va)]; 338 if (vp1 == NULL) { 339 vp1 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO); 340 if (vp1 == NULL) { 341 if ((flags & PMAP_CANFAIL) == 0) 342 panic("pmap_vp_enter: failed to allocate vp1"); 343 return ENOMEM; 344 } 345 pm->pm_vp[VP_SR(va)] = vp1; 346 } 347 348 vp2 = vp1->vp[VP_IDX1(va)]; 349 if (vp2 == NULL) { 350 vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO); 351 if (vp2 == NULL) { 352 if ((flags & PMAP_CANFAIL) == 0) 353 panic("pmap_vp_enter: failed to allocate vp2"); 354 return ENOMEM; 355 } 356 vp1->vp[VP_IDX1(va)] = vp2; 357 } 358 359 vp2->vp[VP_IDX2(va)] = pted; 360 361 return 0; 362} 363 364static inline void 365tlbie(vaddr_t va) 366{ 367 asm volatile ("tlbie %0" :: "r"(va & ~PAGE_MASK)); 368} 369 370static inline void 371tlbsync(void) 372{ 373 asm volatile ("tlbsync"); 374} 375static inline void 376eieio(void) 377{ 378 asm volatile ("eieio"); 379} 380 381static inline void 382sync(void) 383{ 384 asm volatile ("sync"); 385} 386 387static inline void 388tlbia(void) 389{ 390 vaddr_t va; 391 392 sync(); 393 for (va = 0; va < 0x00040000; va += 0x00001000) 394 tlbie(va); 395 eieio(); 396 tlbsync(); 397 sync(); 398} 399 400static inline int 401ptesr(sr_t *sr, vaddr_t va) 402{ 403 return sr[(u_int)va >> ADDR_SR_SHIFT]; 404} 405 406static inline int 407pteidx(sr_t sr, vaddr_t va) 408{ 409 int hash; 410 hash = (sr & SR_VSID) ^ (((u_int)va & ADDR_PIDX) >> ADDR_PIDX_SHIFT); 411 return hash & pmap_ptab_mask; 412} 413 414#define PTED_VA_PTEGIDX_M 0x07 415#define PTED_VA_HID_M 0x08 416#define PTED_VA_MANAGED_M 0x10 417#define PTED_VA_WIRED_M 0x20 418#define PTED_VA_EXEC_M 0x40 419 420static inline u_int32_t 421PTED_HID(struct pte_desc *pted) 422{ 423 return (pted->pted_va & PTED_VA_HID_M); 424} 425 426static inline u_int32_t 427PTED_PTEGIDX(struct pte_desc *pted) 428{ 429 return (pted->pted_va & PTED_VA_PTEGIDX_M); 430} 431 432static inline u_int32_t 433PTED_MANAGED(struct pte_desc *pted) 434{ 435 return (pted->pted_va & PTED_VA_MANAGED_M); 436} 437 438static inline u_int32_t 439PTED_VALID(struct pte_desc *pted) 440{ 441 if (ppc_proc_is_64b) 442 return (pted->p.pted_pte64.pte_hi & PTE_VALID_64); 443 else 444 return (pted->p.pted_pte32.pte_hi & PTE_VALID_32); 445} 446 447/* 448 * PV entries - 449 * manipulate the physical to virtual translations for the entire system. 450 * 451 * QUESTION: should all mapped memory be stored in PV tables? Or 452 * is it alright to only store "ram" memory. Currently device mappings 453 * are not stored. 454 * It makes sense to pre-allocate mappings for all of "ram" memory, since 455 * it is likely that it will be mapped at some point, but would it also 456 * make sense to use a tree/table like is use for pmap to store device 457 * mappings? 458 * Further notes: It seems that the PV table is only used for pmap_protect 459 * and other paging related operations. Given this, it is not necessary 460 * to store any pmap_kernel() entries in PV tables and does not make 461 * sense to store device mappings in PV either. 462 * 463 * Note: unlike other powerpc pmap designs, the array is only an array 464 * of pointers. Since the same structure is used for holding information 465 * in the VP table, the PV table, and for kernel mappings, the wired entries. 466 * Allocate one data structure to hold all of the info, instead of replicating 467 * it multiple times. 468 * 469 * One issue of making this a single data structure is that two pointers are 470 * wasted for every page which does not map ram (device mappings), this 471 * should be a low percentage of mapped pages in the system, so should not 472 * have too noticeable unnecessary ram consumption. 473 */ 474 475void 476pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg) 477{ 478 if (__predict_false(!pmap_initialized)) { 479 return; 480 } 481 482 mtx_enter(&pg->mdpage.pv_mtx); 483 LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list); 484 pted->pted_va |= PTED_VA_MANAGED_M; 485 mtx_leave(&pg->mdpage.pv_mtx); 486} 487 488void 489pmap_remove_pv(struct pte_desc *pted) 490{ 491 struct vm_page *pg; 492 493 if (ppc_proc_is_64b) 494 pg = PHYS_TO_VM_PAGE(pted->p.pted_pte64.pte_lo & PTE_RPGN_64); 495 else 496 pg = PHYS_TO_VM_PAGE(pted->p.pted_pte32.pte_lo & PTE_RPGN_32); 497 498 mtx_enter(&pg->mdpage.pv_mtx); 499 pted->pted_va &= ~PTED_VA_MANAGED_M; 500 LIST_REMOVE(pted, pted_pv_list); 501 mtx_leave(&pg->mdpage.pv_mtx); 502} 503 504 505/* PTE_CHG_32 == PTE_CHG_64 */ 506/* PTE_REF_32 == PTE_REF_64 */ 507static __inline u_int 508pmap_pte2flags(u_int32_t pte) 509{ 510 return (((pte & PTE_REF_32) ? PG_PMAP_REF : 0) | 511 ((pte & PTE_CHG_32) ? PG_PMAP_MOD : 0)); 512} 513 514static __inline u_int 515pmap_flags2pte(u_int32_t flags) 516{ 517 return (((flags & PG_PMAP_REF) ? PTE_REF_32 : 0) | 518 ((flags & PG_PMAP_MOD) ? PTE_CHG_32 : 0)); 519} 520 521void 522pmap_attr_save(paddr_t pa, u_int32_t bits) 523{ 524 struct vm_page *pg; 525 526 pg = PHYS_TO_VM_PAGE(pa); 527 if (pg == NULL) 528 return; 529 530 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(bits)); 531} 532 533int 534pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) 535{ 536 struct pte_desc *pted; 537 struct vm_page *pg; 538 boolean_t nocache = (pa & PMAP_NOCACHE) != 0; 539 boolean_t wt = (pa & PMAP_WT) != 0; 540 int need_sync = 0; 541 int cache, error = 0; 542 543 KASSERT(!(wt && nocache)); 544 pa &= PMAP_PA_MASK; 545 546 PMAP_VP_LOCK(pm); 547 pted = pmap_vp_lookup(pm, va); 548 if (pted && PTED_VALID(pted)) { 549 pmap_remove_pted(pm, pted); 550 /* we lost our pted if it was user */ 551 if (pm != pmap_kernel()) 552 pted = pmap_vp_lookup(pm, va); 553 } 554 555 pm->pm_stats.resident_count++; 556 557 /* Do not have pted for this, get one and put it in VP */ 558 if (pted == NULL) { 559 pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO); 560 if (pted == NULL) { 561 if ((flags & PMAP_CANFAIL) == 0) { 562 error = ENOMEM; 563 goto out; 564 } 565 panic("pmap_enter: failed to allocate pted"); 566 } 567 error = pmap_vp_enter(pm, va, pted, flags); 568 if (error) { 569 pool_put(&pmap_pted_pool, pted); 570 goto out; 571 } 572 } 573 574 pg = PHYS_TO_VM_PAGE(pa); 575 if (pg != NULL && (pg->pg_flags & PG_PMAP_UC)) 576 nocache = TRUE; 577 if (wt) 578 cache = PMAP_CACHE_WT; 579 else if (pg != NULL && !(pg->pg_flags & PG_DEV) && !nocache) 580 cache = PMAP_CACHE_WB; 581 else 582 cache = PMAP_CACHE_CI; 583 584 /* Calculate PTE */ 585 if (ppc_proc_is_64b) 586 pmap_fill_pte64(pm, va, pa, pted, prot, cache); 587 else 588 pmap_fill_pte32(pm, va, pa, pted, prot, cache); 589 590 if (pg != NULL) { 591 pmap_enter_pv(pted, pg); /* only managed mem */ 592 } 593 594 /* 595 * Insert into HTAB 596 * We were told to map the page, probably called from vm_fault, 597 * so map the page! 598 */ 599 if (ppc_proc_is_64b) 600 pte_insert64(pted); 601 else 602 pte_insert32(pted); 603 604 if (prot & PROT_EXEC) { 605 u_int sn = VP_SR(va); 606 607 pm->pm_exec[sn]++; 608 if (pm->pm_sr[sn] & SR_NOEXEC) 609 pm->pm_sr[sn] &= ~SR_NOEXEC; 610 611 if (pg != NULL) { 612 need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0); 613 if (prot & PROT_WRITE) 614 atomic_clearbits_int(&pg->pg_flags, 615 PG_PMAP_EXE); 616 else 617 atomic_setbits_int(&pg->pg_flags, 618 PG_PMAP_EXE); 619 } else 620 need_sync = 1; 621 } else { 622 /* 623 * Should we be paranoid about writeable non-exec 624 * mappings ? if so, clear the exec tag 625 */ 626 if ((prot & PROT_WRITE) && (pg != NULL)) 627 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); 628 } 629 630 /* only instruction sync executable pages */ 631 if (need_sync) 632 pmap_syncicache_user_virt(pm, va); 633 634out: 635 PMAP_VP_UNLOCK(pm); 636 return (error); 637} 638 639/* 640 * Remove the given range of mapping entries. 641 */ 642void 643pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva) 644{ 645 struct pte_desc *pted; 646 vaddr_t va; 647 648 PMAP_VP_LOCK(pm); 649 for (va = sva; va < eva; va += PAGE_SIZE) { 650 pted = pmap_vp_lookup(pm, va); 651 if (pted && PTED_VALID(pted)) 652 pmap_remove_pted(pm, pted); 653 } 654 PMAP_VP_UNLOCK(pm); 655} 656 657/* 658 * remove a single mapping, notice that this code is O(1) 659 */ 660void 661pmap_remove_pted(pmap_t pm, struct pte_desc *pted) 662{ 663 void *pte; 664 int s; 665 666 KASSERT(pm == pted->pted_pmap); 667 PMAP_VP_ASSERT_LOCKED(pm); 668 669 pm->pm_stats.resident_count--; 670 671 PMAP_HASH_LOCK(s); 672 if ((pte = pmap_ptedinhash(pted)) != NULL) 673 pte_zap(pte, pted); 674 PMAP_HASH_UNLOCK(s); 675 676 if (pted->pted_va & PTED_VA_EXEC_M) { 677 u_int sn = VP_SR(pted->pted_va); 678 679 pted->pted_va &= ~PTED_VA_EXEC_M; 680 pm->pm_exec[sn]--; 681 if (pm->pm_exec[sn] == 0) 682 pm->pm_sr[sn] |= SR_NOEXEC; 683 } 684 685 if (ppc_proc_is_64b) 686 pted->p.pted_pte64.pte_hi &= ~PTE_VALID_64; 687 else 688 pted->p.pted_pte32.pte_hi &= ~PTE_VALID_32; 689 690 if (PTED_MANAGED(pted)) 691 pmap_remove_pv(pted); 692 693 if (pm != pmap_kernel()) { 694 (void)pmap_vp_remove(pm, pted->pted_va); 695 pool_put(&pmap_pted_pool, pted); 696 } 697} 698 699/* 700 * Enter a kernel mapping for the given page. 701 * kernel mappings have a larger set of prerequisites than normal mappings. 702 * 703 * 1. no memory should be allocated to create a kernel mapping. 704 * 2. a vp mapping should already exist, even if invalid. (see 1) 705 * 3. all vp tree mappings should already exist (see 1) 706 * 707 */ 708void 709pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) 710{ 711 struct pte_desc *pted; 712 struct vm_page *pg; 713 boolean_t nocache = (pa & PMAP_NOCACHE) != 0; 714 boolean_t wt = (pa & PMAP_WT) != 0; 715 pmap_t pm; 716 int cache; 717 718 KASSERT(!(wt && nocache)); 719 pa &= PMAP_PA_MASK; 720 721 pm = pmap_kernel(); 722 723 pted = pmap_vp_lookup(pm, va); 724 if (pted && PTED_VALID(pted)) 725 pmap_remove_pted(pm, pted); /* pted is reused */ 726 727 pm->pm_stats.resident_count++; 728 729 if (prot & PROT_WRITE) { 730 pg = PHYS_TO_VM_PAGE(pa); 731 if (pg != NULL) 732 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); 733 } 734 735 /* Do not have pted for this, get one and put it in VP */ 736 if (pted == NULL) { 737 panic("pted not preallocated in pmap_kernel() va %lx pa %lx", 738 va, pa); 739 } 740 741 pg = PHYS_TO_VM_PAGE(pa); 742 if (wt) 743 cache = PMAP_CACHE_WT; 744 else if (pg != NULL && !(pg->pg_flags & PG_DEV) && !nocache) 745 cache = PMAP_CACHE_WB; 746 else 747 cache = PMAP_CACHE_CI; 748 749 /* Calculate PTE */ 750 if (ppc_proc_is_64b) 751 pmap_fill_pte64(pm, va, pa, pted, prot, cache); 752 else 753 pmap_fill_pte32(pm, va, pa, pted, prot, cache); 754 755 /* 756 * Insert into HTAB 757 * We were told to map the page, probably called from vm_fault, 758 * so map the page! 759 */ 760 if (ppc_proc_is_64b) 761 pte_insert64(pted); 762 else 763 pte_insert32(pted); 764 765 pted->pted_va |= PTED_VA_WIRED_M; 766 767 if (prot & PROT_EXEC) { 768 u_int sn = VP_SR(va); 769 770 pm->pm_exec[sn]++; 771 if (pm->pm_sr[sn] & SR_NOEXEC) 772 pm->pm_sr[sn] &= ~SR_NOEXEC; 773 } 774} 775 776/* 777 * remove kernel (pmap_kernel()) mappings 778 */ 779void 780pmap_kremove(vaddr_t va, vsize_t len) 781{ 782 struct pte_desc *pted; 783 784 for (len >>= PAGE_SHIFT; len > 0; len--, va += PAGE_SIZE) { 785 pted = pmap_vp_lookup(pmap_kernel(), va); 786 if (pted && PTED_VALID(pted)) 787 pmap_remove_pted(pmap_kernel(), pted); 788 } 789} 790 791static inline void * 792pmap_ptedinhash(struct pte_desc *pted) 793{ 794 vaddr_t va = pted->pted_va & ~PAGE_MASK; 795 pmap_t pm = pted->pted_pmap; 796 int sr, idx; 797 798 sr = ptesr(pm->pm_sr, va); 799 idx = pteidx(sr, va); 800 801 if (ppc_proc_is_64b) { 802 struct pte_64 *pte = pmap_ptable64; 803 804 pte += (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)) * 8; 805 pte += PTED_PTEGIDX(pted); 806 807 /* 808 * We now have the pointer to where it will be, if it is 809 * currently mapped. If the mapping was thrown away in 810 * exchange for another page mapping, then this page is 811 * not currently in the HASH. 812 */ 813 if ((pted->p.pted_pte64.pte_hi | 814 (PTED_HID(pted) ? PTE_HID_64 : 0)) == pte->pte_hi) 815 return (pte); 816 } else { 817 struct pte_32 *pte = pmap_ptable32; 818 819 pte += (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)) * 8; 820 pte += PTED_PTEGIDX(pted); 821 822 /* 823 * We now have the pointer to where it will be, if it is 824 * currently mapped. If the mapping was thrown away in 825 * exchange for another page mapping, then this page is 826 * not currently in the HASH. 827 */ 828 if ((pted->p.pted_pte32.pte_hi | 829 (PTED_HID(pted) ? PTE_HID_32 : 0)) == pte->pte_hi) 830 return (pte); 831 } 832 833 return (NULL); 834} 835 836/* 837 * Delete a Page Table Entry, section 7.6.3.3. 838 * 839 * Note: pte must be locked. 840 */ 841void 842pte_del(void *pte, vaddr_t va) 843{ 844 if (ppc_proc_is_64b) 845 ((struct pte_64 *)pte)->pte_hi &= ~PTE_VALID_64; 846 else 847 ((struct pte_32 *)pte)->pte_hi &= ~PTE_VALID_32; 848 849 sync(); /* Ensure update completed. */ 850 tlbie(va); /* Invalidate old translation. */ 851 eieio(); /* Order tlbie before tlbsync. */ 852 tlbsync(); /* Ensure tlbie completed on all processors. */ 853 sync(); /* Ensure tlbsync and update completed. */ 854} 855 856void 857pte_zap(void *pte, struct pte_desc *pted) 858{ 859 pte_del(pte, pted->pted_va); 860 861 if (!PTED_MANAGED(pted)) 862 return; 863 864 if (ppc_proc_is_64b) { 865 pmap_attr_save(pted->p.pted_pte64.pte_lo & PTE_RPGN_64, 866 ((struct pte_64 *)pte)->pte_lo & (PTE_REF_64|PTE_CHG_64)); 867 } else { 868 pmap_attr_save(pted->p.pted_pte32.pte_lo & PTE_RPGN_32, 869 ((struct pte_32 *)pte)->pte_lo & (PTE_REF_32|PTE_CHG_32)); 870 } 871} 872 873/* 874 * What about execution control? Even at only a segment granularity. 875 */ 876void 877pmap_fill_pte64(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted, 878 vm_prot_t prot, int cache) 879{ 880 sr_t sr; 881 struct pte_64 *pte64; 882 883 sr = ptesr(pm->pm_sr, va); 884 pte64 = &pted->p.pted_pte64; 885 886 pte64->pte_hi = (((u_int64_t)sr & SR_VSID) << 887 PTE_VSID_SHIFT_64) | 888 ((va >> ADDR_API_SHIFT_64) & PTE_API_64) | PTE_VALID_64; 889 pte64->pte_lo = (pa & PTE_RPGN_64); 890 891 892 if (cache == PMAP_CACHE_WB) 893 pte64->pte_lo |= PTE_M_64; 894 else if (cache == PMAP_CACHE_WT) 895 pte64->pte_lo |= (PTE_W_64 | PTE_M_64); 896 else 897 pte64->pte_lo |= (PTE_M_64 | PTE_I_64 | PTE_G_64); 898 899 if ((prot & (PROT_READ | PROT_WRITE)) == 0) 900 pte64->pte_lo |= PTE_AC_64; 901 902 if (prot & PROT_WRITE) 903 pte64->pte_lo |= PTE_RW_64; 904 else 905 pte64->pte_lo |= PTE_RO_64; 906 907 pted->pted_va = va & ~PAGE_MASK; 908 909 if (prot & PROT_EXEC) 910 pted->pted_va |= PTED_VA_EXEC_M; 911 else 912 pte64->pte_lo |= PTE_N_64; 913 914 pted->pted_pmap = pm; 915} 916 917/* 918 * What about execution control? Even at only a segment granularity. 919 */ 920void 921pmap_fill_pte32(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted, 922 vm_prot_t prot, int cache) 923{ 924 sr_t sr; 925 struct pte_32 *pte32; 926 927 sr = ptesr(pm->pm_sr, va); 928 pte32 = &pted->p.pted_pte32; 929 930 pte32->pte_hi = ((sr & SR_VSID) << PTE_VSID_SHIFT_32) | 931 ((va >> ADDR_API_SHIFT_32) & PTE_API_32) | PTE_VALID_32; 932 pte32->pte_lo = (pa & PTE_RPGN_32); 933 934 if (cache == PMAP_CACHE_WB) 935 pte32->pte_lo |= PTE_M_32; 936 else if (cache == PMAP_CACHE_WT) 937 pte32->pte_lo |= (PTE_W_32 | PTE_M_32); 938 else 939 pte32->pte_lo |= (PTE_M_32 | PTE_I_32 | PTE_G_32); 940 941 if (prot & PROT_WRITE) 942 pte32->pte_lo |= PTE_RW_32; 943 else 944 pte32->pte_lo |= PTE_RO_32; 945 946 pted->pted_va = va & ~PAGE_MASK; 947 948 /* XXX Per-page execution control. */ 949 if (prot & PROT_EXEC) 950 pted->pted_va |= PTED_VA_EXEC_M; 951 952 pted->pted_pmap = pm; 953} 954 955int 956pmap_test_attrs(struct vm_page *pg, u_int flagbit) 957{ 958 u_int bits; 959 struct pte_desc *pted; 960 u_int ptebit = pmap_flags2pte(flagbit); 961 int s; 962 963 /* PTE_CHG_32 == PTE_CHG_64 */ 964 /* PTE_REF_32 == PTE_REF_64 */ 965 966 bits = pg->pg_flags & flagbit; 967 if (bits == flagbit) 968 return bits; 969 970 mtx_enter(&pg->mdpage.pv_mtx); 971 LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) { 972 void *pte; 973 974 PMAP_HASH_LOCK(s); 975 if ((pte = pmap_ptedinhash(pted)) != NULL) { 976 if (ppc_proc_is_64b) { 977 struct pte_64 *ptp64 = pte; 978 bits |= pmap_pte2flags(ptp64->pte_lo & ptebit); 979 } else { 980 struct pte_32 *ptp32 = pte; 981 bits |= pmap_pte2flags(ptp32->pte_lo & ptebit); 982 } 983 } 984 PMAP_HASH_UNLOCK(s); 985 986 if (bits == flagbit) 987 break; 988 } 989 mtx_leave(&pg->mdpage.pv_mtx); 990 991 atomic_setbits_int(&pg->pg_flags, bits); 992 993 return bits; 994} 995 996int 997pmap_clear_attrs(struct vm_page *pg, u_int flagbit) 998{ 999 u_int bits; 1000 struct pte_desc *pted; 1001 u_int ptebit = pmap_flags2pte(flagbit); 1002 int s; 1003 1004 /* PTE_CHG_32 == PTE_CHG_64 */ 1005 /* PTE_REF_32 == PTE_REF_64 */ 1006 1007 bits = pg->pg_flags & flagbit; 1008 1009 mtx_enter(&pg->mdpage.pv_mtx); 1010 LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) { 1011 void *pte; 1012 1013 PMAP_HASH_LOCK(s); 1014 if ((pte = pmap_ptedinhash(pted)) != NULL) { 1015 if (ppc_proc_is_64b) { 1016 struct pte_64 *ptp64 = pte; 1017 1018 bits |= pmap_pte2flags(ptp64->pte_lo & ptebit); 1019 1020 pte_del(ptp64, pted->pted_va); 1021 1022 ptp64->pte_lo &= ~ptebit; 1023 eieio(); 1024 ptp64->pte_hi |= PTE_VALID_64; 1025 sync(); 1026 } else { 1027 struct pte_32 *ptp32 = pte; 1028 1029 bits |= pmap_pte2flags(ptp32->pte_lo & ptebit); 1030 1031 pte_del(ptp32, pted->pted_va); 1032 1033 ptp32->pte_lo &= ~ptebit; 1034 eieio(); 1035 ptp32->pte_hi |= PTE_VALID_32; 1036 sync(); 1037 } 1038 } 1039 PMAP_HASH_UNLOCK(s); 1040 } 1041 mtx_leave(&pg->mdpage.pv_mtx); 1042 1043 /* 1044 * this is done a second time, because while walking the list 1045 * a bit could have been promoted via pmap_attr_save() 1046 */ 1047 bits |= pg->pg_flags & flagbit; 1048 atomic_clearbits_int(&pg->pg_flags, flagbit); 1049 1050 return bits; 1051} 1052 1053/* 1054 * Fill the given physical page with zeros. 1055 */ 1056void 1057pmap_zero_page(struct vm_page *pg) 1058{ 1059 vaddr_t va = pmap_map_direct(pg); 1060 int i; 1061 1062 /* 1063 * Loop over & zero cache lines. This code assumes that 64-bit 1064 * CPUs have 128-byte cache lines. We explicitly use ``dcbzl'' 1065 * here because we do not clear the DCBZ_SIZE bit of the HID5 1066 * register in order to be compatible with code using ``dcbz'' 1067 * and assuming that cache line size is 32. 1068 */ 1069 if (ppc_proc_is_64b) { 1070 for (i = 0; i < PAGE_SIZE; i += 128) 1071 asm volatile ("dcbzl 0,%0" :: "r"(va + i)); 1072 return; 1073 } 1074 1075 for (i = 0; i < PAGE_SIZE; i += CACHELINESIZE) 1076 asm volatile ("dcbz 0,%0" :: "r"(va + i)); 1077} 1078 1079/* 1080 * Copy a page. 1081 */ 1082void 1083pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) 1084{ 1085 vaddr_t srcva = pmap_map_direct(srcpg); 1086 vaddr_t dstva = pmap_map_direct(dstpg); 1087 1088 memcpy((void *)dstva, (void *)srcva, PAGE_SIZE); 1089} 1090 1091int pmap_id_avail = 0; 1092 1093pmap_t 1094pmap_create(void) 1095{ 1096 u_int bits; 1097 int first, i, k, try, tblidx, tbloff; 1098 int seg; 1099 pmap_t pm; 1100 1101 pm = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO); 1102 1103 pmap_reference(pm); 1104 PMAP_VP_LOCK_INIT(pm); 1105 1106 /* 1107 * Allocate segment registers for this pmap. 1108 * Try not to reuse pmap ids, to spread the hash table usage. 1109 */ 1110 first = pmap_id_avail; 1111again: 1112 for (i = 0; i < NPMAPS; i++) { 1113 try = first + i; 1114 try = try % NPMAPS; /* truncate back into bounds */ 1115 tblidx = try / (8 * sizeof usedsr[0]); 1116 tbloff = try % (8 * sizeof usedsr[0]); 1117 bits = usedsr[tblidx]; 1118 if ((bits & (1U << tbloff)) == 0) { 1119 if (atomic_cas_uint(&usedsr[tblidx], bits, 1120 bits | (1U << tbloff)) != bits) { 1121 first = try; 1122 goto again; 1123 } 1124 pmap_id_avail = try + 1; 1125 1126 seg = try << 4; 1127 for (k = 0; k < 16; k++) 1128 pm->pm_sr[k] = (seg + k) | SR_NOEXEC; 1129 return (pm); 1130 } 1131 } 1132 panic("out of pmap slots"); 1133} 1134 1135/* 1136 * Add a reference to a given pmap. 1137 */ 1138void 1139pmap_reference(pmap_t pm) 1140{ 1141 atomic_inc_int(&pm->pm_refs); 1142} 1143 1144/* 1145 * Retire the given pmap from service. 1146 * Should only be called if the map contains no valid mappings. 1147 */ 1148void 1149pmap_destroy(pmap_t pm) 1150{ 1151 int refs; 1152 1153 refs = atomic_dec_int_nv(&pm->pm_refs); 1154 if (refs == -1) 1155 panic("re-entering pmap_destroy"); 1156 if (refs > 0) 1157 return; 1158 1159 /* 1160 * reference count is zero, free pmap resources and free pmap. 1161 */ 1162 pmap_release(pm); 1163 pool_put(&pmap_pmap_pool, pm); 1164} 1165 1166/* 1167 * Release any resources held by the given physical map. 1168 * Called when a pmap initialized by pmap_pinit is being released. 1169 */ 1170void 1171pmap_release(pmap_t pm) 1172{ 1173 int i, tblidx, tbloff; 1174 1175 pmap_vp_destroy(pm); 1176 i = (pm->pm_sr[0] & SR_VSID) >> 4; 1177 tblidx = i / (8 * sizeof usedsr[0]); 1178 tbloff = i % (8 * sizeof usedsr[0]); 1179 1180 /* powerpc can do atomic cas, clearbits on same word. */ 1181 atomic_clearbits_int(&usedsr[tblidx], 1U << tbloff); 1182} 1183 1184void 1185pmap_vp_destroy(pmap_t pm) 1186{ 1187 int i, j; 1188 struct pmapvp *vp1; 1189 struct pmapvp *vp2; 1190 1191 for (i = 0; i < VP_SR_SIZE; i++) { 1192 vp1 = pm->pm_vp[i]; 1193 if (vp1 == NULL) 1194 continue; 1195 1196 for (j = 0; j < VP_IDX1_SIZE; j++) { 1197 vp2 = vp1->vp[j]; 1198 if (vp2 == NULL) 1199 continue; 1200 1201 pool_put(&pmap_vp_pool, vp2); 1202 } 1203 pm->pm_vp[i] = NULL; 1204 pool_put(&pmap_vp_pool, vp1); 1205 } 1206} 1207 1208void 1209pmap_avail_setup(void) 1210{ 1211 struct mem_region *mp; 1212 1213 ppc_mem_regions(&pmap_mem, &pmap_avail); 1214 1215 for (mp = pmap_mem; mp->size !=0; mp++, ndumpmem++) { 1216 physmem += atop(mp->size); 1217 dumpmem[ndumpmem].start = atop(mp->start); 1218 dumpmem[ndumpmem].end = atop(mp->start + mp->size); 1219 } 1220 1221 for (mp = pmap_avail; mp->size !=0 ; mp++) { 1222 if (physmaxaddr < mp->start + mp->size) 1223 physmaxaddr = mp->start + mp->size; 1224 } 1225 1226 for (mp = pmap_avail; mp->size !=0; mp++) 1227 pmap_cnt_avail += 1; 1228} 1229 1230void 1231pmap_avail_fixup(void) 1232{ 1233 struct mem_region *mp; 1234 u_int32_t align; 1235 u_int32_t end; 1236 1237 mp = pmap_avail; 1238 while(mp->size !=0) { 1239 align = round_page(mp->start); 1240 if (mp->start != align) { 1241 pmap_remove_avail(mp->start, align); 1242 mp = pmap_avail; 1243 continue; 1244 } 1245 end = mp->start+mp->size; 1246 align = trunc_page(end); 1247 if (end != align) { 1248 pmap_remove_avail(align, end); 1249 mp = pmap_avail; 1250 continue; 1251 } 1252 mp++; 1253 } 1254} 1255 1256/* remove a given region from avail memory */ 1257void 1258pmap_remove_avail(paddr_t base, paddr_t end) 1259{ 1260 struct mem_region *mp; 1261 int i; 1262 int mpend; 1263 1264 /* remove given region from available */ 1265 for (mp = pmap_avail; mp->size; mp++) { 1266 /* 1267 * Check if this region holds all of the region 1268 */ 1269 mpend = mp->start + mp->size; 1270 if (base > mpend) { 1271 continue; 1272 } 1273 if (base <= mp->start) { 1274 if (end <= mp->start) 1275 break; /* region not present -??? */ 1276 1277 if (end >= mpend) { 1278 /* covers whole region */ 1279 /* shorten */ 1280 for (i = mp - pmap_avail; 1281 i < pmap_cnt_avail; 1282 i++) { 1283 pmap_avail[i] = pmap_avail[i+1]; 1284 } 1285 pmap_cnt_avail--; 1286 pmap_avail[pmap_cnt_avail].size = 0; 1287 } else { 1288 mp->start = end; 1289 mp->size = mpend - end; 1290 } 1291 } else { 1292 /* start after the beginning */ 1293 if (end >= mpend) { 1294 /* just truncate */ 1295 mp->size = base - mp->start; 1296 } else { 1297 /* split */ 1298 for (i = pmap_cnt_avail; 1299 i > (mp - pmap_avail); 1300 i--) { 1301 pmap_avail[i] = pmap_avail[i - 1]; 1302 } 1303 pmap_cnt_avail++; 1304 mp->size = base - mp->start; 1305 mp++; 1306 mp->start = end; 1307 mp->size = mpend - end; 1308 } 1309 } 1310 } 1311 for (mp = pmap_allocated; mp->size != 0; mp++) { 1312 if (base < mp->start) { 1313 if (end == mp->start) { 1314 mp->start = base; 1315 mp->size += end - base; 1316 break; 1317 } 1318 /* lengthen */ 1319 for (i = pmap_cnt_allocated; i > (mp - pmap_allocated); 1320 i--) { 1321 pmap_allocated[i] = pmap_allocated[i - 1]; 1322 } 1323 pmap_cnt_allocated++; 1324 mp->start = base; 1325 mp->size = end - base; 1326 return; 1327 } 1328 if (base == (mp->start + mp->size)) { 1329 mp->size += end - base; 1330 return; 1331 } 1332 } 1333 if (mp->size == 0) { 1334 mp->start = base; 1335 mp->size = end - base; 1336 pmap_cnt_allocated++; 1337 } 1338} 1339 1340void * 1341pmap_steal_avail(size_t size, int align) 1342{ 1343 struct mem_region *mp; 1344 int start; 1345 int remsize; 1346 1347 for (mp = pmap_avail; mp->size; mp++) { 1348 if (mp->size > size) { 1349 start = (mp->start + (align -1)) & ~(align -1); 1350 remsize = mp->size - (start - mp->start); 1351 if (remsize >= 0) { 1352 pmap_remove_avail(start, start+size); 1353 return (void *)start; 1354 } 1355 } 1356 } 1357 panic ("unable to allocate region with size %zx align %x", 1358 size, align); 1359} 1360 1361/* 1362 * Similar to pmap_steal_avail, but operating on vm_physmem since 1363 * uvm_page_physload() has been called. 1364 */ 1365vaddr_t 1366pmap_steal_memory(vsize_t size, vaddr_t *start, vaddr_t *end) 1367{ 1368 int segno; 1369 u_int npg; 1370 vaddr_t va; 1371 paddr_t pa; 1372 struct vm_physseg *seg; 1373 1374 size = round_page(size); 1375 npg = atop(size); 1376 1377 for (segno = 0, seg = vm_physmem; segno < vm_nphysseg; segno++, seg++) { 1378 if (seg->avail_end - seg->avail_start < npg) 1379 continue; 1380 /* 1381 * We can only steal at an ``unused'' segment boundary, 1382 * i.e. either at the start or at the end. 1383 */ 1384 if (seg->avail_start == seg->start || 1385 seg->avail_end == seg->end) 1386 break; 1387 } 1388 if (segno == vm_nphysseg) 1389 va = 0; 1390 else { 1391 if (seg->avail_start == seg->start) { 1392 pa = ptoa(seg->avail_start); 1393 seg->avail_start += npg; 1394 seg->start += npg; 1395 } else { 1396 pa = ptoa(seg->avail_end) - size; 1397 seg->avail_end -= npg; 1398 seg->end -= npg; 1399 } 1400 /* 1401 * If all the segment has been consumed now, remove it. 1402 * Note that the crash dump code still knows about it 1403 * and will dump it correctly. 1404 */ 1405 if (seg->start == seg->end) { 1406 if (vm_nphysseg-- == 1) 1407 panic("pmap_steal_memory: out of memory"); 1408 while (segno < vm_nphysseg) { 1409 seg[0] = seg[1]; /* struct copy */ 1410 seg++; 1411 segno++; 1412 } 1413 } 1414 1415 va = (vaddr_t)pa; /* 1:1 mapping */ 1416 bzero((void *)va, size); 1417 } 1418 1419 if (start != NULL) 1420 *start = VM_MIN_KERNEL_ADDRESS; 1421 if (end != NULL) 1422 *end = VM_MAX_KERNEL_ADDRESS; 1423 1424 return (va); 1425} 1426 1427void *msgbuf_addr; 1428 1429/* 1430 * Initialize pmap setup. 1431 * ALL of the code which deals with avail needs rewritten as an actual 1432 * memory allocation. 1433 */ 1434void 1435pmap_bootstrap(u_int kernelstart, u_int kernelend) 1436{ 1437 struct mem_region *mp; 1438 int i, k; 1439 struct pmapvp *vp1; 1440 struct pmapvp *vp2; 1441 extern vaddr_t ppc_kvm_stolen; 1442 1443 /* 1444 * set the page size (default value is 4K which is ok) 1445 */ 1446 uvm_setpagesize(); 1447 1448 /* 1449 * Get memory. 1450 */ 1451 pmap_avail_setup(); 1452 1453 /* 1454 * Page align all regions. 1455 * Non-page memory isn't very interesting to us. 1456 * Also, sort the entries for ascending addresses. 1457 */ 1458 kernelstart = trunc_page(kernelstart); 1459 kernelend = round_page(kernelend); 1460 pmap_remove_avail(kernelstart, kernelend); 1461 1462 msgbuf_addr = pmap_steal_avail(MSGBUFSIZE,4); 1463 1464#ifdef DEBUG 1465 for (mp = pmap_avail; mp->size; mp++) { 1466 bzero((void *)mp->start, mp->size); 1467 } 1468#endif 1469 1470#define HTABENTS_32 1024 1471#define HTABENTS_64 2048 1472 1473 if (ppc_proc_is_64b) { 1474 pmap_ptab_cnt = HTABENTS_64; 1475 while (pmap_ptab_cnt * 2 < physmem) 1476 pmap_ptab_cnt <<= 1; 1477 } else { 1478 pmap_ptab_cnt = HTABENTS_32; 1479 while (HTABSIZE_32 < (ptoa(physmem) >> 7)) 1480 pmap_ptab_cnt <<= 1; 1481 } 1482 /* 1483 * allocate suitably aligned memory for HTAB 1484 */ 1485 if (ppc_proc_is_64b) { 1486 pmap_ptable64 = pmap_steal_avail(HTABMEMSZ_64, HTABMEMSZ_64); 1487 bzero((void *)pmap_ptable64, HTABMEMSZ_64); 1488 pmap_ptab_mask = pmap_ptab_cnt - 1; 1489 } else { 1490 pmap_ptable32 = pmap_steal_avail(HTABSIZE_32, HTABSIZE_32); 1491 bzero((void *)pmap_ptable32, HTABSIZE_32); 1492 pmap_ptab_mask = pmap_ptab_cnt - 1; 1493 } 1494 1495 /* allocate v->p mappings for pmap_kernel() */ 1496 for (i = 0; i < VP_SR_SIZE; i++) { 1497 pmap_kernel()->pm_vp[i] = NULL; 1498 } 1499 vp1 = pmap_steal_avail(sizeof (struct pmapvp), 4); 1500 bzero (vp1, sizeof(struct pmapvp)); 1501 pmap_kernel()->pm_vp[PPC_KERNEL_SR] = vp1; 1502 for (i = 0; i < VP_IDX1_SIZE; i++) { 1503 vp2 = vp1->vp[i] = pmap_steal_avail(sizeof (struct pmapvp), 4); 1504 bzero (vp2, sizeof(struct pmapvp)); 1505 for (k = 0; k < VP_IDX2_SIZE; k++) { 1506 struct pte_desc *pted; 1507 pted = pmap_steal_avail(sizeof (struct pte_desc), 4); 1508 bzero (pted, sizeof (struct pte_desc)); 1509 vp2->vp[k] = pted; 1510 } 1511 } 1512 1513 /* 1514 * Initialize kernel pmap and hardware. 1515 */ 1516#if NPMAPS >= PPC_KERNEL_SEGMENT / 16 1517 usedsr[PPC_KERNEL_SEGMENT / 16 / (sizeof usedsr[0] * 8)] 1518 |= 1 << ((PPC_KERNEL_SEGMENT / 16) % (sizeof usedsr[0] * 8)); 1519#endif 1520 for (i = 0; i < 16; i++) 1521 pmap_kernel()->pm_sr[i] = (PPC_KERNEL_SEG0 + i) | SR_NOEXEC; 1522 1523 if (ppc_nobat) { 1524 vp1 = pmap_steal_avail(sizeof (struct pmapvp), 4); 1525 bzero (vp1, sizeof(struct pmapvp)); 1526 pmap_kernel()->pm_vp[0] = vp1; 1527 for (i = 0; i < VP_IDX1_SIZE; i++) { 1528 vp2 = vp1->vp[i] = 1529 pmap_steal_avail(sizeof (struct pmapvp), 4); 1530 bzero (vp2, sizeof(struct pmapvp)); 1531 for (k = 0; k < VP_IDX2_SIZE; k++) { 1532 struct pte_desc *pted; 1533 pted = pmap_steal_avail(sizeof (struct pte_desc), 4); 1534 bzero (pted, sizeof (struct pte_desc)); 1535 vp2->vp[k] = pted; 1536 } 1537 } 1538 1539 /* first segment contains executable pages */ 1540 pmap_kernel()->pm_exec[0]++; 1541 pmap_kernel()->pm_sr[0] &= ~SR_NOEXEC; 1542 } else { 1543 /* 1544 * Setup fixed BAT registers. 1545 * 1546 * Note that we still run in real mode, and the BAT 1547 * registers were cleared in cpu_bootstrap(). 1548 */ 1549 battable[0].batl = BATL(0x00000000, BAT_M); 1550 if (physmem > atop(0x08000000)) 1551 battable[0].batu = BATU(0x00000000, BAT_BL_256M); 1552 else 1553 battable[0].batu = BATU(0x00000000, BAT_BL_128M); 1554 1555 /* Map physical memory with BATs. */ 1556 if (physmem > atop(0x10000000)) { 1557 battable[0x1].batl = BATL(0x10000000, BAT_M); 1558 battable[0x1].batu = BATU(0x10000000, BAT_BL_256M); 1559 } 1560 if (physmem > atop(0x20000000)) { 1561 battable[0x2].batl = BATL(0x20000000, BAT_M); 1562 battable[0x2].batu = BATU(0x20000000, BAT_BL_256M); 1563 } 1564 if (physmem > atop(0x30000000)) { 1565 battable[0x3].batl = BATL(0x30000000, BAT_M); 1566 battable[0x3].batu = BATU(0x30000000, BAT_BL_256M); 1567 } 1568 if (physmem > atop(0x40000000)) { 1569 battable[0x4].batl = BATL(0x40000000, BAT_M); 1570 battable[0x4].batu = BATU(0x40000000, BAT_BL_256M); 1571 } 1572 if (physmem > atop(0x50000000)) { 1573 battable[0x5].batl = BATL(0x50000000, BAT_M); 1574 battable[0x5].batu = BATU(0x50000000, BAT_BL_256M); 1575 } 1576 if (physmem > atop(0x60000000)) { 1577 battable[0x6].batl = BATL(0x60000000, BAT_M); 1578 battable[0x6].batu = BATU(0x60000000, BAT_BL_256M); 1579 } 1580 if (physmem > atop(0x70000000)) { 1581 battable[0x7].batl = BATL(0x70000000, BAT_M); 1582 battable[0x7].batu = BATU(0x70000000, BAT_BL_256M); 1583 } 1584 } 1585 1586 ppc_kvm_stolen += reserve_dumppages( (caddr_t)(VM_MIN_KERNEL_ADDRESS + 1587 ppc_kvm_stolen)); 1588 1589 pmap_avail_fixup(); 1590 for (mp = pmap_avail; mp->size; mp++) { 1591 if (mp->start > 0x80000000) 1592 continue; 1593 if (mp->start + mp->size > 0x80000000) 1594 mp->size = 0x80000000 - mp->start; 1595 uvm_page_physload(atop(mp->start), atop(mp->start+mp->size), 1596 atop(mp->start), atop(mp->start+mp->size), 0); 1597 } 1598} 1599 1600void 1601pmap_enable_mmu(void) 1602{ 1603 uint32_t scratch, sdr1; 1604 int i; 1605 1606 /* 1607 * For the PowerPC 970, ACCR = 3 inhibits loads and stores to 1608 * pages with PTE_AC_64. This is for execute-only mappings. 1609 */ 1610 if (ppc_proc_is_64b) 1611 asm volatile ("mtspr 29, %0" :: "r" (3)); 1612 1613 if (!ppc_nobat) { 1614 extern caddr_t etext; 1615 1616 /* DBAT0 used for initial segment */ 1617 ppc_mtdbat0l(battable[0].batl); 1618 ppc_mtdbat0u(battable[0].batu); 1619 1620 /* IBAT0 only covering the kernel .text */ 1621 ppc_mtibat0l(battable[0].batl); 1622 if (round_page((vaddr_t)&etext) < 8*1024*1024) 1623 ppc_mtibat0u(BATU(0x00000000, BAT_BL_8M)); 1624 else 1625 ppc_mtibat0u(BATU(0x00000000, BAT_BL_16M)); 1626 } 1627 1628 for (i = 0; i < 16; i++) 1629 ppc_mtsrin(PPC_KERNEL_SEG0 + i, i << ADDR_SR_SHIFT); 1630 1631 if (ppc_proc_is_64b) 1632 sdr1 = (uint32_t)pmap_ptable64 | HTABSIZE_64; 1633 else 1634 sdr1 = (uint32_t)pmap_ptable32 | (pmap_ptab_mask >> 10); 1635 1636 asm volatile ("sync; mtsdr1 %0; isync" :: "r"(sdr1)); 1637 tlbia(); 1638 1639 asm volatile ("eieio; mfmsr %0; ori %0,%0,%1; mtmsr %0; sync; isync" 1640 : "=r"(scratch) : "K"(PSL_IR|PSL_DR|PSL_ME|PSL_RI)); 1641} 1642 1643/* 1644 * activate a pmap entry 1645 * All PTE entries exist in the same hash table. 1646 * Segment registers are filled on exit to user mode. 1647 */ 1648void 1649pmap_activate(struct proc *p) 1650{ 1651 struct pcb *pcb = &p->p_addr->u_pcb; 1652 1653 /* Set the current pmap. */ 1654 pcb->pcb_pm = p->p_vmspace->vm_map.pmap; 1655 pmap_extract(pmap_kernel(), 1656 (vaddr_t)pcb->pcb_pm, (paddr_t *)&pcb->pcb_pmreal); 1657 curcpu()->ci_curpm = pcb->pcb_pmreal; 1658} 1659 1660/* 1661 * deactivate a pmap entry 1662 * NOOP on powerpc 1663 */ 1664void 1665pmap_deactivate(struct proc *p) 1666{ 1667} 1668 1669/* 1670 * pmap_extract: extract a PA for the given VA 1671 */ 1672 1673boolean_t 1674pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pa) 1675{ 1676 struct pte_desc *pted; 1677 1678 if (pm == pmap_kernel() && va < physmaxaddr) { 1679 *pa = va; 1680 return TRUE; 1681 } 1682 1683 PMAP_VP_LOCK(pm); 1684 pted = pmap_vp_lookup(pm, va); 1685 if (pted == NULL || !PTED_VALID(pted)) { 1686 PMAP_VP_UNLOCK(pm); 1687 return FALSE; 1688 } 1689 1690 if (ppc_proc_is_64b) 1691 *pa = (pted->p.pted_pte64.pte_lo & PTE_RPGN_64) | 1692 (va & ~PTE_RPGN_64); 1693 else 1694 *pa = (pted->p.pted_pte32.pte_lo & PTE_RPGN_32) | 1695 (va & ~PTE_RPGN_32); 1696 1697 PMAP_VP_UNLOCK(pm); 1698 return TRUE; 1699} 1700 1701#ifdef ALTIVEC 1702/* 1703 * Read an instruction from a given virtual memory address. 1704 * Execute-only protection is bypassed. 1705 */ 1706int 1707pmap_copyinsn(pmap_t pm, vaddr_t va, uint32_t *insn) 1708{ 1709 struct pte_desc *pted; 1710 paddr_t pa; 1711 1712 /* Assume pm != pmap_kernel(). */ 1713 if (ppc_proc_is_64b) { 1714 /* inline pmap_extract */ 1715 PMAP_VP_LOCK(pm); 1716 pted = pmap_vp_lookup(pm, va); 1717 if (pted == NULL || !PTED_VALID(pted)) { 1718 PMAP_VP_UNLOCK(pm); 1719 return EFAULT; 1720 } 1721 pa = (pted->p.pted_pte64.pte_lo & PTE_RPGN_64) | 1722 (va & ~PTE_RPGN_64); 1723 PMAP_VP_UNLOCK(pm); 1724 1725 if (pa > physmaxaddr - sizeof(*insn)) 1726 return EFAULT; 1727 *insn = *(uint32_t *)pa; 1728 return 0; 1729 } else 1730 return copyin32((void *)va, insn); 1731} 1732#endif 1733 1734u_int32_t 1735pmap_setusr(pmap_t pm, vaddr_t va) 1736{ 1737 u_int32_t sr; 1738 u_int32_t oldsr; 1739 1740 sr = ptesr(pm->pm_sr, va); 1741 1742 /* user address range lock?? */ 1743 asm volatile ("mfsr %0,%1" : "=r" (oldsr): "n"(PPC_USER_SR)); 1744 asm volatile ("isync; mtsr %0,%1; isync" :: "n"(PPC_USER_SR), "r"(sr)); 1745 return oldsr; 1746} 1747 1748void 1749pmap_popusr(u_int32_t sr) 1750{ 1751 asm volatile ("isync; mtsr %0,%1; isync" 1752 :: "n"(PPC_USER_SR), "r"(sr)); 1753} 1754 1755int 1756_copyin(const void *udaddr, void *kaddr, size_t len) 1757{ 1758 void *p; 1759 size_t l; 1760 u_int32_t oldsr; 1761 faultbuf env; 1762 void *oldh = curpcb->pcb_onfault; 1763 1764 while (len > 0) { 1765 p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK); 1766 l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p; 1767 if (l > len) 1768 l = len; 1769 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr); 1770 if (setfault(&env)) { 1771 pmap_popusr(oldsr); 1772 curpcb->pcb_onfault = oldh; 1773 return EFAULT; 1774 } 1775 bcopy(p, kaddr, l); 1776 pmap_popusr(oldsr); 1777 udaddr += l; 1778 kaddr += l; 1779 len -= l; 1780 } 1781 curpcb->pcb_onfault = oldh; 1782 return 0; 1783} 1784 1785int 1786copyout(const void *kaddr, void *udaddr, size_t len) 1787{ 1788 void *p; 1789 size_t l; 1790 u_int32_t oldsr; 1791 faultbuf env; 1792 void *oldh = curpcb->pcb_onfault; 1793 1794 while (len > 0) { 1795 p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK); 1796 l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p; 1797 if (l > len) 1798 l = len; 1799 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr); 1800 if (setfault(&env)) { 1801 pmap_popusr(oldsr); 1802 curpcb->pcb_onfault = oldh; 1803 return EFAULT; 1804 } 1805 1806 bcopy(kaddr, p, l); 1807 pmap_popusr(oldsr); 1808 udaddr += l; 1809 kaddr += l; 1810 len -= l; 1811 } 1812 curpcb->pcb_onfault = oldh; 1813 return 0; 1814} 1815 1816int 1817copyin32(const uint32_t *udaddr, uint32_t *kaddr) 1818{ 1819 volatile uint32_t *p; 1820 u_int32_t oldsr; 1821 faultbuf env; 1822 void *oldh = curpcb->pcb_onfault; 1823 1824 if ((u_int)udaddr & 0x3) 1825 return EFAULT; 1826 1827 p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK); 1828 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr); 1829 if (setfault(&env)) { 1830 pmap_popusr(oldsr); 1831 curpcb->pcb_onfault = oldh; 1832 return EFAULT; 1833 } 1834 *kaddr = *p; 1835 pmap_popusr(oldsr); 1836 curpcb->pcb_onfault = oldh; 1837 return 0; 1838} 1839 1840int 1841_copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done) 1842{ 1843 const u_char *uaddr = udaddr; 1844 u_char *kp = kaddr; 1845 u_char *up; 1846 u_char c; 1847 void *p; 1848 size_t l; 1849 u_int32_t oldsr; 1850 int cnt = 0; 1851 faultbuf env; 1852 void *oldh = curpcb->pcb_onfault; 1853 1854 while (len > 0) { 1855 p = PPC_USER_ADDR + ((u_int)uaddr & ~PPC_SEGMENT_MASK); 1856 l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p; 1857 up = p; 1858 if (l > len) 1859 l = len; 1860 len -= l; 1861 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)uaddr); 1862 if (setfault(&env)) { 1863 if (done != NULL) 1864 *done = cnt; 1865 1866 curpcb->pcb_onfault = oldh; 1867 pmap_popusr(oldsr); 1868 return EFAULT; 1869 } 1870 while (l > 0) { 1871 c = *up; 1872 *kp = c; 1873 if (c == 0) { 1874 if (done != NULL) 1875 *done = cnt + 1; 1876 1877 curpcb->pcb_onfault = oldh; 1878 pmap_popusr(oldsr); 1879 return 0; 1880 } 1881 up++; 1882 kp++; 1883 l--; 1884 cnt++; 1885 uaddr++; 1886 } 1887 pmap_popusr(oldsr); 1888 } 1889 curpcb->pcb_onfault = oldh; 1890 if (done != NULL) 1891 *done = cnt; 1892 1893 return ENAMETOOLONG; 1894} 1895 1896int 1897copyoutstr(const void *kaddr, void *udaddr, size_t len, size_t *done) 1898{ 1899 u_char *uaddr = (void *)udaddr; 1900 const u_char *kp = kaddr; 1901 u_char *up; 1902 u_char c; 1903 void *p; 1904 size_t l; 1905 u_int32_t oldsr; 1906 int cnt = 0; 1907 faultbuf env; 1908 void *oldh = curpcb->pcb_onfault; 1909 1910 while (len > 0) { 1911 p = PPC_USER_ADDR + ((u_int)uaddr & ~PPC_SEGMENT_MASK); 1912 l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p; 1913 up = p; 1914 if (l > len) 1915 l = len; 1916 len -= l; 1917 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)uaddr); 1918 if (setfault(&env)) { 1919 if (done != NULL) 1920 *done = cnt; 1921 1922 curpcb->pcb_onfault = oldh; 1923 pmap_popusr(oldsr); 1924 return EFAULT; 1925 } 1926 while (l > 0) { 1927 c = *kp; 1928 *up = c; 1929 if (c == 0) { 1930 if (done != NULL) 1931 *done = cnt + 1; 1932 1933 curpcb->pcb_onfault = oldh; 1934 pmap_popusr(oldsr); 1935 return 0; 1936 } 1937 up++; 1938 kp++; 1939 l--; 1940 cnt++; 1941 uaddr++; 1942 } 1943 pmap_popusr(oldsr); 1944 } 1945 curpcb->pcb_onfault = oldh; 1946 if (done != NULL) 1947 *done = cnt; 1948 1949 return ENAMETOOLONG; 1950} 1951 1952/* 1953 * sync instruction cache for user virtual address. 1954 * The address WAS JUST MAPPED, so we have a VALID USERSPACE mapping 1955 */ 1956void 1957pmap_syncicache_user_virt(pmap_t pm, vaddr_t va) 1958{ 1959 vaddr_t start; 1960 int oldsr; 1961 1962 if (pm != pmap_kernel()) { 1963 start = ((u_int)PPC_USER_ADDR + ((u_int)va & 1964 ~PPC_SEGMENT_MASK)); 1965 /* will only ever be page size, will not cross segments */ 1966 1967 /* USER SEGMENT LOCK - MPXXX */ 1968 oldsr = pmap_setusr(pm, va); 1969 } else { 1970 start = va; /* flush mapped page */ 1971 } 1972 1973 syncicache((void *)start, PAGE_SIZE); 1974 1975 if (pm != pmap_kernel()) { 1976 pmap_popusr(oldsr); 1977 /* USER SEGMENT UNLOCK -MPXXX */ 1978 } 1979} 1980 1981void 1982pmap_pted_ro(struct pte_desc *pted, vm_prot_t prot) 1983{ 1984 if (ppc_proc_is_64b) 1985 pmap_pted_ro64(pted, prot); 1986 else 1987 pmap_pted_ro32(pted, prot); 1988} 1989 1990void 1991pmap_pted_ro64(struct pte_desc *pted, vm_prot_t prot) 1992{ 1993 pmap_t pm = pted->pted_pmap; 1994 vaddr_t va = pted->pted_va & ~PAGE_MASK; 1995 struct vm_page *pg; 1996 void *pte; 1997 int s; 1998 1999 pg = PHYS_TO_VM_PAGE(pted->p.pted_pte64.pte_lo & PTE_RPGN_64); 2000 if (pg->pg_flags & PG_PMAP_EXE) { 2001 if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE) { 2002 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); 2003 } else { 2004 pmap_syncicache_user_virt(pm, va); 2005 } 2006 } 2007 2008 pted->p.pted_pte64.pte_lo &= ~PTE_PP_64; 2009 pted->p.pted_pte64.pte_lo |= PTE_RO_64; 2010 2011 if ((prot & PROT_EXEC) == 0) 2012 pted->p.pted_pte64.pte_lo |= PTE_N_64; 2013 2014 if ((prot & (PROT_READ | PROT_WRITE)) == 0) 2015 pted->p.pted_pte64.pte_lo |= PTE_AC_64; 2016 2017 PMAP_HASH_LOCK(s); 2018 if ((pte = pmap_ptedinhash(pted)) != NULL) { 2019 struct pte_64 *ptp64 = pte; 2020 2021 pte_del(ptp64, va); 2022 2023 if (PTED_MANAGED(pted)) { /* XXX */ 2024 pmap_attr_save(ptp64->pte_lo & PTE_RPGN_64, 2025 ptp64->pte_lo & (PTE_REF_64|PTE_CHG_64)); 2026 } 2027 2028 /* Add a Page Table Entry, section 7.6.3.1. */ 2029 ptp64->pte_lo = pted->p.pted_pte64.pte_lo; 2030 eieio(); /* Order 1st PTE update before 2nd. */ 2031 ptp64->pte_hi |= PTE_VALID_64; 2032 sync(); /* Ensure updates completed. */ 2033 } 2034 PMAP_HASH_UNLOCK(s); 2035} 2036 2037void 2038pmap_pted_ro32(struct pte_desc *pted, vm_prot_t prot) 2039{ 2040 pmap_t pm = pted->pted_pmap; 2041 vaddr_t va = pted->pted_va & ~PAGE_MASK; 2042 struct vm_page *pg; 2043 void *pte; 2044 int s; 2045 2046 pg = PHYS_TO_VM_PAGE(pted->p.pted_pte32.pte_lo & PTE_RPGN_32); 2047 if (pg->pg_flags & PG_PMAP_EXE) { 2048 if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE) { 2049 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); 2050 } else { 2051 pmap_syncicache_user_virt(pm, va); 2052 } 2053 } 2054 2055 pted->p.pted_pte32.pte_lo &= ~PTE_PP_32; 2056 pted->p.pted_pte32.pte_lo |= PTE_RO_32; 2057 2058 PMAP_HASH_LOCK(s); 2059 if ((pte = pmap_ptedinhash(pted)) != NULL) { 2060 struct pte_32 *ptp32 = pte; 2061 2062 pte_del(ptp32, va); 2063 2064 if (PTED_MANAGED(pted)) { /* XXX */ 2065 pmap_attr_save(ptp32->pte_lo & PTE_RPGN_32, 2066 ptp32->pte_lo & (PTE_REF_32|PTE_CHG_32)); 2067 } 2068 2069 /* Add a Page Table Entry, section 7.6.3.1. */ 2070 ptp32->pte_lo &= ~(PTE_CHG_32|PTE_PP_32); 2071 ptp32->pte_lo |= PTE_RO_32; 2072 eieio(); /* Order 1st PTE update before 2nd. */ 2073 ptp32->pte_hi |= PTE_VALID_32; 2074 sync(); /* Ensure updates completed. */ 2075 } 2076 PMAP_HASH_UNLOCK(s); 2077} 2078 2079/* 2080 * Lower the protection on the specified physical page. 2081 * 2082 * There are only two cases, either the protection is going to 0, 2083 * or it is going to read-only. 2084 */ 2085void 2086pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 2087{ 2088 struct pte_desc *pted; 2089 void *pte; 2090 pmap_t pm; 2091 int s; 2092 2093 if (prot == PROT_NONE) { 2094 mtx_enter(&pg->mdpage.pv_mtx); 2095 while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) { 2096 pmap_reference(pted->pted_pmap); 2097 pm = pted->pted_pmap; 2098 mtx_leave(&pg->mdpage.pv_mtx); 2099 2100 PMAP_VP_LOCK(pm); 2101 2102 /* 2103 * We dropped the pvlist lock before grabbing 2104 * the pmap lock to avoid lock ordering 2105 * problems. This means we have to check the 2106 * pvlist again since somebody else might have 2107 * modified it. All we care about is that the 2108 * pvlist entry matches the pmap we just 2109 * locked. If it doesn't, unlock the pmap and 2110 * try again. 2111 */ 2112 mtx_enter(&pg->mdpage.pv_mtx); 2113 if ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) == NULL || 2114 pted->pted_pmap != pm) { 2115 mtx_leave(&pg->mdpage.pv_mtx); 2116 PMAP_VP_UNLOCK(pm); 2117 pmap_destroy(pm); 2118 mtx_enter(&pg->mdpage.pv_mtx); 2119 continue; 2120 } 2121 2122 PMAP_HASH_LOCK(s); 2123 if ((pte = pmap_ptedinhash(pted)) != NULL) 2124 pte_zap(pte, pted); 2125 PMAP_HASH_UNLOCK(s); 2126 2127 pted->pted_va &= ~PTED_VA_MANAGED_M; 2128 LIST_REMOVE(pted, pted_pv_list); 2129 mtx_leave(&pg->mdpage.pv_mtx); 2130 2131 pmap_remove_pted(pm, pted); 2132 2133 PMAP_VP_UNLOCK(pm); 2134 pmap_destroy(pm); 2135 mtx_enter(&pg->mdpage.pv_mtx); 2136 } 2137 mtx_leave(&pg->mdpage.pv_mtx); 2138 /* page is being reclaimed, sync icache next use */ 2139 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); 2140 return; 2141 } 2142 2143 mtx_enter(&pg->mdpage.pv_mtx); 2144 LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) 2145 pmap_pted_ro(pted, prot); 2146 mtx_leave(&pg->mdpage.pv_mtx); 2147} 2148 2149void 2150pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 2151{ 2152 if (prot & (PROT_READ | PROT_EXEC)) { 2153 struct pte_desc *pted; 2154 2155 PMAP_VP_LOCK(pm); 2156 while (sva < eva) { 2157 pted = pmap_vp_lookup(pm, sva); 2158 if (pted && PTED_VALID(pted)) 2159 pmap_pted_ro(pted, prot); 2160 sva += PAGE_SIZE; 2161 } 2162 PMAP_VP_UNLOCK(pm); 2163 return; 2164 } 2165 pmap_remove(pm, sva, eva); 2166} 2167 2168/* 2169 * Restrict given range to physical memory 2170 */ 2171void 2172pmap_real_memory(paddr_t *start, vsize_t *size) 2173{ 2174 struct mem_region *mp; 2175 2176 for (mp = pmap_mem; mp->size; mp++) { 2177 if (((*start + *size) > mp->start) 2178 && (*start < (mp->start + mp->size))) 2179 { 2180 if (*start < mp->start) { 2181 *size -= mp->start - *start; 2182 *start = mp->start; 2183 } 2184 if ((*start + *size) > (mp->start + mp->size)) 2185 *size = mp->start + mp->size - *start; 2186 return; 2187 } 2188 } 2189 *size = 0; 2190} 2191 2192void 2193pmap_init() 2194{ 2195 pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0, 2196 "pmap", NULL); 2197 pool_setlowat(&pmap_pmap_pool, 2); 2198 pool_init(&pmap_vp_pool, sizeof(struct pmapvp), 0, IPL_VM, 0, 2199 "vp", &pool_allocator_single); 2200 pool_setlowat(&pmap_vp_pool, 10); 2201 pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0, 2202 "pted", NULL); 2203 pool_setlowat(&pmap_pted_pool, 20); 2204 2205 pmap_initialized = 1; 2206} 2207 2208void 2209pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len) 2210{ 2211 paddr_t pa; 2212 vsize_t clen; 2213 2214 while (len > 0) { 2215 /* add one to always round up to the next page */ 2216 clen = round_page(va + 1) - va; 2217 if (clen > len) 2218 clen = len; 2219 2220 if (pmap_extract(pr->ps_vmspace->vm_map.pmap, va, &pa)) { 2221 syncicache((void *)pa, clen); 2222 } 2223 2224 len -= clen; 2225 va += clen; 2226 } 2227} 2228 2229/* 2230 * There are two routines, pte_spill_r and pte_spill_v 2231 * the _r version only handles kernel faults which are not user 2232 * accesses. The _v version handles all user faults and kernel copyin/copyout 2233 * "user" accesses. 2234 */ 2235int 2236pte_spill_r(u_int32_t va, u_int32_t msr, u_int32_t dsisr, int exec_fault) 2237{ 2238 pmap_t pm; 2239 struct pte_desc *pted; 2240 struct pte_desc pted_store; 2241 2242 /* lookup is done physical to prevent faults */ 2243 2244 /* 2245 * This function only handles kernel faults, not supervisor copyins. 2246 */ 2247 if (msr & PSL_PR) 2248 return 0; 2249 2250 /* if copyin, throw to full excption handler */ 2251 if (VP_SR(va) == PPC_USER_SR) 2252 return 0; 2253 2254 pm = pmap_kernel(); 2255 2256 /* 0 - physmaxaddr mapped 1-1 */ 2257 if (va < physmaxaddr) { 2258 u_int32_t aligned_va; 2259 vm_prot_t prot = PROT_READ | PROT_WRITE; 2260 extern caddr_t kernel_text; 2261 extern caddr_t etext; 2262 2263 pted = &pted_store; 2264 2265 if (va >= trunc_page((vaddr_t)&kernel_text) && 2266 va < round_page((vaddr_t)&etext)) { 2267 prot |= PROT_EXEC; 2268 } 2269 2270 aligned_va = trunc_page(va); 2271 if (ppc_proc_is_64b) { 2272 pmap_fill_pte64(pm, aligned_va, aligned_va, 2273 pted, prot, PMAP_CACHE_WB); 2274 pte_insert64(pted); 2275 } else { 2276 pmap_fill_pte32(pm, aligned_va, aligned_va, 2277 pted, prot, PMAP_CACHE_WB); 2278 pte_insert32(pted); 2279 } 2280 return 1; 2281 } 2282 2283 return pte_spill_v(pm, va, dsisr, exec_fault); 2284} 2285 2286int 2287pte_spill_v(pmap_t pm, u_int32_t va, u_int32_t dsisr, int exec_fault) 2288{ 2289 struct pte_desc *pted; 2290 int inserted = 0; 2291 2292 /* 2293 * DSISR_DABR is set if the PowerPC 970 attempted to read or 2294 * write an execute-only page. 2295 */ 2296 if (dsisr & DSISR_DABR) 2297 return 0; 2298 2299 /* 2300 * If the current mapping is RO and the access was a write 2301 * we return 0 2302 */ 2303 PMAP_VP_LOCK(pm); 2304 pted = pmap_vp_lookup(pm, va); 2305 if (pted == NULL || !PTED_VALID(pted)) 2306 goto out; 2307 2308 /* Attempted to write a read-only page. */ 2309 if (dsisr & DSISR_STORE) { 2310 if (ppc_proc_is_64b) { 2311 if ((pted->p.pted_pte64.pte_lo & PTE_PP_64) == 2312 PTE_RO_64) 2313 goto out; 2314 } else { 2315 if ((pted->p.pted_pte32.pte_lo & PTE_PP_32) == 2316 PTE_RO_32) 2317 goto out; 2318 } 2319 } 2320 2321 /* Attempted to execute non-executable page. */ 2322 if ((exec_fault != 0) && ((pted->pted_va & PTED_VA_EXEC_M) == 0)) 2323 goto out; 2324 2325 inserted = 1; 2326 if (ppc_proc_is_64b) 2327 pte_insert64(pted); 2328 else 2329 pte_insert32(pted); 2330 2331out: 2332 PMAP_VP_UNLOCK(pm); 2333 return (inserted); 2334} 2335 2336 2337/* 2338 * should pte_insert code avoid wired mappings? 2339 * is the stack safe? 2340 * is the pted safe? (physical) 2341 * -ugh 2342 */ 2343void 2344pte_insert64(struct pte_desc *pted) 2345{ 2346 struct pte_64 *ptp64; 2347 int off, secondary; 2348 int sr, idx, i; 2349 void *pte; 2350 int s; 2351 2352 PMAP_HASH_LOCK(s); 2353 if ((pte = pmap_ptedinhash(pted)) != NULL) 2354 pte_zap(pte, pted); 2355 2356 pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M); 2357 2358 sr = ptesr(pted->pted_pmap->pm_sr, pted->pted_va); 2359 idx = pteidx(sr, pted->pted_va); 2360 2361 /* 2362 * instead of starting at the beginning of each pteg, 2363 * the code should pick a random location with in the primary 2364 * then search all of the entries, then if not yet found, 2365 * do the same for the secondary. 2366 * this would reduce the frontloading of the pteg. 2367 */ 2368 2369 /* first just try fill of primary hash */ 2370 ptp64 = pmap_ptable64 + (idx) * 8; 2371 for (i = 0; i < 8; i++) { 2372 if (ptp64[i].pte_hi & PTE_VALID_64) 2373 continue; 2374 2375 pted->pted_va |= i; 2376 2377 /* Add a Page Table Entry, section 7.6.3.1. */ 2378 ptp64[i].pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64; 2379 ptp64[i].pte_lo = pted->p.pted_pte64.pte_lo; 2380 eieio(); /* Order 1st PTE update before 2nd. */ 2381 ptp64[i].pte_hi |= PTE_VALID_64; 2382 sync(); /* Ensure updates completed. */ 2383 2384 goto out; 2385 } 2386 2387 /* try fill of secondary hash */ 2388 ptp64 = pmap_ptable64 + (idx ^ pmap_ptab_mask) * 8; 2389 for (i = 0; i < 8; i++) { 2390 if (ptp64[i].pte_hi & PTE_VALID_64) 2391 continue; 2392 2393 pted->pted_va |= (i | PTED_VA_HID_M); 2394 2395 /* Add a Page Table Entry, section 7.6.3.1. */ 2396 ptp64[i].pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64; 2397 ptp64[i].pte_lo = pted->p.pted_pte64.pte_lo; 2398 eieio(); /* Order 1st PTE update before 2nd. */ 2399 ptp64[i].pte_hi |= (PTE_HID_64|PTE_VALID_64); 2400 sync(); /* Ensure updates completed. */ 2401 2402 goto out; 2403 } 2404 2405 /* need decent replacement algorithm */ 2406 off = ppc_mftb(); 2407 secondary = off & 8; 2408 2409 2410 pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M); 2411 2412 idx = (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)); 2413 2414 ptp64 = pmap_ptable64 + (idx * 8); 2415 ptp64 += PTED_PTEGIDX(pted); /* increment by index into pteg */ 2416 2417 if (ptp64->pte_hi & PTE_VALID_64) { 2418 vaddr_t va; 2419 2420 /* Bits 9-19 */ 2421 idx = (idx ^ ((ptp64->pte_hi & PTE_HID_64) ? 2422 pmap_ptab_mask : 0)); 2423 va = (ptp64->pte_hi >> PTE_VSID_SHIFT_64) ^ idx; 2424 va <<= ADDR_PIDX_SHIFT; 2425 /* Bits 4-8 */ 2426 va |= (ptp64->pte_hi & PTE_API_64) << ADDR_API_SHIFT_32; 2427 /* Bits 0-3 */ 2428 va |= (ptp64->pte_hi >> PTE_VSID_SHIFT_64) 2429 << ADDR_SR_SHIFT; 2430 2431 pte_del(ptp64, va); 2432 2433 pmap_attr_save(ptp64->pte_lo & PTE_RPGN_64, 2434 ptp64->pte_lo & (PTE_REF_64|PTE_CHG_64)); 2435 } 2436 2437 /* Add a Page Table Entry, section 7.6.3.1. */ 2438 ptp64->pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64; 2439 if (secondary) 2440 ptp64->pte_hi |= PTE_HID_64; 2441 ptp64->pte_lo = pted->p.pted_pte64.pte_lo; 2442 eieio(); /* Order 1st PTE update before 2nd. */ 2443 ptp64->pte_hi |= PTE_VALID_64; 2444 sync(); /* Ensure updates completed. */ 2445 2446out: 2447 PMAP_HASH_UNLOCK(s); 2448} 2449 2450void 2451pte_insert32(struct pte_desc *pted) 2452{ 2453 struct pte_32 *ptp32; 2454 int off, secondary; 2455 int sr, idx, i; 2456 void *pte; 2457 int s; 2458 2459 PMAP_HASH_LOCK(s); 2460 if ((pte = pmap_ptedinhash(pted)) != NULL) 2461 pte_zap(pte, pted); 2462 2463 pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M); 2464 2465 sr = ptesr(pted->pted_pmap->pm_sr, pted->pted_va); 2466 idx = pteidx(sr, pted->pted_va); 2467 2468 /* 2469 * instead of starting at the beginning of each pteg, 2470 * the code should pick a random location with in the primary 2471 * then search all of the entries, then if not yet found, 2472 * do the same for the secondary. 2473 * this would reduce the frontloading of the pteg. 2474 */ 2475 2476 /* first just try fill of primary hash */ 2477 ptp32 = pmap_ptable32 + (idx) * 8; 2478 for (i = 0; i < 8; i++) { 2479 if (ptp32[i].pte_hi & PTE_VALID_32) 2480 continue; 2481 2482 pted->pted_va |= i; 2483 2484 /* Add a Page Table Entry, section 7.6.3.1. */ 2485 ptp32[i].pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32; 2486 ptp32[i].pte_lo = pted->p.pted_pte32.pte_lo; 2487 eieio(); /* Order 1st PTE update before 2nd. */ 2488 ptp32[i].pte_hi |= PTE_VALID_32; 2489 sync(); /* Ensure updates completed. */ 2490 2491 goto out; 2492 } 2493 2494 /* try fill of secondary hash */ 2495 ptp32 = pmap_ptable32 + (idx ^ pmap_ptab_mask) * 8; 2496 for (i = 0; i < 8; i++) { 2497 if (ptp32[i].pte_hi & PTE_VALID_32) 2498 continue; 2499 2500 pted->pted_va |= (i | PTED_VA_HID_M); 2501 2502 /* Add a Page Table Entry, section 7.6.3.1. */ 2503 ptp32[i].pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32; 2504 ptp32[i].pte_lo = pted->p.pted_pte32.pte_lo; 2505 eieio(); /* Order 1st PTE update before 2nd. */ 2506 ptp32[i].pte_hi |= (PTE_HID_32|PTE_VALID_32); 2507 sync(); /* Ensure updates completed. */ 2508 2509 goto out; 2510 } 2511 2512 /* need decent replacement algorithm */ 2513 off = ppc_mftb(); 2514 secondary = off & 8; 2515 2516 pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M); 2517 2518 idx = (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)); 2519 2520 ptp32 = pmap_ptable32 + (idx * 8); 2521 ptp32 += PTED_PTEGIDX(pted); /* increment by index into pteg */ 2522 2523 if (ptp32->pte_hi & PTE_VALID_32) { 2524 vaddr_t va; 2525 2526 va = ((ptp32->pte_hi & PTE_API_32) << ADDR_API_SHIFT_32) | 2527 ((((ptp32->pte_hi >> PTE_VSID_SHIFT_32) & SR_VSID) 2528 ^(idx ^ ((ptp32->pte_hi & PTE_HID_32) ? 0x3ff : 0))) 2529 & 0x3ff) << PAGE_SHIFT; 2530 2531 pte_del(ptp32, va); 2532 2533 pmap_attr_save(ptp32->pte_lo & PTE_RPGN_32, 2534 ptp32->pte_lo & (PTE_REF_32|PTE_CHG_32)); 2535 } 2536 2537 /* Add a Page Table Entry, section 7.6.3.1. */ 2538 ptp32->pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32; 2539 if (secondary) 2540 ptp32->pte_hi |= PTE_HID_32; 2541 ptp32->pte_lo = pted->p.pted_pte32.pte_lo; 2542 eieio(); /* Order 1st PTE update before 2nd. */ 2543 ptp32->pte_hi |= PTE_VALID_32; 2544 sync(); /* Ensure updates completed. */ 2545 2546out: 2547 PMAP_HASH_UNLOCK(s); 2548} 2549