pmap.c revision 178626
1/*- 2 * Copyright (C) 2007 Semihalf, Rafal Jaworowski <raj@semihalf.com> 3 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 20 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Some hw specific parts of this pmap were derived or influenced 29 * by NetBSD's ibm4xx pmap module. More generic code is shared with 30 * a few other pmap modules from the FreeBSD tree. 31 */ 32 33 /* 34 * VM layout notes: 35 * 36 * Kernel and user threads run within one common virtual address space 37 * defined by AS=0. 38 * 39 * Virtual address space layout: 40 * ----------------------------- 41 * 0x0000_0000 - 0xbfff_efff : user process 42 * 0xc000_0000 - 0xc1ff_ffff : kernel reserved 43 * 0xc000_0000 - kernelend : kernel code & data 44 * 0xc1ff_c000 - 0xc200_0000 : kstack0 45 * 0xc200_0000 - 0xffef_ffff : KVA 46 * 0xc200_0000 - 0xc200_3fff : reserved for page zero/copy 47 * 0xc200_4000 - ptbl buf end: reserved for ptbl bufs 48 * ptbl buf end- 0xffef_ffff : actual free KVA space 49 * 0xfff0_0000 - 0xffff_ffff : I/O devices region 50 */ 51 52#include <sys/cdefs.h> 53__FBSDID("$FreeBSD: head/sys/powerpc/booke/pmap.c 178626 2008-04-27 21:04:54Z marcel $"); 54 55#include <sys/types.h> 56#include <sys/param.h> 57#include <sys/malloc.h> 58#include <sys/proc.h> 59#include <sys/user.h> 60#include <sys/queue.h> 61#include <sys/systm.h> 62#include <sys/kernel.h> 63#include <sys/msgbuf.h> 64#include <sys/lock.h> 65#include <sys/mutex.h> 66#include <sys/vmmeter.h> 67 68#include <vm/vm.h> 69#include <vm/vm_page.h> 70#include <vm/vm_kern.h> 71#include <vm/vm_pageout.h> 72#include <vm/vm_extern.h> 73#include <vm/vm_object.h> 74#include <vm/vm_param.h> 75#include <vm/vm_map.h> 76#include <vm/vm_pager.h> 77#include <vm/uma.h> 78 79#include <machine/cpu.h> 80#include <machine/pcb.h> 81#include <machine/powerpc.h> 82 83#include <machine/tlb.h> 84#include <machine/spr.h> 85#include <machine/vmparam.h> 86#include <machine/md_var.h> 87#include <machine/mmuvar.h> 88#include <machine/pmap.h> 89#include <machine/pte.h> 90 91#include "mmu_if.h" 92 93#define DEBUG 94#undef DEBUG 95 96#ifdef DEBUG 97#define debugf(fmt, args...) 
printf(fmt, ##args) 98#else 99#define debugf(fmt, args...) 100#endif 101 102#define TODO panic("%s: not implemented", __func__); 103#define memmove(d, s, l) bcopy(s, d, l) 104 105#include "opt_sched.h" 106#ifndef SCHED_4BSD 107#error "e500 only works with SCHED_4BSD which uses a global scheduler lock." 108#endif 109extern struct mtx sched_lock; 110 111/* Kernel physical load address. */ 112extern uint32_t kernload; 113 114struct mem_region availmem_regions[MEM_REGIONS]; 115int availmem_regions_sz; 116 117/* Reserved KVA space and mutex for mmu_booke_zero_page. */ 118static vm_offset_t zero_page_va; 119static struct mtx zero_page_mutex; 120 121/* 122 * Reserved KVA space for mmu_booke_zero_page_idle. This is used 123 * by idle thread only, no lock required. 124 */ 125static vm_offset_t zero_page_idle_va; 126 127/* Reserved KVA space and mutex for mmu_booke_copy_page. */ 128static vm_offset_t copy_page_src_va; 129static vm_offset_t copy_page_dst_va; 130static struct mtx copy_page_mutex; 131 132/**************************************************************************/ 133/* PMAP */ 134/**************************************************************************/ 135 136static void mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t, 137 vm_prot_t, boolean_t); 138 139unsigned int kptbl_min; /* Index of the first kernel ptbl. */ 140unsigned int kernel_ptbls; /* Number of KVA ptbls. */ 141 142static int pagedaemon_waken; 143 144/* 145 * If user pmap is processed with mmu_booke_remove and the resident count 146 * drops to 0, there are no more pages to remove, so we need not continue. 147 */ 148#define PMAP_REMOVE_DONE(pmap) \ 149 ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) 150 151 extern void load_pid0(tlbtid_t); 152 153/**************************************************************************/ 154/* TLB and TID handling */ 155/**************************************************************************/ 156 157/* Translation ID busy table */ 158static volatile pmap_t tidbusy[TID_MAX + 1]; 159 160/* 161 * Actual maximum number of TLB0 entries. 162 * This number differs between e500 core revisions. 163 */ 164u_int32_t tlb0_size; 165u_int32_t tlb0_nways; 166u_int32_t tlb0_nentries_per_way; 167 168#define TLB0_SIZE (tlb0_size) 169#define TLB0_NWAYS (tlb0_nways) 170#define TLB0_ENTRIES_PER_WAY (tlb0_nentries_per_way) 171 172/* Pointer to kernel tlb0 table, allocated in mmu_booke_bootstrap() */ 173tlb_entry_t *tlb0; 174 175/* 176 * Spinlock to assure proper locking between threads and 177 * between tlb miss handler and kernel. 
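 *
 * A note on TLB0 bookkeeping (a summary of the code below, with purely
 * illustrative numbers): TLB0 is set-associative and its geometry is read
 * at boot by tlb0_get_tlbconf(), e.g. a 256-entry, 2-way TLB0 gives
 * TLB0_ENTRIES_PER_WAY = 256 / 2 = 128. The kernel keeps an in-RAM mirror
 * of the entries it has written (the tlb0 table above), and the slot for
 * a given va/way pair is located roughly as
 *
 *   idx = way * TLB0_ENTRIES_PER_WAY + (set index taken from the EPN of va)
 *
 * see tlb0_tableidx() for the exact computation. The spinlock below guards
 * both the mirror table and the hardware update sequence.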
178 */ 179static struct mtx tlb0_mutex; 180 181#define TLB1_SIZE 16 182 183/* In-ram copy of the TLB1 */ 184static tlb_entry_t tlb1[TLB1_SIZE]; 185 186/* Next free entry in the TLB1 */ 187static unsigned int tlb1_idx; 188 189static tlbtid_t tid_alloc(struct pmap *); 190static void tid_flush(tlbtid_t); 191 192extern void tlb1_inval_va(vm_offset_t); 193extern void tlb0_inval_va(vm_offset_t); 194 195static void tlb_print_entry(int, u_int32_t, u_int32_t, u_int32_t, u_int32_t); 196 197static int tlb1_set_entry(vm_offset_t, vm_offset_t, vm_size_t, u_int32_t); 198static void __tlb1_set_entry(unsigned int, vm_offset_t, vm_offset_t, 199 vm_size_t, u_int32_t, unsigned int, unsigned int); 200static void tlb1_write_entry(unsigned int); 201static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); 202static vm_size_t tlb1_mapin_region(vm_offset_t, vm_offset_t, vm_size_t); 203 204static vm_size_t tsize2size(unsigned int); 205static unsigned int size2tsize(vm_size_t); 206static unsigned int ilog2(unsigned int); 207 208static void set_mas4_defaults(void); 209 210static void tlb0_inval_entry(vm_offset_t, unsigned int); 211static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); 212static void tlb0_write_entry(unsigned int, unsigned int); 213static void tlb0_flush_entry(pmap_t, vm_offset_t); 214static void tlb0_init(void); 215 216/**************************************************************************/ 217/* Page table management */ 218/**************************************************************************/ 219 220/* Data for the pv entry allocation mechanism */ 221static uma_zone_t pvzone; 222static struct vm_object pvzone_obj; 223static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 224 225#define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ 226 227#ifndef PMAP_SHPGPERPROC 228#define PMAP_SHPGPERPROC 200 229#endif 230 231static void ptbl_init(void); 232static struct ptbl_buf *ptbl_buf_alloc(void); 233static void ptbl_buf_free(struct ptbl_buf *); 234static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); 235 236static void ptbl_alloc(mmu_t, pmap_t, unsigned int); 237static void ptbl_free(mmu_t, pmap_t, unsigned int); 238static void ptbl_hold(mmu_t, pmap_t, unsigned int); 239static int ptbl_unhold(mmu_t, pmap_t, unsigned int); 240 241static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); 242static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); 243void pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, u_int32_t); 244static int pte_remove(mmu_t, pmap_t, vm_offset_t, u_int8_t); 245 246pv_entry_t pv_alloc(void); 247static void pv_free(pv_entry_t); 248static void pv_insert(pmap_t, vm_offset_t, vm_page_t); 249static void pv_remove(pmap_t, vm_offset_t, vm_page_t); 250 251/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ 252#define PTBL_BUFS (128 * 16) 253 254struct ptbl_buf { 255 TAILQ_ENTRY(ptbl_buf) link; /* list link */ 256 vm_offset_t kva; /* va of mapping */ 257}; 258 259/* ptbl free list and a lock used for access synchronization. */ 260static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; 261static struct mtx ptbl_buf_freelist_lock; 262 263/* Base address of kva space allocated fot ptbl bufs. */ 264static vm_offset_t ptbl_buf_pool_vabase; 265 266/* Pointer to ptbl_buf structures. 
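Each ptbl_buf describes one preallocated KVA slot of PTBL_PAGES pages into which a user page table (ptbl) gets mapped; the slots start at ptbl_buf_pool_vabase and there are PTBL_BUFS of them, so the pool spans PTBL_BUFS * PTBL_PAGES * PAGE_SIZE bytes of KVA (purely for illustration: with the PTBL_BUFS of 128 * 16 defined above, 4 KB pages and a hypothetical PTBL_PAGES of 2, this works out to 2048 * 2 * 4 KB = 16 MB). Free slots are kept on ptbl_buf_freelist, which is set up in ptbl_init().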
*/ 267static struct ptbl_buf *ptbl_bufs; 268 269/* 270 * Kernel MMU interface 271 */ 272static vm_offset_t mmu_booke_addr_hint(mmu_t, vm_object_t, vm_offset_t, vm_size_t); 273static void mmu_booke_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); 274static void mmu_booke_clear_modify(mmu_t, vm_page_t); 275static void mmu_booke_clear_reference(mmu_t, vm_page_t); 276static void mmu_booke_copy(pmap_t, pmap_t, vm_offset_t, vm_size_t, 277 vm_offset_t); 278static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); 279static void mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, 280 vm_prot_t, boolean_t); 281static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 282 vm_page_t, vm_prot_t); 283static void mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, 284 vm_prot_t); 285static vm_paddr_t mmu_booke_extract(mmu_t, pmap_t, vm_offset_t); 286static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, 287 vm_prot_t); 288static void mmu_booke_init(mmu_t); 289static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); 290static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 291static boolean_t mmu_booke_ts_referenced(mmu_t, vm_page_t); 292static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, 293 int); 294static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t); 295static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, 296 vm_object_t, vm_pindex_t, vm_size_t); 297static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); 298static void mmu_booke_page_init(mmu_t, vm_page_t); 299static int mmu_booke_page_wired_mappings(mmu_t, vm_page_t); 300static void mmu_booke_pinit(mmu_t, pmap_t); 301static void mmu_booke_pinit0(mmu_t, pmap_t); 302static void mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 303 vm_prot_t); 304static void mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 305static void mmu_booke_qremove(mmu_t, vm_offset_t, int); 306static void mmu_booke_release(mmu_t, pmap_t); 307static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 308static void mmu_booke_remove_all(mmu_t, vm_page_t); 309static void mmu_booke_remove_write(mmu_t, vm_page_t); 310static void mmu_booke_zero_page(mmu_t, vm_page_t); 311static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); 312static void mmu_booke_zero_page_idle(mmu_t, vm_page_t); 313static void mmu_booke_activate(mmu_t, struct thread *); 314static void mmu_booke_deactivate(mmu_t, struct thread *); 315static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t); 316static void *mmu_booke_mapdev(mmu_t, vm_offset_t, vm_size_t); 317static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t); 318static vm_offset_t mmu_booke_kextract(mmu_t, vm_offset_t); 319static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_offset_t); 320static void mmu_booke_kremove(mmu_t, vm_offset_t); 321static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t); 322static boolean_t mmu_booke_page_executable(mmu_t, vm_page_t); 323 324static mmu_method_t mmu_booke_methods[] = { 325 /* pmap dispatcher interface */ 326 MMUMETHOD(mmu_addr_hint, mmu_booke_addr_hint), 327 MMUMETHOD(mmu_change_wiring, mmu_booke_change_wiring), 328 MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), 329 MMUMETHOD(mmu_clear_reference, mmu_booke_clear_reference), 330 MMUMETHOD(mmu_copy, mmu_booke_copy), 331 MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), 332 MMUMETHOD(mmu_enter, mmu_booke_enter), 333 MMUMETHOD(mmu_enter_object, 
mmu_booke_enter_object), 334 MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), 335 MMUMETHOD(mmu_extract, mmu_booke_extract), 336 MMUMETHOD(mmu_extract_and_hold, mmu_booke_extract_and_hold), 337 MMUMETHOD(mmu_init, mmu_booke_init), 338 MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), 339 MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), 340 MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), 341 MMUMETHOD(mmu_map, mmu_booke_map), 342 MMUMETHOD(mmu_mincore, mmu_booke_mincore), 343 MMUMETHOD(mmu_object_init_pt, mmu_booke_object_init_pt), 344 MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick), 345 MMUMETHOD(mmu_page_init, mmu_booke_page_init), 346 MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings), 347 MMUMETHOD(mmu_pinit, mmu_booke_pinit), 348 MMUMETHOD(mmu_pinit0, mmu_booke_pinit0), 349 MMUMETHOD(mmu_protect, mmu_booke_protect), 350 MMUMETHOD(mmu_qenter, mmu_booke_qenter), 351 MMUMETHOD(mmu_qremove, mmu_booke_qremove), 352 MMUMETHOD(mmu_release, mmu_booke_release), 353 MMUMETHOD(mmu_remove, mmu_booke_remove), 354 MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), 355 MMUMETHOD(mmu_remove_write, mmu_booke_remove_write), 356 MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), 357 MMUMETHOD(mmu_zero_page_area, mmu_booke_zero_page_area), 358 MMUMETHOD(mmu_zero_page_idle, mmu_booke_zero_page_idle), 359 MMUMETHOD(mmu_activate, mmu_booke_activate), 360 MMUMETHOD(mmu_deactivate, mmu_booke_deactivate), 361 362 /* Internal interfaces */ 363 MMUMETHOD(mmu_bootstrap, mmu_booke_bootstrap), 364 MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped), 365 MMUMETHOD(mmu_mapdev, mmu_booke_mapdev), 366 MMUMETHOD(mmu_kenter, mmu_booke_kenter), 367 MMUMETHOD(mmu_kextract, mmu_booke_kextract), 368/* MMUMETHOD(mmu_kremove, mmu_booke_kremove), */ 369 MMUMETHOD(mmu_page_executable, mmu_booke_page_executable), 370 MMUMETHOD(mmu_unmapdev, mmu_booke_unmapdev), 371 372 { 0, 0 } 373}; 374 375static mmu_def_t booke_mmu = { 376 MMU_TYPE_BOOKE, 377 mmu_booke_methods, 378 0 379}; 380MMU_DEF(booke_mmu); 381 382/* Return number of entries in TLB0. */ 383static __inline void 384tlb0_get_tlbconf(void) 385{ 386 uint32_t tlb0_cfg; 387 388 tlb0_cfg = mfspr(SPR_TLB0CFG); 389 tlb0_size = tlb0_cfg & TLBCFG_NENTRY_MASK; 390 tlb0_nways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; 391 tlb0_nentries_per_way = tlb0_size/tlb0_nways; 392} 393 394/* Initialize pool of kva ptbl buffers. */ 395static void 396ptbl_init(void) 397{ 398 int i; 399 400 //debugf("ptbl_init: s (ptbl_bufs = 0x%08x size 0x%08x)\n", 401 // (u_int32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); 402 //debugf("ptbl_init: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)\n", 403 // ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); 404 405 mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); 406 TAILQ_INIT(&ptbl_buf_freelist); 407 408 for (i = 0; i < PTBL_BUFS; i++) { 409 ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; 410 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); 411 } 412 413 //debugf("ptbl_init: e\n"); 414} 415 416/* Get a ptbl_buf from the freelist. 
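Returns NULL when the pool is exhausted; the only caller in this file, ptbl_alloc(), treats that as fatal and panics.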
*/ 417 static struct ptbl_buf * 418 ptbl_buf_alloc(void) 419 { 420 struct ptbl_buf *buf; 421 422 //debugf("ptbl_buf_alloc: s\n"); 423 424 mtx_lock(&ptbl_buf_freelist_lock); 425 buf = TAILQ_FIRST(&ptbl_buf_freelist); 426 if (buf != NULL) 427 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); 428 mtx_unlock(&ptbl_buf_freelist_lock); 429 430 //debugf("ptbl_buf_alloc: e (buf = 0x%08x)\n", (u_int32_t)buf); 431 return (buf); 432 } 433 434 /* Return ptbl buf to free pool. */ 435 static void 436 ptbl_buf_free(struct ptbl_buf *buf) 437 { 438 439 //debugf("ptbl_buf_free: s (buf = 0x%08x)\n", (u_int32_t)buf); 440 441 mtx_lock(&ptbl_buf_freelist_lock); 442 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); 443 mtx_unlock(&ptbl_buf_freelist_lock); 444 445 //debugf("ptbl_buf_free: e\n"); 446 } 447 448 /* 449 * Search the pmap's list of allocated ptbl bufs and free the buf 450 * corresponding to the given ptbl. 451 */ 452 static void 453 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) 454 { 455 struct ptbl_buf *pbuf; 456 457 //debugf("ptbl_free_pmap_ptbl: s (pmap = 0x%08x ptbl = 0x%08x)\n", 458 // (u_int32_t)pmap, (u_int32_t)ptbl); 459 460 TAILQ_FOREACH(pbuf, &pmap->ptbl_list, link) { 461 if (pbuf->kva == (vm_offset_t)ptbl) { 462 /* Remove from pmap ptbl buf list. */ 463 TAILQ_REMOVE(&pmap->ptbl_list, pbuf, link); 464 465 /* Free corresponding ptbl buf. */ 466 ptbl_buf_free(pbuf); 467 468 break; 469 } 470 } 471 472 //debugf("ptbl_free_pmap_ptbl: e\n"); 473 } 474 475 /* Allocate page table. */ 476 static void 477 ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 478 { 479 vm_page_t mtbl[PTBL_PAGES]; 480 vm_page_t m; 481 struct ptbl_buf *pbuf; 482 unsigned int pidx; 483 int i; 484 485 //int su = (pmap == kernel_pmap); 486 //debugf("ptbl_alloc: s (pmap = 0x%08x su = %d pdir_idx = %d)\n", (u_int32_t)pmap, su, pdir_idx); 487 488 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 489 ("ptbl_alloc: invalid pdir_idx")); 490 KASSERT((pmap->pm_pdir[pdir_idx] == NULL), 491 ("pte_alloc: valid ptbl entry exists!")); 492 493 pbuf = ptbl_buf_alloc(); 494 if (pbuf == NULL) 495 panic("pte_alloc: couldn't alloc kernel virtual memory"); 496 pmap->pm_pdir[pdir_idx] = (pte_t *)pbuf->kva; 497 //debugf("ptbl_alloc: kva = 0x%08x\n", (u_int32_t)pmap->pm_pdir[pdir_idx]); 498 499 /* Allocate ptbl pages, this will sleep! */ 500 for (i = 0; i < PTBL_PAGES; i++) { 501 pidx = (PTBL_PAGES * pdir_idx) + i; 502 while ((m = vm_page_alloc(NULL, pidx, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 503 PMAP_UNLOCK(pmap); 504 vm_page_unlock_queues(); 505 VM_WAIT; 506 vm_page_lock_queues(); 507 PMAP_LOCK(pmap); 508 } 509 mtbl[i] = m; 510 } 511 512 /* Map in allocated pages into kernel_pmap. */ 513 mmu_booke_qenter(mmu, (vm_offset_t)pmap->pm_pdir[pdir_idx], mtbl, PTBL_PAGES); 514 515 /* Zero whole ptbl. */ 516 bzero((caddr_t)pmap->pm_pdir[pdir_idx], PTBL_PAGES * PAGE_SIZE); 517 518 /* Add pbuf to the pmap ptbl bufs list. */ 519 TAILQ_INSERT_TAIL(&pmap->ptbl_list, pbuf, link); 520 521 //debugf("ptbl_alloc: e\n"); 522 } 523 524 /* Free ptbl pages and invalidate pdir entry. 
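For each of the PTBL_PAGES pages backing the ptbl the kernel mapping is resolved with pte_vatopa() on kernel_pmap, the page is released with vm_page_free_zero() and the global wire count dropped, and the KVA is unmapped with mmu_booke_kremove(); the ptbl_buf slot is then returned to the pool and pm_pdir[pdir_idx] is cleared.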
*/ 525 static void 526 ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 527 { 528 pte_t *ptbl; 529 vm_paddr_t pa; 530 vm_offset_t va; 531 vm_page_t m; 532 int i; 533 534 //int su = (pmap == kernel_pmap); 535 //debugf("ptbl_free: s (pmap = 0x%08x su = %d pdir_idx = %d)\n", (u_int32_t)pmap, su, pdir_idx); 536 537 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 538 ("ptbl_free: invalid pdir_idx")); 539 540 ptbl = pmap->pm_pdir[pdir_idx]; 541 542 //debugf("ptbl_free: ptbl = 0x%08x\n", (u_int32_t)ptbl); 543 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 544 545 for (i = 0; i < PTBL_PAGES; i++) { 546 va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); 547 pa = pte_vatopa(mmu, kernel_pmap, va); 548 m = PHYS_TO_VM_PAGE(pa); 549 vm_page_free_zero(m); 550 atomic_subtract_int(&cnt.v_wire_count, 1); 551 mmu_booke_kremove(mmu, va); 552 } 553 554 ptbl_free_pmap_ptbl(pmap, ptbl); 555 pmap->pm_pdir[pdir_idx] = NULL; 556 557 //debugf("ptbl_free: e\n"); 558 } 559 560 /* 561 * Decrement ptbl pages hold count and attempt to free ptbl pages. 562 * Called when removing pte entry from ptbl. 563 * 564 * Return 1 if ptbl pages were freed. 565 */ 566 static int 567 ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 568 { 569 pte_t *ptbl; 570 vm_paddr_t pa; 571 vm_page_t m; 572 int i; 573 574 //int su = (pmap == kernel_pmap); 575 //debugf("ptbl_unhold: s (pmap = %08x su = %d pdir_idx = %d)\n", 576 // (u_int32_t)pmap, su, pdir_idx); 577 578 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 579 ("ptbl_unhold: invalid pdir_idx")); 580 KASSERT((pmap != kernel_pmap), 581 ("ptbl_unhold: unholding kernel ptbl!")); 582 583 ptbl = pmap->pm_pdir[pdir_idx]; 584 585 //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); 586 KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), 587 ("ptbl_unhold: non kva ptbl")); 588 589 /* decrement hold count */ 590 for (i = 0; i < PTBL_PAGES; i++) { 591 pa = pte_vatopa(mmu, kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE)); 592 m = PHYS_TO_VM_PAGE(pa); 593 m->wire_count--; 594 } 595 596 /* 597 * Free ptbl pages if there are no pte entries in this ptbl. 598 * wire_count has the same value for all ptbl pages, so check 599 * the last page. 600 */ 601 if (m->wire_count == 0) { 602 ptbl_free(mmu, pmap, pdir_idx); 603 604 //debugf("ptbl_unhold: e (freed ptbl)\n"); 605 return (1); 606 } 607 608 //debugf("ptbl_unhold: e\n"); 609 return (0); 610 } 611 612 /* 613 * Increment hold count for ptbl pages. This routine is used when 614 * new pte entry is being inserted into ptbl. 615 */ 616 static void 617 ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 618 { 619 vm_paddr_t pa; 620 pte_t *ptbl; 621 vm_page_t m; 622 int i; 623 624 //debugf("ptbl_hold: s (pmap = 0x%08x pdir_idx = %d)\n", (u_int32_t)pmap, pdir_idx); 625 626 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 627 ("ptbl_hold: invalid pdir_idx")); 628 KASSERT((pmap != kernel_pmap), 629 ("ptbl_hold: holding kernel ptbl!")); 630 631 ptbl = pmap->pm_pdir[pdir_idx]; 632 633 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 634 635 for (i = 0; i < PTBL_PAGES; i++) { 636 pa = pte_vatopa(mmu, kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE)); 637 m = PHYS_TO_VM_PAGE(pa); 638 m->wire_count++; 639 } 640 641 //debugf("ptbl_hold: e\n"); 642 } 643 644 /* Allocate pv_entry structure. 
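Entries come from the pvzone UMA zone created in mmu_booke_init(); the allocation is M_NOWAIT, so NULL can be returned under memory pressure (pv_insert() panics in that case), and crossing pv_entry_high_water wakes up the pagedaemon.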
*/ 645pv_entry_t 646pv_alloc(void) 647{ 648 pv_entry_t pv; 649 650 debugf("pv_alloc: s\n"); 651 652 pv_entry_count++; 653 if ((pv_entry_count > pv_entry_high_water) && (pagedaemon_waken == 0)) { 654 pagedaemon_waken = 1; 655 wakeup (&vm_pages_needed); 656 } 657 pv = uma_zalloc(pvzone, M_NOWAIT); 658 659 debugf("pv_alloc: e\n"); 660 return (pv); 661} 662 663/* Free pv_entry structure. */ 664static __inline void 665pv_free(pv_entry_t pve) 666{ 667 //debugf("pv_free: s\n"); 668 669 pv_entry_count--; 670 uma_zfree(pvzone, pve); 671 672 //debugf("pv_free: e\n"); 673} 674 675 676/* Allocate and initialize pv_entry structure. */ 677static void 678pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) 679{ 680 pv_entry_t pve; 681 682 //int su = (pmap == kernel_pmap); 683 //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, 684 // (u_int32_t)pmap, va, (u_int32_t)m); 685 686 pve = pv_alloc(); 687 if (pve == NULL) 688 panic("pv_insert: no pv entries!"); 689 690 pve->pv_pmap = pmap; 691 pve->pv_va = va; 692 693 /* add to pv_list */ 694 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 695 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 696 697 TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); 698 699 //debugf("pv_insert: e\n"); 700} 701 702/* Destroy pv entry. */ 703static void 704pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) 705{ 706 pv_entry_t pve; 707 708 //int su = (pmap == kernel_pmap); 709 //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); 710 711 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 712 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 713 714 /* find pv entry */ 715 TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { 716 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { 717 /* remove from pv_list */ 718 TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); 719 if (TAILQ_EMPTY(&m->md.pv_list)) 720 vm_page_flag_clear(m, PG_WRITEABLE); 721 722 /* free pv entry struct */ 723 pv_free(pve); 724 725 break; 726 } 727 } 728 729 //debugf("pv_remove: e\n"); 730} 731 732/* 733 * Clean pte entry, try to free page table page if requested. 734 * 735 * Return 1 if ptbl pages were freed, otherwise return 0. 736 */ 737static int 738pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) 739{ 740 unsigned int pdir_idx = PDIR_IDX(va); 741 unsigned int ptbl_idx = PTBL_IDX(va); 742 vm_page_t m; 743 pte_t *ptbl; 744 pte_t *pte; 745 746 //int su = (pmap == kernel_pmap); 747 //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", 748 // su, (u_int32_t)pmap, va, flags); 749 750 ptbl = pmap->pm_pdir[pdir_idx]; 751 KASSERT(ptbl, ("pte_remove: null ptbl")); 752 753 pte = &ptbl[ptbl_idx]; 754 755 if (pte == NULL || !PTE_ISVALID(pte)) 756 return (0); 757 758 /* Get vm_page_t for mapped pte. */ 759 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 760 761 if (PTE_ISWIRED(pte)) 762 pmap->pm_stats.wired_count--; 763 764 if (!PTE_ISFAKE(pte)) { 765 /* Handle managed entry. */ 766 if (PTE_ISMANAGED(pte)) { 767 768 /* Handle modified pages. */ 769 if (PTE_ISMODIFIED(pte)) 770 vm_page_dirty(m); 771 772 /* Referenced pages. */ 773 if (PTE_ISREFERENCED(pte)) 774 vm_page_flag_set(m, PG_REFERENCED); 775 776 /* Remove pv_entry from pv_list. 
*/ 777 pv_remove(pmap, va, m); 778 } 779 } 780 781 pte->flags = 0; 782 pte->rpn = 0; 783 pmap->pm_stats.resident_count--; 784 785 if (flags & PTBL_UNHOLD) { 786 //debugf("pte_remove: e (unhold)\n"); 787 return (ptbl_unhold(mmu, pmap, pdir_idx)); 788 } 789 790 //debugf("pte_remove: e\n"); 791 return (0); 792} 793 794/* 795 * Insert PTE for a given page and virtual address. 796 */ 797void 798pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, u_int32_t flags) 799{ 800 unsigned int pdir_idx = PDIR_IDX(va); 801 unsigned int ptbl_idx = PTBL_IDX(va); 802 pte_t *ptbl; 803 pte_t *pte; 804 805 //int su = (pmap == kernel_pmap); 806 //debugf("pte_enter: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); 807 808 /* Get the page table pointer. */ 809 ptbl = pmap->pm_pdir[pdir_idx]; 810 811 if (ptbl) { 812 /* 813 * Check if there is valid mapping for requested 814 * va, if there is, remove it. 815 */ 816 pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; 817 if (PTE_ISVALID(pte)) { 818 pte_remove(mmu, pmap, va, PTBL_HOLD); 819 } else { 820 /* 821 * pte is not used, increment hold count 822 * for ptbl pages. 823 */ 824 if (pmap != kernel_pmap) 825 ptbl_hold(mmu, pmap, pdir_idx); 826 } 827 } else { 828 /* Allocate page table pages. */ 829 ptbl_alloc(mmu, pmap, pdir_idx); 830 } 831 832 /* Flush entry from TLB. */ 833 tlb0_flush_entry(pmap, va); 834 835 pte = &(pmap->pm_pdir[pdir_idx][ptbl_idx]); 836 837 /* 838 * Insert pv_entry into pv_list for mapped page 839 * if part of managed memory. 840 */ 841 if ((m->flags & PG_FICTITIOUS) == 0) { 842 if ((m->flags & PG_UNMANAGED) == 0) { 843 pte->flags |= PTE_MANAGED; 844 845 /* Create and insert pv entry. */ 846 pv_insert(pmap, va, m); 847 } 848 } else { 849 pte->flags |= PTE_FAKE; 850 } 851 852 pmap->pm_stats.resident_count++; 853 pte->rpn = VM_PAGE_TO_PHYS(m) & ~PTE_PA_MASK; 854 pte->flags |= (PTE_VALID | flags); 855 856 //debugf("pte_enter: e\n"); 857} 858 859/* Return the pa for the given pmap/va. */ 860static vm_paddr_t 861pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 862{ 863 vm_paddr_t pa = 0; 864 pte_t *pte; 865 866 pte = pte_find(mmu, pmap, va); 867 if ((pte != NULL) && PTE_ISVALID(pte)) 868 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 869 return (pa); 870} 871 872/* Get a pointer to a PTE in a page table. */ 873static pte_t * 874pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 875{ 876 unsigned int pdir_idx = PDIR_IDX(va); 877 unsigned int ptbl_idx = PTBL_IDX(va); 878 879 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 880 881 if (pmap->pm_pdir[pdir_idx]) 882 return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); 883 884 return (NULL); 885} 886 887/**************************************************************************/ 888/* PMAP related */ 889/**************************************************************************/ 890 891/* 892 * This is called during e500_init, before the system is really initialized. 893 */ 894static void 895mmu_booke_bootstrap(mmu_t mmu, vm_offset_t kernelstart, vm_offset_t kernelend) 896{ 897 vm_offset_t phys_kernelend; 898 struct mem_region *mp, *mp1; 899 int cnt, i, j; 900 u_int s, e, sz; 901 u_int phys_avail_count; 902 vm_size_t physsz, hwphyssz; 903 vm_offset_t kernel_pdir; 904 905 debugf("mmu_booke_bootstrap: entered\n"); 906 907 /* Align kernel start and end address (kernel image). */ 908 kernelstart = trunc_page(kernelstart); 909 kernelend = round_page(kernelend); 910 911 /* Allocate space for the message buffer. 
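All of the early bootstrap allocations below follow the same pattern: the structure is placed at the current kernelend, kernelend is advanced by the structure's size and then rounded up to a page boundary before the next allocation, so these tables end up contiguous with the kernel image.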
*/ 912 msgbufp = (struct msgbuf *)kernelend; 913 kernelend += MSGBUF_SIZE; 914 debugf(" msgbufp at 0x%08x end = 0x%08x\n", (u_int32_t)msgbufp, 915 kernelend); 916 917 kernelend = round_page(kernelend); 918 919 /* Allocate space for tlb0 table. */ 920 tlb0_get_tlbconf(); /* Read TLB0 size and associativity. */ 921 tlb0 = (tlb_entry_t *)kernelend; 922 kernelend += sizeof(tlb_entry_t) * tlb0_size; 923 memset((void *)tlb0, 0, sizeof(tlb_entry_t) * tlb0_size); 924 debugf(" tlb0 at 0x%08x end = 0x%08x\n", (u_int32_t)tlb0, kernelend); 925 926 kernelend = round_page(kernelend); 927 928 /* Allocate space for ptbl_bufs. */ 929 ptbl_bufs = (struct ptbl_buf *)kernelend; 930 kernelend += sizeof(struct ptbl_buf) * PTBL_BUFS; 931 memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_BUFS); 932 debugf(" ptbl_bufs at 0x%08x end = 0x%08x\n", (u_int32_t)ptbl_bufs, 933 kernelend); 934 935 kernelend = round_page(kernelend); 936 937 /* Allocate PTE tables for kernel KVA. */ 938 kernel_pdir = kernelend; 939 kernel_ptbls = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS + 940 PDIR_SIZE - 1) / PDIR_SIZE; 941 kernelend += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; 942 memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 943 debugf(" kernel ptbls: %d\n", kernel_ptbls); 944 debugf(" kernel pdir at 0x%08x\n", kernel_pdir); 945 946 if (kernelend - kernelstart > 0x1000000) { 947 kernelend = (kernelend + 0x3fffff) & ~0x3fffff; 948 tlb1_mapin_region(kernelstart + 0x1000000, 949 kernload + 0x1000000, kernelend - kernelstart - 0x1000000); 950 } else 951 kernelend = (kernelend + 0xffffff) & ~0xffffff; 952 953 /*******************************************************/ 954 /* Set the start and end of kva. */ 955 /*******************************************************/ 956 virtual_avail = kernelend; 957 virtual_end = VM_MAX_KERNEL_ADDRESS; 958 959 /* Allocate KVA space for page zero/copy operations. */ 960 zero_page_va = virtual_avail; 961 virtual_avail += PAGE_SIZE; 962 zero_page_idle_va = virtual_avail; 963 virtual_avail += PAGE_SIZE; 964 copy_page_src_va = virtual_avail; 965 virtual_avail += PAGE_SIZE; 966 copy_page_dst_va = virtual_avail; 967 virtual_avail += PAGE_SIZE; 968 969 /* Initialize page zero/copy mutexes. */ 970 mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); 971 mtx_init(&copy_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); 972 973 /* Initialize tlb0 table mutex. */ 974 mtx_init(&tlb0_mutex, "tlb0", NULL, MTX_SPIN | MTX_RECURSE); 975 976 /* Allocate KVA space for ptbl bufs. */ 977 ptbl_buf_pool_vabase = virtual_avail; 978 virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE; 979 980 debugf("ptbl_buf_pool_vabase = 0x%08x\n", ptbl_buf_pool_vabase); 981 debugf("virtual_avail = %08x\n", virtual_avail); 982 debugf("virtual_end = %08x\n", virtual_end); 983 984 /* Calculate corresponding physical addresses for the kernel region. */ 985 phys_kernelend = kernload + (kernelend - kernelstart); 986 987 debugf("kernel image and allocated data:\n"); 988 debugf(" kernload = 0x%08x\n", kernload); 989 debugf(" kernelstart = 0x%08x\n", kernelstart); 990 debugf(" kernelend = 0x%08x\n", kernelend); 991 debugf(" kernel size = 0x%08x\n", kernelend - kernelstart); 992 993 if (sizeof(phys_avail) / sizeof(phys_avail[0]) < availmem_regions_sz) 994 panic("mmu_booke_bootstrap: phys_avail too small"); 995 996 /* 997 * Remove kernel physical address range from avail 998 * regions list. Page align all regions. 999 * Non-page aligned memory isn't very interesting to us. 
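 * For example (illustrative addresses only): with kernload = 0x0100_0000
 * and phys_kernelend = 0x0140_0000, an avail region spanning 0x0000_0000 -
 * 0x0800_0000 is split into 0x0000_0000 - 0x0100_0000 and 0x0140_0000 -
 * 0x0800_0000, while a region that lies entirely inside the kernel image
 * is dropped.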
1000 * Also, sort the entries for ascending addresses. 1001 */ 1002 sz = 0; 1003 cnt = availmem_regions_sz; 1004 debugf("processing avail regions:\n"); 1005 for (mp = availmem_regions; mp->mr_size; mp++) { 1006 s = mp->mr_start; 1007 e = mp->mr_start + mp->mr_size; 1008 debugf(" %08x-%08x -> ", s, e); 1009 /* Check whether this region holds all of the kernel. */ 1010 if (s < kernload && e > phys_kernelend) { 1011 availmem_regions[cnt].mr_start = phys_kernelend; 1012 availmem_regions[cnt++].mr_size = e - phys_kernelend; 1013 e = kernload; 1014 } 1015 /* Look whether this regions starts within the kernel. */ 1016 if (s >= kernload && s < phys_kernelend) { 1017 if (e <= phys_kernelend) 1018 goto empty; 1019 s = phys_kernelend; 1020 } 1021 /* Now look whether this region ends within the kernel. */ 1022 if (e > kernload && e <= phys_kernelend) { 1023 if (s >= kernload) 1024 goto empty; 1025 e = kernload; 1026 } 1027 /* Now page align the start and size of the region. */ 1028 s = round_page(s); 1029 e = trunc_page(e); 1030 if (e < s) 1031 e = s; 1032 sz = e - s; 1033 debugf("%08x-%08x = %x\n", s, e, sz); 1034 1035 /* Check whether some memory is left here. */ 1036 if (sz == 0) { 1037 empty: 1038 memmove(mp, mp + 1, 1039 (cnt - (mp - availmem_regions)) * sizeof(*mp)); 1040 cnt--; 1041 mp--; 1042 continue; 1043 } 1044 1045 /* Do an insertion sort. */ 1046 for (mp1 = availmem_regions; mp1 < mp; mp1++) 1047 if (s < mp1->mr_start) 1048 break; 1049 if (mp1 < mp) { 1050 memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); 1051 mp1->mr_start = s; 1052 mp1->mr_size = sz; 1053 } else { 1054 mp->mr_start = s; 1055 mp->mr_size = sz; 1056 } 1057 } 1058 availmem_regions_sz = cnt; 1059 1060 /*******************************************************/ 1061 /* Fill in phys_avail table, based on availmem_regions */ 1062 /*******************************************************/ 1063 phys_avail_count = 0; 1064 physsz = 0; 1065 hwphyssz = 0; 1066 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 1067 1068 debugf("fill in phys_avail:\n"); 1069 for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { 1070 1071 debugf(" region: 0x%08x - 0x%08x (0x%08x)\n", 1072 availmem_regions[i].mr_start, 1073 availmem_regions[i].mr_start + availmem_regions[i].mr_size, 1074 availmem_regions[i].mr_size); 1075 1076 if (hwphyssz != 0 && (physsz + availmem_regions[i].mr_size) >= hwphyssz) { 1077 debugf(" hw.physmem adjust\n"); 1078 if (physsz < hwphyssz) { 1079 phys_avail[j] = availmem_regions[i].mr_start; 1080 phys_avail[j + 1] = availmem_regions[i].mr_start + 1081 hwphyssz - physsz; 1082 physsz = hwphyssz; 1083 phys_avail_count++; 1084 } 1085 break; 1086 } 1087 1088 phys_avail[j] = availmem_regions[i].mr_start; 1089 phys_avail[j + 1] = availmem_regions[i].mr_start + 1090 availmem_regions[i].mr_size; 1091 phys_avail_count++; 1092 physsz += availmem_regions[i].mr_size; 1093 } 1094 physmem = btoc(physsz); 1095 1096 /* Calculate the last available physical address. */ 1097 for (i = 0; phys_avail[i + 2] != 0; i += 2) 1098 ; 1099 Maxmem = powerpc_btop(phys_avail[i + 1]); 1100 1101 debugf("Maxmem = 0x%08lx\n", Maxmem); 1102 debugf("phys_avail_count = %d\n", phys_avail_count); 1103 debugf("physsz = 0x%08x physmem = %ld (0x%08lx)\n", physsz, physmem, physmem); 1104 1105 /*******************************************************/ 1106 /* Initialize (statically allocated) kernel pmap. 
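kptbl_min is the pdir slot corresponding to VM_MIN_KERNEL_ADDRESS, and kernel_ptbls consecutive slots are pointed at the kernel_pdir area reserved earlier, so kernel page tables are resident from boot and are never allocated or freed at run time. Each slot covers PDIR_SIZE bytes of KVA (for illustration only: 1024 PTEs of 4 KB each per ptbl would make that 4 MB; the real constants live in machine/pte.h).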
*/ 1107 /*******************************************************/ 1108 PMAP_LOCK_INIT(kernel_pmap); 1109 kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; 1110 1111 debugf("kernel_pmap = 0x%08x\n", (u_int32_t)kernel_pmap); 1112 debugf("kptbl_min = %d, kernel_ptbls = %d\n", kptbl_min, kernel_ptbls); 1113 debugf("kernel pdir range: 0x%08x - 0x%08x\n", 1114 kptbl_min * PDIR_SIZE, (kptbl_min + kernel_ptbls) * PDIR_SIZE - 1); 1115 1116 /* Initialize kernel pdir */ 1117 for (i = 0; i < kernel_ptbls; i++) 1118 kernel_pmap->pm_pdir[kptbl_min + i] = 1119 (pte_t *)(kernel_pdir + (i * PAGE_SIZE * PTBL_PAGES)); 1120 1121 kernel_pmap->pm_tid = KERNEL_TID; 1122 kernel_pmap->pm_active = ~0; 1123 1124 /* Initialize tidbusy with kernel_pmap entry. */ 1125 tidbusy[0] = kernel_pmap; 1126 1127 /*******************************************************/ 1128 /* Final setup */ 1129 /*******************************************************/ 1130 /* Initialize TLB0 handling. */ 1131 tlb0_init(); 1132 1133 debugf("mmu_booke_bootstrap: exit\n"); 1134 } 1135 1136 /* 1137 * Get the physical page address for the given pmap/virtual address. 1138 */ 1139 static vm_paddr_t 1140 mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1141 { 1142 vm_paddr_t pa; 1143 1144 PMAP_LOCK(pmap); 1145 pa = pte_vatopa(mmu, pmap, va); 1146 PMAP_UNLOCK(pmap); 1147 1148 return (pa); 1149 } 1150 1151 /* 1152 * Extract the physical page address associated with the given 1153 * kernel virtual address. 1154 */ 1155 static vm_paddr_t 1156 mmu_booke_kextract(mmu_t mmu, vm_offset_t va) 1157 { 1158 1159 return (pte_vatopa(mmu, kernel_pmap, va)); 1160 } 1161 1162 /* 1163 * Initialize the pmap module. 1164 * Called by vm_init, to initialize any structures that the pmap 1165 * system needs to map virtual memory. 1166 */ 1167 static void 1168 mmu_booke_init(mmu_t mmu) 1169 { 1170 int shpgperproc = PMAP_SHPGPERPROC; 1171 1172 //debugf("mmu_booke_init: s\n"); 1173 1174 /* 1175 * Initialize the address space (zone) for the pv entries. Set a 1176 * high water mark so that the system can recover from excessive 1177 * numbers of pv entries. 1178 */ 1179 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 1180 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 1181 1182 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 1183 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; 1184 1185 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 1186 pv_entry_high_water = 9 * (pv_entry_max / 10); 1187 1188 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); 1189 1190 /* Pre-fill pvzone with initial number of pv entries. */ 1191 uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); 1192 1193 /* Initialize ptbl allocation. */ 1194 ptbl_init(); 1195 1196 //debugf("mmu_booke_init: e\n"); 1197 } 1198 1199 /* 1200 * Map a list of wired pages into kernel virtual address space. This is 1201 * intended for temporary mappings which do not need page modification or 1202 * references recorded. Existing mappings in the region are overwritten. 1203 */ 1204 static void 1205 mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) 1206 { 1207 vm_offset_t va; 1208 1209 //debugf("mmu_booke_qenter: s (sva = 0x%08x count = %d)\n", sva, count); 1210 1211 va = sva; 1212 while (count-- > 0) { 1213 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 1214 va += PAGE_SIZE; 1215 m++; 1216 } 1217 1218 //debugf("mmu_booke_qenter: e\n"); 1219 } 1220 1221 /* 1222 * Remove page mappings from kernel virtual address space. 
Intended for 1223 * temporary mappings entered by mmu_booke_qenter. 1224 */ 1225static void 1226mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count) 1227{ 1228 vm_offset_t va; 1229 1230 //debugf("mmu_booke_qremove: s (sva = 0x%08x count = %d)\n", sva, count); 1231 1232 va = sva; 1233 while (count-- > 0) { 1234 mmu_booke_kremove(mmu, va); 1235 va += PAGE_SIZE; 1236 } 1237 1238 //debugf("mmu_booke_qremove: e\n"); 1239} 1240 1241/* 1242 * Map a wired page into kernel virtual address space. 1243 */ 1244static void 1245mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa) 1246{ 1247 unsigned int pdir_idx = PDIR_IDX(va); 1248 unsigned int ptbl_idx = PTBL_IDX(va); 1249 u_int32_t flags; 1250 pte_t *pte; 1251 1252 //debugf("mmu_booke_kenter: s (pdir_idx = %d ptbl_idx = %d va=0x%08x pa=0x%08x)\n", 1253 // pdir_idx, ptbl_idx, va, pa); 1254 1255 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)), 1256 ("mmu_booke_kenter: invalid va")); 1257 1258#if 0 1259 /* assume IO mapping, set I, G bits */ 1260 flags = (PTE_G | PTE_I | PTE_FAKE); 1261 1262 /* if mapping is within system memory, do not set I, G bits */ 1263 for (i = 0; i < totalmem_regions_sz; i++) { 1264 if ((pa >= totalmem_regions[i].mr_start) && 1265 (pa < (totalmem_regions[i].mr_start + 1266 totalmem_regions[i].mr_size))) { 1267 flags &= ~(PTE_I | PTE_G | PTE_FAKE); 1268 break; 1269 } 1270 } 1271#else 1272 flags = 0; 1273#endif 1274 1275 flags |= (PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID); 1276 1277 pte = &(kernel_pmap->pm_pdir[pdir_idx][ptbl_idx]); 1278 1279 if (PTE_ISVALID(pte)) { 1280 //debugf("mmu_booke_kenter: replacing entry!\n"); 1281 1282 /* Flush entry from TLB0 */ 1283 tlb0_flush_entry(kernel_pmap, va); 1284 } 1285 1286 pte->rpn = pa & ~PTE_PA_MASK; 1287 pte->flags = flags; 1288 1289 //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " 1290 // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", 1291 // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); 1292 1293 /* Flush the real memory from the instruction cache. */ 1294 if ((flags & (PTE_I | PTE_G)) == 0) { 1295 __syncicache((void *)va, PAGE_SIZE); 1296 } 1297 1298 //debugf("mmu_booke_kenter: e\n"); 1299} 1300 1301/* 1302 * Remove a page from kernel page table. 1303 */ 1304static void 1305mmu_booke_kremove(mmu_t mmu, vm_offset_t va) 1306{ 1307 unsigned int pdir_idx = PDIR_IDX(va); 1308 unsigned int ptbl_idx = PTBL_IDX(va); 1309 pte_t *pte; 1310 1311 //debugf("mmu_booke_kremove: s (va = 0x%08x)\n", va); 1312 1313 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)), 1314 ("mmu_booke_kremove: invalid va")); 1315 1316 pte = &(kernel_pmap->pm_pdir[pdir_idx][ptbl_idx]); 1317 1318 if (!PTE_ISVALID(pte)) { 1319 //debugf("mmu_booke_kremove: e (invalid pte)\n"); 1320 return; 1321 } 1322 1323 /* Invalidate entry in TLB0. */ 1324 tlb0_flush_entry(kernel_pmap, va); 1325 1326 pte->flags = 0; 1327 pte->rpn = 0; 1328 1329 //debugf("mmu_booke_kremove: e\n"); 1330} 1331 1332/* 1333 * Initialize pmap associated with process 0. 1334 */ 1335static void 1336mmu_booke_pinit0(mmu_t mmu, pmap_t pmap) 1337{ 1338 //debugf("mmu_booke_pinit0: s (pmap = 0x%08x)\n", (u_int32_t)pmap); 1339 mmu_booke_pinit(mmu, pmap); 1340 PCPU_SET(curpmap, pmap); 1341 //debugf("mmu_booke_pinit0: e\n"); 1342} 1343 1344/* 1345 * Initialize a preallocated and zeroed pmap structure, 1346 * such as one in a vmspace structure. 
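 * The new pmap starts out with pm_tid = 0, i.e. no translation ID; a TID
 * is assigned lazily by tid_alloc() when the pmap is first activated in
 * mmu_booke_activate().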
1347 */ 1348static void 1349mmu_booke_pinit(mmu_t mmu, pmap_t pmap) 1350{ 1351 1352 //struct thread *td; 1353 //struct proc *p; 1354 1355 //td = PCPU_GET(curthread); 1356 //p = td->td_proc; 1357 //debugf("mmu_booke_pinit: s (pmap = 0x%08x)\n", (u_int32_t)pmap); 1358 //printf("mmu_booke_pinit: proc %d '%s'\n", p->p_pid, p->p_comm); 1359 1360 KASSERT((pmap != kernel_pmap), ("mmu_booke_pinit: initializing kernel_pmap")); 1361 1362 PMAP_LOCK_INIT(pmap); 1363 pmap->pm_tid = 0; 1364 pmap->pm_active = 0; 1365 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1366 bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); 1367 1368 TAILQ_INIT(&pmap->ptbl_list); 1369 1370 //debugf("mmu_booke_pinit: e\n"); 1371} 1372 1373/* 1374 * Release any resources held by the given physical map. 1375 * Called when a pmap initialized by mmu_booke_pinit is being released. 1376 * Should only be called if the map contains no valid mappings. 1377 */ 1378static void 1379mmu_booke_release(mmu_t mmu, pmap_t pmap) 1380{ 1381 1382 //debugf("mmu_booke_release: s\n"); 1383 1384 PMAP_LOCK_DESTROY(pmap); 1385 1386 //debugf("mmu_booke_release: e\n"); 1387} 1388 1389#if 0 1390/* Not needed, kernel page tables are statically allocated. */ 1391void 1392mmu_booke_growkernel(vm_offset_t maxkvaddr) 1393{ 1394} 1395#endif 1396 1397/* 1398 * Insert the given physical page at the specified virtual address in the 1399 * target physical map with the protection requested. If specified the page 1400 * will be wired down. 1401 */ 1402static void 1403mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 1404 vm_prot_t prot, boolean_t wired) 1405{ 1406 vm_page_lock_queues(); 1407 PMAP_LOCK(pmap); 1408 mmu_booke_enter_locked(mmu, pmap, va, m, prot, wired); 1409 vm_page_unlock_queues(); 1410 PMAP_UNLOCK(pmap); 1411} 1412 1413static void 1414mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 1415 vm_prot_t prot, boolean_t wired) 1416{ 1417 pte_t *pte; 1418 vm_paddr_t pa; 1419 u_int32_t flags; 1420 int su, sync; 1421 1422 pa = VM_PAGE_TO_PHYS(m); 1423 su = (pmap == kernel_pmap); 1424 sync = 0; 1425 1426 //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " 1427 // "pa=0x%08x prot=0x%08x wired=%d)\n", 1428 // (u_int32_t)pmap, su, pmap->pm_tid, 1429 // (u_int32_t)m, va, pa, prot, wired); 1430 1431 if (su) { 1432 KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)), 1433 ("mmu_booke_enter_locked: kernel pmap, non kernel va")); 1434 } else { 1435 KASSERT((va <= VM_MAXUSER_ADDRESS), 1436 ("mmu_booke_enter_locked: user pmap, non user va")); 1437 } 1438 1439 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1440 1441 /* 1442 * If there is an existing mapping, and the physical address has not 1443 * changed, must be protection or wiring change. 1444 */ 1445 if (((pte = pte_find(mmu, pmap, va)) != NULL) && 1446 (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { 1447 1448 //debugf("mmu_booke_enter_locked: update\n"); 1449 1450 /* Wiring change, just update stats. */ 1451 if (wired) { 1452 if (!PTE_ISWIRED(pte)) { 1453 pte->flags |= PTE_WIRED; 1454 pmap->pm_stats.wired_count++; 1455 } 1456 } else { 1457 if (PTE_ISWIRED(pte)) { 1458 pte->flags &= ~PTE_WIRED; 1459 pmap->pm_stats.wired_count--; 1460 } 1461 } 1462 1463 /* Save the old bits and clear the ones we're interested in. */ 1464 flags = pte->flags; 1465 pte->flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); 1466 1467 if (prot & VM_PROT_WRITE) { 1468 /* Add write permissions. 
*/ 1469 pte->flags |= PTE_SW; 1470 if (!su) 1471 pte->flags |= PTE_UW; 1472 } else { 1473 /* Handle modified pages, sense modify status. */ 1474 if (PTE_ISMODIFIED(pte)) 1475 vm_page_dirty(m); 1476 } 1477 1478 /* If we're turning on execute permissions, flush the icache. */ 1479 if (prot & VM_PROT_EXECUTE) { 1480 pte->flags |= PTE_SX; 1481 if (!su) 1482 pte->flags |= PTE_UX; 1483 1484 if ((flags & (PTE_UX | PTE_SX)) == 0) 1485 sync++; 1486 } 1487 1488 /* Flush the old mapping from TLB0. */ 1489 pte->flags &= ~PTE_REFERENCED; 1490 tlb0_flush_entry(pmap, va); 1491 } else { 1492 /* 1493 * If there is an existing mapping, but its for a different 1494 * physical address, pte_enter() will delete the old mapping. 1495 */ 1496 //if ((pte != NULL) && PTE_ISVALID(pte)) 1497 // debugf("mmu_booke_enter_locked: replace\n"); 1498 //else 1499 // debugf("mmu_booke_enter_locked: new\n"); 1500 1501 /* Now set up the flags and install the new mapping. */ 1502 flags = (PTE_SR | PTE_VALID); 1503 1504 if (!su) 1505 flags |= PTE_UR; 1506 1507 if (prot & VM_PROT_WRITE) { 1508 flags |= PTE_SW; 1509 if (!su) 1510 flags |= PTE_UW; 1511 } 1512 1513 if (prot & VM_PROT_EXECUTE) { 1514 flags |= PTE_SX; 1515 if (!su) 1516 flags |= PTE_UX; 1517 } 1518 1519 /* If its wired update stats. */ 1520 if (wired) { 1521 pmap->pm_stats.wired_count++; 1522 flags |= PTE_WIRED; 1523 } 1524 1525 pte_enter(mmu, pmap, m, va, flags); 1526 1527 /* Flush the real memory from the instruction cache. */ 1528 if (prot & VM_PROT_EXECUTE) 1529 sync++; 1530 } 1531 1532 if (sync && (su || pmap == PCPU_GET(curpmap))) { 1533 __syncicache((void *)va, PAGE_SIZE); 1534 sync = 0; 1535 } 1536 1537 if (sync) { 1538 /* Create a temporary mapping. */ 1539 pmap = PCPU_GET(curpmap); 1540 1541 va = 0; 1542 pte = pte_find(mmu, pmap, va); 1543 KASSERT(pte == NULL, ("%s:%d", __func__, __LINE__)); 1544 1545 flags = PTE_SR | PTE_VALID | PTE_UR; 1546 pte_enter(mmu, pmap, m, va, flags); 1547 __syncicache((void *)va, PAGE_SIZE); 1548 pte_remove(mmu, pmap, va, PTBL_UNHOLD); 1549 } 1550 1551 //debugf("mmu_booke_enter_locked: e\n"); 1552} 1553 1554/* 1555 * Maps a sequence of resident pages belonging to the same object. 1556 * The sequence begins with the given page m_start. This page is 1557 * mapped at the given virtual address start. Each subsequent page is 1558 * mapped at a virtual address that is offset from start by the same 1559 * amount as the page is offset from m_start within the object. The 1560 * last page in the sequence is the page with the largest offset from 1561 * m_start that can be mapped at a virtual address less than the given 1562 * virtual address end. Not every virtual page between start and end 1563 * is mapped; only those for which a resident page exists with the 1564 * corresponding offset from m_start are mapped. 
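 *
 * In this pmap the mappings created here are entered read-only (plus
 * execute when requested) and unwired: the code below masks prot with
 * VM_PROT_READ | VM_PROT_EXECUTE and passes FALSE for wired to
 * mmu_booke_enter_locked(); write access, if needed, is added later via
 * the regular fault path.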
1565 */ 1566static void 1567mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, 1568 vm_offset_t end, vm_page_t m_start, vm_prot_t prot) 1569{ 1570 vm_page_t m; 1571 vm_pindex_t diff, psize; 1572 1573 psize = atop(end - start); 1574 m = m_start; 1575 PMAP_LOCK(pmap); 1576 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 1577 mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, prot & 1578 (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); 1579 m = TAILQ_NEXT(m, listq); 1580 } 1581 PMAP_UNLOCK(pmap); 1582} 1583 1584static void 1585mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 1586 vm_prot_t prot) 1587{ 1588 1589 //debugf("mmu_booke_enter_quick: s\n"); 1590 1591 PMAP_LOCK(pmap); 1592 mmu_booke_enter_locked(mmu, pmap, va, m, 1593 prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); 1594 PMAP_UNLOCK(pmap); 1595 1596 //debugf("mmu_booke_enter_quick e\n"); 1597} 1598 1599/* 1600 * Remove the given range of addresses from the specified map. 1601 * 1602 * It is assumed that the start and end are properly rounded to the page size. 1603 */ 1604static void 1605mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva) 1606{ 1607 pte_t *pte; 1608 u_int8_t hold_flag; 1609 1610 int su = (pmap == kernel_pmap); 1611 1612 //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", 1613 // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); 1614 1615 if (su) { 1616 KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)), 1617 ("mmu_booke_enter: kernel pmap, non kernel va")); 1618 } else { 1619 KASSERT((va <= VM_MAXUSER_ADDRESS), 1620 ("mmu_booke_enter: user pmap, non user va")); 1621 } 1622 1623 if (PMAP_REMOVE_DONE(pmap)) { 1624 //debugf("mmu_booke_remove: e (empty)\n"); 1625 return; 1626 } 1627 1628 hold_flag = PTBL_HOLD_FLAG(pmap); 1629 //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); 1630 1631 vm_page_lock_queues(); 1632 PMAP_LOCK(pmap); 1633 for (; va < endva; va += PAGE_SIZE) { 1634 pte = pte_find(mmu, pmap, va); 1635 if ((pte != NULL) && PTE_ISVALID(pte)) { 1636 pte_remove(mmu, pmap, va, hold_flag); 1637 1638 /* Flush mapping from TLB0. */ 1639 tlb0_flush_entry(pmap, va); 1640 } 1641 } 1642 PMAP_UNLOCK(pmap); 1643 vm_page_unlock_queues(); 1644 1645 //debugf("mmu_booke_remove: e\n"); 1646} 1647 1648/* 1649 * Remove physical page from all pmaps in which it resides. 1650 */ 1651static void 1652mmu_booke_remove_all(mmu_t mmu, vm_page_t m) 1653{ 1654 pv_entry_t pv, pvn; 1655 u_int8_t hold_flag; 1656 1657 //debugf("mmu_booke_remove_all: s\n"); 1658 1659 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1660 1661 for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { 1662 pvn = TAILQ_NEXT(pv, pv_link); 1663 1664 PMAP_LOCK(pv->pv_pmap); 1665 hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); 1666 pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag); 1667 1668 /* Flush mapping from TLB0. */ 1669 tlb0_flush_entry(pv->pv_pmap, pv->pv_va); 1670 PMAP_UNLOCK(pv->pv_pmap); 1671 } 1672 vm_page_flag_clear(m, PG_WRITEABLE); 1673 1674 //debugf("mmu_booke_remove_all: e\n"); 1675} 1676 1677/* 1678 * Map a range of physical addresses into kernel virtual address space. 1679 * 1680 * The value passed in *virt is a suggested virtual address for the mapping. 1681 * Architectures which can support a direct-mapped physical to virtual region 1682 * can return the appropriate address within that region, leaving '*virt' 1683 * unchanged. 
We cannot and therefore do not; *virt is updated with the 1684 * first usable address after the mapped region. 1685 */ 1686static vm_offset_t 1687mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start, 1688 vm_offset_t pa_end, int prot) 1689{ 1690 vm_offset_t sva = *virt; 1691 vm_offset_t va = sva; 1692 1693 //debugf("mmu_booke_map: s (sva = 0x%08x pa_start = 0x%08x pa_end = 0x%08x)\n", 1694 // sva, pa_start, pa_end); 1695 1696 while (pa_start < pa_end) { 1697 mmu_booke_kenter(mmu, va, pa_start); 1698 va += PAGE_SIZE; 1699 pa_start += PAGE_SIZE; 1700 } 1701 *virt = va; 1702 1703 //debugf("mmu_booke_map: e (va = 0x%08x)\n", va); 1704 return (sva); 1705} 1706 1707/* 1708 * The pmap must be activated before it's address space can be accessed in any 1709 * way. 1710 */ 1711static void 1712mmu_booke_activate(mmu_t mmu, struct thread *td) 1713{ 1714 pmap_t pmap; 1715 1716 pmap = &td->td_proc->p_vmspace->vm_pmap; 1717 1718 //debugf("mmu_booke_activate: s (proc = '%s', id = %d, pmap = 0x%08x)\n", 1719 // td->td_proc->p_comm, td->td_proc->p_pid, pmap); 1720 1721 KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); 1722 1723 mtx_lock_spin(&sched_lock); 1724 1725 pmap->pm_active |= PCPU_GET(cpumask); 1726 PCPU_SET(curpmap, pmap); 1727 1728 if (!pmap->pm_tid) 1729 tid_alloc(pmap); 1730 1731 /* Load PID0 register with pmap tid value. */ 1732 load_pid0(pmap->pm_tid); 1733 1734 mtx_unlock_spin(&sched_lock); 1735 1736 //debugf("mmu_booke_activate: e (tid = %d for '%s')\n", pmap->pm_tid, 1737 // td->td_proc->p_comm); 1738} 1739 1740/* 1741 * Deactivate the specified process's address space. 1742 */ 1743static void 1744mmu_booke_deactivate(mmu_t mmu, struct thread *td) 1745{ 1746 pmap_t pmap; 1747 1748 pmap = &td->td_proc->p_vmspace->vm_pmap; 1749 pmap->pm_active &= ~(PCPU_GET(cpumask)); 1750 PCPU_SET(curpmap, NULL); 1751} 1752 1753/* 1754 * Copy the range specified by src_addr/len 1755 * from the source map to the range dst_addr/len 1756 * in the destination map. 1757 * 1758 * This routine is only advisory and need not do anything. 1759 */ 1760static void 1761mmu_booke_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 1762 vm_size_t len, vm_offset_t src_addr) 1763{ 1764 1765} 1766 1767/* 1768 * Set the physical protection on the specified range of this map as requested. 1769 */ 1770static void 1771mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, 1772 vm_prot_t prot) 1773{ 1774 vm_offset_t va; 1775 vm_page_t m; 1776 pte_t *pte; 1777 1778 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1779 mmu_booke_remove(mmu, pmap, sva, eva); 1780 return; 1781 } 1782 1783 if (prot & VM_PROT_WRITE) 1784 return; 1785 1786 vm_page_lock_queues(); 1787 PMAP_LOCK(pmap); 1788 for (va = sva; va < eva; va += PAGE_SIZE) { 1789 if ((pte = pte_find(mmu, pmap, va)) != NULL) { 1790 if (PTE_ISVALID(pte)) { 1791 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1792 1793 /* Handle modified pages. */ 1794 if (PTE_ISMODIFIED(pte)) 1795 vm_page_dirty(m); 1796 1797 /* Referenced pages. */ 1798 if (PTE_ISREFERENCED(pte)) 1799 vm_page_flag_set(m, PG_REFERENCED); 1800 1801 /* Flush mapping from TLB0. */ 1802 pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED | 1803 PTE_REFERENCED); 1804 tlb0_flush_entry(pmap, va); 1805 } 1806 } 1807 } 1808 PMAP_UNLOCK(pmap); 1809 vm_page_unlock_queues(); 1810} 1811 1812/* 1813 * Clear the write and modified bits in each of the given page's mappings. 
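 *
 * Any modified or referenced state recorded in a PTE is transferred to the
 * vm_page (vm_page_dirty(), PG_REFERENCED) before the write and modified
 * bits are cleared and the stale TLB0 entry is flushed; PG_WRITEABLE is
 * cleared on the page once all of its mappings have been processed.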
1814 */ 1815static void 1816mmu_booke_remove_write(mmu_t mmu, vm_page_t m) 1817{ 1818 pv_entry_t pv; 1819 pte_t *pte; 1820 1821 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1822 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || 1823 (m->flags & PG_WRITEABLE) == 0) 1824 return; 1825 1826 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 1827 PMAP_LOCK(pv->pv_pmap); 1828 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 1829 if (PTE_ISVALID(pte)) { 1830 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1831 1832 /* Handle modified pages. */ 1833 if (PTE_ISMODIFIED(pte)) 1834 vm_page_dirty(m); 1835 1836 /* Referenced pages. */ 1837 if (PTE_ISREFERENCED(pte)) 1838 vm_page_flag_set(m, PG_REFERENCED); 1839 1840 /* Flush mapping from TLB0. */ 1841 pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED | 1842 PTE_REFERENCED); 1843 tlb0_flush_entry(pv->pv_pmap, pv->pv_va); 1844 } 1845 } 1846 PMAP_UNLOCK(pv->pv_pmap); 1847 } 1848 vm_page_flag_clear(m, PG_WRITEABLE); 1849} 1850 1851static boolean_t 1852mmu_booke_page_executable(mmu_t mmu, vm_page_t m) 1853{ 1854 pv_entry_t pv; 1855 pte_t *pte; 1856 boolean_t executable; 1857 1858 executable = FALSE; 1859 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 1860 PMAP_LOCK(pv->pv_pmap); 1861 pte = pte_find(mmu, pv->pv_pmap, pv->pv_va); 1862 if (pte != NULL && PTE_ISVALID(pte) && (pte->flags & PTE_UX)) 1863 executable = TRUE; 1864 PMAP_UNLOCK(pv->pv_pmap); 1865 if (executable) 1866 break; 1867 } 1868 1869 return (executable); 1870} 1871 1872/* 1873 * Atomically extract and hold the physical page with the given 1874 * pmap and virtual address pair if that mapping permits the given 1875 * protection. 1876 */ 1877static vm_page_t 1878mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, 1879 vm_prot_t prot) 1880{ 1881 pte_t *pte; 1882 vm_page_t m; 1883 u_int32_t pte_wbit; 1884 1885 m = NULL; 1886 vm_page_lock_queues(); 1887 PMAP_LOCK(pmap); 1888 pte = pte_find(mmu, pmap, va); 1889 1890 if ((pte != NULL) && PTE_ISVALID(pte)) { 1891 if (pmap == kernel_pmap) 1892 pte_wbit = PTE_SW; 1893 else 1894 pte_wbit = PTE_UW; 1895 1896 if ((pte->flags & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { 1897 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1898 vm_page_hold(m); 1899 } 1900 } 1901 1902 vm_page_unlock_queues(); 1903 PMAP_UNLOCK(pmap); 1904 return (m); 1905} 1906 1907/* 1908 * Initialize a vm_page's machine-dependent fields. 1909 */ 1910static void 1911mmu_booke_page_init(mmu_t mmu, vm_page_t m) 1912{ 1913 1914 TAILQ_INIT(&m->md.pv_list); 1915} 1916 1917/* 1918 * mmu_booke_zero_page_area zeros the specified hardware page by 1919 * mapping it into virtual memory and using bzero to clear 1920 * its contents. 1921 * 1922 * off and size must reside within a single page. 1923 */ 1924static void 1925mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 1926{ 1927 vm_offset_t va; 1928 1929 //debugf("mmu_booke_zero_page_area: s\n"); 1930 1931 mtx_lock(&zero_page_mutex); 1932 va = zero_page_va; 1933 1934 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 1935 bzero((caddr_t)va + off, size); 1936 mmu_booke_kremove(mmu, va); 1937 1938 mtx_unlock(&zero_page_mutex); 1939 1940 //debugf("mmu_booke_zero_page_area: e\n"); 1941} 1942 1943/* 1944 * mmu_booke_zero_page zeros the specified hardware page. 
1945 */ 1946static void 1947mmu_booke_zero_page(mmu_t mmu, vm_page_t m) 1948{ 1949 1950 //debugf("mmu_booke_zero_page: s\n"); 1951 mmu_booke_zero_page_area(mmu, m, 0, PAGE_SIZE); 1952 //debugf("mmu_booke_zero_page: e\n"); 1953} 1954 1955/* 1956 * mmu_booke_copy_page copies the specified (machine independent) page by 1957 * mapping the page into virtual memory and using memcpy to copy the page, 1958 * one machine dependent page at a time. 1959 */ 1960static void 1961mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) 1962{ 1963 vm_offset_t sva, dva; 1964 1965 //debugf("mmu_booke_copy_page: s\n"); 1966 1967 mtx_lock(&copy_page_mutex); 1968 sva = copy_page_src_va; 1969 dva = copy_page_dst_va; 1970 1971 mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); 1972 mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); 1973 memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); 1974 mmu_booke_kremove(mmu, dva); 1975 mmu_booke_kremove(mmu, sva); 1976 1977 mtx_unlock(&copy_page_mutex); 1978 1979 //debugf("mmu_booke_copy_page: e\n"); 1980} 1981 1982#if 0 1983/* 1984 * Remove all pages from specified address space, this aids process exit 1985 * speeds. This is much faster than mmu_booke_remove in the case of running 1986 * down an entire address space. Only works for the current pmap. 1987 */ 1988void 1989mmu_booke_remove_pages(pmap_t pmap) 1990{ 1991} 1992#endif 1993 1994/* 1995 * mmu_booke_zero_page_idle zeros the specified hardware page by mapping it 1996 * into virtual memory and using bzero to clear its contents. This is intended 1997 * to be called from the vm_pagezero process only and outside of Giant. No 1998 * lock is required. 1999 */ 2000static void 2001mmu_booke_zero_page_idle(mmu_t mmu, vm_page_t m) 2002{ 2003 vm_offset_t va; 2004 2005 //debugf("mmu_booke_zero_page_idle: s\n"); 2006 2007 va = zero_page_idle_va; 2008 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2009 bzero((caddr_t)va, PAGE_SIZE); 2010 mmu_booke_kremove(mmu, va); 2011 2012 //debugf("mmu_booke_zero_page_idle: e\n"); 2013} 2014 2015/* 2016 * Return whether or not the specified physical page was modified 2017 * in any of the physical maps. 2018 */ 2019static boolean_t 2020mmu_booke_is_modified(mmu_t mmu, vm_page_t m) 2021{ 2022 pte_t *pte; 2023 pv_entry_t pv; 2024 2025 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2026 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) 2027 return (FALSE); 2028 2029 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2030 PMAP_LOCK(pv->pv_pmap); 2031 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2032 if (!PTE_ISVALID(pte)) 2033 goto make_sure_to_unlock; 2034 2035 if (PTE_ISMODIFIED(pte)) { 2036 PMAP_UNLOCK(pv->pv_pmap); 2037 return (TRUE); 2038 } 2039 } 2040make_sure_to_unlock: 2041 PMAP_UNLOCK(pv->pv_pmap); 2042 } 2043 return (FALSE); 2044} 2045 2046/* 2047 * Return whether or not the specified virtual address is eligible 2048 * for prefault. 2049 */ 2050static boolean_t 2051mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) 2052{ 2053 2054 return (FALSE); 2055} 2056 2057/* 2058 * Clear the modify bits on the specified physical page.
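 * Every valid mapping of the page has its PTE_SW/PTE_UW/PTE_MODIFIED (and
 * PTE_REFERENCED) bits cleared and its TLB0 entry flushed.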
2059 */ 2060static void 2061mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) 2062{ 2063 pte_t *pte; 2064 pv_entry_t pv; 2065 2066 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2067 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) 2068 return; 2069 2070 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2071 PMAP_LOCK(pv->pv_pmap); 2072 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2073 if (!PTE_ISVALID(pte)) 2074 goto make_sure_to_unlock; 2075 2076 if (pte->flags & (PTE_SW | PTE_UW | PTE_MODIFIED)) { 2077 pte->flags &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | 2078 PTE_REFERENCED); 2079 tlb0_flush_entry(pv->pv_pmap, pv->pv_va); 2080 } 2081 } 2082make_sure_to_unlock: 2083 PMAP_UNLOCK(pv->pv_pmap); 2084 } 2085} 2086 2087/* 2088 * Return a count of reference bits for a page, clearing those bits. 2089 * It is not necessary for every reference bit to be cleared, but it 2090 * is necessary that 0 only be returned when there are truly no 2091 * reference bits set. 2092 * 2093 * XXX: The exact number of bits to check and clear is a matter that 2094 * should be tested and standardized at some point in the future for 2095 * optimal aging of shared pages. 2096 */ 2097static int 2098mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) 2099{ 2100 pte_t *pte; 2101 pv_entry_t pv; 2102 int count; 2103 2104 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2105 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) 2106 return (0); 2107 2108 count = 0; 2109 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2110 PMAP_LOCK(pv->pv_pmap); 2111 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2112 if (!PTE_ISVALID(pte)) 2113 goto make_sure_to_unlock; 2114 2115 if (PTE_ISREFERENCED(pte)) { 2116 pte->flags &= ~PTE_REFERENCED; 2117 tlb0_flush_entry(pv->pv_pmap, pv->pv_va); 2118 2119 if (++count > 4) { 2120 PMAP_UNLOCK(pv->pv_pmap); 2121 break; 2122 } 2123 } 2124 } 2125make_sure_to_unlock: 2126 PMAP_UNLOCK(pv->pv_pmap); 2127 } 2128 return (count); 2129} 2130 2131/* 2132 * Clear the reference bit on the specified physical page. 2133 */ 2134static void 2135mmu_booke_clear_reference(mmu_t mmu, vm_page_t m) 2136{ 2137 pte_t *pte; 2138 pv_entry_t pv; 2139 2140 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2141 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) 2142 return; 2143 2144 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2145 PMAP_LOCK(pv->pv_pmap); 2146 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2147 if (!PTE_ISVALID(pte)) 2148 goto make_sure_to_unlock; 2149 2150 if (PTE_ISREFERENCED(pte)) { 2151 pte->flags &= ~PTE_REFERENCED; 2152 tlb0_flush_entry(pv->pv_pmap, pv->pv_va); 2153 } 2154 } 2155make_sure_to_unlock: 2156 PMAP_UNLOCK(pv->pv_pmap); 2157 } 2158} 2159 2160/* 2161 * Change wiring attribute for a map/virtual-address pair. 2162 */ 2163static void 2164mmu_booke_change_wiring(mmu_t mmu, pmap_t pmap, vm_offset_t va, boolean_t wired) 2165{ 2166 pte_t *pte; 2167 2168 PMAP_LOCK(pmap); 2169 if ((pte = pte_find(mmu, pmap, va)) != NULL) { 2170 if (wired) { 2171 if (!PTE_ISWIRED(pte)) { 2172 pte->flags |= PTE_WIRED; 2173 pmap->pm_stats.wired_count++; 2174 } 2175 } else { 2176 if (PTE_ISWIRED(pte)) { 2177 pte->flags &= ~PTE_WIRED; 2178 pmap->pm_stats.wired_count--; 2179 } 2180 } 2181 } 2182 PMAP_UNLOCK(pmap); 2183} 2184 2185/* 2186 * Return true if the pmap's pv is one of the first 16 pvs linked to from this 2187 * page. This count may be changed upwards or downwards in the future; it is 2188 * only necessary that true be returned for a small subset of pmaps for proper 2189 * page aging.
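 * Only the first 16 pv entries are examined, so the check stays cheap even
 * for heavily shared pages.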
2190 */ 2191static boolean_t 2192mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 2193{ 2194 pv_entry_t pv; 2195 int loops; 2196 2197 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2198 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) 2199 return (FALSE); 2200 2201 loops = 0; 2202 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2203 2204 if (pv->pv_pmap == pmap) 2205 return (TRUE); 2206 2207 if (++loops >= 16) 2208 break; 2209 } 2210 return (FALSE); 2211} 2212 2213/* 2214 * Return the number of managed mappings to the given physical page that are 2215 * wired. 2216 */ 2217static int 2218mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m) 2219{ 2220 pv_entry_t pv; 2221 pte_t *pte; 2222 int count = 0; 2223 2224 if ((m->flags & PG_FICTITIOUS) != 0) 2225 return (count); 2226 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2227 2228 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2229 PMAP_LOCK(pv->pv_pmap); 2230 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) 2231 if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) 2232 count++; 2233 PMAP_UNLOCK(pv->pv_pmap); 2234 } 2235 2236 return (count); 2237} 2238 2239static int 2240mmu_booke_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size) 2241{ 2242 int i; 2243 vm_offset_t va; 2244 2245 /* 2246 * This currently does not work for entries that 2247 * overlap TLB1 entries. 2248 */ 2249 for (i = 0; i < tlb1_idx; i ++) { 2250 if (tlb1_iomapped(i, pa, size, &va) == 0) 2251 return (0); 2252 } 2253 2254 return (EFAULT); 2255} 2256 2257/* 2258 * Map a set of physical memory pages into the kernel virtual address space. 2259 * Return a pointer to where it is mapped. This routine is intended to be used 2260 * for mapping device memory, NOT real memory. 2261 */ 2262static void * 2263mmu_booke_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size) 2264{ 2265 uintptr_t va; 2266 2267 va = (pa >= 0x80000000) ? pa : (0xe2000000 + pa); 2268 if (bootverbose) 2269 printf("Wiring VA=%x to PA=%x (size=%x), using TLB1[%d]\n", 2270 va, pa, size, tlb1_idx); 2271 tlb1_set_entry(va, pa, size, _TLB_ENTRY_IO); 2272 return ((void *)va); 2273} 2274 2275/* 2276 * 'Unmap' a range mapped by mmu_booke_mapdev(). 2277 */ 2278static void 2279mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 2280{ 2281 vm_offset_t base, offset; 2282 2283 //debugf("mmu_booke_unmapdev: s (va = 0x%08x)\n", va); 2284 2285 /* 2286 * Unmap only if this is inside kernel virtual space. 2287 */ 2288 if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { 2289 base = trunc_page(va); 2290 offset = va & PAGE_MASK; 2291 size = roundup(offset + size, PAGE_SIZE); 2292 kmem_free(kernel_map, base, size); 2293 } 2294 2295 //debugf("mmu_booke_unmapdev: e\n"); 2296} 2297 2298/* 2299 * mmu_booke_object_init_pt preloads the ptes for a given object 2300 * into the specified pmap. This eliminates the blast of soft 2301 * faults on process startup and immediately after an mmap. 2302 */ 2303static void 2304mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 2305 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 2306{ 2307 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2308 KASSERT(object->type == OBJT_DEVICE, 2309 ("mmu_booke_object_init_pt: non-device object")); 2310} 2311 2312/* 2313 * Perform the pmap work for mincore. 
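 * (Currently unimplemented: the body below only hits the TODO placeholder
 * and returns 0.)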
2314 */ 2315static int 2316mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr) 2317{ 2318 2319 TODO; 2320 return (0); 2321} 2322 2323static vm_offset_t 2324mmu_booke_addr_hint(mmu_t mmu, vm_object_t object, vm_offset_t va, 2325 vm_size_t size) 2326{ 2327 2328 return (va); 2329} 2330 2331/**************************************************************************/ 2332/* TID handling */ 2333/**************************************************************************/ 2334/* 2335 * Flush all entries from TLB0 matching given tid. 2336 */ 2337static void 2338tid_flush(tlbtid_t tid) 2339{ 2340 int i, entryidx, way; 2341 2342 //debugf("tid_flush: s (tid = %d)\n", tid); 2343 2344 mtx_lock_spin(&tlb0_mutex); 2345 2346 for (i = 0; i < TLB0_SIZE; i++) { 2347 if (MAS1_GETTID(tlb0[i].mas1) == tid) { 2348 way = i / TLB0_ENTRIES_PER_WAY; 2349 entryidx = i - (way * TLB0_ENTRIES_PER_WAY); 2350 2351 //debugf("tid_flush: inval tlb0 entry %d\n", i); 2352 tlb0_inval_entry(entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT, way); 2353 } 2354 } 2355 2356 mtx_unlock_spin(&tlb0_mutex); 2357 2358 //debugf("tid_flush: e\n"); 2359} 2360 2361/* 2362 * Allocate a TID. If necessary, steal one from someone else. 2363 * The new TID is flushed from the TLB before returning. 2364 */ 2365static tlbtid_t 2366tid_alloc(pmap_t pmap) 2367{ 2368 tlbtid_t tid; 2369 static tlbtid_t next_tid = TID_MIN; 2370 2371 //struct thread *td; 2372 //struct proc *p; 2373 2374 //td = PCPU_GET(curthread); 2375 //p = td->td_proc; 2376 //debugf("tid_alloc: s (pmap = 0x%08x)\n", (u_int32_t)pmap); 2377 //printf("tid_alloc: proc %d '%s'\n", p->p_pid, p->p_comm); 2378 2379 KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); 2380 2381 /* 2382 * Find a likely TID, allocate unused if possible, 2383 * skip reserved entries. 2384 */ 2385 tid = next_tid; 2386 while (tidbusy[tid] != NULL) { 2387 if (tid == next_tid) 2388 break; 2389 2390 if (tid == TID_MAX) 2391 tid = TID_MIN; 2392 else 2393 tid++; 2394 2395 } 2396 2397 /* Now clean it out */ 2398 tid_flush(tid); 2399 2400 /* If we are stealing pmap then clear its tid */ 2401 if (tidbusy[tid]) { 2402 //debugf("warning: stealing tid %d\n", tid); 2403 tidbusy[tid]->pm_tid = 0; 2404 } 2405 2406 /* Calculate next tid */ 2407 if (tid == TID_MAX) 2408 next_tid = TID_MIN; 2409 else 2410 next_tid = tid + 1; 2411 2412 tidbusy[tid] = pmap; 2413 pmap->pm_tid = tid; 2414 2415 //debugf("tid_alloc: e (%02d next = %02d)\n", tid, next_tid); 2416 return (tid); 2417} 2418 2419#if 0 2420/* 2421 * Free this pmap's TID. 
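 * The tidbusy[] slot is released and any stale TLB0 entries for the TID are
 * flushed. Note that this function is under #if 0 and not compiled in.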
2422 */ 2423static void 2424tid_free(pmap_t pmap) 2425{ 2426 tlbtid_t oldtid; 2427 2428 oldtid = pmap->pm_tid; 2429 2430 if (oldtid == 0) { 2431 panic("tid_free: freeing kernel tid"); 2432 } 2433 2434#ifdef DEBUG 2435 if (tidbusy[oldtid] == 0) 2436 debugf("tid_free: freeing free tid %d\n", oldtid); 2437 if (tidbusy[oldtid] != pmap) { 2438 debugf("tid_free: freeing someone else's tid\n " 2439 "tidbusy[%d] = 0x%08x pmap = 0x%08x\n", 2440 oldtid, (u_int32_t)tidbusy[oldtid], (u_int32_t)pmap); 2441 } 2442#endif 2443 2444 tidbusy[oldtid] = NULL; 2445 tid_flush(oldtid); 2446} 2447#endif 2448 2449#if 0 2450#if DEBUG 2451static void 2452tid_print_busy(void) 2453{ 2454 int i; 2455 2456 for (i = 0; i < TID_MAX; i++) { 2457 debugf("tid %d = pmap 0x%08x", i, (u_int32_t)tidbusy[i]); 2458 if (tidbusy[i]) 2459 debugf(" pmap->tid = %d", tidbusy[i]->pm_tid); 2460 debugf("\n"); 2461 } 2462 2463} 2464#endif /* DEBUG */ 2465#endif 2466 2467/**************************************************************************/ 2468/* TLB0 handling */ 2469/**************************************************************************/ 2470 2471static void 2472tlb_print_entry(int i, u_int32_t mas1, u_int32_t mas2, u_int32_t mas3, u_int32_t mas7) 2473{ 2474 int as; 2475 char desc[3]; 2476 tlbtid_t tid; 2477 vm_size_t size; 2478 unsigned int tsize; 2479 2480 desc[2] = '\0'; 2481 if (mas1 & MAS1_VALID) 2482 desc[0] = 'V'; 2483 else 2484 desc[0] = ' '; 2485 2486 if (mas1 & MAS1_IPROT) 2487 desc[1] = 'P'; 2488 else 2489 desc[1] = ' '; 2490 2491 as = (mas1 & MAS1_TS) ? 1 : 0; 2492 tid = MAS1_GETTID(mas1); 2493 2494 tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 2495 size = 0; 2496 if (tsize) 2497 size = tsize2size(tsize); 2498 2499 debugf("%3d: (%s) [AS=%d] " 2500 "sz = 0x%08x tsz = %d tid = %d mas1 = 0x%08x " 2501 "mas2(va) = 0x%08x mas3(pa) = 0x%08x mas7 = 0x%08x\n", 2502 i, desc, as, size, tsize, tid, mas1, mas2, mas3, mas7); 2503} 2504 2505/* Convert TLB0 va and way number to tlb0[] table index. */ 2506static inline unsigned int 2507tlb0_tableidx(vm_offset_t va, unsigned int way) 2508{ 2509 unsigned int idx; 2510 2511 idx = (way * TLB0_ENTRIES_PER_WAY); 2512 idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; 2513 return (idx); 2514} 2515 2516/* 2517 * Write given entry to TLB0 hardware. 2518 * Use 32 bit pa, clear 4 high-order bits of RPN (mas7). 2519 */ 2520static void 2521tlb0_write_entry(unsigned int idx, unsigned int way) 2522{ 2523 u_int32_t mas0, mas7, nv; 2524 2525 /* Clear high order RPN bits. */ 2526 mas7 = 0; 2527 2528 /* Preserve NV. */ 2529 mas0 = mfspr(SPR_MAS0); 2530 nv = mas0 & (TLB0_NWAYS - 1); 2531 2532 /* Select entry. */ 2533 mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way) | nv; 2534 2535 //debugf("tlb0_write_entry: s (idx=%d way=%d mas0=0x%08x " 2536 // "mas1=0x%08x mas2=0x%08x mas3=0x%08x)\n", 2537 // idx, way, mas0, tlb0[idx].mas1, 2538 // tlb0[idx].mas2, tlb0[idx].mas3); 2539 2540 mtspr(SPR_MAS0, mas0); 2541 __asm volatile("isync"); 2542 mtspr(SPR_MAS1, tlb0[idx].mas1); 2543 __asm volatile("isync"); 2544 mtspr(SPR_MAS2, tlb0[idx].mas2); 2545 __asm volatile("isync"); 2546 mtspr(SPR_MAS3, tlb0[idx].mas3); 2547 __asm volatile("isync"); 2548 mtspr(SPR_MAS7, mas7); 2549 __asm volatile("isync; tlbwe; isync; msync"); 2550 2551 //debugf("tlb0_write_entry: e\n"); 2552} 2553 2554/* 2555 * Invalidate TLB0 entry, clear corresponding tlb0 table element.
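 * The in-memory tlb0[] copy is rewritten with MAS1_VALID cleared (keeping a
 * non-zero TSIZE) and then pushed to the hardware via tlb0_write_entry().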
2556 */ 2557static void 2558tlb0_inval_entry(vm_offset_t va, unsigned int way) 2559{ 2560 int idx = tlb0_tableidx(va, way); 2561 2562 //debugf("tlb0_inval_entry: s (va=0x%08x way=%d idx=%d)\n", 2563 // va, way, idx); 2564 2565 tlb0[idx].mas1 = 1 << MAS1_TSIZE_SHIFT; /* !MAS1_VALID */ 2566 tlb0[idx].mas2 = va & MAS2_EPN; 2567 tlb0[idx].mas3 = 0; 2568 2569 tlb0_write_entry(idx, way); 2570 2571 //debugf("tlb0_inval_entry: e\n"); 2572} 2573 2574/* 2575 * Invalidate TLB0 entry that corresponds to pmap/va. 2576 */ 2577static void 2578tlb0_flush_entry(pmap_t pmap, vm_offset_t va) 2579{ 2580 int idx, way; 2581 2582 //debugf("tlb0_flush_entry: s (pmap=0x%08x va=0x%08x)\n", 2583 // (u_int32_t)pmap, va); 2584 2585 mtx_lock_spin(&tlb0_mutex); 2586 2587 /* Check all TLB0 ways. */ 2588 for (way = 0; way < TLB0_NWAYS; way ++) { 2589 idx = tlb0_tableidx(va, way); 2590 2591 /* Invalidate only if entry matches va and pmap tid. */ 2592 if (((MAS1_GETTID(tlb0[idx].mas1) == pmap->pm_tid) && 2593 ((tlb0[idx].mas2 & MAS2_EPN) == va))) { 2594 tlb0_inval_entry(va, way); 2595 } 2596 } 2597 2598 mtx_unlock_spin(&tlb0_mutex); 2599 2600 //debugf("tlb0_flush_entry: e\n"); 2601} 2602 2603/* Clean TLB0 hardware and tlb0[] table. */ 2604static void 2605tlb0_init(void) 2606{ 2607 int entryidx, way; 2608 2609 debugf("tlb0_init: TLB0_SIZE = %d TLB0_NWAYS = %d\n", 2610 TLB0_SIZE, TLB0_NWAYS); 2611 2612 mtx_lock_spin(&tlb0_mutex); 2613 2614 for (way = 0; way < TLB0_NWAYS; way ++) { 2615 for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { 2616 tlb0_inval_entry(entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT, way); 2617 } 2618 } 2619 2620 mtx_unlock_spin(&tlb0_mutex); 2621} 2622 2623#if 0 2624#if DEBUG 2625/* Print out tlb0 entries for given va. */ 2626static void 2627tlb0_print_tlbentries_va(vm_offset_t va) 2628{ 2629 u_int32_t mas0, mas1, mas2, mas3, mas7; 2630 int way, idx; 2631 2632 debugf("TLB0 entries for va = 0x%08x:\n", va); 2633 for (way = 0; way < TLB0_NWAYS; way ++) { 2634 mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); 2635 mtspr(SPR_MAS0, mas0); 2636 __asm volatile("isync"); 2637 2638 mas2 = va & MAS2_EPN; 2639 mtspr(SPR_MAS2, mas2); 2640 __asm volatile("isync; tlbre"); 2641 2642 mas1 = mfspr(SPR_MAS1); 2643 mas2 = mfspr(SPR_MAS2); 2644 mas3 = mfspr(SPR_MAS3); 2645 mas7 = mfspr(SPR_MAS7); 2646 2647 idx = tlb0_tableidx(va, way); 2648 tlb_print_entry(idx, mas1, mas2, mas3, mas7); 2649 } 2650} 2651 2652/* Print out contents of the MAS registers for each TLB0 entry */ 2653static void 2654tlb0_print_tlbentries(void) 2655{ 2656 u_int32_t mas0, mas1, mas2, mas3, mas7; 2657 int entryidx, way, idx; 2658 2659 debugf("TLB0 entries:\n"); 2660 for (way = 0; way < TLB0_NWAYS; way ++) { 2661 for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { 2662 2663 mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); 2664 mtspr(SPR_MAS0, mas0); 2665 __asm volatile("isync"); 2666 2667 mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT; 2668 mtspr(SPR_MAS2, mas2); 2669 2670 __asm volatile("isync; tlbre"); 2671 2672 mas1 = mfspr(SPR_MAS1); 2673 mas2 = mfspr(SPR_MAS2); 2674 mas3 = mfspr(SPR_MAS3); 2675 mas7 = mfspr(SPR_MAS7); 2676 2677 idx = tlb0_tableidx(mas2, way); 2678 tlb_print_entry(idx, mas1, mas2, mas3, mas7); 2679 } 2680 } 2681} 2682 2683/* Print out kernel tlb0[] table. 
*/ 2684static void 2685tlb0_print_entries(void) 2686{ 2687 int i; 2688 2689 debugf("tlb0[] table entries:\n"); 2690 for (i = 0; i < TLB0_SIZE; i++) { 2691 tlb_print_entry(i, tlb0[i].mas1, 2692 tlb0[i].mas2, tlb0[i].mas3, 0); 2693 } 2694} 2695#endif /* DEBUG */ 2696#endif 2697 2698/**************************************************************************/ 2699/* TLB1 handling */ 2700/**************************************************************************/ 2701/* 2702 * Write given entry to TLB1 hardware. 2703 * Use 32 bit pa, clear 4 high-order bits of RPN (mas7). 2704 */ 2705static void 2706tlb1_write_entry(unsigned int idx) 2707{ 2708 u_int32_t mas0, mas7; 2709 2710 //debugf("tlb1_write_entry: s\n"); 2711 2712 /* Clear high order RPN bits */ 2713 mas7 = 0; 2714 2715 /* Select entry */ 2716 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx); 2717 //debugf("tlb1_write_entry: mas0 = 0x%08x\n", mas0); 2718 2719 mtspr(SPR_MAS0, mas0); 2720 __asm volatile("isync"); 2721 mtspr(SPR_MAS1, tlb1[idx].mas1); 2722 __asm volatile("isync"); 2723 mtspr(SPR_MAS2, tlb1[idx].mas2); 2724 __asm volatile("isync"); 2725 mtspr(SPR_MAS3, tlb1[idx].mas3); 2726 __asm volatile("isync"); 2727 mtspr(SPR_MAS7, mas7); 2728 __asm volatile("isync; tlbwe; isync; msync"); 2729 2730 //debugf("tlb1_write_entry: e\n"); 2731} 2732 2733/* 2734 * Return the largest uint value log such that 2^log <= num. 2735 */ 2736static unsigned int 2737ilog2(unsigned int num) 2738{ 2739 int lz; 2740 2741 __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); 2742 return (31 - lz); 2743} 2744 2745/* 2746 * Convert TLB TSIZE value to mapped region size. 2747 */ 2748static vm_size_t 2749tsize2size(unsigned int tsize) 2750{ 2751 2752 /* 2753 * size = 4^tsize KB 2754 * size = 4^tsize * 2^10 = 2^(2 * tsize + 10) 2755 */ 2756 2757 return ((1 << (2 * tsize)) * 1024); 2758} 2759 2760/* 2761 * Convert region size (must be power of 4) to TLB TSIZE value. 2762 */ 2763static unsigned int 2764size2tsize(vm_size_t size) 2765{ 2766 2767 /* 2768 * tsize = log2(size) / 2 - 5 2769 */ 2770 2771 return (ilog2(size) / 2 - 5); 2772} 2773 2774/* 2775 * Set up an entry in the sw tlb1 table, and write the entry to the TLB1 hardware. 2776 * This routine is used for low level operations on the TLB1, 2777 * for creating temporary as well as permanent mappings (tlb1_set_entry). 2778 * 2779 * We assume kernel mappings only, thus all entries created have supervisor 2780 * permission bits set and user permission bits cleared. 2781 * 2782 * Provided mapping size must be a power of 4. 2783 * Mapping flags must be a combination of MAS2_[WIMG]. 2784 * Entry TID is set to _tid which must not exceed an 8 bit value. 2785 * Entry TS is set to either 0 or MAS1_TS based on provided _ts. 2786 */ 2787static void 2788__tlb1_set_entry(unsigned int idx, vm_offset_t va, vm_offset_t pa, 2789 vm_size_t size, u_int32_t flags, unsigned int _tid, unsigned int _ts) 2790{ 2791 int tsize; 2792 u_int32_t ts, tid; 2793 2794 //debugf("__tlb1_set_entry: s (idx = %d va = 0x%08x pa = 0x%08x " 2795 // "size = 0x%08x flags = 0x%08x _tid = %d _ts = %d\n", 2796 // idx, va, pa, size, flags, _tid, _ts); 2797 2798 /* Convert size to TSIZE */ 2799 tsize = size2tsize(size); 2800 //debugf("__tlb1_set_entry: tsize = %d\n", tsize); 2801 2802 tid = (_tid << MAS1_TID_SHIFT) & MAS1_TID_MASK; 2803 ts = (_ts) ?
MAS1_TS : 0; 2804 tlb1[idx].mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; 2805 tlb1[idx].mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); 2806 2807 tlb1[idx].mas2 = (va & MAS2_EPN) | flags; 2808 2809 /* Set supervisor rwx permission bits */ 2810 tlb1[idx].mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; 2811 2812 //debugf("__tlb1_set_entry: mas1 = %08x mas2 = %08x mas3 = 0x%08x\n", 2813 // tlb1[idx].mas1, tlb1[idx].mas2, tlb1[idx].mas3); 2814 2815 tlb1_write_entry(idx); 2816 //debugf("__tlb1_set_entry: e\n"); 2817} 2818 2819/* 2820 * Register permanent kernel mapping in TLB1. 2821 * 2822 * Entries are created starting from index 0 (current free entry is 2823 * kept in tlb1_idx) and are not supposed to be invalidated. 2824 */ 2825static int 2826tlb1_set_entry(vm_offset_t va, vm_offset_t pa, vm_size_t size, u_int32_t flags) 2827{ 2828 //debugf("tlb1_set_entry: s (tlb1_idx = %d va = 0x%08x pa = 0x%08x " 2829 // "size = 0x%08x flags = 0x%08x\n", 2830 // tlb1_idx, va, pa, size, flags); 2831 2832 if (tlb1_idx >= TLB1_SIZE) { 2833 //debugf("tlb1_set_entry: e (tlb1 full!)\n"); 2834 return (-1); 2835 } 2836 2837 /* TS = 0, TID = 0 */ 2838 __tlb1_set_entry(tlb1_idx++, va, pa, size, flags, KERNEL_TID, 0); 2839 //debugf("tlb1_set_entry: e\n"); 2840 return (0); 2841} 2842 2843/* 2844 * Invalidate TLB1 entry, clear corresponding tlb1 table element. 2845 * This routine is used to clear temporary entries created 2846 * early in locore.S or through the use of __tlb1_set_entry(). 2847 */ 2848void 2849tlb1_inval_entry(unsigned int idx) 2850{ 2851 vm_offset_t va; 2852 2853 va = tlb1[idx].mas2 & MAS2_EPN; 2854 2855 tlb1[idx].mas1 = 0; /* !MAS1_VALID */ 2856 tlb1[idx].mas2 = 0; 2857 tlb1[idx].mas3 = 0; 2858 2859 tlb1_write_entry(idx); 2860} 2861 2862static int 2863tlb1_entry_size_cmp(const void *a, const void *b) 2864{ 2865 const vm_size_t *sza; 2866 const vm_size_t *szb; 2867 2868 sza = a; 2869 szb = b; 2870 if (*sza > *szb) 2871 return (-1); 2872 else if (*sza < *szb) 2873 return (1); 2874 else 2875 return (0); 2876} 2877 2878/* 2879 * Map in a contiguous RAM region into the TLB1 using a maximum of 2880 * KERNEL_REGION_MAX_TLB_ENTRIES entries. 2881 * 2882 * If necessary, round up the last entry size and return the total size 2883 * used by all allocated entries. 2884 */ 2885vm_size_t 2886tlb1_mapin_region(vm_offset_t va, vm_offset_t pa, vm_size_t size) 2887{ 2888 vm_size_t entry_size[KERNEL_REGION_MAX_TLB_ENTRIES]; 2889 vm_size_t mapped_size, sz, esz; 2890 unsigned int log; 2891 int i; 2892 2893 debugf("tlb1_mapin_region:\n"); 2894 debugf(" region size = 0x%08x va = 0x%08x pa = 0x%08x\n", size, va, pa); 2895 2896 mapped_size = 0; 2897 sz = size; 2898 memset(entry_size, 0, sizeof(entry_size)); 2899 2900 /* Calculate entry sizes. */ 2901 for (i = 0; i < KERNEL_REGION_MAX_TLB_ENTRIES && sz > 0; i++) { 2902 2903 /* Largest region that is power of 4 and fits within size */ 2904 log = ilog2(sz)/2; 2905 esz = 1 << (2 * log); 2906 2907 /* Minimum region size is 4KB */ 2908 if (esz < (1 << 12)) 2909 esz = 1 << 12; 2910 2911 /* If this is last entry cover remaining size. */ 2912 if (i == KERNEL_REGION_MAX_TLB_ENTRIES - 1) { 2913 while (esz < sz) 2914 esz = esz << 2; 2915 } 2916 2917 entry_size[i] = esz; 2918 mapped_size += esz; 2919 if (esz < sz) 2920 sz -= esz; 2921 else 2922 sz = 0; 2923 } 2924 2925 /* Sort entry sizes, required to get proper entry address alignment. */ 2926 qsort(entry_size, KERNEL_REGION_MAX_TLB_ENTRIES, 2927 sizeof(vm_size_t), tlb1_entry_size_cmp); 2928 2929 /* Load TLB1 entries.
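 * Since the sizes were sorted in descending order above, each entry's base
 * address stays naturally aligned to its size (assuming the region start is
 * aligned to the largest entry size). For illustration, assuming
 * KERNEL_REGION_MAX_TLB_ENTRIES is at least 4, a 112 MB region is covered
 * as 64 MB + 16 MB + 16 MB + 16 MB.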
*/ 2930 for (i = 0; i < KERNEL_REGION_MAX_TLB_ENTRIES; i++) { 2931 esz = entry_size[i]; 2932 if (!esz) 2933 break; 2934 debugf(" entry %d: sz = 0x%08x (va = 0x%08x pa = 0x%08x)\n", 2935 tlb1_idx, esz, va, pa); 2936 tlb1_set_entry(va, pa, esz, _TLB_ENTRY_MEM); 2937 2938 va += esz; 2939 pa += esz; 2940 } 2941 2942 debugf(" mapped size 0x%08x (wasted space 0x%08x)\n", 2943 mapped_size, mapped_size - size); 2944 2945 return (mapped_size); 2946} 2947 2948/* 2949 * TLB1 initialization routine, to be called after the very first 2950 * assembler level setup done in locore.S. 2951 */ 2952void 2953tlb1_init(vm_offset_t ccsrbar) 2954{ 2955 uint32_t mas0; 2956 2957 /* TLB1[1] is used to map the kernel. Save that entry. */ 2958 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(1); 2959 mtspr(SPR_MAS0, mas0); 2960 __asm __volatile("isync; tlbre"); 2961 2962 tlb1[1].mas1 = mfspr(SPR_MAS1); 2963 tlb1[1].mas2 = mfspr(SPR_MAS2); 2964 tlb1[1].mas3 = mfspr(SPR_MAS3); 2965 2966 /* Map in CCSRBAR in TLB1[0] */ 2967 __tlb1_set_entry(0, CCSRBAR_VA, ccsrbar, CCSRBAR_SIZE, 2968 _TLB_ENTRY_IO, KERNEL_TID, 0); 2969 2970 /* Setup TLB miss defaults */ 2971 set_mas4_defaults(); 2972 2973 /* Reset next available TLB1 entry index. */ 2974 tlb1_idx = 2; 2975} 2976 2977/* 2978 * Setup MAS4 defaults. 2979 * These values are loaded into MAS0-2 on a TLB miss. 2980 */ 2981static void 2982set_mas4_defaults(void) 2983{ 2984 u_int32_t mas4; 2985 2986 /* Defaults: TLB0, PID0, TSIZED=4K */ 2987 mas4 = MAS4_TLBSELD0; 2988 mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK; 2989 2990 mtspr(SPR_MAS4, mas4); 2991 __asm volatile("isync"); 2992} 2993 2994/* 2995 * Print out contents of the MAS registers for each TLB1 entry 2996 */ 2997void 2998tlb1_print_tlbentries(void) 2999{ 3000 u_int32_t mas0, mas1, mas2, mas3, mas7; 3001 int i; 3002 3003 debugf("TLB1 entries:\n"); 3004 for (i = 0; i < TLB1_SIZE; i++) { 3005 3006 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i); 3007 mtspr(SPR_MAS0, mas0); 3008 3009 __asm volatile("isync; tlbre"); 3010 3011 mas1 = mfspr(SPR_MAS1); 3012 mas2 = mfspr(SPR_MAS2); 3013 mas3 = mfspr(SPR_MAS3); 3014 mas7 = mfspr(SPR_MAS7); 3015 3016 tlb_print_entry(i, mas1, mas2, mas3, mas7); 3017 } 3018} 3019 3020/* 3021 * Print out contents of the in-ram tlb1 table. 3022 */ 3023void 3024tlb1_print_entries(void) 3025{ 3026 int i; 3027 3028 debugf("tlb1[] table entries:\n"); 3029 for (i = 0; i < TLB1_SIZE; i++) 3030 tlb_print_entry(i, tlb1[i].mas1, tlb1[i].mas2, tlb1[i].mas3, 0); 3031} 3032 3033/* 3034 * Return 0 if the physical IO range is encompassed by one of the 3035 * TLB1 entries, otherwise return the related error code. 3036 */ 3037static int 3038tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va) 3039{ 3040 u_int32_t prot; 3041 vm_paddr_t pa_start; 3042 vm_paddr_t pa_end; 3043 unsigned int entry_tsize; 3044 vm_size_t entry_size; 3045 3046 *va = (vm_offset_t)NULL; 3047 3048 /* Skip invalid entries */ 3049 if (!(tlb1[i].mas1 & MAS1_VALID)) 3050 return (EINVAL); 3051 3052 /* 3053 * The entry must be cache-inhibited, guarded, and r/w 3054 * so it can function as an i/o page 3055 */ 3056 prot = tlb1[i].mas2 & (MAS2_I | MAS2_G); 3057 if (prot != (MAS2_I | MAS2_G)) 3058 return (EPERM); 3059 3060 prot = tlb1[i].mas3 & (MAS3_SR | MAS3_SW); 3061 if (prot != (MAS3_SR | MAS3_SW)) 3062 return (EPERM); 3063 3064 /* The address should be within the entry range.
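 * i.e. the requested pa .. pa + size must fall inside [pa_start, pa_end],
 * where pa_start comes from the entry's MAS3 RPN field and the length from
 * its TSIZE, as computed below.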
*/ 3065 entry_tsize = (tlb1[i].mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 3066 KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize")); 3067 3068 entry_size = tsize2size(entry_tsize); 3069 pa_start = tlb1[i].mas3 & MAS3_RPN; 3070 pa_end = pa_start + entry_size - 1; 3071 3072 if ((pa < pa_start) || ((pa + size) > pa_end)) 3073 return (ERANGE); 3074 3075 /* Return virtual address of this mapping. */ 3076 *va = (tlb1[i].mas2 & MAS2_EPN) + (pa - pa_start); 3077 return (0); 3078} 3079
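/*
 * Illustrative sketch, not part of the original pmap code: a compiled-out
 * self-check (the name tlb_size_selftest is made up for this example)
 * showing how the tsize2size()/size2tsize() helpers above are expected to
 * round-trip for the power-of-4 sizes used by the TLB1 code, e.g. TSIZE 7
 * corresponds to 4^7 KB = 16 MB.
 */
#if 0
static void
tlb_size_selftest(void)
{
	unsigned int tsize;

	/* TSIZE 1 (4 KB) up to TSIZE 9 (256 MB). */
	for (tsize = 1; tsize <= 9; tsize++)
		KASSERT(size2tsize(tsize2size(tsize)) == tsize,
		    ("tlb_size_selftest: TSIZE %u does not round-trip", tsize));
}
#endif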