/*
 * Copyright 2013, winocm. <winocm@icloud.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice, this
 * list of conditions and the following disclaimer in the documentation and/or
 * other materials provided with the distribution.
 *
 * If you are going to use this software in any form that does not involve
 * releasing the source to this project or improving it, let me know beforehand.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*-
 * Copyright (c) 2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas at 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * ARM physical memory map.
 *
 * Version 1.2b2, 'The Rewrite'.
 *
 * I'm sorry. This pmap sucks, but it sucks 'less' than the previous one did.
 *
 * Todo: fix pmap_nest, pmap_copy, pmap_unnest, pmap_enter_options, pmap_remove/pmap_remove_region
 *
 * And make pmap_create use an ASID bitmap too (ifdef _ARM_ARCH_7).
 */

#include <mach_debug.h>
#include <debug.h>
#include <mach/vm_types.h>
#include <mach/vm_param.h>
#include <mach/thread_status.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <kern/cpu_number.h>
#include <kern/thread.h>
#include <arm/pmap.h>
#include <arm/mp.h>
#include <arm/misc_protos.h>
#include <kern/ledger.h>
#include <kern/zalloc.h>
#include <kern/lock.h>
#include <kern/kalloc.h>
#include <vm/vm_protos.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/cpm.h>
#include <arm/cpu_capabilities.h>
#include <arm/arch.h>
#include <arm/pmap_asid.h>
#include <arm/cpufunc.h>
#include "proc_reg.h"

/*
 * The pv_head_table contains a 'trunk' mapping for each physical page;
 * one such entry exists per page. Pages that are mapped in multiple
 * pmaps (e.g., nested pmaps such as the dyld shared region) have
 * multiple 'pv_nexts'. These are considered leaf mappings. Code should
 * go through the leaf mappings when accessing or modifying page entries.
 *
 * -- With love, winocm.
 */

#define VM_MEM_WIRED        0x4
#define _1KB                (1 * 1024)
#define _1MB                (1 * 1024 * _1KB)

/** Core Structures */
typedef struct __pv_entry__ {
    struct __pv_entry__ *pv_next;   /* Next PV entry. */
    pmap_t pv_pmap;                 /* Where does our mapping lie? */
    vm_offset_t pv_address_va;      /* Virtual Address for the mapping. */
    uint32_t pv_flags;              /* Pmap Flags */
} pv_entry, *pv_entry_t;

typedef enum {
    ARM_PAGE_TRANSLATION_FAULT = 0x00,  /* 0b00 */
    ARM_PAGE_PAGE_TABLE = 0x01,         /* 0b01 */
    ARM_PAGE_SECTION = 0x02,            /* 0b10 */
    ARM_PAGE_MASK_VALUE = 0x03,         /* 0b11 */
} pmap_arm_l1_page_types_t;

typedef enum {
    ARM_PTE_DESCRIPTOR_64K = 0x01,      /* 0b01 */
    ARM_PTE_DESCRIPTOR_4K = 0x02,       /* 0b1X */
} pmap_arm_l2_page_types_t;

extern vm_offset_t vm_kernel_stext;
extern vm_offset_t vm_kernel_etext;

/** Global variables */
boolean_t pmap_initialized = FALSE;         /* Is the pmap system initialized? */
static struct vm_object pmap_object_store;  /* Storage object for the actual VM thing. */
vm_object_t pmap_object;                    /* The real VM object. */
extern uint32_t first_avail, avail_end;     /* End/begin of Managed RAM space. */
struct zone *pmap_zone;                     /* Zone of pmap structures */
struct zone *pve_zone;                      /* Pmap Virtual Entry zone. */
pv_entry_t pv_head_table;                   /* Start of PV entries. */
static pmap_paddr_t avail_remaining;        /* Remaining available pages. */
uint32_t virt_begin, virt_end;              /* Virtual Address Space. */
uint32_t avail_start, vm_first_phys;
vm_page_t commpage;
uint64_t pmap_nesting_size_min = 0x8000000;
uint64_t pmap_nesting_size_max = 0x8000000;

int allow_data_exec = 0;    /* no exec from data, embedded is hardcore like that */
int allow_stack_exec = 0;   /* No apps may execute from the stack by default */
int nx_enabled = 1;

/* THE kernel pmap. */
struct pmap kernel_pmap_store;
pmap_t kernel_pmap = &kernel_pmap_store;

/** Locking Primitives */
lock_t pmap_system_lock;
#define SPLVM(spl)      spl = splhigh();
#define SPLX(spl)       splx(spl);

#define PMAP_LOCK(pmap) {               \
    simple_lock(&(pmap)->lock);         \
}

#define PMAP_UNLOCK(pmap) {             \
    simple_unlock(&(pmap)->lock);       \
}

#define ppn_to_pai(ppn)     (ppn)       /* physical page indices and page numbers are identical here */

/** The Free List. */
pv_entry_t pv_free_list;    /* The free list should be populated when the pmaps are not locked. */
decl_simple_lock_data(, pv_free_list_lock);

#define PV_ALLOC(pv_e) {                \
    simple_lock(&pv_free_list_lock);    \
    if((pv_e = pv_free_list) != 0) {    \
        pv_free_list = pv_e->pv_next;   \
    }                                   \
    simple_unlock(&pv_free_list_lock);  \
}

#define PV_FREE(pv_e) {                 \
    simple_lock(&pv_free_list_lock);    \
    pv_e->pv_next = pv_free_list;       \
    pv_free_list = pv_e;                \
    simple_unlock(&pv_free_list_lock);  \
}
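
/*
 * Illustrative sketch, not referenced anywhere: the allocation pattern
 * PV_ALLOC/PV_FREE are written for. A caller tries the lock-protected free
 * list first and only falls back to carving a fresh entry out of pve_zone
 * when the list is empty; the entry goes back with PV_FREE once the mapping
 * it describes is torn down.
 */
static __unused pv_entry_t pv_alloc_sketch(void)
{
    pv_entry_t pv_e;

    PV_ALLOC(pv_e);                             /* fast path: reuse a freed entry */
    if (pv_e == NULL)
        pv_e = (pv_entry_t) zalloc(pve_zone);   /* slow path: allocate a new one */

    return pv_e;
}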

/*
 * For each vm_page_t, there is a list of all currently
 * valid virtual mappings of that page. An entry is
 * a pv_rooted_entry_t; the list is the pv_table.
 *
 * N.B. with the new combo rooted/hashed scheme it is
 * only possible to remove individual non-rooted entries
 * if they are found via the hashed chains, as there is no
 * way to unlink the singly linked hashed entries if navigated to
 * via the queue list off the rooted entries. Think of it as
 * hash/walk/pull, keeping track of the prev pointer while walking
 * the singly linked hash list. All of this is to save memory and
 * keep both types of pv_entries as small as possible.
 */

/*

PV HASHING Changes - JK 1/2007

Pve's establish physical to virtual mappings. These are used for aliasing of a
physical page to (potentially many) virtual addresses within pmaps. In the previous
implementation the structure of the pv_entries (each 16 bytes in size) was

typedef struct pv_entry {
    struct pv_entry_t next;
    pmap_t pmap;
    vm_map_offset_t va;
} *pv_entry_t;

An initial array of these is created at boot time, one per physical page of memory,
indexed by the physical page number. Additionally, a pool of entries is created from a
pv_zone to be used as needed by pmap_enter() when it is creating new mappings.
Originally, we kept this pool around because the code in pmap_enter() was unable to
block if it needed an entry and none were available - we'd panic. Some time ago I
restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing
a pv structure and restart, removing a panic from the code (in the case of the kernel
pmap we cannot block and still panic, so, we keep a separate hot pool for use only on
kernel pmaps). The pool has not been removed since there is a large performance gain
keeping freed pv's around for reuse and not suffering the overhead of zalloc for every
new pv we need.

As pmap_enter() created new mappings it linked the new pve's for them off the fixed
pv array for that ppn (off the next pointer). These pve's are accessed for several
operations, one of them being address space teardown. In that case, we basically do this

    for (every page/pte in the space) {
        calc pve_ptr from the ppn in the pte
        for (every pv in the list for the ppn) {
            if (this pv is for this pmap/vaddr) {
                do housekeeping
                unlink/free the pv
            }
        }
    }

The problem arose when we were running, say 8000 (or even 2000) apache or other processes
and one or all terminate. The list hanging off each pv array entry could have thousands of
entries. We were continuously linearly searching each of these lists as we stepped through
the address space we were tearing down. Because of the locks we hold, likely taking a cache
miss for each node, and interrupt disabling for MP issues the system became completely
unresponsive for many seconds while we did this.

Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn
for operations like pmap_page_protect and finding and modifying/removing a single pve as
part of pmap_enter processing) has led to modifying the pve structures and databases.

There are now two types of pve structures. A "rooted" structure which is basically the
original structure accessed in an array by ppn, and a "hashed" structure accessed on a
hash list via a hash of [pmap, vaddr]. These have been designed with the two goals of
minimizing wired memory and making the lookup of a ppn faster.
Since a vast majority of
pages in the system are not aliased and hence represented by a single pv entry I've kept
the rooted entry size as small as possible because there is one of these dedicated for
every physical page of memory. The hashed pve's are larger due to the addition of the hash
link and the ppn entry needed for matching while running the hash list to find the entry we
are looking for. This way, only systems that have lots of aliasing (like 2000+ httpd procs)
will pay the extra memory price. Both structures have the same first three fields allowing
some simplification in the code.

They have these shapes

typedef struct pv_rooted_entry {
    queue_head_t qlink;
    vm_map_offset_t va;
    pmap_t pmap;
} *pv_rooted_entry_t;

typedef struct pv_hashed_entry {
    queue_head_t qlink;
    vm_map_offset_t va;
    pmap_t pmap;
    ppnum_t ppn;
    struct pv_hashed_entry *nexth;
} *pv_hashed_entry_t;

The main flow difference is that the code is now aware of the rooted entry and the hashed
entries. Code that runs the pv list still starts with the rooted entry and then continues
down the qlink onto the hashed entries. Code that is looking up a specific pv entry first
checks the rooted entry and then hashes and runs the hash list for the match. The hash list
lengths are much smaller than the original pv lists that contained all aliases for the
specific ppn.

*/

/*
 * OS level page bits.
 */
typedef enum {
    PMAP_OSPTE_TYPE_VALID = 0x0,
    PMAP_OSPTE_TYPE_WIRED = 0x1,
    PMAP_OSPTE_TYPE_REFERENCED = 0x2,
    PMAP_OSPTE_TYPE_MODIFIED = 0x4,
    PMAP_OSPTE_TYPE_NOENCRYPT = 0x8,
    PMAP_OSPTE_TYPE_NOCACHE = 0x10,
    PMAP_OSPTE_TYPE_PTA = 0x20,
} __internal_pmap_ospte_bits_t;

/*
 * The PV rooted hash stuff is from xnu-1228/osfmk/i386/pmap.c
 */

typedef struct pv_rooted_entry {    /* first three entries must match pv_hashed_entry_t */
    queue_head_t qlink;
    vm_map_offset_t va;             /* virtual address for mapping */
    pmap_t pmap;                    /* pmap where mapping lies */
    uint32_t flags;                 /* address flags */
} *pv_rooted_entry_t;

#define PV_ROOTED_ENTRY_NULL    ((pv_rooted_entry_t) 0)

pv_rooted_entry_t pv_head_hash_table;   /* array of entries, one per page */

typedef struct pv_hashed_entry {    /* first three entries must match pv_rooted_entry_t */
    queue_head_t qlink;
    vm_map_offset_t va;
    pmap_t pmap;
    ppnum_t ppn;
    struct pv_hashed_entry *nexth;
} *pv_hashed_entry_t;

#define PV_HASHED_ENTRY_NULL    ((pv_hashed_entry_t)0)

#define NPVHASH 4095            /* MUST BE 2^N - 1 */
pv_hashed_entry_t *pv_hash_table;   /* hash lists */

uint32_t npvhash = 0;

/* #define PV_DEBUG 1   uncomment to enable some PV debugging code */
// #define PV_DEBUG 1
#define kprintf(args...)        /* silence kprintf chatter in this file */

#ifdef PV_DEBUG
#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
#else
#define CHK_NPVHASH()
#endif

/*
 * pv_list entries are kept on a list that can only be accessed
 * with the pmap system locked (at SPLVM, not in the cpus_active set).
 * The list is refilled from the pv_hashed_list_zone if it becomes empty.
 */
pv_rooted_entry_t pv_hash_free_list = PV_ROOTED_ENTRY_NULL;     /* free list at SPLVM */
pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
decl_simple_lock_data(, pv_hashed_free_list_lock)
decl_simple_lock_data(, pv_hashed_kern_free_list_lock)
decl_simple_lock_data(, pv_hash_table_lock)

int pv_free_count = 0;
int pv_hashed_free_count = 0;
int pv_kern_free_count = 0;
int pv_hashed_kern_free_count = 0;
#define PV_HASHED_LOW_WATER_MARK        5000
#define PV_HASHED_KERN_LOW_WATER_MARK   100
#define PV_HASHED_ALLOC_CHUNK           2000
#define PV_HASHED_KERN_ALLOC_CHUNK      50
thread_call_t mapping_adjust_call;
static thread_call_data_t mapping_adjust_call_data;
uint32_t mappingrecurse = 0;

#define PV_HASHED_ALLOC(pvh_e) {                                        \
    simple_lock(&pv_hashed_free_list_lock);                             \
    if ((pvh_e = pv_hashed_free_list) != 0) {                           \
        pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;     \
        pv_hashed_free_count--;                                         \
        if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK)            \
            if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse))     \
                thread_call_enter(mapping_adjust_call);                 \
    }                                                                   \
    simple_unlock(&pv_hashed_free_list_lock);                           \
}

#define PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {                   \
    simple_lock(&pv_hashed_free_list_lock);                             \
    pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;            \
    pv_hashed_free_list = pvh_eh;                                       \
    pv_hashed_free_count += pv_cnt;                                     \
    simple_unlock(&pv_hashed_free_list_lock);                           \
}

#define PV_HASHED_KERN_ALLOC(pvh_e) {                                       \
    simple_lock(&pv_hashed_kern_free_list_lock);                            \
    if ((pvh_e = pv_hashed_kern_free_list) != 0) {                          \
        pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;    \
        pv_hashed_kern_free_count--;                                        \
        if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK)      \
            if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse))         \
                thread_call_enter(mapping_adjust_call);                     \
    }                                                                       \
    simple_unlock(&pv_hashed_kern_free_list_lock);                          \
}

#define PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {              \
    simple_lock(&pv_hashed_kern_free_list_lock);                        \
    pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;       \
    pv_hashed_kern_free_list = pvh_eh;                                  \
    pv_hashed_kern_free_count += pv_cnt;                                \
    simple_unlock(&pv_hashed_kern_free_list_lock);                      \
}

zone_t pv_hashed_list_zone;     /* zone of pv_hashed_entry structures */

#define pvhash(idx)         (&pv_hash_table[idx])

/** Useful Macros */
#define pa_index(pa)        (atop(pa))
#define pai_to_pvh(pai)     (&pv_head_hash_table[pai - atop(gPhysBase)])
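
/*
 * Illustrative sketch (unused): how a physical address is turned into its
 * rooted PV entry with the two macros above -- pa_index() converts the
 * address to a page number and pai_to_pvh() rebases it against the base of
 * managed physical memory (atop(gPhysBase)).
 */
static __unused pv_rooted_entry_t pv_entry_for_pa_sketch(pmap_paddr_t pa)
{
    return pai_to_pvh(pa_index(pa));
}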

/*
 * Each entry in the pv_head_table is locked by a full spinlock in the
 * pv_lock_table. The lock bits are accessed by the physical
 * address of the page they lock.
 */

char *pv_lock_table;        /* pointer to array of bits */
#define pv_lock_table_size(n)       (((n) * sizeof(uint32_t)))

char *pv_hash_lock_table;
#define pv_hash_lock_table_size(n)  (((n) * sizeof(uint32_t)))

/*
 * Locking protocols
 */

#define bit_lock(pai, l)        //lck_spin_lock((uint32_t*)(l) + pai);
#define bit_unlock(pai, l)      //lck_spin_unlock((uint32_t*)(l) + pai);

#define lock_pvh_pai(pai)       bit_lock(pai - atop(gPhysBase), (void *)pv_lock_table)
#define unlock_pvh_pai(pai)     bit_unlock(pai - atop(gPhysBase), (void *)pv_lock_table)

#define lock_hash_hash(hash)    bit_lock(hash, (void *)pv_hash_lock_table)
#define unlock_hash_hash(hash)  bit_unlock(hash, (void *)pv_hash_lock_table)

#define LOCK_PV_HASH(hash)      lock_hash_hash(hash)
#define UNLOCK_PV_HASH(hash)    unlock_hash_hash(hash)

#define LOCK_PVH(index) {           \
    mp_disable_preemption();        \
    lock_pvh_pai(index);            \
}

#define UNLOCK_PVH(index) {         \
    unlock_pvh_pai(index);          \
    mp_enable_preemption();         \
}

/** ASID stuff */

#define KERNEL_ASID_PID     0

static vm_offset_t pm_asid_hint = KERNEL_ASID_PID + 1;
static u_long pm_asid_bitmap[256 / (sizeof(u_long) * 8)];

static u_long pm_asid_max = 255;
static u_long pm_asids_free = 254;      /* One is reserved by the Kernel ASID */

#define __BITMAP_SET(bm, n) \
    ((bm)[(n) / (8*sizeof(bm[0]))] |= 1LU << ((n) % (8*sizeof(bm[0]))))
#define __BITMAP_CLR(bm, n) \
    ((bm)[(n) / (8*sizeof(bm[0]))] &= ~(1LU << ((n) % (8*sizeof(bm[0])))))
#define __BITMAP_ISSET_P(bm, n) \
    (((bm)[(n) / (8*sizeof(bm[0]))] & (1LU << ((n) % (8*sizeof(bm[0]))))) != 0)

#define TLBINFO_ASID_MARK_USED(ti, asid) \
    __BITMAP_SET((ti), (asid))
#define TLBINFO_ASID_INUSE_P(ti, asid) \
    __BITMAP_ISSET_P((ti), (asid))
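
/*
 * Illustrative sketch only, in the spirit of the "make pmap_create use an
 * ASID bitmap" todo at the top of this file; nothing calls it, and the policy
 * (no wrap-around, no stealing) is an assumption rather than a final allocator.
 */
static __unused u_long pmap_asid_alloc_sketch(void)
{
    u_long asid;

    if (pm_asids_free == 0)
        return KERNEL_ASID_PID;     /* exhausted; a real allocator would steal/flush */

    /* Scan upward from the hint and claim the first free ASID. */
    for (asid = pm_asid_hint; asid <= pm_asid_max; asid++) {
        if (!TLBINFO_ASID_INUSE_P(pm_asid_bitmap, asid)) {
            TLBINFO_ASID_MARK_USED(pm_asid_bitmap, asid);
            pm_asids_free--;
            pm_asid_hint = asid + 1;
            return asid;
        }
    }

    return KERNEL_ASID_PID;         /* hint ran past pm_asid_max; caller must handle */
}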

/** Template PTEs */

/*
 * Protection flags for various requested VM definitions, all of them are in here.
 * These are per ARMv6/ARM11JZF-S definitions.
 */
arm_l2_t arm_pte_prot_templates[] = {
    {.l2.nx = TRUE, .l2.ap = 0x00, .l2.apx = 0},    /* Privileged ---  User --- */
    {.l2.nx = TRUE, .l2.ap = 0x01, .l2.apx = 0},    /* Privileged RW-  User --- */
    {.l2.nx = TRUE, .l2.ap = 0x02, .l2.apx = 0},    /* Privileged RW-  User R-- */
    {.l2.nx = TRUE, .l2.ap = 0x03, .l2.apx = 0},    /* Privileged RW-  User RW- */

    {.l2.nx = FALSE, .l2.ap = 0x00, .l2.apx = 0},   /* Privileged --X  User --X */
    {.l2.nx = FALSE, .l2.ap = 0x01, .l2.apx = 0},   /* Privileged RWX  User --X */
    {.l2.nx = FALSE, .l2.ap = 0x02, .l2.apx = 0},   /* Privileged RWX  User R-X */
    {.l2.nx = FALSE, .l2.ap = 0x03, .l2.apx = 0},   /* Privileged RWX  User RWX */

    {.l2.nx = TRUE, .l2.ap = 0x00, .l2.apx = 1},    /* Privileged ---  User --- */
    {.l2.nx = TRUE, .l2.ap = 0x01, .l2.apx = 1},    /* Privileged R--  User --- */
    {.l2.nx = TRUE, .l2.ap = 0x02, .l2.apx = 1},    /* Privileged R--  User R-- */
    {.l2.nx = TRUE, .l2.ap = 0x03, .l2.apx = 1},    /* Privileged R--  User R-- */

    {.l2.nx = FALSE, .l2.ap = 0x00, .l2.apx = 1},   /* Privileged --X  User --X */
    {.l2.nx = FALSE, .l2.ap = 0x01, .l2.apx = 1},   /* Privileged R-X  User --X */
    {.l2.nx = FALSE, .l2.ap = 0x02, .l2.apx = 1},   /* Privileged R-X  User R-X */
    {.l2.nx = FALSE, .l2.ap = 0x03, .l2.apx = 1},   /* Privileged R-X  User R-X */
};

uint64_t pmap_pv_hashlist_walks = 0;
uint64_t pmap_pv_hashlist_cnts = 0;
uint32_t pmap_pv_hashlist_max = 0;

unsigned int inuse_ptepages_count = 0;
unsigned int bootstrap_wired_pages = 0;
int pt_fake_zone_index = -1;

uint32_t alloc_ptepages_count __attribute__ ((aligned(8))) = 0LL;   /* aligned for atomic access */
extern uint32_t pmap_asid_ncpus;

/*
 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 * !!!!!!!! Make SURE this remains in sync with arm_pte_prot_templates. !!!!!!!!!
 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 */
typedef enum {
    ARM_PTE_PROT_KERNEL_NONE_USER_NONE,
    ARM_PTE_PROT_KERNEL_RW_USER_NONE,
    ARM_PTE_PROT_KERNEL_RW_USER_R,
    ARM_PTE_PROT_KERNEL_RW_USER_RW,
    ARM_PTE_PROT_KERNEL_X_USER_X,
    ARM_PTE_PROT_KERNEL_RWX_USER_X,
    ARM_PTE_PROT_KERNEL_RWX_USER_RX,
    ARM_PTE_PROT_KERNEL_RWX_USER_RWX,
    ARM_PTE_PROT_KERNEL_NONE_USER_NONE_2,
    ARM_PTE_PROT_KERNEL_R_USER_NONE,
    ARM_PTE_PROT_KERNEL_R_USER_R,
    ARM_PTE_PROT_KERNEL_R_USER_R_2,
    ARM_PTE_PROT_KERNEL_X_USER_X_2,
    ARM_PTE_PROT_KERNEL_RX_USER_X,
    ARM_PTE_PROT_KERNEL_RX_USER_X_2,
    ARM_PTE_PROT_KERNEL_RX_USER_RX,
    ARM_PTE_PROT_KERNEL_RX_USER_RX_2,
} arm_prot_pte_definitions;

/*
 * Type Extension bits for ARM V6 and V7 MMU
 *
 * TEX C B                                          Shared
 * 000 0 0  Strong order                            yes
 * 000 0 1  Shared device                           yes
 * 000 1 0  write through, no write alloc           S-bit
 * 000 1 1  write back, no write alloc              S-bit
 * 001 0 0  non-cacheable                           S-bit
 * 001 0 1  reserved
 * 001 1 0  reserved
 * 001 1 1  write back, write alloc                 S-bit
 * 010 0 0  Non-shared device                       no
 * 010 0 1  reserved
 * 010 1 X  reserved
 * 011 X X  reserved
 * 1BB A A  BB for internal, AA for external        S-bit
 *
 * BB   internal cache
 * 0 0  Non-cacheable non-buffered
 * 0 1  Write back, write alloc, buffered
 * 1 0  Write through, no write alloc, buffered
 *      (non-cacheable for MPCore)
 * 1 1  Write back, no write alloc, buffered
 *      (write back, write alloc for MPCore)
 *
 * AA   external cache
 * 0 0  Non-cacheable non-buffered
 * 0 1  Write back, write alloc, buffered
 * 1 0  Write through, no write alloc, buffered
 * 1 1  Write back, no write alloc, buffered
 */
#define ARM_L2_C_BIT        0x00000004
#define ARM_L2_B_BIT        0x00000008
#define ARM_L2_4KB_TEX(x)   ((x & 0x7) << 6)    /* Type Extension */

#define ARM_CACHEBIT_NONE_NO_BUFFERED   0
#define ARM_CACHEBIT_WB_WA_BUFFERED     1
#define ARM_CACHEBIT_WT_NWA_BUFFERED    2
#define ARM_CACHEBIT_WB_NWA_BUFFERED    3

#define ARM_L2_TEX_000      0
#define ARM_L2_TEX_001      1
#define ARM_L2_TEX_010      2
#define ARM_L2_TEX_011      3
#define ARM_L2_TEX_100      4
#define ARM_L2_TEX_101      5
#define ARM_L2_TEX_110      6
#define ARM_L2_TEX_111      7

/** Functions */

extern int pt_fake_zone_index;
static inline void PMAP_ZINFO_PALLOC(pmap_t pmap, vm_size_t bytes)
{
    thread_t thr = current_thread();
    task_t task;
    zinfo_usage_t zinfo;

    pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);

    if (pt_fake_zone_index != -1 && (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
        OSAddAtomic64(bytes, (int64_t *) &zinfo[pt_fake_zone_index].alloc);
}

static inline void PMAP_ZINFO_PFREE(pmap_t pmap, vm_size_t bytes)
{
    thread_t thr = current_thread();
    task_t task;
    zinfo_usage_t zinfo;

    pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);

    if (pt_fake_zone_index != -1 && (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
        OSAddAtomic64(bytes, (int64_t *) &zinfo[pt_fake_zone_index].free);
}

static inline void PMAP_ZINFO_SALLOC(pmap_t pmap, vm_size_t bytes)
{
    pmap_ledger_credit(pmap, task_ledgers.tkm_shared, bytes);
}

static inline void PMAP_ZINFO_SFREE(pmap_t pmap, vm_size_t bytes)
{
    pmap_ledger_debit(pmap, task_ledgers.tkm_shared, bytes);
}

static inline uint32_t pvhashidx(pmap_t pmap, vm_map_offset_t va)
{
    return ((uint32_t) (uintptr_t) pmap ^ ((uint32_t) (va >> PAGE_SHIFT) & 0xFFFFFFFF)) & npvhash;
}

static inline void pv_hash_add(pv_hashed_entry_t pvh_e, pv_rooted_entry_t pv_h)
{
    pv_hashed_entry_t *hashp;
    int pvhash_idx;

    CHK_NPVHASH();
    pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
    LOCK_PV_HASH(pvhash_idx);
    insque(&pvh_e->qlink, &pv_h->qlink);
    hashp = pvhash(pvhash_idx);
#if PV_DEBUG
    if (NULL == hashp)
        panic("pv_hash_add(%p) null hash bucket", pvh_e);
#endif
    pvh_e->nexth = *hashp;
    *hashp = pvh_e;
    UNLOCK_PV_HASH(pvhash_idx);
}

/*
 * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
 * properly deals with the anchor.
 * must be called with the hash locked, does not unlock it
 */

static inline void pmap_pvh_unlink(pv_hashed_entry_t pvh)
{
    pv_hashed_entry_t curh;
    pv_hashed_entry_t *pprevh;
    int pvhash_idx;

    CHK_NPVHASH();
    pvhash_idx = pvhashidx(pvh->pmap, pvh->va);

    pprevh = pvhash(pvhash_idx);

#if PV_DEBUG
    if (NULL == *pprevh)
        panic("pvh_unlink null anchor");    /* JK DEBUG */
#endif
    curh = *pprevh;

    while (PV_HASHED_ENTRY_NULL != curh) {
        if (pvh == curh)
            break;
        pprevh = &curh->nexth;
        curh = curh->nexth;
    }
    if (PV_HASHED_ENTRY_NULL == curh)
        panic("pmap_pvh_unlink no pvh");
    *pprevh = pvh->nexth;
    return;
}

static inline void pv_hash_remove(pv_hashed_entry_t pvh_e)
{
    int pvhash_idx;

    CHK_NPVHASH();
    pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
    LOCK_PV_HASH(pvhash_idx);
    remque(&pvh_e->qlink);
    pmap_pvh_unlink(pvh_e);
    UNLOCK_PV_HASH(pvhash_idx);
}

/* TRUE iff at most one bit is set in 'distance' (used on XORs to detect single-bit flips). */
static inline boolean_t popcnt1(uint64_t distance)
{
    return ((distance & (distance - 1)) == 0);
}
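
/*
 * Illustrative sketch (not used below): the lookup side of the scheme the
 * PV HASHING comment above describes. A caller that has already ruled out the
 * rooted entry hashes [pmap, va] and walks the nexth chain for the matching
 * [pmap, va, ppn] triple, much as pmap_pv_remove() does further down.
 */
static inline __unused pv_hashed_entry_t
pv_hash_find_sketch(pmap_t pmap, vm_map_offset_t va, ppnum_t ppn)
{
    pv_hashed_entry_t pvh_e;
    int pvhash_idx;

    CHK_NPVHASH();
    pvhash_idx = pvhashidx(pmap, va);
    LOCK_PV_HASH(pvhash_idx);
    for (pvh_e = *pvhash(pvhash_idx); pvh_e != PV_HASHED_ENTRY_NULL; pvh_e = pvh_e->nexth) {
        if (pvh_e->pmap == pmap && pvh_e->va == va && pvh_e->ppn == ppn)
            break;
    }
    UNLOCK_PV_HASH(pvhash_idx);

    return pvh_e;
}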

/*
 * Routines to handle suppression of/recovery from some forms of pagetable corruption
 * incidents observed in the field. These can be either software induced (wild
 * stores to the mapwindows where applicable, use after free errors
 * (typically of pages addressed physically), mis-directed DMAs etc., or due
 * to DRAM/memory hierarchy/interconnect errors. Given the theoretical rarity of these errors,
 * the recording mechanism is deliberately not MP-safe. The overarching goal is to
 * still assert on potential software races, but attempt recovery from incidents
 * identifiable as occurring due to issues beyond the control of the pmap module.
 * The latter includes single-bit errors and malformed pagetable entries.
 * We currently limit ourselves to recovery/suppression of one incident per
 * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
 * are logged.
 * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
 */

typedef enum {
    PTE_VALID = 0x0,
    PTE_INVALID = 0x1,
    PTE_RSVD = 0x2,
    PTE_SUPERVISOR = 0x4,
    PTE_BITFLIP = 0x8,
    PV_BITFLIP = 0x10,
    PTE_INVALID_CACHEABILITY = 0x20
} pmap_pagetable_corruption_t;

typedef enum {
    ROOT_PRESENT = 0,
    ROOT_ABSENT = 1
} pmap_pv_assertion_t;

typedef enum {
    PMAP_ACTION_IGNORE = 0x0,
    PMAP_ACTION_ASSERT = 0x1,
    PMAP_ACTION_RETRY = 0x2,
    PMAP_ACTION_RETRY_RELOCK = 0x4
} pmap_pagetable_corruption_action_t;

#define PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
extern uint64_t pmap_pagetable_corruption_interval_abstime;

extern uint32_t pmap_pagetable_corruption_incidents;
#define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
typedef struct {
    pmap_pv_assertion_t incident;
    pmap_pagetable_corruption_t reason;
    pmap_pagetable_corruption_action_t action;
    pmap_t pmap;
    vm_map_offset_t vaddr;
    pt_entry_t pte;
    ppnum_t ppn;
    pmap_t pvpmap;
    vm_map_offset_t pvva;
    uint64_t abstime;
} pmap_pagetable_corruption_record_t;

pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
uint32_t pmap_pagetable_corruption_incidents;
uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
uint64_t pmap_pagetable_corruption_interval_abstime;
thread_call_t pmap_pagetable_corruption_log_call;
static thread_call_data_t pmap_pagetable_corruption_log_call_data;
boolean_t pmap_pagetable_corruption_timeout = FALSE;

static inline void pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason, pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t * ptep, ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva)
{
    uint32_t pmap_pagetable_corruption_log_index;
    pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = mach_absolute_time();
    /*
     * Asynchronously log
     */
    thread_call_enter(pmap_pagetable_corruption_log_call);
}

static inline pmap_pagetable_corruption_action_t pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t * ppnp, pt_entry_t * ptep, pmap_pv_assertion_t incident)
{
    pmap_pagetable_corruption_action_t action = PMAP_ACTION_ASSERT;
    pmap_pagetable_corruption_t suppress_reason = PTE_VALID;
    ppnum_t suppress_ppn = 0;
    pt_entry_t cpte = *ptep;
    ppnum_t cpn = pa_index((cpte) & L2_ADDR_MASK);
    ppnum_t ppn = *ppnp;
    pv_rooted_entry_t pv_h = pai_to_pvh((ppn));
    pv_rooted_entry_t pv_e = pv_h;
    uint32_t bitdex;
    pmap_t pvpmap = pv_h->pmap;
    vm_map_offset_t pvva = pv_h->va;
    boolean_t ppcd = FALSE;

    /*
     * Ideally, we'd consult the Mach VM here to definitively determine
     * the nature of the mapping for this address space and address.
     * As that would be a layering violation in this context, we
     * use various heuristics to recover from single bit errors,
     * malformed pagetable entries etc. These are not intended
     * to be comprehensive.
     */

    /*
     * Correct potential single bit errors in either (but not both) element
     * of the PV
     */
    do {
        if ((popcnt1((uintptr_t) pv_e->pmap ^ (uintptr_t) pmap) && pv_e->va == vaddr) || (pv_e->pmap == pmap && popcnt1(pv_e->va ^ vaddr))) {
            pv_e->pmap = pmap;
            pv_e->va = vaddr;
            suppress_reason = PV_BITFLIP;
            action = PMAP_ACTION_RETRY;
            goto pmap_cpc_exit;
        }
    } while (((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink))) && (pv_e != pv_h));

    /*
     * Discover root entries with a Hamming
     * distance of 1 from the supplied
     * physical page frame.
     */
    for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
        ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
        {
            pv_rooted_entry_t npv_h = pai_to_pvh((npn));
            if (npv_h->va == vaddr && npv_h->pmap == pmap) {
                suppress_reason = PTE_BITFLIP;
                suppress_ppn = npn;
                action = PMAP_ACTION_RETRY_RELOCK;
                UNLOCK_PVH((ppn));
                *ppnp = npn;
                goto pmap_cpc_exit;
            }
        }
    }

    if (pmap == kernel_pmap) {
        action = PMAP_ACTION_ASSERT;
        goto pmap_cpc_exit;
    }

    /*
     * Check for malformed/inconsistent entries
     */

    if ((pmap != kernel_pmap) && ((cpte & L2_ACCESS_USER) == 0)) {
        action = PMAP_ACTION_IGNORE;
        suppress_reason = PTE_SUPERVISOR;
    }
 pmap_cpc_exit:
    PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));

    if (debug_boot_arg && !ppcd) {
        action = PMAP_ACTION_ASSERT;
    }

    if ((mach_absolute_time() - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
        action = PMAP_ACTION_ASSERT;
        pmap_pagetable_corruption_timeout = TRUE;
    } else {
        pmap_pagetable_corruption_last_abstime = mach_absolute_time();
    }
    pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
    return action;
}

/*
 * Remove pv list entry.
 * Called with pv_head_table entry locked.
 * Returns pv entry to be freed (or NULL).
 */
static inline __attribute__ ((always_inline)) pv_hashed_entry_t pmap_pv_remove(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t * ppnp, pt_entry_t * pte)
{
    pv_hashed_entry_t pvh_e;
    pv_rooted_entry_t pv_h;
    pv_hashed_entry_t *pprevh;
    int pvhash_idx;
    uint32_t pv_cnt;
    ppnum_t ppn;

 pmap_pv_remove_retry:
    ppn = *ppnp;
    pvh_e = PV_HASHED_ENTRY_NULL;
    pv_h = pai_to_pvh((ppn));

    if (__improbable(pv_h->pmap == PMAP_NULL)) {
        pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
        if (pac == PMAP_ACTION_IGNORE)
            goto pmap_pv_remove_exit;
        else if (pac == PMAP_ACTION_ASSERT)
            panic("pmap_pv_remove(%p,0x%x,0x%x, 0x%x, %p, %p): null pv_list!", pmap, vaddr, ppn, *pte, ppnp, pte);
        else if (pac == PMAP_ACTION_RETRY_RELOCK) {
            LOCK_PVH((*ppnp));
            goto pmap_pv_remove_retry;
        } else if (pac == PMAP_ACTION_RETRY)
            goto pmap_pv_remove_retry;
    }

    if (pv_h->va == vaddr && pv_h->pmap == pmap) {
        /*
         * Header is the pv_rooted_entry.
         * We can't free that. If there is a queued
         * entry after this one we remove that
         * from the ppn queue, we remove it from the hash chain
         * and copy it to the rooted entry. Then free it instead.
         */
        pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
        if (pv_h != (pv_rooted_entry_t) pvh_e) {
            /*
             * Entry queued to root, remove this from hash
             * and install as new root.
             */
            CHK_NPVHASH();
            pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
            LOCK_PV_HASH(pvhash_idx);
            remque(&pvh_e->qlink);
            pprevh = pvhash(pvhash_idx);
            if (PV_HASHED_ENTRY_NULL == *pprevh) {
                panic("pmap_pv_remove(%p,0x%x,0x%x): " "empty hash, removing rooted", pmap, vaddr, ppn);
            }
            pmap_pvh_unlink(pvh_e);
            UNLOCK_PV_HASH(pvhash_idx);
            pv_h->pmap = pvh_e->pmap;
            pv_h->va = pvh_e->va;   /* dispose of pvh_e */
        } else {
            /*
             * none queued after rooted
             */
            pv_h->pmap = PMAP_NULL;
            pvh_e = PV_HASHED_ENTRY_NULL;
        }
    } else {
        /*
         * not removing rooted pv. find it on hash chain, remove from
         * ppn queue and hash chain and free it
         */
        CHK_NPVHASH();
        pvhash_idx = pvhashidx(pmap, vaddr);
        LOCK_PV_HASH(pvhash_idx);
        pprevh = pvhash(pvhash_idx);
        if (PV_HASHED_ENTRY_NULL == *pprevh) {
            panic("pmap_pv_remove(%p,0x%x,0x%x, 0x%x, %p): empty hash", pmap, vaddr, ppn, *pte, pte);
        }
        pvh_e = *pprevh;
        pmap_pv_hashlist_walks++;
        pv_cnt = 0;
        while (PV_HASHED_ENTRY_NULL != pvh_e) {
            pv_cnt++;
            if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == ppn)
                break;
            pprevh = &pvh_e->nexth;
            pvh_e = pvh_e->nexth;
        }

        if (PV_HASHED_ENTRY_NULL == pvh_e) {
            pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);

            if (pac == PMAP_ACTION_ASSERT)
                panic("pmap_pv_remove(%p, 0x%x, 0x%x, 0x%x, %p, %p): pv not on hash, head: %p, 0x%x", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, pv_h->va);
            else {
                UNLOCK_PV_HASH(pvhash_idx);
                if (pac == PMAP_ACTION_RETRY_RELOCK) {
                    LOCK_PVH(ppn_to_pai(*ppnp));
                    goto pmap_pv_remove_retry;
                } else if (pac == PMAP_ACTION_RETRY) {
                    goto pmap_pv_remove_retry;
                } else if (pac == PMAP_ACTION_IGNORE) {
                    goto pmap_pv_remove_exit;
                }
            }
        }

        pmap_pv_hashlist_cnts += pv_cnt;
        if (pmap_pv_hashlist_max < pv_cnt)
            pmap_pv_hashlist_max = pv_cnt;
        *pprevh = pvh_e->nexth;
        remque(&pvh_e->qlink);
        UNLOCK_PV_HASH(pvhash_idx);
    }
 pmap_pv_remove_exit:
    return pvh_e;
}

__private_extern__ void pmap_pagetable_corruption_msg_log(int (*log_func) (const char *fmt, ...) __printflike(1, 2))
{
    if (pmap_pagetable_corruption_incidents > 0) {
        int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
        (*log_func) ("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
        for (i = 0; i < e; i++) {
            (*log_func) ("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident, pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
        }
    }
}

static inline void pmap_pagetable_corruption_log_setup(void)
{
    if (pmap_pagetable_corruption_log_call == NULL) {
        nanotime_to_absolutetime(20000, 0, &pmap_pagetable_corruption_interval_abstime);
        thread_call_setup(&pmap_pagetable_corruption_log_call_data, (thread_call_func_t) pmap_pagetable_corruption_msg_log, (thread_call_param_t) &printf);
        pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
    }
}

/**
 * pmap_vm_prot_to_page_flags
 */
uint32_t pmap_vm_prot_to_page_flags(pmap_t pmap, vm_prot_t prot, int wired, int nx)
{
    arm_l2_t *current_l2 = &arm_pte_prot_templates[0];
    pt_entry_t pte = 0;

    /*
     * Pmaps other than the kernel one will always have user accessible pages.
     */
    if (pmap != kernel_pmap)
        pte |= L2_ACCESS_USER;
    pte |= L2_ACCESS_PRW;

    /*
     * Enforce Read-Write if necessary.
     */
    if (prot & VM_PROT_WRITE)
        pte &= ~(L2_ACCESS_APX);    /* APX-bit, RW? */
    else
        pte |= (L2_ACCESS_APX);     /* APX-bit, R-? */

    /*
     * Enforce XN if necessary.
     */
    if (!(prot & VM_PROT_EXECUTE))
        pte |= L2_NX_BIT;           /* XN-bit, R?X */

    return pte;
}
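
/*
 * Illustrative sketch (unused): what the helper above yields for a typical
 * kernel-only, writable, non-executable mapping -- L2_ACCESS_PRW with APX
 * cleared and the XN bit set, and no L2_ACCESS_USER since the pmap is the
 * kernel pmap. Note that the wired/nx arguments are currently ignored.
 */
static __unused uint32_t pmap_kernel_data_prot_example(void)
{
    return pmap_vm_prot_to_page_flags(kernel_pmap, VM_PROT_READ | VM_PROT_WRITE, TRUE, TRUE);
}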

/**
 * phys_attribute_clear and friends. These suck.
 */
void phys_attribute_clear(ppnum_t pn, int bits)
{
    int pai;
    pv_rooted_entry_t pv_h;

    assert(pn != vm_page_fictitious_addr);

    pv_h = pai_to_pvh(pn);
    pv_h->flags &= ~bits;

    return;
}

int phys_attribute_test(ppnum_t pn, int bits)
{
    int pai;
    pv_rooted_entry_t pv_h;

    assert(pn != vm_page_fictitious_addr);

    pv_h = pai_to_pvh(pn);
    if ((pv_h->flags & bits) == (unsigned int)bits)
        return bits;

    return (pv_h->flags & bits);
}

void phys_attribute_set(ppnum_t pn, int bits)
{
    int pai;
    pv_rooted_entry_t pv_h;

    assert(pn != vm_page_fictitious_addr);

    pv_h = pai_to_pvh(pn);
    pv_h->flags |= bits;

    return;
}
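
/*
 * Illustrative sketch (unused): the phys_attribute_* helpers above back the
 * pmap_is_modified/pmap_clear_modify style interfaces defined later in this
 * file; a combined test-and-clear of the modified bit looks like this.
 */
static __unused boolean_t phys_attribute_test_and_clear_modified_sketch(ppnum_t pn)
{
    boolean_t was_modified = (phys_attribute_test(pn, PMAP_OSPTE_TYPE_MODIFIED) != 0);

    phys_attribute_clear(pn, PMAP_OSPTE_TYPE_MODIFIED);
    return was_modified;
}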

/**
 * pmap_adjust_unnest_parameters
 *
 * Invoked by the Mach VM to determine the platform specific unnest region. This
 * is not used on ARM platforms.
 */
boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t * s, vm_map_offset_t * e)
{
    return FALSE;
}

/**
 * pmap_attribute
 *
 * Set/Get special memory attributes; Set/Get is not implemented.
 */
kern_return_t pmap_attribute(pmap_t pmap, vm_offset_t address, vm_size_t size, vm_machine_attribute_t attr, vm_machine_attribute_val_t * attrp)
{
    return KERN_INVALID_ADDRESS;
}

/**
 * pmap_attribute_cache_sync
 *
 * Flush appropriate cache based on page number sent.
 */
kern_return_t pmap_attribute_cache_sync(ppnum_t pn, vm_size_t size, vm_machine_attribute_t attr, vm_machine_attribute_val_t * attrp)
{
    Debugger("pmap_attribute_cache_sync");
    return KERN_SUCCESS;
}

/**
 * pmap_get_cache_attributes
 */
unsigned int pmap_get_cache_attributes(ppnum_t pn) {
    /* If the pmap subsystem isn't up, just assume writethrough cache. */
    if(!pmap_initialized)
        return (ARM_CACHEBIT_WT_NWA_BUFFERED << 2) | (ARM_L2_4KB_TEX(ARM_L2_TEX_100 | ARM_CACHEBIT_WT_NWA_BUFFERED));

    /* If it's out of memory, assume it's not cacheable at all. */
    if(!pmap_valid_page(pn))
        return 0;

    assert(pn != vm_page_fictitious_addr);
    pv_rooted_entry_t pv_h = pai_to_pvh(pn);
    assert(pv_h);

    unsigned int attr = pv_h->flags;
    unsigned int template = 0;

    if (attr & PMAP_OSPTE_TYPE_NOCACHE)
        /* No cache, strongly ordered memory. */
        template |= 0;
    else
        /* Assume writethrough, no write allocate for now. */
        template |= (ARM_CACHEBIT_WT_NWA_BUFFERED << 2) | (ARM_L2_4KB_TEX(ARM_L2_TEX_100 | ARM_CACHEBIT_WT_NWA_BUFFERED));

    return template;
}

/**
 * pmap_cache_attributes
 */
unsigned int pmap_cache_attributes(ppnum_t pn)
{
    if (!(pmap_get_cache_attributes(pn) & ARM_L2_C_BIT))
        return (VM_WIMG_IO);
    else
        return (VM_WIMG_COPYBACK);
}

/**
 * pmap_clear_noencrypt
 */
void pmap_clear_noencrypt(ppnum_t pn)
{
    if (!pmap_initialized)
        return;
    phys_attribute_clear(pn, PMAP_OSPTE_TYPE_NOENCRYPT);
}

/**
 * pmap_is_noencrypt
 */
boolean_t pmap_is_noencrypt(ppnum_t pn)
{
    if (!pmap_initialized)
        return FALSE;
    return (phys_attribute_test(pn, PMAP_OSPTE_TYPE_NOENCRYPT));
}

/**
 * pmap_set_noencrypt
 */
void pmap_set_noencrypt(ppnum_t pn)
{
    if (!pmap_initialized)
        return;
    phys_attribute_set(pn, PMAP_OSPTE_TYPE_NOENCRYPT);
}

/**
 * pmap_flush_tlbs
 */
void
pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv)
{
    unsigned int cpu;
    unsigned int cpu_bit;
    unsigned int my_cpu = cpu_number();
    pmap_paddr_t ttb = pmap->pm_l1_phys;
    boolean_t flush_self = FALSE;
    boolean_t pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));

    assert((processor_avail_count < 2) ||
           (ml_get_interrupts_enabled() && get_preemption_level() != 0));

    if (pmap_asid_ncpus) {
        pmap_asid_invalidate_all_cpus(pmap);
        __asm__ volatile("":::"memory");
    }

    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
        if (!cpu_datap(cpu)->cpu_running)
            continue;
        uint32_t ttbr_pmap = armreg_ttbr_read() & 0xFFFFFF00;

        /* Current pmap is active, flush it. */
        if ((ttb == ttbr_pmap) ||
            (pmap_is_shared)) {
            if (cpu == my_cpu) {
                flush_self = TRUE;
                continue;
            }

            /* xxx broadcast IPI to all other CPUs to flush */
        }
    }

    /*
     * Flush local tlb if required.
     * Do this now to overlap with other processors responding.
     */
    if (flush_self) {
        if (pmap_asid_ncpus) {
            pmap_asid_validate_cpu(pmap, my_cpu);
            if (pmap_is_shared)
                arm_tlb_flushID();
            else
                arm_tlb_flushID_ASID(pmap->pm_asid & 0xFF);
        }
        else
            arm_tlb_flushID();
    }

    if (__improbable((pmap == kernel_pmap) && (flush_self != TRUE))) {
        panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE");
    }
}

/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */

static uint32_t cacheability_mask = ~((ARM_L2_TEX_011 << 2) | ARM_L2_4KB_TEX(ARM_L2_TEX_111));

void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
    pv_rooted_entry_t pv_h, pv_e;
    pv_hashed_entry_t pvh_e, nexth;
    vm_map_offset_t vaddr;
    pmap_t pmap;
    pt_entry_t *ptep;

    pv_h = pai_to_pvh(pn);
    /*
     * TODO: translate the PHYS_* bits to PTE bits; while they're
     * currently identical, they may not remain so.
     * Potential optimization (here and in page_protect):
     * parallel shootdowns, check for redundant
     * attribute modifications.
     */

    /*
     * Alter attributes on all mappings
     */
    if (pv_h->pmap != PMAP_NULL) {
        pv_e = pv_h;
        pvh_e = (pv_hashed_entry_t)pv_e;

        do {
            pmap = pv_e->pmap;
            vaddr = pv_e->va;
            ptep = (pt_entry_t *)pmap_pte(pmap, vaddr);

            if (ptep == 0)
                panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%x kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);

            nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);

            /*
             * Update PTE.
             */
            pt_entry_t* cpte = (pt_entry_t*)ptep;
            *cpte &= cacheability_mask;
            *cpte |= attributes;
            pmap_flush_tlbs(pmap, vaddr, vaddr + PAGE_SIZE);

            pvh_e = nexth;
        } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
    }
}

/**
 * pmap_set_cache_attributes
 *
 * Set the specified cache attributes.
 */
void pmap_set_cache_attributes(ppnum_t pn, unsigned int cacheattr)
{
    unsigned int current, template = 0;
    int pai = pn;       /* page attribute index for the PV lock */

    if (cacheattr & VM_MEM_NOT_CACHEABLE) {
        /*
         * Template of 0 is non-cacheable, strongly ordered memory.
         */
        template &= cacheability_mask;
    } else {
        /*
         * Writethrough.
         */
        if(cacheattr == VM_WIMG_WTHRU)
            template |= (ARM_CACHEBIT_WT_NWA_BUFFERED << 2) | (ARM_L2_4KB_TEX(ARM_L2_TEX_100 | ARM_CACHEBIT_WT_NWA_BUFFERED));
        /*
         * Writecombine/copyback = writeback.
         */
        else if(cacheattr == VM_WIMG_WCOMB || cacheattr == VM_WIMG_COPYBACK)
            template |= (ARM_CACHEBIT_WB_WA_BUFFERED << 2) | (ARM_L2_4KB_TEX(ARM_L2_TEX_100 | ARM_CACHEBIT_WB_WA_BUFFERED));
    }

    /*
     * On MP systems, interrupts must be enabled.
     */
    if (processor_avail_count > 1 && !ml_get_interrupts_enabled())
        panic("interrupts must be enabled for pmap_set_cache_attributes");

    assert((pn != vm_page_fictitious_addr) && (pn != vm_page_guard_addr));

    LOCK_PVH(pai);
    pmap_update_cache_attributes_locked(pn, template);

    if(cacheattr & VM_MEM_NOT_CACHEABLE)
        phys_attribute_set(pn, PMAP_OSPTE_TYPE_NOCACHE);
    else
        phys_attribute_clear(pn, PMAP_OSPTE_TYPE_NOCACHE);

    UNLOCK_PVH(pai);

    return;
}

/**
 * compute_pmap_gc_throttle
 *
 * Unused.
 */
void compute_pmap_gc_throttle(void *arg __unused)
{
    return;
}
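
/*
 * Illustrative note (unused): the write-back/write-allocate template that
 * pmap_set_cache_attributes() above installs for VM_WIMG_WCOMB/VM_WIMG_COPYBACK,
 * spelled out with the TEX/cacheability macros from the table earlier in this
 * file -- TEX = 0b1BB with BB = write-back/write-allocate, plus the matching
 * C/B field.
 */
static __unused uint32_t pmap_writeback_template_example(void)
{
    return (ARM_CACHEBIT_WB_WA_BUFFERED << 2) |
           ARM_L2_4KB_TEX(ARM_L2_TEX_100 | ARM_CACHEBIT_WB_WA_BUFFERED);
}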

/**
 * pmap_change_wiring
 *
 * Specify pageability.
 */
void pmap_change_wiring(pmap_t map, vm_map_offset_t va, boolean_t wired)
{
    pt_entry_t *pte;
    uint32_t pa;

    /*
     * Lock the pmap.
     */
    PMAP_LOCK(map);

    if ((pte = (pt_entry_t *)pmap_pte(map, va)) == (pt_entry_t *) 0)
        panic("pmap_change_wiring: pte missing");

    /*
     * Use FVTP to get the physical PPN. This will not work with the old
     * pmap_extract.
     */
    PMAP_UNLOCK(map);
    pa = pmap_extract(map, va);
    PMAP_LOCK(map);
    assert(pa);

    /*
     * Adjust the wired state only when it actually changes.
     */
    if (wired && !phys_attribute_test(pa >> PAGE_SHIFT, PMAP_OSPTE_TYPE_WIRED)) {
        /*
         * We are wiring down the mapping.
         */
        pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
        OSAddAtomic(+1, &map->pm_stats.wired_count);
        phys_attribute_set(pa >> PAGE_SHIFT, PMAP_OSPTE_TYPE_WIRED);
    } else if (!wired && phys_attribute_test(pa >> PAGE_SHIFT, PMAP_OSPTE_TYPE_WIRED)) {
        /*
         * Unwiring the mapping.
         */
        assert(map->pm_stats.wired_count >= 1);
        OSAddAtomic(-1, &map->pm_stats.wired_count);
        phys_attribute_clear(pa >> PAGE_SHIFT, PMAP_OSPTE_TYPE_WIRED);
        pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
    }

    /*
     * Done, unlock the map.
     */
    PMAP_UNLOCK(map);
    return;
}

/**
 * pmap_tte
 */
vm_offset_t pmap_tte(pmap_t pmap, vm_offset_t virt)
{
    uint32_t tte_offset_begin;
    tte_offset_begin = pmap->pm_l1_virt;
    if ((tte_offset_begin + L1_SIZE) < addr_to_tte(pmap->pm_l1_virt, virt))
        panic("Translation table entry extends past L1 size (base: 0x%08X)", tte_offset_begin);
    return addr_to_tte(pmap->pm_l1_virt, virt);
}

/**
 * pmap_pte
 */
vm_offset_t pmap_pte(pmap_t pmap, vm_offset_t virt)
{
    uint32_t *tte_offset = (uint32_t *) pmap_tte(pmap, virt);
    uint32_t tte, pte, *ptep;

    /*
     * Get the translation-table entry.
     */
    assert(tte_offset);
    tte = *tte_offset;

    /*
     * If the requested PTE lies in the commpage region and we are not
     * the kernel pmap, bail out.
     *
     * This is because the TTBCR is set to 4kB, and all higher page table
     * address accesses will go to the kernel.
     */
    if (pmap != kernel_pmap && virt >= _COMM_PAGE_BASE_ADDRESS)
        return 0;

    /*
     * Verify it's not a section mapping.
     */
    if ((tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_SECTION) {
        panic("Translation table entry is a section mapping (tte %x ttep %p ttebv %x)!\n", tte, tte_offset, pmap->pm_l1_virt);
    }

    /*
     * Clean the TTE bits off, get the address.
     */
    pte = L1_PTE_ADDR(tte);
    if (!pte)
        return 0;

    /*
     * Return the virtual mapped PTE.
     */
    ptep = (uint32_t *) ((phys_to_virt(pte) + pte_offset(virt)));

    return (vm_offset_t)(ptep);
}
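
/*
 * Illustrative sketch (unused): reading the current PTE value for a VA via the
 * tte -> pte walk that pmap_tte()/pmap_pte() implement above. This is the same
 * pattern pmap_find_phys() uses near the end of this file.
 */
static __unused pt_entry_t pmap_pte_value_sketch(pmap_t pmap, vm_offset_t va)
{
    pt_entry_t *ptep = (pt_entry_t *) pmap_pte(pmap, va);

    return ptep ? *ptep : 0;
}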

void mapping_free_prime(void)
{
    int i;
    pv_hashed_entry_t pvh_e;
    pv_hashed_entry_t pvh_eh;
    pv_hashed_entry_t pvh_et;
    int pv_cnt;

    pv_cnt = 0;
    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
    for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
        pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

        pvh_e->qlink.next = (queue_entry_t) pvh_eh;
        pvh_eh = pvh_e;

        if (pvh_et == PV_HASHED_ENTRY_NULL)
            pvh_et = pvh_e;
        pv_cnt++;
    }
    PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);

    pv_cnt = 0;
    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
    for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
        pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

        pvh_e->qlink.next = (queue_entry_t) pvh_eh;
        pvh_eh = pvh_e;

        if (pvh_et == PV_HASHED_ENTRY_NULL)
            pvh_et = pvh_e;
        pv_cnt++;
    }
    PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);

}

void mapping_adjust(void)
{
    pv_hashed_entry_t pvh_e;
    pv_hashed_entry_t pvh_eh;
    pv_hashed_entry_t pvh_et;
    int pv_cnt;
    int i;

    if (mapping_adjust_call == NULL) {
        pmap_pagetable_corruption_log_setup();
        thread_call_setup(&mapping_adjust_call_data, (thread_call_func_t) mapping_adjust, (thread_call_param_t) NULL);
        mapping_adjust_call = &mapping_adjust_call_data;
    }

    pv_cnt = 0;
    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
    if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
        for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
            pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

            pvh_e->qlink.next = (queue_entry_t) pvh_eh;
            pvh_eh = pvh_e;

            if (pvh_et == PV_HASHED_ENTRY_NULL)
                pvh_et = pvh_e;
            pv_cnt++;
        }
        PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
    }

    pv_cnt = 0;
    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
    if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
        for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
            pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

            pvh_e->qlink.next = (queue_entry_t) pvh_eh;
            pvh_eh = pvh_e;

            if (pvh_et == PV_HASHED_ENTRY_NULL)
                pvh_et = pvh_e;
            pv_cnt++;
        }
        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
    }
    mappingrecurse = 0;
}

/*
 * pmap_map
 *
 * Map specified virtual address range to a physical one.
 */
vm_offset_t pmap_map(vm_offset_t virt, vm_map_offset_t start_addr, vm_map_offset_t end_addr, vm_prot_t prot, unsigned int flags)
{
    int ps;

    ps = PAGE_SIZE;
    while (start_addr < end_addr) {
        pmap_enter(kernel_pmap, (vm_map_offset_t) virt, (start_addr >> PAGE_SHIFT), prot, flags, FALSE, TRUE);
        virt += ps;
        start_addr += ps;
    }
    return (virt);
}

/**
 * pmap_next_page_hi
 *
 * Allocate physical pages.
 */
boolean_t pmap_next_page_hi(ppnum_t * pnum)
{
    return pmap_next_page(pnum);
}
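
/*
 * Illustrative sketch (unused, made-up physical address): pmap_map() above
 * walks a physical range page by page and enters each page into the kernel
 * pmap, returning the first virtual address past the new mapping.
 */
static __unused vm_offset_t pmap_map_example(vm_offset_t va)
{
    return pmap_map(va, 0x48000000, 0x48000000 + 2 * PAGE_SIZE,
                    VM_PROT_READ | VM_PROT_WRITE, 0);
}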

/**
 * pmap_zero_page
 *
 * Zero a physical page.
 */
void pmap_zero_page(ppnum_t p)
{
    assert(p != vm_page_fictitious_addr);

    /*
     * Make sure the page is valid.
     */
    if (((p << PAGE_SHIFT) < avail_start) || ((p << PAGE_SHIFT) > avail_end))
        panic("pmap_zero_page: zeroing a non-managed page, ppnum %d", p);

    bzero((void *)phys_to_virt(p << PAGE_SHIFT), PAGE_SIZE);
}

/**
 * pmap_clear_refmod
 *
 * Clears the referenced and modified bits as specified by the mask
 * of the specified physical page.
 */
void pmap_clear_refmod(ppnum_t pn, unsigned int mask)
{
    phys_attribute_clear(pn, mask);
}

/**
 * io_map
 *
 * Maps an IO region and returns its virtual address.
 */
vm_offset_t io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
{
    vm_offset_t start;

    if (kernel_map == VM_MAP_NULL) {
        /*
         * VM is not initialized. Grab memory.
         */
        start = virt_begin;
        virt_begin += round_page(size);

        (void) pmap_map_bd(start, phys_addr, phys_addr + round_page(size), VM_PROT_READ | VM_PROT_WRITE, flags);
    } else {
        (void) kmem_alloc_pageable(kernel_map, &start, round_page(size));
        (void) pmap_map(start, phys_addr, phys_addr + round_page(size), VM_PROT_READ | VM_PROT_WRITE, flags);
    }

    return (start);
}

vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
{
    return (io_map(phys_addr, size, flags));
}

/**
 * pmap_next_page
 *
 * Allocate physical pages.
 */
boolean_t pmap_next_page(ppnum_t * addrp)
{
    if (first_avail >= avail_end) {
        kprintf("pmap_next_page: ran out of possible pages, last page was 0x%08x", first_avail);
        return FALSE;
    }

    *addrp = pa_index(first_avail);

    /*
     * We lost a page.
     */
    first_avail += PAGE_SIZE;
    avail_remaining--;
    return TRUE;
}

/**
 * pmap_virtual_space
 *
 * Get virtual space parameters.
 */
void pmap_virtual_space(vm_offset_t * startp, vm_offset_t * endp)
{
    *startp = virt_begin;
    *endp = virt_end;
    kprintf("pmap_virtual_space: VM region 0x%08x - 0x%08x\n", virt_begin, virt_end);
}

/**
 * pmap_free_pages
 *
 * Return free page count.
 */
unsigned int pmap_free_pages(void)
{
    return avail_remaining;
}

/**
 * pmap_map_bd
 *
 * Enters a physical mapping. (Before the VM subsystem is up.)
 */
boolean_t pmap_map_bd(vm_offset_t virt, vm_map_offset_t start, vm_map_offset_t end, vm_prot_t prot, unsigned int flags)
{
    spl_t spl;

    /*
     * Verify the start and end are page aligned.
     */
    assert(!(start & PAGE_MASK));
    assert(!(end & PAGE_MASK));

    /*
     * Disable interrupts and start mapping pages
     */
    SPLVM(spl);

    /*
     * Write the PTEs to memory.
     */
    uint32_t ptep = (uint32_t) (pmap_pte(kernel_pmap, virt));
    if (!ptep)
        panic("pmap_map_bd: Invalid kernel address");

    /*
     * Map the pages.
     */
    l2_map_linear_range_no_cache(virt_to_phys(ptep), start, end);

    /*
     * Return.
     */
    SPLX(spl);

    return TRUE;
}

/**
 * pmap_pageable
 */
void pmap_pageable(__unused pmap_t pmap, __unused vm_map_offset_t start, __unused vm_map_offset_t end, __unused boolean_t pageable)
{
    return;
}

/**
 * pmap_set_modify
 *
 * Set the modify bit on the specified physical page.
 */
1802 */ 1803void pmap_set_modify(ppnum_t pn) 1804{ 1805 phys_attribute_set(pn, PMAP_OSPTE_TYPE_MODIFIED); 1806} 1807 1808/** 1809 * pmap_clear_modify 1810 * 1811 * Clear the modify bits on the specified physical page. 1812 */ 1813void pmap_clear_modify(ppnum_t pn) 1814{ 1815 phys_attribute_clear(pn, PMAP_OSPTE_TYPE_MODIFIED); 1816} 1817 1818/** 1819 * pmap_clear_reference 1820 * 1821 * Clear the reference bit on the specified physical page. 1822 */ 1823void pmap_clear_reference(ppnum_t pn) 1824{ 1825 phys_attribute_clear(pn, PMAP_OSPTE_TYPE_REFERENCED); 1826} 1827 1828/** 1829 * pmap_set_reference 1830 * 1831 * Set the reference bit on the specified physical page. 1832 */ 1833void pmap_set_reference(ppnum_t pn) 1834{ 1835 phys_attribute_set(pn, PMAP_OSPTE_TYPE_REFERENCED); 1836} 1837 1838/** 1839 * pmap_valid_page 1840 * 1841 * Is the page inside the managed zone? 1842 */ 1843boolean_t pmap_valid_page(ppnum_t p) 1844{ 1845 return (((p << PAGE_SHIFT) > avail_start) 1846 && ((p << PAGE_SHIFT) < avail_end)); 1847} 1848 1849/** 1850 * pmap_verify_free 1851 * 1852 * Verify that the page has no mappings. 1853 */ 1854boolean_t pmap_verify_free(vm_offset_t phys) 1855{ 1856 pv_rooted_entry_t pv_h; 1857 int pai; 1858 boolean_t result; 1859 1860 assert(phys != vm_page_fictitious_addr); 1861 if (!pmap_initialized) 1862 return (TRUE); 1863 1864 if (!pmap_valid_page(phys)) 1865 return (FALSE); 1866 1867 pv_h = pai_to_pvh(phys); 1868 result = (pv_h->pmap == PMAP_NULL); 1869 1870 return (result); 1871} 1872 1873/** 1874 * pmap_sync_page_data_phys 1875 * 1876 * Invalidates all of the instruction cache on a physical page and 1877 * pushes any dirty data from the data cache for the same physical page 1878 */ 1879void pmap_sync_page_data_phys(__unused ppnum_t pa) 1880{ 1881 Debugger("pmap_sync_page_data_phys"); 1882 return; 1883} 1884 1885/** 1886 * pmap_sync_page_attributes_phys(ppnum_t pa) 1887 * 1888 * Write back and invalidate all cachelines on a physical page. 1889 */ 1890void pmap_sync_page_attributes_phys(ppnum_t pa) 1891{ 1892 Debugger("pmap_sync_page_attributes_phys"); 1893 return; 1894} 1895 1896/* 1897 * Statistics routines 1898 */ 1899int pmap_resident_max(pmap_t pmap) 1900{ 1901 return ((pmap)->pm_stats.resident_max); 1902} 1903 1904int pmap_resident_count(pmap_t pmap) 1905{ 1906 return ((pmap)->pm_stats.resident_count); 1907} 1908 1909/** 1910 * pmap_disable_NX 1911 * 1912 * Disable NX on a specified pmap. 1913 */ 1914void pmap_disable_NX(pmap_t pmap) 1915{ 1916 panic("pmap_disable_NX not implemented\n"); 1917} 1918 1919extern void ovbcopy(void *from, void *to, vm_size_t len); /* TODO: Put this in a better place. */ 1920 1921/** 1922 * pmap_zero_page 1923 * 1924 * pmap_copy_page copies the specified (machine independent) 1925 * page from physical address src to physical address dst. 1926 */ 1927void pmap_copy_page(ppnum_t src, ppnum_t dst) 1928{ 1929 ovbcopy((void *)phys_to_virt(src << PAGE_SHIFT), (void *)phys_to_virt(dst << PAGE_SHIFT), PAGE_SIZE); 1930} 1931 1932/** 1933 * pmap_copy_part_page 1934 * 1935 * Copies the specified (machine independent) pages. 
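 * Both the source and destination (offset + len) ranges are asserted to fit
 * within a single page.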
 */
void pmap_copy_part_page(ppnum_t src, vm_offset_t src_offset, ppnum_t dst, vm_offset_t dst_offset, vm_size_t len)
{
    assert((((dst << PAGE_SHIFT) & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE);
    assert((((src << PAGE_SHIFT) & PAGE_MASK) + src_offset + len) <= PAGE_SIZE);

    ovbcopy((void *)(phys_to_virt(src << PAGE_SHIFT) + src_offset), (void *)(phys_to_virt(dst << PAGE_SHIFT) + dst_offset), len);
}

/**
 * pmap_common_init
 *
 * Initialize common elements of pmaps.
 */
void pmap_common_init(pmap_t pmap)
{
    usimple_lock_init(&pmap->lock, 0);
    if (pmap->ledger)
        ledger_reference(pmap->ledger);
    pmap->pm_refcnt = 1;
    pmap->pm_nx = 0;
    pmap->pm_shared = FALSE;
    pmap->pm_stats.resident_count = 0;
    pmap->pm_stats.wired_count = 0;
}

/**
 * pmap_static_init
 *
 * Initialize the basic kernel pmap.
 */
void pmap_static_init(void)
{
    kdb_printf("pmap_static_init: Bootstrapping pmap\n");
    kernel_pmap->ledger = NULL;
    kernel_pmap->pm_asid = 0;
    kernel_pmap->pm_l1_size = 0x4000;   /* Covers 4*1024 TTEs */
    pmap_common_init(kernel_pmap);
    return;
}

/**
 * pmap_is_modified
 *
 * Return whether or not the specified physical page is modified
 * by any physical maps.
 */
boolean_t pmap_is_modified(vm_offset_t phys)
{
    return (phys_attribute_test(phys, PMAP_OSPTE_TYPE_MODIFIED));
}

/**
 * pmap_is_referenced
 *
 * Return whether or not the specified physical page is referenced
 * by any physical maps.
 */
boolean_t pmap_is_referenced(vm_offset_t phys)
{
    return (phys_attribute_test(phys, PMAP_OSPTE_TYPE_REFERENCED));
}

/**
 * pmap_list_resident_pages
 */
int pmap_list_resident_pages(pmap_t pmap, vm_offset_t * listp, int space)
{
    return 0;
}

/**
 * pmap_find_phys
 *
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 */
ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va)
{
    spl_t spl;
    uint32_t ptep, pte, ppn;

    /*
     * Raise priority level.
     */
    disable_preemption();

#if 1
    /*
     * Get the PTE.
     */
    ptep = (uint32_t) pmap_pte(pmap, (vm_offset_t) va);
    if (!ptep) {
        ppn = 0;
        goto out;
    }
    pte = (*(uint32_t *) (ptep));

    /*
     * Make sure it's a PTE.
     */
    if (!((pte) & ARM_PTE_DESCRIPTOR_4K)) {
        ppn = 0;
        goto out;
    }

    ppn = pa_index(pte & L2_ADDR_MASK);
#else
    uint32_t virt = (va & L2_ADDR_MASK), par;
    boolean_t is_priv = (pmap == kernel_pmap) ? TRUE : FALSE;

    /*
     * TTBCR split means that commonpage is at 0x40000000, in kernel_pmap.
     */
    if (virt == _COMM_PAGE_BASE_ADDRESS) {
        ppn = 0;
        goto out;
    }

    /*
     * Fast VirtToPhys uses the virtual address translation
     * register present in Cortex-A and ARM11 MPCore systems.
     *
     * Privileged reads are only done on the kernel pmap; user
     * pmaps get user read/write state.
     *
     * The entire process should take much less time than the
     * older pmap_extract, which fully walked the page tables. The old
     * table-walking behaviour can still be selected through the MASTER
     * config files.
     *
     * I swear, I need more stupid sleep.
     */

    /*
     * Set the VA-to-PA translation register and perform the operation.
     */
    if (is_priv)
        armreg_va2pa_pr_ns_write(virt);
    else
        armreg_va2pa_ur_ns_write(virt);

    /*
     * Wait for the translation to complete.
     */
    __asm__ __volatile__("isb sy");

    /*
     * See if the translation aborted, log any translation errors.
     */
    par = armreg_par_read();

    /*
     * Successful translation, we're done.
     */
    if (!(par & 1)) {
        uint32_t pa = par & L2_ADDR_MASK;
        ppn = pa_index(pa);
    } else {
        ppn = 0;
    }
#endif
  out:
    /*
     * Return.
     */
    enable_preemption();
    return ppn;
}

/**
 * pmap_find_phys_fvtp
 *
 * pmap_find_phys_fvtp returns the (4K) physical page number containing a
 * given virtual address in a given pmap. This is used for KDP purposes
 * only.
 */
ppnum_t pmap_find_phys_fvtp(pmap_t pmap, addr64_t va)
{
#ifdef _ARM_ARCH_7
    uint32_t ppn;
    uint32_t virt = (va & L2_ADDR_MASK), par;
    boolean_t is_priv = (pmap == kernel_pmap) ? TRUE : FALSE;

    /*
     * Balance the enable_preemption() on the way out.
     */
    disable_preemption();

    /*
     * TTBCR split means that commonpage is at 0x40000000, in kernel_pmap.
     */
    if (virt == _COMM_PAGE_BASE_ADDRESS) {
        ppn = 0;
        goto out;
    }

    /*
     * Fast VirtToPhys uses the virtual address translation
     * register present in Cortex-A and ARM11 MPCore systems.
     *
     * Privileged reads are only done on the kernel pmap; user
     * pmaps get user read/write state.
     *
     * The entire process should take much less time than the
     * older pmap_extract, which fully walked the page tables. The old
     * table-walking behaviour can still be selected through the MASTER
     * config files.
     *
     * I swear, I need more stupid sleep.
     */

    /*
     * Set the VA-to-PA translation register and perform the operation.
     */
    if (is_priv)
        armreg_va2pa_pr_ns_write(virt);
    else
        armreg_va2pa_ur_ns_write(virt);

    /*
     * Wait for the translation to complete.
     */
    __asm__ __volatile__("isb sy");

    /*
     * See if the translation aborted, log any translation errors.
     */
    par = armreg_par_read();

    /*
     * Successful translation, we're done.
     */
    if (!(par & 1)) {
        uint32_t pa = par & L2_ADDR_MASK;
        ppn = pa_index(pa);
    } else {
        ppn = 0;
    }
  out:
    /*
     * Return.
     */
    enable_preemption();
    return ppn;
#else
    return 0;
#endif
}

/**
 * pmap_switch
 *
 * Switch the current user pmap to a new one.
 */
void pmap_switch(pmap_t new_pmap)
{
    spl_t spl;

    /*
     * Raise priority level.
     */
    SPLVM(spl);

    /*
     * Make sure it's not the kernel pmap.
     */
    if (new_pmap == kernel_pmap)
        goto switch_return;

    /*
     * Switch it if needed.
     */
    if (current_cpu_datap()->user_pmap == new_pmap) {
        goto switch_return;
    } else {
        if (pmap_asid_ncpus) {
            pmap_asid_activate(new_pmap, cpu_number());
        }
        current_cpu_datap()->user_pmap = new_pmap;
        arm_set_context_id(new_pmap->pm_asid & 0xFF);
        arm_context_switch(new_pmap->pm_l1_phys);
    }

    /*
     * Done.
2217 */ 2218 switch_return: 2219 SPLX(spl); 2220 return; 2221} 2222 2223/** 2224 * pmap_map_block 2225 * 2226 * Map a (possibly) autogenned block 2227 */ 2228void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, __unused unsigned int flags) 2229{ 2230 uint32_t page; 2231 for (page = 0; page < size; page++) { 2232 pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE); 2233 va += PAGE_SIZE; 2234 pa++; 2235 } 2236} 2237 2238/** 2239 * pmap_asid_init 2240 */ 2241static inline void pmap_asid_init(void) 2242{ 2243 pm_asid_bitmap[0] = (2 << KERNEL_ASID_PID) - 1; 2244} 2245 2246/** 2247 * pmap_asid_alloc_fast 2248 * 2249 * Allocate a specified ASID for each proces. Each pmap has their own 2250 * individual ASID. 2251 */ 2252#define __arraycount(__x) (sizeof(__x) / sizeof(__x[0])) 2253static inline void pmap_asid_alloc_fast(pmap_t map) 2254{ 2255 /* 2256 * The pmap specified cannot be the kernel map, it already has its 2257 * own ASID allocated to it. 2258 */ 2259 assert(map != kernel_pmap); 2260 assert(map->pm_asid == 0); 2261 assert(pm_asids_free > 0); 2262 assert(pm_asid_hint <= pm_asid_max); 2263 2264 /* 2265 * Let's see if the hinted ASID is free. If not, search for a new one. 2266 */ 2267 if (TLBINFO_ASID_INUSE_P(pm_asid_bitmap, pm_asid_hint)) { 2268 const size_t words = __arraycount(pm_asid_bitmap); 2269 const size_t nbpw = 8 * sizeof(pm_asid_bitmap[0]); 2270 for (size_t i = 0; i < pm_asid_hint / nbpw; i++) { 2271 assert(pm_asid_bitmap[i] == 0); 2272 } 2273 for (size_t i = pm_asid_hint / nbpw;; i++) { 2274 assert(i < words); 2275 /* 2276 * ffs wants to find the first bit set while we want 2277 * to find the first bit cleared. 2278 */ 2279 u_long bits = ~pm_asid_bitmap[i]; 2280 if (bits) { 2281 u_int n = 0; 2282 if ((bits & 0xffffffff) == 0) { 2283 bits = (bits >> 31) >> 1; 2284 assert(bits); 2285 n += 32; 2286 } 2287 n += ffs(bits) - 1; 2288 assert(n < nbpw); 2289 pm_asid_hint = n + i * nbpw; 2290 break; 2291 } 2292 } 2293 assert(pm_asid_hint > KERNEL_ASID_PID); 2294 assert(TLBINFO_ASID_INUSE_P(pm_asid_bitmap, pm_asid_hint - 1)); 2295 assert(!TLBINFO_ASID_INUSE_P(pm_asid_bitmap, pm_asid_hint)); 2296 } 2297 2298 /* 2299 * The hint contains our next ASID so take it and advance the hint. 2300 * Mark it as used and insert the pai into the list of active asids. 2301 * There is also one less asid free in this TLB. 2302 */ 2303 map->pm_asid = pm_asid_hint++; 2304 TLBINFO_ASID_MARK_USED(pm_asid_bitmap, map->pm_asid); 2305 pm_asids_free--; 2306 2307#if 1 2308 kprintf("[pmap_asid_alloc_fast] ASIDs free: %d ASIDs, ASID subsystem allocated id %u for map %p!\n", pm_asids_free, map->pm_asid, map); 2309#endif 2310 2311 return; 2312} 2313 2314/** 2315 * pmap_asid_reset 2316 */ 2317static inline void pmap_asid_reset(pmap_t map) 2318{ 2319 /* 2320 * We must have an ASID. 2321 */ 2322 assert(map->pm_asid > KERNEL_ASID_PID); 2323 2324 /* 2325 * Note that we don't mark the ASID as not in use in the TLB's ASID 2326 * bitmap (thus it can't be allocated until the ASID space is exhausted 2327 * and therefore reinitialized). We don't want to flush the TLB for 2328 * entries belonging to this ASID so we will let natural TLB entry 2329 * replacement flush them out of the TLB. Any new entries for this 2330 * pmap will need a new ASID allocated. 2331 */ 2332 map->pm_asid = 0; 2333 2334 return; 2335} 2336 2337extern long __stack_chk_guard[]; 2338 2339/** 2340 * pmap_bootstrap 2341 * 2342 * Bootstrap the pmap subsystem. 
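 * Records the first usable virtual address, sizes the managed physical range,
 * and validates the 'npvhash' boot-arg (which must be of the form (2^N)-1;
 * otherwise the NPVHASH default is used).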
2343 */ 2344void pmap_bootstrap(__unused uint64_t msize, vm_offset_t * __first_avail, __unused unsigned int kmapsize) 2345{ 2346 /* 2347 * Set the first virtual address we can use. 2348 */ 2349 virt_begin = *__first_avail; 2350 2351 /* 2352 * Make sure we don't go to the ARM Vector Table. 2353 */ 2354 virt_end = vm_last_addr = 0xFFFFEFFF; 2355 2356 /* 2357 * Set the available page amount. 2358 */ 2359 avail_remaining = (avail_end - first_avail) >> PAGE_SHIFT; 2360 vm_first_phys = first_avail; 2361 avail_start = first_avail; 2362 2363 kprintf("pmap_bootstrap: physical region 0x%08x - 0x%08x\n", first_avail, avail_end); 2364 2365 /* 2366 * Set NPVhash defaults. 2367 */ 2368 if (PE_parse_boot_argn("npvhash", &npvhash, sizeof(npvhash))) { 2369 if (0 != ((npvhash + 1) & npvhash)) { 2370 kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n", npvhash, NPVHASH); 2371 npvhash = NPVHASH; 2372 } 2373 } else { 2374 npvhash = NPVHASH; 2375 } 2376 printf("npvhash=%d\n", npvhash); 2377 2378 /* 2379 * ASID initialization. 2380 */ 2381 pmap_asid_initialize_kernel(kernel_pmap); 2382 2383 /* 2384 * Initialize kernel pmap. 2385 */ 2386 pmap_static_init(); 2387} 2388 2389/** 2390 * pmap_reference 2391 * 2392 * Increment reference count of the specified pmap. 2393 */ 2394void pmap_reference(pmap_t pmap) 2395{ 2396 /* 2397 * Bump the count. 2398 */ 2399 if (pmap != PMAP_NULL) 2400 (void) hw_atomic_add((volatile uint32_t *)&pmap->pm_refcnt, 1); 2401} 2402 2403/** 2404 * pmap_get_refmod 2405 * 2406 * Returns the referenced and modified bits of the specified 2407 * physical page. 2408 */ 2409unsigned int pmap_get_refmod(ppnum_t pn) 2410{ 2411 int refmod; 2412 unsigned int retval = 0; 2413 2414 refmod = phys_attribute_test(pn, PMAP_OSPTE_TYPE_MODIFIED | PMAP_OSPTE_TYPE_REFERENCED); 2415 2416 if (refmod & PMAP_OSPTE_TYPE_MODIFIED) 2417 retval |= VM_MEM_MODIFIED; 2418 if (refmod & PMAP_OSPTE_TYPE_REFERENCED) 2419 retval |= VM_MEM_REFERENCED; 2420 2421 return (retval); 2422} 2423 2424/** 2425 * pmap_enter 2426 * 2427 * Enter pages into a physical map. 2428 */ 2429void pmap_enter(pmap_t pmap, vm_map_offset_t va, ppnum_t pa, vm_prot_t prot, vm_prot_t fault_type, unsigned int flags, boolean_t wired) 2430{ 2431 pmap_enter_options(pmap, va, pa, prot, fault_type, flags, wired, 0); 2432} 2433 2434/** 2435 * pmap_grab_page 2436 * 2437 * Get a page from the global pmap object. 2438 */ 2439vm_page_t pmap_grab_page(pmap_t pmap) 2440{ 2441 vm_page_t page; 2442 uint32_t ctr; 2443 assert(pmap_initialized && kernel_map && pmap->pm_obj); 2444 2445 /* 2446 * Grab pages from the global VM object. 2447 */ 2448 while ((page = vm_page_grab()) == VM_PAGE_NULL) 2449 VM_PAGE_WAIT(); 2450 2451 /* 2452 * Lock the global object to prevent interruptions. 2453 */ 2454 vm_object_lock(pmap->pm_obj); 2455 assert((page->phys_page << PAGE_SHIFT) > gPhysBase); 2456 ctr = (page->phys_page) - (gPhysBase >> PAGE_SHIFT); 2457 bzero((void *)phys_to_virt(page->phys_page << PAGE_SHIFT), PAGE_SIZE); 2458 vm_page_insert(page, pmap->pm_obj, ctr); 2459 2460 /* 2461 * Wire our new page. 2462 */ 2463 vm_page_lockspin_queues(); 2464 vm_page_wire(page); 2465 vm_page_unlock_queues(); 2466 2467 /* 2468 * Done. 2469 */ 2470 vm_object_unlock(pmap->pm_obj); 2471 2472 /* 2473 * Set noencrypt bits. 2474 */ 2475 pmap_set_noencrypt(page->phys_page); 2476 2477 /* 2478 * Increment inuse ptepages. 
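     * (Both the in-use and total-allocated page-table counters are bumped below.)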
2479 */ 2480 OSAddAtomic(1, &inuse_ptepages_count); 2481 OSAddAtomic(1, &alloc_ptepages_count); 2482 2483 return page; 2484} 2485 2486/** 2487 * pmap_destroy_page 2488 * 2489 * Free a page from the internal VM object. 2490 */ 2491void pmap_destroy_page(ppnum_t pa) 2492{ 2493 vm_page_t m; 2494 2495 vm_object_lock(pmap_object); 2496 2497 m = vm_page_lookup(pmap_object, pa); 2498 if (m == VM_PAGE_NULL) 2499 return; 2500 2501 vm_object_unlock(pmap_object); 2502 2503 VM_PAGE_FREE(m); 2504 kprintf("Freed page for PA %x\n", pa << PAGE_SHIFT); 2505 2506 /* 2507 * Remove one. 2508 */ 2509 OSAddAtomic(-1, &inuse_ptepages_count); 2510 2511 return; 2512} 2513 2514/** 2515 * pmap_create_sharedpage 2516 * 2517 * Create the system common page. 2518 */ 2519void pmap_create_sharedpage(void) 2520{ 2521 /* 2522 * Grab a page... 2523 */ 2524 commpage = pmap_grab_page(kernel_pmap); 2525 assert(commpage); 2526 2527 /* 2528 * And map it. 2529 */ 2530 pmap_enter(kernel_pmap, (vm_map_offset_t) _COMM_PAGE_BASE_ADDRESS, commpage->phys_page, VM_PROT_READ | VM_PROT_WRITE, 0, FALSE, TRUE); 2531 2532 /* 2533 * Memset it. 2534 */ 2535 memset((void *) _COMM_PAGE_BASE_ADDRESS, 0x00, PAGE_SIZE); 2536 return; 2537} 2538 2539/** 2540 * pmap_extract 2541 * 2542 * Get the physical address for a virtual one. 2543 */ 2544vm_offset_t pmap_extract(pmap_t pmap, vm_offset_t virt) 2545{ 2546#if defined(_ARM_ARCH_6) 2547 spl_t spl; 2548 vm_offset_t ppn = 0; 2549 uint32_t tte, *ttep = pmap_tte(pmap, virt); 2550 2551 /* 2552 * Block off all interruptions. Nothing may interrupt the extraction process 2553 * as the page tables may be changed by another callee to pmap_enter or such. 2554 */ 2555 2556 PMAP_LOCK(pmap); 2557 if (!ttep) 2558 goto extract_out; 2559 2560 /* 2561 * Look at the TTE and see what type of mapping it is. 2562 */ 2563 tte = *ttep; 2564 2565 /* 2566 * Verify it's not a section mapping. 2567 */ 2568 if ((tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_SECTION) { 2569 /* 2570 * Clean the lower bits off. 2571 */ 2572 ppn = (tte & L1_SECT_ADDR_MASK); 2573 2574 /* 2575 * Now add the lower bits back from the VA. 2576 */ 2577 ppn |= (virt & ~(L1_SECT_ADDR_MASK)); 2578 2579 /* 2580 * Done. Address extraction successful. 2581 */ 2582 goto extract_out; 2583 } else if ((tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_PAGE_TABLE) { 2584 uint32_t pte, *ptep; 2585 2586 /* 2587 * Clean the TTE bits off, get the address of the L1 entry. 2588 */ 2589 pte = L1_PTE_ADDR(tte); 2590 if (!pte) 2591 goto extract_out; 2592 2593 /* 2594 * Return the virtually mapped PTE. 2595 */ 2596 ptep = (uint32_t *) ((phys_to_virt(pte) + pte_offset(virt))); 2597 2598 /* 2599 * Make sure it's not a large page. They're not supported yet, but they will 2600 * be at some point. 2601 */ 2602 if (((*ptep & ARM_PAGE_MASK_VALUE) == ARM_PTE_DESCRIPTOR_64K)) 2603 panic("pmap_extract: 64kb pages not supported yet"); 2604 2605 /* 2606 * Clean the PTE bits off the address. 2607 */ 2608 ppn = (*ptep) & L2_ADDR_MASK; 2609 2610 /* 2611 * Now, add the lower bits back from the VA. 2612 */ 2613 ppn |= (virt & ~(L2_ADDR_MASK)); 2614 2615 /* 2616 * Done. Extraction successful. 2617 */ 2618 goto extract_out; 2619 } else { 2620 kprintf("pmap_extract: invalid tte (ttep %x tte %x)\n", ttep, tte); 2621 } 2622 2623 extract_out: 2624 2625 /* 2626 * Return. 2627 */ 2628 PMAP_UNLOCK(pmap); 2629 return ppn; 2630#elif defined(_ARM_ARCH_7) 2631 uint32_t va = (virt & L2_ADDR_MASK), par; 2632 boolean_t is_priv = (pmap == kernel_pmap) ? 
TRUE : FALSE; 2633 2634 /* 2635 * Fast VirtToPhys involves using the virtual address trnalsation 2636 * register as present in Cortex-A and ARM11 MPCore systems. 2637 * 2638 * Privileged reads are only done on the kernel PMAP versus user 2639 * pmaps getting user read/write state. 2640 * 2641 * The entire process should take much shorter compared to the 2642 * older pmap_extract, which fully walked the page tables. You can 2643 * still use the current behaviour however, by messing with 2644 * the MASTER files. 2645 * 2646 * I swear, I need more stupid sleep. 2647 */ 2648 2649 /* 2650 * Set the PAtoVA register and perform the operation. 2651 */ 2652 if (is_priv) 2653 armreg_va2pa_pr_ns_write(va); 2654 else 2655 armreg_va2pa_ur_ns_write(va); 2656 2657 /* 2658 * Wait for the instruction transaction to complete. 2659 */ 2660 __asm__ __volatile__("isb sy"); 2661 2662 /* 2663 * See if the translation aborted, log any translation errors. 2664 */ 2665 par = armreg_par_read(); 2666 2667 /* 2668 * Successful translation, we're done. 2669 */ 2670 if (!(par & 1)) { 2671 uint32_t pa = par & L2_ADDR_MASK; 2672 pa |= (virt & ~(L2_ADDR_MASK)); 2673 return pa; 2674 } else { 2675 /* 2676 * Log translation fault. 2677 */ 2678 kprintf("pmap_extract: fast extraction failed, par 0x%x\n", par); 2679 } 2680 2681 return 0; 2682#else 2683#error Unsupported subarchitecture 2684#endif 2685} 2686 2687/** 2688 * pmap_expand_ttb 2689 * 2690 * Expand and reorganize the current translation-table as to fit a new size. 2691 */ 2692void pmap_expand_ttb(pmap_t map, vm_offset_t expansion_size) 2693{ 2694 /* 2695 * If the requested expansion size is less than or greater, we have nothing to do. 2696 */ 2697 if (expansion_size <= map->pm_l1_size) 2698 return; 2699 2700 /* 2701 * Do not expand past maximum size. 2702 */ 2703 if (expansion_size > 0x4000) 2704 panic("pmap_expand_ttb: attempting to expand past maximum address of %x, map %p, expansion %x\n", 0x4000, map, expansion_size); 2705 2706 switch (expansion_size) { 2707 case 0x1000: 2708 panic("pmap_expand_ttb: attempting to expand an already-expanded pmap?"); 2709 case 0x2000 ... 0x3000:{ 2710 kern_return_t ret; 2711 vm_page_t pages; 2712 2713 /* 2714 * Allocate a contiguous segment of memory for the new L1 mapping table. (including one guard) 2715 */ 2716 ret = cpm_allocate(expansion_size, &pages, 0, ((expansion_size / map->pm_l1_size) - 1), FALSE, KMA_LOMEM); 2717 assert(ret == KERN_SUCCESS); 2718 2719 /* 2720 * We got the new contiguous block. 2721 */ 2722 bzero((void *)phys_to_virt(pages->phys_page << PAGE_SHIFT), expansion_size); 2723 2724 /* 2725 * Copy the old entries to the new area. 2726 */ 2727 bcopy((void *) map->pm_l1_virt, (void *) phys_to_virt(pages->phys_page << PAGE_SHIFT), map->pm_l1_size); 2728#if 1 2729 kprintf("pmap_expand_ttb: 0x%x => 0x%x\n", map->pm_l1_virt, phys_to_virt(pages->phys_page << PAGE_SHIFT)); 2730#endif 2731 2732 /* 2733 * Deallocate the old L1. 2734 */ 2735 pmap_deallocate_l1(map); 2736 2737 /* 2738 * Set the new TTB base. 2739 */ 2740 map->pm_l1_virt = phys_to_virt(pages->phys_page << PAGE_SHIFT); 2741 map->pm_l1_phys = pages->phys_page << PAGE_SHIFT; 2742 map->pm_l1_size = expansion_size; 2743 2744 OSAddAtomic((expansion_size >> PAGE_SHIFT), &inuse_ptepages_count); 2745 OSAddAtomic((expansion_size >> PAGE_SHIFT), &alloc_ptepages_count); 2746 2747 /* 2748 * Switch into the new TTB if it needs to be used. 
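     * (Only when this pmap is the one currently active on this CPU.)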
2749 */ 2750 if (map == current_cpu_datap()->user_pmap) { 2751 arm_context_switch(map->pm_l1_phys); 2752 } 2753 2754 return; 2755 } 2756 default: 2757 panic("pmap_expand_ttb: invalid expansion size %x\n", expansion_size); 2758 } 2759 2760 return; 2761} 2762 2763/** 2764 * pmap_expand 2765 * 2766 * Expand the address space of the current physical map. 2767 */ 2768void pmap_expand(pmap_t map, vm_offset_t v) 2769{ 2770 vm_offset_t *tte = (vm_offset_t *) pmap_tte(map, v); 2771 vm_page_t page = pmap_grab_page(map); 2772 spl_t spl; 2773 2774 /* 2775 * High priority. We do not want any interruptions. 2776 */ 2777 PMAP_LOCK(map); 2778 2779 if (map != kernel_pmap) { 2780 /* 2781 * First, if we have a size below 0x1000, we can't be sure about expanding. 2782 */ 2783 if (map->pm_l1_size < 0x1000) { 2784 panic("pmap_expand: this pmap has a really weird size: %d bytes", map->pm_l1_size); 2785 } 2786 2787 /* 2788 * See if we can make it grow. 2789 */ 2790 uint32_t expansion_size = ((tte_offset(v)) & ~(PAGE_SIZE - 1)) + PAGE_SIZE; 2791 pmap_expand_ttb(map, expansion_size); 2792 2793 /* 2794 * Refetch the TTE, since the pmap base may have changed. 2795 */ 2796 tte = (vm_offset_t *) pmap_tte(map, v); 2797 2798#if 0 2799 /* 2800 * Do not extend past the commpage. 2801 */ 2802 if (map->pm_l1_size == 0x1000) { 2803 if (v >= _COMM_PAGE_BASE_ADDRESS) { 2804 panic("attempting to expand pmap past maximum address of %x\n", _COMM_PAGE_BASE_ADDRESS); 2805 } 2806 } 2807#endif 2808 2809 /* 2810 * L1 section mappings may not be expanded any further. 2811 */ 2812 if ((*tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_SECTION) 2813 panic("cannot expand current map into L1 sections"); 2814 } 2815 2816 /* 2817 * Overwrite the old L1 mapping in this region with a fresh L1 descriptor. 2818 */ 2819 *tte = ((page->phys_page << PAGE_SHIFT) & L1_PTE_ADDR_MASK) | L1_TYPE_PTE; 2820 2821 /* 2822 * Flush the TLBs since we updated the page tables. 2823 */ 2824 pmap_flush_tlbs(map, v, v + PAGE_SIZE); 2825 PMAP_UNLOCK(map); 2826 return; 2827} 2828 2829/** 2830 * pmap_enter_options 2831 * 2832 * Create a translation entry for a PA->VA mappings with additional options. 2833 * Called from vm_fault. 2834 */ 2835kern_return_t pmap_enter_options(pmap_t pmap, vm_map_offset_t va, ppnum_t pa, vm_prot_t prot, vm_prot_t fault_type, unsigned int flags, boolean_t wired, unsigned int options) 2836{ 2837 spl_t spl; 2838 pt_entry_t pte; 2839 register pv_rooted_entry_t pv_h; 2840 pv_hashed_entry_t pvh_e; 2841 pv_hashed_entry_t pvh_new; 2842 pv_hashed_entry_t *hashp; 2843 int pvhash_idx; 2844 uint32_t pv_cnt; 2845 boolean_t old_pvh_locked = FALSE; 2846 2847 /* 2848 * Verify the address isn't fictitious. 2849 */ 2850 assert(pa != vm_page_fictitious_addr); 2851 2852 /* 2853 * Only low addresses are supported for user pmaps. 2854 */ 2855 if (va > _COMM_PAGE_BASE_ADDRESS && pmap != kernel_pmap) { 2856 kprintf("pmap_enter_options: low address 0x%08X is invalid for pmap %p\n", va, pmap); 2857 return KERN_INVALID_ARGUMENT; 2858 } 2859 2860 pvh_new = PV_HASHED_ENTRY_NULL; 2861 2862 Retry: 2863 pvh_e = PV_HASHED_ENTRY_NULL; 2864 2865 /* 2866 * Set a high priority level. We do not wany any interruptions or any unauthorized 2867 * page table modification. 2868 */ 2869 PMAP_LOCK(pmap); 2870 2871 /* 2872 * Expand the pmap to include the new PTE if necessary to accomodate the new VA we're 2873 * entering in. 
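     * pmap_expand drops the pmap lock, installs a fresh L2 table, and the
     * lookup is retried until a PTE slot exists.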
2874 */ 2875 while ((pte = (pt_entry_t)pmap_pte(pmap, va)) == 0) { 2876 PMAP_UNLOCK(pmap); 2877 pmap_expand(pmap, va); 2878 PMAP_LOCK(pmap); 2879 } 2880 2881 /* 2882 * If the old page already has a mapping, the caller might be changing protection flags. 2883 */ 2884 uint32_t old_pte = (*(uint32_t *) pte); 2885 2886 /* 2887 * If it's a managed page, lock the pv entry right now. 2888 */ 2889 if((old_pte & L2_ADDR_MASK) != 0) { 2890 uint32_t pai = pa_index(old_pte & L2_ADDR_MASK); 2891 LOCK_PVH(pai); 2892 old_pvh_locked = TRUE; 2893 old_pte = (*(uint32_t *) pte); 2894 if(0 == old_pte) { 2895 UNLOCK_PVH(pai); 2896 old_pvh_locked = FALSE; 2897 } 2898 } 2899 2900 if ((old_pte & L2_ADDR_MASK) == (pa << PAGE_SHIFT)) { 2901 /* 2902 * !!! IMPLEMENT 'pmap_vm_prot_to_page_flags' !!! 2903 * XXX protection is not implemented right now, all pages are 'RWX'. 2904 */ 2905 2906 uint32_t template_pte = ((pa << PAGE_SHIFT) & L2_ADDR_MASK) | L2_SMALL_PAGE; 2907 template_pte |= pmap_vm_prot_to_page_flags(pmap, prot, wired, 0); 2908 2909 if (va == _COMM_PAGE_BASE_ADDRESS) 2910 template_pte |= L2_ACCESS_USER; 2911 2912 /* 2913 * Add cacheability attributes. 2914 */ 2915 template_pte |= pmap_get_cache_attributes(pa); 2916 2917 if (wired) { 2918 if (!phys_attribute_test(pa, PMAP_OSPTE_TYPE_WIRED)) { 2919 OSAddAtomic(+1, &pmap->pm_stats.wired_count); 2920 phys_attribute_set(pa, PMAP_OSPTE_TYPE_WIRED); 2921 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE); 2922 } else { 2923 assert(pmap->pm_stats.wired_count >= 1); 2924 OSAddAtomic(-1, &pmap->pm_stats.wired_count); 2925 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE); 2926 } 2927 } 2928 2929 *(uint32_t *) pte = template_pte; 2930 2931 /* 2932 * The work here is done, the PTE will now have new permissions. Flush the TLBs for the 2933 * specific VA and then exit. 2934 */ 2935 if(old_pvh_locked) { 2936 UNLOCK_PVH(pa); 2937 old_pvh_locked = FALSE; 2938 } 2939 2940 goto enter_options_done; 2941 } 2942 2943 /* 2944 * This is a new mapping, add it to the pv_head_table if pmap is initialized. This is so 2945 * we can correctly manage our entries. 2946 */ 2947 if (pmap_initialized) { 2948 ppnum_t pai; 2949 2950 /* 2951 * If the current PA isn't zero, and if it's non-existent... remove the mapping 2952 */ 2953 if ((old_pte & L2_ADDR_MASK) != 0) { 2954 pai = pa_index((old_pte & L2_ADDR_MASK)); 2955 pv_h = pai_to_pvh(pai); 2956 2957 *(uint32_t *) pte = 0; 2958 2959 if (!pmap_valid_page(pa)) 2960 goto EnterPte; 2961 2962 /* 2963 * Set statistics and credit/debit internal pmap ledgers 2964 */ 2965 { 2966 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE); 2967 assert(pmap->pm_stats.resident_count >= 1); 2968 OSAddAtomic(-1, &pmap->pm_stats.resident_count); 2969 } 2970 2971 if (phys_attribute_test(pa, PMAP_OSPTE_TYPE_WIRED)) { 2972 assert(pmap->pm_stats.wired_count >= 1); 2973 OSAddAtomic(-1, &pmap->pm_stats.wired_count); 2974 phys_attribute_clear(pa, PMAP_OSPTE_TYPE_WIRED); 2975 } 2976 2977 if (pv_h->pmap == PMAP_NULL) { 2978 panic("pmap_enter_options: null pv_list\n"); 2979 } 2980 pvh_e = pmap_pv_remove(pmap, va, (ppnum_t *) & pai, 0); 2981 2982 /* 2983 * Unlock the old pvh since it's gone now 2984 */ 2985 if(old_pvh_locked) { 2986 UNLOCK_PVH(pai); 2987 old_pvh_locked = FALSE; 2988 } 2989 } 2990 2991 pai = pa; 2992 pv_h = pai_to_pvh(pai); 2993 2994 if (!pmap_valid_page(pa)) 2995 goto EnterPte; 2996 2997#if 0 2998 /* 2999 * Check to see if it exists, if it does, then make it null. 
The code later 3000 * will treat a null mapping as a new one and will enter it anyway. 3001 */ 3002 if ((pv_h->pv_pmap == pmap) && (pv_h->pv_address_va == va)) { 3003 pv_entry_t cur; 3004 cur = pv_h->pv_next; 3005 if (cur != (pv_entry_t) 0) { 3006 *pv_h = *cur; 3007 pv_e = cur; 3008 } else { 3009 pv_h->pv_pmap = PMAP_NULL; 3010 } 3011 } 3012#endif 3013 /* 3014 * Step 2) Enter the mapping in the PV list for this 3015 * physical page. 3016 */ 3017 LOCK_PVH(pai); 3018 3019 /* 3020 * This is definitely a new mapping. 3021 */ 3022 if (pv_h->pmap == PMAP_NULL) { 3023 pv_h->va = va; 3024 pv_h->pmap = pmap; 3025 queue_init(&pv_h->qlink); 3026 if (wired) 3027 phys_attribute_set(pa, PMAP_OSPTE_TYPE_WIRED); 3028 } else { 3029 /* 3030 * Add new pv_hashed_entry after header. 3031 */ 3032 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) { 3033 pvh_e = pvh_new; 3034 pvh_new = PV_HASHED_ENTRY_NULL; 3035 } else if (PV_HASHED_ENTRY_NULL == pvh_e) { 3036 PV_HASHED_ALLOC(pvh_e); 3037 if (PV_HASHED_ENTRY_NULL == pvh_e) { 3038 /* 3039 * the pv list is empty. if we are on 3040 * the kernel pmap we'll use one of 3041 * the special private kernel pv_e's, 3042 * else, we need to unlock 3043 * everything, zalloc a pv_e, and 3044 * restart bringing in the pv_e with 3045 * us. 3046 */ 3047 if (kernel_pmap == pmap) { 3048 PV_HASHED_KERN_ALLOC(pvh_e); 3049 } else { 3050 UNLOCK_PVH(pai); 3051 PMAP_UNLOCK(pmap); 3052 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); 3053 goto Retry; 3054 } 3055 } 3056 } 3057 3058 if (PV_HASHED_ENTRY_NULL == pvh_e) 3059 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings"); 3060 pvh_e->va = va; 3061 pvh_e->pmap = pmap; 3062 pvh_e->ppn = pa; 3063 3064 pv_hash_add(pvh_e, pv_h); 3065 3066 /* 3067 * Remember that we used the pvlist entry. 3068 */ 3069 pvh_e = PV_HASHED_ENTRY_NULL; 3070 } 3071#if 0 3072 kprintf("pmap_enter: pai %d pa %d (%x) va %x pv_h %p pmap %p pv_h->pmap %p pv_h->pv_address_va %x\n", pai, pa, pa << PAGE_SHIFT, va, pv_h, pmap, pv_h->pv_pmap, pv_h->pv_address_va); 3073#endif 3074 } 3075 EnterPte: 3076 3077 /* 3078 * Enter and count the mapping. 3079 */ 3080 pmap->pm_stats.resident_count++; 3081 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE); 3082 3083 if (wired) { 3084 pmap->pm_stats.wired_count++; 3085 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE); 3086 } 3087 3088 /* 3089 * Set VM protections 3090 */ 3091 uint32_t template_pte = ((pa << PAGE_SHIFT) & L2_ADDR_MASK) | L2_SMALL_PAGE; 3092 template_pte |= pmap_vm_prot_to_page_flags(pmap, prot, wired, 0); 3093 3094 /* 3095 * Hack for commpage, how is this to be done? 3096 */ 3097 if (va == _COMM_PAGE_BASE_ADDRESS) 3098 template_pte |= L2_ACCESS_USER; 3099 3100 /* 3101 * Add cacheability attributes. 3102 */ 3103 template_pte |= pmap_get_cache_attributes(pa); 3104 3105 *(uint32_t *) pte = template_pte; 3106 3107 /* 3108 * Unlock the pv. (if it is managed by us) 3109 */ 3110 if(pmap_initialized && pmap_valid_page(pa)) { 3111 UNLOCK_PVH(pa); 3112 } 3113 3114 enter_options_done: 3115 /* 3116 * Done, now invalidate the TLB for a single page. 3117 */ 3118 pmap_flush_tlbs(pmap, va, va + PAGE_SIZE); 3119 3120 if (pvh_e != PV_HASHED_ENTRY_NULL) { 3121 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1); 3122 } 3123 if (pvh_new != PV_HASHED_ENTRY_NULL) { 3124 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1); 3125 } 3126 3127 /* 3128 * The operation has completed successfully. 
3129 */ 3130 PMAP_UNLOCK(pmap); 3131 3132 return KERN_SUCCESS; 3133} 3134 3135extern vm_offset_t sdata, edata; 3136extern vm_offset_t sconstdata, econstdata; 3137extern boolean_t doconstro_override; 3138 3139/** 3140 * pmap_init 3141 * 3142 * Stage 2 initialization of the pmap subsystem. 3143 */ 3144void pmap_init(void) 3145{ 3146 vm_offset_t pv_root; 3147 vm_size_t s; 3148 spl_t spl; 3149 int i; 3150 3151 kprintf("pmap_init: %d physical pages in memory, kernel pmap at %p\n", (mem_size / PAGE_SIZE), kernel_pmap); 3152 3153 /* 3154 * Allocate the core PV structure. The pv_head_table contains trunk entries 3155 * for every physical page that exists in the system. 3156 */ 3157 s = (mem_size / PAGE_SIZE) * sizeof(pv_entry); 3158 if (kernel_memory_allocate(kernel_map, &pv_root, s, 0, KMA_KOBJECT | KMA_PERMANENT) != KERN_SUCCESS) 3159 panic("pmap_init: failed to allocate pv table!"); 3160 3161 /* 3162 * Okay. Zero out the PV head table. 3163 */ 3164 pv_head_table = (pv_entry_t) pv_root; 3165 kprintf("pmap_init: pv_head_table at %p\n", pv_head_table); 3166 bzero((void *) pv_head_table, s); 3167 3168 /* 3169 * Initialize the Zones for object allocation. 3170 */ 3171 pmap_zone = zinit((sizeof(struct pmap)), 400 * (sizeof(struct pmap)), 4096, "pmap_pmap"); 3172 3173 /* 3174 * Expandable zone. (pv_entry zone) 3175 */ 3176 pve_zone = zinit((sizeof(struct __pv_entry__)), 10000 * (sizeof(struct __pv_entry__)), 4096, "pmap_pve"); 3177 3178 /* 3179 * Allocate memory for the pv_head_hash_table. 3180 */ 3181 s = (vm_size_t) (sizeof(struct pv_rooted_entry) * (mem_size / PAGE_SIZE) 3182 + (sizeof(struct pv_hashed_entry_t *) * (npvhash + 1)) 3183 + pv_lock_table_size((mem_size / PAGE_SIZE)) 3184 + pv_hash_lock_table_size((npvhash + 1)) 3185 + (mem_size / PAGE_SIZE)); 3186 if (kernel_memory_allocate(kernel_map, &pv_root, s, 0, KMA_KOBJECT | KMA_PERMANENT) != KERN_SUCCESS) 3187 panic("pmap_init: failed to allocate pv hash table!"); 3188 3189 /* 3190 * Initialize the core objects. 3191 */ 3192 uint32_t npages = (mem_size / PAGE_SIZE); 3193 pv_head_hash_table = (pv_rooted_entry_t) pv_root; 3194 pv_root = (vm_offset_t) (pv_head_table + npages); 3195 3196 pv_hash_table = (pv_hashed_entry_t *) pv_root; 3197 pv_root = (vm_offset_t) (pv_hash_table + (npvhash + 1)); 3198 3199 pv_lock_table = (char *) pv_root; 3200 pv_root = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages)); 3201 bzero(pv_lock_table, pv_lock_table_size(npages)); 3202 3203 pv_hash_lock_table = (char *) pv_root; 3204 pv_root = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash + 1))); 3205 bzero(pv_hash_lock_table, pv_hash_lock_table_size((npvhash + 1))); 3206 3207 bzero((void *) pv_head_hash_table, s); 3208 kprintf("pmap_init: pv_head_hash_table at %p\n", pv_head_hash_table); 3209 3210 /* 3211 * PVHash Zone 3212 */ 3213 pv_hashed_list_zone = zinit(sizeof(struct pv_hashed_entry), 10000 * sizeof(struct pv_hashed_entry), 4096, "pv_list"); /* XXX */ 3214 3215 /* 3216 * Initialize the free list lock. (unused right now.) 3217 */ 3218 simple_lock_init(&kernel_pmap->lock, 0); 3219 simple_lock_init(&pv_free_list_lock, 0); 3220 simple_lock_init(&pv_hashed_free_list_lock, 0); 3221 simple_lock_init(&pv_hashed_kern_free_list_lock, 0); 3222 simple_lock_init(&pv_hash_table_lock, 0); 3223 3224 /* 3225 * Remap kernel as RO only. 
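     * Kernel text gets the APX bit (read-only but still executable), all data
     * pages get the NX bit, and const data is additionally marked read-only
     * when the 'dataconstro' checks pass.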
3226 */ 3227 uint32_t ro_kern = 1; 3228 if (PE_parse_boot_argn("kernel_read_only", &ro_kern, sizeof(ro_kern))) { 3229 ro_kern = 0; 3230 } 3231 SPLVM(spl); 3232 3233 kprintf("Kernel ASLR slide: 0x%08x, virtual base: 0x%08x\n", vm_kernel_slide, gVirtBase); 3234 3235 /* 3236 * Rewrite the kernel PTEs. 3237 */ 3238 if (ro_kern) { 3239 vm_offset_t kva; 3240 pt_entry_t *ptep; 3241 3242 kprintf("Kernel text %x-%x to be write-protected\n", vm_kernel_stext, vm_kernel_etext); 3243 3244 /* 3245 * Add APX-bit to reduce protections to R-X. 3246 */ 3247 for (kva = vm_kernel_stext; kva < vm_kernel_etext; kva += PAGE_SIZE) { 3248 ptep = (pt_entry_t *)pmap_pte(kernel_pmap, (vm_map_offset_t) kva); 3249 if (ptep) 3250 *ptep |= L2_ACCESS_APX; 3251 } 3252 } 3253 3254 /* 3255 * Set const to R-- only too. 3256 */ 3257 boolean_t doconstro = TRUE; 3258 3259 (void) PE_parse_boot_argn("dataconstro", &doconstro, sizeof(doconstro)); 3260 3261 if ((sconstdata | econstdata) & PAGE_MASK) { 3262 kprintf("Const DATA misaligned 0x%lx 0x%lx\n", sconstdata, econstdata); 3263 if ((sconstdata & PAGE_MASK) || (doconstro_override == FALSE)) 3264 doconstro = FALSE; 3265 } 3266 3267 if ((sconstdata > edata) || (sconstdata < sdata) 3268 || ((econstdata - sconstdata) >= (edata - sdata))) { 3269 kprintf("Const DATA incorrect size 0x%lx 0x%lx 0x%lx 0x%lx\n", sconstdata, econstdata, sdata, edata); 3270 doconstro = FALSE; 3271 } 3272 3273 if (doconstro) 3274 kprintf("Marking const DATA read-only\n"); 3275 3276 vm_offset_t dva; 3277 for (dva = sdata; dva < edata; dva += PAGE_SIZE) { 3278 pt_entry_t *pte, dpte; 3279 pte = (pt_entry_t *)pmap_pte(kernel_pmap, dva); 3280 assert(pte); 3281 3282 /* 3283 * Make sure the PTE is valid. 3284 */ 3285 dpte = *pte; 3286 assert(dpte & ARM_PTE_DESCRIPTOR_4K); 3287 if (!(dpte & ARM_PTE_DESCRIPTOR_4K)) { 3288 kprintf("Missing data mapping 0x%x 0x%x 0x%x\n", dva, sdata, edata); 3289 continue; 3290 } 3291 3292 /* 3293 * Enforce NX and RO as necessary. 3294 */ 3295 dpte |= L2_NX_BIT; 3296 if (doconstro && (dva >= sconstdata) && (dva < econstdata)) { 3297 dpte |= L2_ACCESS_APX; 3298 } 3299 *pte = dpte; 3300 } 3301 3302 /* 3303 * Just flush the entire TLB since we messed with quite a lot of mappings. 3304 */ 3305 pmap_flush_tlbs(kernel_pmap, 0, 0xFFFFFFFF); 3306 3307 SPLX(spl); 3308 3309 /* 3310 * Set up the core VM object. 3311 */ 3312 pmap_object = &pmap_object_store; 3313 _vm_object_allocate(mem_size, &pmap_object_store); 3314 kernel_pmap->pm_obj = pmap_object; 3315 3316#ifdef _ARM_ARCH_7 3317 /* 3318 * Initialize ASID subsystem properly. 3319 */ 3320 pmap_asid_configure(); 3321#endif 3322 3323 /* 3324 * Done initializing. 3325 */ 3326 pmap_initialized = TRUE; 3327 3328 return; 3329} 3330 3331/** 3332 * pmap_remove_range 3333 * 3334 * Remove a range of hardware page-table entries. (This function does not support section mappings.) 3335 */ 3336void pmap_remove_range(pmap_t pmap, vm_map_offset_t start_vaddr, pt_entry_t * spte, pt_entry_t * epte, boolean_t is_sect) 3337{ 3338 pt_entry_t *cpte = spte; 3339 vm_map_offset_t vaddr = start_vaddr; 3340 vm_size_t our_page_size = (is_sect) ? (_1MB) : PAGE_SIZE; 3341 int num_removed = 0, num_unwired = 0; 3342 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; 3343 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL; 3344 pv_hashed_entry_t pvh_e; 3345 int pvh_cnt = 0; 3346 int pvhash_idx; 3347 uint32_t pv_cnt; 3348 3349 /* 3350 * Make sure the Cpte/Epte are within sane boundaries. (256 entries, one L2 area size.) 
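     * A single L2 table maps 1MB with 256 4KB entries, so a span larger than
     * L2_SIZE means the caller handed us a bogus range.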
3351 */ 3352 if (((vm_offset_t) epte - (vm_offset_t) cpte) > L2_SIZE) 3353 panic("pmap_remove_range: attempting to remove more ptes than 256!\n"); 3354 3355 for (cpte = spte, vaddr = start_vaddr; cpte < epte; cpte++, vaddr += our_page_size) { 3356 /* 3357 * Start nuking the range. 3358 */ 3359 pt_entry_t *p = cpte; 3360 3361 /* 3362 * Get the index for the PV table. 3363 */ 3364 ppnum_t pai = pa_index(*cpte & L2_ADDR_MASK); 3365 if (pai == 0) { 3366 continue; 3367 } 3368 3369 /* 3370 * If it isn't a managed page, don't update the pv_table. 3371 */ 3372 if (!pmap_valid_page(pai)) 3373 continue; 3374 3375 num_removed++; 3376 if (phys_attribute_test(pai, PMAP_OSPTE_TYPE_WIRED)) { 3377 phys_attribute_clear(pai, PMAP_OSPTE_TYPE_WIRED); 3378 num_unwired++; 3379 } 3380 3381 /* 3382 * Nuke the page table entry. 3383 */ 3384 *cpte = 0; 3385 3386 /* 3387 * Continue onwards if pmap isn't up yet.. (keep nuking pages!) 3388 */ 3389 if (!pmap_initialized) 3390 continue; 3391 3392 LOCK_PVH(pai); 3393 /* 3394 * Remove the mapping from the pvlist for 3395 * this physical page. 3396 */ 3397 { 3398 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) & pai, cpte); 3399 UNLOCK_PVH(pai); 3400 if (pvh_e != PV_HASHED_ENTRY_NULL) { 3401 pvh_e->qlink.next = (queue_entry_t) pvh_eh; 3402 pvh_eh = pvh_e; 3403 3404 if (pvh_et == PV_HASHED_ENTRY_NULL) { 3405 pvh_et = pvh_e; 3406 } 3407 pvh_cnt++; 3408 } 3409 } /* removing mappings for this phy page */ 3410 } 3411 3412 if (pvh_eh != PV_HASHED_ENTRY_NULL) { 3413 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt); 3414 } 3415 3416 /* 3417 * Invalidate all TLBs. 3418 */ 3419 pmap_flush_tlbs(pmap, start_vaddr, vaddr); 3420 3421 /* 3422 * Make sure the amount removed isn't... weird. 3423 */ 3424 if (pmap->pm_stats.resident_count < num_removed) 3425 panic("pmap_remove_range: resident_count"); 3426 pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * PAGE_SIZE); 3427 assert(pmap->pm_stats.resident_count >= num_removed); 3428 OSAddAtomic(-num_removed, &pmap->pm_stats.resident_count); 3429 3430 if (pmap->pm_stats.wired_count < num_unwired) 3431 panic("pmap_remove_range: wired_count"); 3432 assert(pmap->pm_stats.wired_count >= num_unwired); 3433 OSAddAtomic(-num_unwired, &pmap->pm_stats.wired_count); 3434 pmap_ledger_debit(pmap, task_ledgers.wired_mem, num_unwired * PAGE_SIZE); 3435 3436 return; 3437} 3438 3439/** 3440 * pmap_remove 3441 * 3442 * Remove the given range of addresses from the specified map. 3443 */ 3444void pmap_remove(pmap_t map, vm_offset_t sva, vm_offset_t eva) 3445{ 3446 spl_t spl; 3447 pt_entry_t *tte; 3448 vm_offset_t *spte, *epte, lva = sva; 3449 3450 /* 3451 * Verify the pages are page aligned. 3452 */ 3453 assert(!(sva & PAGE_MASK)); 3454 assert(!(eva & PAGE_MASK)); 3455 3456 /* 3457 * High Priority. Nothing may interrupt the removal process. 3458 */ 3459 PMAP_LOCK(map); 3460 3461 /* 3462 * This is broken. 3463 */ 3464 while (sva < eva) { 3465 lva = (sva + _1MB) & ~((_1MB) - 1); 3466 if (lva > eva) 3467 lva = eva; 3468 tte = (pt_entry_t *)pmap_tte(map, sva); 3469 assert(tte); 3470 if (tte && ((*tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_PAGE_TABLE)) { 3471 pt_entry_t *spte_begin; 3472 spte_begin = (pt_entry_t *) (phys_to_virt(L1_PTE_ADDR(*tte))); 3473 spte = (vm_offset_t *)((vm_offset_t) spte_begin + (vm_offset_t) pte_offset(sva)); 3474 epte = (vm_offset_t *)((vm_offset_t) spte_begin + (vm_offset_t) pte_offset(lva)); 3475 3476 /* 3477 * If the addresses are more than one 1MB apart, well... 
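     * push the end pointer forward by one L2 table (0x400 bytes) per extra
     * 1MB section so the whole span is covered.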
3478 */ 3479 if ((sva >> L1SHIFT) != (lva >> L1SHIFT)) { 3480 int mb_off = (lva >> L1SHIFT) - (sva >> L1SHIFT); 3481 epte = (vm_offset_t *)((vm_offset_t) spte_begin + (0x400 * mb_off) + (vm_offset_t) pte_offset(lva)); 3482 } 3483 3484 assert(epte >= spte); 3485 3486 /* 3487 * Make sure the range isn't bogus. 3488 */ 3489 if (((vm_offset_t) epte - (vm_offset_t) spte) > L2_SIZE) { 3490 panic("pmap_remove: attempting to remove bogus PTE range"); 3491 } 3492 3493 pmap_remove_range(map, sva, spte, epte, FALSE); 3494 } 3495 sva = lva; 3496 } 3497 3498 /* 3499 * Flush TLBs since we modified page table entries. 3500 */ 3501 pmap_flush_tlbs(map, sva, eva); 3502 3503 /* 3504 * Return. 3505 */ 3506 PMAP_UNLOCK(map); 3507 return; 3508} 3509 3510/** 3511 * pmap_create 3512 * 3513 * Create a pmap. 3514 */ 3515pmap_t pmap_create(ledger_t ledger, vm_map_size_t size, __unused boolean_t is_64bit) 3516{ 3517 pmap_t our_pmap; 3518 vm_page_t new_l1; 3519 3520 /* 3521 * Some necessary requisites. 3522 */ 3523 if (!pmap_initialized || size || !kernel_task) 3524 return PMAP_NULL; 3525 3526 /* 3527 * Zalloc a new one. 3528 */ 3529 our_pmap = (pmap_t) zalloc(pmap_zone); 3530 if (!our_pmap) { 3531 panic("pmap_create: allocating the new pmap failed"); 3532 } 3533 our_pmap->pm_refcnt = 1; 3534 our_pmap->ledger = ledger; 3535 our_pmap->pm_asid = 0; 3536 pmap_common_init(our_pmap); 3537 3538#ifdef _NOTYET_ 3539 pmap_asid_alloc_fast(our_pmap); 3540#endif 3541 3542 /* 3543 * Create the pmap VM object. 3544 */ 3545 if (NULL == (our_pmap->pm_obj = vm_object_allocate((vm_object_size_t) (4096 * PAGE_SIZE)))) 3546 panic("pmap_create: pm_obj null"); 3547 3548 if (pmap_asid_ncpus) 3549 pmap_asid_initialize(our_pmap); 3550 3551 /* 3552 * Grab a new page and set the new L1 region. 3553 */ 3554 new_l1 = pmap_grab_page(our_pmap); 3555 our_pmap->pm_l1_phys = new_l1->phys_page << PAGE_SHIFT; 3556 our_pmap->pm_l1_virt = phys_to_virt(new_l1->phys_page << PAGE_SHIFT); 3557 bzero((void *)phys_to_virt(new_l1->phys_page << PAGE_SHIFT), PAGE_SIZE); 3558 3559 /* 3560 * New pmaps have 4096 bytes of TTB area. 3561 */ 3562 our_pmap->pm_l1_size = PAGE_SIZE; 3563 3564 /* 3565 * Done. 3566 */ 3567 return our_pmap; 3568} 3569 3570/** 3571 * pmap_page_protect 3572 * 3573 * Lower the protections on a set of mappings. 3574 */ 3575void pmap_page_protect(ppnum_t pn, vm_prot_t prot) 3576{ 3577 boolean_t remove; 3578 spl_t spl; 3579 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL; 3580 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; 3581 pv_hashed_entry_t nexth; 3582 int pvh_cnt = 0; 3583 int pvhash_idx; 3584 pv_rooted_entry_t pv_h; 3585 pv_rooted_entry_t pv_e; 3586 pv_hashed_entry_t pvh_e; 3587 register pmap_t pmap; 3588 pt_entry_t *pte; 3589 3590 /* 3591 * Verify it's not a fictitious page. 3592 */ 3593 assert(pn != vm_page_fictitious_addr); 3594 3595 /* 3596 * Verify said page is managed by us. 3597 */ 3598 assert(pmap_initialized); 3599 if (!pmap_valid_page(pn)) { 3600 return; 3601 } 3602 3603 /* 3604 * Determine the new protection. 3605 */ 3606 switch (prot) { 3607 case VM_PROT_READ: 3608 case VM_PROT_READ | VM_PROT_EXECUTE: 3609 remove = FALSE; 3610 break; 3611 case VM_PROT_ALL: 3612 return; /* nothing to do */ 3613 default: 3614 remove = TRUE; 3615 break; 3616 } 3617 3618 /* 3619 * Walk down the PV listings and remove the entries. 3620 */ 3621 pv_h = pai_to_pvh(pn); 3622 LOCK_PVH(pn); 3623 3624 /* 3625 * Walk down PV list, changing or removing all mappings. 
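     * The rooted entry is fixed up in place; hashed entries are unlinked and
     * batched onto a local free list that is released at the end.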
3626 */ 3627 if (pv_h->pmap != PMAP_NULL) { 3628 3629 pv_e = pv_h; 3630 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */ 3631 3632 do { 3633 register vm_map_offset_t vaddr; 3634 pmap = pv_e->pmap; 3635 3636 vaddr = pv_e->va; 3637 pte = (pt_entry_t *)pmap_pte(pmap, vaddr); 3638 3639 if (0 == pte) { 3640 panic("pmap_page_protect(): null PTE pmap=%p pn=0x%x vaddr=0x%08x shadow=0x%08x\n", pmap, pn, vaddr, pv_e->flags); 3641 } 3642 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink); /* if there is one */ 3643 3644 /* 3645 * Remove the mapping if new protection is NONE 3646 * or if write-protecting a kernel mapping. 3647 */ 3648 if (remove || pmap == kernel_pmap) { 3649 /* 3650 * Remove the mapping, collecting any modify bits. 3651 */ 3652 *(pt_entry_t *) pte = 0; 3653 pmap_flush_tlbs(pmap, vaddr, vaddr + PAGE_SIZE); 3654 phys_attribute_clear(pn, PMAP_OSPTE_TYPE_REFERENCED | PMAP_OSPTE_TYPE_MODIFIED); 3655 if (pmap->pm_stats.resident_count < 1) 3656 panic("pmap_page_protect: resident_count"); 3657 assert(pmap->pm_stats.resident_count >= 1); 3658 OSAddAtomic(-1, (SInt32 *) & pmap->pm_stats.resident_count); 3659 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE); 3660 3661 /* 3662 * Deal with the pv_rooted_entry. 3663 */ 3664 3665 if (pv_e == pv_h) { 3666 /* 3667 * Fix up head later. 3668 */ 3669 pv_h->pmap = PMAP_NULL; 3670 } else { 3671 /* 3672 * Delete this entry. 3673 */ 3674 pv_hash_remove(pvh_e); 3675 pvh_e->qlink.next = (queue_entry_t) pvh_eh; 3676 pvh_eh = pvh_e; 3677 3678 if (pvh_et == PV_HASHED_ENTRY_NULL) 3679 pvh_et = pvh_e; 3680 pvh_cnt++; 3681 } 3682 } else { 3683 /* 3684 * Write-protect. 3685 */ 3686 *(pt_entry_t *) pte |= (L2_ACCESS_APX); 3687 pmap_flush_tlbs(pmap, vaddr, vaddr + PAGE_SIZE); 3688 } 3689 3690 pvh_e = nexth; 3691 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h); 3692 3693 /* 3694 * If pv_head mapping was removed, fix it up. 3695 */ 3696 3697 if (pv_h->pmap == PMAP_NULL) { 3698 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink); 3699 3700 if (pvh_e != (pv_hashed_entry_t) pv_h) { 3701 pv_hash_remove(pvh_e); 3702 pv_h->pmap = pvh_e->pmap; 3703 pv_h->va = pvh_e->va; 3704 pvh_e->qlink.next = (queue_entry_t) pvh_eh; 3705 pvh_eh = pvh_e; 3706 3707 if (pvh_et == PV_HASHED_ENTRY_NULL) 3708 pvh_et = pvh_e; 3709 pvh_cnt++; 3710 } 3711 } 3712 } 3713 if (pvh_eh != PV_HASHED_ENTRY_NULL) { 3714 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt); 3715 } 3716 UNLOCK_PVH(pn); 3717} 3718 3719/** 3720 * pmap_deallocate_l1 3721 * 3722 * Deallocate the allocated L1 translation table. 3723 */ 3724void pmap_deallocate_l1(pmap_t pmap) 3725{ 3726 uint32_t ttb_base = pmap->pm_l1_phys; 3727 vm_page_t m; 3728 3729 /* 3730 * If the pmap is expanded past 0x1000, we must use cpm_deallocate. 3731 */ 3732 if (pmap->pm_l1_size > 0x1000) { 3733 /* 3734 * xxx todo 3735 */ 3736 return; 3737 } 3738 3739 /* 3740 * Lock the VM object. 3741 */ 3742 vm_object_lock(pmap->pm_obj); 3743 3744 /* 3745 * Look up the page. 3746 */ 3747 m = vm_page_lookup(pmap->pm_obj, (vm_object_offset_t) ((ttb_base >> PAGE_SHIFT) - (gPhysBase >> PAGE_SHIFT))); 3748 assert(m); 3749 3750 /* 3751 * Got it, now free it. 3752 */ 3753 VM_PAGE_FREE(m); 3754 3755 /* 3756 * Done. 3757 */ 3758 vm_object_unlock(pmap->pm_obj); 3759 3760 /* 3761 * Remove one. 3762 */ 3763 OSAddAtomic(-1, &inuse_ptepages_count); 3764 3765 /* 3766 * Invalidation of the entire pmap should be done. 3767 */ 3768 return; 3769} 3770 3771/** 3772 * pmap_destroy 3773 * 3774 * Destroy the current physical map. 
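 * Only the final reference actually tears the map down; earlier calls simply
 * drop the reference count and return.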
3775 */ 3776void pmap_destroy(pmap_t pmap) 3777{ 3778 spl_t spl; 3779 int refcnt, i; 3780 3781 /* 3782 * Some necessary prerequisites. 3783 */ 3784 assert(pmap_initialized); 3785 3786 /* 3787 * NEVER EVER EVER DESTROY THE KERNEL PMAP 3788 */ 3789 if (pmap == kernel_pmap) 3790 panic("pmap_destroy: attempting to destroy kernel_pmap"); 3791 3792 PMAP_LOCK(pmap); 3793 3794 /* 3795 * Okay, decrease the reference count. 3796 */ 3797 refcnt = --pmap->pm_refcnt; 3798 if (refcnt == 0) { 3799 pmap_flush_tlbs(pmap, 0, 0xFFFFFFFF); 3800 if (pmap_asid_ncpus) 3801 pmap_destroy_asid_sync(pmap); 3802 } 3803 3804 /* 3805 * Unlock the pmap system. 3806 */ 3807 PMAP_UNLOCK(pmap); 3808 3809 /* 3810 * If the pmap still has a reference count, we don't kill it. 3811 */ 3812 if (refcnt != 0) { 3813 return; 3814 } 3815 3816 /* 3817 * Free the associated objects with the pmap first. 3818 */ 3819 pmap_deallocate_l1(pmap); 3820 ledger_dereference(pmap->ledger); 3821 3822 /* 3823 * Free the 'expanded' pages. 3824 */ 3825 OSAddAtomic(-pmap->pm_obj->resident_page_count, &inuse_ptepages_count); 3826 PMAP_ZINFO_PFREE(pmap, pmap->pm_obj->resident_page_count * PAGE_SIZE); 3827 vm_object_deallocate(pmap->pm_obj); 3828 3829 /* 3830 * Free the actual pmap. 3831 */ 3832 zfree(pmap_zone, pmap); 3833 3834 /* 3835 * Done. 3836 */ 3837 return; 3838} 3839 3840/** 3841 * pmap_protect 3842 *t 3843 * Lower the specified protections on a certain map from sva to eva using prot prot. 3844 */ 3845void pmap_protect(pmap_t map, vm_map_offset_t sva, vm_map_offset_t eva, vm_prot_t prot) 3846{ 3847 register pt_entry_t *tte; 3848 register pt_entry_t *spte, *epte; 3849 vm_map_offset_t lva; 3850 vm_map_offset_t orig_sva; 3851 boolean_t set_NX; 3852 int num_found = 0; 3853 3854 /* 3855 * Verify the start and end are page aligned. 3856 */ 3857 assert(!(sva & PAGE_MASK)); 3858 assert(!(eva & PAGE_MASK)); 3859 3860 /* 3861 * Remove PTEs if they're set to VM_PROT_NONE. 3862 */ 3863 if (map == PMAP_NULL) 3864 return; 3865 3866 if (prot == VM_PROT_NONE) { 3867 pmap_remove(map, sva, eva); 3868 return; 3869 } 3870 3871 /* 3872 * Enforce NX if necessary. 3873 */ 3874 if ((prot & VM_PROT_EXECUTE) || !nx_enabled) 3875 set_NX = FALSE; 3876 else 3877 set_NX = TRUE; 3878 3879 /* 3880 * Lock the pmap and set the protections on the PTEs. 3881 */ 3882 PMAP_LOCK(map); 3883 3884 /* 3885 * This is broken. 3886 */ 3887 orig_sva = sva; 3888 while (sva < eva) { 3889 lva = (sva + _1MB) & ~((_1MB) - 1); 3890 if (lva > eva) 3891 lva = eva; 3892 tte = (pt_entry_t *)pmap_tte(map, sva); 3893 assert(tte); 3894 if (tte && ((*tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_PAGE_TABLE)) { 3895 pt_entry_t *spte_begin; 3896 spte_begin = (pt_entry_t *) (phys_to_virt(L1_PTE_ADDR(*tte))); 3897 spte = (pt_entry_t *)((vm_offset_t) spte_begin + (vm_offset_t) pte_offset(sva)); 3898 epte = (pt_entry_t *)((vm_offset_t) spte_begin + (vm_offset_t) pte_offset(lva)); 3899 3900 /* 3901 * If the addresses are more than one 1MB apart, well... 3902 */ 3903 if ((sva >> L1SHIFT) != (lva >> L1SHIFT)) { 3904 int mb_off = (lva >> L1SHIFT) - (sva >> L1SHIFT); 3905 epte = (pt_entry_t *)((vm_offset_t) spte_begin + (0x400 * mb_off) + (vm_offset_t) pte_offset(lva)); 3906 } 3907 3908 assert(epte >= spte); 3909 3910 /* 3911 * Make sure the range isn't bogus. 
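     * (This is the same 256-entry / L2_SIZE sanity check used in pmap_remove_range.)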
3912 */ 3913 if (((vm_offset_t) epte - (vm_offset_t) spte) > L2_SIZE) 3914 panic("pmap_protect: attempting to protect bogus PTE range");; 3915 3916 while (spte < epte) { 3917 if (*spte & ARM_PTE_DESCRIPTOR_4K) { 3918 assert(*spte & ARM_PTE_DESCRIPTOR_4K); 3919 3920 /* 3921 * Make the PTE RO if necessary. 3922 */ 3923 if (prot & VM_PROT_WRITE) 3924 *spte &= ~(L2_ACCESS_APX); 3925 else 3926 *spte |= L2_ACCESS_APX; 3927 3928 /* 3929 * Enforce NX bit. 3930 */ 3931 if (set_NX) 3932 *spte |= L2_NX_BIT; 3933 else 3934 *spte &= ~(L2_NX_BIT); 3935 num_found++; 3936 } 3937 spte++; 3938 } 3939 } 3940 sva = lva; 3941 } 3942 3943 /* 3944 * We're done with that, bye. 3945 */ 3946 pmap_flush_tlbs(map, sva, eva); 3947 PMAP_UNLOCK(map); 3948 3949 return; 3950} 3951 3952/** 3953 * pmap_nest 3954 * 3955 * Nest a pmap with new mappings into a master pmap. 3956 */ 3957kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) 3958{ 3959 int copied; 3960 unsigned int i; 3961 vm_offset_t *tte, *ntte; 3962 vm_map_offset_t nvaddr, vaddr; 3963 3964 /* 3965 * Anounce ourselves. We are nesting one pmap inside another. 3966 */ 3967 kprintf("pmap_nest: %p[0x%08llx] => %p[0x%08llx], %d tte entries\n", subord, va_start, grand, nstart, size >> L1SHIFT); 3968 3969 /* 3970 * Sanity checks. 3971 */ 3972 if (size == 0) { 3973 panic("pmap_nest: size is invalid - %016llX\n", size); 3974 } 3975 3976 if (va_start != nstart) 3977 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart); 3978 3979 /* 3980 * Start the copy operations. 3981 */ 3982 PMAP_LOCK(subord); 3983 3984 /* 3985 * Mark the surbodinate pmap as shared. 3986 */ 3987 uint32_t num_sect = size >> L1SHIFT; 3988 subord->pm_shared = TRUE; 3989 nvaddr = (vm_map_offset_t) nstart; 3990 3991 /* 3992 * Expand the subordinate pmap to fit. 3993 */ 3994 for (i = 0; i < num_sect; i++) { 3995 /* 3996 * Fetch the TTE and expand the pmap if there is not one. 3997 */ 3998 ntte = (vm_offset_t *)pmap_tte(subord, nvaddr); 3999 4000 while (ntte == 0 || ((*ntte & ARM_PAGE_MASK_VALUE) != ARM_PAGE_PAGE_TABLE)) { 4001 PMAP_UNLOCK(subord); 4002 pmap_expand(subord, nvaddr); 4003 PMAP_LOCK(subord); 4004 ntte = (vm_offset_t *)pmap_tte(subord, nvaddr); 4005 } 4006 4007 /* 4008 * Increase virtual address by granularity of one TTE entry. 4009 */ 4010 nvaddr += (_1MB); 4011 } 4012 PMAP_UNLOCK(subord); 4013 4014 /* 4015 * Initial expansion of the Subordinate pmap is done, copy the new entries to the 4016 * master Grand pmap. 4017 */ 4018 PMAP_LOCK(grand); 4019 vaddr = (vm_map_offset_t) va_start; 4020 for (i = 0; i < num_sect; i++) { 4021 pt_entry_t target; 4022 4023 /* 4024 * Get the initial TTE from the subordinate map and verify it. 4025 */ 4026 ntte = (vm_offset_t *)pmap_tte(subord, vaddr); 4027 if (ntte == 0) 4028 panic("pmap_nest: no ntte, subord %p nstart 0x%llx", subord, nstart); 4029 target = *ntte; 4030 4031 nstart += (_1MB); 4032 4033 /* 4034 * Now, get the TTE address from the Grand map. 4035 */ 4036 tte = (vm_offset_t *)pmap_tte(grand, vaddr); 4037 if (tte == 0) 4038 panic("pmap_nest: no tte, grand %p vaddr 0x%x", grand, vaddr); 4039 4040 /* 4041 * Store the TTE. 4042 */ 4043 *tte = target; 4044 vaddr += (_1MB); 4045 } 4046 PMAP_UNLOCK(grand); 4047 4048 /* 4049 * Out. Flush all TLBs. 4050 */ 4051 pmap_flush_tlbs(grand, va_start, va_start + size); 4052 4053 return KERN_SUCCESS; 4054} 4055 4056/** 4057 * pmap_unnest 4058 * 4059 * Remove a nested pmap. 
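 * The section entries copied in by pmap_nest are simply cleared from the
 * grand pmap, one 1MB TTE at a time, and the TLBs are flushed.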

/**
 * pmap_unnest
 *
 * Remove a nested pmap.
 */
kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
{
    vm_offset_t *tte;
    unsigned int i, num_sect;
    addr64_t vstart, vend;

    /*
     * Verify the address and size are properly aligned.
     */
    if ((size & (pmap_nesting_size_min - 1)) || (vaddr & (pmap_nesting_size_min - 1))) {
        panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned addresses\n", grand, vaddr, size);
    }

    /*
     * Align everything to a 1MB boundary (TTE granularity).
     */
    vstart = vaddr & ~((_1MB) - 1);
    vend = (vaddr + size + (_1MB) - 1) & ~((_1MB) - 1);
    size = (vend - vstart);

    /*
     * Lock the pmap to prevent use.
     */
    PMAP_LOCK(grand);

    num_sect = size >> L1SHIFT;
    vaddr = vstart;
    for (i = 0; i < num_sect; i++) {
        tte = (vm_offset_t *) pmap_tte(grand, (vm_map_offset_t) vaddr);
        if (tte == 0)
            panic("pmap_unnest: no tte, grand %p vaddr 0x%llx\n", grand, vaddr);
        *tte = 0;
        vaddr += (_1MB);
    }

    /*
     * The operation has now completed; flush the TLBs for the unnested
     * range ('vaddr' has advanced to 'vend' by now) and unlock.
     */
    pmap_flush_tlbs(grand, vstart, vend);

    PMAP_UNLOCK(grand);

    return KERN_SUCCESS;
}
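
/*
 * Note: unnesting works at the same 1MB section (TTE) granularity as
 * pmap_nest. Only the grand pmap's section entries are cleared and its TLB
 * range flushed; the subordinate pmap and its L2 tables are left untouched,
 * so the region could in principle be re-nested later with pmap_nest.
 */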

/**
 * pmap_disconnect
 *
 * Remove a page's mappings and return its referenced/modified bits.
 */
unsigned int pmap_disconnect(ppnum_t pa)
{
    /*
     * Disconnect the page.
     */
    pmap_page_protect(pa, 0);
    return pmap_get_refmod(pa);
}

/*
 * kern_return_t
 * pmap_add_physical_memory(vm_offset_t spa, vm_offset_t epa,
 *                          boolean_t available, unsigned int attr)
 *
 * THIS IS NOT SUPPORTED.
 */
kern_return_t pmap_add_physical_memory(__unused vm_offset_t spa, __unused vm_offset_t epa, __unused boolean_t available, __unused unsigned int attr)
{
    panic("Forget it! You can't map no more memory, you greedy puke!\n");
    return KERN_SUCCESS;
}

/**
 * pmap_zero_part_page
 *
 * Zeroes part of the specified (machine independent) page.
 */
void pmap_zero_part_page(ppnum_t src, vm_offset_t src_offset, vm_offset_t len)
{
    assert(src != vm_page_fictitious_addr);
    assert((((src << PAGE_SHIFT) & PAGE_MASK) + src_offset + len) <= PAGE_SIZE);
    bzero((void *) (phys_to_virt(src << PAGE_SHIFT) + src_offset), len);
}

/**
 * pmap_copy_part_lpage
 *
 * Copy part of a virtually addressed page to a physically addressed page.
 */
void pmap_copy_part_lpage(vm_offset_t src, vm_offset_t dst, vm_offset_t dst_offset, vm_size_t len)
{
    panic("pmap_copy_part_lpage");
}

/**
 * pmap_copy_part_rpage
 *
 * Copy part of a physically addressed page to a virtually addressed page.
 */
void pmap_copy_part_rpage(vm_offset_t src, vm_offset_t src_offset, vm_offset_t dst, vm_size_t len)
{
    panic("pmap_copy_part_rpage");
}

/**
 * pmap_copy
 *
 * Unused.
 */
void pmap_copy(pmap_t dst, pmap_t src, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr)
{
    return;
}

/**
 * coredumpok
 *
 * Unused.
 */
boolean_t coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
{
    return TRUE;
}

/*
 * These functions are used for page-table usage bookkeeping (the 'fake
 * zone' statistics).
 */
void pt_fake_zone_init(int zone_index)
{
    pt_fake_zone_index = zone_index;
}

void pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size, uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
    *count = inuse_ptepages_count;
    *cur_size = PAGE_SIZE * inuse_ptepages_count;
    *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
    *elem_size = PAGE_SIZE;
    *alloc_size = PAGE_SIZE;
    *sum_size = alloc_ptepages_count * PAGE_SIZE;

    *collectable = 1;
    *exhaustable = 0;
    *caller_acct = 1;
}
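
/*
 * Illustrative numbers (assumed, not measured): with 4KB pages and
 * inuse_ptepages_count == 512, pt_fake_zone_info would report count == 512
 * and cur_size == 512 * PAGE_SIZE == 2MB of resident page-table pages.
 * These values back the pseudo-zone entry that zone statistics consumers
 * (e.g. zprint) use to account for page table memory.
 */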