/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/proc_reg.h>
#include <i386/cpuid.h>
#include <i386/tsc.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <i386/pmap_internal.h>
#include <i386/pmap_pcid.h>
#include <mach/branch_predicates.h>

/*
 * PCID (Process context identifier) aka tagged TLB support.
 * On processors with this feature, unless disabled via the -pmap_pcid_disable
 * boot-arg, the following algorithm is in effect:
 * Each processor maintains an array of tag refcounts indexed by tag.
 * Each address space maintains an array of tags indexed by CPU number.
 * Each address space maintains a coherency vector, indexed by CPU,
 * indicating that the TLB state for that address space has a pending
 * invalidation.
 * On a context switch, a refcounted tag is lazily assigned to the newly
 * dispatched (CPU, address space) tuple.
 * When an inactive address space is invalidated on a remote CPU, it is marked
 * for invalidation upon the next dispatch. Some invalidations are
 * also processed at the user/kernel boundary.
 * Provisions are made for the case where a CPU is overcommitted, i.e.
 * more active address spaces exist than the number of logical tags
 * provided for by the processor architecture (currently 4096).
 * The algorithm assumes the processor remaps the logical tags
 * to physical TLB context IDs in an LRU fashion for efficiency.
 * (DRK '10)
 */
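/*
 * For orientation, a minimal sketch of the bookkeeping described above.
 * The field names below mirror their use throughout this file; their
 * element types and the structures that actually hold them (the per-CPU
 * cpu_data and the pmap) are inferred from usage here, not copied from
 * the real declarations:
 *
 *	pcid_ref_t	cpu_pcid_refcounts[PMAP_PCID_MAX_PCID];
 *			    (per-CPU: refcount for each tag)
 *	pmap_t		cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID];
 *			    (per-CPU: last pmap dispatched under each tag)
 *	pcid_t		pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
 *			    (per-pmap: tag assigned on each CPU,
 *			     PMAP_PCID_INVALID_PCID if none)
 *	char		pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
 *			    (per-pmap: nonzero => TLB invalidation pending
 *			     for that CPU)
 */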
uint32_t pmap_pcid_ncpus;
boolean_t pmap_pcid_disabled = FALSE;

void pmap_pcid_configure(void) {
	int ccpu = cpu_number();
	uintptr_t cr4 = get_cr4();
	boolean_t pcid_present = FALSE;

	pmap_pcid_log("PCID configure invoked on CPU %d\n", ccpu);
	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
	pmap_assert(cpu_mode_is64bit());

	if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled, sizeof(pmap_pcid_disabled))) {
		pmap_pcid_log("PMAP: PCID feature disabled\n");
		printf("PMAP: PCID feature disabled, %u\n", pmap_pcid_disabled);
		kprintf("PMAP: PCID feature disabled %u\n", pmap_pcid_disabled);
	}
	/* no_shared_cr3+PCID is currently unsupported */
#if DEBUG
	if (pmap_pcid_disabled == FALSE)
		no_shared_cr3 = FALSE;
	else
		no_shared_cr3 = TRUE;
#else
	if (no_shared_cr3)
		pmap_pcid_disabled = TRUE;
#endif
	if (pmap_pcid_disabled || no_shared_cr3) {
		unsigned i;
		/* Reset PCID status, as we may have picked up
		 * strays if discovered prior to platform
		 * expert initialization.
		 */
		for (i = 0; i < real_ncpus; i++) {
			if (cpu_datap(i)) {
				cpu_datap(i)->cpu_pmap_pcid_enabled = FALSE;
			}
		}
		pmap_pcid_ncpus = 0;
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
		return;
	}
	/* DRKTODO: assert if features haven't been discovered yet. Redundant
	 * invocation of cpu_mode_init and descendants masks this for now.
	 */
	if ((cpuid_features() & CPUID_FEATURE_PCID))
		pcid_present = TRUE;
	else {
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
		pmap_pcid_log("PMAP: PCID not detected CPU %d\n", ccpu);
		return;
	}
	if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE | CR4_PGE)) {
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;
		pmap_pcid_log("PMAP: PCID already enabled %d\n", ccpu);
		return;
	}
	if (pcid_present == TRUE) {
		pmap_pcid_log("Pre-PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, cr4);

		if (cpu_number() >= PMAP_PCID_MAX_CPUS) {
			panic("PMAP_PCID_MAX_CPUS %d\n", cpu_number());
		}
		if ((get_cr4() & CR4_PGE) == 0) {
			set_cr4(get_cr4() | CR4_PGE);
			pmap_pcid_log("Toggled PGE ON (CPU %d)\n", ccpu);
		}
		set_cr4(get_cr4() | CR4_PCIDE);
		pmap_pcid_log("Post-PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, get_cr4());
		tlb_flush_global();
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;

		if (OSIncrementAtomic(&pmap_pcid_ncpus) == machine_info.max_cpus) {
			pmap_pcid_log("All PCIDs enabled: real_ncpus: %d, pmap_pcid_ncpus: %d\n", real_ncpus, pmap_pcid_ncpus);
		}
		cpu_datap(ccpu)->cpu_pmap_pcid_coherentp =
		    cpu_datap(ccpu)->cpu_pmap_pcid_coherentp_kernel =
		    &(kernel_pmap->pmap_pcid_coherency_vector[ccpu]);
		cpu_datap(ccpu)->cpu_pcid_refcounts[0] = 1;
	}
}

void pmap_pcid_initialize(pmap_t p) {
	unsigned i;
	unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t);

	pmap_assert(nc >= real_ncpus);
	for (i = 0; i < nc; i++) {
		p->pmap_pcid_cpus[i] = PMAP_PCID_INVALID_PCID;
		/* We assume here that the coherency vector is zeroed by
		 * pmap_create
		 */
	}
}

void pmap_pcid_initialize_kernel(pmap_t p) {
	unsigned i;
	unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t);

	for (i = 0; i < nc; i++) {
		p->pmap_pcid_cpus[i] = 0;
		/* We assume here that the coherency vector is zeroed by
		 * pmap_create
		 */
	}
}
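/*
 * Usage note (illustrative): the boot-arg consumed by
 * pmap_pcid_configure() above can be set from a privileged shell to
 * disable PCID support for debugging, e.g.:
 *
 *	nvram boot-args="-pmap_pcid_disable"
 *
 * Shown for illustration only; in practice this would be combined with
 * any boot-args already configured on the target system.
 */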
pcid_t
pmap_pcid_allocate_pcid(int ccpu) {
	int i;
	pcid_ref_t cur_min = 0xFF;
	uint32_t cur_min_index = ~1;
	pcid_ref_t *cpu_pcid_refcounts = &cpu_datap(ccpu)->cpu_pcid_refcounts[0];
	pcid_ref_t old_count;

	if ((i = cpu_datap(ccpu)->cpu_pcid_free_hint) != 0) {
		if (cpu_pcid_refcounts[i] == 0) {
			(void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
			cpu_datap(ccpu)->cpu_pcid_free_hint = 0;
			return i;
		}
	}
	/* Linear scan to discover free slot, with hint. Room for optimization
	 * but with intelligent prefetchers this should be
	 * adequately performant, as it is invoked
	 * only on first dispatch of a new address space onto
	 * a given processor. DRKTODO: use larger loads and
	 * zero byte discovery -- any pattern != ~1 should
	 * signify a free slot.
	 */
	for (i = PMAP_PCID_MIN_PCID; i < PMAP_PCID_MAX_PCID; i++) {
		pcid_ref_t cur_refcount = cpu_pcid_refcounts[i];

		pmap_assert(cur_refcount < PMAP_PCID_MAX_REFCOUNT);

		if (cur_refcount == 0) {
			(void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
			return i;
		} else if (cur_refcount < cur_min) {
			cur_min_index = i;
			cur_min = cur_refcount;
		}
	}
	pmap_assert(cur_min_index > 0 && cur_min_index < PMAP_PCID_MAX_PCID);
	/* Consider "rebalancing" tags actively in highly oversubscribed cases,
	 * perhaps selecting tags with lower activity.
	 */
	old_count = __sync_fetch_and_add(&cpu_pcid_refcounts[cur_min_index], 1);
	pmap_assert(old_count < PMAP_PCID_MAX_REFCOUNT);
	return cur_min_index;
}

void pmap_pcid_deallocate_pcid(int ccpu, pmap_t tpmap) {
	pcid_t pcid;
	pmap_t lp;
	pcid_ref_t prior_count;

	pcid = tpmap->pmap_pcid_cpus[ccpu];
	pmap_assert(pcid != PMAP_PCID_INVALID_PCID);
	if (pcid == PMAP_PCID_INVALID_PCID)
		return;

	lp = cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[pcid];
	pmap_assert(pcid > 0 && pcid < PMAP_PCID_MAX_PCID);
	pmap_assert(cpu_datap(ccpu)->cpu_pcid_refcounts[pcid] >= 1);

	if (lp == tpmap)
		(void)__sync_bool_compare_and_swap(&cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[pcid], tpmap, PMAP_INVALID);

	if ((prior_count = __sync_fetch_and_sub(&cpu_datap(ccpu)->cpu_pcid_refcounts[pcid], 1)) == 1) {
		cpu_datap(ccpu)->cpu_pcid_free_hint = pcid;
	}
	pmap_assert(prior_count <= PMAP_PCID_MAX_REFCOUNT);
}

void pmap_destroy_pcid_sync(pmap_t p) {
	int i;
	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
	for (i = 0; i < PMAP_PCID_MAX_CPUS; i++)
		if (p->pmap_pcid_cpus[i] != PMAP_PCID_INVALID_PCID)
			pmap_pcid_deallocate_pcid(i, p);
}

pcid_t pcid_for_pmap_cpu_tuple(pmap_t pmap, int ccpu) {
	return pmap->pmap_pcid_cpus[ccpu];
}

#if PMAP_ASSERT
#define PCID_RECORD_SIZE 128
uint64_t pcid_record_array[PCID_RECORD_SIZE];
#endif
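/*
 * Hypothetical lifecycle of the allocator above, assuming
 * PMAP_PCID_MIN_PCID == 1 and an otherwise idle CPU (tag 0 is reserved
 * for the kernel pmap, per pmap_pcid_configure()):
 *
 *	pcid_t t = pmap_pcid_allocate_pcid(ccpu);  // scan finds refcount 0 at slot 1
 *	// ...address space runs under tag t...
 *	pmap_pcid_deallocate_pcid(ccpu, tpmap);    // refcount drops to 0; free hint = t
 *	t = pmap_pcid_allocate_pcid(ccpu);         // hint hit: t reused without a scan
 *
 * Under oversubscription (more live address spaces than tags) the scan
 * instead returns the least-referenced tag, so a tag may be shared by
 * several pmaps; pmap_pcid_activate() below detects the resulting
 * conflict via cpu_pcid_last_pmap_dispatched[] and forces a
 * non-preserving CR3 load.
 */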
void pmap_pcid_activate(pmap_t tpmap, int ccpu) {
	pcid_t new_pcid = tpmap->pmap_pcid_cpus[ccpu];
	pmap_t last_pmap;
	boolean_t pcid_conflict = FALSE, pending_flush = FALSE;

	pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled);
	if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) {
		new_pcid = tpmap->pmap_pcid_cpus[ccpu] = pmap_pcid_allocate_pcid(ccpu);
	}
	pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID);
#ifdef PCID_ASSERT
	cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid;
#endif
	cpu_datap(ccpu)->cpu_active_pcid = new_pcid;

	pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
	if (__probable(pending_flush == FALSE)) {
		last_pmap = cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid];
		pcid_conflict = ((last_pmap != NULL) && (tpmap != last_pmap));
	}
	if (__improbable(pending_flush || pcid_conflict)) {
		pmap_pcid_validate_cpu(tpmap, ccpu);
	}
	/* Consider making this a unique id */
	cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap;

	pmap_assert(new_pcid < PMAP_PCID_MAX_PCID);
	pmap_assert(((tpmap == kernel_pmap) && new_pcid == 0) || ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0)));
#if PMAP_ASSERT
	pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) << 63);
	pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL);
	/* Diagnostic to detect pagetable anchor corruption */
	if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX])
		__asm__ volatile("int3");
#endif /* PMAP_ASSERT */
	set_cr3_composed(tpmap->pm_cr3, new_pcid, !(pending_flush || pcid_conflict));

	if (!pending_flush) {
		/* We did not previously observe a pending invalidation for this
		 * ASID. However, the load from the coherency vector
		 * could've been reordered ahead of the store to the
		 * active_cr3 field (in the context switch path, our
		 * caller). Re-consult the pending invalidation vector
		 * after the CR3 write. We rely on MOV CR3's documented
		 * serializing property to avoid insertion of an expensive
		 * barrier. (DRK)
		 */
		pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
		if (__improbable(pending_flush != 0)) {
			pmap_pcid_validate_cpu(tpmap, ccpu);
			set_cr3_composed(tpmap->pm_cr3, new_pcid, FALSE);
		}
	}
	cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]);
#if DEBUG
	KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0);
#endif
}
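/*
 * For reference, one plausible shape of set_cr3_composed(); the actual
 * implementation and its signature live in i386/proc_reg.h, so treat
 * this as a sketch rather than the definition. Architecturally, with
 * CR4.PCIDE set, CR3 bits 11:0 carry the PCID and bit 63, when set,
 * directs the processor to preserve (not flush) the cached translations
 * tagged with the new PCID across the MOV to CR3:
 *
 *	static inline void
 *	set_cr3_composed(uintptr_t base, uint16_t pcid, uint32_t preserve)
 *	{
 *		__asm__ volatile("mov %0, %%cr3" : :
 *		    "r" (base | pcid | (((uint64_t)preserve) << 63)));
 *	}
 *
 * This is why pmap_pcid_activate() passes FALSE on the re-check path:
 * a pending invalidation must not be masked by a preserving CR3 load.
 */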