1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28#include <sys/types.h> 29#include <sys/systm.h> 30#include <sys/archsystm.h> 31#include <sys/machsystm.h> 32#include <sys/t_lock.h> 33#include <sys/vmem.h> 34#include <sys/mman.h> 35#include <sys/vm.h> 36#include <sys/cpu.h> 37#include <sys/cmn_err.h> 38#include <sys/cpuvar.h> 39#include <sys/atomic.h> 40#include <vm/as.h> 41#include <vm/hat.h> 42#include <vm/as.h> 43#include <vm/page.h> 44#include <vm/seg.h> 45#include <vm/seg_kmem.h> 46#include <vm/seg_kpm.h> 47#include <vm/hat_sfmmu.h> 48#include <sys/debug.h> 49#include <sys/cpu_module.h> 50 51/* 52 * A quick way to generate a cache consistent address to map in a page. 53 * users: ppcopy, pagezero, /proc, dev/mem 54 * 55 * The ppmapin/ppmapout routines provide a quick way of generating a cache 56 * consistent address by reserving a given amount of kernel address space. 57 * The base is PPMAPBASE and its size is PPMAPSIZE. This memory is divided 58 * into x number of sets, where x is the number of colors for the virtual 59 * cache. The number of colors is how many times a page can be mapped 60 * simulatenously in the cache. For direct map caches this translates to 61 * the number of pages in the cache. 62 * Each set will be assigned a group of virtual pages from the reserved memory 63 * depending on its virtual color. 64 * When trying to assign a virtual address we will find out the color for the 65 * physical page in question (if applicable). Then we will try to find an 66 * available virtual page from the set of the appropiate color. 67 */ 68 69int pp_slots = 4; /* small default, tuned by cpu module */ 70 71/* tuned by cpu module, default is "safe" */ 72int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE; 73 74static caddr_t ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE]; 75static int nsets; /* number of sets */ 76static int ppmap_shift; /* set selector */ 77 78#ifdef PPDEBUG 79#define MAXCOLORS 16 /* for debug only */ 80static int ppalloc_noslot = 0; /* # of allocations from kernelmap */ 81static int align_hits; 82static int pp_allocs; /* # of ppmapin requests */ 83#endif /* PPDEBUG */ 84 85/* 86 * There are only 64 TLB entries on spitfire, 16 on cheetah 87 * (fully-associative TLB) so we allow the cpu module to tune the 88 * number to use here via pp_slots. 89 */ 90static struct ppmap_va { 91 caddr_t ppmap_slots[MAXPP_SLOTS]; 92} ppmap_va[NCPU]; 93 94/* prevent compilation with VAC defined */ 95#ifdef VAC 96#error "sun4v ppmapin and ppmapout do not support VAC" 97#endif 98 99void 100ppmapinit(void) 101{ 102 int nset; 103 caddr_t va; 104 105 ASSERT(pp_slots <= MAXPP_SLOTS); 106 107 va = (caddr_t)PPMAPBASE; 108 109 /* 110 * sun4v does not have a virtual indexed cache and simply 111 * has only one set containing all pages. 112 */ 113 nsets = mmu_btop(PPMAPSIZE); 114 ppmap_shift = MMU_PAGESHIFT; 115 116 for (nset = 0; nset < nsets; nset++) { 117 ppmap_vaddrs[nset] = 118 (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE)); 119 } 120} 121 122/* 123 * Allocate a cache consistent virtual address to map a page, pp, 124 * with protection, vprot; and map it in the MMU, using the most 125 * efficient means possible. The argument avoid is a virtual address 126 * hint which when masked yields an offset into a virtual cache 127 * that should be avoided when allocating an address to map in a 128 * page. An avoid arg of -1 means you don't care, for instance pagezero. 129 * 130 * machine dependent, depends on virtual address space layout, 131 * understands that all kernel addresses have bit 31 set. 132 * 133 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from 134 * that found in other architectures. In other architectures the hint 135 * (called avoid) was used to ask ppmapin to NOT use the specified cache color. 136 * This was used to avoid virtual cache trashing in the bcopy. Unfortunately 137 * in the case of a COW, this later on caused a cache aliasing conflict. In 138 * sun4, the bcopy routine uses the block ld/st instructions so we don't have 139 * to worry about virtual cache trashing. Actually, by using the hint to choose 140 * the right color we can almost guarantee a cache conflict will not occur. 141 */ 142 143/*ARGSUSED2*/ 144caddr_t 145ppmapin(page_t *pp, uint_t vprot, caddr_t hint) 146{ 147 int nset; 148 caddr_t va; 149 150#ifdef PPDEBUG 151 pp_allocs++; 152#endif /* PPDEBUG */ 153 154 /* 155 * For sun4v caches are physical caches, we can pick any address 156 * we want. 157 */ 158 for (nset = 0; nset < nsets; nset++) { 159 va = ppmap_vaddrs[nset]; 160 if (va != NULL) { 161#ifdef PPDEBUG 162 align_hits++; 163#endif /* PPDEBUG */ 164 if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) { 165 hat_memload(kas.a_hat, va, pp, 166 vprot | HAT_NOSYNC, 167 HAT_LOAD_LOCK); 168 return (va); 169 } 170 } 171 } 172 173#ifdef PPDEBUG 174 ppalloc_noslot++; 175#endif /* PPDEBUG */ 176 177 /* 178 * No free slots; get a random one from the kernel heap area. 179 */ 180 va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 181 182 hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK); 183 184 return (va); 185 186} 187 188void 189ppmapout(caddr_t va) 190{ 191 int nset; 192 193 if (va >= kernelheap && va < ekernelheap) { 194 /* 195 * Space came from kernelmap, flush the page and 196 * return the space. 197 */ 198 hat_unload(kas.a_hat, va, PAGESIZE, 199 (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK)); 200 vmem_free(heap_arena, va, PAGESIZE); 201 } else { 202 /* 203 * Space came from ppmap_vaddrs[], give it back. 204 */ 205 nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1); 206 hat_unload(kas.a_hat, va, PAGESIZE, 207 (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK)); 208 209 ASSERT(ppmap_vaddrs[nset] == NULL); 210 ppmap_vaddrs[nset] = va; 211 } 212} 213 214#ifdef DEBUG 215#define PP_STAT_ADD(stat) (stat)++ 216uint_t pload, ploadfail; 217uint_t ppzero, ppzero_short; 218#else 219#define PP_STAT_ADD(stat) 220#endif /* DEBUG */ 221 222static void 223pp_unload_tlb(caddr_t *pslot, caddr_t va) 224{ 225 ASSERT(*pslot == va); 226 227 vtag_flushpage(va, (uint64_t)ksfmmup); 228 *pslot = NULL; /* release the slot */ 229} 230 231/* 232 * Routine to copy kernel pages during relocation. It will copy one 233 * PAGESIZE page to another PAGESIZE page. This function may be called 234 * above LOCK_LEVEL so it should not grab any locks. 235 */ 236void 237ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp) 238{ 239 uint64_t fm_pa, to_pa; 240 size_t nbytes; 241 242 fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT; 243 to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT; 244 245 nbytes = MMU_PAGESIZE; 246 247 for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32) 248 hw_pa_bcopy32(fm_pa, to_pa); 249} 250 251/* 252 * Copy the data from the physical page represented by "frompp" to 253 * that represented by "topp". 254 * 255 * Try to use per cpu mapping first, if that fails then call pp_mapin 256 * to load it. 257 * Returns one on success or zero on some sort of fault while doing the copy. 258 */ 259int 260ppcopy(page_t *fm_pp, page_t *to_pp) 261{ 262 caddr_t fm_va = NULL; 263 caddr_t to_va; 264 boolean_t fast; 265 label_t ljb; 266 int ret = 1; 267 268 ASSERT(PAGE_LOCKED(fm_pp)); 269 ASSERT(PAGE_LOCKED(to_pp)); 270 271 /* 272 * Try to map using KPM if enabled. If it fails, fall 273 * back to ppmapin/ppmapout. 274 */ 275 if ((kpm_enable == 0) || 276 (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL || 277 (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) { 278 if (fm_va != NULL) 279 hat_kpm_mapout(fm_pp, NULL, fm_va); 280 fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1); 281 to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va); 282 fast = B_FALSE; 283 } else 284 fast = B_TRUE; 285 286 if (on_fault(&ljb)) { 287 ret = 0; 288 goto faulted; 289 } 290 bcopy(fm_va, to_va, PAGESIZE); 291 no_fault(); 292faulted: 293 294 /* Unmap */ 295 if (fast) { 296 hat_kpm_mapout(fm_pp, NULL, fm_va); 297 hat_kpm_mapout(to_pp, NULL, to_va); 298 } else { 299 ppmapout(fm_va); 300 ppmapout(to_va); 301 } 302 return (ret); 303} 304 305/* 306 * Zero the physical page from off to off + len given by `pp' 307 * without changing the reference and modified bits of page. 308 * 309 * Again, we'll try per cpu mapping first. 310 */ 311 312void 313pagezero(page_t *pp, uint_t off, uint_t len) 314{ 315 caddr_t va; 316 extern int hwblkclr(void *, size_t); 317 extern int use_hw_bzero; 318 boolean_t fast; 319 320 ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE); 321 ASSERT(PAGE_LOCKED(pp)); 322 323 PP_STAT_ADD(ppzero); 324 325 if (len != MMU_PAGESIZE || !use_hw_bzero) { 326 PP_STAT_ADD(ppzero_short); 327 } 328 329 kpreempt_disable(); 330 331 /* 332 * Try to use KPM if enabled. If that fails, fall back to 333 * ppmapin/ppmapout. 334 */ 335 336 if (kpm_enable != 0) { 337 fast = B_TRUE; 338 va = hat_kpm_mapin(pp, NULL); 339 } else 340 va = NULL; 341 342 if (va == NULL) { 343 fast = B_FALSE; 344 va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1); 345 } 346 347 if (!use_hw_bzero) { 348 bzero(va + off, len); 349 sync_icache(va + off, len); 350 } else if (hwblkclr(va + off, len)) { 351 /* 352 * We may not have used block commit asi. 353 * So flush the I-$ manually 354 */ 355 sync_icache(va + off, len); 356 } else { 357 /* 358 * We have used blk commit, and flushed the I-$. 359 * However we still may have an instruction in the 360 * pipeline. Only a flush will invalidate that. 361 */ 362 doflush(va); 363 } 364 365 if (fast) { 366 hat_kpm_mapout(pp, NULL, va); 367 } else { 368 ppmapout(va); 369 } 370 kpreempt_enable(); 371} 372