ppage.c revision 6461:037a423f52ad
1266072Sdes/* 2266072Sdes * CDDL HEADER START 3285206Sdes * 4266072Sdes * The contents of this file are subject to the terms of the 5266072Sdes * Common Development and Distribution License (the "License"). 6266072Sdes * You may not use this file except in compliance with the License. 7266072Sdes * 8266072Sdes * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9266072Sdes * or http://www.opensolaris.org/os/licensing. 10266072Sdes * See the License for the specific language governing permissions 11285206Sdes * and limitations under the License. 12285206Sdes * 13285206Sdes * When distributing Covered Code, include this CDDL HEADER in each 14285206Sdes * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15285206Sdes * If applicable, add the following below this CDDL HEADER, with the 16285206Sdes * fields enclosed by brackets "[]" replaced with your own identifying 17285206Sdes * information: Portions Copyright [yyyy] [name of copyright owner] 18285206Sdes * 19266072Sdes * CDDL HEADER END 20285206Sdes */ 21285206Sdes/* 22285206Sdes * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23285206Sdes * Use is subject to license terms. 24266072Sdes */ 25285206Sdes 26285206Sdes#pragma ident "%Z%%M% %I% %E% SMI" 27266072Sdes 28266072Sdes#include <sys/types.h> 29266072Sdes#include <sys/systm.h> 30266072Sdes#include <sys/archsystm.h> 31285206Sdes#include <sys/machsystm.h> 32266072Sdes#include <sys/t_lock.h> 33266072Sdes#include <sys/vmem.h> 34285206Sdes#include <sys/mman.h> 35266072Sdes#include <sys/vm.h> 36266072Sdes#include <sys/cpu.h> 37266072Sdes#include <sys/cmn_err.h> 38266072Sdes#include <sys/cpuvar.h> 39266072Sdes#include <sys/atomic.h> 40266072Sdes#include <vm/as.h> 41266072Sdes#include <vm/hat.h> 42266072Sdes#include <vm/as.h> 43266072Sdes#include <vm/page.h> 44266072Sdes#include <vm/seg.h> 45266072Sdes#include <vm/seg_kmem.h> 46266072Sdes#include <vm/seg_kpm.h> 47266072Sdes#include <vm/hat_sfmmu.h> 48266072Sdes#include <sys/debug.h> 49266072Sdes#include <sys/cpu_module.h> 50266072Sdes 51266072Sdes/* 52266072Sdes * A quick way to generate a cache consistent address to map in a page. 53266072Sdes * users: ppcopy, pagezero, /proc, dev/mem 54266072Sdes * 55266072Sdes * The ppmapin/ppmapout routines provide a quick way of generating a cache 56266072Sdes * consistent address by reserving a given amount of kernel address space. 57266072Sdes * The base is PPMAPBASE and its size is PPMAPSIZE. This memory is divided 58266072Sdes * into x number of sets, where x is the number of colors for the virtual 59266072Sdes * cache. The number of colors is how many times a page can be mapped 60266072Sdes * simulatenously in the cache. For direct map caches this translates to 61266072Sdes * the number of pages in the cache. 62285206Sdes * Each set will be assigned a group of virtual pages from the reserved memory 63266072Sdes * depending on its virtual color. 64266072Sdes * When trying to assign a virtual address we will find out the color for the 65266072Sdes * physical page in question (if applicable). Then we will try to find an 66266072Sdes * available virtual page from the set of the appropiate color. 67266072Sdes */ 68266072Sdes 69266072Sdesint pp_slots = 4; /* small default, tuned by cpu module */ 70266072Sdes 71266072Sdes/* tuned by cpu module, default is "safe" */ 72266072Sdesint pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE; 73266072Sdes 74266072Sdesstatic caddr_t ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE]; 75266072Sdesstatic int nsets; /* number of sets */ 76266072Sdesstatic int ppmap_shift; /* set selector */ 77266072Sdes 78266072Sdes#ifdef PPDEBUG 79266072Sdes#define MAXCOLORS 16 /* for debug only */ 80266072Sdesstatic int ppalloc_noslot = 0; /* # of allocations from kernelmap */ 81266072Sdesstatic int align_hits; 82266072Sdesstatic int pp_allocs; /* # of ppmapin requests */ 83266072Sdes#endif /* PPDEBUG */ 84266072Sdes 85266072Sdes/* 86285206Sdes * There are only 64 TLB entries on spitfire, 16 on cheetah 87266072Sdes * (fully-associative TLB) so we allow the cpu module to tune the 88266072Sdes * number to use here via pp_slots. 89266072Sdes */ 90266072Sdesstatic struct ppmap_va { 91266072Sdes caddr_t ppmap_slots[MAXPP_SLOTS]; 92266072Sdes} ppmap_va[NCPU]; 93266072Sdes 94266072Sdes/* prevent compilation with VAC defined */ 95266072Sdes#ifdef VAC 96266072Sdes#error "sun4v ppmapin and ppmapout do not support VAC" 97266072Sdes#endif 98266072Sdes 99266072Sdesvoid 100266072Sdesppmapinit(void) 101266072Sdes{ 102266072Sdes int nset; 103266072Sdes caddr_t va; 104266072Sdes 105266072Sdes ASSERT(pp_slots <= MAXPP_SLOTS); 106285206Sdes 107285206Sdes va = (caddr_t)PPMAPBASE; 108285206Sdes 109285206Sdes /* 110285206Sdes * sun4v does not have a virtual indexed cache and simply 111285206Sdes * has only one set containing all pages. 112285206Sdes */ 113285206Sdes nsets = mmu_btop(PPMAPSIZE); 114285206Sdes ppmap_shift = MMU_PAGESHIFT; 115285206Sdes 116285206Sdes for (nset = 0; nset < nsets; nset++) { 117285206Sdes ppmap_vaddrs[nset] = 118285206Sdes (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE)); 119285206Sdes } 120285206Sdes} 121285206Sdes 122285206Sdes/* 123285206Sdes * Allocate a cache consistent virtual address to map a page, pp, 124285206Sdes * with protection, vprot; and map it in the MMU, using the most 125266072Sdes * efficient means possible. The argument avoid is a virtual address 126266072Sdes * hint which when masked yields an offset into a virtual cache 127285206Sdes * that should be avoided when allocating an address to map in a 128285206Sdes * page. An avoid arg of -1 means you don't care, for instance pagezero. 129285206Sdes * 130266072Sdes * machine dependent, depends on virtual address space layout, 131285206Sdes * understands that all kernel addresses have bit 31 set. 132285206Sdes * 133285206Sdes * NOTE: For sun4 platforms the meaning of the hint argument is opposite from 134285206Sdes * that found in other architectures. In other architectures the hint 135285206Sdes * (called avoid) was used to ask ppmapin to NOT use the specified cache color. 136266072Sdes * This was used to avoid virtual cache trashing in the bcopy. Unfortunately 137266072Sdes * in the case of a COW, this later on caused a cache aliasing conflict. In 138266072Sdes * sun4, the bcopy routine uses the block ld/st instructions so we don't have 139266072Sdes * to worry about virtual cache trashing. Actually, by using the hint to choose 140266072Sdes * the right color we can almost guarantee a cache conflict will not occur. 141266072Sdes */ 142285206Sdes 143266072Sdes/*ARGSUSED2*/ 144266072Sdescaddr_t 145266072Sdesppmapin(page_t *pp, uint_t vprot, caddr_t hint) 146266072Sdes{ 147266072Sdes int nset; 148266072Sdes caddr_t va; 149266072Sdes 150266072Sdes#ifdef PPDEBUG 151266072Sdes pp_allocs++; 152266072Sdes#endif /* PPDEBUG */ 153266072Sdes 154266072Sdes /* 155266072Sdes * For sun4v caches are physical caches, we can pick any address 156266072Sdes * we want. 157266072Sdes */ 158266072Sdes for (nset = 0; nset < nsets; nset++) { 159266072Sdes va = ppmap_vaddrs[nset]; 160266072Sdes if (va != NULL) { 161266072Sdes#ifdef PPDEBUG 162266072Sdes align_hits++; 163266072Sdes#endif /* PPDEBUG */ 164266072Sdes if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) { 165266072Sdes hat_memload(kas.a_hat, va, pp, 166266072Sdes vprot | HAT_NOSYNC, 167266072Sdes HAT_LOAD_LOCK); 168266072Sdes return (va); 169266072Sdes } 170266072Sdes } 171266072Sdes } 172266072Sdes 173266072Sdes#ifdef PPDEBUG 174266072Sdes ppalloc_noslot++; 175266072Sdes#endif /* PPDEBUG */ 176266072Sdes 177266072Sdes /* 178266072Sdes * No free slots; get a random one from the kernel heap area. 179266072Sdes */ 180266072Sdes va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 181266072Sdes 182266072Sdes hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK); 183266072Sdes 184266072Sdes return (va); 185266072Sdes 186266072Sdes} 187266072Sdes 188266072Sdesvoid 189285206Sdesppmapout(caddr_t va) 190266072Sdes{ 191266072Sdes int nset; 192285206Sdes 193266072Sdes if (va >= kernelheap && va < ekernelheap) { 194285206Sdes /* 195266072Sdes * Space came from kernelmap, flush the page and 196266072Sdes * return the space. 197266072Sdes */ 198285206Sdes hat_unload(kas.a_hat, va, PAGESIZE, 199266072Sdes (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK)); 200266072Sdes vmem_free(heap_arena, va, PAGESIZE); 201266072Sdes } else { 202266072Sdes /* 203266072Sdes * Space came from ppmap_vaddrs[], give it back. 204266072Sdes */ 205266072Sdes nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1); 206266072Sdes hat_unload(kas.a_hat, va, PAGESIZE, 207266072Sdes (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK)); 208266072Sdes 209266072Sdes ASSERT(ppmap_vaddrs[nset] == NULL); 210266072Sdes ppmap_vaddrs[nset] = va; 211285206Sdes } 212266072Sdes} 213266072Sdes 214266072Sdes#ifdef DEBUG 215266072Sdes#define PP_STAT_ADD(stat) (stat)++ 216266072Sdesuint_t pload, ploadfail; 217266072Sdesuint_t ppzero, ppzero_short; 218266072Sdes#else 219266072Sdes#define PP_STAT_ADD(stat) 220266072Sdes#endif /* DEBUG */ 221266072Sdes 222285206Sdesstatic void 223266072Sdespp_unload_tlb(caddr_t *pslot, caddr_t va) 224266072Sdes{ 225266072Sdes ASSERT(*pslot == va); 226285206Sdes 227266072Sdes vtag_flushpage(va, (uint64_t)ksfmmup); 228285206Sdes *pslot = NULL; /* release the slot */ 229285206Sdes} 230266072Sdes 231266072Sdes/* 232266072Sdes * Routine to copy kernel pages during relocation. It will copy one 233266072Sdes * PAGESIZE page to another PAGESIZE page. This function may be called 234266072Sdes * above LOCK_LEVEL so it should not grab any locks. 235266072Sdes */ 236266072Sdesvoid 237266072Sdesppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp) 238266072Sdes{ 239266072Sdes uint64_t fm_pa, to_pa; 240266072Sdes size_t nbytes; 241266072Sdes 242266072Sdes fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT; 243266072Sdes to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT; 244266072Sdes 245266072Sdes nbytes = MMU_PAGESIZE; 246266072Sdes 247266072Sdes for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32) 248266072Sdes hw_pa_bcopy32(fm_pa, to_pa); 249266072Sdes} 250266072Sdes 251266072Sdes/* 252266072Sdes * Copy the data from the physical page represented by "frompp" to 253266072Sdes * that represented by "topp". 254266072Sdes * 255266072Sdes * Try to use per cpu mapping first, if that fails then call pp_mapin 256266072Sdes * to load it. 257266072Sdes * Returns one on success or zero on some sort of fault while doing the copy. 258266072Sdes */ 259266072Sdesint 260266072Sdesppcopy(page_t *fm_pp, page_t *to_pp) 261266072Sdes{ 262266072Sdes caddr_t fm_va = NULL; 263266072Sdes caddr_t to_va; 264266072Sdes boolean_t fast; 265266072Sdes label_t ljb; 266266072Sdes int ret = 1; 267266072Sdes 268266072Sdes ASSERT(PAGE_LOCKED(fm_pp)); 269266072Sdes ASSERT(PAGE_LOCKED(to_pp)); 270266072Sdes 271266072Sdes /* 272266072Sdes * Try to map using KPM if enabled. If it fails, fall 273266072Sdes * back to ppmapin/ppmapout. 274266072Sdes */ 275266072Sdes if ((kpm_enable == 0) || 276266072Sdes (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL || 277266072Sdes (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) { 278266072Sdes if (fm_va != NULL) 279266072Sdes hat_kpm_mapout(fm_pp, NULL, fm_va); 280266072Sdes fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1); 281266072Sdes to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va); 282285206Sdes fast = B_FALSE; 283266072Sdes } else 284285206Sdes fast = B_TRUE; 285266072Sdes 286266072Sdes if (on_fault(&ljb)) { 287266072Sdes ret = 0; 288266072Sdes goto faulted; 289285206Sdes } 290266072Sdes bcopy(fm_va, to_va, PAGESIZE); 291266072Sdes no_fault(); 292266072Sdesfaulted: 293266072Sdes 294266072Sdes /* Unmap */ 295266072Sdes if (fast) { 296266072Sdes hat_kpm_mapout(fm_pp, NULL, fm_va); 297266072Sdes hat_kpm_mapout(to_pp, NULL, to_va); 298266072Sdes } else { 299285206Sdes ppmapout(fm_va); 300266072Sdes ppmapout(to_va); 301266072Sdes } 302266072Sdes return (ret); 303266072Sdes} 304266072Sdes 305266072Sdes/* 306266072Sdes * Zero the physical page from off to off + len given by `pp' 307266072Sdes * without changing the reference and modified bits of page. 308266072Sdes * 309266072Sdes * Again, we'll try per cpu mapping first. 310266072Sdes */ 311266072Sdes 312266072Sdesvoid 313266072Sdespagezero(page_t *pp, uint_t off, uint_t len) 314266072Sdes{ 315266072Sdes caddr_t va; 316266072Sdes extern int hwblkclr(void *, size_t); 317266072Sdes extern int use_hw_bzero; 318266072Sdes boolean_t fast; 319266072Sdes 320266072Sdes ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE); 321266072Sdes ASSERT(PAGE_LOCKED(pp)); 322266072Sdes 323266072Sdes PP_STAT_ADD(ppzero); 324266072Sdes 325266072Sdes if (len != MMU_PAGESIZE || !use_hw_bzero) { 326266072Sdes PP_STAT_ADD(ppzero_short); 327266072Sdes } 328266072Sdes 329266072Sdes kpreempt_disable(); 330266072Sdes 331266072Sdes /* 332266072Sdes * Try to use KPM if enabled. If that fails, fall back to 333266072Sdes * ppmapin/ppmapout. 334266072Sdes */ 335266072Sdes 336266072Sdes if (kpm_enable != 0) { 337266072Sdes fast = B_TRUE; 338266072Sdes va = hat_kpm_mapin(pp, NULL); 339266072Sdes } else 340266072Sdes va = NULL; 341266072Sdes 342266072Sdes if (va == NULL) { 343266072Sdes fast = B_FALSE; 344266072Sdes va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1); 345266072Sdes } 346266072Sdes 347266072Sdes if (!use_hw_bzero) { 348266072Sdes bzero(va + off, len); 349266072Sdes sync_icache(va + off, len); 350266072Sdes } else if (hwblkclr(va + off, len)) { 351266072Sdes /* 352266072Sdes * We may not have used block commit asi. 353266072Sdes * So flush the I-$ manually 354266072Sdes */ 355266072Sdes sync_icache(va + off, len); 356266072Sdes } else { 357266072Sdes /* 358266072Sdes * We have used blk commit, and flushed the I-$. 359266072Sdes * However we still may have an instruction in the 360266072Sdes * pipeline. Only a flush will invalidate that. 361266072Sdes */ 362266072Sdes doflush(va); 363266072Sdes } 364266072Sdes 365266072Sdes if (fast) { 366266072Sdes hat_kpm_mapout(pp, NULL, va); 367266072Sdes } else { 368266072Sdes ppmapout(va); 369266072Sdes } 370266072Sdes kpreempt_enable(); 371266072Sdes} 372266072Sdes