vm_dep.h revision 7718:555eee33ff6e
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26/* 27 * UNIX machine dependent virtual memory support. 28 */ 29 30#ifndef _VM_DEP_H 31#define _VM_DEP_H 32 33#ifdef __cplusplus 34extern "C" { 35#endif 36 37#include <vm/hat_sfmmu.h> 38#include <sys/archsystm.h> 39#include <sys/memnode.h> 40 41#define GETTICK() gettick() 42/* 43 * Do not use this function for obtaining clock tick. This 44 * is called by callers who do not need to have a guarenteed 45 * correct tick value. The proper routine to use is tsc_read(). 46 */ 47#define randtick() gettick() 48 49/* 50 * Per page size free lists. Allocated dynamically. 51 */ 52#define MAX_MEM_TYPES 2 /* 0 = reloc, 1 = noreloc */ 53#define MTYPE_RELOC 0 54#define MTYPE_NORELOC 1 55 56#define PP_2_MTYPE(pp) (PP_ISNORELOC(pp) ? MTYPE_NORELOC : MTYPE_RELOC) 57 58#define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) \ 59 mtype = (flags & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC; 60 61/* mtype init for page_get_replacement_page */ 62#define MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt) \ 63 mtype = (flags & PG_NORELOC) ? 
MTYPE_NORELOC : MTYPE_RELOC; 64 65#define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \ 66 ASSERT(mtype != MTYPE_NORELOC); \ 67 pfnlo = mem_node_config[mnode].physbase; \ 68 pfnhi = mem_node_config[mnode].physmax; 69 70/* 71 * candidate counters in vm_pagelist.c are indexed by color and range 72 */ 73#define MAX_MNODE_MRANGES MAX_MEM_TYPES 74#define MNODE_RANGE_CNT(mnode) MAX_MNODE_MRANGES 75#define MNODE_MAX_MRANGE(mnode) (MAX_MEM_TYPES - 1) 76#define MTYPE_2_MRANGE(mnode, mtype) (mtype) 77 78/* 79 * Internal PG_ flags. 80 */ 81#define PGI_RELOCONLY 0x10000 /* acts in the opposite sense to PG_NORELOC */ 82#define PGI_NOCAGE 0x20000 /* indicates Cage is disabled */ 83#define PGI_PGCPHIPRI 0x40000 /* page_get_contig_page priority allocation */ 84#define PGI_PGCPSZC0 0x80000 /* relocate base pagesize page */ 85 86/* 87 * PGI mtype flags - should not overlap PGI flags 88 */ 89#define PGI_MT_RANGE 0x1000000 /* mtype range */ 90#define PGI_MT_NEXT 0x2000000 /* get next mtype */ 91 92extern page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES]; 93extern page_t ***page_cachelists[MAX_MEM_TYPES]; 94 95#define PAGE_FREELISTS(mnode, szc, color, mtype) \ 96 (*(page_freelists[szc][mtype][mnode] + (color))) 97 98#define PAGE_CACHELISTS(mnode, color, mtype) \ 99 (*(page_cachelists[mtype][mnode] + (color))) 100 101/* 102 * There are 'page_colors' colors/bins. Spread them out under a 103 * couple of locks. There are mutexes for both the page freelist 104 * and the page cachelist. We want enough locks to make contention 105 * reasonable, but not too many -- otherwise page_freelist_lock() gets 106 * so expensive that it becomes the bottleneck! 107 */ 108#define NPC_MUTEX 16 109 110extern kmutex_t *fpc_mutex[NPC_MUTEX]; 111extern kmutex_t *cpc_mutex[NPC_MUTEX]; 112 113/* 114 * Iterator provides the info needed to convert RA to PA. 
115 * MEM_NODE_ITERATOR_INIT() should be called before 116 * PAGE_NEXT_PFN_FOR_COLOR() if pfn was not obtained via a previous 117 * PAGE_NEXT_PFN_FOR_COLOR() call. Iterator caches color 2 hash 118 * translations requiring initializer call if color or ceq_mask changes, 119 * even if pfn doesn't. MEM_NODE_ITERATOR_INIT() must also be called before 120 * PFN_2_COLOR() that uses a valid iterator argument. 121 */ 122#ifdef sun4v 123 124typedef struct mem_node_iterator { 125 uint_t mi_mnode; /* mnode in which to iterate */ 126 int mi_init; /* set to 1 when first init */ 127 int mi_last_mblock; /* last mblock visited */ 128 uint_t mi_hash_ceq_mask; /* cached copy of ceq_mask */ 129 uint_t mi_hash_color; /* cached copy of color */ 130 uint_t mi_mnode_mask; /* number of mask bits */ 131 uint_t mi_mnode_pfn_shift; /* mnode position in pfn */ 132 pfn_t mi_mblock_base; /* first valid pfn in current mblock */ 133 pfn_t mi_mblock_end; /* last valid pfn in current mblock */ 134 pfn_t mi_ra_to_pa; /* ra adjustment for current mblock */ 135 pfn_t mi_mnode_pfn_mask; /* mask to obtain mnode id bits */ 136} mem_node_iterator_t; 137 138#define MEM_NODE_ITERATOR_DECL(it) \ 139 mem_node_iterator_t it 140#define MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it) \ 141 (pfn) = plat_mem_node_iterator_init((pfn), (mnode), (szc), (it), 1) 142 143extern pfn_t plat_mem_node_iterator_init(pfn_t, int, uchar_t, 144 mem_node_iterator_t *, int); 145extern pfn_t plat_rapfn_to_papfn(pfn_t); 146extern int interleaved_mnodes; 147 148#else /* sun4v */ 149 150#define MEM_NODE_ITERATOR_DECL(it) \ 151 void *it = NULL 152#define MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it) 153 154#endif /* sun4v */ 155 156/* 157 * Return the mnode limits so that hpc_counters length and base 158 * index can be determined. When interleaved_mnodes is set, we 159 * create an array only for the first mnode that exists. All other 160 * mnodes will share the array in this case. 
161 * If interleaved_mnodes is not set, simply return the limits for 162 * the given mnode. 163 */ 164#define HPM_COUNTERS_LIMITS(mnode, physbase, physmax, first) \ 165 if (!interleaved_mnodes) { \ 166 (physbase) = mem_node_config[(mnode)].physbase; \ 167 (physmax) = mem_node_config[(mnode)].physmax; \ 168 (first) = (mnode); \ 169 } else if ((first) < 0) { \ 170 mem_node_max_range(&(physbase), &(physmax)); \ 171 (first) = (mnode); \ 172 } 173 174#define PAGE_CTRS_WRITE_LOCK(mnode) \ 175 if (!interleaved_mnodes) { \ 176 rw_enter(&page_ctrs_rwlock[(mnode)], RW_WRITER); \ 177 page_freelist_lock(mnode); \ 178 } else { \ 179 /* changing shared hpm_counters */ \ 180 int _i; \ 181 for (_i = 0; _i < max_mem_nodes; _i++) { \ 182 rw_enter(&page_ctrs_rwlock[_i], RW_WRITER); \ 183 page_freelist_lock(_i); \ 184 } \ 185 } 186 187#define PAGE_CTRS_WRITE_UNLOCK(mnode) \ 188 if (!interleaved_mnodes) { \ 189 page_freelist_unlock(mnode); \ 190 rw_exit(&page_ctrs_rwlock[(mnode)]); \ 191 } else { \ 192 int _i; \ 193 for (_i = 0; _i < max_mem_nodes; _i++) { \ 194 page_freelist_unlock(_i); \ 195 rw_exit(&page_ctrs_rwlock[_i]); \ 196 } \ 197 } 198 199/* 200 * cpu specific color conversion functions 201 */ 202extern uint_t page_get_nsz_color_mask_cpu(uchar_t, uint_t); 203#pragma weak page_get_nsz_color_mask_cpu 204 205extern uint_t page_get_nsz_color_cpu(uchar_t, uint_t); 206#pragma weak page_get_nsz_color_cpu 207 208extern uint_t page_get_color_shift_cpu(uchar_t, uchar_t); 209#pragma weak page_get_color_shift_cpu 210 211extern uint_t page_convert_color_cpu(uint_t, uchar_t, uchar_t); 212#pragma weak page_convert_color_cpu 213 214extern pfn_t page_next_pfn_for_color_cpu(pfn_t, 215 uchar_t, uint_t, uint_t, uint_t, void *); 216#pragma weak page_next_pfn_for_color_cpu 217 218extern uint_t page_pfn_2_color_cpu(pfn_t, uchar_t, void *); 219#pragma weak page_pfn_2_color_cpu 220 221#define PAGE_GET_COLOR_SHIFT(szc, nszc) \ 222 ((&page_get_color_shift_cpu != NULL) ? 
\ 223 page_get_color_shift_cpu(szc, nszc) : \ 224 (hw_page_array[(nszc)].hp_shift - \ 225 hw_page_array[(szc)].hp_shift)) 226 227#define PAGE_CONVERT_COLOR(ncolor, szc, nszc) \ 228 ((&page_convert_color_cpu != NULL) ? \ 229 page_convert_color_cpu(ncolor, szc, nszc) : \ 230 ((ncolor) << PAGE_GET_COLOR_SHIFT((szc), (nszc)))) 231 232#define PFN_2_COLOR(pfn, szc, it) \ 233 ((&page_pfn_2_color_cpu != NULL) ? \ 234 page_pfn_2_color_cpu(pfn, szc, it) : \ 235 ((pfn & (hw_page_array[0].hp_colors - 1)) >> \ 236 (hw_page_array[szc].hp_shift - \ 237 hw_page_array[0].hp_shift))) 238 239#define PNUM_SIZE(szc) \ 240 (hw_page_array[(szc)].hp_pgcnt) 241#define PNUM_SHIFT(szc) \ 242 (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 243#define PAGE_GET_SHIFT(szc) \ 244 (hw_page_array[(szc)].hp_shift) 245#define PAGE_GET_PAGECOLORS(szc) \ 246 (hw_page_array[(szc)].hp_colors) 247 248/* 249 * This macro calculates the next sequential pfn with the specified 250 * color using color equivalency mask 251 */ 252#define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask, it) \ 253 { \ 254 ASSERT(((color) & ~(ceq_mask)) == 0); \ 255 if (&page_next_pfn_for_color_cpu == NULL) { \ 256 uint_t pfn_shift = PAGE_BSZS_SHIFT(szc); \ 257 pfn_t spfn = pfn >> pfn_shift; \ 258 pfn_t stride = (ceq_mask) + 1; \ 259 ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0); \ 260 if (((spfn ^ (color)) & (ceq_mask)) == 0) { \ 261 pfn += stride << pfn_shift; \ 262 } else { \ 263 pfn = (spfn & ~(pfn_t)(ceq_mask)) | (color); \ 264 pfn = (pfn > spfn ? pfn : pfn + stride) << \ 265 pfn_shift; \ 266 } \ 267 } else { \ 268 pfn = page_next_pfn_for_color_cpu(pfn, szc, color, \ 269 ceq_mask, color_mask, it); \ 270 } \ 271 } 272 273/* get the color equivalency mask for the next szc */ 274#define PAGE_GET_NSZ_MASK(szc, mask) \ 275 ((&page_get_nsz_color_mask_cpu == NULL) ? 
\ 276 ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \ 277 page_get_nsz_color_mask_cpu(szc, mask)) 278 279/* get the color of the next szc */ 280#define PAGE_GET_NSZ_COLOR(szc, color) \ 281 ((&page_get_nsz_color_cpu == NULL) ? \ 282 ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \ 283 page_get_nsz_color_cpu(szc, color)) 284 285/* Find the bin for the given page if it was of size szc */ 286#define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc, (void *)(-1))) 287 288#define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc)) 289 290#define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum)) 291 292#define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ 293 &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ 294 &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) 295 296#define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) 297#define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode]) 298 299#define PFN_BASE(pfnum, szc) (pfnum & ~((1 << PAGE_BSZS_SHIFT(szc)) - 1)) 300 301/* 302 * this structure is used for walking free page lists 303 * controls when to split large pages into smaller pages, 304 * and when to coalesce smaller pages into larger pages 305 */ 306typedef struct page_list_walker { 307 uint_t plw_colors; /* num of colors for szc */ 308 uint_t plw_color_mask; /* colors-1 */ 309 uint_t plw_bin_step; /* next bin: 1 or 2 */ 310 uint_t plw_count; /* loop count */ 311 uint_t plw_bin0; /* starting bin */ 312 uint_t plw_bin_marker; /* bin after initial jump */ 313 uint_t plw_bin_split_prev; /* last bin we tried to split */ 314 uint_t plw_do_split; /* set if OK to split */ 315 uint_t plw_split_next; /* next bin to split */ 316 uint_t plw_ceq_dif; /* number of different color groups */ 317 /* to check */ 318 uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ 319 uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ 320} page_list_walker_t; 321 322void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, 323 int can_split, int 
use_ceq, page_list_walker_t *plw); 324 325typedef char hpmctr_t; 326 327#ifdef DEBUG 328#define CHK_LPG(pp, szc) chk_lpg(pp, szc) 329extern void chk_lpg(page_t *, uchar_t); 330#else 331#define CHK_LPG(pp, szc) 332#endif 333 334/* 335 * page list count per mnode and type. 336 */ 337typedef struct { 338 pgcnt_t plc_mt_pgmax; /* max page cnt */ 339 pgcnt_t plc_mt_clpgcnt; /* cache list cnt */ 340 pgcnt_t plc_mt_flpgcnt; /* free list cnt - small pages */ 341 pgcnt_t plc_mt_lgpgcnt; /* free list cnt - large pages */ 342#ifdef DEBUG 343 struct { 344 pgcnt_t plc_mts_pgcnt; /* per page size count */ 345 int plc_mts_colors; 346 pgcnt_t *plc_mtsc_pgcnt; /* per color bin count */ 347 } plc_mts[MMU_PAGE_SIZES]; 348#endif 349} plcnt_t[MAX_MEM_NODES][MAX_MEM_TYPES]; 350 351#ifdef DEBUG 352 353#define PLCNT_SZ(ctrs_sz) { \ 354 int szc; \ 355 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 356 int colors = page_get_pagecolors(szc); \ 357 ctrs_sz += (max_mem_nodes * MAX_MEM_TYPES * \ 358 colors * sizeof (pgcnt_t)); \ 359 } \ 360} 361 362#define PLCNT_INIT(base) { \ 363 int mn, mt, szc, colors; \ 364 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 365 colors = page_get_pagecolors(szc); \ 366 for (mn = 0; mn < max_mem_nodes; mn++) { \ 367 for (mt = 0; mt < MAX_MEM_TYPES; mt++) { \ 368 plcnt[mn][mt].plc_mts[szc]. \ 369 plc_mts_colors = colors; \ 370 plcnt[mn][mt].plc_mts[szc]. \ 371 plc_mtsc_pgcnt = (pgcnt_t *)base; \ 372 base += (colors * sizeof (pgcnt_t)); \ 373 } \ 374 } \ 375 } \ 376} 377 378#define PLCNT_DO(pp, mn, mtype, szc, cnt, flags) { \ 379 int bin = PP_2_BIN(pp); \ 380 if (flags & PG_CACHE_LIST) \ 381 atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt); \ 382 else if (szc) \ 383 atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt); \ 384 else \ 385 atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt); \ 386 atomic_add_long(&plcnt[mn][mtype].plc_mts[szc].plc_mts_pgcnt, \ 387 cnt); \ 388 atomic_add_long(&plcnt[mn][mtype].plc_mts[szc]. 
\ 389 plc_mtsc_pgcnt[bin], cnt); \ 390} 391 392#else 393 394#define PLCNT_SZ(ctrs_sz) 395 396#define PLCNT_INIT(base) 397 398/* PG_FREE_LIST may not be explicitly set in flags for large pages */ 399 400#define PLCNT_DO(pp, mn, mtype, szc, cnt, flags) { \ 401 if (flags & PG_CACHE_LIST) \ 402 atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt); \ 403 else if (szc) \ 404 atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt); \ 405 else \ 406 atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt); \ 407} 408 409#endif 410 411#define PLCNT_INCR(pp, mn, mtype, szc, flags) { \ 412 long cnt = (1 << PAGE_BSZS_SHIFT(szc)); \ 413 PLCNT_DO(pp, mn, mtype, szc, cnt, flags); \ 414} 415 416#define PLCNT_DECR(pp, mn, mtype, szc, flags) { \ 417 long cnt = ((-1) << PAGE_BSZS_SHIFT(szc)); \ 418 PLCNT_DO(pp, mn, mtype, szc, cnt, flags); \ 419} 420 421/* 422 * macros to update page list max counts - done when pages transferred 423 * from RELOC to NORELOC mtype (kcage_init or kcage_assimilate_page). 424 */ 425 426#define PLCNT_XFER_NORELOC(pp) { \ 427 long cnt = (1 << PAGE_BSZS_SHIFT((pp)->p_szc)); \ 428 int mn = PP_2_MEM_NODE(pp); \ 429 atomic_add_long(&plcnt[mn][MTYPE_NORELOC].plc_mt_pgmax, cnt); \ 430 atomic_add_long(&plcnt[mn][MTYPE_RELOC].plc_mt_pgmax, -cnt); \ 431} 432 433/* 434 * macro to modify the page list max counts when memory is added to 435 * the page lists during startup (add_physmem) or during a DR operation 436 * when memory is added (kphysm_add_memory_dynamic) or deleted 437 * (kphysm_del_cleanup). 438 */ 439#define PLCNT_MODIFY_MAX(pfn, cnt) { \ 440 spgcnt_t _cnt = (spgcnt_t)(cnt); \ 441 pgcnt_t _acnt = ABS(_cnt); \ 442 int _mn; \ 443 pgcnt_t _np; \ 444 if (&plat_mem_node_intersect_range != NULL) { \ 445 for (_mn = 0; _mn < max_mem_nodes; _mn++) { \ 446 plat_mem_node_intersect_range((pfn), _acnt, _mn, &_np);\ 447 if (_np == 0) \ 448 continue; \ 449 atomic_add_long(&plcnt[_mn][MTYPE_RELOC].plc_mt_pgmax, \ 450 (_cnt < 0) ? 
-_np : _np); \ 451 } \ 452 } else { \ 453 pfn_t _pfn = (pfn); \ 454 pfn_t _endpfn = _pfn + _acnt; \ 455 while (_pfn < _endpfn) { \ 456 _mn = PFN_2_MEM_NODE(_pfn); \ 457 _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - \ 458 _pfn; \ 459 _pfn += _np; \ 460 atomic_add_long(&plcnt[_mn][MTYPE_RELOC].plc_mt_pgmax, \ 461 (_cnt < 0) ? -_np : _np); \ 462 } \ 463 } \ 464} 465 466extern plcnt_t plcnt; 467 468#define MNODE_PGCNT(mn) \ 469 (plcnt[mn][MTYPE_RELOC].plc_mt_clpgcnt + \ 470 plcnt[mn][MTYPE_NORELOC].plc_mt_clpgcnt + \ 471 plcnt[mn][MTYPE_RELOC].plc_mt_flpgcnt + \ 472 plcnt[mn][MTYPE_NORELOC].plc_mt_flpgcnt + \ 473 plcnt[mn][MTYPE_RELOC].plc_mt_lgpgcnt + \ 474 plcnt[mn][MTYPE_NORELOC].plc_mt_lgpgcnt) 475 476#define MNODETYPE_PGCNT(mn, mtype) \ 477 (plcnt[mn][mtype].plc_mt_clpgcnt + \ 478 plcnt[mn][mtype].plc_mt_flpgcnt + \ 479 plcnt[mn][mtype].plc_mt_lgpgcnt) 480 481/* 482 * macros to loop through the mtype range - MTYPE_START returns -1 in 483 * mtype if no pages in mnode/mtype and possibly NEXT mtype. 484 */ 485#define MTYPE_START(mnode, mtype, flags) { \ 486 if (plcnt[mnode][mtype].plc_mt_pgmax == 0) { \ 487 ASSERT(mtype == MTYPE_RELOC || \ 488 MNODETYPE_PGCNT(mnode, mtype) == 0 || \ 489 plcnt[mnode][mtype].plc_mt_pgmax != 0); \ 490 MTYPE_NEXT(mnode, mtype, flags); \ 491 } \ 492} 493 494/* 495 * if allocation from the RELOC pool failed and there is sufficient cage 496 * memory, attempt to allocate from the NORELOC pool. 497 */ 498#define MTYPE_NEXT(mnode, mtype, flags) { \ 499 if (!(flags & (PG_NORELOC | PGI_NOCAGE | PGI_RELOCONLY)) && \ 500 (kcage_freemem >= kcage_lotsfree)) { \ 501 if (plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax == 0) { \ 502 ASSERT(MNODETYPE_PGCNT(mnode, MTYPE_NORELOC) == 0 || \ 503 plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax != 0); \ 504 mtype = -1; \ 505 } else { \ 506 mtype = MTYPE_NORELOC; \ 507 flags |= PG_NORELOC; \ 508 } \ 509 } else { \ 510 mtype = -1; \ 511 } \ 512} 513 514/* 515 * get the ecache setsize for the current cpu. 
516 */ 517#define CPUSETSIZE() (cpunodes[CPU->cpu_id].ecache_setsize) 518 519extern struct cpu cpu0; 520#define CPU0 &cpu0 521 522#define PAGE_BSZS_SHIFT(szc) TTE_BSZS_SHIFT(szc) 523/* 524 * For sfmmu each larger page is 8 times the size of the previous 525 * size page. 526 */ 527#define FULL_REGION_CNT(rg_szc) (8) 528 529/* 530 * The counter base must be per page_counter element to prevent 531 * races when re-indexing, and the base page size element should 532 * be aligned on a boundary of the given region size. 533 * 534 * We also round up the number of pages spanned by the counters 535 * for a given region to PC_BASE_ALIGN in certain situations to simplify 536 * the coding for some non-performance critical routines. 537 */ 538#define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(mmu_page_sizes-1)) 539#define PC_BASE_ALIGN_MASK (PC_BASE_ALIGN - 1) 540 541extern int ecache_alignsize; 542#define L2CACHE_ALIGN ecache_alignsize 543#define L2CACHE_ALIGN_MAX 512 544 545extern int update_proc_pgcolorbase_after_fork; 546extern int consistent_coloring; 547extern uint_t vac_colors_mask; 548extern int vac_size; 549extern int vac_shift; 550 551/* 552 * Kernel mem segment in 64-bit space 553 */ 554extern caddr_t kmem64_base, kmem64_end, kmem64_aligned_end; 555extern int kmem64_alignsize, kmem64_szc; 556extern uint64_t kmem64_pabase; 557extern int max_bootlp_tteszc; 558 559/* 560 * Maximum and default values for user heap, stack, private and shared 561 * anonymous memory, and user text and initialized data. 562 * 563 * Initial values are defined in architecture specific mach_vm_dep.c file. 564 * Used by map_pgsz*() routines. 
565 */ 566extern size_t max_uheap_lpsize; 567extern size_t default_uheap_lpsize; 568extern size_t max_ustack_lpsize; 569extern size_t default_ustack_lpsize; 570extern size_t max_privmap_lpsize; 571extern size_t max_uidata_lpsize; 572extern size_t max_utext_lpsize; 573extern size_t max_shm_lpsize; 574 575/* 576 * For adjusting the default lpsize, for DTLB-limited page sizes. 577 */ 578extern void adjust_data_maxlpsize(size_t ismpagesize); 579 580/* 581 * Sanity control. Don't use large pages regardless of user 582 * settings if there's less than priv or shm_lpg_min_physmem memory installed. 583 * The units for this variable are 8K pages. 584 */ 585extern pgcnt_t privm_lpg_min_physmem; 586extern pgcnt_t shm_lpg_min_physmem; 587 588/* 589 * AS_2_BIN macro controls the page coloring policy. 590 * 0 (default) uses various vaddr bits 591 * 1 virtual=paddr 592 * 2 bin hopping 593 */ 594#define AS_2_BIN(as, seg, vp, addr, bin, szc) \ 595switch (consistent_coloring) { \ 596 default: \ 597 cmn_err(CE_WARN, \ 598 "AS_2_BIN: bad consistent coloring value"); \ 599 /* assume default algorithm -> continue */ \ 600 case 0: { \ 601 uint32_t ndx, new; \ 602 int slew = 0; \ 603 pfn_t pfn; \ 604 \ 605 if (vp != NULL && IS_SWAPVP(vp) && \ 606 seg->s_ops == &segvn_ops) \ 607 slew = as_color_bin(as); \ 608 \ 609 pfn = ((uintptr_t)addr >> MMU_PAGESHIFT) + \ 610 (((uintptr_t)addr >> page_coloring_shift) << \ 611 (vac_shift - MMU_PAGESHIFT)); \ 612 if ((szc) == 0 || &page_pfn_2_color_cpu == NULL) { \ 613 pfn += slew; \ 614 bin = PFN_2_COLOR(pfn, szc, NULL); \ 615 } else { \ 616 bin = PFN_2_COLOR(pfn, szc, NULL); \ 617 bin += slew >> (vac_shift - MMU_PAGESHIFT); \ 618 bin &= hw_page_array[(szc)].hp_colors - 1; \ 619 } \ 620 break; \ 621 } \ 622 case 1: \ 623 bin = PFN_2_COLOR(((uintptr_t)addr >> MMU_PAGESHIFT), \ 624 szc, NULL); \ 625 break; \ 626 case 2: { \ 627 int cnt = as_color_bin(as); \ 628 uint_t color_mask = page_get_pagecolors(0) - 1; \ 629 \ 630 /* make sure physical color aligns 
with vac color */ \ 631 while ((cnt & vac_colors_mask) != \ 632 addr_to_vcolor(addr)) { \ 633 cnt++; \ 634 } \ 635 bin = cnt = cnt & color_mask; \ 636 bin >>= PAGE_GET_COLOR_SHIFT(0, szc); \ 637 /* update per as page coloring fields */ \ 638 cnt = (cnt + 1) & color_mask; \ 639 if (cnt == (as_color_start(as) & color_mask)) { \ 640 cnt = as_color_start(as) = as_color_start(as) + \ 641 PGCLR_LOOPFACTOR; \ 642 } \ 643 as_color_bin(as) = cnt & color_mask; \ 644 break; \ 645 } \ 646} \ 647 ASSERT(bin < page_get_pagecolors(szc)); 648 649/* 650 * cpu private vm data - accessed thru CPU->cpu_vm_data 651 * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock() 652 * vc_pnext_memseg: tracks last memseg visited in page_nextn() 653 * vc_kmptr: unaligned kmem pointer for this vm_cpu_data_t 654 * vc_kmsize: orignal kmem size for this vm_cpu_data_t 655 */ 656 657typedef struct { 658 struct memseg *vc_pnum_memseg; 659 struct memseg *vc_pnext_memseg; 660 void *vc_kmptr; 661 size_t vc_kmsize; 662} vm_cpu_data_t; 663 664/* allocation size to ensure vm_cpu_data_t resides in its own cache line */ 665#define VM_CPU_DATA_PADSIZE \ 666 (P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX)) 667 668/* for boot cpu before kmem is initialized */ 669extern char vm_cpu_data0[]; 670 671/* 672 * Function to get an ecache color bin: F(as, cnt, vcolor). 673 * the goal of this function is to: 674 * - to spread a processes' physical pages across the entire ecache to 675 * maximize its use. 676 * - to minimize vac flushes caused when we reuse a physical page on a 677 * different vac color than it was previously used. 678 * - to prevent all processes to use the same exact colors and trash each 679 * other. 680 * 681 * cnt is a bin ptr kept on a per as basis. As we page_create we increment 682 * the ptr so we spread out the physical pages to cover the entire ecache. 683 * The virtual color is made a subset of the physical color in order to 684 * in minimize virtual cache flushing. 
 * We add in the as to spread out different as.  This happens when we
 * initialize the start count value.
 * sizeof(struct as) is 60 so we shift by 3 to get into the bit range
 * that will tend to change.  For example, on spitfire based machines
 * (vcshft == 1) contiguous as are spread by ~6 bins.
 * vcshft provides for proper virtual color alignment.
 * In theory cnt should be updated using cas only but if we are off by one
 * or 2 it is no big deal.
 * We also keep a start value which is used to randomize on what bin we
 * start counting when it is time to start another loop. This avoids
 * contiguous allocations of ecache size to point to the same bin.
 * Why 3? Seems to work ok. Better than 7 or anything larger.
 */
#define	PGCLR_LOOPFACTOR 3

/*
 * When a bin is empty, and we can't satisfy a color request correctly,
 * we scan.  If we assume that the programs have reasonable spatial
 * behavior, then it will not be a good idea to use the adjacent color.
 * Using the adjacent color would result in virtually adjacent addresses
 * mapping into the same spot in the cache.  So, if we stumble across
 * an empty bin, skip a bunch before looking.  After the first skip,
 * then just look one bin at a time so we don't miss our cache on
 * every look. Be sure to check every bin.  Page_create() will panic
 * if we miss a page.
 *
 * This also explains the `<=' in the for loops in both page_get_freelist()
 * and page_get_cachelist().  Since we checked the target bin, skipped
 * a bunch, then continued one at a time, we wind up checking the target bin
 * twice to make sure we get all of the bins.
715 */ 716#define BIN_STEP 20 717 718#ifdef VM_STATS 719struct vmm_vmstats_str { 720 ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */ 721 ulong_t pgf_allocok[MMU_PAGE_SIZES]; 722 ulong_t pgf_allocokrem[MMU_PAGE_SIZES]; 723 ulong_t pgf_allocfailed[MMU_PAGE_SIZES]; 724 ulong_t pgf_allocdeferred; 725 ulong_t pgf_allocretry[MMU_PAGE_SIZES]; 726 ulong_t pgc_alloc; /* page_get_cachelist */ 727 ulong_t pgc_allocok; 728 ulong_t pgc_allocokrem; 729 ulong_t pgc_allocokdeferred; 730 ulong_t pgc_allocfailed; 731 ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */ 732 ulong_t pgcp_allocfailed[MMU_PAGE_SIZES]; 733 ulong_t pgcp_allocempty[MMU_PAGE_SIZES]; 734 ulong_t pgcp_allocok[MMU_PAGE_SIZES]; 735 ulong_t ptcp[MMU_PAGE_SIZES]; /* page_trylock_contig_pages */ 736 ulong_t ptcpfreethresh[MMU_PAGE_SIZES]; 737 ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; 738 ulong_t ptcpfailszc[MMU_PAGE_SIZES]; 739 ulong_t ptcpfailcage[MMU_PAGE_SIZES]; 740 ulong_t ptcpok[MMU_PAGE_SIZES]; 741 ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ 742 ulong_t pgmf_allocfailed[MMU_PAGE_SIZES]; 743 ulong_t pgmf_allocempty[MMU_PAGE_SIZES]; 744 ulong_t pgmf_allocok[MMU_PAGE_SIZES]; 745 ulong_t pgmc_alloc; /* page_get_mnode_cachelist */ 746 ulong_t pgmc_allocfailed; 747 ulong_t pgmc_allocempty; 748 ulong_t pgmc_allocok; 749 ulong_t pladd_free[MMU_PAGE_SIZES]; /* page_list_add/sub */ 750 ulong_t plsub_free[MMU_PAGE_SIZES]; 751 ulong_t pladd_cache; 752 ulong_t plsub_cache; 753 ulong_t plsubpages_szcbig; 754 ulong_t plsubpages_szc0; 755 ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */ 756 ulong_t pfs_demote[MMU_PAGE_SIZES]; 757 ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 758 ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ 759 ulong_t ppr_relocok[MMU_PAGE_SIZES]; 760 ulong_t ppr_relocnoroot[MMU_PAGE_SIZES]; 761 ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES]; 762 ulong_t ppr_relocnolock[MMU_PAGE_SIZES]; 763 ulong_t ppr_relocnomem[MMU_PAGE_SIZES]; 764 ulong_t 
ppr_krelocfail[MMU_PAGE_SIZES]; 765 ulong_t ppr_copyfail; 766 /* page coalesce counter */ 767 ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 768 /* candidates useful */ 769 ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 770 /* ctrs changed after locking */ 771 ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 772 /* page_freelist_coalesce failed */ 773 ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 774 ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ 775 ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ 776}; 777extern struct vmm_vmstats_str vmm_vmstats; 778#endif /* VM_STATS */ 779 780/* 781 * Used to hold off page relocations into the cage until OBP has completed 782 * its boot-time handoff of its resources to the kernel. 783 */ 784extern int page_relocate_ready; 785 786/* 787 * cpu/mmu-dependent vm variables may be reset at bootup. 788 */ 789extern uint_t mmu_page_sizes; 790extern uint_t max_mmu_page_sizes; 791extern uint_t mmu_hashcnt; 792extern uint_t max_mmu_hashcnt; 793extern size_t mmu_ism_pagesize; 794extern int mmu_exported_pagesize_mask; 795extern uint_t mmu_exported_page_sizes; 796extern uint_t szc_2_userszc[]; 797extern uint_t userszc_2_szc[]; 798 799#define mmu_legacy_page_sizes mmu_exported_page_sizes 800#define USERSZC_2_SZC(userszc) (userszc_2_szc[userszc]) 801#define SZC_2_USERSZC(szc) (szc_2_userszc[szc]) 802 803/* 804 * Platform specific page routines 805 */ 806extern void mach_page_add(page_t **, page_t *); 807extern void mach_page_sub(page_t **, page_t *); 808extern uint_t page_get_pagecolors(uint_t); 809extern void ppcopy_kernel__relocatable(page_t *, page_t *); 810#define ppcopy_kernel(p1, p2) ppcopy_kernel__relocatable(p1, p2) 811 812/* 813 * platform specific large pages for kernel heap support 814 */ 815extern size_t get_segkmem_lpsize(size_t lpsize); 816extern size_t mmu_get_kernel_lpsize(size_t lpsize); 817extern void 
mmu_init_kernel_pgsz(struct hat *hat); 818extern void mmu_init_kcontext(); 819extern uint64_t kcontextreg; 820 821/* 822 * Nucleus data page allocator routines 823 */ 824extern void ndata_alloc_init(struct memlist *, uintptr_t, uintptr_t); 825extern void *ndata_alloc(struct memlist *, size_t, size_t); 826extern void *ndata_extra_base(struct memlist *, size_t, caddr_t); 827extern size_t ndata_maxsize(struct memlist *); 828extern size_t ndata_spare(struct memlist *, size_t, size_t); 829 830/* 831 * Platform specific support for non-coherent I-cache and soft exec 832 */ 833extern uint_t icache_is_coherent; 834extern uint_t force_sync_icache_after_bcopy; 835extern uint_t force_sync_icache_after_dma; 836 837extern void mach_setup_icache(uint_t); 838#pragma weak mach_setup_icache 839 840#ifdef __cplusplus 841} 842#endif 843 844#endif /* _VM_DEP_H */ 845