vm_dep.h revision 7718:555eee33ff6e
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * UNIX machine dependent virtual memory support.
28 */
29
30#ifndef	_VM_DEP_H
31#define	_VM_DEP_H
32
33#ifdef	__cplusplus
34extern "C" {
35#endif
36
37#include <vm/hat_sfmmu.h>
38#include <sys/archsystm.h>
39#include <sys/memnode.h>
40
41#define	GETTICK()	gettick()
/*
 * Do not use this function for obtaining clock tick.  This
 * is called by callers who do not need to have a guaranteed
 * correct tick value.  The proper routine to use is tsc_read().
 */
47#define	randtick()	gettick()
48
49/*
50 * Per page size free lists. Allocated dynamically.
51 */
52#define	MAX_MEM_TYPES	2	/* 0 = reloc, 1 = noreloc */
53#define	MTYPE_RELOC	0
54#define	MTYPE_NORELOC	1
55
56#define	PP_2_MTYPE(pp)	(PP_ISNORELOC(pp) ? MTYPE_NORELOC : MTYPE_RELOC)
57
/*
 * Choose the memory type for an allocation: MTYPE_NORELOC when PG_NORELOC
 * is set in flags, MTYPE_RELOC otherwise.  mtype must be assignable.
 * vp, vaddr and pgsz are not used by this definition.
 *
 * flags is parenthesized so that a compound expression such as (a | b)
 * is tested as a whole.  The expansion ends in ';' and is therefore a
 * complete statement by itself.
 */
#define	MTYPE_INIT(mtype, vp, vaddr, flags, pgsz)			\
	(mtype) = ((flags) & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC;
60
/*
 * mtype init for page_get_replacement_page: MTYPE_NORELOC when PG_NORELOC
 * is set in flags, MTYPE_RELOC otherwise.  pp, mnode and pgcnt are not used
 * by this definition.  flags is parenthesized so compound expressions are
 * tested as a whole; the expansion ends in ';'.
 */
#define	MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt)			\
	(mtype) = ((flags) & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC;
64
/*
 * Copy the physbase/physmax pfn bounds of memory node 'mnode' from
 * mem_node_config into pfnlo/pfnhi.  Asserts that mtype is not
 * MTYPE_NORELOC.
 *
 * NOTE: this expands to three separate statements (each ending in ';'),
 * so it must not be used as the unbraced body of an if/else or loop.
 */
#define	MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi)			\
	ASSERT((mtype) != MTYPE_NORELOC);				\
	(pfnlo) = mem_node_config[(mnode)].physbase;			\
	(pfnhi) = mem_node_config[(mnode)].physmax;
69
70/*
71 * candidate counters in vm_pagelist.c are indexed by color and range
72 */
73#define	MAX_MNODE_MRANGES		MAX_MEM_TYPES
74#define	MNODE_RANGE_CNT(mnode)		MAX_MNODE_MRANGES
75#define	MNODE_MAX_MRANGE(mnode)		(MAX_MEM_TYPES - 1)
76#define	MTYPE_2_MRANGE(mnode, mtype)	(mtype)
77
78/*
79 * Internal PG_ flags.
80 */
81#define	PGI_RELOCONLY	0x10000	/* acts in the opposite sense to PG_NORELOC */
82#define	PGI_NOCAGE	0x20000	/* indicates Cage is disabled */
83#define	PGI_PGCPHIPRI	0x40000	/* page_get_contig_page priority allocation */
84#define	PGI_PGCPSZC0	0x80000	/* relocate base pagesize page */
85
86/*
87 * PGI mtype flags - should not overlap PGI flags
88 */
89#define	PGI_MT_RANGE	0x1000000	/* mtype range */
90#define	PGI_MT_NEXT	0x2000000	/* get next mtype */
91
92extern page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
93extern page_t ***page_cachelists[MAX_MEM_TYPES];
94
95#define	PAGE_FREELISTS(mnode, szc, color, mtype) \
96	(*(page_freelists[szc][mtype][mnode] + (color)))
97
98#define	PAGE_CACHELISTS(mnode, color, mtype) \
99	(*(page_cachelists[mtype][mnode] + (color)))
100
101/*
102 * There are 'page_colors' colors/bins.  Spread them out under a
103 * couple of locks.  There are mutexes for both the page freelist
104 * and the page cachelist.  We want enough locks to make contention
105 * reasonable, but not too many -- otherwise page_freelist_lock() gets
106 * so expensive that it becomes the bottleneck!
107 */
108#define	NPC_MUTEX	16
109
110extern kmutex_t	*fpc_mutex[NPC_MUTEX];
111extern kmutex_t	*cpc_mutex[NPC_MUTEX];
112
113/*
114 * Iterator provides the info needed to convert RA to PA.
115 * MEM_NODE_ITERATOR_INIT() should be called before
116 * PAGE_NEXT_PFN_FOR_COLOR() if pfn was not obtained via a previous
117 * PAGE_NEXT_PFN_FOR_COLOR() call. Iterator caches color 2 hash
118 * translations requiring initializer call if color or ceq_mask changes,
119 * even if pfn doesn't. MEM_NODE_ITERATOR_INIT() must also be called before
120 * PFN_2_COLOR() that uses a valid iterator argument.
121 */
122#ifdef	sun4v
123
124typedef struct mem_node_iterator {
125	uint_t mi_mnode;		/* mnode in which to iterate */
126	int mi_init;			/* set to 1 when first init */
127	int mi_last_mblock;		/* last mblock visited */
128	uint_t mi_hash_ceq_mask;	/* cached copy of ceq_mask */
129	uint_t mi_hash_color;		/* cached copy of color */
130	uint_t mi_mnode_mask;		/* number of mask bits */
131	uint_t mi_mnode_pfn_shift;	/* mnode position in pfn */
132	pfn_t mi_mblock_base;		/* first valid pfn in current mblock */
133	pfn_t mi_mblock_end;		/* last valid pfn in current mblock */
134	pfn_t mi_ra_to_pa;		/* ra adjustment for current mblock */
135	pfn_t mi_mnode_pfn_mask;	/* mask to obtain mnode id bits */
136} mem_node_iterator_t;
137
138#define	MEM_NODE_ITERATOR_DECL(it) \
139	mem_node_iterator_t it
140#define	MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it) \
141	(pfn) = plat_mem_node_iterator_init((pfn), (mnode), (szc), (it), 1)
142
143extern pfn_t plat_mem_node_iterator_init(pfn_t, int, uchar_t,
144    mem_node_iterator_t *, int);
145extern pfn_t plat_rapfn_to_papfn(pfn_t);
146extern int interleaved_mnodes;
147
148#else	/* sun4v */
149
150#define	MEM_NODE_ITERATOR_DECL(it) \
151	void *it = NULL
152#define	MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it)
153
154#endif	/* sun4v */
155
156/*
157 * Return the mnode limits so that hpc_counters length and base
158 * index can be determined. When interleaved_mnodes is set, we
159 * create an array only for the first mnode that exists. All other
160 * mnodes will share the array in this case.
161 * If interleaved_mnodes is not set, simply return the limits for
162 * the given mnode.
163 */
164#define	HPM_COUNTERS_LIMITS(mnode, physbase, physmax, first)		\
165	if (!interleaved_mnodes) {					\
166		(physbase) = mem_node_config[(mnode)].physbase;		\
167		(physmax) = mem_node_config[(mnode)].physmax;		\
168		(first) = (mnode);					\
169	} else if ((first) < 0) {					\
170		mem_node_max_range(&(physbase), &(physmax));		\
171		(first) = (mnode);					\
172	}
173
/*
 * Take the page counters rwlock as writer and then the page freelist lock.
 * With interleaved_mnodes set this is done for every index below
 * max_mem_nodes, in ascending order; otherwise only for 'mnode'.
 */
#define	PAGE_CTRS_WRITE_LOCK(mnode)					\
	if (interleaved_mnodes) {					\
		/* changing shared hpm_counters */			\
		int _mn = 0;						\
		while (_mn < max_mem_nodes) {				\
			rw_enter(&page_ctrs_rwlock[_mn], RW_WRITER);	\
			page_freelist_lock(_mn);			\
			_mn++;						\
		}							\
	} else {							\
		rw_enter(&page_ctrs_rwlock[(mnode)], RW_WRITER);	\
		page_freelist_lock(mnode);				\
	}
186
/*
 * Counterpart of PAGE_CTRS_WRITE_LOCK: drop the page freelist lock and
 * then the page counters rwlock, for every index below max_mem_nodes when
 * interleaved_mnodes is set, otherwise only for 'mnode'.
 */
#define	PAGE_CTRS_WRITE_UNLOCK(mnode)					\
	if (interleaved_mnodes) {					\
		int _mn = 0;						\
		while (_mn < max_mem_nodes) {				\
			page_freelist_unlock(_mn);			\
			rw_exit(&page_ctrs_rwlock[_mn]);		\
			_mn++;						\
		}							\
	} else {							\
		page_freelist_unlock(mnode);				\
		rw_exit(&page_ctrs_rwlock[(mnode)]);			\
	}
198
199/*
200 * cpu specific color conversion functions
201 */
202extern uint_t page_get_nsz_color_mask_cpu(uchar_t, uint_t);
203#pragma weak page_get_nsz_color_mask_cpu
204
205extern uint_t page_get_nsz_color_cpu(uchar_t, uint_t);
206#pragma weak page_get_nsz_color_cpu
207
208extern uint_t page_get_color_shift_cpu(uchar_t, uchar_t);
209#pragma weak page_get_color_shift_cpu
210
211extern uint_t page_convert_color_cpu(uint_t, uchar_t, uchar_t);
212#pragma weak page_convert_color_cpu
213
214extern pfn_t page_next_pfn_for_color_cpu(pfn_t,
215    uchar_t, uint_t, uint_t, uint_t, void *);
216#pragma weak page_next_pfn_for_color_cpu
217
218extern uint_t  page_pfn_2_color_cpu(pfn_t, uchar_t, void *);
219#pragma weak page_pfn_2_color_cpu
220
221#define	PAGE_GET_COLOR_SHIFT(szc, nszc)				\
222	((&page_get_color_shift_cpu != NULL) ?			\
223	    page_get_color_shift_cpu(szc, nszc) :		\
224	    (hw_page_array[(nszc)].hp_shift -			\
225		hw_page_array[(szc)].hp_shift))
226
227#define	PAGE_CONVERT_COLOR(ncolor, szc, nszc)			\
228	((&page_convert_color_cpu != NULL) ?			\
229	    page_convert_color_cpu(ncolor, szc, nszc) :		\
230	    ((ncolor) << PAGE_GET_COLOR_SHIFT((szc), (nszc))))
231
/*
 * Map a pfn to its color for page size code szc.  If the weak symbol
 * page_pfn_2_color_cpu is present the computation is delegated to it
 * (the only path that uses 'it'); otherwise the pfn is masked with the
 * base page size color mask and shifted down by the difference between
 * the szc and base page shifts.
 *
 * pfn and szc are parenthesized in the fallback so compound argument
 * expressions are evaluated as a unit.
 */
#define	PFN_2_COLOR(pfn, szc, it)				\
	((&page_pfn_2_color_cpu != NULL) ?			\
	    page_pfn_2_color_cpu(pfn, szc, it) :		\
	    (((pfn) & (hw_page_array[0].hp_colors - 1)) >>	\
		(hw_page_array[(szc)].hp_shift -		\
		    hw_page_array[0].hp_shift)))
238
239#define	PNUM_SIZE(szc)							\
240	(hw_page_array[(szc)].hp_pgcnt)
241#define	PNUM_SHIFT(szc)							\
242	(hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift)
243#define	PAGE_GET_SHIFT(szc)						\
244	(hw_page_array[(szc)].hp_shift)
245#define	PAGE_GET_PAGECOLORS(szc)					\
246	(hw_page_array[(szc)].hp_colors)
247
/*
 * This macro calculates the next sequential pfn with the specified
 * color using color equivalency mask.  pfn is both read and assigned, so
 * it must be a modifiable lvalue.
 *
 * If the weak symbol page_next_pfn_for_color_cpu is present, the whole
 * computation is delegated to it (color_mask and it are used only on that
 * path).  Otherwise ceq_mask + 1 is used as the stride (asserted to be a
 * power of two) and pfn advances in units of the szc page size.
 *
 * The macro's own locals carry a leading underscore so that they cannot
 * capture an identically named variable passed in as an argument.
 */
#define	PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask, it)   \
	{                                                                    \
		ASSERT(((color) & ~(ceq_mask)) == 0);                        \
		if (&page_next_pfn_for_color_cpu == NULL) {                  \
			uint_t	_pfn_shift = PAGE_BSZS_SHIFT(szc);           \
			pfn_t	_spfn = (pfn) >> _pfn_shift;                 \
			pfn_t	_stride = (ceq_mask) + 1;                    \
			ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0);        \
			if (((_spfn ^ (color)) & (ceq_mask)) == 0) {         \
				/* already on color: advance one stride */   \
				(pfn) += _stride << _pfn_shift;              \
			} else {                                             \
				/* snap to color; bump if not past _spfn */  \
				(pfn) = (_spfn & ~(pfn_t)(ceq_mask)) |       \
				    (color);                                 \
				(pfn) = ((pfn) > _spfn ? (pfn) :             \
				    (pfn) + _stride) << _pfn_shift;          \
			}                                                    \
		} else {                                                     \
			(pfn) = page_next_pfn_for_color_cpu((pfn), (szc),    \
			    (color), (ceq_mask), (color_mask), (it));        \
		}                                                            \
	}
272
273/* get the color equivalency mask for the next szc */
274#define	PAGE_GET_NSZ_MASK(szc, mask)                                         \
275	((&page_get_nsz_color_mask_cpu == NULL) ?                            \
276	    ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) :  \
277	    page_get_nsz_color_mask_cpu(szc, mask))
278
279/* get the color of the next szc */
280#define	PAGE_GET_NSZ_COLOR(szc, color)                                       \
281	((&page_get_nsz_color_cpu == NULL) ?                                 \
282	    ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \
283	    page_get_nsz_color_cpu(szc, color))
284
/*
 * Find the bin for the given page if it was of size szc.  The iterator
 * argument handed to PFN_2_COLOR is the constant (void *)(-1).
 * pp is parenthesized so that pointer expressions such as (base + i)
 * can be passed.
 */
#define	PP_2_BIN_SZC(pp, szc)	\
	(PFN_2_COLOR((pp)->p_pagenum, szc, (void *)(-1)))

/* Bin of the page using the size code stored in the page itself. */
#define	PP_2_BIN(pp)		(PP_2_BIN_SZC(pp, (pp)->p_szc))

/* Memory node of the page, derived from its pfn. */
#define	PP_2_MEM_NODE(pp)	(PFN_2_MEM_NODE((pp)->p_pagenum))
291
/*
 * Address of the mutex covering 'bin' on 'mnode': taken from fpc_mutex
 * when PG_FREE_LIST is set in flags, from cpc_mutex otherwise.  The bin is
 * folded onto the NPC_MUTEX locks with a mask.  flags and mnode are
 * parenthesized so compound expressions are evaluated as a unit.
 */
#define	PC_BIN_MUTEX(mnode, bin, flags) (((flags) & PG_FREE_LIST) ?	\
	&fpc_mutex[(bin) & (NPC_MUTEX - 1)][(mnode)] :			\
	&cpc_mutex[(bin) & (NPC_MUTEX - 1)][(mnode)])
295
296#define	FPC_MUTEX(mnode, i)	(&fpc_mutex[i][mnode])
297#define	CPC_MUTEX(mnode, i)	(&cpc_mutex[i][mnode])
298
/*
 * Round pfnum down to the first pfn of the szc-sized page containing it.
 * The mask is built from a pfn_t-wide 1 (as PC_BASE_ALIGN does) and pfnum
 * is parenthesized so compound expressions are masked as a whole.
 */
#define	PFN_BASE(pfnum, szc)	\
	((pfnum) & ~(((pfn_t)1 << PAGE_BSZS_SHIFT(szc)) - 1))
300
301/*
302 * this structure is used for walking free page lists
303 * controls when to split large pages into smaller pages,
304 * and when to coalesce smaller pages into larger pages
305 */
306typedef struct page_list_walker {
307	uint_t	plw_colors;		/* num of colors for szc */
308	uint_t  plw_color_mask;		/* colors-1 */
309	uint_t	plw_bin_step;		/* next bin: 1 or 2 */
310	uint_t  plw_count;		/* loop count */
311	uint_t	plw_bin0;		/* starting bin */
312	uint_t  plw_bin_marker;		/* bin after initial jump */
313	uint_t  plw_bin_split_prev;	/* last bin we tried to split */
314	uint_t  plw_do_split;		/* set if OK to split */
315	uint_t  plw_split_next;		/* next bin to split */
316	uint_t	plw_ceq_dif;		/* number of different color groups */
317					/* to check */
318	uint_t	plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */
319	uint_t	plw_bins[MMU_PAGE_SIZES + 1];	/* num of bins */
320} page_list_walker_t;
321
322void	page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin,
323    int can_split, int use_ceq, page_list_walker_t *plw);
324
325typedef	char	hpmctr_t;
326
327#ifdef DEBUG
328#define	CHK_LPG(pp, szc)	chk_lpg(pp, szc)
329extern void	chk_lpg(page_t *, uchar_t);
330#else
331#define	CHK_LPG(pp, szc)
332#endif
333
334/*
335 * page list count per mnode and type.
336 */
337typedef	struct {
338	pgcnt_t	plc_mt_pgmax;		/* max page cnt */
339	pgcnt_t plc_mt_clpgcnt;		/* cache list cnt */
340	pgcnt_t plc_mt_flpgcnt;		/* free list cnt - small pages */
341	pgcnt_t plc_mt_lgpgcnt;		/* free list cnt - large pages */
342#ifdef DEBUG
343	struct {
344		pgcnt_t plc_mts_pgcnt;	/* per page size count */
345		int	plc_mts_colors;
346		pgcnt_t	*plc_mtsc_pgcnt; /* per color bin count */
347	} plc_mts[MMU_PAGE_SIZES];
348#endif
349} plcnt_t[MAX_MEM_NODES][MAX_MEM_TYPES];
350
351#ifdef DEBUG
352
353#define	PLCNT_SZ(ctrs_sz) {						\
354	int	szc;							\
355	for (szc = 0; szc < mmu_page_sizes; szc++) {			\
356		int	colors = page_get_pagecolors(szc);		\
357		ctrs_sz += (max_mem_nodes * MAX_MEM_TYPES *		\
358		    colors * sizeof (pgcnt_t));				\
359	}								\
360}
361
362#define	PLCNT_INIT(base) {						\
363	int	mn, mt, szc, colors;					\
364	for (szc = 0; szc < mmu_page_sizes; szc++) {			\
365		colors = page_get_pagecolors(szc);			\
366		for (mn = 0; mn < max_mem_nodes; mn++) {		\
367			for (mt = 0; mt < MAX_MEM_TYPES; mt++) {	\
368				plcnt[mn][mt].plc_mts[szc].		\
369				    plc_mts_colors = colors;		\
370				plcnt[mn][mt].plc_mts[szc].		\
371				    plc_mtsc_pgcnt = (pgcnt_t *)base;	\
372				base += (colors * sizeof (pgcnt_t));	\
373			}						\
374		}							\
375	}								\
376}
377
/*
 * DEBUG variant: add cnt to the page list counters of plcnt[mn][mtype].
 * The cache list counter is used when PG_CACHE_LIST is set in flags,
 * otherwise the large page counter when szc is non-zero, otherwise the
 * small page free list counter.  The per page size and per color bin
 * counters for szc are updated as well; the bin comes from PP_2_BIN(pp).
 *
 * flags is parenthesized so compound expressions are tested as a whole,
 * and the local is underscore-prefixed so it cannot capture a caller
 * variable named 'bin'.
 */
#define	PLCNT_DO(pp, mn, mtype, szc, cnt, flags) {			\
	int	_bin = PP_2_BIN(pp);					\
	if ((flags) & PG_CACHE_LIST)					\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt);	\
	else if (szc)							\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt);	\
	else								\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt);	\
	atomic_add_long(&plcnt[mn][mtype].plc_mts[szc].plc_mts_pgcnt,	\
	    cnt);							\
	atomic_add_long(&plcnt[mn][mtype].plc_mts[szc].			\
	    plc_mtsc_pgcnt[_bin], cnt);					\
}
391
392#else
393
394#define	PLCNT_SZ(ctrs_sz)
395
396#define	PLCNT_INIT(base)
397
/*
 * Non-DEBUG variant: add cnt to one page list counter of plcnt[mn][mtype].
 * The cache list counter is used when PG_CACHE_LIST is set in flags,
 * otherwise the large page counter when szc is non-zero, otherwise the
 * small page free list counter.  pp is not used by this variant.
 *
 * PG_FREE_LIST may not be explicitly set in flags for large pages, which
 * is why only PG_CACHE_LIST is tested.  flags is parenthesized so compound
 * expressions are tested as a whole.
 */

#define	PLCNT_DO(pp, mn, mtype, szc, cnt, flags) {			\
	if ((flags) & PG_CACHE_LIST)					\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt);	\
	else if (szc)							\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt);	\
	else								\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt);	\
}
408
409#endif
410
/*
 * Add (PLCNT_INCR) or subtract (PLCNT_DECR) the number of base pages in
 * one page of size szc, 1 << PAGE_BSZS_SHIFT(szc), via PLCNT_DO.
 *
 * The negative count is formed by negating the shifted value: shifting a
 * negative number left is undefined behaviour in C.  The shift is done in
 * long to match the type of the count, and the local is underscore-prefixed
 * so that a caller argument mentioning a variable named 'cnt' is not
 * captured by it.
 */
#define	PLCNT_INCR(pp, mn, mtype, szc, flags) {				\
	long	_cnt = (1L << PAGE_BSZS_SHIFT(szc));			\
	PLCNT_DO(pp, mn, mtype, szc, _cnt, flags);			\
}

#define	PLCNT_DECR(pp, mn, mtype, szc, flags) {				\
	long	_cnt = -(1L << PAGE_BSZS_SHIFT(szc));			\
	PLCNT_DO(pp, mn, mtype, szc, _cnt, flags);			\
}
420
/*
 * Move a page's share of the page list max count from the RELOC mtype to
 * the NORELOC mtype of its memory node - done when pages are transferred
 * from RELOC to NORELOC (kcage_init or kcage_assimilate_page).  The amount
 * moved is the number of base pages in a page of size pp->p_szc.
 */

#define	PLCNT_XFER_NORELOC(pp) {					\
	int	_node = PP_2_MEM_NODE(pp);				\
	long	_npgs = (1 << PAGE_BSZS_SHIFT((pp)->p_szc));		\
	atomic_add_long(&plcnt[_node][MTYPE_NORELOC].plc_mt_pgmax,	\
	    _npgs);							\
	atomic_add_long(&plcnt[_node][MTYPE_RELOC].plc_mt_pgmax,	\
	    -_npgs);							\
}
432
/*
 * Adjust the RELOC page list max counts for the pfn range starting at
 * 'pfn' and spanning |cnt| pages; a negative cnt subtracts.  Used when
 * memory is added to the page lists during startup (add_physmem) or during
 * a DR operation when memory is added (kphysm_add_memory_dynamic) or
 * deleted (kphysm_del_cleanup).
 *
 * If the weak symbol plat_mem_node_intersect_range is present, it is asked
 * for each mnode how many pages of the range fall in that mnode.
 * Otherwise the range is walked directly, attributing to each mnode the
 * pages up to that mnode's physmax.
 */
#define	PLCNT_MODIFY_MAX(pfn, cnt) {					       \
	spgcnt_t _cnt = (spgcnt_t)(cnt);				       \
	pgcnt_t _acnt = ABS(_cnt);					       \
	int _mn;							       \
	pgcnt_t _np;							       \
	if (&plat_mem_node_intersect_range == NULL) {			       \
		pfn_t _pfn = (pfn);					       \
		pfn_t _endpfn = _pfn + _acnt;				       \
		for (; _pfn < _endpfn; _pfn += _np) {			       \
			_mn = PFN_2_MEM_NODE(_pfn);			       \
			_np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - \
			    _pfn;					       \
			atomic_add_long(&plcnt[_mn][MTYPE_RELOC].plc_mt_pgmax, \
			    (_cnt < 0) ? -_np : _np);			       \
		}							       \
	} else {							       \
		for (_mn = 0; _mn < max_mem_nodes; _mn++) {		       \
			plat_mem_node_intersect_range((pfn), _acnt, _mn, &_np);\
			if (_np != 0) {					       \
				atomic_add_long(			       \
				    &plcnt[_mn][MTYPE_RELOC].plc_mt_pgmax,     \
				    (_cnt < 0) ? -_np : _np);		       \
			}						       \
		}							       \
	}								       \
}
465
466extern plcnt_t	plcnt;
467
468#define	MNODE_PGCNT(mn)							\
469	(plcnt[mn][MTYPE_RELOC].plc_mt_clpgcnt +			\
470	    plcnt[mn][MTYPE_NORELOC].plc_mt_clpgcnt +			\
471	    plcnt[mn][MTYPE_RELOC].plc_mt_flpgcnt +			\
472	    plcnt[mn][MTYPE_NORELOC].plc_mt_flpgcnt +			\
473	    plcnt[mn][MTYPE_RELOC].plc_mt_lgpgcnt +			\
474	    plcnt[mn][MTYPE_NORELOC].plc_mt_lgpgcnt)
475
476#define	MNODETYPE_PGCNT(mn, mtype)					\
477	(plcnt[mn][mtype].plc_mt_clpgcnt +				\
478	    plcnt[mn][mtype].plc_mt_flpgcnt +				\
479	    plcnt[mn][mtype].plc_mt_lgpgcnt)
480
481/*
482 * macros to loop through the mtype range - MTYPE_START returns -1 in
483 * mtype if no pages in mnode/mtype and possibly NEXT mtype.
484 */
485#define	MTYPE_START(mnode, mtype, flags) {				\
486	if (plcnt[mnode][mtype].plc_mt_pgmax == 0) {			\
487		ASSERT(mtype == MTYPE_RELOC ||				\
488		    MNODETYPE_PGCNT(mnode, mtype) == 0 ||		\
489		    plcnt[mnode][mtype].plc_mt_pgmax != 0);		\
490		MTYPE_NEXT(mnode, mtype, flags);			\
491	}								\
492}
493
/*
 * if allocation from the RELOC pool failed and there is sufficient cage
 * memory, attempt to allocate from the NORELOC pool.
 *
 * mtype becomes MTYPE_NORELOC (and PG_NORELOC is or'ed into flags) only
 * when none of PG_NORELOC, PGI_NOCAGE, PGI_RELOCONLY is set in flags,
 * kcage_freemem is at least kcage_lotsfree, and the mnode's NORELOC
 * plc_mt_pgmax is non-zero.  In every other case mtype is set to -1.
 */
#define	MTYPE_NEXT(mnode, mtype, flags) { 				\
	if ((flags & (PG_NORELOC | PGI_NOCAGE | PGI_RELOCONLY)) ||	\
	    (kcage_freemem < kcage_lotsfree)) {				\
		mtype = -1;						\
	} else if (plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax != 0) {	\
		mtype = MTYPE_NORELOC;					\
		flags |= PG_NORELOC;					\
	} else {							\
		ASSERT(MNODETYPE_PGCNT(mnode, MTYPE_NORELOC) == 0 ||	\
		    plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax != 0);	\
		mtype = -1;						\
	}								\
}
513
514/*
515 * get the ecache setsize for the current cpu.
516 */
517#define	CPUSETSIZE()	(cpunodes[CPU->cpu_id].ecache_setsize)
518
extern struct cpu	cpu0;
/*
 * Pointer to cpu0.  Parenthesized so that postfix operators applied to
 * the macro (CPU0->field) bind to the address, not to cpu0 itself.
 */
#define	CPU0		(&cpu0)
521
522#define	PAGE_BSZS_SHIFT(szc)	TTE_BSZS_SHIFT(szc)
523/*
524 * For sfmmu each larger page is 8 times the size of the previous
525 * size page.
526 */
527#define	FULL_REGION_CNT(rg_szc)	(8)
528
529/*
530 * The counter base must be per page_counter element to prevent
531 * races when re-indexing, and the base page size element should
532 * be aligned on a boundary of the given region size.
533 *
534 * We also round up the number of pages spanned by the counters
535 * for a given region to PC_BASE_ALIGN in certain situations to simplify
536 * the coding for some non-performance critical routines.
537 */
538#define	PC_BASE_ALIGN		((pfn_t)1 << PAGE_BSZS_SHIFT(mmu_page_sizes-1))
539#define	PC_BASE_ALIGN_MASK	(PC_BASE_ALIGN - 1)
540
541extern int ecache_alignsize;
542#define	L2CACHE_ALIGN		ecache_alignsize
543#define	L2CACHE_ALIGN_MAX	512
544
545extern int update_proc_pgcolorbase_after_fork;
546extern int consistent_coloring;
547extern uint_t vac_colors_mask;
548extern int vac_size;
549extern int vac_shift;
550
551/*
552 * Kernel mem segment in 64-bit space
553 */
554extern caddr_t kmem64_base, kmem64_end, kmem64_aligned_end;
555extern int kmem64_alignsize, kmem64_szc;
556extern uint64_t kmem64_pabase;
557extern int max_bootlp_tteszc;
558
559/*
560 * Maximum and default values for user heap, stack, private and shared
561 * anonymous memory, and user text and initialized data.
562 *
563 * Initial values are defined in architecture specific mach_vm_dep.c file.
564 * Used by map_pgsz*() routines.
565 */
566extern size_t max_uheap_lpsize;
567extern size_t default_uheap_lpsize;
568extern size_t max_ustack_lpsize;
569extern size_t default_ustack_lpsize;
570extern size_t max_privmap_lpsize;
571extern size_t max_uidata_lpsize;
572extern size_t max_utext_lpsize;
573extern size_t max_shm_lpsize;
574
575/*
576 * For adjusting the default lpsize, for DTLB-limited page sizes.
577 */
578extern void adjust_data_maxlpsize(size_t ismpagesize);
579
580/*
581 * Sanity control. Don't use large pages regardless of user
582 * settings if there's less than priv or shm_lpg_min_physmem memory installed.
583 * The units for this variable are 8K pages.
584 */
585extern pgcnt_t privm_lpg_min_physmem;
586extern pgcnt_t shm_lpg_min_physmem;
587
/*
 * AS_2_BIN macro controls the page coloring policy.
 * 0 (default) uses various vaddr bits
 * 1 virtual=paddr
 * 2 bin hopping
 *
 * The policy is selected by consistent_coloring; any other value logs a
 * warning and deliberately falls through to policy 0.  The result is
 * stored in 'bin', which must be assignable, and is asserted to be below
 * page_get_pagecolors(szc).  Policy 2 also updates the per-as coloring
 * fields through as_color_bin(as) and as_color_start(as).
 *
 * The expansion is a switch statement followed by an ASSERT statement, so
 * it must not be used as the unbraced body of an if/else or loop.
 * vp, seg and addr are parenthesized so compound expressions may be
 * passed.
 */
#define	AS_2_BIN(as, seg, vp, addr, bin, szc)				\
switch (consistent_coloring) {						\
	default:							\
		cmn_err(CE_WARN,					\
			"AS_2_BIN: bad consistent coloring value");	\
		/* assume default algorithm -> continue */		\
		/* FALLTHROUGH */					\
	case 0: {							\
		int slew = 0;						\
		pfn_t pfn;						\
									\
		if ((vp) != NULL && IS_SWAPVP(vp) &&			\
		    (seg)->s_ops == &segvn_ops)				\
			slew = as_color_bin(as);			\
									\
		pfn = ((uintptr_t)(addr) >> MMU_PAGESHIFT) +		\
			(((uintptr_t)(addr) >> page_coloring_shift) <<	\
			(vac_shift - MMU_PAGESHIFT));			\
		if ((szc) == 0 || &page_pfn_2_color_cpu == NULL) {	\
			pfn += slew;					\
			bin = PFN_2_COLOR(pfn, szc, NULL);		\
		} else {						\
			bin = PFN_2_COLOR(pfn, szc, NULL);		\
			bin += slew >> (vac_shift - MMU_PAGESHIFT);	\
			bin &= hw_page_array[(szc)].hp_colors - 1;	\
		}							\
		break;							\
	}								\
	case 1:								\
		bin = PFN_2_COLOR(((uintptr_t)(addr) >> MMU_PAGESHIFT),	\
		    szc, NULL);						\
		break;							\
	case 2: {							\
		int cnt = as_color_bin(as);				\
		uint_t color_mask = page_get_pagecolors(0) - 1;		\
									\
		/* make sure physical color aligns with vac color */	\
		while ((cnt & vac_colors_mask) !=			\
		    addr_to_vcolor(addr)) {				\
			cnt++;						\
		}							\
		bin = cnt = cnt & color_mask;				\
		bin >>= PAGE_GET_COLOR_SHIFT(0, szc);			\
		/* update per as page coloring fields */		\
		cnt = (cnt + 1) & color_mask;				\
		if (cnt == (as_color_start(as) & color_mask)) {		\
			cnt = as_color_start(as) = as_color_start(as) + \
				PGCLR_LOOPFACTOR;			\
		}							\
		as_color_bin(as) = cnt & color_mask;			\
		break;							\
	}								\
}									\
	ASSERT(bin < page_get_pagecolors(szc));
648
/*
 * cpu private vm data - accessed thru CPU->cpu_vm_data
 *	vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock()
 *	vc_pnext_memseg: tracks last memseg visited in page_nextn()
 *	vc_kmptr: unaligned kmem pointer for this vm_cpu_data_t
 *	vc_kmsize: original kmem size for this vm_cpu_data_t
 */
656
657typedef struct {
658	struct memseg	*vc_pnum_memseg;
659	struct memseg	*vc_pnext_memseg;
660	void		*vc_kmptr;
661	size_t		vc_kmsize;
662} vm_cpu_data_t;
663
664/* allocation size to ensure vm_cpu_data_t resides in its own cache line */
665#define	VM_CPU_DATA_PADSIZE						\
666	(P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX))
667
668/* for boot cpu before kmem is initialized */
669extern char	vm_cpu_data0[];
670
/*
 * Function to get an ecache color bin: F(as, cnt, vcolor).
 * the goal of this function is to:
 * - to spread a process's physical pages across the entire ecache to
 *	maximize its use.
 * - to minimize vac flushes caused when we reuse a physical page on a
 *	different vac color than it was previously used.
 * - to prevent all processes from using the same exact colors and trash
 *	each other.
 *
 * cnt is a bin ptr kept on a per as basis.  As we page_create we increment
 * the ptr so we spread out the physical pages to cover the entire ecache.
 * The virtual color is made a subset of the physical color in order to
 * minimize virtual cache flushing.
 * We add in the as to spread out different as.	 This happens when we
 * initialize the start count value.
 * sizeof(struct as) is 60 so we shift by 3 to get into the bit range
 * that will tend to change.  For example, on spitfire based machines
 * (vcshft == 1) contiguous as are spread by ~6 bins.
 * vcshft provides for proper virtual color alignment.
 * In theory cnt should be updated using cas only but if we are off by one
 * or 2 it is no big deal.
 * We also keep a start value which is used to randomize on what bin we
 * start counting when it is time to start another loop. This avoids
 * contiguous allocations of ecache size to point to the same bin.
 * Why 3? Seems to work ok. Better than 7 or anything larger.
 */
698#define	PGCLR_LOOPFACTOR 3
699
/*
 * When a bin is empty, and we can't satisfy a color request correctly,
 * we scan.  If we assume that the programs have reasonable spatial
 * behavior, then it will not be a good idea to use the adjacent color.
 * Using the adjacent color would result in virtually adjacent addresses
 * mapping into the same spot in the cache.  So, if we stumble across
 * an empty bin, skip a bunch before looking.  After the first skip,
 * then just look one bin at a time so we don't miss our cache on
 * every look. Be sure to check every bin.  Page_create() will panic
 * if we miss a page.
 *
 * This also explains the `<=' in the for loops in both page_get_freelist()
 * and page_get_cachelist().  Since we checked the target bin, skipped
 * a bunch, then continued one at a time, we wind up checking the target bin
 * twice to make sure we get all of the bins.
 */
716#define	BIN_STEP	20
717
718#ifdef VM_STATS
719struct vmm_vmstats_str {
720	ulong_t pgf_alloc[MMU_PAGE_SIZES];	/* page_get_freelist */
721	ulong_t pgf_allocok[MMU_PAGE_SIZES];
722	ulong_t pgf_allocokrem[MMU_PAGE_SIZES];
723	ulong_t pgf_allocfailed[MMU_PAGE_SIZES];
724	ulong_t pgf_allocdeferred;
725	ulong_t	pgf_allocretry[MMU_PAGE_SIZES];
726	ulong_t pgc_alloc;			/* page_get_cachelist */
727	ulong_t pgc_allocok;
728	ulong_t pgc_allocokrem;
729	ulong_t	pgc_allocokdeferred;
730	ulong_t pgc_allocfailed;
731	ulong_t	pgcp_alloc[MMU_PAGE_SIZES];	/* page_get_contig_pages */
732	ulong_t	pgcp_allocfailed[MMU_PAGE_SIZES];
733	ulong_t	pgcp_allocempty[MMU_PAGE_SIZES];
734	ulong_t	pgcp_allocok[MMU_PAGE_SIZES];
735	ulong_t	ptcp[MMU_PAGE_SIZES];		/* page_trylock_contig_pages */
736	ulong_t	ptcpfreethresh[MMU_PAGE_SIZES];
737	ulong_t	ptcpfailexcl[MMU_PAGE_SIZES];
738	ulong_t	ptcpfailszc[MMU_PAGE_SIZES];
739	ulong_t	ptcpfailcage[MMU_PAGE_SIZES];
740	ulong_t	ptcpok[MMU_PAGE_SIZES];
741	ulong_t	pgmf_alloc[MMU_PAGE_SIZES];	/* page_get_mnode_freelist */
742	ulong_t	pgmf_allocfailed[MMU_PAGE_SIZES];
743	ulong_t	pgmf_allocempty[MMU_PAGE_SIZES];
744	ulong_t	pgmf_allocok[MMU_PAGE_SIZES];
745	ulong_t	pgmc_alloc;			/* page_get_mnode_cachelist */
746	ulong_t	pgmc_allocfailed;
747	ulong_t	pgmc_allocempty;
748	ulong_t	pgmc_allocok;
749	ulong_t	pladd_free[MMU_PAGE_SIZES];	/* page_list_add/sub */
750	ulong_t	plsub_free[MMU_PAGE_SIZES];
751	ulong_t	pladd_cache;
752	ulong_t	plsub_cache;
753	ulong_t	plsubpages_szcbig;
754	ulong_t	plsubpages_szc0;
755	ulong_t	pfs_req[MMU_PAGE_SIZES];	/* page_freelist_split */
756	ulong_t	pfs_demote[MMU_PAGE_SIZES];
757	ulong_t	pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
758	ulong_t ppr_reloc[MMU_PAGE_SIZES];	/* page_relocate */
759	ulong_t ppr_relocok[MMU_PAGE_SIZES];
760	ulong_t ppr_relocnoroot[MMU_PAGE_SIZES];
761	ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES];
762	ulong_t ppr_relocnolock[MMU_PAGE_SIZES];
763	ulong_t ppr_relocnomem[MMU_PAGE_SIZES];
764	ulong_t ppr_krelocfail[MMU_PAGE_SIZES];
765	ulong_t ppr_copyfail;
766	/* page coalesce counter */
767	ulong_t	page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
768	/* candidates useful */
769	ulong_t	page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
770	/* ctrs changed after locking */
771	ulong_t	page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
772	/* page_freelist_coalesce failed */
773	ulong_t	page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
774	ulong_t	page_ctrs_coalesce_all;	/* page coalesce all counter */
775	ulong_t	page_ctrs_cands_skip_all; /* candidates useful for all func */
776};
777extern struct vmm_vmstats_str vmm_vmstats;
778#endif	/* VM_STATS */
779
780/*
781 * Used to hold off page relocations into the cage until OBP has completed
782 * its boot-time handoff of its resources to the kernel.
783 */
784extern int page_relocate_ready;
785
786/*
787 * cpu/mmu-dependent vm variables may be reset at bootup.
788 */
789extern uint_t mmu_page_sizes;
790extern uint_t max_mmu_page_sizes;
791extern uint_t mmu_hashcnt;
792extern uint_t max_mmu_hashcnt;
793extern size_t mmu_ism_pagesize;
794extern int mmu_exported_pagesize_mask;
795extern uint_t mmu_exported_page_sizes;
796extern uint_t szc_2_userszc[];
797extern uint_t userszc_2_szc[];
798
799#define	mmu_legacy_page_sizes	mmu_exported_page_sizes
800#define	USERSZC_2_SZC(userszc)	(userszc_2_szc[userszc])
801#define	SZC_2_USERSZC(szc)	(szc_2_userszc[szc])
802
803/*
804 * Platform specific page routines
805 */
806extern void mach_page_add(page_t **, page_t *);
807extern void mach_page_sub(page_t **, page_t *);
808extern uint_t page_get_pagecolors(uint_t);
809extern void ppcopy_kernel__relocatable(page_t *, page_t *);
810#define	ppcopy_kernel(p1, p2)	ppcopy_kernel__relocatable(p1, p2)
811
/*
 * platform specific large pages for kernel heap support
 *
 * mmu_init_kcontext is declared with an explicit (void) parameter list so
 * that it is a real prototype: in C an empty () leaves the parameters
 * unspecified and disables argument checking at call sites.
 */
extern size_t get_segkmem_lpsize(size_t lpsize);
extern size_t mmu_get_kernel_lpsize(size_t lpsize);
extern void mmu_init_kernel_pgsz(struct hat *hat);
extern void mmu_init_kcontext(void);
extern uint64_t kcontextreg;
820
821/*
822 * Nucleus data page allocator routines
823 */
824extern void ndata_alloc_init(struct memlist *, uintptr_t, uintptr_t);
825extern void *ndata_alloc(struct memlist *, size_t, size_t);
826extern void *ndata_extra_base(struct memlist *, size_t, caddr_t);
827extern size_t ndata_maxsize(struct memlist *);
828extern size_t ndata_spare(struct memlist *, size_t, size_t);
829
830/*
831 * Platform specific support for non-coherent I-cache and soft exec
832 */
833extern uint_t	icache_is_coherent;
834extern uint_t	force_sync_icache_after_bcopy;
835extern uint_t	force_sync_icache_after_dma;
836
837extern void	mach_setup_icache(uint_t);
838#pragma weak	mach_setup_icache
839
840#ifdef	__cplusplus
841}
842#endif
843
844#endif	/* _VM_DEP_H */
845