ppage.c revision 6461:037a423f52ad
1266072Sdes/*
2266072Sdes * CDDL HEADER START
3285206Sdes *
4266072Sdes * The contents of this file are subject to the terms of the
5266072Sdes * Common Development and Distribution License (the "License").
6266072Sdes * You may not use this file except in compliance with the License.
7266072Sdes *
8266072Sdes * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9266072Sdes * or http://www.opensolaris.org/os/licensing.
10266072Sdes * See the License for the specific language governing permissions
11285206Sdes * and limitations under the License.
12285206Sdes *
13285206Sdes * When distributing Covered Code, include this CDDL HEADER in each
14285206Sdes * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15285206Sdes * If applicable, add the following below this CDDL HEADER, with the
16285206Sdes * fields enclosed by brackets "[]" replaced with your own identifying
17285206Sdes * information: Portions Copyright [yyyy] [name of copyright owner]
18285206Sdes *
19266072Sdes * CDDL HEADER END
20285206Sdes */
21285206Sdes/*
22285206Sdes * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23285206Sdes * Use is subject to license terms.
24266072Sdes */
25285206Sdes
26285206Sdes#pragma ident	"%Z%%M%	%I%	%E% SMI"
27266072Sdes
28266072Sdes#include <sys/types.h>
29266072Sdes#include <sys/systm.h>
30266072Sdes#include <sys/archsystm.h>
31285206Sdes#include <sys/machsystm.h>
32266072Sdes#include <sys/t_lock.h>
33266072Sdes#include <sys/vmem.h>
34285206Sdes#include <sys/mman.h>
35266072Sdes#include <sys/vm.h>
36266072Sdes#include <sys/cpu.h>
37266072Sdes#include <sys/cmn_err.h>
38266072Sdes#include <sys/cpuvar.h>
39266072Sdes#include <sys/atomic.h>
40266072Sdes#include <vm/as.h>
41266072Sdes#include <vm/hat.h>
42266072Sdes#include <vm/as.h>
43266072Sdes#include <vm/page.h>
44266072Sdes#include <vm/seg.h>
45266072Sdes#include <vm/seg_kmem.h>
46266072Sdes#include <vm/seg_kpm.h>
47266072Sdes#include <vm/hat_sfmmu.h>
48266072Sdes#include <sys/debug.h>
49266072Sdes#include <sys/cpu_module.h>
50266072Sdes
/*
 * A quick way to generate a cache consistent address to map in a page.
 * users: ppcopy, pagezero, /proc, dev/mem
 *
 * The ppmapin/ppmapout routines provide a quick way of generating a cache
 * consistent address by reserving a given amount of kernel address space.
 * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
 * into x number of sets, where x is the number of colors for the virtual
 * cache. The number of colors is how many times a page can be mapped
 * simultaneously in the cache.  For direct map caches this translates to
 * the number of pages in the cache.
 * Each set will be assigned a group of virtual pages from the reserved memory
 * depending on its virtual color.
 * When trying to assign a virtual address we will find out the color for the
 * physical page in question (if applicable).  Then we will try to find an
 * available virtual page from the set of the appropriate color.
 */
68266072Sdes
/*
 * Slot count tunable.  In this file it is only checked against
 * MAXPP_SLOTS by the ASSERT in ppmapinit().
 */
int pp_slots = 4;		/* small default, tuned by cpu module */

/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

/*
 * Pool of reserved mapping addresses, one entry per MMU page of the
 * PPMAPSIZE window.  ppmapinit() fills every entry; ppmapin() claims an
 * entry by swapping it to NULL and ppmapout() stores the address back.
 */
static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int	nsets;			/* number of sets */
static int	ppmap_shift;		/* set selector */

#ifdef PPDEBUG
#define		MAXCOLORS	16	/* for debug only */
static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
/* # of non-NULL pool entries ppmapin() saw (counted before its casptr) */
static int	align_hits;
static int	pp_allocs;		/* # of ppmapin requests */
#endif /* PPDEBUG */
84266072Sdes
85266072Sdes/*
86285206Sdes * There are only 64 TLB entries on spitfire, 16 on cheetah
87266072Sdes * (fully-associative TLB) so we allow the cpu module to tune the
88266072Sdes * number to use here via pp_slots.
89266072Sdes */
/*
 * Table of MAXPP_SLOTS mapping addresses for each of NCPU entries
 * (presumably indexed by CPU id -- confirm).
 * NOTE(review): nothing in the visible part of this file reads or writes
 * this table; check whether it is still needed.
 */
static struct ppmap_va {
	caddr_t	ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];
93266072Sdes
94266072Sdes/* prevent compilation with VAC defined */
95266072Sdes#ifdef VAC
96266072Sdes#error "sun4v ppmapin and ppmapout do not support VAC"
97266072Sdes#endif
98266072Sdes
99266072Sdesvoid
100266072Sdesppmapinit(void)
101266072Sdes{
102266072Sdes	int nset;
103266072Sdes	caddr_t va;
104266072Sdes
105266072Sdes	ASSERT(pp_slots <= MAXPP_SLOTS);
106285206Sdes
107285206Sdes	va = (caddr_t)PPMAPBASE;
108285206Sdes
109285206Sdes	/*
110285206Sdes	 * sun4v does not have a virtual indexed cache and simply
111285206Sdes	 * has only one set containing all pages.
112285206Sdes	 */
113285206Sdes	nsets = mmu_btop(PPMAPSIZE);
114285206Sdes	ppmap_shift = MMU_PAGESHIFT;
115285206Sdes
116285206Sdes	for (nset = 0; nset < nsets; nset++) {
117285206Sdes		ppmap_vaddrs[nset] =
118285206Sdes		    (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
119285206Sdes	}
120285206Sdes}
121285206Sdes
/*
 * Allocate a cache consistent virtual address to map a page, pp,
 * with protection, vprot; and map it in the MMU, using the most
 * efficient means possible.  The argument avoid is a virtual address
 * hint which when masked yields an offset into a virtual cache
 * that should be avoided when allocating an address to map in a
 * page.  An avoid arg of -1 means you don't care, for instance pagezero.
 *
 * machine dependent, depends on virtual address space layout,
 * understands that all kernel addresses have bit 31 set.
 *
 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 * that found in other architectures.  In other architectures the hint
 * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
 * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately
 * in the case of a COW, this later on caused a cache aliasing conflict.  In
 * sun4, the bcopy routine uses the block ld/st instructions so we don't have
 * to worry about virtual cache thrashing.  Actually, by using the hint to
 * choose the right color we can almost guarantee a cache conflict will not
 * occur.
 */
142285206Sdes
143266072Sdes/*ARGSUSED2*/
144266072Sdescaddr_t
145266072Sdesppmapin(page_t *pp, uint_t vprot, caddr_t hint)
146266072Sdes{
147266072Sdes	int nset;
148266072Sdes	caddr_t va;
149266072Sdes
150266072Sdes#ifdef PPDEBUG
151266072Sdes	pp_allocs++;
152266072Sdes#endif /* PPDEBUG */
153266072Sdes
154266072Sdes	/*
155266072Sdes	 * For sun4v caches are physical caches, we can pick any address
156266072Sdes	 * we want.
157266072Sdes	 */
158266072Sdes	for (nset = 0; nset < nsets; nset++) {
159266072Sdes		va = ppmap_vaddrs[nset];
160266072Sdes		if (va != NULL) {
161266072Sdes#ifdef PPDEBUG
162266072Sdes			align_hits++;
163266072Sdes#endif /* PPDEBUG */
164266072Sdes			if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) {
165266072Sdes				hat_memload(kas.a_hat, va, pp,
166266072Sdes				    vprot | HAT_NOSYNC,
167266072Sdes				    HAT_LOAD_LOCK);
168266072Sdes				return (va);
169266072Sdes			}
170266072Sdes		}
171266072Sdes	}
172266072Sdes
173266072Sdes#ifdef PPDEBUG
174266072Sdes	ppalloc_noslot++;
175266072Sdes#endif /* PPDEBUG */
176266072Sdes
177266072Sdes	/*
178266072Sdes	 * No free slots; get a random one from the kernel heap area.
179266072Sdes	 */
180266072Sdes	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
181266072Sdes
182266072Sdes	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
183266072Sdes
184266072Sdes	return (va);
185266072Sdes
186266072Sdes}
187266072Sdes
188266072Sdesvoid
189285206Sdesppmapout(caddr_t va)
190266072Sdes{
191266072Sdes	int nset;
192285206Sdes
193266072Sdes	if (va >= kernelheap && va < ekernelheap) {
194285206Sdes		/*
195266072Sdes		 * Space came from kernelmap, flush the page and
196266072Sdes		 * return the space.
197266072Sdes		 */
198285206Sdes		hat_unload(kas.a_hat, va, PAGESIZE,
199266072Sdes		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
200266072Sdes		vmem_free(heap_arena, va, PAGESIZE);
201266072Sdes	} else {
202266072Sdes		/*
203266072Sdes		 * Space came from ppmap_vaddrs[], give it back.
204266072Sdes		 */
205266072Sdes		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
206266072Sdes		hat_unload(kas.a_hat, va, PAGESIZE,
207266072Sdes		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
208266072Sdes
209266072Sdes		ASSERT(ppmap_vaddrs[nset] == NULL);
210266072Sdes		ppmap_vaddrs[nset] = va;
211285206Sdes	}
212266072Sdes}
213266072Sdes
/*
 * Statistics counters, kept only in DEBUG builds.  PP_STAT_ADD(stat)
 * increments the named counter when DEBUG is defined and expands to
 * nothing otherwise.
 */
#ifdef DEBUG
#define	PP_STAT_ADD(stat)	(stat)++
/* NOTE(review): pload/ploadfail are not updated in the visible source */
uint_t pload, ploadfail;
/*
 * ppzero: calls to pagezero(); ppzero_short: those calls that were not
 * a full MMU page or ran with use_hw_bzero off.
 */
uint_t ppzero, ppzero_short;
#else
#define	PP_STAT_ADD(stat)
#endif /* DEBUG */
221266072Sdes
/*
 * Drop the mapping at va and release the slot that tracks it.
 *
 * pslot must point at a slot currently holding va (asserted).  The
 * address is passed to vtag_flushpage() together with ksfmmup
 * (presumably a TLB flush of va for the kernel hat -- confirm) before
 * the slot is cleared, so the slot is not marked free while the old
 * translation could still be in use.
 *
 * NOTE(review): no caller of this function is visible in this file.
 */
static void
pp_unload_tlb(caddr_t *pslot, caddr_t va)
{
	ASSERT(*pslot == va);

	vtag_flushpage(va, (uint64_t)ksfmmup);
	*pslot = NULL;				/* release the slot */
}
230266072Sdes
231266072Sdes/*
232266072Sdes * Routine to copy kernel pages during relocation.  It will copy one
233266072Sdes * PAGESIZE page to another PAGESIZE page.  This function may be called
234266072Sdes * above LOCK_LEVEL so it should not grab any locks.
235266072Sdes */
236266072Sdesvoid
237266072Sdesppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
238266072Sdes{
239266072Sdes	uint64_t fm_pa, to_pa;
240266072Sdes	size_t nbytes;
241266072Sdes
242266072Sdes	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
243266072Sdes	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
244266072Sdes
245266072Sdes	nbytes = MMU_PAGESIZE;
246266072Sdes
247266072Sdes	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
248266072Sdes		hw_pa_bcopy32(fm_pa, to_pa);
249266072Sdes}
250266072Sdes
/*
 * Copy the data from the physical page represented by "fm_pp" to
 * that represented by "to_pp".
 *
 * Try to map both pages through KPM first; if KPM is disabled or either
 * mapping fails, fall back to ppmapin to map them.
 * Returns one on success or zero on some sort of fault while doing the copy.
 */
259266072Sdesint
260266072Sdesppcopy(page_t *fm_pp, page_t *to_pp)
261266072Sdes{
262266072Sdes	caddr_t fm_va = NULL;
263266072Sdes	caddr_t to_va;
264266072Sdes	boolean_t fast;
265266072Sdes	label_t ljb;
266266072Sdes	int ret = 1;
267266072Sdes
268266072Sdes	ASSERT(PAGE_LOCKED(fm_pp));
269266072Sdes	ASSERT(PAGE_LOCKED(to_pp));
270266072Sdes
271266072Sdes	/*
272266072Sdes	 * Try to map using KPM if enabled.  If it fails, fall
273266072Sdes	 * back to ppmapin/ppmapout.
274266072Sdes	 */
275266072Sdes	if ((kpm_enable == 0) ||
276266072Sdes	    (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
277266072Sdes	    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
278266072Sdes		if (fm_va != NULL)
279266072Sdes			hat_kpm_mapout(fm_pp, NULL, fm_va);
280266072Sdes		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
281266072Sdes		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
282285206Sdes		fast = B_FALSE;
283266072Sdes	} else
284285206Sdes		fast = B_TRUE;
285266072Sdes
286266072Sdes	if (on_fault(&ljb)) {
287266072Sdes		ret = 0;
288266072Sdes		goto faulted;
289285206Sdes	}
290266072Sdes	bcopy(fm_va, to_va, PAGESIZE);
291266072Sdes	no_fault();
292266072Sdesfaulted:
293266072Sdes
294266072Sdes	/* Unmap */
295266072Sdes	if (fast) {
296266072Sdes		hat_kpm_mapout(fm_pp, NULL, fm_va);
297266072Sdes		hat_kpm_mapout(to_pp, NULL, to_va);
298266072Sdes	} else {
299285206Sdes		ppmapout(fm_va);
300266072Sdes		ppmapout(to_va);
301266072Sdes	}
302266072Sdes	return (ret);
303266072Sdes}
304266072Sdes
/*
 * Zero the physical page from off to off + len given by `pp'
 * without changing the reference and modified bits of page.
 *
 * Again, we'll try a KPM mapping first and fall back to ppmapin.
 */
310266072Sdes */
311266072Sdes
312266072Sdesvoid
313266072Sdespagezero(page_t *pp, uint_t off, uint_t len)
314266072Sdes{
315266072Sdes	caddr_t va;
316266072Sdes	extern int hwblkclr(void *, size_t);
317266072Sdes	extern int use_hw_bzero;
318266072Sdes	boolean_t fast;
319266072Sdes
320266072Sdes	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
321266072Sdes	ASSERT(PAGE_LOCKED(pp));
322266072Sdes
323266072Sdes	PP_STAT_ADD(ppzero);
324266072Sdes
325266072Sdes	if (len != MMU_PAGESIZE || !use_hw_bzero) {
326266072Sdes		PP_STAT_ADD(ppzero_short);
327266072Sdes	}
328266072Sdes
329266072Sdes	kpreempt_disable();
330266072Sdes
331266072Sdes	/*
332266072Sdes	 * Try to use KPM if enabled.  If that fails, fall back to
333266072Sdes	 * ppmapin/ppmapout.
334266072Sdes	 */
335266072Sdes
336266072Sdes	if (kpm_enable != 0) {
337266072Sdes		fast = B_TRUE;
338266072Sdes		va = hat_kpm_mapin(pp, NULL);
339266072Sdes	} else
340266072Sdes		va = NULL;
341266072Sdes
342266072Sdes	if (va == NULL) {
343266072Sdes		fast = B_FALSE;
344266072Sdes		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
345266072Sdes	}
346266072Sdes
347266072Sdes	if (!use_hw_bzero) {
348266072Sdes		bzero(va + off, len);
349266072Sdes		sync_icache(va + off, len);
350266072Sdes	} else if (hwblkclr(va + off, len)) {
351266072Sdes		/*
352266072Sdes		 * We may not have used block commit asi.
353266072Sdes		 * So flush the I-$ manually
354266072Sdes		 */
355266072Sdes		sync_icache(va + off, len);
356266072Sdes	} else {
357266072Sdes		/*
358266072Sdes		 * We have used blk commit, and flushed the I-$.
359266072Sdes		 * However we still may have an instruction in the
360266072Sdes		 * pipeline. Only a flush will invalidate that.
361266072Sdes		 */
362266072Sdes		doflush(va);
363266072Sdes	}
364266072Sdes
365266072Sdes	if (fast) {
366266072Sdes		hat_kpm_mapout(pp, NULL, va);
367266072Sdes	} else {
368266072Sdes		ppmapout(va);
369266072Sdes	}
370266072Sdes	kpreempt_enable();
371266072Sdes}
372266072Sdes