1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/types.h>
29#include <sys/systm.h>
30#include <sys/archsystm.h>
31#include <sys/machsystm.h>
32#include <sys/t_lock.h>
33#include <sys/vmem.h>
34#include <sys/mman.h>
35#include <sys/vm.h>
36#include <sys/cpu.h>
37#include <sys/cmn_err.h>
38#include <sys/cpuvar.h>
39#include <sys/atomic.h>
40#include <vm/as.h>
41#include <vm/hat.h>
42#include <vm/as.h>
43#include <vm/page.h>
44#include <vm/seg.h>
45#include <vm/seg_kmem.h>
46#include <vm/seg_kpm.h>
47#include <vm/hat_sfmmu.h>
48#include <sys/debug.h>
49#include <sys/cpu_module.h>
50
51/*
52 * A quick way to generate a cache consistent address to map in a page.
53 * users: ppcopy, pagezero, /proc, dev/mem
54 *
55 * The ppmapin/ppmapout routines provide a quick way of generating a cache
56 * consistent address by reserving a given amount of kernel address space.
57 * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
58 * into x number of sets, where x is the number of colors for the virtual
59 * cache. The number of colors is how many times a page can be mapped
 * simultaneously in the cache.  For direct map caches this translates to
61 * the number of pages in the cache.
62 * Each set will be assigned a group of virtual pages from the reserved memory
63 * depending on its virtual color.
64 * When trying to assign a virtual address we will find out the color for the
65 * physical page in question (if applicable).  Then we will try to find an
 * available virtual page from the set of the appropriate color.
67 */
68
/*
 * Slot count tunable.  ppmapinit() asserts that it never exceeds
 * MAXPP_SLOTS.
 */
int pp_slots = 4;		/* small default, tuned by cpu module */

/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

/*
 * Pool of reserved virtual addresses, one entry per MMU page of the
 * PPMAPSIZE range.  ppmapinit() fills in every entry, ppmapin() claims an
 * entry by atomically swapping it to NULL, and ppmapout() stores the
 * address back.  A NULL entry therefore means "currently handed out".
 */
static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int	nsets;			/* # of ppmap_vaddrs[] entries in use */
static int	ppmap_shift;		/* va >> ppmap_shift selects the entry */
77
/*
 * Optional statistics, compiled in only with PPDEBUG.  They are bumped
 * with plain increments (no atomics), so treat the values as approximate.
 */
#ifdef PPDEBUG
/* NOTE(review): MAXCOLORS is not referenced in the visible part of this file */
#define		MAXCOLORS	16	/* for debug only */
static int	ppalloc_noslot = 0;	/* # of ppmapin fallbacks to heap_arena */
static int	align_hits;		/* # of non-NULL pool entries ppmapin tried */
static int	pp_allocs;		/* # of ppmapin requests */
#endif /* PPDEBUG */
84
/*
 * There are only 64 TLB entries on spitfire, 16 on cheetah
 * (fully-associative TLB) so we allow the cpu module to tune the
 * number to use here via pp_slots.
 *
 * Per-CPU table of MAXPP_SLOTS mapping slots.
 * NOTE(review): no routine visible in this file reads or writes
 * ppmap_va[]; confirm whether it is still needed on sun4v.
 */
static struct ppmap_va {
	caddr_t	ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];

/*
 * prevent compilation with VAC defined: the routines below pick mapping
 * addresses without regard to cache color.
 */
#ifdef VAC
#error "sun4v ppmapin and ppmapout do not support VAC"
#endif
98
99void
100ppmapinit(void)
101{
102	int nset;
103	caddr_t va;
104
105	ASSERT(pp_slots <= MAXPP_SLOTS);
106
107	va = (caddr_t)PPMAPBASE;
108
109	/*
110	 * sun4v does not have a virtual indexed cache and simply
111	 * has only one set containing all pages.
112	 */
113	nsets = mmu_btop(PPMAPSIZE);
114	ppmap_shift = MMU_PAGESHIFT;
115
116	for (nset = 0; nset < nsets; nset++) {
117		ppmap_vaddrs[nset] =
118		    (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
119	}
120}
121
122/*
123 * Allocate a cache consistent virtual address to map a page, pp,
124 * with protection, vprot; and map it in the MMU, using the most
125 * efficient means possible.  The argument avoid is a virtual address
126 * hint which when masked yields an offset into a virtual cache
127 * that should be avoided when allocating an address to map in a
128 * page.  An avoid arg of -1 means you don't care, for instance pagezero.
129 *
130 * machine dependent, depends on virtual address space layout,
131 * understands that all kernel addresses have bit 31 set.
132 *
 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 * that found in other architectures.  In other architectures the hint
 * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
 * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately
 * in the case of a COW,  this later on caused a cache aliasing conflict.  In
 * sun4, the bcopy routine uses the block ld/st instructions so we don't have
 * to worry about virtual cache thrashing.  Actually, by using the hint to
 * choose the right color we can almost guarantee a cache conflict will not
 * occur.  On sun4v the caches are physical, so the hint is accepted but not
 * consulted.
 */
142
143/*ARGSUSED2*/
144caddr_t
145ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
146{
147	int nset;
148	caddr_t va;
149
150#ifdef PPDEBUG
151	pp_allocs++;
152#endif /* PPDEBUG */
153
154	/*
155	 * For sun4v caches are physical caches, we can pick any address
156	 * we want.
157	 */
158	for (nset = 0; nset < nsets; nset++) {
159		va = ppmap_vaddrs[nset];
160		if (va != NULL) {
161#ifdef PPDEBUG
162			align_hits++;
163#endif /* PPDEBUG */
164			if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) {
165				hat_memload(kas.a_hat, va, pp,
166				    vprot | HAT_NOSYNC,
167				    HAT_LOAD_LOCK);
168				return (va);
169			}
170		}
171	}
172
173#ifdef PPDEBUG
174	ppalloc_noslot++;
175#endif /* PPDEBUG */
176
177	/*
178	 * No free slots; get a random one from the kernel heap area.
179	 */
180	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
181
182	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
183
184	return (va);
185
186}
187
188void
189ppmapout(caddr_t va)
190{
191	int nset;
192
193	if (va >= kernelheap && va < ekernelheap) {
194		/*
195		 * Space came from kernelmap, flush the page and
196		 * return the space.
197		 */
198		hat_unload(kas.a_hat, va, PAGESIZE,
199		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
200		vmem_free(heap_arena, va, PAGESIZE);
201	} else {
202		/*
203		 * Space came from ppmap_vaddrs[], give it back.
204		 */
205		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
206		hat_unload(kas.a_hat, va, PAGESIZE,
207		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
208
209		ASSERT(ppmap_vaddrs[nset] == NULL);
210		ppmap_vaddrs[nset] = va;
211	}
212}
213
/*
 * Event counters kept only in DEBUG kernels.  PP_STAT_ADD() is a plain
 * post-increment under DEBUG and expands to nothing otherwise, so it must
 * only be used as a statement.
 * NOTE(review): pload and ploadfail are not updated by any code visible
 * in this file; ppzero and ppzero_short are bumped by pagezero().
 */
#ifdef DEBUG
#define	PP_STAT_ADD(stat)	(stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#else
#define	PP_STAT_ADD(stat)
#endif /* DEBUG */
221
222static void
223pp_unload_tlb(caddr_t *pslot, caddr_t va)
224{
225	ASSERT(*pslot == va);
226
227	vtag_flushpage(va, (uint64_t)ksfmmup);
228	*pslot = NULL;				/* release the slot */
229}
230
231/*
232 * Routine to copy kernel pages during relocation.  It will copy one
233 * PAGESIZE page to another PAGESIZE page.  This function may be called
234 * above LOCK_LEVEL so it should not grab any locks.
235 */
236void
237ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
238{
239	uint64_t fm_pa, to_pa;
240	size_t nbytes;
241
242	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
243	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
244
245	nbytes = MMU_PAGESIZE;
246
247	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
248		hw_pa_bcopy32(fm_pa, to_pa);
249}
250
/*
 * Copy the data from the physical page represented by "fm_pp" to
 * that represented by "to_pp".
 *
 * Try to map both pages through KPM first; if KPM is disabled or either
 * mapin fails, fall back to ppmapin/ppmapout.
 * Returns one on success or zero on some sort of fault while doing the copy.
 */
259int
260ppcopy(page_t *fm_pp, page_t *to_pp)
261{
262	caddr_t fm_va = NULL;
263	caddr_t to_va;
264	boolean_t fast;
265	label_t ljb;
266	int ret = 1;
267
268	ASSERT(PAGE_LOCKED(fm_pp));
269	ASSERT(PAGE_LOCKED(to_pp));
270
271	/*
272	 * Try to map using KPM if enabled.  If it fails, fall
273	 * back to ppmapin/ppmapout.
274	 */
275	if ((kpm_enable == 0) ||
276	    (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
277	    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
278		if (fm_va != NULL)
279			hat_kpm_mapout(fm_pp, NULL, fm_va);
280		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
281		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
282		fast = B_FALSE;
283	} else
284		fast = B_TRUE;
285
286	if (on_fault(&ljb)) {
287		ret = 0;
288		goto faulted;
289	}
290	bcopy(fm_va, to_va, PAGESIZE);
291	no_fault();
292faulted:
293
294	/* Unmap */
295	if (fast) {
296		hat_kpm_mapout(fm_pp, NULL, fm_va);
297		hat_kpm_mapout(to_pp, NULL, to_va);
298	} else {
299		ppmapout(fm_va);
300		ppmapout(to_va);
301	}
302	return (ret);
303}
304
/*
 * Zero the physical page from off to off + len given by `pp'
 * without changing the reference and modified bits of page.
 *
 * Again, a KPM mapping is tried first, with ppmapin/ppmapout as the
 * fallback.
 */
311
312void
313pagezero(page_t *pp, uint_t off, uint_t len)
314{
315	caddr_t va;
316	extern int hwblkclr(void *, size_t);
317	extern int use_hw_bzero;
318	boolean_t fast;
319
320	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
321	ASSERT(PAGE_LOCKED(pp));
322
323	PP_STAT_ADD(ppzero);
324
325	if (len != MMU_PAGESIZE || !use_hw_bzero) {
326		PP_STAT_ADD(ppzero_short);
327	}
328
329	kpreempt_disable();
330
331	/*
332	 * Try to use KPM if enabled.  If that fails, fall back to
333	 * ppmapin/ppmapout.
334	 */
335
336	if (kpm_enable != 0) {
337		fast = B_TRUE;
338		va = hat_kpm_mapin(pp, NULL);
339	} else
340		va = NULL;
341
342	if (va == NULL) {
343		fast = B_FALSE;
344		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
345	}
346
347	if (!use_hw_bzero) {
348		bzero(va + off, len);
349		sync_icache(va + off, len);
350	} else if (hwblkclr(va + off, len)) {
351		/*
352		 * We may not have used block commit asi.
353		 * So flush the I-$ manually
354		 */
355		sync_icache(va + off, len);
356	} else {
357		/*
358		 * We have used blk commit, and flushed the I-$.
359		 * However we still may have an instruction in the
360		 * pipeline. Only a flush will invalidate that.
361		 */
362		doflush(va);
363	}
364
365	if (fast) {
366		hat_kpm_mapout(pp, NULL, va);
367	} else {
368		ppmapout(va);
369	}
370	kpreempt_enable();
371}
372