/*-
 * Copyright (c) 2010 Nathan Whitehorn
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/powerpc/aim/slb.c 234745 2012-04-28 00:12:23Z nwhitehorn $
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <machine/md_var.h>
#include <machine/platform.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>

uintptr_t moea64_get_unique_vsid(void);
void moea64_release_vsid(uint64_t vsid);
static void slb_zone_init(void *);

static uma_zone_t slbt_zone;
static uma_zone_t slb_cache_zone;
int n_slbs = 64;

SYSINIT(slb_zone_init, SI_SUB_KMEM, SI_ORDER_ANY, slb_zone_init, NULL);

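/*
 * User segment mappings are cached in a 16-ary radix tree indexed by
 * 4-bit slices of the 36-bit ESID.  Interior nodes hold up to 16
 * child pointers and leaves hold up to 16 SLB entries; ua_alloc is a
 * bitmap of the slots in use and ua_base is the ESID prefix covered
 * by the node.
 */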
struct slbtnode {
	uint16_t	ua_alloc;
	uint8_t		ua_level;
	/* Only 36 bits needed for full 64-bit address space. */
	uint64_t	ua_base;
	union {
		struct slbtnode	*ua_child[16];
		struct slb	slb_entries[16];
	} u;
};

/*
 * For a full 64-bit address space, there are 36 bits in play in an
 * esid, so nine 4-bit levels, with the root at level 8 and the leaf
 * at level 0.
 *
 * |3333|3322|2222|2222|1111|1111|11  |    |    |  esid
 * |5432|1098|7654|3210|9876|5432|1098|7654|3210|  bits
 * +----+----+----+----+----+----+----+----+----+--------
 * | 8  | 7  | 6  | 5  | 4  | 3  | 2  | 1  | 0  | level
 */
#define UAD_ROOT_LEVEL  8
#define UAD_LEAF_LEVEL  0

static inline int
esid2idx(uint64_t esid, int level)
{
	int shift;

	shift = level * 4;
	return ((esid >> shift) & 0xF);
}

/*
 * The ua_base field must have zeroes in its low 4*(level+1) bits,
 * i.e. only the high-order bits that identify the region covered by
 * the node may be set.  That is exactly what esid2base() computes,
 * which is what uad_baseok() checks.
 */
#define uad_baseok(ua)                          \
	(esid2base(ua->ua_base, ua->ua_level) == ua->ua_base)


static inline uint64_t
esid2base(uint64_t esid, int level)
{
	uint64_t mask;
	int shift;

	shift = (level + 1) * 4;
	mask = ~((1ULL << shift) - 1);
	return (esid & mask);
}

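/*
 * Example: for esid 0x123456789, esid2idx() selects nibble 0x9 at
 * level 0, 0x8 at level 1, ..., and 0x1 at level 8, while
 * esid2base(0x123456789, 0) == 0x123456780 and
 * esid2base(0x123456789, 1) == 0x123456700, i.e. the prefix of the
 * ESID space covered by a node at that level.
 */
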
/*
 * Allocate a new leaf node for the specified esid/slbv pair from the
 * parent node.
 */
static struct slb *
make_new_leaf(uint64_t esid, uint64_t slbv, struct slbtnode *parent)
{
	struct slbtnode *child;
	struct slb *retval;
	int idx;

	idx = esid2idx(esid, parent->ua_level);
	KASSERT(parent->u.ua_child[idx] == NULL, ("Child already exists!"));

	/* unlock and M_WAITOK and loop? */
	child = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
	KASSERT(child != NULL, ("unhandled NULL case"));

	child->ua_level = UAD_LEAF_LEVEL;
	child->ua_base = esid2base(esid, child->ua_level);
	idx = esid2idx(esid, child->ua_level);
	child->u.slb_entries[idx].slbv = slbv;
	child->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
	setbit(&child->ua_alloc, idx);

	retval = &child->u.slb_entries[idx];

	/*
	 * The above stores must be visible before the next one, so
	 * that a lockless searcher always sees a valid path through
	 * the tree.
	 */
	mb();

	idx = esid2idx(esid, parent->ua_level);
	parent->u.ua_child[idx] = child;
	setbit(&parent->ua_alloc, idx);

	return (retval);
}

/*
 * Allocate a new intermediate node to fit between the parent and
 * esid.
 */
static struct slbtnode *
make_intermediate(uint64_t esid, struct slbtnode *parent)
{
	struct slbtnode *child, *inter;
	int idx, level;

	idx = esid2idx(esid, parent->ua_level);
	child = parent->u.ua_child[idx];
	KASSERT(esid2base(esid, child->ua_level) != child->ua_base,
	    ("No need for an intermediate node?"));

	/*
	 * Find the level where the existing child and our new esid
	 * meet.  It must be lower than parent->ua_level or we would
	 * have chosen a different index in parent.
	 */
	level = child->ua_level + 1;
	while (esid2base(esid, level) !=
	    esid2base(child->ua_base, level))
		level++;
	KASSERT(level < parent->ua_level,
	    ("Found splitting level %d for %09jx and %09jx, "
	    "but it's the same as %p's",
	    level, esid, child->ua_base, parent));

	/* unlock and M_WAITOK and loop? */
	inter = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
	KASSERT(inter != NULL, ("unhandled NULL case"));

	/* Set up intermediate node to point to child ... */
	inter->ua_level = level;
	inter->ua_base = esid2base(esid, inter->ua_level);
	idx = esid2idx(child->ua_base, inter->ua_level);
	inter->u.ua_child[idx] = child;
	setbit(&inter->ua_alloc, idx);
	mb();

	/* Set up parent to point to intermediate node ... */
	idx = esid2idx(inter->ua_base, parent->ua_level);
	parent->u.ua_child[idx] = inter;
	setbit(&parent->ua_alloc, idx);

	return (inter);
}

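/*
 * Compute the SLBV word (VSID and flags) for a kernel virtual
 * address.  Kernel VSIDs are a deterministic function of the ESID
 * (KERNEL_VSID()), so no per-pmap state is consulted; SLBV_L is set
 * for direct-map addresses backed by real memory, which are assumed
 * to be mapped with large pages.
 */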
uint64_t
kernel_va_to_slbv(vm_offset_t va)
{
	uint64_t slbv;

	/* Set kernel VSID to deterministic value */
	slbv = (KERNEL_VSID((uintptr_t)va >> ADDR_SR_SHFT)) << SLBV_VSID_SHIFT;

	/* Figure out if this is a large-page mapping */
	if (hw_direct_map && va < VM_MIN_KERNEL_ADDRESS) {
		/*
		 * XXX: If we have set up a direct map, this assumes
		 * that all physical memory is mapped with large pages.
		 */
		if (mem_valid(va, 0) == 0)
			slbv |= SLBV_L;
	}

	return (slbv);
}

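/*
 * Find the cached SLB entry for a user virtual address by walking the
 * pmap's SLB tree.  Returns NULL if no valid entry covers the
 * address.  The walk takes no locks; it relies on the store ordering
 * provided by make_new_leaf() and make_intermediate().
 */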
struct slb *
user_va_to_slb_entry(pmap_t pm, vm_offset_t va)
{
	uint64_t esid = va >> ADDR_SR_SHFT;
	struct slbtnode *ua;
	int idx;

	ua = pm->pm_slb_tree_root;

	for (;;) {
		KASSERT(uad_baseok(ua), ("uad base %016jx level %d bad!",
		    ua->ua_base, ua->ua_level));
		idx = esid2idx(esid, ua->ua_level);

		/*
		 * This code is specific to ppc64 where a load is
		 * atomic, so no need for atomic_load macro.
		 */
		if (ua->ua_level == UAD_LEAF_LEVEL)
			return ((ua->u.slb_entries[idx].slbe & SLBE_VALID) ?
			    &ua->u.slb_entries[idx] : NULL);

		ua = ua->u.ua_child[idx];
		if (ua == NULL ||
		    esid2base(esid, ua->ua_level) != ua->ua_base)
			return (NULL);
	}

	return (NULL);
}

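/*
 * Return the VSID for a virtual address.  Kernel addresses are
 * handled with the deterministic KERNEL_VSID() mapping; for user
 * addresses, a new VSID is allocated and cached in the pmap's SLB
 * tree if one does not already exist.
 */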
uint64_t
va_to_vsid(pmap_t pm, vm_offset_t va)
{
	struct slb *entry;

	/* Shortcut kernel case */
	if (pm == kernel_pmap)
		return (KERNEL_VSID((uintptr_t)va >> ADDR_SR_SHFT));

	/*
	 * If there is no vsid for this VA, we need to add a new entry
	 * to the PMAP's segment table.
	 */

	entry = user_va_to_slb_entry(pm, va);

	if (entry == NULL)
		return (allocate_user_vsid(pm,
		    (uintptr_t)va >> ADDR_SR_SHFT, 0));

	return ((entry->slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT);
}

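/*
 * Allocate a fresh VSID for the given user ESID and record it in the
 * pmap's SLB tree, creating leaf and intermediate nodes on demand.
 * The new entry is also pre-spilled into the pmap's SLB cache, since
 * the caller is likely to touch the segment soon.
 */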
uint64_t
allocate_user_vsid(pmap_t pm, uint64_t esid, int large)
{
	uint64_t vsid, slbv;
	struct slbtnode *ua, *next, *inter;
	struct slb *slb;
	int idx;

	KASSERT(pm != kernel_pmap, ("Attempting to allocate a kernel VSID"));

	PMAP_LOCK_ASSERT(pm, MA_OWNED);
	vsid = moea64_get_unique_vsid();

	slbv = vsid << SLBV_VSID_SHIFT;
	if (large)
		slbv |= SLBV_L;

	ua = pm->pm_slb_tree_root;

	/* Descend to the correct leaf or NULL pointer. */
	for (;;) {
		KASSERT(uad_baseok(ua),
		   ("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));
		idx = esid2idx(esid, ua->ua_level);

		if (ua->ua_level == UAD_LEAF_LEVEL) {
			ua->u.slb_entries[idx].slbv = slbv;
			eieio();
			ua->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT)
			    | SLBE_VALID;
			setbit(&ua->ua_alloc, idx);
			slb = &ua->u.slb_entries[idx];
			break;
		}

		next = ua->u.ua_child[idx];
		if (next == NULL) {
			slb = make_new_leaf(esid, slbv, ua);
			break;
		}

		/*
		 * Check if the next item down has an okay ua_base.
		 * If not, we need to allocate an intermediate node.
		 */
		if (esid2base(esid, next->ua_level) != next->ua_base) {
			inter = make_intermediate(esid, ua);
			slb = make_new_leaf(esid, slbv, inter);
			break;
		}

		ua = next;
	}

	/*
	 * Someone probably wants this soon, and it may be a wired
	 * SLB mapping, so pre-spill this entry.
	 */
	eieio();
	slb_insert_user(pm, slb);

	return (vsid);
}

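/*
 * Remove the SLB tree entry for the given ESID: clear its slbv and
 * slbe words and mark the slot free in the leaf's allocation bitmap.
 * The tree nodes themselves are only reclaimed when the whole tree is
 * torn down by slb_free_tree().
 */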
void
free_vsid(pmap_t pm, uint64_t esid, int large)
{
	struct slbtnode *ua;
	int idx;

	PMAP_LOCK_ASSERT(pm, MA_OWNED);

	ua = pm->pm_slb_tree_root;
	/* Descend to the correct leaf. */
	for (;;) {
		KASSERT(uad_baseok(ua),
		   ("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));

		idx = esid2idx(esid, ua->ua_level);
		if (ua->ua_level == UAD_LEAF_LEVEL) {
			ua->u.slb_entries[idx].slbv = 0;
			eieio();
			ua->u.slb_entries[idx].slbe = 0;
			clrbit(&ua->ua_alloc, idx);
			return;
		}

		ua = ua->u.ua_child[idx];
		if (ua == NULL ||
		    esid2base(esid, ua->ua_level) != ua->ua_base) {
			/* Perhaps just return instead of assert? */
			KASSERT(0,
			    ("Asked to remove an entry that was never inserted!"));
			return;
		}
	}
}

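/*
 * Recursively free an SLB tree node: descend through interior nodes,
 * release the VSID of every valid leaf entry, and return the node
 * itself to the UMA zone.
 */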
static void
free_slb_tree_node(struct slbtnode *ua)
{
	int idx;

	for (idx = 0; idx < 16; idx++) {
		if (ua->ua_level != UAD_LEAF_LEVEL) {
			if (ua->u.ua_child[idx] != NULL)
				free_slb_tree_node(ua->u.ua_child[idx]);
		} else {
			if (ua->u.slb_entries[idx].slbv != 0)
				moea64_release_vsid(ua->u.slb_entries[idx].slbv
				    >> SLBV_VSID_SHIFT);
		}
	}

	uma_zfree(slbt_zone, ua);
}

void
slb_free_tree(pmap_t pm)
{

	free_slb_tree_node(pm->pm_slb_tree_root);
}

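/*
 * Allocate an empty SLB tree: a single root node at UAD_ROOT_LEVEL.
 * Leaves and intermediate nodes are added on demand by
 * allocate_user_vsid().
 */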
struct slbtnode *
slb_alloc_tree(void)
{
	struct slbtnode *root;

	root = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
	root->ua_level = UAD_ROOT_LEVEL;

	return (root);
}

/* Lock entries mapping kernel text and stacks */

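/*
 * Insert a kernel mapping into the per-CPU SLB cache.  A free slot is
 * used if one exists (the user slot's slbe is abused as a "cache
 * full" flag once the scan comes up empty); otherwise a victim is
 * chosen pseudo-randomly from the timebase, skipping the slot
 * reserved for the user segment.  Once pmap_bootstrapped is set, the
 * entry is also loaded into this CPU's hardware SLB with slbmte.
 */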
void
slb_insert_kernel(uint64_t slbe, uint64_t slbv)
{
	struct slb *slbcache;
	int i;

	/* We don't want to be preempted while modifying the kernel map */
	critical_enter();

	slbcache = PCPU_GET(slb);

	/* Check for an unused slot, abusing the user slot as a full flag */
	if (slbcache[USER_SLB_SLOT].slbe == 0) {
		for (i = 0; i < n_slbs; i++) {
			if (i == USER_SLB_SLOT)
				continue;
			if (!(slbcache[i].slbe & SLBE_VALID))
				goto fillkernslb;
		}

		if (i == n_slbs)
			slbcache[USER_SLB_SLOT].slbe = 1;
	}

	i = mftb() % n_slbs;
	if (i == USER_SLB_SLOT)
		i = (i+1) % n_slbs;

fillkernslb:
	KASSERT(i != USER_SLB_SLOT,
	    ("Filling user SLB slot with a kernel mapping"));
	slbcache[i].slbv = slbv;
	slbcache[i].slbe = slbe | (uint64_t)i;

	/* If it is for this CPU, put it in the SLB right away */
	if (pmap_bootstrapped) {
		/* slbie not required */
		__asm __volatile ("slbmte %0, %1" ::
		    "r"(slbcache[i].slbv), "r"(slbcache[i].slbe));
	}

	critical_exit();
}

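/*
 * Insert an entry into a pmap's user SLB cache.  Slots are filled
 * sequentially until the cache is full, after which a victim slot is
 * chosen pseudo-randomly from the timebase.  The single pointer store
 * keeps the replacement atomic with respect to the SLB fault handler
 * in trap_subr.
 */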
void
slb_insert_user(pmap_t pm, struct slb *slb)
{
	int i;

	PMAP_LOCK_ASSERT(pm, MA_OWNED);

	if (pm->pm_slb_len < n_slbs) {
		i = pm->pm_slb_len;
		pm->pm_slb_len++;
	} else {
		i = mftb() % n_slbs;
	}

	/* Note that this replacement is atomic with respect to trap_subr */
	pm->pm_slb[i] = slb;
}

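/*
 * Back-end page allocator for the UMA zones below.  Pages are taken
 * from below platform_real_maxaddr() so that the SLB structures
 * remain accessible with address translation disabled; see
 * slb_zone_init(), which only installs this allocator when that
 * limit is smaller than the full address space.
 */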
static void *
slb_uma_real_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	static vm_offset_t realmax = 0;
	void *va;
	vm_page_t m;
	int pflags;

	if (realmax == 0)
		realmax = platform_real_maxaddr();

	*flags = UMA_SLAB_PRIV;
	if ((wait & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
	else
		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
	if (wait & M_ZERO)
		pflags |= VM_ALLOC_ZERO;

	for (;;) {
		m = vm_page_alloc_contig(NULL, 0, pflags, 1, 0, realmax,
		    PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			VM_WAIT;
		} else
			break;
	}

	va = (void *) VM_PAGE_TO_PHYS(m);

	if (!hw_direct_map)
		pmap_kenter((vm_offset_t)va, VM_PAGE_TO_PHYS(m));

	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		bzero(va, PAGE_SIZE);

	return (va);
}

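/*
 * Create the UMA zones for SLB tree nodes and per-pmap SLB caches.
 * On platforms where only part of memory is addressable in real mode,
 * the zones are switched to the slb_uma_real_alloc() back end so
 * their allocations land below platform_real_maxaddr().
 */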
static void
slb_zone_init(void *dummy)
{

	slbt_zone = uma_zcreate("SLB tree node", sizeof(struct slbtnode),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
	slb_cache_zone = uma_zcreate("SLB cache",
	    (n_slbs + 1)*sizeof(struct slb *), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_VM);

	if (platform_real_maxaddr() != VM_MAX_ADDRESS) {
		uma_zone_set_allocf(slb_cache_zone, slb_uma_real_alloc);
		uma_zone_set_allocf(slbt_zone, slb_uma_real_alloc);
	}
}

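/*
 * Allocate and free the zeroed, fixed-size array of SLB entry
 * pointers (n_slbs + 1 slots) that backs a pmap's user SLB cache.
 */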
struct slb **
slb_alloc_user_cache(void)
{
	return (uma_zalloc(slb_cache_zone, M_ZERO));
}

void
slb_free_user_cache(struct slb **slb)
{
	uma_zfree(slb_cache_zone, slb);
}