/*-
 * Copyright (c) 2010 Nathan Whitehorn
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: releng/11.0/sys/powerpc/aim/slb.c 295880 2016-02-22 09:02:20Z skra $
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <machine/md_var.h>
#include <machine/platform.h>
#include <machine/vmparam.h>

uintptr_t moea64_get_unique_vsid(void);
void moea64_release_vsid(uint64_t vsid);
static void slb_zone_init(void *);

static uma_zone_t slbt_zone;
static uma_zone_t slb_cache_zone;
int n_slbs = 64;

SYSINIT(slb_zone_init, SI_SUB_KMEM, SI_ORDER_ANY, slb_zone_init, NULL);

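/*
 * User segment info is kept in a 16-way radix tree (four esid bits per
 * level).  The union below is discriminated by ua_level: leaves
 * (ua_level == UAD_LEAF_LEVEL) hold SLB entries directly, while
 * interior nodes hold child pointers.  ua_alloc is a bitmask of the
 * slots currently in use.
 */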
struct slbtnode {
	uint16_t	ua_alloc;
	uint8_t		ua_level;
	/* Only 36 bits needed for full 64-bit address space. */
	uint64_t	ua_base;
	union {
		struct slbtnode	*ua_child[16];
		struct slb	slb_entries[16];
	} u;
};

/*
 * For a full 64-bit address space, there are 36 bits in play in an
 * esid, i.e. nine 4-bit digits, giving tree levels 0 (the leaf)
 * through 8 (the root).
 *
 * |3333|3322|2222|2222|1111|1111|11  |    |    |  esid
 * |5432|1098|7654|3210|9876|5432|1098|7654|3210|  bits
 * +----+----+----+----+----+----+----+----+----+--------
 * | 8  | 7  | 6  | 5  | 4  | 3  | 2  | 1  | 0  | level
 */
#define UAD_ROOT_LEVEL  8
#define UAD_LEAF_LEVEL  0

static inline int
esid2idx(uint64_t esid, int level)
{
	int shift;

	shift = level * 4;
	return ((esid >> shift) & 0xF);
}

/*
 * The ua_base field must have zeroes in its low 4*(level+1) bits;
 * i.e. only the digits above the node's own level may be set, which
 * is exactly what esid2base() computes.
 */
#define uad_baseok(ua)                          \
	(esid2base(ua->ua_base, ua->ua_level) == ua->ua_base)

static inline uint64_t
esid2base(uint64_t esid, int level)
{
	uint64_t mask;
	int shift;

	shift = (level + 1) * 4;
	mask = ~((1ULL << shift) - 1);
	return (esid & mask);
}
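
/*
 * Worked example (illustrative): for esid 0x123456789, esid2idx()
 * yields 9 at level 0 and 8 at level 1, while esid2base(0x123456789, 1)
 * masks off the low two digits, giving 0x123456700.
 */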

/*
 * Allocate a new leaf node for the specified esid/vmhandle from the
 * parent node.
 */
static struct slb *
make_new_leaf(uint64_t esid, uint64_t slbv, struct slbtnode *parent)
{
	struct slbtnode *child;
	struct slb *retval;
	int idx;

	idx = esid2idx(esid, parent->ua_level);
	KASSERT(parent->u.ua_child[idx] == NULL, ("Child already exists!"));

	/* unlock and M_WAITOK and loop? */
	child = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
	KASSERT(child != NULL, ("unhandled NULL case"));

	child->ua_level = UAD_LEAF_LEVEL;
	child->ua_base = esid2base(esid, child->ua_level);
	idx = esid2idx(esid, child->ua_level);
	child->u.slb_entries[idx].slbv = slbv;
	child->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
	setbit(&child->ua_alloc, idx);

	retval = &child->u.slb_entries[idx];

	/*
	 * The above stores must be visible before the next one, so
	 * that a lockless searcher always sees a valid path through
	 * the tree.
	 */
	powerpc_lwsync();

	idx = esid2idx(esid, parent->ua_level);
	parent->u.ua_child[idx] = child;
	setbit(&parent->ua_alloc, idx);

	return (retval);
}

/*
 * Allocate a new intermediate node to fit between the parent and
 * esid.
 */
static struct slbtnode *
make_intermediate(uint64_t esid, struct slbtnode *parent)
{
	struct slbtnode *child, *inter;
	int idx, level;

	idx = esid2idx(esid, parent->ua_level);
	child = parent->u.ua_child[idx];
	KASSERT(esid2base(esid, child->ua_level) != child->ua_base,
	    ("No need for an intermediate node?"));

	/*
	 * Find the level where the existing child and our new esid
	 * meet.  It must be lower than parent->ua_level or we would
	 * have chosen a different index in parent.
	 */
	level = child->ua_level + 1;
	while (esid2base(esid, level) !=
	    esid2base(child->ua_base, level))
		level++;
	KASSERT(level < parent->ua_level,
	    ("Found splitting level %d for %09jx and %09jx, "
	    "but it's the same as %p's",
	    level, esid, child->ua_base, parent));

	/* unlock and M_WAITOK and loop? */
	inter = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
	KASSERT(inter != NULL, ("unhandled NULL case"));

	/* Set up intermediate node to point to child ... */
	inter->ua_level = level;
	inter->ua_base = esid2base(esid, inter->ua_level);
	idx = esid2idx(child->ua_base, inter->ua_level);
	inter->u.ua_child[idx] = child;
	setbit(&inter->ua_alloc, idx);
	powerpc_lwsync();

	/* Set up parent to point to intermediate node ... */
	idx = esid2idx(inter->ua_base, parent->ua_level);
	parent->u.ua_child[idx] = inter;
	setbit(&parent->ua_alloc, idx);

	return (inter);
}
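
/*
 * Illustrative example: with an existing leaf whose ua_base is
 * 0x000000100 and a new esid of 0x000001000, the two bases first agree
 * at level 3, so make_intermediate() inserts a level-3 node (ua_base 0)
 * under which the old leaf sits at index 0 and the new leaf, created by
 * a subsequent make_new_leaf(), sits at index 1.
 */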

uint64_t
kernel_va_to_slbv(vm_offset_t va)
{
	uint64_t slbv;

	/* Set kernel VSID to deterministic value */
	slbv = (KERNEL_VSID((uintptr_t)va >> ADDR_SR_SHFT)) << SLBV_VSID_SHIFT;

	/* Figure out if this is a large-page mapping */
	if (hw_direct_map && va < VM_MIN_KERNEL_ADDRESS) {
		/*
		 * XXX: If we have set up a direct map, we assume all
		 * physical memory is mapped with large pages.
		 */
		if (mem_valid(va, 0) == 0)
			slbv |= SLBV_L;
	}

	return (slbv);
}

struct slb *
user_va_to_slb_entry(pmap_t pm, vm_offset_t va)
{
	uint64_t esid = va >> ADDR_SR_SHFT;
	struct slbtnode *ua;
	int idx;

	ua = pm->pm_slb_tree_root;

	for (;;) {
		KASSERT(uad_baseok(ua), ("uad base %016jx level %d bad!",
		    ua->ua_base, ua->ua_level));
		idx = esid2idx(esid, ua->ua_level);

		/*
		 * This code is specific to ppc64 where a load is
		 * atomic, so no need for atomic_load macro.
		 */
		if (ua->ua_level == UAD_LEAF_LEVEL)
			return ((ua->u.slb_entries[idx].slbe & SLBE_VALID) ?
			    &ua->u.slb_entries[idx] : NULL);

		/*
		 * The following accesses are implicitly ordered under the POWER
		 * ISA by load dependencies (the store ordering is provided by
		 * the powerpc_lwsync() calls elsewhere) and so are run without
		 * barriers.
		 */
		ua = ua->u.ua_child[idx];
		if (ua == NULL ||
		    esid2base(esid, ua->ua_level) != ua->ua_base)
			return (NULL);
	}

	/* NOTREACHED */
	return (NULL);
}

uint64_t
va_to_vsid(pmap_t pm, vm_offset_t va)
{
	struct slb *entry;

	/* Shortcut kernel case */
	if (pm == kernel_pmap)
		return (KERNEL_VSID((uintptr_t)va >> ADDR_SR_SHFT));

	/*
	 * If there is no vsid for this VA, we need to add a new entry
	 * to the PMAP's segment table.
	 */

	entry = user_va_to_slb_entry(pm, va);

	if (entry == NULL)
		return (allocate_user_vsid(pm,
		    (uintptr_t)va >> ADDR_SR_SHFT, 0));

	return ((entry->slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT);
}

uint64_t
allocate_user_vsid(pmap_t pm, uint64_t esid, int large)
{
	uint64_t vsid, slbv;
	struct slbtnode *ua, *next, *inter;
	struct slb *slb;
	int idx;

	KASSERT(pm != kernel_pmap, ("Attempting to allocate a kernel VSID"));

	PMAP_LOCK_ASSERT(pm, MA_OWNED);
	vsid = moea64_get_unique_vsid();

	slbv = vsid << SLBV_VSID_SHIFT;
	if (large)
		slbv |= SLBV_L;

	ua = pm->pm_slb_tree_root;

	/* Descend to the correct leaf or NULL pointer. */
	for (;;) {
		KASSERT(uad_baseok(ua),
		   ("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));
		idx = esid2idx(esid, ua->ua_level);

		if (ua->ua_level == UAD_LEAF_LEVEL) {
			ua->u.slb_entries[idx].slbv = slbv;
			eieio();
			ua->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT)
			    | SLBE_VALID;
			setbit(&ua->ua_alloc, idx);
			slb = &ua->u.slb_entries[idx];
			break;
		}

		next = ua->u.ua_child[idx];
		if (next == NULL) {
			slb = make_new_leaf(esid, slbv, ua);
			break;
		}

		/*
		 * Check if the next item down has an okay ua_base.
		 * If not, we need to allocate an intermediate node.
		 */
		if (esid2base(esid, next->ua_level) != next->ua_base) {
			inter = make_intermediate(esid, ua);
			slb = make_new_leaf(esid, slbv, inter);
			break;
		}

		ua = next;
	}

	/*
	 * Someone probably wants this soon, and it may be a wired
	 * SLB mapping, so pre-spill this entry.
	 */
	eieio();
	slb_insert_user(pm, slb);

	return (vsid);
}

void
free_vsid(pmap_t pm, uint64_t esid, int large)
{
	struct slbtnode *ua;
	int idx;

	PMAP_LOCK_ASSERT(pm, MA_OWNED);

	ua = pm->pm_slb_tree_root;
	/* Descend to the correct leaf. */
	for (;;) {
		KASSERT(uad_baseok(ua),
		   ("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));

		idx = esid2idx(esid, ua->ua_level);
		if (ua->ua_level == UAD_LEAF_LEVEL) {
			ua->u.slb_entries[idx].slbv = 0;
			eieio();
			ua->u.slb_entries[idx].slbe = 0;
			clrbit(&ua->ua_alloc, idx);
			return;
		}

		ua = ua->u.ua_child[idx];
		if (ua == NULL ||
		    esid2base(esid, ua->ua_level) != ua->ua_base) {
			/* Perhaps just return instead of assert? */
			KASSERT(0,
			    ("Asked to remove an entry that was never inserted!"));
			return;
		}
	}
}

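/*
 * Recursively release the VSIDs recorded in a subtree's leaves and free
 * its nodes.  Note that free_vsid() above only clears entries; tree
 * nodes are reclaimed only here, at pmap teardown.
 */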
static void
free_slb_tree_node(struct slbtnode *ua)
{
	int idx;

	for (idx = 0; idx < 16; idx++) {
		if (ua->ua_level != UAD_LEAF_LEVEL) {
			if (ua->u.ua_child[idx] != NULL)
				free_slb_tree_node(ua->u.ua_child[idx]);
		} else {
			if (ua->u.slb_entries[idx].slbv != 0)
				moea64_release_vsid(ua->u.slb_entries[idx].slbv
				    >> SLBV_VSID_SHIFT);
		}
	}

	uma_zfree(slbt_zone, ua);
}

void
slb_free_tree(pmap_t pm)
{

	free_slb_tree_node(pm->pm_slb_tree_root);
}

struct slbtnode *
slb_alloc_tree(void)
{
	struct slbtnode *root;

	root = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
	KASSERT(root != NULL, ("unhandled NULL case"));
	root->ua_level = UAD_ROOT_LEVEL;

	return (root);
}

/* Lock entries mapping kernel text and stacks */

void
slb_insert_kernel(uint64_t slbe, uint64_t slbv)
{
	struct slb *slbcache;
	int i;

	/* We don't want to be preempted while modifying the kernel map */
	critical_enter();

	slbcache = PCPU_GET(slb);

	/* Check for an unused slot, abusing the user slot as a full flag */
	if (slbcache[USER_SLB_SLOT].slbe == 0) {
		for (i = 0; i < n_slbs; i++) {
			if (i == USER_SLB_SLOT)
				continue;
			if (!(slbcache[i].slbe & SLBE_VALID))
				goto fillkernslb;
		}

		if (i == n_slbs)
			slbcache[USER_SLB_SLOT].slbe = 1;
	}

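	/*
	 * No free slot: evict a pseudo-randomly chosen entry, using the
	 * low bits of the timebase as a cheap source of randomness.
	 */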
	i = mftb() % n_slbs;
	if (i == USER_SLB_SLOT)
		i = (i+1) % n_slbs;

fillkernslb:
	KASSERT(i != USER_SLB_SLOT,
	    ("Filling user SLB slot with a kernel mapping"));
	slbcache[i].slbv = slbv;
	slbcache[i].slbe = slbe | (uint64_t)i;

	/* If it is for this CPU, put it in the SLB right away */
	if (pmap_bootstrapped) {
		/* slbie not required */
		__asm __volatile ("slbmte %0, %1" ::
		    "r"(slbcache[i].slbv), "r"(slbcache[i].slbe));
	}

	critical_exit();
}

void
slb_insert_user(pmap_t pm, struct slb *slb)
{
	int i;

	PMAP_LOCK_ASSERT(pm, MA_OWNED);

	if (pm->pm_slb_len < n_slbs) {
		i = pm->pm_slb_len;
		pm->pm_slb_len++;
	} else {
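		/* Cache full: evict a pseudo-random victim (timebase). */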
		i = mftb() % n_slbs;
	}

	/* Note that this replacement is atomic with respect to trap_subr */
	pm->pm_slb[i] = slb;
}

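/*
 * Back-end page allocator for the UMA zones below.  The SLB fault path
 * runs in real mode (translation off), so, where it matters, these
 * structures must come from physical memory below
 * platform_real_maxaddr().
 */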
static void *
slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
{
	static vm_offset_t realmax = 0;
	void *va;
	vm_page_t m;
	int pflags;

	if (realmax == 0)
		realmax = platform_real_maxaddr();

	*flags = UMA_SLAB_PRIV;
	pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;

	for (;;) {
		m = vm_page_alloc_contig(NULL, 0, pflags, 1, 0, realmax,
		    PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			VM_WAIT;
		} else
			break;
	}

	va = (void *) VM_PAGE_TO_PHYS(m);

	if (!hw_direct_map)
		pmap_kenter((vm_offset_t)va, VM_PAGE_TO_PHYS(m));

	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		bzero(va, PAGE_SIZE);

	return (va);
}

static void
slb_zone_init(void *dummy)
{

	slbt_zone = uma_zcreate("SLB tree node", sizeof(struct slbtnode),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
	slb_cache_zone = uma_zcreate("SLB cache",
	    (n_slbs + 1)*sizeof(struct slb *), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_VM);

	if (platform_real_maxaddr() != VM_MAX_ADDRESS) {
		uma_zone_set_allocf(slb_cache_zone, slb_uma_real_alloc);
		uma_zone_set_allocf(slbt_zone, slb_uma_real_alloc);
	}
}

struct slb **
slb_alloc_user_cache(void)
{
	return (uma_zalloc(slb_cache_zone, M_ZERO));
}

void
slb_free_user_cache(struct slb **slb)
{
	uma_zfree(slb_cache_zone, slb);
}