/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/abs_addr.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define HPTE_LOCK_BIT 3

static DEFINE_SPINLOCK(native_tlbie_lock);

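/*
 * Invalidate one translation globally (broadcast tlbie).  The 4K case
 * uses the L=0 form; other page sizes fold the page-size encoding
 * (penc) into the low bits of the VA and use the L=1 form.
 */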
static inline void __tlbie(unsigned long va, unsigned int psize)
{
	unsigned int penc;

	/* clear top 16 bits, non SLS segment */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		va &= ~0xffful;
		asm volatile("tlbie %0,0" : : "r" (va) : "memory");
		break;
	default:
		penc = mmu_psize_defs[psize].penc;
		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
		va |= penc << 12;
		asm volatile("tlbie %0,1" : : "r" (va) : "memory");
		break;
	}
}

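/*
 * Local (this CPU only) variant of __tlbie().  tlbiel is emitted as a
 * hand-encoded opcode (.long 0x7c000224) with the VA register and the
 * large-page selector OR'd in, mirroring the 0/1 forms of tlbie above.
 */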
static inline void __tlbiel(unsigned long va, unsigned int psize)
{
	unsigned int penc;

	/* clear top 16 bits, non SLS segment */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		va &= ~0xffful;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		penc = mmu_psize_defs[psize].penc;
		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
		va |= penc << 12;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}

}

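/*
 * Flush one translation: use tlbiel when the CPU has it, the caller
 * asked for a local flush and the page size allows it; otherwise fall
 * back to a global tlbie, serialized by native_tlbie_lock on CPUs
 * without CPU_FTR_LOCKLESS_TLBIE.
 */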
static inline void tlbie(unsigned long va, int psize, int local)
{
	unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(va, psize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(va, psize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		spin_unlock(&native_tlbie_lock);
}

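/*
 * Lock an HPTE by spinning on a software-use bit (HPTE_LOCK_BIT) in
 * its first doubleword.
 */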
static inline void native_lock_hpte(hpte_t *hptep)
{
	unsigned long *word = &hptep->v;

	while (1) {
		if (!test_and_set_bit(HPTE_LOCK_BIT, word))
			break;
		while (test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

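/*
 * Drop the HPTE lock.  The lwsync orders any prior stores to the entry
 * before the lock bit is cleared.
 */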
static inline void native_unlock_hpte(hpte_t *hptep)
{
	unsigned long *word = &hptep->v;

	asm volatile("lwsync":::"memory");
	clear_bit(HPTE_LOCK_BIT, word);
}

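/*
 * Insert a translation into the given HPTE group: claim the first
 * invalid slot under the per-entry lock, write the second doubleword,
 * then the first one (which sets HPTE_V_VALID and drops the lock).
 * Returns the slot number within the group, with bit 3 set when this
 * is the secondary group, or -1 if the group is full.
 */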
static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize)
{
	hpte_t *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW("    insert(group=%lx, va=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, va, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (!(hptep->v & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (!(hptep->v & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = hpte_r;
	/* Guarantee the second dword is visible before the valid bit */
	__asm__ __volatile__ ("eieio" : : : "memory");
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = hpte_v;

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

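/*
 * Evict one entry from a full HPTE group so a new one can be inserted.
 * Starts at a pseudo-random slot (low timebase bits) and removes the
 * first valid, non-bolted entry found; the caller is responsible for
 * any TLB invalidation.  Returns the slot number within the group, or
 * -1 if every entry is bolted.
 */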
static long native_hpte_remove(unsigned long hpte_group)
{
	hpte_t *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW("    remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = hptep->v;

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}

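/*
 * Update the protection bits of the HPTE at the given slot if it still
 * maps va.  Returns 0 on a hit and -1 on a miss; the TLB entry is
 * flushed either way.
 */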
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long va, int psize, int local)
{
	hpte_t *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0;

	want_v = hpte_encode_v(va, psize);

	DBG_LOW("    update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
		va, want_v & HPTE_V_AVPN, slot, newpp);

	native_lock_hpte(hptep);

	hpte_v = hptep->v;

	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		native_unlock_hpte(hptep);
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
		native_unlock_hpte(hptep);
	}

	/* Ensure it is out of the tlb too. */
	tlbie(va, psize, local);

	return ret;
}

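/*
 * Look up the HPTE mapping va, searching the primary and then the
 * secondary hash group.  Returns the global slot number (negated when
 * the match is in the secondary group) or -1 if no entry is found.
 */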
static long native_hpte_find(unsigned long va, int psize)
{
	hpte_t *hptep;
	unsigned long hash;
	unsigned long i, j;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(va, mmu_psize_defs[psize].shift);
	want_v = hpte_encode_v(va, psize);

	for (j = 0; j < 2; j++) {
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		for (i = 0; i < HPTES_PER_GROUP; i++) {
			hptep = htab_address + slot;
			hpte_v = hptep->v;

			if (HPTE_V_COMPARE(hpte_v, want_v)
			    && (hpte_v & HPTE_V_VALID)
			    && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
				/* HPTE matches */
				if (j)
					slot = -slot;
				return slot;
			}
			++slot;
		}
		hash = ~hash;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize)
{
	unsigned long vsid, va;
	long slot;
	hpte_t *hptep;

	vsid = get_kernel_vsid(ea);
	va = (vsid << 28) | (ea & 0x0fffffff);

	slot = native_hpte_find(va, psize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N));

	/* Ensure it is out of the tlb too. */
	tlbie(va, psize, 0);
}

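/*
 * Invalidate the HPTE at the given slot if it still maps va, then
 * flush the translation from the TLB.  The whole sequence runs with
 * interrupts disabled.
 */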
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
				   int psize, int local)
{
	hpte_t *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW("    invalidate(va=%016lx, hash: %x)\n", va, slot);

	want_v = hpte_encode_v(va, psize);
	native_lock_hpte(hptep);
	hpte_v = hptep->v;

	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(va, psize, local);

	local_irq_restore(flags);
}

#define LP_SHIFT	12
#define LP_BITS		8
#define LP_MASK(i)	((0xFF >> (i)) << LP_SHIFT)

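/*
 * Reconstruct the page size and virtual address mapped by an HPTE,
 * given the entry itself and its slot number.  Large-page sizes are
 * recovered from the LP bits in the second doubleword; VA bits below
 * the AVPN are recovered from the hash value implied by the slot.
 * Used by native_hpte_clear() at kexec time.
 */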
static void hpte_decode(hpte_t *hpte, unsigned long slot,
			int *psize, unsigned long *va)
{
	unsigned long hpte_r = hpte->r;
	unsigned long hpte_v = hpte->v;
	unsigned long avpn;
	int i, size, shift, penc;

	if (!(hpte_v & HPTE_V_LARGE))
		size = MMU_PAGE_4K;
	else {
		for (i = 0; i < LP_BITS; i++) {
			if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
				break;
		}
		penc = LP_MASK(i+1) >> LP_SHIFT;
		for (size = 0; size < MMU_PAGE_COUNT; size++) {

			/* 4K pages are not represented by LP */
			if (size == MMU_PAGE_4K)
				continue;

			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			if (penc == mmu_psize_defs[size].penc)
				break;
		}
	}

	/* This works for all page sizes, and for 256M and 1T segments */
	shift = mmu_psize_defs[size].shift;
	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;

	if (shift < 23) {
		unsigned long vpi, vsid, pteg;

		pteg = slot / HPTES_PER_GROUP;
		if (hpte_v & HPTE_V_SECONDARY)
			pteg = ~pteg;
		switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
		case MMU_SEGSIZE_256M:
			vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
			break;
		case MMU_SEGSIZE_1T:
			vsid = avpn >> 40;
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			break;
		default:
			avpn = vpi = size = 0;
		}
		avpn |= (vpi << mmu_psize_defs[size].shift);
	}

	*va = avpn;
	*psize = size;
}

/*
 * clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long slot, slots, flags;
	hpte_t *hptep = htab_address;
	unsigned long hpte_v, va;
	unsigned long pteg_count;
	int psize;

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/* we take the tlbie lock and hold it.  Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
	spin_lock(&native_tlbie_lock);

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running, right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = hptep->v;

		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &va);
			hptep->v = 0;
			__tlbie(va, psize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
	spin_unlock(&native_tlbie_lock);
	local_irq_restore(flags);
}

/*
 * Batched hash table flush: we batch the tlbie's to avoid taking and
 * releasing the lock on every entry.
 */
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long va, hash, index, hidx, shift, slot;
	hpte_t *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int i;

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		va = batch->vaddr[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
			hash = hpt_hash(va, shift);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_v(va, psize);
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	if (cpu_has_feature(CPU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			va = batch->vaddr[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize, va, index,
						    shift) {
				__tlbiel(va, psize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			va = batch->vaddr[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize, va, index,
						    shift) {
				__tlbie(va, psize);
			} pte_iterate_hashed_end();
		}
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

#ifdef CONFIG_PPC_PSERIES
/* Disable TLB batching on nighthawk */
static inline int tlb_batching_enabled(void)
{
	struct device_node *root = of_find_node_by_path("/");
	int enabled = 1;

	if (root) {
		const char *model = of_get_property(root, "model", NULL);
		if (model && !strcmp(model, "IBM,9076-N81"))
			enabled = 0;
		of_node_put(root);
	}

	return enabled;
}
#else
static inline int tlb_batching_enabled(void)
{
	return 1;
}
#endif

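/*
 * Install the native (bare metal) hash table callbacks into ppc_md.
 * The batched flush is only hooked up when tlb_batching_enabled()
 * allows it.
 */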
void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
	if (tlb_batching_enabled())
		ppc_md.flush_hash_range = native_flush_hash_range;
}