1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License.  See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Synthesize TLB refill handlers at runtime.
7 *
8 * Copyright (C) 2004, 2005, 2006, 2008  Thiemo Seufer
9 * Copyright (C) 2005, 2007, 2008, 2009  Maciej W. Rozycki
10 * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
11 * Copyright (C) 2008, 2009 Cavium Networks, Inc.
12 *
13 * ... and the days got worse and worse and now you see
14 * I've gone completely out of my mind.
15 *
16 * They're coming to take me away haha
17 * they're coming to take me away hoho hihi haha
18 * to the funny farm where code is beautiful all the time ...
19 *
20 * (Condolences to Napoleon XIV)
21 */
22
23#include <linux/bug.h>
24#include <linux/kernel.h>
25#include <linux/types.h>
26#include <linux/smp.h>
27#include <linux/string.h>
28#include <linux/init.h>
29#include <linux/cache.h>
30
31#include <asm/cacheflush.h>
32#include <asm/pgtable.h>
33#include <asm/war.h>
34#include <asm/uasm.h>
35
36/*
37 * TLB load/store/modify handlers.
38 *
39 * Only the fastpath gets synthesized at runtime, the slowpath for
40 * do_page_fault remains normal asm.
41 */
42extern void tlb_do_page_fault_0(void);
43extern void tlb_do_page_fault_1(void);
44
45struct work_registers {
46	int r1;
47	int r2;
48	int r3;
49};
50
51struct tlb_reg_save {
52	unsigned long a;
53	unsigned long b;
54} ____cacheline_aligned_in_smp;
55
56static struct tlb_reg_save handler_reg_save[NR_CPUS];
57
58static inline int r45k_bvahwbug(void)
59{
60	return 0;
61}
62
63static inline int r4k_250MHZhwbug(void)
64{
65	return 0;
66}
67
68static inline int __maybe_unused bcm1250_m3_war(void)
69{
70	return BCM1250_M3_WAR;
71}
72
73static inline int __maybe_unused r10000_llsc_war(void)
74{
75	return R10000_LLSC_WAR;
76}
77
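/*
 * Helper predicates for Octeon-specific instructions: bbit0/bbit1 are
 * branch-on-bit-clear/set instructions and lwx is an indexed load.  Where
 * available, they replace the andi/beqz (or daddu/ld) pairs emitted for
 * other cores and leave the tested register untouched.
 */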
78static int use_bbit_insns(void)
79{
80	switch (current_cpu_type()) {
81	case CPU_CAVIUM_OCTEON:
82	case CPU_CAVIUM_OCTEON_PLUS:
83	case CPU_CAVIUM_OCTEON2:
84		return 1;
85	default:
86		return 0;
87	}
88}
89
90static int use_lwx_insns(void)
91{
92	switch (current_cpu_type()) {
93	case CPU_CAVIUM_OCTEON2:
94		return 1;
95	default:
96		return 0;
97	}
98}
99#if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
100static bool scratchpad_available(void)
101{
102	return true;
103}
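/*
 * Worked example, purely illustrative: with CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE
 * set to 2 (two 128-byte cache lines), scratchpad_offset(0) below evaluates
 * to 2*128 - 8 - 32768 = -32520, the last doubleword of CVMSEG, and each
 * higher index moves another 8 bytes down from the top.
 */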
104static int scratchpad_offset(int i)
105{
106	/*
107	 * CVMSEG starts at address -32768 and extends for
108	 * CAVIUM_OCTEON_CVMSEG_SIZE 128 byte cache lines.
109	 */
110	i += 1; /* Kernel use starts at the top and works down. */
111	return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768;
112}
113#else
114static bool scratchpad_available(void)
115{
116	return false;
117}
118static int scratchpad_offset(int i)
119{
120	BUG();
121}
122#endif
123/*
124 * Found by experiment: At least some revisions of the 4kc throw a
125 * machine check exception under some circumstances, triggered by
126 * invalid values in the index register.  Delaying the tlbp instruction
127 * until after the next branch, plus adding an additional nop in front
128 * of tlbwi/tlbwr, avoids the invalid index register values.  Nobody
129 * knows why; it's not an issue caused by the core RTL.
130 *
131 */
132static int __cpuinit m4kc_tlbp_war(void)
133{
134	return (current_cpu_data.processor_id & 0xffff00) ==
135	       (PRID_COMP_MIPS | PRID_IMP_4KC);
136}
137
138/* Handle labels (which must be positive integers). */
139enum label_id {
140	label_second_part = 1,
141	label_leave,
142	label_vmalloc,
143	label_vmalloc_done,
144	label_tlbw_hazard,
145	label_split,
146	label_tlbl_goaround1,
147	label_tlbl_goaround2,
148	label_nopage_tlbl,
149	label_nopage_tlbs,
150	label_nopage_tlbm,
151	label_smp_pgtable_change,
152	label_r3000_write_probe_fail,
153	label_large_segbits_fault,
154#ifdef CONFIG_HUGETLB_PAGE
155	label_tlb_huge_update,
156#endif
157};
158
159UASM_L_LA(_second_part)
160UASM_L_LA(_leave)
161UASM_L_LA(_vmalloc)
162UASM_L_LA(_vmalloc_done)
163UASM_L_LA(_tlbw_hazard)
164UASM_L_LA(_split)
165UASM_L_LA(_tlbl_goaround1)
166UASM_L_LA(_tlbl_goaround2)
167UASM_L_LA(_nopage_tlbl)
168UASM_L_LA(_nopage_tlbs)
169UASM_L_LA(_nopage_tlbm)
170UASM_L_LA(_smp_pgtable_change)
171UASM_L_LA(_r3000_write_probe_fail)
172UASM_L_LA(_large_segbits_fault)
173#ifdef CONFIG_HUGETLB_PAGE
174UASM_L_LA(_tlb_huge_update)
175#endif
176
177/*
178 * For debug purposes.
179 */
180static inline void dump_handler(const u32 *handler, int count)
181{
182	int i;
183
184	pr_debug("\t.set push\n");
185	pr_debug("\t.set noreorder\n");
186
187	for (i = 0; i < count; i++)
188		pr_debug("\t%p\t.word 0x%08x\n", &handler[i], handler[i]);
189
190	pr_debug("\t.set pop\n");
191}
192
193/* The only general purpose registers allowed in TLB handlers. */
194#define K0		26
195#define K1		27
196
197/* Some CP0 registers */
198#define C0_INDEX	0, 0
199#define C0_ENTRYLO0	2, 0
200#define C0_TCBIND	2, 2
201#define C0_ENTRYLO1	3, 0
202#define C0_CONTEXT	4, 0
203#define C0_PAGEMASK	5, 0
204#define C0_BADVADDR	8, 0
205#define C0_ENTRYHI	10, 0
206#define C0_EPC		14, 0
207#define C0_XCONTEXT	20, 0
208#define C0_ERROREPC	30, 0
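/*
 * Each of the pairs above is (CP0 register number, select), matching the
 * trailing arguments of the uasm mfc0/mtc0 helpers; e.g.
 * UASM_i_MFC0(p, reg, C0_ENTRYHI) reads CP0 register 10, select 0.
 */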
209
210#ifdef CONFIG_64BIT
211# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT)
212#else
213# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_CONTEXT)
214#endif
215
216/* The worst case length of the handler is around 18 instructions for
217 * R3000-style TLBs and up to 63 instructions for R4000-style TLBs.
218 * Maximum space available is 32 instructions for R3000 and 64
219 * instructions for R4000.
220 *
221 * We deliberately chose a buffer size of 128, so we won't scribble
222 * over anything important on overflow before we panic.
223 */
224static u32 tlb_handler[128] __cpuinitdata;
225
226/* simply assume worst case size for labels and relocs */
227static struct uasm_label labels[128] __cpuinitdata;
228static struct uasm_reloc relocs[128] __cpuinitdata;
229
234static int check_for_high_segbits __cpuinitdata;
235
236static unsigned int kscratch_used_mask __cpuinitdata;
237
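/*
 * Example with made-up mask values: if the CPU advertises
 * cpu_data[0].kscratch_mask == 0x0c (KScratch selects 2 and 3 usable) and
 * select 2 is already taken (kscratch_used_mask == 0x04), then a == 0x08,
 * ffs(a) returns 4 and allocate_kscratch() hands out select 3.
 */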
238static int __cpuinit allocate_kscratch(void)
239{
240	int r;
241	unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask;
242
243	r = ffs(a);
244
245	if (r == 0)
246		return -1;
247
248	r--; /* make it zero based */
249
250	kscratch_used_mask |= (1 << r);
251
252	return r;
253}
254
255static int scratch_reg __cpuinitdata;
256static int pgd_reg __cpuinitdata;
257enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch};
258
259static struct work_registers __cpuinit build_get_work_registers(u32 **p)
260{
261	struct work_registers r;
262
263	int smp_processor_id_reg;
264	int smp_processor_id_sel;
265	int smp_processor_id_shift;
266
267	if (scratch_reg > 0) {
268		/* Save in CPU local C0_KScratch? */
269		UASM_i_MTC0(p, 1, 31, scratch_reg);
270		r.r1 = K0;
271		r.r2 = K1;
272		r.r3 = 1;
273		return r;
274	}
275
276	if (num_possible_cpus() > 1) {
277#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
278		smp_processor_id_shift = 51;
279		smp_processor_id_reg = 20; /* XContext */
280		smp_processor_id_sel = 0;
281#else
282# ifdef CONFIG_32BIT
283		smp_processor_id_shift = 25;
284		smp_processor_id_reg = 4; /* Context */
285		smp_processor_id_sel = 0;
286# endif
287# ifdef CONFIG_64BIT
288		smp_processor_id_shift = 26;
289		smp_processor_id_reg = 4; /* Context */
290		smp_processor_id_sel = 0;
291# endif
292#endif
293		/* Get smp_processor_id */
294		UASM_i_MFC0(p, K0, smp_processor_id_reg, smp_processor_id_sel);
295		UASM_i_SRL_SAFE(p, K0, K0, smp_processor_id_shift);
296
297		/* handler_reg_save index in K0 */
298		UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save)));
299
300		UASM_i_LA(p, K1, (long)&handler_reg_save);
301		UASM_i_ADDU(p, K0, K0, K1);
302	} else {
303		UASM_i_LA(p, K0, (long)&handler_reg_save);
304	}
305	/* K0 now points to save area, save $1 and $2  */
306	UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0);
307	UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0);
308
309	r.r1 = K1;
310	r.r2 = 1;
311	r.r3 = 2;
312	return r;
313}
314
315static void __cpuinit build_restore_work_registers(u32 **p)
316{
317	if (scratch_reg > 0) {
318		UASM_i_MFC0(p, 1, 31, scratch_reg);
319		return;
320	}
321	/* K0 already points to save area, restore $1 and $2  */
322	UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0);
323	UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0);
324}
325
326#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
327
328/*
329 * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current,
330 * so we cannot do r3000 under these circumstances.
331 *
332 * Declare pgd_current here instead of including mmu_context.h to avoid type
333 * conflicts for tlbmiss_handler_setup_pgd
334 */
335extern unsigned long pgd_current[];
336
337/*
338 * The R3000 TLB handler is simple.
339 */
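/*
 * For reference, the sequence synthesized below corresponds roughly to
 * (one instruction per uasm call):
 *
 *	mfc0	k0, c0_badvaddr
 *	lui	k1, %hi(pgd_current)
 *	lw	k1, %lo(pgd_current)(k1)
 *	srl	k0, k0, 22
 *	sll	k0, k0, 2
 *	addu	k1, k1, k0
 *	mfc0	k0, c0_context
 *	lw	k1, 0(k1)
 *	andi	k0, k0, 0xffc
 *	addu	k1, k1, k0
 *	lw	k0, 0(k1)
 *	nop
 *	mtc0	k0, c0_entrylo0
 *	mfc0	k1, c0_epc
 *	tlbwr
 *	jr	k1
 *	rfe
 */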
340static void __cpuinit build_r3000_tlb_refill_handler(void)
341{
342	long pgdc = (long)pgd_current;
343	u32 *p;
344
345	memset(tlb_handler, 0, sizeof(tlb_handler));
346	p = tlb_handler;
347
348	uasm_i_mfc0(&p, K0, C0_BADVADDR);
349	uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */
350	uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1);
351	uasm_i_srl(&p, K0, K0, 22); /* load delay */
352	uasm_i_sll(&p, K0, K0, 2);
353	uasm_i_addu(&p, K1, K1, K0);
354	uasm_i_mfc0(&p, K0, C0_CONTEXT);
355	uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */
356	uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */
357	uasm_i_addu(&p, K1, K1, K0);
358	uasm_i_lw(&p, K0, 0, K1);
359	uasm_i_nop(&p); /* load delay */
360	uasm_i_mtc0(&p, K0, C0_ENTRYLO0);
361	uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */
362	uasm_i_tlbwr(&p); /* cp0 delay */
363	uasm_i_jr(&p, K1);
364	uasm_i_rfe(&p); /* branch delay */
365
366	if (p > tlb_handler + 32)
367		panic("TLB refill handler space exceeded");
368
369	pr_debug("Wrote TLB refill handler (%u instructions).\n",
370		 (unsigned int)(p - tlb_handler));
371
372	memcpy((void *)ebase, tlb_handler, 0x80);
373
374	dump_handler((u32 *)ebase, 32);
375}
376#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
377
378/*
379 * The R4000 TLB handler is much more complicated. We have two
380 * consecutive handler areas with 32 instructions space each.
381 * Since they aren't used at the same time, we can overflow into the
382 * other one.  To keep things simple, we first assume linear space,
383 * then we relocate it to the final handler layout as needed.
384 */
385static u32 final_handler[64] __cpuinitdata;
386
387static void __cpuinit __maybe_unused build_tlb_probe_entry(u32 **p)
388{
389	switch (current_cpu_type()) {
390	/* Found by experiment: R4600 v2.0/R4700 needs this, too.  */
391	case CPU_R4600:
392	case CPU_R4700:
393	case CPU_R5000:
394	case CPU_R5000A:
395	case CPU_NEVADA:
396		uasm_i_nop(p);
397		uasm_i_tlbp(p);
398		break;
399
400	default:
401		uasm_i_tlbp(p);
402		break;
403	}
404}
405
406/*
407 * Write random or indexed TLB entry, and care about the hazards from
408 * the preceding mtc0 and for the following eret.
409 */
410enum tlb_write_entry { tlb_random, tlb_indexed };
411
412static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
413					 struct uasm_reloc **r,
414					 enum tlb_write_entry wmode)
415{
416	void(*tlbw)(u32 **) = NULL;
417
418	switch (wmode) {
419	case tlb_random: tlbw = uasm_i_tlbwr; break;
420	case tlb_indexed: tlbw = uasm_i_tlbwi; break;
421	}
422
423	if (cpu_has_mips_r2) {
424		/*
425		 * The architecture spec says an ehb is required here,
426		 * but a number of cores do not have the hazard and
427		 * using an ehb causes an expensive pipeline stall.
428		 */
429		if (cpu_has_mips_r2_exec_hazard) {
430			switch (current_cpu_type()) {
431			case CPU_14K:
432			case CPU_14KE:
433			case CPU_74K:
434			case CPU_1074K:
435				break;
436
437			default:
438				uasm_i_ehb(p);
439				break;
440			}
441		}
442		tlbw(p);
443		return;
444	}
445
446	switch (current_cpu_type()) {
447	case CPU_R4000PC:
448	case CPU_R4000SC:
449	case CPU_R4000MC:
450	case CPU_R4400PC:
451	case CPU_R4400SC:
452	case CPU_R4400MC:
453		/*
454		 * This branch uses up a mtc0 hazard nop slot and saves
455		 * two nops after the tlbw instruction.
456		 */
457		uasm_il_bgezl(p, r, 0, label_tlbw_hazard);
458		tlbw(p);
459		uasm_l_tlbw_hazard(l, *p);
460		uasm_i_nop(p);
461		break;
462
463	case CPU_R4600:
464	case CPU_R4700:
465	case CPU_R5000:
466	case CPU_R5000A:
467		uasm_i_nop(p);
468		tlbw(p);
469		uasm_i_nop(p);
470		break;
471
472	case CPU_R4300:
473	case CPU_5KC:
474	case CPU_TX49XX:
475	case CPU_PR4450:
476		uasm_i_nop(p);
477		tlbw(p);
478		break;
479
480	case CPU_R10000:
481	case CPU_R12000:
482	case CPU_R14000:
483	case CPU_4KC:
484	case CPU_4KEC:
485	case CPU_14K:
486	case CPU_14KE:
487	case CPU_SB1:
488	case CPU_SB1A:
489	case CPU_4KSC:
490	case CPU_20KC:
491	case CPU_25KF:
492	case CPU_BCM3302:
493	case CPU_BCM4710:
494	case CPU_LOONGSON2:
495	case CPU_BCM6338:
496	case CPU_BCM6345:
497	case CPU_BCM6348:
498	case CPU_BCM6358:
499	case CPU_R5500:
500		if (m4kc_tlbp_war())
501			uasm_i_nop(p);
502	case CPU_ALCHEMY:
503		tlbw(p);
504		break;
505
506	case CPU_NEVADA:
507		uasm_i_nop(p); /* QED specifies 2 nops hazard */
508		/*
509		 * This branch uses up a mtc0 hazard nop slot and saves
510		 * a nop after the tlbw instruction.
511		 */
512		uasm_il_bgezl(p, r, 0, label_tlbw_hazard);
513		tlbw(p);
514		uasm_l_tlbw_hazard(l, *p);
515		break;
516
517	case CPU_RM7000:
518		uasm_i_nop(p);
519		uasm_i_nop(p);
520		uasm_i_nop(p);
521		uasm_i_nop(p);
522		tlbw(p);
523		break;
524
525	case CPU_RM9000:
526		/*
527		 * When the JTLB is updated by tlbwi or tlbwr, a subsequent
528		 * use of the JTLB for instructions should not occur for 4
529		 * cpu cycles and use for data translations should not occur
530		 * for 3 cpu cycles.
531		 */
532		uasm_i_ssnop(p);
533		uasm_i_ssnop(p);
534		uasm_i_ssnop(p);
535		uasm_i_ssnop(p);
536		tlbw(p);
537		uasm_i_ssnop(p);
538		uasm_i_ssnop(p);
539		uasm_i_ssnop(p);
540		uasm_i_ssnop(p);
541		break;
542
543	case CPU_VR4111:
544	case CPU_VR4121:
545	case CPU_VR4122:
546	case CPU_VR4181:
547	case CPU_VR4181A:
548		uasm_i_nop(p);
549		uasm_i_nop(p);
550		tlbw(p);
551		uasm_i_nop(p);
552		uasm_i_nop(p);
553		break;
554
555	case CPU_VR4131:
556	case CPU_VR4133:
557	case CPU_R5432:
558		uasm_i_nop(p);
559		uasm_i_nop(p);
560		tlbw(p);
561		break;
562
563	case CPU_JZRISC:
564		tlbw(p);
565		uasm_i_nop(p);
566		break;
567
568	default:
569		panic("No TLB refill handler yet (CPU type: %d)",
570		      current_cpu_data.cputype);
571		break;
572	}
573}
574
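/*
 * Note on the RIXI variant below: the SRL first drops everything below
 * _PAGE_NO_EXEC, then the ROTR rotates by the remaining distance to
 * _PAGE_GLOBAL.  The net effect is the same right shift by
 * ilog2(_PAGE_GLOBAL) as the non-RIXI case, except that the RI/XI software
 * bits wrap around into the top of the register where EntryLo expects them.
 */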
575static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p,
576								  unsigned int reg)
577{
578	if (kernel_uses_smartmips_rixi) {
579		UASM_i_SRL(p, reg, reg, ilog2(_PAGE_NO_EXEC));
580		UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
581	} else {
582#ifdef CONFIG_64BIT_PHYS_ADDR
583		uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL));
584#else
585		UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL));
586#endif
587	}
588}
589
590#ifdef CONFIG_HUGETLB_PAGE
591
592static __cpuinit void build_restore_pagemask(u32 **p,
593					     struct uasm_reloc **r,
594					     unsigned int tmp,
595					     enum label_id lid,
596					     int restore_scratch)
597{
598	if (restore_scratch) {
599		/* Reset default page size */
600		if (PM_DEFAULT_MASK >> 16) {
601			uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
602			uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff);
603			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
604			uasm_il_b(p, r, lid);
605		} else if (PM_DEFAULT_MASK) {
606			uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK);
607			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
608			uasm_il_b(p, r, lid);
609		} else {
610			uasm_i_mtc0(p, 0, C0_PAGEMASK);
611			uasm_il_b(p, r, lid);
612		}
613		if (scratch_reg > 0)
614			UASM_i_MFC0(p, 1, 31, scratch_reg);
615		else
616			UASM_i_LW(p, 1, scratchpad_offset(0), 0);
617	} else {
618		/* Reset default page size */
619		if (PM_DEFAULT_MASK >> 16) {
620			uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
621			uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff);
622			uasm_il_b(p, r, lid);
623			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
624		} else if (PM_DEFAULT_MASK) {
625			uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK);
626			uasm_il_b(p, r, lid);
627			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
628		} else {
629			uasm_il_b(p, r, lid);
630			uasm_i_mtc0(p, 0, C0_PAGEMASK);
631		}
632	}
633}
634
635static __cpuinit void build_huge_tlb_write_entry(u32 **p,
636						 struct uasm_label **l,
637						 struct uasm_reloc **r,
638						 unsigned int tmp,
639						 enum tlb_write_entry wmode,
640						 int restore_scratch)
641{
642	/* Set huge page tlb entry size */
643	uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16);
644	uasm_i_ori(p, tmp, tmp, PM_HUGE_MASK & 0xffff);
645	uasm_i_mtc0(p, tmp, C0_PAGEMASK);
646
647	build_tlb_write_entry(p, l, r, wmode);
648
649	build_restore_pagemask(p, r, tmp, label_leave, restore_scratch);
650}
651
652/*
653 * Check if Huge PTE is present, if so then jump to LABEL.
654 */
655static void __cpuinit
656build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp,
657		unsigned int pmd, int lid)
658{
659	UASM_i_LW(p, tmp, 0, pmd);
660	if (use_bbit_insns()) {
661		uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid);
662	} else {
663		uasm_i_andi(p, tmp, tmp, _PAGE_HUGE);
664		uasm_il_bnez(p, r, tmp, lid);
665	}
666}
667
668static __cpuinit void build_huge_update_entries(u32 **p,
669						unsigned int pte,
670						unsigned int tmp)
671{
672	int small_sequence;
673
674	/*
675	 * A huge PTE describes an area the size of the
676	 * configured huge page size.  This is twice the size
677	 * of the large TLB entry we intend to use.
678	 * A TLB entry half the size of the configured
679	 * huge page size is configured into entrylo0
680	 * and entrylo1 to cover the contiguous huge PTE
681	 * address space.
682	 */
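	/*
	 * Worked example under common assumptions (4KB base pages, 2MB huge
	 * pages, PFN field pre-shifted to bit 6 of the PTE): half a huge
	 * page is 1MB, which as an entrylo increment is
	 * (HPAGE_SIZE/2 >> 12) << 6 == HPAGE_SIZE >> 7 == 0x4000.  That fits
	 * in 16 bits, so small_sequence is true and a single ADDIU below
	 * converts entrylo0 into entrylo1.
	 */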
683	small_sequence = (HPAGE_SIZE >> 7) < 0x10000;
684
685	/* We can clobber tmp.  It isn't used after this.*/
686	if (!small_sequence)
687		uasm_i_lui(p, tmp, HPAGE_SIZE >> (7 + 16));
688
689	build_convert_pte_to_entrylo(p, pte);
690	UASM_i_MTC0(p, pte, C0_ENTRYLO0); /* load it */
691	/* convert to entrylo1 */
692	if (small_sequence)
693		UASM_i_ADDIU(p, pte, pte, HPAGE_SIZE >> 7);
694	else
695		UASM_i_ADDU(p, pte, pte, tmp);
696
697	UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */
698}
699
700static __cpuinit void build_huge_handler_tail(u32 **p,
701					      struct uasm_reloc **r,
702					      struct uasm_label **l,
703					      unsigned int pte,
704					      unsigned int ptr)
705{
706#ifdef CONFIG_SMP
707	UASM_i_SC(p, pte, 0, ptr);
708	uasm_il_beqz(p, r, pte, label_tlb_huge_update);
709	UASM_i_LW(p, pte, 0, ptr); /* Needed because SC killed our PTE */
710#else
711	UASM_i_SW(p, pte, 0, ptr);
712#endif
713	build_huge_update_entries(p, pte, ptr);
714	build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0);
715}
716#endif /* CONFIG_HUGETLB_PAGE */
717
718#ifdef CONFIG_64BIT
719/*
720 * TMP and PTR are scratch.
721 * TMP will be clobbered, PTR will hold the pmd entry.
722 */
723static void __cpuinit
724build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
725		 unsigned int tmp, unsigned int ptr)
726{
727#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
728	long pgdc = (long)pgd_current;
729#endif
730	/*
731	 * The vmalloc handling is not in the hotpath.
732	 */
733	uasm_i_dmfc0(p, tmp, C0_BADVADDR);
734
735	if (check_for_high_segbits) {
736		/*
737	 * The kernel currently implicitly assumes that the
738		 * MIPS SEGBITS parameter for the processor is
739		 * (PGDIR_SHIFT+PGDIR_BITS) or less, and will never
740		 * allocate virtual addresses outside the maximum
741		 * range for SEGBITS = (PGDIR_SHIFT+PGDIR_BITS). But
742		 * that doesn't prevent user code from accessing the
743		 * higher xuseg addresses.  Here, we make sure that
744		 * everything but the lower xuseg addresses goes down
745		 * the module_alloc/vmalloc path.
746		 */
747		uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
748		uasm_il_bnez(p, r, ptr, label_vmalloc);
749	} else {
750		uasm_il_bltz(p, r, tmp, label_vmalloc);
751	}
752	/* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */
753
754#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
755	if (pgd_reg != -1) {
756		/* pgd is in pgd_reg */
757		UASM_i_MFC0(p, ptr, 31, pgd_reg);
758	} else {
759		/*
760		 * &pgd << 11 stored in CONTEXT [23..63].
761		 */
762		UASM_i_MFC0(p, ptr, C0_CONTEXT);
763
764		/* Clear lower 23 bits of context. */
765		uasm_i_dins(p, ptr, 0, 0, 23);
766
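		/*
		 * At this point ptr is effectively the pgd's physical
		 * address shifted left by 11 (the BadVPN2 bits were just
		 * cleared).  The ori below sets bits 10, 8 and 6; the
		 * rotate-right by 11 then moves the pointer back down to
		 * its natural position and wraps those bits into bits 63,
		 * 61 and 59, forming the xkphys cached prefix, so ptr
		 * becomes a directly dereferenceable pointer to the pgd.
		 */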
767		/* 1 0  1 0 1  << 6  xkphys cached */
768		uasm_i_ori(p, ptr, ptr, 0x540);
769		uasm_i_drotr(p, ptr, ptr, 11);
770	}
771#elif defined(CONFIG_SMP)
772# ifdef  CONFIG_MIPS_MT_SMTC
773	/*
774	 * SMTC uses TCBind value as "CPU" index
775	 */
776	uasm_i_mfc0(p, ptr, C0_TCBIND);
777	uasm_i_dsrl_safe(p, ptr, ptr, 19);
778# else
779	/*
780	 * 64 bit SMP running in XKPHYS has smp_processor_id() << 3
781	 * stored in CONTEXT.
782	 */
783	uasm_i_dmfc0(p, ptr, C0_CONTEXT);
784	uasm_i_dsrl_safe(p, ptr, ptr, 23);
785# endif
786	UASM_i_LA_mostly(p, tmp, pgdc);
787	uasm_i_daddu(p, ptr, ptr, tmp);
788	uasm_i_dmfc0(p, tmp, C0_BADVADDR);
789	uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);
790#else
791	UASM_i_LA_mostly(p, ptr, pgdc);
792	uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);
793#endif
794
795	uasm_l_vmalloc_done(l, *p);
796
797	/* get pgd offset in bytes */
798	uasm_i_dsrl_safe(p, tmp, tmp, PGDIR_SHIFT - 3);
799
800	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
801	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
802#ifndef __PAGETABLE_PMD_FOLDED
803	uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
804	uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */
805	uasm_i_dsrl_safe(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */
806	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3);
807	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */
808#endif
809}
810
811/*
812 * BVADDR is the faulting address, PTR is scratch.
813 * PTR will hold the pgd for vmalloc.
814 */
815static void __cpuinit
816build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
817			unsigned int bvaddr, unsigned int ptr,
818			enum vmalloc64_mode mode)
819{
820	long swpd = (long)swapper_pg_dir;
821	int single_insn_swpd;
822	int did_vmalloc_branch = 0;
823
824	single_insn_swpd = uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd);
825
826	uasm_l_vmalloc(l, *p);
827
828	if (mode != not_refill && check_for_high_segbits) {
829		if (single_insn_swpd) {
830			uasm_il_bltz(p, r, bvaddr, label_vmalloc_done);
831			uasm_i_lui(p, ptr, uasm_rel_hi(swpd));
832			did_vmalloc_branch = 1;
833			/* fall through */
834		} else {
835			uasm_il_bgez(p, r, bvaddr, label_large_segbits_fault);
836		}
837	}
838	if (!did_vmalloc_branch) {
839		if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) {
840			uasm_il_b(p, r, label_vmalloc_done);
841			uasm_i_lui(p, ptr, uasm_rel_hi(swpd));
842		} else {
843			UASM_i_LA_mostly(p, ptr, swpd);
844			uasm_il_b(p, r, label_vmalloc_done);
845			if (uasm_in_compat_space_p(swpd))
846				uasm_i_addiu(p, ptr, ptr, uasm_rel_lo(swpd));
847			else
848				uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd));
849		}
850	}
851	if (mode != not_refill && check_for_high_segbits) {
852		uasm_l_large_segbits_fault(l, *p);
853		/*
854	 * We get here if we are an xsseg address, or if we are
855	 * an xuseg address above the (PGDIR_SHIFT+PGDIR_BITS) boundary.
856	 *
857	 * Ignoring xsseg (assumed disabled, so it would generate
858	 * address errors), the only remaining possibility
859		 * is the upper xuseg addresses.  On processors with
860		 * TLB_SEGBITS <= PGDIR_SHIFT+PGDIR_BITS, these
861		 * addresses would have taken an address error. We try
862		 * to mimic that here by taking a load/istream page
863		 * fault.
864		 */
865		UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
866		uasm_i_jr(p, ptr);
867
868		if (mode == refill_scratch) {
869			if (scratch_reg > 0)
870				UASM_i_MFC0(p, 1, 31, scratch_reg);
871			else
872				UASM_i_LW(p, 1, scratchpad_offset(0), 0);
873		} else {
874			uasm_i_nop(p);
875		}
876	}
877}
878
879#else /* !CONFIG_64BIT */
880
881/*
882 * TMP and PTR are scratch.
883 * TMP will be clobbered, PTR will hold the pgd entry.
884 */
885static void __cpuinit __maybe_unused
886build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
887{
888	long pgdc = (long)pgd_current;
889
890	/* PGD base in Context, ContextConfig allows PGDE computation in hardware */
891	if (cpu_has_pgdc_in_context) {
892		uasm_i_mfc0(p, ptr, C0_CONTEXT);
893	} else {
894		/* PGD base in ErrorEPC, which is used as a scratch register */
895		if (cpu_has_pgdc_in_errorepc) {
896			uasm_i_mfc0(p, tmp, C0_BADVADDR);
897			uasm_i_mfc0(p, ptr, C0_ERROREPC);
898		} else {	/* PGD base in memory, array of per-cpu values */
899			/* 32 bit SMP has smp_processor_id() stored in CONTEXT. */
900#ifdef CONFIG_SMP
901#ifdef  CONFIG_MIPS_MT_SMTC
902			/*
903			 * SMTC uses TCBind value as "CPU" index
904			 */
905			uasm_i_mfc0(p, ptr, C0_TCBIND);
906			UASM_i_LA_mostly(p, tmp, pgdc);
907			uasm_i_srl(p, ptr, ptr, 19);
908#else
909			/*
910			 * smp_processor_id() << 3 is stored in CONTEXT.
911			 * - or ErrorEPC
912			 */
913#ifdef CONFIG_MIPS_TLB_SMPID_ERROREPC
914			uasm_i_mfc0(p, ptr, C0_ERROREPC);
915#else
916			uasm_i_mfc0(p, ptr, C0_CONTEXT);
917#endif
918			UASM_i_LA_mostly(p, tmp, pgdc);
919			uasm_i_srl(p, ptr, ptr, 23);
920#endif
921			uasm_i_addu(p, ptr, tmp, ptr);
922#else
923			UASM_i_LA_mostly(p, ptr, pgdc);
924#endif
925			uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
926			uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
927		}
928
929		/* Extract pgd offset bits from tmp, insert into pgd base */
930		if (cpu_has_mips32r2) {
931			uasm_i_ext(p, tmp, tmp, PGDIR_SHIFT, (32-PGDIR_SHIFT));
932			uasm_i_ins(p, ptr, tmp, PGD_T_LOG2, (32-PGDIR_SHIFT));
933		} else {
934			uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
935			uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);
936			uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
937		}
938	}
939}
940
941#endif /* !CONFIG_64BIT */
942
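/*
 * Illustrative numbers for build_adjust_context(), assuming a typical
 * configuration (4KB pages, PAGE_SHIFT == 12, 32-bit PTEs so
 * PTE_T_LOG2 == 2, PTRS_PER_PTE == 1024): shift == 4 - 3 + 0 == 1 and
 * mask == (512 - 1) << 3 == 0xff8, turning the context value into the
 * byte offset of the even/odd PTE pair within the page table.
 */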
943static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx)
944{
945	unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12;
946	unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1);
947
948	switch (current_cpu_type()) {
949	case CPU_VR41XX:
950	case CPU_VR4111:
951	case CPU_VR4121:
952	case CPU_VR4122:
953	case CPU_VR4131:
954	case CPU_VR4181:
955	case CPU_VR4181A:
956	case CPU_VR4133:
957		shift += 2;
958		break;
959
960	default:
961		break;
962	}
963
964	if (shift)
965		UASM_i_SRL(p, ctx, ctx, shift);
966	uasm_i_andi(p, ctx, ctx, mask);
967}
968
969static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
970{
971#ifndef CONFIG_64BIT
972	if (cpu_has_mips32r2) {
973		/* For MIPS32R2, PTE ptr offset is obtained from BadVAddr */
974		UASM_i_MFC0(p, tmp, C0_BADVADDR);
975		UASM_i_LW(p, ptr, 0, ptr);
976		UASM_i_EXT(p, tmp, tmp, PAGE_SHIFT+1, PGDIR_SHIFT-PAGE_SHIFT-1);
977		UASM_i_INS(p, ptr, tmp, PTE_T_LOG2+1, PGDIR_SHIFT-PAGE_SHIFT-1);
978	} else {
979#else /* CONFIG_64BIT */
980	{
981#endif /* CONFIG_64BIT */
982		switch (current_cpu_type()) {
983		case CPU_NEVADA:
984			UASM_i_LW(p, ptr, 0, ptr);
985			GET_CONTEXT(p, tmp); /* get context reg */
986			break;
987
988		default:
989			GET_CONTEXT(p, tmp); /* get context reg */
990			UASM_i_LW(p, ptr, 0, ptr);
991			break;
992		}
993
994		build_adjust_context(p, tmp);
995		UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */
996	}
997}
998
999static void __cpuinit build_update_entries(u32 **p, unsigned int tmp,
1000					unsigned int ptep)
1001{
1002	/*
1003	 * 64bit address support (36bit on a 32bit CPU) in a 32bit
1004	 * kernel is a special case. Only a few CPUs use it.
1005	 */
1006#ifdef CONFIG_64BIT_PHYS_ADDR
1007	if (cpu_has_64bits) {
1008		uasm_i_ld(p, tmp, 0, ptep); /* get even pte */
1009		uasm_i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
1010		if (kernel_uses_smartmips_rixi) {
1011			UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_NO_EXEC));
1012			UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_NO_EXEC));
1013			UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
1014			UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1015			UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
1016		} else {
1017			uasm_i_dsrl_safe(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */
1018			UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1019			uasm_i_dsrl_safe(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */
1020		}
1021		UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */
1022	} else {
1023		int pte_off_even = sizeof(pte_t) / 2;
1024		int pte_off_odd = pte_off_even + sizeof(pte_t);
1025
1026		/* The pte entries are pre-shifted */
1027		uasm_i_lw(p, tmp, pte_off_even, ptep); /* get even pte */
1028		UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1029		uasm_i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */
1030		UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */
1031	}
1032#else
1033	UASM_i_LW(p, tmp, 0, ptep); /* get even pte */
1034	UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
1035	if (r45k_bvahwbug())
1036		build_tlb_probe_entry(p);
1037	if (kernel_uses_smartmips_rixi) {
1038		UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_NO_EXEC));
1039		UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_NO_EXEC));
1040		UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
1041		if (r4k_250MHZhwbug())
1042			UASM_i_MTC0(p, 0, C0_ENTRYLO0);
1043		UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1044		UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
1045	} else {
1046		UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */
1047		if (r4k_250MHZhwbug())
1048			UASM_i_MTC0(p, 0, C0_ENTRYLO0);
1049		UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
1050		UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */
1051		if (r45k_bvahwbug())
1052			uasm_i_mfc0(p, tmp, C0_INDEX);
1053	}
1054	if (r4k_250MHZhwbug())
1055		UASM_i_MTC0(p, 0, C0_ENTRYLO1);
1056	UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */
1057#endif
1058}
1059
1060struct mips_huge_tlb_info {
1061	int huge_pte;
1062	int restore_scratch;
1063};
1064
1065static struct mips_huge_tlb_info __cpuinit
1066build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
1067			       struct uasm_reloc **r, unsigned int tmp,
1068			       unsigned int ptr, int c0_scratch)
1069{
1070	struct mips_huge_tlb_info rv;
1071	unsigned int even, odd;
1072	int vmalloc_branch_delay_filled = 0;
1073	const int scratch = 1; /* Our extra working register */
1074
1075	rv.huge_pte = scratch;
1076	rv.restore_scratch = 0;
1077
1078	if (check_for_high_segbits) {
1079		UASM_i_MFC0(p, tmp, C0_BADVADDR);
1080
1081		if (pgd_reg != -1)
1082			UASM_i_MFC0(p, ptr, 31, pgd_reg);
1083		else
1084			UASM_i_MFC0(p, ptr, C0_CONTEXT);
1085
1086		if (c0_scratch >= 0)
1087			UASM_i_MTC0(p, scratch, 31, c0_scratch);
1088		else
1089			UASM_i_SW(p, scratch, scratchpad_offset(0), 0);
1090
1091		uasm_i_dsrl_safe(p, scratch, tmp,
1092				 PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
1093		uasm_il_bnez(p, r, scratch, label_vmalloc);
1094
1095		if (pgd_reg == -1) {
1096			vmalloc_branch_delay_filled = 1;
1097			/* Clear lower 23 bits of context. */
1098			uasm_i_dins(p, ptr, 0, 0, 23);
1099		}
1100	} else {
1101		if (pgd_reg != -1)
1102			UASM_i_MFC0(p, ptr, 31, pgd_reg);
1103		else
1104			UASM_i_MFC0(p, ptr, C0_CONTEXT);
1105
1106		UASM_i_MFC0(p, tmp, C0_BADVADDR);
1107
1108		if (c0_scratch >= 0)
1109			UASM_i_MTC0(p, scratch, 31, c0_scratch);
1110		else
1111			UASM_i_SW(p, scratch, scratchpad_offset(0), 0);
1112
1113		if (pgd_reg == -1)
1114			/* Clear lower 23 bits of context. */
1115			uasm_i_dins(p, ptr, 0, 0, 23);
1116
1117		uasm_il_bltz(p, r, tmp, label_vmalloc);
1118	}
1119
1120	if (pgd_reg == -1) {
1121		vmalloc_branch_delay_filled = 1;
1122		/* 1 0  1 0 1  << 6  xkphys cached */
1123		uasm_i_ori(p, ptr, ptr, 0x540);
1124		uasm_i_drotr(p, ptr, ptr, 11);
1125	}
1126
1127#ifdef __PAGETABLE_PMD_FOLDED
1128#define LOC_PTEP scratch
1129#else
1130#define LOC_PTEP ptr
1131#endif
1132
1133	if (!vmalloc_branch_delay_filled)
1134		/* get pgd offset in bytes */
1135		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);
1136
1137	uasm_l_vmalloc_done(l, *p);
1138
1139	/*
1140	 *                         tmp          ptr
1141	 * fall-through case =   badvaddr  *pgd_current
1142	 * vmalloc case      =   badvaddr  swapper_pg_dir
1143	 */
1144
1145	if (vmalloc_branch_delay_filled)
1146		/* get pgd offset in bytes */
1147		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);
1148
1149#ifdef __PAGETABLE_PMD_FOLDED
1150	GET_CONTEXT(p, tmp); /* get context reg */
1151#endif
1152	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3);
1153
1154	if (use_lwx_insns()) {
1155		UASM_i_LWX(p, LOC_PTEP, scratch, ptr);
1156	} else {
1157		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */
1158		uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */
1159	}
1160
1161#ifndef __PAGETABLE_PMD_FOLDED
1162	/* get pmd offset in bytes */
1163	uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3);
1164	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3);
1165	GET_CONTEXT(p, tmp); /* get context reg */
1166
1167	if (use_lwx_insns()) {
1168		UASM_i_LWX(p, scratch, scratch, ptr);
1169	} else {
1170		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */
1171		UASM_i_LW(p, scratch, 0, ptr);
1172	}
1173#endif
1174	/* Adjust the context during the load latency. */
1175	build_adjust_context(p, tmp);
1176
1177#ifdef CONFIG_HUGETLB_PAGE
1178	uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update);
1179	/*
1180	 * In the LWX case we don't want to do the load in the
1181	 * delay slot.  It cannot issue in the same cycle and may be
1182	 * speculative and unneeded.
1183	 */
1184	if (use_lwx_insns())
1185		uasm_i_nop(p);
1186#endif /* CONFIG_HUGETLB_PAGE */
1187
1188
1189	/* build_update_entries */
1190	if (use_lwx_insns()) {
1191		even = ptr;
1192		odd = tmp;
1193		UASM_i_LWX(p, even, scratch, tmp);
1194		UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t));
1195		UASM_i_LWX(p, odd, scratch, tmp);
1196	} else {
1197		UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */
1198		even = tmp;
1199		odd = ptr;
1200		UASM_i_LW(p, even, 0, ptr); /* get even pte */
1201		UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */
1202	}
1203	if (kernel_uses_smartmips_rixi) {
1204		uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_NO_EXEC));
1205		uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_NO_EXEC));
1206		uasm_i_drotr(p, even, even,
1207			     ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
1208		UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */
1209		uasm_i_drotr(p, odd, odd,
1210			     ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
1211	} else {
1212		uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL));
1213		UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */
1214		uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL));
1215	}
1216	UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */
1217
1218	if (c0_scratch >= 0) {
1219		UASM_i_MFC0(p, scratch, 31, c0_scratch);
1220		build_tlb_write_entry(p, l, r, tlb_random);
1221		uasm_l_leave(l, *p);
1222		rv.restore_scratch = 1;
1223	} else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13)  {
1224		build_tlb_write_entry(p, l, r, tlb_random);
1225		uasm_l_leave(l, *p);
1226		UASM_i_LW(p, scratch, scratchpad_offset(0), 0);
1227	} else {
1228		UASM_i_LW(p, scratch, scratchpad_offset(0), 0);
1229		build_tlb_write_entry(p, l, r, tlb_random);
1230		uasm_l_leave(l, *p);
1231		rv.restore_scratch = 1;
1232	}
1233
1234	uasm_i_eret(p); /* return from trap */
1235
1236	return rv;
1237}
1238
1239/*
1240 * For a 64-bit kernel, we are using the 64-bit XTLB refill exception
1241 * because EXL == 0.  If we wrap, we can also use the 32 instruction
1242 * slots before the XTLB refill exception handler which belong to the
1243 * unused TLB refill exception.
1244 */
1245#define MIPS64_REFILL_INSNS 32
1246
1247static void __cpuinit build_r4000_tlb_refill_handler(void)
1248{
1249	u32 *p = tlb_handler;
1250	struct uasm_label *l = labels;
1251	struct uasm_reloc *r = relocs;
1252	u32 *f;
1253	unsigned int final_len;
1254	struct mips_huge_tlb_info htlb_info;
1255	enum vmalloc64_mode vmalloc_mode;
1256
1257	memset(tlb_handler, 0, sizeof(tlb_handler));
1258	memset(labels, 0, sizeof(labels));
1259	memset(relocs, 0, sizeof(relocs));
1260	memset(final_handler, 0, sizeof(final_handler));
1261
1262	if ((scratch_reg > 0 || scratchpad_available()) && use_bbit_insns()) {
1263		htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1,
1264							  scratch_reg);
1265		vmalloc_mode = refill_scratch;
1266	} else {
1267		htlb_info.huge_pte = K0;
1268		htlb_info.restore_scratch = 0;
1269		vmalloc_mode = refill_noscratch;
1270		/*
1271		 * create the plain linear handler
1272		 */
1273		if (bcm1250_m3_war()) {
1274			unsigned int segbits = 44;
1275
1276			uasm_i_dmfc0(&p, K0, C0_BADVADDR);
1277			uasm_i_dmfc0(&p, K1, C0_ENTRYHI);
1278			uasm_i_xor(&p, K0, K0, K1);
1279			uasm_i_dsrl_safe(&p, K1, K0, 62);
1280			uasm_i_dsrl_safe(&p, K0, K0, 12 + 1);
1281			uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits);
1282			uasm_i_or(&p, K0, K0, K1);
1283			uasm_il_bnez(&p, &r, K0, label_leave);
1284			/* No need for uasm_i_nop */
1285		}
1286
1287#ifdef CONFIG_64BIT
1288		build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */
1289#else
1290		build_get_pgde32(&p, K0, K1); /* get pgd in K1 */
1291#endif
1292
1293#ifdef CONFIG_HUGETLB_PAGE
1294		build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update);
1295#endif
1296
1297		build_get_ptep(&p, K0, K1);
1298		build_update_entries(&p, K0, K1);
1299		build_tlb_write_entry(&p, &l, &r, tlb_random);
1300		uasm_l_leave(&l, p);
1301		uasm_i_eret(&p); /* return from trap */
1302	}
1303#ifdef CONFIG_HUGETLB_PAGE
1304	uasm_l_tlb_huge_update(&l, p);
1305	build_huge_update_entries(&p, htlb_info.huge_pte, K1);
1306	build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
1307				   htlb_info.restore_scratch);
1308#endif
1309
1310#ifdef CONFIG_64BIT
1311	build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode);
1312#endif
1313
1314	/*
1315	 * Overflow check: For the 64bit handler, we need at least one
1316	 * free instruction slot for the wrap-around branch. In worst
1317	 * case, if the intended insertion point is a delay slot, we
1318	 * need three, with the second nop'ed and the third being
1319	 * unused.
1320	 */
1321	/* Loongson2 ebase is different from r4k, so we have more space */
1322#if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2)
1323	if ((p - tlb_handler) > 64)
1324		panic("TLB refill handler space exceeded");
1325#else
1326	if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1)
1327	    || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3)
1328		&& uasm_insn_has_bdelay(relocs,
1329					tlb_handler + MIPS64_REFILL_INSNS - 3)))
1330		panic("TLB refill handler space exceeded");
1331#endif
1332
1333	/*
1334	 * Now fold the handler in the TLB refill handler space.
1335	 */
1336#if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2)
1337	f = final_handler;
1338	/* Simplest case, just copy the handler. */
1339	uasm_copy_handler(relocs, labels, tlb_handler, p, f);
1340	final_len = p - tlb_handler;
1341#else /* CONFIG_64BIT */
1342	f = final_handler + MIPS64_REFILL_INSNS;
1343	if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) {
1344		/* Just copy the handler. */
1345		uasm_copy_handler(relocs, labels, tlb_handler, p, f);
1346		final_len = p - tlb_handler;
1347	} else {
1348#if defined(CONFIG_HUGETLB_PAGE)
1349		const enum label_id ls = label_tlb_huge_update;
1350#else
1351		const enum label_id ls = label_vmalloc;
1352#endif
1353		u32 *split;
1354		int ov = 0;
1355		int i;
1356
1357		for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++)
1358			;
1359		BUG_ON(i == ARRAY_SIZE(labels));
1360		split = labels[i].addr;
1361
1362		/*
1363		 * See if we have overflown one way or the other.
1364		 */
1365		if (split > tlb_handler + MIPS64_REFILL_INSNS ||
1366		    split < p - MIPS64_REFILL_INSNS)
1367			ov = 1;
1368
1369		if (ov) {
1370			/*
1371			 * Split two instructions before the end.  One
1372			 * for the branch and one for the instruction
1373			 * in the delay slot.
1374			 */
1375			split = tlb_handler + MIPS64_REFILL_INSNS - 2;
1376
1377			/*
1378			 * If the branch would fall in a delay slot,
1379			 * we must back up an additional instruction
1380			 * so that it is no longer in a delay slot.
1381			 */
1382			if (uasm_insn_has_bdelay(relocs, split - 1))
1383				split--;
1384		}
1385		/* Copy first part of the handler. */
1386		uasm_copy_handler(relocs, labels, tlb_handler, split, f);
1387		f += split - tlb_handler;
1388
1389		if (ov) {
1390			/* Insert branch. */
1391			uasm_l_split(&l, final_handler);
1392			uasm_il_b(&f, &r, label_split);
1393			if (uasm_insn_has_bdelay(relocs, split))
1394				uasm_i_nop(&f);
1395			else {
1396				uasm_copy_handler(relocs, labels,
1397						  split, split + 1, f);
1398				uasm_move_labels(labels, f, f + 1, -1);
1399				f++;
1400				split++;
1401			}
1402		}
1403
1404		/* Copy the rest of the handler. */
1405		uasm_copy_handler(relocs, labels, split, p, final_handler);
1406		final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) +
1407			    (p - split);
1408	}
1409#endif /* CONFIG_64BIT */
1410
1411	uasm_resolve_relocs(relocs, labels);
1412	pr_debug("Wrote TLB refill handler (%u instructions).\n",
1413		 final_len);
1414
1415	memcpy((void *)ebase, final_handler, 0x100);
1416
1417	dump_handler((u32 *)ebase, 64);
1418}
1419
1420/*
1421 * 128 instructions for the fastpath handler is generous and should
1422 * never be exceeded.
1423 */
1424#define FASTPATH_SIZE 128
1425
1426u32 handle_tlbl[FASTPATH_SIZE] __cacheline_aligned;
1427u32 handle_tlbs[FASTPATH_SIZE] __cacheline_aligned;
1428u32 handle_tlbm[FASTPATH_SIZE] __cacheline_aligned;
1429#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
1430u32 tlbmiss_handler_setup_pgd[16] __cacheline_aligned;
1431
1432static void __cpuinit build_r4000_setup_pgd(void)
1433{
1434	const int a0 = 4;
1435	const int a1 = 5;
1436	u32 *p = tlbmiss_handler_setup_pgd;
1437	struct uasm_label *l = labels;
1438	struct uasm_reloc *r = relocs;
1439
1440	memset(tlbmiss_handler_setup_pgd, 0, sizeof(tlbmiss_handler_setup_pgd));
1441	memset(labels, 0, sizeof(labels));
1442	memset(relocs, 0, sizeof(relocs));
1443
1444	pgd_reg = allocate_kscratch();
1445
1446	if (pgd_reg == -1) {
1447		/* PGD << 11 in c0_Context */
1448		/*
1449		 * If it is a ckseg0 address, convert to a physical
1450		 * address.  Shifting right by 29 and adding 4 will
1451		 * result in zero for these addresses.
1452		 *
1453		 */
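		/*
		 * Worked example: for a ckseg0 pgd such as one at
		 * 0xffffffff80400000, the sign-extended arithmetic shift
		 * right by 29 yields -4, adding 4 gives 0, the branch is
		 * not taken, and the dinsm clears bits 63..29, leaving
		 * just the physical address 0x00400000.  For addresses
		 * outside ckseg0 the sum is non-zero and the conversion
		 * is skipped.
		 */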
1454		UASM_i_SRA(&p, a1, a0, 29);
1455		UASM_i_ADDIU(&p, a1, a1, 4);
1456		uasm_il_bnez(&p, &r, a1, label_tlbl_goaround1);
1457		uasm_i_nop(&p);
1458		uasm_i_dinsm(&p, a0, 0, 29, 64 - 29);
1459		uasm_l_tlbl_goaround1(&l, p);
1460		UASM_i_SLL(&p, a0, a0, 11);
1461		uasm_i_jr(&p, 31);
1462		UASM_i_MTC0(&p, a0, C0_CONTEXT);
1463	} else {
1464		/* PGD in c0_KScratch */
1465		uasm_i_jr(&p, 31);
1466		UASM_i_MTC0(&p, a0, 31, pgd_reg);
1467	}
1468	if (p - tlbmiss_handler_setup_pgd > ARRAY_SIZE(tlbmiss_handler_setup_pgd))
1469		panic("tlbmiss_handler_setup_pgd space exceeded");
1470	uasm_resolve_relocs(relocs, labels);
1471	pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n",
1472		 (unsigned int)(p - tlbmiss_handler_setup_pgd));
1473
1474	dump_handler(tlbmiss_handler_setup_pgd,
1475		     ARRAY_SIZE(tlbmiss_handler_setup_pgd));
1476}
1477#endif
1478
1479static void __cpuinit
1480iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
1481{
1482#ifdef CONFIG_SMP
1483# ifdef CONFIG_64BIT_PHYS_ADDR
1484	if (cpu_has_64bits)
1485		uasm_i_lld(p, pte, 0, ptr);
1486	else
1487# endif
1488		UASM_i_LL(p, pte, 0, ptr);
1489#else
1490# ifdef CONFIG_64BIT_PHYS_ADDR
1491	if (cpu_has_64bits)
1492		uasm_i_ld(p, pte, 0, ptr);
1493	else
1494# endif
1495		UASM_i_LW(p, pte, 0, ptr);
1496#endif
1497}
1498
1499static void __cpuinit
1500iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
1501	unsigned int mode)
1502{
1503#ifdef CONFIG_64BIT_PHYS_ADDR
1504	unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY);
1505#endif
1506
1507	uasm_i_ori(p, pte, pte, mode);
1508#ifdef CONFIG_SMP
1509# ifdef CONFIG_64BIT_PHYS_ADDR
1510	if (cpu_has_64bits)
1511		uasm_i_scd(p, pte, 0, ptr);
1512	else
1513# endif
1514		UASM_i_SC(p, pte, 0, ptr);
1515
1516	if (r10000_llsc_war())
1517		uasm_il_beqzl(p, r, pte, label_smp_pgtable_change);
1518	else
1519		uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
1520
1521# ifdef CONFIG_64BIT_PHYS_ADDR
1522	if (!cpu_has_64bits) {
1523		/* no uasm_i_nop needed */
1524		uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr);
1525		uasm_i_ori(p, pte, pte, hwmode);
1526		uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr);
1527		uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
1528		/* no uasm_i_nop needed */
1529		uasm_i_lw(p, pte, 0, ptr);
1530	} else
1531		uasm_i_nop(p);
1532# else
1533	uasm_i_nop(p);
1534# endif
1535#else
1536# ifdef CONFIG_64BIT_PHYS_ADDR
1537	if (cpu_has_64bits)
1538		uasm_i_sd(p, pte, 0, ptr);
1539	else
1540# endif
1541		UASM_i_SW(p, pte, 0, ptr);
1542
1543# ifdef CONFIG_64BIT_PHYS_ADDR
1544	if (!cpu_has_64bits) {
1545		uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr);
1546		uasm_i_ori(p, pte, pte, hwmode);
1547		uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr);
1548		uasm_i_lw(p, pte, 0, ptr);
1549	}
1550# endif
1551#endif
1552}
1553
1554/*
1555 * Check if PTE is present, if not then jump to LABEL. PTR points to
1556 * the page table where this PTE is located; PTE will be re-loaded
1557 * with its original value.
1558 */
1559static void __cpuinit
1560build_pte_present(u32 **p, struct uasm_reloc **r,
1561		  int pte, int ptr, int scratch, enum label_id lid)
1562{
1563	int t = scratch >= 0 ? scratch : pte;
1564
1565	if (kernel_uses_smartmips_rixi) {
1566		if (use_bbit_insns()) {
1567			uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid);
1568			uasm_i_nop(p);
1569		} else {
1570			uasm_i_andi(p, t, pte, _PAGE_PRESENT);
1571			uasm_il_beqz(p, r, t, lid);
1572			if (pte == t)
1573				/* You lose the SMP race :-(*/
1574				iPTE_LW(p, pte, ptr);
1575		}
1576	} else {
1577		uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ);
1578		uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ);
1579		uasm_il_bnez(p, r, t, lid);
1580		if (pte == t)
1581			/* You lose the SMP race :-(*/
1582			iPTE_LW(p, pte, ptr);
1583	}
1584}
1585
1586/* Make PTE valid, store result in PTR. */
1587static void __cpuinit
1588build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte,
1589		 unsigned int ptr)
1590{
1591	unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED;
1592
1593	iPTE_SW(p, r, pte, ptr, mode);
1594}
1595
1596/*
1597 * Check if PTE can be written to, if not branch to LABEL. Regardless,
1598 * restore PTE with the value from PTR when done.
1599 */
1600static void __cpuinit
1601build_pte_writable(u32 **p, struct uasm_reloc **r,
1602		   unsigned int pte, unsigned int ptr, int scratch,
1603		   enum label_id lid)
1604{
1605	int t = scratch >= 0 ? scratch : pte;
1606
1607	uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE);
1608	uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE);
1609	uasm_il_bnez(p, r, t, lid);
1610	if (pte == t)
1611		/* You lose the SMP race :-(*/
1612		iPTE_LW(p, pte, ptr);
1613	else
1614		uasm_i_nop(p);
1615}
1616
1617/* Make PTE writable, update software status bits as well, then store
1618 * at PTR.
1619 */
1620static void __cpuinit
1621build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte,
1622		 unsigned int ptr)
1623{
1624	unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID
1625			     | _PAGE_DIRTY);
1626
1627	iPTE_SW(p, r, pte, ptr, mode);
1628}
1629
1630/*
1631 * Check if PTE can be modified, if not branch to LABEL. Regardless,
1632 * restore PTE with the value from PTR when done.
1633 */
1634static void __cpuinit
1635build_pte_modifiable(u32 **p, struct uasm_reloc **r,
1636		     unsigned int pte, unsigned int ptr, int scratch,
1637		     enum label_id lid)
1638{
1639	if (use_bbit_insns()) {
1640		uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid);
1641		uasm_i_nop(p);
1642	} else {
1643		int t = scratch >= 0 ? scratch : pte;
1644		uasm_i_andi(p, t, pte, _PAGE_WRITE);
1645		uasm_il_beqz(p, r, t, lid);
1646		if (pte == t)
1647			/* You lose the SMP race :-(*/
1648			iPTE_LW(p, pte, ptr);
1649	}
1650}
1651
1652#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
1653
1654
1655/*
1656 * R3000 style TLB load/store/modify handlers.
1657 */
1658
1659/*
1660 * This places the pte into ENTRYLO0 and writes it with tlbwi.
1661 * Then it returns.
1662 */
1663static void __cpuinit
1664build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp)
1665{
1666	uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */
1667	uasm_i_mfc0(p, tmp, C0_EPC); /* cp0 delay */
1668	uasm_i_tlbwi(p);
1669	uasm_i_jr(p, tmp);
1670	uasm_i_rfe(p); /* branch delay */
1671}
1672
1673/*
1674 * This places the pte into ENTRYLO0 and writes it with tlbwi
1675 * or tlbwr as appropriate.  This is because the index register
1676 * may have the probe fail bit set as a result of a trap on a
1677 * kseg2 access, i.e. without refill.  Then it returns.
1678 */
1679static void __cpuinit
1680build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l,
1681			     struct uasm_reloc **r, unsigned int pte,
1682			     unsigned int tmp)
1683{
1684	uasm_i_mfc0(p, tmp, C0_INDEX);
1685	uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */
1686	uasm_il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */
1687	uasm_i_mfc0(p, tmp, C0_EPC); /* branch delay */
1688	uasm_i_tlbwi(p); /* cp0 delay */
1689	uasm_i_jr(p, tmp);
1690	uasm_i_rfe(p); /* branch delay */
1691	uasm_l_r3000_write_probe_fail(l, *p);
1692	uasm_i_tlbwr(p); /* cp0 delay */
1693	uasm_i_jr(p, tmp);
1694	uasm_i_rfe(p); /* branch delay */
1695}
1696
1697static void __cpuinit
1698build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte,
1699				   unsigned int ptr)
1700{
1701	long pgdc = (long)pgd_current;
1702
1703	uasm_i_mfc0(p, pte, C0_BADVADDR);
1704	uasm_i_lui(p, ptr, uasm_rel_hi(pgdc)); /* cp0 delay */
1705	uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
1706	uasm_i_srl(p, pte, pte, 22); /* load delay */
1707	uasm_i_sll(p, pte, pte, 2);
1708	uasm_i_addu(p, ptr, ptr, pte);
1709	uasm_i_mfc0(p, pte, C0_CONTEXT);
1710	uasm_i_lw(p, ptr, 0, ptr); /* cp0 delay */
1711	uasm_i_andi(p, pte, pte, 0xffc); /* load delay */
1712	uasm_i_addu(p, ptr, ptr, pte);
1713	uasm_i_lw(p, pte, 0, ptr);
1714	uasm_i_tlbp(p); /* load delay */
1715}
1716
1717static void __cpuinit build_r3000_tlb_load_handler(void)
1718{
1719	u32 *p = handle_tlbl;
1720	struct uasm_label *l = labels;
1721	struct uasm_reloc *r = relocs;
1722
1723	memset(handle_tlbl, 0, sizeof(handle_tlbl));
1724	memset(labels, 0, sizeof(labels));
1725	memset(relocs, 0, sizeof(relocs));
1726
1727	build_r3000_tlbchange_handler_head(&p, K0, K1);
1728	build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl);
1729	uasm_i_nop(&p); /* load delay */
1730	build_make_valid(&p, &r, K0, K1);
1731	build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
1732
1733	uasm_l_nopage_tlbl(&l, p);
1734	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
1735	uasm_i_nop(&p);
1736
1737	if ((p - handle_tlbl) > FASTPATH_SIZE)
1738		panic("TLB load handler fastpath space exceeded");
1739
1740	uasm_resolve_relocs(relocs, labels);
1741	pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",
1742		 (unsigned int)(p - handle_tlbl));
1743
1744	dump_handler(handle_tlbl, ARRAY_SIZE(handle_tlbl));
1745}
1746
1747static void __cpuinit build_r3000_tlb_store_handler(void)
1748{
1749	u32 *p = handle_tlbs;
1750	struct uasm_label *l = labels;
1751	struct uasm_reloc *r = relocs;
1752
1753	memset(handle_tlbs, 0, sizeof(handle_tlbs));
1754	memset(labels, 0, sizeof(labels));
1755	memset(relocs, 0, sizeof(relocs));
1756
1757	build_r3000_tlbchange_handler_head(&p, K0, K1);
1758	build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs);
1759	uasm_i_nop(&p); /* load delay */
1760	build_make_write(&p, &r, K0, K1);
1761	build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
1762
1763	uasm_l_nopage_tlbs(&l, p);
1764	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
1765	uasm_i_nop(&p);
1766
1767	if ((p - handle_tlbs) > FASTPATH_SIZE)
1768		panic("TLB store handler fastpath space exceeded");
1769
1770	uasm_resolve_relocs(relocs, labels);
1771	pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",
1772		 (unsigned int)(p - handle_tlbs));
1773
1774	dump_handler(handle_tlbs, ARRAY_SIZE(handle_tlbs));
1775}
1776
1777static void __cpuinit build_r3000_tlb_modify_handler(void)
1778{
1779	u32 *p = handle_tlbm;
1780	struct uasm_label *l = labels;
1781	struct uasm_reloc *r = relocs;
1782
1783	memset(handle_tlbm, 0, sizeof(handle_tlbm));
1784	memset(labels, 0, sizeof(labels));
1785	memset(relocs, 0, sizeof(relocs));
1786
1787	build_r3000_tlbchange_handler_head(&p, K0, K1);
1788	build_pte_modifiable(&p, &r, K0, K1,  -1, label_nopage_tlbm);
1789	uasm_i_nop(&p); /* load delay */
1790	build_make_write(&p, &r, K0, K1);
1791	build_r3000_pte_reload_tlbwi(&p, K0, K1);
1792
1793	uasm_l_nopage_tlbm(&l, p);
1794	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
1795	uasm_i_nop(&p);
1796
1797	if ((p - handle_tlbm) > FASTPATH_SIZE)
1798		panic("TLB modify handler fastpath space exceeded");
1799
1800	uasm_resolve_relocs(relocs, labels);
1801	pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",
1802		 (unsigned int)(p - handle_tlbm));
1803
1804	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm));
1805}
1806#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
1807
1808/*
1809 * R4000 style TLB load/store/modify handlers.
1810 */
1811static struct work_registers __cpuinit
1812build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l,
1813				   struct uasm_reloc **r)
1814{
1815	struct work_registers wr = build_get_work_registers(p);
1816
1817#ifdef CONFIG_64BIT
1818	build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */
1819#else
1820	build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */
1821#endif
1822
1823#ifdef CONFIG_HUGETLB_PAGE
1824	/*
1825	 * For huge tlb entries, pmd doesn't contain an address but
1826	 * instead contains the tlb pte. Check the PAGE_HUGE bit and
1827	 * see if we need to jump to huge tlb processing.
1828	 */
1829	build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update);
1830#endif
1831
1832	UASM_i_MFC0(p, wr.r1, C0_BADVADDR);
1833	UASM_i_LW(p, wr.r2, 0, wr.r2);
1834	UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2);
1835	uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2);
1836	UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1);
1837
1838#ifdef CONFIG_SMP
1839	uasm_l_smp_pgtable_change(l, *p);
1840#endif
1841	iPTE_LW(p, wr.r1, wr.r2); /* get even pte */
1842	if (!m4kc_tlbp_war())
1843		build_tlb_probe_entry(p);
1844	return wr;
1845}
1846
1847static void __cpuinit
1848build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l,
1849				   struct uasm_reloc **r, unsigned int tmp,
1850				   unsigned int ptr)
1851{
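	/*
	 * sizeof(pte_t) is a power of two, so the ori/xori pair below just
	 * clears that bit in ptr, rounding ptr down to the even PTE of the
	 * even/odd pair before both halves are reloaded.
	 */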
1852	uasm_i_ori(p, ptr, ptr, sizeof(pte_t));
1853	uasm_i_xori(p, ptr, ptr, sizeof(pte_t));
1854	build_update_entries(p, tmp, ptr);
1855	build_tlb_write_entry(p, l, r, tlb_indexed);
1856	uasm_l_leave(l, *p);
1857	build_restore_work_registers(p);
1858	uasm_i_eret(p); /* return from trap */
1859
1860#ifdef CONFIG_64BIT
1861	build_get_pgd_vmalloc64(p, l, r, tmp, ptr, not_refill);
1862#endif
1863}
1864
1865static void __cpuinit build_r4000_tlb_load_handler(void)
1866{
1867	u32 *p = handle_tlbl;
1868	struct uasm_label *l = labels;
1869	struct uasm_reloc *r = relocs;
1870	struct work_registers wr;
1871
1872	memset(handle_tlbl, 0, sizeof(handle_tlbl));
1873	memset(labels, 0, sizeof(labels));
1874	memset(relocs, 0, sizeof(relocs));
1875
1876	if (bcm1250_m3_war()) {
1877		unsigned int segbits = 44;
1878
1879		uasm_i_dmfc0(&p, K0, C0_BADVADDR);
1880		uasm_i_dmfc0(&p, K1, C0_ENTRYHI);
1881		uasm_i_xor(&p, K0, K0, K1);
1882		uasm_i_dsrl_safe(&p, K1, K0, 62);
1883		uasm_i_dsrl_safe(&p, K0, K0, 12 + 1);
1884		uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits);
1885		uasm_i_or(&p, K0, K0, K1);
1886		uasm_il_bnez(&p, &r, K0, label_leave);
1887		/* No need for uasm_i_nop */
1888	}
1889
1890	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
1891	build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
1892	if (m4kc_tlbp_war())
1893		build_tlb_probe_entry(&p);
1894
1895	if (kernel_uses_smartmips_rixi) {
1896		/*
1897		 * If the page is not _PAGE_VALID, RI or XI could not
1898		 * have triggered it.  Skip the expensive test.
1899		 */
1900		if (use_bbit_insns()) {
1901			uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
1902				      label_tlbl_goaround1);
1903		} else {
1904			uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
1905			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1);
1906		}
1907		uasm_i_nop(&p);
1908
1909		uasm_i_tlbr(&p);
1910		/* Examine EntryLo0 or EntryLo1 based on the pte pointer's parity. */
1911		if (use_bbit_insns()) {
1912			uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
1913		} else {
1914			uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
1915			uasm_i_beqz(&p, wr.r3, 8);
1916		}
1917		/* EntryLo0 is loaded in the branch delay slot ... */
1918		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
1919		/* ... and overwritten with EntryLo1 if the pte pointer is odd */
1920		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
1921		/*
1922		 * If the EntryLo (now in wr.r3) has its valid bit (bit 1)
1923		 * set, the exception must have been caused by RI or XI.
1924		 */
1925		if (use_bbit_insns()) {
1926			uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl);
1927			uasm_i_nop(&p);
1928			uasm_l_tlbl_goaround1(&l, p);
1929		} else {
1930			uasm_i_andi(&p, wr.r3, wr.r3, 2);
1931			uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl);
1932			uasm_i_nop(&p);
1933		}
1934		uasm_l_tlbl_goaround1(&l, p);
1935	}
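	/*
	 * Net effect of the RI/XI block above: a TLBL fault on a pte that
	 * is already valid can only have been caused by the read- or
	 * execute-inhibit bits and is punted to do_page_fault, while an
	 * ordinary fault on a present but not yet valid pte falls through
	 * to build_make_valid() below.
	 */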
1936	build_make_valid(&p, &r, wr.r1, wr.r2);
1937	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
1938
1939#ifdef CONFIG_HUGETLB_PAGE
1940	/*
1941	 * This is the entry point when build_r4000_tlbchange_handler_head
1942	 * spots a huge page.
1943	 */
1944	uasm_l_tlb_huge_update(&l, p);
1945	iPTE_LW(&p, wr.r1, wr.r2);
1946	build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
1947	build_tlb_probe_entry(&p);
1948
1949	if (kernel_uses_smartmips_rixi) {
1950		/*
1951		 * If the page is not _PAGE_VALID, RI or XI could not
1952		 * have triggered it.  Skip the expensive test.
1953		 */
1954		if (use_bbit_insns()) {
1955			uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
1956				      label_tlbl_goaround2);
1957		} else {
1958			uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
1959			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
1960		}
1961		uasm_i_nop(&p);
1962
1963		uasm_i_tlbr(&p);
1964		/* Examine EntryLo0 or EntryLo1 based on the pte pointer's parity. */
1965		if (use_bbit_insns()) {
1966			uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
1967		} else {
1968			uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
1969			uasm_i_beqz(&p, wr.r3, 8);
1970		}
1971		/* EntryLo0 is loaded in the branch delay slot ... */
1972		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
1973		/* ... and overwritten with EntryLo1 if the pte pointer is odd */
1974		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
1975		/*
1976		 * If the EntryLo (now in wr.r3) has its valid bit (bit 1)
1977		 * set, the exception must have been caused by RI or XI.
1978		 */
1979		if (use_bbit_insns()) {
1980			uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2);
1981		} else {
1982			uasm_i_andi(&p, wr.r3, wr.r3, 2);
1983			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
1984		}
1985
1986		/*
1987		 * The tlbr above clobbered C0_PAGEMASK; restore it here.  On
1988		 * the other branch it is restored in build_huge_tlb_write_entry.
1989		 */
1990		build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0);
1991
1992		uasm_l_tlbl_goaround2(&l, p);
1993	}
1994	uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID));
1995	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
1996#endif
1997
1998	uasm_l_nopage_tlbl(&l, p);
1999	build_restore_work_registers(&p);
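	/*
	 * Hand the fault to the common C slow path.  Masking the target
	 * with 0x0fffffff turns it into a j-reachable offset within the
	 * current 256MB jump segment; with CONFIG_CPU_MICROMIPS a target
	 * whose ISA bit is set is instead loaded into K0 and reached with
	 * jr so the ISA mode switch is preserved.
	 */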
2000#ifdef CONFIG_CPU_MICROMIPS
2001	if ((unsigned long)tlb_do_page_fault_0 & 1) {
2002		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_0));
2003		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_0));
2004		uasm_i_jr(&p, K0);
2005	} else
2006#endif
2007		uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
2008	uasm_i_nop(&p);
2009
2010	if ((p - handle_tlbl) > FASTPATH_SIZE)
2011		panic("TLB load handler fastpath space exceeded");
2012
2013	uasm_resolve_relocs(relocs, labels);
2014	pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",
2015		 (unsigned int)(p - handle_tlbl));
2016
2017	dump_handler(handle_tlbl, ARRAY_SIZE(handle_tlbl));
2018}
2019
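/*
 * The store (TLBS) and modify (TLBM) fastpaths below are nearly
 * identical: both require a writable pte, both end in build_make_write()
 * to set the accessed/modified/valid/dirty bits, and they differ mainly
 * in the precondition test and in the nopage label used for the slow path.
 */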
2020static void __cpuinit build_r4000_tlb_store_handler(void)
2021{
2022	u32 *p = handle_tlbs;
2023	struct uasm_label *l = labels;
2024	struct uasm_reloc *r = relocs;
2025	struct work_registers wr;
2026
2027	memset(handle_tlbs, 0, sizeof(handle_tlbs));
2028	memset(labels, 0, sizeof(labels));
2029	memset(relocs, 0, sizeof(relocs));
2030
2031	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
2032	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
2033	if (m4kc_tlbp_war())
2034		build_tlb_probe_entry(&p);
2035	build_make_write(&p, &r, wr.r1, wr.r2);
2036	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
2037
2038#ifdef CONFIG_HUGETLB_PAGE
2039	/*
2040	 * This is the entry point when
2041	 * build_r4000_tlbchange_handler_head spots a huge page.
2042	 */
2043	uasm_l_tlb_huge_update(&l, p);
2044	iPTE_LW(&p, wr.r1, wr.r2);
2045	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
2046	build_tlb_probe_entry(&p);
2047	uasm_i_ori(&p, wr.r1, wr.r1,
2048		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
2049	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
2050#endif
2051
2052	uasm_l_nopage_tlbs(&l, p);
2053	build_restore_work_registers(&p);
2054#ifdef CONFIG_CPU_MICROMIPS
2055	if ((unsigned long)tlb_do_page_fault_1 & 1) {
2056		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
2057		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
2058		uasm_i_jr(&p, K0);
2059	} else
2060#endif
2061		uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
2062	uasm_i_nop(&p);
2063
2064	if ((p - handle_tlbs) > FASTPATH_SIZE)
2065		panic("TLB store handler fastpath space exceeded");
2066
2067	uasm_resolve_relocs(relocs, labels);
2068	pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",
2069		 (unsigned int)(p - handle_tlbs));
2070
2071	dump_handler(handle_tlbs, ARRAY_SIZE(handle_tlbs));
2072}
2073
2074static void __cpuinit build_r4000_tlb_modify_handler(void)
2075{
2076	u32 *p = handle_tlbm;
2077	struct uasm_label *l = labels;
2078	struct uasm_reloc *r = relocs;
2079	struct work_registers wr;
2080
2081	memset(handle_tlbm, 0, sizeof(handle_tlbm));
2082	memset(labels, 0, sizeof(labels));
2083	memset(relocs, 0, sizeof(relocs));
2084
2085	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
2086	build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
2087	if (m4kc_tlbp_war())
2088		build_tlb_probe_entry(&p);
2089	/* Present and writable bits are set; set the accessed and dirty bits. */
2090	build_make_write(&p, &r, wr.r1, wr.r2);
2091	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
2092
2093#ifdef CONFIG_HUGETLB_PAGE
2094	/*
2095	 * This is the entry point when
2096	 * build_r4000_tlbchange_handler_head spots a huge page.
2097	 */
2098	uasm_l_tlb_huge_update(&l, p);
2099	iPTE_LW(&p, wr.r1, wr.r2);
2100	build_pte_modifiable(&p, &r, wr.r1, wr.r2,  wr.r3, label_nopage_tlbm);
2101	build_tlb_probe_entry(&p);
2102	uasm_i_ori(&p, wr.r1, wr.r1,
2103		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
2104	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
2105#endif
2106
2107	uasm_l_nopage_tlbm(&l, p);
2108	build_restore_work_registers(&p);
2109#ifdef CONFIG_CPU_MICROMIPS
2110	if ((unsigned long)tlb_do_page_fault_1 & 1) {
2111		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
2112		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
2113		uasm_i_jr(&p, K0);
2114	} else
2115#endif
2116		uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
2117	uasm_i_nop(&p);
2118
2119	if ((p - handle_tlbm) > FASTPATH_SIZE)
2120		panic("TLB modify handler fastpath space exceeded");
2121
2122	uasm_resolve_relocs(relocs, labels);
2123	pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",
2124		 (unsigned int)(p - handle_tlbm));
2125
2126	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm));
2127}
2128
2129void __cpuinit build_tlb_refill_handler(void)
2130{
2131	/*
2132	 * The refill handler is generated per CPU; multi-node systems
2133	 * may have node-local storage for it.  The other handlers are
2134	 * only needed once.
2135	 */
2136	static int run_once = 0;
2137
2138#ifdef CONFIG_64BIT
2139	check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
2140#endif
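	/*
	 * If the CPU implements more virtual address bits than the page
	 * tables cover, the generated handlers must detect addresses with
	 * those high segment bits set and send them to the fault path
	 * instead of indexing beyond the pgd.
	 */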
2141
2142	switch (current_cpu_type()) {
2143	case CPU_R2000:
2144	case CPU_R3000:
2145	case CPU_R3000A:
2146	case CPU_R3081E:
2147	case CPU_TX3912:
2148	case CPU_TX3922:
2149	case CPU_TX3927:
2150#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
2151		build_r3000_tlb_refill_handler();
2152		if (!run_once) {
2153			build_r3000_tlb_load_handler();
2154			build_r3000_tlb_store_handler();
2155			build_r3000_tlb_modify_handler();
2156			run_once++;
2157		}
2158#else
2159		panic("No R3000 TLB refill handler");
2160#endif
2161		break;
2162
2163	case CPU_R6000:
2164	case CPU_R6000A:
2165		panic("No R6000 TLB refill handler yet");
2166		break;
2167
2168	case CPU_R8000:
2169		panic("No R8000 TLB refill handler yet");
2170		break;
2171
2172	default:
2173		if (!run_once) {
2174			scratch_reg = allocate_kscratch();
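			/*
			 * If no CP0 KScratch register is available here,
			 * build_get_work_registers() falls back to saving
			 * K0/K1 in the per-CPU handler_reg_save area.
			 */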
2175#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
2176			build_r4000_setup_pgd();
2177#endif
2178			build_r4000_tlb_load_handler();
2179			build_r4000_tlb_store_handler();
2180			build_r4000_tlb_modify_handler();
2181			run_once++;
2182		}
2183		build_r4000_tlb_refill_handler();
2184	}
2185}
2186
2187void __cpuinit flush_tlb_handlers(void)
2188{
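	/*
	 * The handlers were written through the data cache; flush the
	 * corresponding instruction cache ranges so the CPU fetches the
	 * freshly generated code rather than stale contents.
	 */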
2189	local_flush_icache_range((unsigned long)handle_tlbl,
2190			   (unsigned long)handle_tlbl + sizeof(handle_tlbl));
2191	local_flush_icache_range((unsigned long)handle_tlbs,
2192			   (unsigned long)handle_tlbs + sizeof(handle_tlbs));
2193	local_flush_icache_range((unsigned long)handle_tlbm,
2194			   (unsigned long)handle_tlbm + sizeof(handle_tlbm));
2195#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
2196	local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd,
2197			   (unsigned long)tlbmiss_handler_setup_pgd + sizeof(handle_tlbm));
2198#endif
2199}
2200