/*
 * This file contains miscellaneous low-level functions.
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
 * and Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/cputable.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>

#ifdef CONFIG_8xx
#define ISYNC_8xx isync
#else
#define ISYNC_8xx
#endif
	.text

	.align	5
_GLOBAL(__delay)
	cmpwi	0,r3,0
	mtctr	r3
	beqlr
1:	bdnz	1b
	blr

/*
 * Returns (address we're running at) - (address we were linked at)
 * for use before the text and data are mapped to KERNELBASE.
 */
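/*
 * Illustrative use (a sketch, not taken from this file): before the
 * kernel is mapped at KERNELBASE, a caller can turn a link-time address
 * into the address it is currently running at with something like
 *
 *	ptr = (void *)((unsigned long)&some_symbol + reloc_offset());
 *
 * where 'some_symbol' is just a placeholder name; add_reloc_offset()
 * below wraps exactly this computation.
 */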
_GLOBAL(reloc_offset)
	mflr	r0
	bl	1f
1:	mflr	r3
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r3,r4,r3
	mtlr	r0
	blr

/*
 * add_reloc_offset(x) returns x + reloc_offset().
 */
_GLOBAL(add_reloc_offset)
	mflr	r0
	bl	1f
1:	mflr	r5
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r5,r4,r5
	add	r3,r3,r5
	mtlr	r0
	blr

/*
 * sub_reloc_offset(x) returns x - reloc_offset().
 */
_GLOBAL(sub_reloc_offset)
	mflr	r0
	bl	1f
1:	mflr	r5
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r5,r4,r5
	subf	r3,r5,r3
	mtlr	r0
	blr

/*
 * reloc_got2 runs through the .got2 section adding an offset
 * to each entry.
 */
_GLOBAL(reloc_got2)
	mflr	r11
	lis	r7,__got2_start@ha
	addi	r7,r7,__got2_start@l
	lis	r8,__got2_end@ha
	addi	r8,r8,__got2_end@l
	subf	r8,r7,r8
	srwi.	r8,r8,2
	beqlr
	mtctr	r8
	bl	1f
1:	mflr	r0
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r0,r4,r0
	add	r7,r0,r7
2:	lwz	r0,0(r7)
	add	r0,r0,r3
	stw	r0,0(r7)
	addi	r7,r7,4
	bdnz	2b
	mtlr	r11
	blr

/*
 * call_setup_cpu - call the setup_cpu function for this cpu
 * r3 = data offset, r24 = cpu number
 *
 * Setup function is called with:
 *   r3 = data offset
 *   r4 = ptr to CPU spec (relocated)
 */
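/*
 * Roughly what happens, in C terms (illustrative sketch only, with
 * 'offset' standing for the value passed in r3):
 *
 *	struct cpu_spec *spec;
 *	spec = *(struct cpu_spec **)((char *)&cur_cpu_spec + offset);
 *	spec = (struct cpu_spec *)((char *)spec + offset);
 *	if (spec->cpu_setup)
 *		spec->cpu_setup(offset, spec);
 *
 * The setup function pointer itself is also relocated by 'offset'
 * before the call (the add below happens before the beqlr).
 */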
_GLOBAL(call_setup_cpu)
	addis	r4,r3,cur_cpu_spec@ha
	addi	r4,r4,cur_cpu_spec@l
	lwz	r4,0(r4)
	add	r4,r4,r3
	lwz	r5,CPU_SPEC_SETUP(r4)
	cmpi	0,r5,0
	add	r5,r5,r3
	beqlr
	mtctr	r5
	bctr

/*
 * Clear the MSR bits set in nmask, then OR on the bits in value_to_or.
 *     _nmask_and_or_msr(nmask, value_to_or)
 */
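/*
 * In C terms this is roughly (sketch only):
 *
 *	unsigned long msr = mfmsr();
 *	msr = (msr & ~nmask) | value_to_or;
 *	mtmsr(msr);
 *
 * with the SYNC/isync bracketing some processor revisions require.
 */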
_GLOBAL(_nmask_and_or_msr)
	mfmsr	r0		/* Get current msr */
	andc	r0,r0,r3	/* And off the bits set in r3 (first parm) */
	or	r0,r0,r4	/* Or on the bits in r4 (second parm) */
	SYNC			/* Some chip revs have problems here... */
	mtmsr	r0		/* Update machine state */
	isync
	blr			/* Done */


/*
 * Flush MMU TLB
 */
_GLOBAL(_tlbia)
#if defined(CONFIG_40x)
	sync			/* Flush to memory before changing mapping */
	tlbia
	isync			/* Flush shadow TLB */
#elif defined(CONFIG_44x)
	li	r3,0
	sync

	/* Load high watermark */
	lis	r4,tlb_44x_hwater@ha
	lwz	r5,tlb_44x_hwater@l(r4)

1:	tlbwe	r3,r3,PPC44x_TLB_PAGEID
	addi	r3,r3,1
	cmpw	0,r3,r5
	ble	1b

	isync
#elif defined(CONFIG_FSL_BOOKE)
	/* Invalidate all entries in TLB0 */
	li	r3, 0x04
	tlbivax	0,3
	/* Invalidate all entries in TLB1 */
	li	r3, 0x0c
	tlbivax	0,3
	/* Invalidate all entries in TLB2 */
	li	r3, 0x14
	tlbivax	0,3
	/* Invalidate all entries in TLB3 */
	li	r3, 0x1c
	tlbivax	0,3
	msync
#ifdef CONFIG_SMP
	tlbsync
#endif /* CONFIG_SMP */
#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
#if defined(CONFIG_SMP)
	rlwinm	r8,r1,0,0,18		/* r8 = current thread_info */
	lwz	r8,TI_CPU(r8)		/* r8 = cpu number */
	oris	r8,r8,10		/* lock token: 0x000a0000 | cpu */
	mfmsr	r10
	SYNC
	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
	rlwinm	r0,r0,0,28,26		/* clear DR */
	mtmsr	r0
	SYNC_601
	isync
	lis	r9,mmu_hash_lock@h
	ori	r9,r9,mmu_hash_lock@l
	tophys(r9,r9)
10:	lwarx	r7,0,r9			/* take mmu_hash_lock */
	cmpwi	0,r7,0
	bne-	10b
	stwcx.	r8,0,r9
	bne-	10b
	sync
	tlbia
	sync
	TLBSYNC
	li	r0,0
	stw	r0,0(r9)		/* clear mmu_hash_lock */
	mtmsr	r10
	SYNC_601
	isync
#else /* CONFIG_SMP */
	sync
	tlbia
	sync
#endif /* CONFIG_SMP */
#endif /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
	blr

/*
 * Flush MMU TLB for a particular address
 */
_GLOBAL(_tlbie)
#if defined(CONFIG_40x)
	tlbsx.	r3, 0, r3
	bne	10f
	sync
	/* There are only 64 TLB entries, so r3 < 64, which means bit 25 is clear.
	 * Since 25 is the V bit in the TLB_TAG, loading this value will invalidate
	 * the TLB entry. */
	tlbwe	r3, r3, TLB_TAG
	isync
10:
#elif defined(CONFIG_44x)
	mfspr	r4,SPRN_MMUCR
	mfspr	r5,SPRN_PID			/* Get PID */
	rlwimi	r4,r5,0,24,31			/* Set TID */
	mtspr	SPRN_MMUCR,r4

	tlbsx.	r3, 0, r3
	bne	10f
	sync
	/* There are only 64 TLB entries, so r3 < 64,
	 * which means bit 22 is clear.  Since 22 is
	 * the V bit in the TLB_PAGEID, loading this
	 * value will invalidate the TLB entry.
	 */
	tlbwe	r3, r3, PPC44x_TLB_PAGEID
	isync
10:
#elif defined(CONFIG_FSL_BOOKE)
	rlwinm	r4, r3, 0, 0, 19
	ori	r5, r4, 0x08	/* TLBSEL = 1 */
	ori	r6, r4, 0x10	/* TLBSEL = 2 */
	ori	r7, r4, 0x18	/* TLBSEL = 3 */
	tlbivax	0, r4
	tlbivax	0, r5
	tlbivax	0, r6
	tlbivax	0, r7
	msync
#if defined(CONFIG_SMP)
	tlbsync
#endif /* CONFIG_SMP */
#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
#if defined(CONFIG_SMP)
	rlwinm	r8,r1,0,0,18		/* r8 = current thread_info */
	lwz	r8,TI_CPU(r8)		/* r8 = cpu number */
	oris	r8,r8,11		/* lock token: 0x000b0000 | cpu */
	mfmsr	r10
	SYNC
	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
	rlwinm	r0,r0,0,28,26		/* clear DR */
	mtmsr	r0
	SYNC_601
	isync
	lis	r9,mmu_hash_lock@h
	ori	r9,r9,mmu_hash_lock@l
	tophys(r9,r9)
10:	lwarx	r7,0,r9			/* take mmu_hash_lock */
	cmpwi	0,r7,0
	bne-	10b
	stwcx.	r8,0,r9
	bne-	10b
	eieio
	tlbie	r3
	sync
	TLBSYNC
	li	r0,0
	stw	r0,0(r9)		/* clear mmu_hash_lock */
	mtmsr	r10
	SYNC_601
	isync
#else /* CONFIG_SMP */
	tlbie	r3
	sync
#endif /* CONFIG_SMP */
#endif /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
	blr

/*
 * Flush instruction cache.
 * This is a no-op on the 601.
 */
_GLOBAL(flush_instruction_cache)
#if defined(CONFIG_8xx)
	isync
	lis	r5, IDC_INVALL@h
	mtspr	SPRN_IC_CST, r5
#elif defined(CONFIG_4xx)
#ifdef CONFIG_403GCX
	li	r3, 512
	mtctr	r3
	lis	r4, KERNELBASE@h
1:	iccci	0, r4
	addi	r4, r4, 16
	bdnz	1b
#else
	lis	r3, KERNELBASE@h
	iccci	0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
BEGIN_FTR_SECTION
	mfspr	r3,SPRN_L1CSR0
	ori	r3,r3,L1CSR0_CFI|L1CSR0_CLFC
	/* msync; isync recommended here */
	mtspr	SPRN_L1CSR0,r3
	isync
	blr
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	mfspr	r3,SPRN_L1CSR1
	ori	r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
	mtspr	SPRN_L1CSR1,r3
#else
	mfspr	r3,SPRN_PVR
	rlwinm	r3,r3,16,16,31
	cmpwi	0,r3,1
	beqlr			/* for 601, do nothing */
	/* 603/604 processor - use invalidate-all bit in HID0 */
	mfspr	r3,SPRN_HID0
	ori	r3,r3,HID0_ICFI
	mtspr	SPRN_HID0,r3
#endif /* CONFIG_8xx/4xx */
	isync
	blr

/*
 * Write any modified data cache blocks out to memory
 * and invalidate the corresponding instruction cache blocks.
 * This is a no-op on the 601.
 *
 * __flush_icache_range(unsigned long start, unsigned long stop)
 */
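/*
 * Loop structure, in rough C terms (illustrative sketch only; dcbst,
 * icbi, sync and isync stand for the corresponding instructions):
 *
 *	for (p = start & ~(L1_CACHE_BYTES - 1); p < stop; p += L1_CACHE_BYTES)
 *		dcbst(p);		push dirty data lines to memory
 *	sync();				wait for the writes to complete
 *	for (p = start & ~(L1_CACHE_BYTES - 1); p < stop; p += L1_CACHE_BYTES)
 *		icbi(p);		toss the stale instruction lines
 *	sync(); isync();		make the new code visible to fetch
 */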
_GLOBAL(__flush_icache_range)
BEGIN_FTR_SECTION
	blr				/* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4
	mr	r6,r3
1:	dcbst	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbst's to get to ram */
	mtctr	r4
2:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	2b
	sync				/* additional sync needed on g4 */
	isync
	blr
/*
 * Write any modified data cache blocks out to memory.
 * Does not invalidate the corresponding cache lines (especially for
 * any corresponding instruction cache).
 *
 * clean_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(clean_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4

1:	dcbst	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbst's to get to ram */
	blr

/*
 * Write any modified data cache blocks out to memory and invalidate them.
 * Does not invalidate the corresponding instruction cache blocks.
 *
 * flush_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(flush_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4

1:	dcbf	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbf's to get to ram */
	blr

/*
 * Like above, but invalidate the D-cache.  This is used by the 8xx
 * to invalidate the cache so the PPC core doesn't get stale data
 * from the CPM (no cache snooping here :-).
 *
 * invalidate_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(invalidate_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4

1:	dcbi	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbi's to get to ram */
	blr

#ifdef CONFIG_NOT_COHERENT_CACHE
/*
 * 40x cores have 8K or 16K dcache and 32 byte line size.
 * 44x has a 32K dcache and 32 byte line size.
 * 8xx has 1, 2, 4, 8K variants.
 * For now, cover the worst case of the 44x.
 * Must be called with external interrupts disabled.
 */
#define CACHE_NWAYS	64
#define CACHE_NLINES	16
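/*
 * Worked numbers for the loop below (a sketch of the reasoning): the
 * counter is 2 * CACHE_NWAYS * CACHE_NLINES = 2048 loads, each stepping
 * by L1_CACHE_BYTES (32), so we read 64K of lowmem starting at
 * KERNELBASE -- twice the 32K 44x dcache -- which should be enough to
 * displace (and hence write back) every dirty line.
 */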

_GLOBAL(flush_dcache_all)
	li	r4, (2 * CACHE_NWAYS * CACHE_NLINES)
	mtctr	r4
	lis	r5, KERNELBASE@h
1:	lwz	r3, 0(r5)		/* Load one word from every line */
	addi	r5, r5, L1_CACHE_BYTES
	bdnz	1b
	blr
#endif /* CONFIG_NOT_COHERENT_CACHE */

/*
 * Flush a particular page from the data cache to RAM.
 * Note: this is necessary because the instruction cache does *not*
 * snoop from the data cache.
 * This is a no-op on the 601 which has a unified cache.
 *
 *	void __flush_dcache_icache(void *page)
 */
_GLOBAL(__flush_dcache_icache)
BEGIN_FTR_SECTION
	blr					/* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	rlwinm	r3,r3,0,0,19			/* Get page base address */
	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
	mtctr	r4
	mr	r6,r3
0:	dcbst	0,r3				/* Write line to ram */
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	0b
	sync
	mtctr	r4
1:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	1b
	sync
	isync
	blr

/*
 * Flush a particular page from the data cache to RAM, identified
 * by its physical address.  We turn off the MMU so we can just use
 * the physical address (this may be a highmem page without a kernel
 * mapping).
 *
 *	void __flush_dcache_icache_phys(unsigned long physaddr)
 */
_GLOBAL(__flush_dcache_icache_phys)
BEGIN_FTR_SECTION
	blr					/* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	mfmsr	r10
	rlwinm	r0,r10,0,28,26			/* clear DR */
	mtmsr	r0
	isync
	rlwinm	r3,r3,0,0,19			/* Get page base address */
	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
	mtctr	r4
	mr	r6,r3
0:	dcbst	0,r3				/* Write line to ram */
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	0b
	sync
	mtctr	r4
1:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	1b
	sync
	mtmsr	r10				/* restore DR */
	isync
	blr

/*
 * Clear pages using the dcbz instruction, which doesn't cause any
 * memory traffic (except to write out any cache lines which get
 * displaced).  This only works on cacheable memory.
 *
 * void clear_pages(void *page, int order);
 */
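/*
 * In effect (sketch only): memset(page, 0, 4096 << order), but done a
 * cache line at a time with dcbz so no destination lines are read from
 * memory.  (The 8xx variant below avoids dcbz and stores zeros a word
 * at a time instead.)
 */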
_GLOBAL(clear_pages)
	li	r0,4096/L1_CACHE_BYTES
	slw	r0,r0,r4
	mtctr	r0
#ifdef CONFIG_8xx
	li	r4, 0
1:	stw	r4, 0(r3)
	stw	r4, 4(r3)
	stw	r4, 8(r3)
	stw	r4, 12(r3)
#else
1:	dcbz	0,r3
#endif
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	blr

/*
 * Copy a whole page.  We use the dcbz instruction on the destination
 * to reduce memory traffic (it eliminates the unnecessary reads of
 * the destination into cache).  This requires that the destination
 * is cacheable.
 */
#define COPY_16_BYTES		\
	lwz	r6,4(r4);	\
	lwz	r7,8(r4);	\
	lwz	r8,12(r4);	\
	lwzu	r9,16(r4);	\
	stw	r6,4(r3);	\
	stw	r7,8(r3);	\
	stw	r8,12(r3);	\
	stwu	r9,16(r3)
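/*
 * COPY_16_BYTES moves one 16-byte chunk.  In rough C terms (sketch;
 * src and dst are 32-bit word pointers that enter pre-decremented by
 * one word, as set up at the top of copy_page):
 *
 *	dst[1] = src[1];
 *	dst[2] = src[2];
 *	dst[3] = src[3];
 *	dst[4] = src[4];
 *	src += 4; dst += 4;	(the lwzu/stwu updates)
 */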

_GLOBAL(copy_page)
	addi	r3,r3,-4
	addi	r4,r4,-4

#ifdef CONFIG_8xx
	/* don't use prefetch on 8xx */
	li	r0,4096/L1_CACHE_BYTES
	mtctr	r0
1:	COPY_16_BYTES
	bdnz	1b
	blr

#else	/* not 8xx, we can prefetch */
	li	r5,4

#if MAX_COPY_PREFETCH > 1
	li	r0,MAX_COPY_PREFETCH
	li	r11,4
	mtctr	r0
11:	dcbt	r11,r4
	addi	r11,r11,L1_CACHE_BYTES
	bdnz	11b
#else /* MAX_COPY_PREFETCH == 1 */
	dcbt	r5,r4
	li	r11,L1_CACHE_BYTES+4
#endif /* MAX_COPY_PREFETCH */
	li	r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH
	crclr	4*cr0+eq
2:
	mtctr	r0
1:
	dcbt	r11,r4
	dcbz	r5,r3
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	1b
	beqlr
	crnot	4*cr0+eq,4*cr0+eq
	li	r0,MAX_COPY_PREFETCH
	li	r11,4
	b	2b
#endif	/* CONFIG_8xx */

/*
 * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
 * void atomic_set_mask(atomic_t mask, atomic_t *addr);
 */
_GLOBAL(atomic_clear_mask)
10:	lwarx	r5,0,r4
	andc	r5,r5,r3
	PPC405_ERR77(0,r4)
	stwcx.	r5,0,r4
	bne-	10b
	blr
_GLOBAL(atomic_set_mask)
10:	lwarx	r5,0,r4
	or	r5,r5,r3
	PPC405_ERR77(0,r4)
	stwcx.	r5,0,r4
	bne-	10b
	blr

/*
 * I/O string operations
 *
 * insb(port, buf, len)
 * outsb(port, buf, len)
 * insw(port, buf, len)
 * outsw(port, buf, len)
 * insl(port, buf, len)
 * outsl(port, buf, len)
 * insw_ns(port, buf, len)
 * outsw_ns(port, buf, len)
 * insl_ns(port, buf, len)
 * outsl_ns(port, buf, len)
 *
 * The *_ns versions don't do byte-swapping.
 */
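/*
 * Shape of each routine below, in rough C terms (illustrative sketch
 * only, using _insb as the example; only the access width and the
 * load/store direction change between them):
 *
 *	void _insb(volatile u8 *port, void *buf, long count)
 *	{
 *		u8 *p = buf;
 *		while (count-- > 0)
 *			*p++ = *port;
 *	}
 *
 * Each port access is ordered with eieio and has an exception-table
 * entry, so a machine check on a dead port just returns early.
 */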
_GLOBAL(_insb)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,1
	blelr-
00:	lbz	r5,0(r3)
01:	eieio
02:	stbu	r5,1(r4)
	ISYNC_8xx
	.section .fixup,"ax"
03:	blr
	.text
	.section __ex_table, "a"
		.align 2
		.long 00b, 03b
		.long 01b, 03b
		.long 02b, 03b
	.text
	bdnz	00b
	blr

_GLOBAL(_outsb)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,1
	blelr-
00:	lbzu	r5,1(r4)
01:	stb	r5,0(r3)
02:	eieio
	ISYNC_8xx
	.section .fixup,"ax"
03:	blr
	.text
	.section __ex_table, "a"
		.align 2
		.long 00b, 03b
		.long 01b, 03b
		.long 02b, 03b
	.text
	bdnz	00b
	blr

_GLOBAL(_insw_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhz	r5,0(r3)
01:	eieio
02:	sthu	r5,2(r4)
	ISYNC_8xx
	.section .fixup,"ax"
03:	blr
	.text
	.section __ex_table, "a"
		.align 2
		.long 00b, 03b
		.long 01b, 03b
		.long 02b, 03b
	.text
	bdnz	00b
	blr

_GLOBAL(_outsw_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhzu	r5,2(r4)
01:	sth	r5,0(r3)
02:	eieio
	ISYNC_8xx
	.section .fixup,"ax"
03:	blr
	.text
	.section __ex_table, "a"
		.align 2
		.long 00b, 03b
		.long 01b, 03b
		.long 02b, 03b
	.text
	bdnz	00b
	blr

_GLOBAL(_insl_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwz	r5,0(r3)
01:	eieio
02:	stwu	r5,4(r4)
	ISYNC_8xx
	.section .fixup,"ax"
03:	blr
	.text
	.section __ex_table, "a"
		.align 2
		.long 00b, 03b
		.long 01b, 03b
		.long 02b, 03b
	.text
	bdnz	00b
	blr

_GLOBAL(_outsl_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwzu	r5,4(r4)
01:	stw	r5,0(r3)
02:	eieio
	ISYNC_8xx
	.section .fixup,"ax"
03:	blr
	.text
	.section __ex_table, "a"
		.align 2
		.long 00b, 03b
		.long 01b, 03b
		.long 02b, 03b
	.text
	bdnz	00b
	blr

/*
 * Extended precision shifts.
 *
 * Updated to be valid for shift counts from 0 to 63 inclusive.
 * -- Gabriel
 *
 * R3/R4 has 64 bit value
 * R5    has shift count
 * result in R3/R4
 *
 *  ashrdi3: arithmetic right shift (sign propagation)
 *  lshrdi3: logical right shift
 *  ashldi3: left shift
 */
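/*
 * Worked example (a sketch of the reasoning): take __lshrdi3 with a
 * shift count of 40.  srw/slw only use the low 6 bits of the count and
 * give 0 for effective counts of 32 or more, so
 *
 *	LSW' = (LSW >> 40) | (MSW << (32-40)) | (MSW >> (40-32))
 *	     =      0      |        0         |     MSW >> 8
 *	MSW' =  MSW >> 40  =  0
 *
 * i.e. (MSW:LSW) >> 40 == (0 : MSW >> 8), as expected.  __ashrdi3 does
 * the same dance with sraw (plus an extra mask term) so the vacated
 * bits are copies of the sign bit.
 */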
_GLOBAL(__ashrdi3)
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
	sraw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr

_GLOBAL(__ashldi3)
	subfic	r6,r5,32
	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
	addi	r7,r5,32	# could be xori, or addi with -32
	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
	or	r3,r3,r6	# MSW |= t1
	slw	r4,r4,r5	# LSW = LSW << count
	or	r3,r3,r7	# MSW |= t2
	blr

_GLOBAL(__lshrdi3)
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	srw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr

_GLOBAL(abs)
	srawi	r4,r3,31
	xor	r3,r3,r4
	sub	r3,r3,r4
	blr

_GLOBAL(_get_SP)
	mr	r3,r1		/* Close enough */
	blr

/*
 * Create a kernel thread
 *   kernel_thread(fn, arg, flags)
 */
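/*
 * In C terms this does roughly the following (sketch only; the raw
 * clone/exit syscalls are used, not the C library wrappers):
 *
 *	pid = sys_clone(flags | CLONE_VM | CLONE_UNTRACED, 0);
 *	if (pid == 0) {			child
 *		fn(arg);
 *		sys_exit(0);
 *	}
 *	return pid;			parent (or error from clone)
 */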
_GLOBAL(kernel_thread)
	stwu	r1,-16(r1)
	stw	r30,8(r1)
	stw	r31,12(r1)
	mr	r30,r3		/* function */
	mr	r31,r4		/* argument */
	ori	r3,r5,CLONE_VM	/* flags */
	oris	r3,r3,CLONE_UNTRACED>>16
	li	r4,0		/* new sp (unused) */
	li	r0,__NR_clone
	sc
	cmpwi	0,r3,0		/* parent or child? */
	bne	1f		/* return if parent */
	li	r0,0		/* make top-level stack frame */
	stwu	r0,-16(r1)
	mtlr	r30		/* fn addr in lr */
	mr	r3,r31		/* load arg and call fn */
	PPC440EP_ERR42
	blrl
	li	r0,__NR_exit	/* exit if function returns */
	li	r3,0
	sc
1:	lwz	r30,8(r1)
	lwz	r31,12(r1)
	addi	r1,r1,16
	blr

_GLOBAL(kernel_execve)
	li	r0,__NR_execve
	sc
	bnslr
	neg	r3,r3
	blr

/*
 * This routine is just here to keep GCC happy - sigh...
 */
_GLOBAL(__main)
	blr