lib1funcs.S revision 1.1.1.1.4.2
1@ libgcc routines for ARM cpu.
2@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
3
4/* Copyright (C) 1995-2013 Free Software Foundation, Inc.
5
6This file is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11This file is distributed in the hope that it will be useful, but
12WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14General Public License for more details.
15
16Under Section 7 of GPL version 3, you are granted additional
17permissions described in the GCC Runtime Library Exception, version
183.1, as published by the Free Software Foundation.
19
20You should have received a copy of the GNU General Public License and
21a copy of the GCC Runtime Library Exception along with this program;
22see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23<http://www.gnu.org/licenses/>.  */
24
25/* An executable stack is *not* required for these functions.  */
26#if defined(__ELF__) && defined(__linux__)
27.section .note.GNU-stack,"",%progbits
28.previous
29#endif  /* __ELF__ and __linux__ */
30
31#ifdef __ARM_EABI__
32/* Some attributes that are common to all routines in this file.  */
33	/* Tag_ABI_align_needed: This code does not require 8-byte
34	   alignment from the caller.  */
35	/* .eabi_attribute 24, 0  -- default setting.  */
36	/* Tag_ABI_align_preserved: This code preserves 8-byte
37	   alignment in any callee.  */
38	.eabi_attribute 25, 1
39#endif /* __ARM_EABI__ */
40/* ------------------------------------------------------------------------ */
41
42/* We need to know what prefix to add to function names.  */
43
44#ifndef __USER_LABEL_PREFIX__
45#error  __USER_LABEL_PREFIX__ not defined
46#endif
47
48/* ANSI concatenation macros.  */
49
50#define CONCAT1(a, b) CONCAT2(a, b)
51#define CONCAT2(a, b) a ## b
52
53/* Use the right prefix for global labels.  */
54
55#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
56
57#ifdef __ELF__
58#ifdef __thumb__
59#define __PLT__  /* Not supported in Thumb assembler (for now).  */
60#elif defined __vxworks && !defined __PIC__
61#define __PLT__ /* Not supported by the kernel loader.  */
62#else
63#define __PLT__ (PLT)
64#endif
65#define TYPE(x) .type SYM(x),function
66#define SIZE(x) .size SYM(x), . - SYM(x)
67#define LSYM(x) .x
68#else
69#define __PLT__
70#define TYPE(x)
71#define SIZE(x)
72#define LSYM(x) x
73#endif
74
75/* Function end macros.  Variants for interworking.  */
76
77#if defined(__ARM_ARCH_2__)
78# define __ARM_ARCH__ 2
79#endif
80
81#if defined(__ARM_ARCH_3__)
82# define __ARM_ARCH__ 3
83#endif
84
85#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
86	|| defined(__ARM_ARCH_4T__)
87/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
88   long multiply instructions.  That includes v3M.  */
89# define __ARM_ARCH__ 4
90#endif
91
92#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
93	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
94	|| defined(__ARM_ARCH_5TEJ__)
95# define __ARM_ARCH__ 5
96#endif
97
98#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
99	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
100	|| defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
101	|| defined(__ARM_ARCH_6M__)
102# define __ARM_ARCH__ 6
103#endif
104
105#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
106	|| defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
107	|| defined(__ARM_ARCH_7EM__)
108# define __ARM_ARCH__ 7
109#endif
110
111#if defined(__ARM_ARCH_8A__)
112# define __ARM_ARCH__ 8
113#endif
114
115#ifndef __ARM_ARCH__
116#error Unable to determine architecture.
117#endif
118
119/* There are times when we might prefer Thumb1 code even if ARM code is
120   permitted, for example, the code might be smaller, or there might be
121   interworking problems with switching to ARM state if interworking is
122   disabled.  */
123#if (defined(__thumb__)			\
124     && !defined(__thumb2__)		\
125     && (!defined(__THUMB_INTERWORK__)	\
126	 || defined (__OPTIMIZE_SIZE__)	\
127	 || defined(__ARM_ARCH_6M__)))
128# define __prefer_thumb__
129#endif
130
131/* How to return from a function call depends on the architecture variant.  */
132
133#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
134
135# define RET		bx	lr
136# define RETc(x)	bx##x	lr
137
138/* Special precautions for interworking on armv4t.  */
139# if (__ARM_ARCH__ == 4)
140
141/* Always use bx, not ldr pc.  */
142#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
143#    define __INTERWORKING__
144#   endif /* __THUMB__ || __THUMB_INTERWORK__ */
145
146/* Include thumb stub before arm mode code.  */
147#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
148#   define __INTERWORKING_STUBS__
149#  endif /* __thumb__ && !__THUMB_INTERWORK__ */
150
151#endif /* __ARM_ARCH == 4 */
152
153#else
154
155# define RET		mov	pc, lr
156# define RETc(x)	mov##x	pc, lr
157
158#endif
159
160.macro	cfi_pop		advance, reg, cfa_offset
161#ifdef __ELF__
162	.pushsection	.debug_frame
163	.byte	0x4		/* DW_CFA_advance_loc4 */
164	.4byte	\advance
165	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
166	.byte	0xe		/* DW_CFA_def_cfa_offset */
167	.uleb128 \cfa_offset
168	.popsection
169#endif
170.endm
171.macro	cfi_push	advance, reg, offset, cfa_offset
172#ifdef __ELF__
173	.pushsection	.debug_frame
174	.byte	0x4		/* DW_CFA_advance_loc4 */
175	.4byte	\advance
176	.byte	(0x80 | \reg)	/* DW_CFA_offset */
177	.uleb128 (\offset / -4)
178	.byte	0xe		/* DW_CFA_def_cfa_offset */
179	.uleb128 \cfa_offset
180	.popsection
181#endif
182.endm
183.macro cfi_start	start_label, end_label
184#ifdef __ELF__
185	.pushsection	.debug_frame
186LSYM(Lstart_frame):
187	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
188LSYM(Lstart_cie):
189        .4byte	0xffffffff	@ CIE Identifier Tag
190        .byte	0x1	@ CIE Version
191        .ascii	"\0"	@ CIE Augmentation
192        .uleb128 0x1	@ CIE Code Alignment Factor
193        .sleb128 -4	@ CIE Data Alignment Factor
194        .byte	0xe	@ CIE RA Column
195        .byte	0xc	@ DW_CFA_def_cfa
196        .uleb128 0xd
197        .uleb128 0x0
198
199	.align 2
200LSYM(Lend_cie):
201	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
202LSYM(Lstart_fde):
203	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
204	.4byte	\start_label	@ FDE initial location
205	.4byte	\end_label-\start_label	@ FDE address range
206	.popsection
207#endif
208.endm
209.macro cfi_end	end_label
210#ifdef __ELF__
211	.pushsection	.debug_frame
212	.align	2
213LSYM(Lend_fde):
214	.popsection
215\end_label:
216#endif
217.endm
218
219/* Don't pass dirn, it's there just to get token pasting right.  */
220
221.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
222#if defined (__INTERWORKING__)
223	.ifc "\regs",""
224	ldr\cond	lr, [sp], #8
225	.else
226# if defined(__thumb2__)
227	pop\cond	{\regs, lr}
228# else
229	ldm\cond\dirn	sp!, {\regs, lr}
230# endif
231	.endif
232	.ifnc "\unwind", ""
233	/* Mark LR as restored.  */
23497:	cfi_pop 97b - \unwind, 0xe, 0x0
235	.endif
236	bx\cond	lr
237#else
238	/* Caller is responsible for providing IT instruction.  */
239	.ifc "\regs",""
240	ldr\cond	pc, [sp], #8
241	.else
242# if defined(__thumb2__)
243	pop\cond	{\regs, pc}
244# else
245	ldm\cond\dirn	sp!, {\regs, pc}
246# endif
247	.endif
248#endif
249.endm
250
251/* The Unified assembly syntax allows the same code to be assembled for both
252   ARM and Thumb-2.  However this is only supported by recent gas, so define
253   a set of macros to allow ARM code on older assemblers.  */
254#if defined(__thumb2__)
255.macro do_it cond, suffix=""
256	it\suffix	\cond
257.endm
258.macro shift1 op, arg0, arg1, arg2
259	\op	\arg0, \arg1, \arg2
260.endm
261#define do_push	push
262#define do_pop	pop
263#define COND(op1, op2, cond) op1 ## op2 ## cond
264/* Perform an arithmetic operation with a variable shift operand.  This
265   requires two instructions and a scratch register on Thumb-2.  */
266.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
267	\shiftop \tmp, \src2, \shiftreg
268	\name \dest, \src1, \tmp
269.endm
270#else
271.macro do_it cond, suffix=""
272.endm
273.macro shift1 op, arg0, arg1, arg2
274	mov	\arg0, \arg1, \op \arg2
275.endm
276#define do_push	stmfd sp!,
277#define do_pop	ldmfd sp!,
278#define COND(op1, op2, cond) op1 ## cond ## op2
279.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
280	\name \dest, \src1, \src2, \shiftop \shiftreg
281.endm
282#endif
283
284#ifdef __ARM_EABI__
285.macro ARM_LDIV0 name signed
286	cmp	r0, #0
287	.ifc	\signed, unsigned
288	movne	r0, #0xffffffff
289	.else
290	movgt	r0, #0x7fffffff
291	movlt	r0, #0x80000000
292	.endif
293	b	SYM (__aeabi_idiv0) __PLT__
294.endm
295#else
296.macro ARM_LDIV0 name signed
297	str	lr, [sp, #-8]!
29898:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
299	bl	SYM (__div0) __PLT__
300	mov	r0, #0			@ About as wrong as it could be.
301	RETLDM	unwind=98b
302.endm
303#endif
304
305
306#ifdef __ARM_EABI__
307.macro THUMB_LDIV0 name signed
308#if defined(__ARM_ARCH_6M__)
309	.ifc \signed, unsigned
310	cmp	r0, #0
311	beq	1f
312	mov	r0, #0
313	mvn	r0, r0		@ 0xffffffff
3141:
315	.else
316	cmp	r0, #0
317	beq	2f
318	blt	3f
319	mov	r0, #0
320	mvn	r0, r0
321	lsr	r0, r0, #1	@ 0x7fffffff
322	b	2f
3233:	mov	r0, #0x80
324	lsl	r0, r0, #24	@ 0x80000000
3252:
326	.endif
327	push	{r0, r1, r2}
328	ldr	r0, 4f
329	adr	r1, 4f
330	add	r0, r1
331	str	r0, [sp, #8]
332	@ We know we are not on armv4t, so pop pc is safe.
333	pop	{r0, r1, pc}
334	.align	2
3354:
336	.word	__aeabi_idiv0 - 4b
337#elif defined(__thumb2__)
338	.syntax unified
339	.ifc \signed, unsigned
340	cbz	r0, 1f
341	mov	r0, #0xffffffff
3421:
343	.else
344	cmp	r0, #0
345	do_it	gt
346	movgt	r0, #0x7fffffff
347	do_it	lt
348	movlt	r0, #0x80000000
349	.endif
350	b.w	SYM(__aeabi_idiv0) __PLT__
351#else
352	.align	2
353	bx	pc
354	nop
355	.arm
356	cmp	r0, #0
357	.ifc	\signed, unsigned
358	movne	r0, #0xffffffff
359	.else
360	movgt	r0, #0x7fffffff
361	movlt	r0, #0x80000000
362	.endif
363	b	SYM(__aeabi_idiv0) __PLT__
364	.thumb
365#endif
366.endm
367#else
368.macro THUMB_LDIV0 name signed
369	push	{ r1, lr }
37098:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
371	bl	SYM (__div0)
372	mov	r0, #0			@ About as wrong as it could be.
373#if defined (__INTERWORKING__)
374	pop	{ r1, r2 }
375	bx	r2
376#else
377	pop	{ r1, pc }
378#endif
379.endm
380#endif
381
382.macro FUNC_END name
383	SIZE (__\name)
384.endm
385
386.macro DIV_FUNC_END name signed
387	cfi_start	__\name, LSYM(Lend_div0)
388LSYM(Ldiv0):
389#ifdef __thumb__
390	THUMB_LDIV0 \name \signed
391#else
392	ARM_LDIV0 \name \signed
393#endif
394	cfi_end	LSYM(Lend_div0)
395	FUNC_END \name
396.endm
397
398.macro THUMB_FUNC_START name
399	.globl	SYM (\name)
400	TYPE	(\name)
401	.thumb_func
402SYM (\name):
403.endm
404
405/* Function start macros.  Variants for ARM and Thumb.  */
406
407#ifdef __thumb__
408#define THUMB_FUNC .thumb_func
409#define THUMB_CODE .force_thumb
410# if defined(__thumb2__)
411#define THUMB_SYNTAX .syntax divided
412# else
413#define THUMB_SYNTAX
414# endif
415#else
416#define THUMB_FUNC
417#define THUMB_CODE
418#define THUMB_SYNTAX
419#endif
420
421.macro FUNC_START name
422	.text
423	.globl SYM (__\name)
424	TYPE (__\name)
425	.align 0
426	THUMB_CODE
427	THUMB_FUNC
428	THUMB_SYNTAX
429SYM (__\name):
430.endm
431
432/* Special function that will always be coded in ARM assembly, even if
433   in Thumb-only compilation.  */
434
435#if defined(__thumb2__)
436
437/* For Thumb-2 we build everything in thumb mode.  */
438.macro ARM_FUNC_START name
439       FUNC_START \name
440       .syntax unified
441.endm
442#define EQUIV .thumb_set
443.macro  ARM_CALL name
444	bl	__\name
445.endm
446
447#elif defined(__INTERWORKING_STUBS__)
448
449.macro	ARM_FUNC_START name
450	FUNC_START \name
451	bx	pc
452	nop
453	.arm
454/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
455   directly from other local arm routines.  */
456_L__\name:
457.endm
458#define EQUIV .thumb_set
459/* Branch directly to a function declared with ARM_FUNC_START.
460   Must be called in arm mode.  */
461.macro  ARM_CALL name
462	bl	_L__\name
463.endm
464
465#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
466
467#ifdef __ARM_ARCH_6M__
468#define EQUIV .thumb_set
469#else
470.macro	ARM_FUNC_START name
471	.text
472	.globl SYM (__\name)
473	TYPE (__\name)
474	.align 0
475	.arm
476SYM (__\name):
477.endm
478#define EQUIV .set
479.macro  ARM_CALL name
480	bl	__\name
481.endm
482#endif
483
484#endif
485
486.macro	FUNC_ALIAS new old
487	.globl	SYM (__\new)
488#if defined (__thumb__)
489	.thumb_set	SYM (__\new), SYM (__\old)
490#else
491	.set	SYM (__\new), SYM (__\old)
492#endif
493.endm
494
495#ifndef __ARM_ARCH_6M__
496.macro	ARM_FUNC_ALIAS new old
497	.globl	SYM (__\new)
498	EQUIV	SYM (__\new), SYM (__\old)
499#if defined(__INTERWORKING_STUBS__)
500	.set	SYM (_L__\new), SYM (_L__\old)
501#endif
502.endm
503#endif
504
505#ifdef __ARMEB__
506#define xxh r0
507#define xxl r1
508#define yyh r2
509#define yyl r3
510#else
511#define xxh r1
512#define xxl r0
513#define yyh r3
514#define yyl r2
515#endif
516
517#ifdef __ARM_EABI__
518.macro	WEAK name
519	.weak SYM (__\name)
520.endm
521#endif
522
523#ifdef __thumb__
524/* Register aliases.  */
525
526work		.req	r4	@ XXXX is this safe ?
527dividend	.req	r0
528divisor		.req	r1
529overdone	.req	r2
530result		.req	r2
531curbit		.req	r3
532#endif
533#if 0
534ip		.req	r12
535sp		.req	r13
536lr		.req	r14
537pc		.req	r15
538#endif
539
540/* ------------------------------------------------------------------------ */
541/*		Bodies of the division and modulo routines.		    */
542/* ------------------------------------------------------------------------ */
543.macro ARM_DIV_BODY dividend, divisor, result, curbit
544
545#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
546
547#if defined (__thumb2__)
548	clz	\curbit, \dividend
549	clz	\result, \divisor
550	sub	\curbit, \result, \curbit
551	rsb	\curbit, \curbit, #31
552	adr	\result, 1f
553	add	\curbit, \result, \curbit, lsl #4
554	mov	\result, #0
555	mov	pc, \curbit
556.p2align 3
5571:
558	.set	shift, 32
559	.rept	32
560	.set	shift, shift - 1
561	cmp.w	\dividend, \divisor, lsl #shift
562	nop.n
563	adc.w	\result, \result, \result
564	it	cs
565	subcs.w	\dividend, \dividend, \divisor, lsl #shift
566	.endr
567#else
568	clz	\curbit, \dividend
569	clz	\result, \divisor
570	sub	\curbit, \result, \curbit
571	rsbs	\curbit, \curbit, #31
572	addne	\curbit, \curbit, \curbit, lsl #1
573	mov	\result, #0
574	addne	pc, pc, \curbit, lsl #2
575	nop
576	.set	shift, 32
577	.rept	32
578	.set	shift, shift - 1
579	cmp	\dividend, \divisor, lsl #shift
580	adc	\result, \result, \result
581	subcs	\dividend, \dividend, \divisor, lsl #shift
582	.endr
583#endif
584
585#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
586#if __ARM_ARCH__ >= 5
587
588	clz	\curbit, \divisor
589	clz	\result, \dividend
590	sub	\result, \curbit, \result
591	mov	\curbit, #1
592	mov	\divisor, \divisor, lsl \result
593	mov	\curbit, \curbit, lsl \result
594	mov	\result, #0
595
596#else /* __ARM_ARCH__ < 5 */
597
598	@ Initially shift the divisor left 3 bits if possible,
599	@ set curbit accordingly.  This allows for curbit to be located
600	@ at the left end of each 4-bit nibbles in the division loop
601	@ to save one loop in most cases.
602	tst	\divisor, #0xe0000000
603	moveq	\divisor, \divisor, lsl #3
604	moveq	\curbit, #8
605	movne	\curbit, #1
606
607	@ Unless the divisor is very big, shift it up in multiples of
608	@ four bits, since this is the amount of unwinding in the main
609	@ division loop.  Continue shifting until the divisor is
610	@ larger than the dividend.
6111:	cmp	\divisor, #0x10000000
612	cmplo	\divisor, \dividend
613	movlo	\divisor, \divisor, lsl #4
614	movlo	\curbit, \curbit, lsl #4
615	blo	1b
616
617	@ For very big divisors, we must shift it a bit at a time, or
618	@ we will be in danger of overflowing.
6191:	cmp	\divisor, #0x80000000
620	cmplo	\divisor, \dividend
621	movlo	\divisor, \divisor, lsl #1
622	movlo	\curbit, \curbit, lsl #1
623	blo	1b
624
625	mov	\result, #0
626
627#endif /* __ARM_ARCH__ < 5 */
628
629	@ Division loop
6301:	cmp	\dividend, \divisor
631	do_it	hs, t
632	subhs	\dividend, \dividend, \divisor
633	orrhs	\result,   \result,   \curbit
634	cmp	\dividend, \divisor,  lsr #1
635	do_it	hs, t
636	subhs	\dividend, \dividend, \divisor, lsr #1
637	orrhs	\result,   \result,   \curbit,  lsr #1
638	cmp	\dividend, \divisor,  lsr #2
639	do_it	hs, t
640	subhs	\dividend, \dividend, \divisor, lsr #2
641	orrhs	\result,   \result,   \curbit,  lsr #2
642	cmp	\dividend, \divisor,  lsr #3
643	do_it	hs, t
644	subhs	\dividend, \dividend, \divisor, lsr #3
645	orrhs	\result,   \result,   \curbit,  lsr #3
646	cmp	\dividend, #0			@ Early termination?
647	do_it	ne, t
648	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
649	movne	\divisor,  \divisor, lsr #4
650	bne	1b
651
652#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
653
654.endm
655/* ------------------------------------------------------------------------ */
656.macro ARM_DIV2_ORDER divisor, order
657
658#if __ARM_ARCH__ >= 5
659
660	clz	\order, \divisor
661	rsb	\order, \order, #31
662
663#else
664
665	cmp	\divisor, #(1 << 16)
666	movhs	\divisor, \divisor, lsr #16
667	movhs	\order, #16
668	movlo	\order, #0
669
670	cmp	\divisor, #(1 << 8)
671	movhs	\divisor, \divisor, lsr #8
672	addhs	\order, \order, #8
673
674	cmp	\divisor, #(1 << 4)
675	movhs	\divisor, \divisor, lsr #4
676	addhs	\order, \order, #4
677
678	cmp	\divisor, #(1 << 2)
679	addhi	\order, \order, #3
680	addls	\order, \order, \divisor, lsr #1
681
682#endif
683
684.endm
685/* ------------------------------------------------------------------------ */
686.macro ARM_MOD_BODY dividend, divisor, order, spare
687
688#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
689
690	clz	\order, \divisor
691	clz	\spare, \dividend
692	sub	\order, \order, \spare
693	rsbs	\order, \order, #31
694	addne	pc, pc, \order, lsl #3
695	nop
696	.set	shift, 32
697	.rept	32
698	.set	shift, shift - 1
699	cmp	\dividend, \divisor, lsl #shift
700	subcs	\dividend, \dividend, \divisor, lsl #shift
701	.endr
702
703#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
704#if __ARM_ARCH__ >= 5
705
706	clz	\order, \divisor
707	clz	\spare, \dividend
708	sub	\order, \order, \spare
709	mov	\divisor, \divisor, lsl \order
710
711#else /* __ARM_ARCH__ < 5 */
712
713	mov	\order, #0
714
715	@ Unless the divisor is very big, shift it up in multiples of
716	@ four bits, since this is the amount of unwinding in the main
717	@ division loop.  Continue shifting until the divisor is
718	@ larger than the dividend.
7191:	cmp	\divisor, #0x10000000
720	cmplo	\divisor, \dividend
721	movlo	\divisor, \divisor, lsl #4
722	addlo	\order, \order, #4
723	blo	1b
724
725	@ For very big divisors, we must shift it a bit at a time, or
726	@ we will be in danger of overflowing.
7271:	cmp	\divisor, #0x80000000
728	cmplo	\divisor, \dividend
729	movlo	\divisor, \divisor, lsl #1
730	addlo	\order, \order, #1
731	blo	1b
732
733#endif /* __ARM_ARCH__ < 5 */
734
735	@ Perform all needed substractions to keep only the reminder.
736	@ Do comparisons in batch of 4 first.
737	subs	\order, \order, #3		@ yes, 3 is intended here
738	blt	2f
739
7401:	cmp	\dividend, \divisor
741	subhs	\dividend, \dividend, \divisor
742	cmp	\dividend, \divisor,  lsr #1
743	subhs	\dividend, \dividend, \divisor, lsr #1
744	cmp	\dividend, \divisor,  lsr #2
745	subhs	\dividend, \dividend, \divisor, lsr #2
746	cmp	\dividend, \divisor,  lsr #3
747	subhs	\dividend, \dividend, \divisor, lsr #3
748	cmp	\dividend, #1
749	mov	\divisor, \divisor, lsr #4
750	subges	\order, \order, #4
751	bge	1b
752
753	tst	\order, #3
754	teqne	\dividend, #0
755	beq	5f
756
757	@ Either 1, 2 or 3 comparison/substractions are left.
7582:	cmn	\order, #2
759	blt	4f
760	beq	3f
761	cmp	\dividend, \divisor
762	subhs	\dividend, \dividend, \divisor
763	mov	\divisor,  \divisor,  lsr #1
7643:	cmp	\dividend, \divisor
765	subhs	\dividend, \dividend, \divisor
766	mov	\divisor,  \divisor,  lsr #1
7674:	cmp	\dividend, \divisor
768	subhs	\dividend, \dividend, \divisor
7695:
770
771#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
772
773.endm
774/* ------------------------------------------------------------------------ */
775.macro THUMB_DIV_MOD_BODY modulo
776	@ Load the constant 0x10000000 into our work register.
777	mov	work, #1
778	lsl	work, #28
779LSYM(Loop1):
780	@ Unless the divisor is very big, shift it up in multiples of
781	@ four bits, since this is the amount of unwinding in the main
782	@ division loop.  Continue shifting until the divisor is
783	@ larger than the dividend.
784	cmp	divisor, work
785	bhs	LSYM(Lbignum)
786	cmp	divisor, dividend
787	bhs	LSYM(Lbignum)
788	lsl	divisor, #4
789	lsl	curbit,  #4
790	b	LSYM(Loop1)
791LSYM(Lbignum):
792	@ Set work to 0x80000000
793	lsl	work, #3
794LSYM(Loop2):
795	@ For very big divisors, we must shift it a bit at a time, or
796	@ we will be in danger of overflowing.
797	cmp	divisor, work
798	bhs	LSYM(Loop3)
799	cmp	divisor, dividend
800	bhs	LSYM(Loop3)
801	lsl	divisor, #1
802	lsl	curbit,  #1
803	b	LSYM(Loop2)
804LSYM(Loop3):
805	@ Test for possible subtractions ...
806  .if \modulo
807	@ ... On the final pass, this may subtract too much from the dividend,
808	@ so keep track of which subtractions are done, we can fix them up
809	@ afterwards.
810	mov	overdone, #0
811	cmp	dividend, divisor
812	blo	LSYM(Lover1)
813	sub	dividend, dividend, divisor
814LSYM(Lover1):
815	lsr	work, divisor, #1
816	cmp	dividend, work
817	blo	LSYM(Lover2)
818	sub	dividend, dividend, work
819	mov	ip, curbit
820	mov	work, #1
821	ror	curbit, work
822	orr	overdone, curbit
823	mov	curbit, ip
824LSYM(Lover2):
825	lsr	work, divisor, #2
826	cmp	dividend, work
827	blo	LSYM(Lover3)
828	sub	dividend, dividend, work
829	mov	ip, curbit
830	mov	work, #2
831	ror	curbit, work
832	orr	overdone, curbit
833	mov	curbit, ip
834LSYM(Lover3):
835	lsr	work, divisor, #3
836	cmp	dividend, work
837	blo	LSYM(Lover4)
838	sub	dividend, dividend, work
839	mov	ip, curbit
840	mov	work, #3
841	ror	curbit, work
842	orr	overdone, curbit
843	mov	curbit, ip
844LSYM(Lover4):
845	mov	ip, curbit
846  .else
847	@ ... and note which bits are done in the result.  On the final pass,
848	@ this may subtract too much from the dividend, but the result will be ok,
849	@ since the "bit" will have been shifted out at the bottom.
850	cmp	dividend, divisor
851	blo	LSYM(Lover1)
852	sub	dividend, dividend, divisor
853	orr	result, result, curbit
854LSYM(Lover1):
855	lsr	work, divisor, #1
856	cmp	dividend, work
857	blo	LSYM(Lover2)
858	sub	dividend, dividend, work
859	lsr	work, curbit, #1
860	orr	result, work
861LSYM(Lover2):
862	lsr	work, divisor, #2
863	cmp	dividend, work
864	blo	LSYM(Lover3)
865	sub	dividend, dividend, work
866	lsr	work, curbit, #2
867	orr	result, work
868LSYM(Lover3):
869	lsr	work, divisor, #3
870	cmp	dividend, work
871	blo	LSYM(Lover4)
872	sub	dividend, dividend, work
873	lsr	work, curbit, #3
874	orr	result, work
875LSYM(Lover4):
876  .endif
877
878	cmp	dividend, #0			@ Early termination?
879	beq	LSYM(Lover5)
880	lsr	curbit,  #4			@ No, any more bits to do?
881	beq	LSYM(Lover5)
882	lsr	divisor, #4
883	b	LSYM(Loop3)
884LSYM(Lover5):
885  .if \modulo
886	@ Any subtractions that we should not have done will be recorded in
887	@ the top three bits of "overdone".  Exactly which were not needed
888	@ are governed by the position of the bit, stored in ip.
889	mov	work, #0xe
890	lsl	work, #28
891	and	overdone, work
892	beq	LSYM(Lgot_result)
893
894	@ If we terminated early, because dividend became zero, then the
895	@ bit in ip will not be in the bottom nibble, and we should not
896	@ perform the additions below.  We must test for this though
897	@ (rather relying upon the TSTs to prevent the additions) since
898	@ the bit in ip could be in the top two bits which might then match
899	@ with one of the smaller RORs.
900	mov	curbit, ip
901	mov	work, #0x7
902	tst	curbit, work
903	beq	LSYM(Lgot_result)
904
905	mov	curbit, ip
906	mov	work, #3
907	ror	curbit, work
908	tst	overdone, curbit
909	beq	LSYM(Lover6)
910	lsr	work, divisor, #3
911	add	dividend, work
912LSYM(Lover6):
913	mov	curbit, ip
914	mov	work, #2
915	ror	curbit, work
916	tst	overdone, curbit
917	beq	LSYM(Lover7)
918	lsr	work, divisor, #2
919	add	dividend, work
920LSYM(Lover7):
921	mov	curbit, ip
922	mov	work, #1
923	ror	curbit, work
924	tst	overdone, curbit
925	beq	LSYM(Lgot_result)
926	lsr	work, divisor, #1
927	add	dividend, work
928  .endif
929LSYM(Lgot_result):
930.endm
931/* ------------------------------------------------------------------------ */
932/*		Start of the Real Functions				    */
933/* ------------------------------------------------------------------------ */
934#ifdef L_udivsi3
935
936#if defined(__prefer_thumb__)
937
938	FUNC_START udivsi3
939	FUNC_ALIAS aeabi_uidiv udivsi3
940
941	cmp	divisor, #0
942	beq	LSYM(Ldiv0)
943LSYM(udivsi3_skip_div0_test):
944	mov	curbit, #1
945	mov	result, #0
946
947	push	{ work }
948	cmp	dividend, divisor
949	blo	LSYM(Lgot_result)
950
951	THUMB_DIV_MOD_BODY 0
952
953	mov	r0, result
954	pop	{ work }
955	RET
956
957#elif defined(__ARM_ARCH_EXT_IDIV__)
958
959	ARM_FUNC_START udivsi3
960	ARM_FUNC_ALIAS aeabi_uidiv udivsi3
961
962	cmp	r1, #0
963	beq	LSYM(Ldiv0)
964
965	udiv	r0, r0, r1
966	RET
967
968#else /* ARM version/Thumb-2.  */
969
970	ARM_FUNC_START udivsi3
971	ARM_FUNC_ALIAS aeabi_uidiv udivsi3
972
973	/* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
974	   check for division-by-zero a second time.  */
975LSYM(udivsi3_skip_div0_test):
976	subs	r2, r1, #1
977	do_it	eq
978	RETc(eq)
979	bcc	LSYM(Ldiv0)
980	cmp	r0, r1
981	bls	11f
982	tst	r1, r2
983	beq	12f
984
985	ARM_DIV_BODY r0, r1, r2, r3
986
987	mov	r0, r2
988	RET
989
99011:	do_it	eq, e
991	moveq	r0, #1
992	movne	r0, #0
993	RET
994
99512:	ARM_DIV2_ORDER r1, r2
996
997	mov	r0, r0, lsr r2
998	RET
999
1000#endif /* ARM version */
1001
1002	DIV_FUNC_END udivsi3 unsigned
1003
1004#if defined(__prefer_thumb__)
1005FUNC_START aeabi_uidivmod
1006	cmp	r1, #0
1007	beq	LSYM(Ldiv0)
1008	push	{r0, r1, lr}
1009	bl	LSYM(udivsi3_skip_div0_test)
1010	POP	{r1, r2, r3}
1011	mul	r2, r0
1012	sub	r1, r1, r2
1013	bx	r3
1014#elif defined(__ARM_ARCH_EXT_IDIV__)
1015ARM_FUNC_START aeabi_uidivmod
1016	cmp	r1, #0
1017	beq	LSYM(Ldiv0)
1018	mov     r2, r0
1019	udiv	r0, r0, r1
1020	mls     r1, r0, r1, r2
1021	RET
1022#else
1023ARM_FUNC_START aeabi_uidivmod
1024	cmp	r1, #0
1025	beq	LSYM(Ldiv0)
1026	stmfd	sp!, { r0, r1, lr }
1027	bl	LSYM(udivsi3_skip_div0_test)
1028	ldmfd	sp!, { r1, r2, lr }
1029	mul	r3, r2, r0
1030	sub	r1, r1, r3
1031	RET
1032#endif
1033	FUNC_END aeabi_uidivmod
1034
1035#endif /* L_udivsi3 */
1036/* ------------------------------------------------------------------------ */
1037#ifdef L_umodsi3
1038
1039#ifdef __ARM_ARCH_EXT_IDIV__
1040
1041	ARM_FUNC_START umodsi3
1042
1043	cmp	r1, #0
1044	beq	LSYM(Ldiv0)
1045	udiv	r2, r0, r1
1046	mls     r0, r1, r2, r0
1047	RET
1048
1049#elif defined(__thumb__)
1050
1051	FUNC_START umodsi3
1052
1053	cmp	divisor, #0
1054	beq	LSYM(Ldiv0)
1055	mov	curbit, #1
1056	cmp	dividend, divisor
1057	bhs	LSYM(Lover10)
1058	RET
1059
1060LSYM(Lover10):
1061	push	{ work }
1062
1063	THUMB_DIV_MOD_BODY 1
1064
1065	pop	{ work }
1066	RET
1067
1068#else  /* ARM version.  */
1069
1070	FUNC_START umodsi3
1071
1072	subs	r2, r1, #1			@ compare divisor with 1
1073	bcc	LSYM(Ldiv0)
1074	cmpne	r0, r1				@ compare dividend with divisor
1075	moveq   r0, #0
1076	tsthi	r1, r2				@ see if divisor is power of 2
1077	andeq	r0, r0, r2
1078	RETc(ls)
1079
1080	ARM_MOD_BODY r0, r1, r2, r3
1081
1082	RET
1083
1084#endif /* ARM version.  */
1085
1086	DIV_FUNC_END umodsi3 unsigned
1087
1088#endif /* L_umodsi3 */
1089/* ------------------------------------------------------------------------ */
1090#ifdef L_divsi3
1091
1092#if defined(__prefer_thumb__)
1093
1094	FUNC_START divsi3
1095	FUNC_ALIAS aeabi_idiv divsi3
1096
1097	cmp	divisor, #0
1098	beq	LSYM(Ldiv0)
1099LSYM(divsi3_skip_div0_test):
1100	push	{ work }
1101	mov	work, dividend
1102	eor	work, divisor		@ Save the sign of the result.
1103	mov	ip, work
1104	mov	curbit, #1
1105	mov	result, #0
1106	cmp	divisor, #0
1107	bpl	LSYM(Lover10)
1108	neg	divisor, divisor	@ Loops below use unsigned.
1109LSYM(Lover10):
1110	cmp	dividend, #0
1111	bpl	LSYM(Lover11)
1112	neg	dividend, dividend
1113LSYM(Lover11):
1114	cmp	dividend, divisor
1115	blo	LSYM(Lgot_result)
1116
1117	THUMB_DIV_MOD_BODY 0
1118
1119	mov	r0, result
1120	mov	work, ip
1121	cmp	work, #0
1122	bpl	LSYM(Lover12)
1123	neg	r0, r0
1124LSYM(Lover12):
1125	pop	{ work }
1126	RET
1127
1128#elif defined(__ARM_ARCH_EXT_IDIV__)
1129
1130	ARM_FUNC_START divsi3
1131	ARM_FUNC_ALIAS aeabi_idiv divsi3
1132
1133	cmp 	r1, #0
1134	beq	LSYM(Ldiv0)
1135	sdiv	r0, r0, r1
1136	RET
1137
1138#else /* ARM/Thumb-2 version.  */
1139
1140	ARM_FUNC_START divsi3
1141	ARM_FUNC_ALIAS aeabi_idiv divsi3
1142
1143	cmp	r1, #0
1144	beq	LSYM(Ldiv0)
1145LSYM(divsi3_skip_div0_test):
1146	eor	ip, r0, r1			@ save the sign of the result.
1147	do_it	mi
1148	rsbmi	r1, r1, #0			@ loops below use unsigned.
1149	subs	r2, r1, #1			@ division by 1 or -1 ?
1150	beq	10f
1151	movs	r3, r0
1152	do_it	mi
1153	rsbmi	r3, r0, #0			@ positive dividend value
1154	cmp	r3, r1
1155	bls	11f
1156	tst	r1, r2				@ divisor is power of 2 ?
1157	beq	12f
1158
1159	ARM_DIV_BODY r3, r1, r0, r2
1160
1161	cmp	ip, #0
1162	do_it	mi
1163	rsbmi	r0, r0, #0
1164	RET
1165
116610:	teq	ip, r0				@ same sign ?
1167	do_it	mi
1168	rsbmi	r0, r0, #0
1169	RET
1170
117111:	do_it	lo
1172	movlo	r0, #0
1173	do_it	eq,t
1174	moveq	r0, ip, asr #31
1175	orreq	r0, r0, #1
1176	RET
1177
117812:	ARM_DIV2_ORDER r1, r2
1179
1180	cmp	ip, #0
1181	mov	r0, r3, lsr r2
1182	do_it	mi
1183	rsbmi	r0, r0, #0
1184	RET
1185
1186#endif /* ARM version */
1187
1188	DIV_FUNC_END divsi3 signed
1189
1190#if defined(__prefer_thumb__)
1191FUNC_START aeabi_idivmod
1192	cmp	r1, #0
1193	beq	LSYM(Ldiv0)
1194	push	{r0, r1, lr}
1195	bl	LSYM(divsi3_skip_div0_test)
1196	POP	{r1, r2, r3}
1197	mul	r2, r0
1198	sub	r1, r1, r2
1199	bx	r3
1200#elif defined(__ARM_ARCH_EXT_IDIV__)
1201ARM_FUNC_START aeabi_idivmod
1202	cmp 	r1, #0
1203	beq	LSYM(Ldiv0)
1204	mov     r2, r0
1205	sdiv	r0, r0, r1
1206	mls     r1, r0, r1, r2
1207	RET
1208#else
1209ARM_FUNC_START aeabi_idivmod
1210	cmp	r1, #0
1211	beq	LSYM(Ldiv0)
1212	stmfd	sp!, { r0, r1, lr }
1213	bl	LSYM(divsi3_skip_div0_test)
1214	ldmfd	sp!, { r1, r2, lr }
1215	mul	r3, r2, r0
1216	sub	r1, r1, r3
1217	RET
1218#endif
1219	FUNC_END aeabi_idivmod
1220
1221#endif /* L_divsi3 */
1222/* ------------------------------------------------------------------------ */
1223#ifdef L_modsi3
1224
1225#if defined(__ARM_ARCH_EXT_IDIV__)
1226
1227	ARM_FUNC_START modsi3
1228
1229	cmp	r1, #0
1230	beq	LSYM(Ldiv0)
1231
1232	sdiv	r2, r0, r1
1233	mls     r0, r1, r2, r0
1234	RET
1235
1236#elif defined(__thumb__)
1237
1238	FUNC_START modsi3
1239
1240	mov	curbit, #1
1241	cmp	divisor, #0
1242	beq	LSYM(Ldiv0)
1243	bpl	LSYM(Lover10)
1244	neg	divisor, divisor		@ Loops below use unsigned.
1245LSYM(Lover10):
1246	push	{ work }
1247	@ Need to save the sign of the dividend, unfortunately, we need
1248	@ work later on.  Must do this after saving the original value of
1249	@ the work register, because we will pop this value off first.
1250	push	{ dividend }
1251	cmp	dividend, #0
1252	bpl	LSYM(Lover11)
1253	neg	dividend, dividend
1254LSYM(Lover11):
1255	cmp	dividend, divisor
1256	blo	LSYM(Lgot_result)
1257
1258	THUMB_DIV_MOD_BODY 1
1259
1260	pop	{ work }
1261	cmp	work, #0
1262	bpl	LSYM(Lover12)
1263	neg	dividend, dividend
1264LSYM(Lover12):
1265	pop	{ work }
1266	RET
1267
1268#else /* ARM version.  */
1269
1270	FUNC_START modsi3
1271
1272	cmp	r1, #0
1273	beq	LSYM(Ldiv0)
1274	rsbmi	r1, r1, #0			@ loops below use unsigned.
1275	movs	ip, r0				@ preserve sign of dividend
1276	rsbmi	r0, r0, #0			@ if negative make positive
1277	subs	r2, r1, #1			@ compare divisor with 1
1278	cmpne	r0, r1				@ compare dividend with divisor
1279	moveq	r0, #0
1280	tsthi	r1, r2				@ see if divisor is power of 2
1281	andeq	r0, r0, r2
1282	bls	10f
1283
1284	ARM_MOD_BODY r0, r1, r2, r3
1285
128610:	cmp	ip, #0
1287	rsbmi	r0, r0, #0
1288	RET
1289
1290#endif /* ARM version */
1291
1292	DIV_FUNC_END modsi3 signed
1293
1294#endif /* L_modsi3 */
1295/* ------------------------------------------------------------------------ */
1296#ifdef L_dvmd_tls
1297
1298#ifdef __ARM_EABI__
1299	WEAK aeabi_idiv0
1300	WEAK aeabi_ldiv0
1301	FUNC_START aeabi_idiv0
1302	FUNC_START aeabi_ldiv0
1303	RET
1304	FUNC_END aeabi_ldiv0
1305	FUNC_END aeabi_idiv0
1306#else
1307	FUNC_START div0
1308	RET
1309	FUNC_END div0
1310#endif
1311
1312#endif /* L_divmodsi_tools */
1313/* ------------------------------------------------------------------------ */
1314#ifdef L_dvmd_lnx
1315@ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls
1316
1317/* Constant taken from <asm/signal.h>.  */
1318#define SIGFPE	8
1319
1320#ifdef __ARM_EABI__
1321	WEAK aeabi_idiv0
1322	WEAK aeabi_ldiv0
1323	ARM_FUNC_START aeabi_idiv0
1324	ARM_FUNC_START aeabi_ldiv0
1325#else
1326	ARM_FUNC_START div0
1327#endif
1328
1329	do_push	{r1, lr}
1330	mov	r0, #SIGFPE
1331	bl	SYM(raise) __PLT__
1332	RETLDM	r1
1333
1334#ifdef __ARM_EABI__
1335	FUNC_END aeabi_ldiv0
1336	FUNC_END aeabi_idiv0
1337#else
1338	FUNC_END div0
1339#endif
1340
1341#endif /* L_dvmd_lnx */
1342#ifdef L_clear_cache
1343#if defined __ARM_EABI__ && defined __linux__
1344@ EABI GNU/Linux call to cacheflush syscall.
1345	ARM_FUNC_START clear_cache
1346	do_push	{r7}
1347#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
1348	movw	r7, #2
1349	movt	r7, #0xf
1350#else
1351	mov	r7, #0xf0000
1352	add	r7, r7, #2
1353#endif
1354	mov	r2, #0
1355	swi	0
1356	do_pop	{r7}
1357	RET
1358	FUNC_END clear_cache
1359#else
1360#error "This is only for ARM EABI GNU/Linux"
1361#endif
1362#endif /* L_clear_cache */
1363/* ------------------------------------------------------------------------ */
1364/* Dword shift operations.  */
1365/* All the following Dword shift variants rely on the fact that
1366	shft xxx, Reg
1367   is in fact done as
1368	shft xxx, (Reg & 255)
1369   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
1370   case of logical shifts) or the sign (for asr).  */
1371
1372#ifdef __ARMEB__
1373#define al	r1
1374#define ah	r0
1375#else
1376#define al	r0
1377#define ah	r1
1378#endif
1379
1380/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
1381#ifndef __symbian__
1382
1383#ifdef L_lshrdi3
1384
1385	FUNC_START lshrdi3
1386	FUNC_ALIAS aeabi_llsr lshrdi3
1387
1388#ifdef __thumb__
1389	lsr	al, r2
1390	mov	r3, ah
1391	lsr	ah, r2
1392	mov	ip, r3
1393	sub	r2, #32
1394	lsr	r3, r2
1395	orr	al, r3
1396	neg	r2, r2
1397	mov	r3, ip
1398	lsl	r3, r2
1399	orr	al, r3
1400	RET
1401#else
1402	subs	r3, r2, #32
1403	rsb	ip, r2, #32
1404	movmi	al, al, lsr r2
1405	movpl	al, ah, lsr r3
1406	orrmi	al, al, ah, lsl ip
1407	mov	ah, ah, lsr r2
1408	RET
1409#endif
1410	FUNC_END aeabi_llsr
1411	FUNC_END lshrdi3
1412
1413#endif
1414
1415#ifdef L_ashrdi3
1416
1417	FUNC_START ashrdi3
1418	FUNC_ALIAS aeabi_lasr ashrdi3
1419
1420#ifdef __thumb__
1421	lsr	al, r2
1422	mov	r3, ah
1423	asr	ah, r2
1424	sub	r2, #32
1425	@ If r2 is negative at this point the following step would OR
1426	@ the sign bit into all of AL.  That's not what we want...
1427	bmi	1f
1428	mov	ip, r3
1429	asr	r3, r2
1430	orr	al, r3
1431	mov	r3, ip
14321:
1433	neg	r2, r2
1434	lsl	r3, r2
1435	orr	al, r3
1436	RET
1437#else
1438	subs	r3, r2, #32
1439	rsb	ip, r2, #32
1440	movmi	al, al, lsr r2
1441	movpl	al, ah, asr r3
1442	orrmi	al, al, ah, lsl ip
1443	mov	ah, ah, asr r2
1444	RET
1445#endif
1446
1447	FUNC_END aeabi_lasr
1448	FUNC_END ashrdi3
1449
1450#endif
1451
1452#ifdef L_ashldi3
1453
1454	FUNC_START ashldi3
1455	FUNC_ALIAS aeabi_llsl ashldi3
1456
1457#ifdef __thumb__
1458	lsl	ah, r2
1459	mov	r3, al
1460	lsl	al, r2
1461	mov	ip, r3
1462	sub	r2, #32
1463	lsl	r3, r2
1464	orr	ah, r3
1465	neg	r2, r2
1466	mov	r3, ip
1467	lsr	r3, r2
1468	orr	ah, r3
1469	RET
1470#else
1471	subs	r3, r2, #32
1472	rsb	ip, r2, #32
1473	movmi	ah, ah, lsl r2
1474	movpl	ah, al, lsl r3
1475	orrmi	ah, ah, al, lsr ip
1476	mov	al, al, lsl r2
1477	RET
1478#endif
1479	FUNC_END aeabi_llsl
1480	FUNC_END ashldi3
1481
1482#endif
1483
1484#endif /* __symbian__ */
1485
1486#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
1487    || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
1488    || defined(__ARM_ARCH_5TEJ__)
1489#define HAVE_ARM_CLZ 1
1490#endif
1491
1492#ifdef L_clzsi2
1493#if defined(__ARM_ARCH_6M__)
1494FUNC_START clzsi2
1495	mov	r1, #28
1496	mov	r3, #1
1497	lsl	r3, r3, #16
1498	cmp	r0, r3 /* 0x10000 */
1499	bcc	2f
1500	lsr	r0, r0, #16
1501	sub	r1, r1, #16
15022:	lsr	r3, r3, #8
1503	cmp	r0, r3 /* #0x100 */
1504	bcc	2f
1505	lsr	r0, r0, #8
1506	sub	r1, r1, #8
15072:	lsr	r3, r3, #4
1508	cmp	r0, r3 /* #0x10 */
1509	bcc	2f
1510	lsr	r0, r0, #4
1511	sub	r1, r1, #4
15122:	adr	r2, 1f
1513	ldrb	r0, [r2, r0]
1514	add	r0, r0, r1
1515	bx lr
1516.align 2
15171:
1518.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1519	FUNC_END clzsi2
1520#else
1521ARM_FUNC_START clzsi2
1522# if defined(HAVE_ARM_CLZ)
1523	clz	r0, r0
1524	RET
1525# else
1526	mov	r1, #28
1527	cmp	r0, #0x10000
1528	do_it	cs, t
1529	movcs	r0, r0, lsr #16
1530	subcs	r1, r1, #16
1531	cmp	r0, #0x100
1532	do_it	cs, t
1533	movcs	r0, r0, lsr #8
1534	subcs	r1, r1, #8
1535	cmp	r0, #0x10
1536	do_it	cs, t
1537	movcs	r0, r0, lsr #4
1538	subcs	r1, r1, #4
1539	adr	r2, 1f
1540	ldrb	r0, [r2, r0]
1541	add	r0, r0, r1
1542	RET
1543.align 2
15441:
1545.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1546# endif /* !HAVE_ARM_CLZ */
1547	FUNC_END clzsi2
1548#endif
1549#endif /* L_clzsi2 */
1550
1551#ifdef L_clzdi2
1552#if !defined(HAVE_ARM_CLZ)
1553
1554# if defined(__ARM_ARCH_6M__)
1555FUNC_START clzdi2
1556	push	{r4, lr}
1557# else
1558ARM_FUNC_START clzdi2
1559	do_push	{r4, lr}
1560# endif
1561	cmp	xxh, #0
1562	bne	1f
1563# ifdef __ARMEB__
1564	mov	r0, xxl
1565	bl	__clzsi2
1566	add	r0, r0, #32
1567	b 2f
15681:
1569	bl	__clzsi2
1570# else
1571	bl	__clzsi2
1572	add	r0, r0, #32
1573	b 2f
15741:
1575	mov	r0, xxh
1576	bl	__clzsi2
1577# endif
15782:
1579# if defined(__ARM_ARCH_6M__)
1580	pop	{r4, pc}
1581# else
1582	RETLDM	r4
1583# endif
1584	FUNC_END clzdi2
1585
1586#else /* HAVE_ARM_CLZ */
1587
1588ARM_FUNC_START clzdi2
1589	cmp	xxh, #0
1590	do_it	eq, et
1591	clzeq	r0, xxl
1592	clzne	r0, xxh
1593	addeq	r0, r0, #32
1594	RET
1595	FUNC_END clzdi2
1596
1597#endif
1598#endif /* L_clzdi2 */
1599
1600#ifdef L_ctzsi2
1601#if defined(__ARM_ARCH_6M__)
1602FUNC_START ctzsi2
1603	neg	r1, r0
1604	and	r0, r0, r1
1605	mov	r1, #28
1606	mov	r3, #1
1607	lsl	r3, r3, #16
1608	cmp	r0, r3 /* 0x10000 */
1609	bcc	2f
1610	lsr	r0, r0, #16
1611	sub	r1, r1, #16
16122:	lsr	r3, r3, #8
1613	cmp	r0, r3 /* #0x100 */
1614	bcc	2f
1615	lsr	r0, r0, #8
1616	sub	r1, r1, #8
16172:	lsr	r3, r3, #4
1618	cmp	r0, r3 /* #0x10 */
1619	bcc	2f
1620	lsr	r0, r0, #4
1621	sub	r1, r1, #4
16222:	adr	r2, 1f
1623	ldrb	r0, [r2, r0]
1624	sub	r0, r0, r1
1625	bx lr
1626.align 2
16271:
1628.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1629	FUNC_END ctzsi2
1630#else
1631ARM_FUNC_START ctzsi2
1632	rsb	r1, r0, #0
1633	and	r0, r0, r1
1634# if defined(HAVE_ARM_CLZ)
1635	clz	r0, r0
1636	rsb	r0, r0, #31
1637	RET
1638# else
1639	mov	r1, #28
1640	cmp	r0, #0x10000
1641	do_it	cs, t
1642	movcs	r0, r0, lsr #16
1643	subcs	r1, r1, #16
1644	cmp	r0, #0x100
1645	do_it	cs, t
1646	movcs	r0, r0, lsr #8
1647	subcs	r1, r1, #8
1648	cmp	r0, #0x10
1649	do_it	cs, t
1650	movcs	r0, r0, lsr #4
1651	subcs	r1, r1, #4
1652	adr	r2, 1f
1653	ldrb	r0, [r2, r0]
1654	sub	r0, r0, r1
1655	RET
1656.align 2
16571:
1658.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1659# endif /* !HAVE_ARM_CLZ */
1660	FUNC_END ctzsi2
1661#endif
1662#endif /* L_clzsi2 */
1663
1664/* ------------------------------------------------------------------------ */
1665/* These next two sections are here despite the fact that they contain Thumb
1666   assembler because their presence allows interworked code to be linked even
1667   when the GCC library is this one.  */
1668
1669/* Do not build the interworking functions when the target architecture does
1670   not support Thumb instructions.  (This can be a multilib option).  */
1671#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
1672      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
1673      || __ARM_ARCH__ >= 6
1674
1675#if defined L_call_via_rX
1676
1677/* These labels & instructions are used by the Arm/Thumb interworking code.
1678   The address of function to be called is loaded into a register and then
1679   one of these labels is called via a BL instruction.  This puts the
1680   return address into the link register with the bottom bit set, and the
1681   code here switches to the correct mode before executing the function.  */
1682
1683	.text
1684	.align 0
1685        .force_thumb
1686
1687.macro call_via register
1688	THUMB_FUNC_START _call_via_\register
1689
1690	bx	\register
1691	nop
1692
1693	SIZE	(_call_via_\register)
1694.endm
1695
1696	call_via r0
1697	call_via r1
1698	call_via r2
1699	call_via r3
1700	call_via r4
1701	call_via r5
1702	call_via r6
1703	call_via r7
1704	call_via r8
1705	call_via r9
1706	call_via sl
1707	call_via fp
1708	call_via ip
1709	call_via sp
1710	call_via lr
1711
1712#endif /* L_call_via_rX */
1713
1714/* Don't bother with the old interworking routines for Thumb-2.  */
1715/* ??? Maybe only omit these on "m" variants.  */
1716#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
1717
1718#if defined L_interwork_call_via_rX
1719
1720/* These labels & instructions are used by the Arm/Thumb interworking code,
1721   when the target address is in an unknown instruction set.  The address
1722   of function to be called is loaded into a register and then one of these
1723   labels is called via a BL instruction.  This puts the return address
1724   into the link register with the bottom bit set, and the code here
1725   switches to the correct mode before executing the function.  Unfortunately
1726   the target code cannot be relied upon to return via a BX instruction, so
1727   instead we have to store the resturn address on the stack and allow the
1728   called function to return here instead.  Upon return we recover the real
1729   return address and use a BX to get back to Thumb mode.
1730
1731   There are three variations of this code.  The first,
1732   _interwork_call_via_rN(), will push the return address onto the
1733   stack and pop it in _arm_return().  It should only be used if all
1734   arguments are passed in registers.
1735
1736   The second, _interwork_r7_call_via_rN(), instead stores the return
1737   address at [r7, #-4].  It is the caller's responsibility to ensure
1738   that this address is valid and contains no useful data.
1739
1740   The third, _interwork_r11_call_via_rN(), works in the same way but
1741   uses r11 instead of r7.  It is useful if the caller does not really
1742   need a frame pointer.  */
1743
1744	.text
1745	.align 0
1746
1747	.code   32
1748	.globl _arm_return
1749LSYM(Lstart_arm_return):
1750	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
1751	cfi_push	0, 0xe, -0x8, 0x8
1752	nop	@ This nop is for the benefit of debuggers, so that
1753		@ backtraces will use the correct unwind information.
1754_arm_return:
1755	RETLDM	unwind=LSYM(Lstart_arm_return)
1756	cfi_end	LSYM(Lend_arm_return)
1757
1758	.globl _arm_return_r7
1759_arm_return_r7:
1760	ldr	lr, [r7, #-4]
1761	bx	lr
1762
1763	.globl _arm_return_r11
1764_arm_return_r11:
1765	ldr	lr, [r11, #-4]
1766	bx	lr
1767
1768.macro interwork_with_frame frame, register, name, return
1769	.code	16
1770
1771	THUMB_FUNC_START \name
1772
1773	bx	pc
1774	nop
1775
1776	.code	32
1777	tst	\register, #1
1778	streq	lr, [\frame, #-4]
1779	adreq	lr, _arm_return_\frame
1780	bx	\register
1781
1782	SIZE	(\name)
1783.endm
1784
1785.macro interwork register
1786	.code	16
1787
1788	THUMB_FUNC_START _interwork_call_via_\register
1789
1790	bx	pc
1791	nop
1792
1793	.code	32
1794	.globl LSYM(Lchange_\register)
1795LSYM(Lchange_\register):
1796	tst	\register, #1
1797	streq	lr, [sp, #-8]!
1798	adreq	lr, _arm_return
1799	bx	\register
1800
1801	SIZE	(_interwork_call_via_\register)
1802
1803	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
1804	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
1805.endm
1806
1807	interwork r0
1808	interwork r1
1809	interwork r2
1810	interwork r3
1811	interwork r4
1812	interwork r5
1813	interwork r6
1814	interwork r7
1815	interwork r8
1816	interwork r9
1817	interwork sl
1818	interwork fp
1819	interwork ip
1820	interwork sp
1821
1822	/* The LR case has to be handled a little differently...  */
1823	.code 16
1824
1825	THUMB_FUNC_START _interwork_call_via_lr
1826
1827	bx 	pc
1828	nop
1829
1830	.code 32
1831	.globl .Lchange_lr
1832.Lchange_lr:
1833	tst	lr, #1
1834	stmeqdb	r13!, {lr, pc}
1835	mov	ip, lr
1836	adreq	lr, _arm_return
1837	bx	ip
1838
1839	SIZE	(_interwork_call_via_lr)
1840
1841#endif /* L_interwork_call_via_rX */
1842#endif /* !__thumb2__ */
1843
1844/* Functions to support compact pic switch tables in thumb1 state.
1845   All these routines take an index into the table in r0.  The
1846   table is at LR & ~1 (but this must be rounded up in the case
1847   of 32-bit entires).  They are only permitted to clobber r12
1848   and r14 and r0 must be preserved on exit.  */
1849#ifdef L_thumb1_case_sqi
1850
1851	.text
1852	.align 0
1853        .force_thumb
1854	.syntax unified
1855	THUMB_FUNC_START __gnu_thumb1_case_sqi
1856	push	{r1}
1857	mov	r1, lr
1858	lsrs	r1, r1, #1
1859	lsls	r1, r1, #1
1860	ldrsb	r1, [r1, r0]
1861	lsls	r1, r1, #1
1862	add	lr, lr, r1
1863	pop	{r1}
1864	bx	lr
1865	SIZE (__gnu_thumb1_case_sqi)
1866#endif
1867
1868#ifdef L_thumb1_case_uqi
1869
1870	.text
1871	.align 0
1872        .force_thumb
1873	.syntax unified
1874	THUMB_FUNC_START __gnu_thumb1_case_uqi
1875	push	{r1}
1876	mov	r1, lr
1877	lsrs	r1, r1, #1
1878	lsls	r1, r1, #1
1879	ldrb	r1, [r1, r0]
1880	lsls	r1, r1, #1
1881	add	lr, lr, r1
1882	pop	{r1}
1883	bx	lr
1884	SIZE (__gnu_thumb1_case_uqi)
1885#endif
1886
1887#ifdef L_thumb1_case_shi
1888
1889	.text
1890	.align 0
1891        .force_thumb
1892	.syntax unified
1893	THUMB_FUNC_START __gnu_thumb1_case_shi
1894	push	{r0, r1}
1895	mov	r1, lr
1896	lsrs	r1, r1, #1
1897	lsls	r0, r0, #1
1898	lsls	r1, r1, #1
1899	ldrsh	r1, [r1, r0]
1900	lsls	r1, r1, #1
1901	add	lr, lr, r1
1902	pop	{r0, r1}
1903	bx	lr
1904	SIZE (__gnu_thumb1_case_shi)
1905#endif
1906
1907#ifdef L_thumb1_case_uhi
1908
1909	.text
1910	.align 0
1911        .force_thumb
1912	.syntax unified
1913	THUMB_FUNC_START __gnu_thumb1_case_uhi
1914	push	{r0, r1}
1915	mov	r1, lr
1916	lsrs	r1, r1, #1
1917	lsls	r0, r0, #1
1918	lsls	r1, r1, #1
1919	ldrh	r1, [r1, r0]
1920	lsls	r1, r1, #1
1921	add	lr, lr, r1
1922	pop	{r0, r1}
1923	bx	lr
1924	SIZE (__gnu_thumb1_case_uhi)
1925#endif
1926
1927#ifdef L_thumb1_case_si
1928
1929	.text
1930	.align 0
1931        .force_thumb
1932	.syntax unified
1933	THUMB_FUNC_START __gnu_thumb1_case_si
1934	push	{r0, r1}
1935	mov	r1, lr
1936	adds.n	r1, r1, #2	/* Align to word.  */
1937	lsrs	r1, r1, #2
1938	lsls	r0, r0, #2
1939	lsls	r1, r1, #2
1940	ldr	r0, [r1, r0]
1941	adds	r0, r0, r1
1942	mov	lr, r0
1943	pop	{r0, r1}
1944	mov	pc, lr		/* We know we were called from thumb code.  */
1945	SIZE (__gnu_thumb1_case_si)
1946#endif
1947
1948#endif /* Arch supports thumb.  */
1949
1950#ifndef __symbian__
1951#ifndef __ARM_ARCH_6M__
1952#include "ieee754-df.S"
1953#include "ieee754-sf.S"
1954#include "bpabi.S"
1955#else /* __ARM_ARCH_6M__ */
1956#include "bpabi-v6m.S"
1957#endif /* __ARM_ARCH_6M__ */
1958#endif /* !__symbian__ */
1959