@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
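
/* For example, with __USER_LABEL_PREFIX__ defined as _ (typical of a.out
   and COFF targets), SYM (udivsi3) expands via CONCAT1/CONCAT2 to the
   label _udivsi3; with the empty prefix used by most ELF targets it is
   plain udivsi3.  */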

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 6
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

# endif /* __ARM_ARCH__ == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif
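
/* For example, "RETc(eq)" assembles to "bxeq lr" on an
   interworking-capable core, returning to ARM or Thumb code according to
   bit 0 of lr, whereas the armv3 fallback "moveq pc, lr" cannot switch
   instruction sets.  */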

.macro	cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro	cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
        .4byte	0xffffffff	@ CIE Identifier Tag
        .byte	0x1	@ CIE Version
        .ascii	"\0"	@ CIE Augmentation
        .uleb128 0x1	@ CIE Code Alignment Factor
        .sleb128 -4	@ CIE Data Alignment Factor
        .byte	0xe	@ CIE RA Column
        .byte	0xc	@ DW_CFA_def_cfa
        .uleb128 0xd
        .uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm
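
/* For example, ARM_LDIV0 below pairs these macros: cfi_start opens the
   FDE, the cfi_push after "str lr, [sp, #-8]!" records that lr lives 8
   bytes below the new CFA (sp + 8), and the cfi_pop issued by RETLDM
   marks lr as restored with the CFA back at sp.  */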

/* Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, lr}
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, pc}
	.endif
#endif
.endm
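
/* For example, "RETLDM r4" expands to "ldmia sp!, {r4, pc}" when
   __INTERWORKING__ is not defined, and to "ldmia sp!, {r4, lr}" followed
   by "bx lr" when it is; a bare "RETLDM" instead reloads just lr (or pc)
   and pops 8 bytes, matching the push done by ARM_LDIV0.  */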


.macro ARM_LDIV0 name
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm


.macro THUMB_LDIV0 name
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm

.macro FUNC_END name
	SIZE (__\name)
.endm

.macro DIV_FUNC_END name
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name
#else
	ARM_LDIV0 \name
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
#define THUMB_FUNC
#define THUMB_CODE
#endif

.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
SYM (__\name):
.endm

/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__INTERWORKING_STUBS__)
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
	bl	_L__\name
.endm
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif
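
/* For illustration: with __INTERWORKING_STUBS__ defined,
   "ARM_FUNC_START foo" emits a Thumb entry point __foo consisting of
   "bx pc; nop" that falls through into the ARM body at _L__foo, while
   "ARM_CALL foo" branches straight to _L__foo, bypassing the stub.  */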

.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible, and set
	@ curbit accordingly.  This allows curbit to be located at the
	@ left end of each 4-bit nibble in the division loop, saving one
	@ loop iteration in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
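
/* A worked example of the shift-and-subtract scheme above (a sketch, not
   part of the build): dividing 100 by 7, the alignment loops leave the
   divisor at 7 << 4 = 112; the trial subtractions of 56, 28 and 14
   succeed while 112 and 7 fail, producing quotient bits 01110b = 14 with
   remainder 2 left in the dividend register.  */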
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
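
/* For example, a divisor of 64 leaves 6 in \order, so callers on the
   power-of-two fast path can form the quotient with a single
   "mov r0, r0, lsr r2" (r2 = 6).  */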
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparisons/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
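
/* A worked example (sketch only): for dividend = 100, divisor = 7, the
   alignment loops leave divisor = 112 with \order = 4; the batched pass
   then subtracts 56, 28 and 14, leaving the remainder 100 mod 7 = 2 in
   the dividend register.  */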
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done; we can fix them up
	@ afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ is governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early because the dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits, which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm
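
/* A note on the modulo fix-up above: each speculative subtraction of
   divisor >> n (n = 1..3) is recorded by ORing (curbit ror n) into
   overdone.  Only markers set on the final pass, where curbit = 1,
   rotate into the top three bits (1 ror 1 = 0x80000000, and so on);
   only those subtractions can have overshot, hence the 0xe0000000 mask
   tested above.  */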
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#else /* ARM version.  */

	subs	r2, r1, #1
	RETc(eq)
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f
	tst	r1, r2
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3

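/* __aeabi_uidivmod: the RTABI combined divide/modulo entry point.  It
   takes the numerator in r0 and the denominator in r1, and returns the
   quotient in r0 and the remainder in r1.  */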
FUNC_START aeabi_uidivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__udivsi3)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__udivsi3)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

	FUNC_START umodsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.  */

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3

#ifdef __thumb__
	cmp	divisor, #0
	beq	LSYM(Ldiv0)

	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	RET

11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3

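/* __aeabi_idivmod: signed counterpart of __aeabi_uidivmod; likewise
   returns the quotient in r0 and the remainder in r1.  */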
FUNC_START aeabi_idivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__divsi3)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__divsi3)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

	FUNC_START modsi3

#ifdef __thumb__

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ We need to save the sign of the dividend; unfortunately, we need
	@ the work register later on.  This must be done after saving the
	@ original value of the work register, because we will pop that
	@ value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

	FUNC_START div0
	FUNC_ALIAS aeabi_idiv0 div0
	FUNC_ALIAS aeabi_ldiv0 div0

	RET

	FUNC_END div0

#endif /* L_dvmd_tls */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE	8

	.code	32
	FUNC_START div0

	stmfd	sp!, {r1, lr}
	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1

	FUNC_END div0

#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg values in (32...63) and (-31...-1) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
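
/* For example, in the ARM version of __ashldi3 below, a shift count of
   40 takes the "movpl" path: r3 = 40 - 32 = 8, so ah becomes al << 8,
   while "mov al, al, lsl r2" shifts by 40 and therefore clears al,
   exactly as a 64-bit left shift requires.  */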

#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__

#ifdef L_lshrdi3

	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	lsr	ah, r2
	mov	ip, r3
	sub	r2, #32
	lsr	r3, r2
	orr	al, r3
	neg	r2, r2
	mov	r3, ip
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, lsr r2
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif

#ifdef L_ashrdi3

	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	asr	ah, r2
	sub	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asr	r3, r2
	orr	al, r3
	mov	r3, ip
1:
	neg	r2, r2
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, asr r2
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsl	ah, r2
	mov	r3, al
	lsl	al, r2
	mov	ip, r3
	sub	r2, #32
	lsl	r3, r2
	orr	ah, r3
	neg	r2, r2
	mov	r3, ip
	lsr	r3, r2
	orr	ah, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
	orrmi	ah, ah, al, lsr ip
	mov	al, al, lsl r2
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */

/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ \
      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
      || __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of the function to be called is loaded into a register and
   then one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */

	.text
	.align 0
        .force_thumb

.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm
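
/* Typical use, when the compiler cannot tell which instruction set the
   callee uses (illustrative only):

	ldr	r3, =function
	bl	_call_via_r3

   The BL leaves the return address in lr (with the Thumb bit set when
   the caller is Thumb code), and the BX above enters the callee in the
   instruction set selected by bit 0 of r3.  */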

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return
   address into the link register with the bottom bit set, and the code
   here switches to the correct mode before executing the function.
   Unfortunately the target code cannot be relied upon to return via a BX
   instruction, so instead we have to store the return address on the
   stack and allow the called function to return here instead.  Upon
   return we recover the real return address and use a BX to get back to
   Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */
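
/* For illustration, a Thumb caller that must call through r4 when r4
   may hold either an ARM or a Thumb address does:

	ldr	r4, =target
	bl	_interwork_call_via_r4

   If target is Thumb code the stub simply executes "bx r4"; if it is
   ARM code the stub first parks the Thumb return address on the stack
   and points lr at _arm_return, which pops it and uses BX to return to
   the caller.  */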

	.text
	.align 0

	.code   32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc
	nop

	.code	32
	tst	\register, #1
	streq	lr, [\frame, #-4]
	adreq	lr, _arm_return_\frame
	bx	\register

	SIZE	(\name)
.endm

.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	.code 16

	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr, pc}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* Arch supports thumb.  */

#ifndef __symbian__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#endif /* __symbian__ */