@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 6
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr
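/* For example, "RETc(eq)" expands here to "bxeq lr"; with the pre-v4T
   definitions below it would become "moveq pc, lr".  */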

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __thumb__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

# endif /* __ARM_ARCH__ == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif

.macro	cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro	cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
        .4byte	0xffffffff	@ CIE Identifier Tag
        .byte	0x1	@ CIE Version
        .ascii	"\0"	@ CIE Augmentation
        .uleb128 0x1	@ CIE Code Alignment Factor
        .sleb128 -4	@ CIE Data Alignment Factor
        .byte	0xe	@ CIE RA Column
        .byte	0xc	@ DW_CFA_def_cfa
        .uleb128 0xd
        .uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm
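
/* As used below, for example:
	98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
   advances the FDE location to local label 98, records that lr (DWARF
   column 14) was saved at CFA-8, and sets the CFA to sp+8 (the CIE's data
   alignment factor is -4, so an offset of -0x8 encodes as 2).  */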

/* Don't pass dirn; it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, lr}
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, pc}
	.endif
#endif
.endm


.macro ARM_LDIV0 name
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm


.macro THUMB_LDIV0 name
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm

.macro FUNC_END name
	SIZE (__\name)
.endm

.macro DIV_FUNC_END name
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name
#else
	ARM_LDIV0 \name
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
#define THUMB_FUNC
#define THUMB_CODE
#endif

.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
SYM (__\name):
.endm

/* Special function that will always be coded in ARM assembly, even in a
   Thumb-only compilation.  */

#if defined(__INTERWORKING_STUBS__)
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
	bl	_L__\name
.endm
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif

.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible, and set
	@ curbit accordingly.  This puts curbit at the left end of each
	@ 4-bit nibble in the division loop, which saves one loop
	@ iteration in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift one bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
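
/* For reference, the shift-and-subtract scheme used above is roughly the
   following C, assuming a non-zero divisor (the callers branch to Ldiv0
   first).  A sketch for exposition only, not part of the build:

     unsigned udiv_sketch (unsigned dividend, unsigned divisor)
     {
       unsigned curbit = 1, result = 0;
       // Normalize: line the divisor up under the dividend.
       while (divisor < dividend && !(divisor & 0x80000000))
         {
           divisor <<= 1;
           curbit <<= 1;
         }
       // Trial-subtract at each bit position.
       while (curbit)
         {
           if (dividend >= divisor)
             {
               dividend -= divisor;
               result |= curbit;
             }
           divisor >>= 1;
           curbit >>= 1;
         }
       return result;	// dividend now holds the remainder
     }

   The assembler above unrolls this four bit positions per iteration, and
   on ARMv5+ without -Os it jumps into a fully unrolled 32-step sequence.  */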
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift one bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
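
/* For reference: once the divisor has been shifted left by \order bits, the
   loops above are roughly this C sketch (exposition only, not part of the
   build), leaving the remainder in dividend:

     while (order-- >= 0)
       {
         if (dividend >= divisor)
           dividend -= divisor;
         divisor >>= 1;
       }

   The batching by 4 above is an unrolled form of this, with a fix-up pass
   for the leftover 1 to 3 steps.  */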
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift one bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done; we can fix them up
	@ afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be
	@ ok, since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ is governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early because the dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though (rather
	@ than relying upon the TSTs to prevent the additions), since the
	@ bit in ip could be in the top two bits, which might then match
	@ one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#else /* ARM version.  */

	subs	r2, r1, #1
	RETc(eq)
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f
	tst	r1, r2
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3

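/* __aeabi_uidivmod returns the quotient in r0 and the remainder in r1.
   Viewed from C, the contract is roughly (the struct name here is
   illustrative only, not taken from any real header):

     typedef struct { unsigned quot; unsigned rem; } uidivmod_t;
     uidivmod_t __aeabi_uidivmod (unsigned num, unsigned denom);  */
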
FUNC_START aeabi_uidivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__udivsi3)
	pop	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__udivsi3)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

	FUNC_START umodsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.  */

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3

#ifdef __thumb__
	cmp	divisor, #0
	beq	LSYM(Ldiv0)

	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

10:	teq	ip, r0				@ same sign?
	rsbmi	r0, r0, #0
	RET

11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3

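/* __aeabi_idivmod is the signed counterpart: quotient in r0, remainder in
   r1, with the quotient truncated toward zero and the remainder taking
   the sign of the numerator, as in C.  */
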
FUNC_START aeabi_idivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__divsi3)
	pop	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__divsi3)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

	FUNC_START modsi3

#ifdef __thumb__

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ We need to save the sign of the dividend; unfortunately, the work
	@ register is needed later on.  This must be done after saving the
	@ original value of the work register, because we will pop that
	@ value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

	FUNC_START div0
	FUNC_ALIAS aeabi_idiv0 div0
	FUNC_ALIAS aeabi_ldiv0 div0

	RET

	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
	FUNC_END div0

#endif /* L_dvmd_tls */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls.

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE	8

	.code	32
	FUNC_START div0

	stmfd	sp!, {r1, lr}
	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1

	FUNC_END div0

#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg values in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
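
/* For example, the ARM version of __lshrdi3 below computes, in C terms
   (a sketch for exposition, not part of the build):

     if (shift < 32)
       al = (al >> shift) | (ah << (32 - shift));
     else
       al = ah >> (shift - 32);
     ah >>= shift;	// yields zero for shift amounts of 32...255

   where the last line is the single "mov ah, ah, lsr r2", relying on the
   (Reg & 255) behaviour described above.  */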

#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__

#ifdef L_lshrdi3

	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	lsr	ah, r2
	mov	ip, r3
	sub	r2, #32
	lsr	r3, r2
	orr	al, r3
	neg	r2, r2
	mov	r3, ip
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, lsr r2
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif

#ifdef L_ashrdi3

	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	asr	ah, r2
	sub	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asr	r3, r2
	orr	al, r3
	mov	r3, ip
1:
	neg	r2, r2
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, asr r2
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsl	ah, r2
	mov	r3, al
	lsl	al, r2
	mov	ip, r3
	sub	r2, #32
	lsl	r3, r2
	orr	ah, r3
	neg	r2, r2
	mov	r3, ip
	lsr	r3, r2
	orr	ah, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
	orrmi	ah, ah, al, lsr ip
	mov	al, al, lsl r2
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */

/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler, because their presence allows interworked code to be linked
   even when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ \
      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
      || __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of the function to be called is loaded into a register and
   then one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
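
/* For example, Thumb code making an indirect call through r3 might be
   compiled as (illustrative only; "func" is a stand-in name):

	ldr	r3, =func
	bl	_call_via_r3	@ execution resumes here, still in Thumb

   since Thumb code cannot "blx" through a register before ARMv5.  */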

	.text
	.align 0
        .force_thumb

.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return
   address into the link register with the bottom bit set, and the code
   here switches to the correct mode before executing the function.
   Unfortunately the target code cannot be relied upon to return via a BX
   instruction, so instead we have to store the return address on the
   stack and allow the called function to return here instead.  Upon
   return we recover the real return address and use a BX to get back to
   Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */
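
/* For example, interworking-compiled Thumb code might call through r2 with
   (illustrative only):

	bl	_interwork_call_via_r2

   If r2 holds an ARM address (bit 0 clear), the stub saves lr on the stack,
   points lr at _arm_return and enters the callee in ARM mode; the callee's
   ordinary return then lands in _arm_return, which recovers the real return
   address.  If r2 holds a Thumb address, the stub behaves just like
   _call_via_r2.  */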

	.text
	.align 0

	.code   32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc
	nop

	.code	32
	tst	\register, #1
	streq	lr, [\frame, #-4]
	adreq	lr, _arm_return_\frame
	bx	\register

	SIZE	(\name)
.endm

.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	.code 16

	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr, pc}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* Arch supports thumb.  */

#ifndef __symbian__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#endif /* __symbian__ */