@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

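/* For instance, with __USER_LABEL_PREFIX__ defined as `_' (one common
   ELF/COFF convention; the real prefix is target-specific), SYM (__udivsi3)
   yields the assembler label ___udivsi3.  The CONCAT1/CONCAT2 indirection
   is what forces __USER_LABEL_PREFIX__ to be macro-expanded before the
   tokens are pasted.  */
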
#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for 26 bit APCS and interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5TE__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif

/* How to return from a function call depends on the architecture variant.  */

#ifdef __APCS_26__

# define RET		movs	pc, lr
# define RETc(x)	mov##x##s	pc, lr

#elif (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

# if (__ARM_ARCH__ == 4) \
	&& (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#  define __INTERWORKING__
# endif

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif

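/* Thus, for example, RETc(eq) becomes `moveqs pc, lr' under 26-bit APCS
   (also restoring the flags held in the PC), `bxeq lr' on architectures
   that have BX, and `moveq pc, lr' on other 32-bit cores.  */
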
/* Don't pass dirn; it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, dirn=ia
#ifdef __APCS_26__
	.ifc "\regs",""
	ldm\cond\dirn	sp!, {pc}^
	.else
	ldm\cond\dirn	sp!, {\regs, pc}^
	.endif
#elif defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #4
	.else
	ldm\cond\dirn	sp!, {\regs, lr}
	.endif
	bx\cond	lr
#else
	.ifc "\regs",""
	ldr\cond	pc, [sp], #4
	.else
	ldm\cond\dirn	sp!, {\regs, pc}
	.endif
#endif
.endm
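
/* As an illustration, `RETLDM r4' assembles to `ldmia sp!, {r4, pc}' in
   plain 32-bit code, to `ldmia sp!, {r4, pc}^' under 26-bit APCS, and to
   `ldmia sp!, {r4, lr}' followed by `bx lr' when interworking; a bare
   `RETLDM' reloads just the return address.  */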


.macro ARM_LDIV0
LSYM(Ldiv0):
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM
.endm


.macro THUMB_LDIV0
LSYM(Ldiv0):
	push	{ lr }
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1 }
	bx	r1
#else
	pop	{ pc }
#endif
.endm

.macro FUNC_END name
	SIZE (__\name)
.endm

.macro DIV_FUNC_END name
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0
#else
	ARM_LDIV0
#endif
	FUNC_END \name
.endm

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
#define THUMB_FUNC
#define THUMB_CODE
#endif

.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
SYM (__\name):
.endm

/* Special function that will always be coded in ARM assembly, even
   when compiling for Thumb-only targets.  */

#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
_L__\name:		/* A hook to tell gdb that we've switched to ARM */
.endm
#define EQUIV .thumb_set
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
#endif

.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
.endm

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ setting curbit accordingly.  This places curbit at the left
	@ end of each 4-bit nibble in the division loop, saving one
	@ loop iteration in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm
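
@ The loop above is the classic restoring shift-and-subtract divide,
@ unrolled to produce four quotient bits per iteration.  As a
@ bit-at-a-time C model (an illustrative sketch only, not part of the
@ build):
@
@	unsigned udiv (unsigned dividend, unsigned divisor)
@	{
@	  unsigned result = 0, curbit = 1;
@	  while (divisor < dividend && !(divisor & 0x80000000))
@	    { divisor <<= 1; curbit <<= 1; }	@ normalise; CLZ on ARMv5
@	  do
@	    {
@	      if (dividend >= divisor)
@		{ dividend -= divisor; result |= curbit; }
@	      divisor >>= 1; curbit >>= 1;
@	    }
@	  while (curbit && dividend);
@	  return result;
@	}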
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
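
/* ARM_DIV2_ORDER computes order = floor(log2(divisor)), i.e. the shift
   count for a power-of-two divisor: 31 - CLZ on ARMv5, or the stepwise
   16/8/4/0-3 narrowing above on older cores.  For example, a divisor of
   0x1000 gives an order of 12.  */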
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparisons/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
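
@ That is: the divisor is first scaled up to the dividend, then shifted
@ back down a step at a time, subtracting wherever it still fits, so that
@ only the remainder survives in the dividend register.  A C model of the
@ subtraction phase (an illustrative sketch only):
@
@	for (i = order; i >= 0; i--)	@ divisor == original << i
@	  {
@	    if (dividend >= divisor)
@	      dividend -= divisor;
@	    divisor >>= 1;
@	  }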
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the
	@ dividend, so keep track of which subtractions are done; we can
	@ fix them up afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which ones were not
	@ needed is governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions)
	@ since the bit in ip could be in the top two bits, which might
	@ then match with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm
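
@ To summarise the modulo bookkeeping above: a subtraction of
@ divisor >> N (N = 1, 2 or 3) is recorded by setting the bit
@ "curbit ror N" in overdone.  On the final pass curbit occupies the
@ bottom nibble, so a marker that wraps round into the top three bits
@ identifies a subtraction whose quotient bit would have fallen below
@ bit 0 -- one that went too far -- and the fixup code adds the
@ corresponding divisor >> N back onto the dividend.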
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

	FUNC_START udivsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#else /* ARM version.  */

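	@ Dispatch the easy cases first: the SUBS below sets Z for a
	@ divisor of 1 (return the dividend unchanged) and clears C for a
	@ divisor of 0 (divide by zero); a dividend not larger than the
	@ divisor produces a quotient of 0 or 1 at label 11; and a
	@ power-of-two divisor, detected by r1 AND (r1 - 1) being zero,
	@ reduces to a shift at label 12.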
	subs	r2, r1, #1
	RETc(eq)
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f
	tst	r1, r2
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

	FUNC_START umodsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.  */

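	@ The conditional chain below settles all the easy cases: a zero
	@ divisor is sent to Ldiv0, a dividend smaller than the divisor
	@ is already the remainder, equal values leave 0, and a
	@ power-of-two divisor reduces to an AND with divisor - 1; only
	@ what is left reaches ARM_MOD_BODY.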
	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

	FUNC_START divsi3

#ifdef __thumb__
	cmp	divisor, #0
	beq	LSYM(Ldiv0)

	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

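	@ ip caches dividend EOR divisor, so its sign bit is the sign of
	@ the quotient; the division itself works on absolute values and
	@ the sign is patched in afterwards with RSBMI.  Label 10 handles
	@ division by 1 or -1, label 11 the case |dividend| <= |divisor|
	@ (quotient -1, 0 or +1), and label 12 power-of-two divisors.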
	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	RET

11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

	FUNC_START modsi3

#ifdef __thumb__

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ We need to save the sign of the dividend; unfortunately, the
	@ work register is needed later on.  This must be done after
	@ saving the original value of the work register, because that
	@ value is popped off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

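	@ As in umodsi3, the conditional chain below settles the easy
	@ cases, but ip first preserves the original dividend, whose sign
	@ is also the sign of the remainder.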
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

	FUNC_START div0

	RET

	FUNC_END div0

#endif /* L_dvmd_tls */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constants taken from <asm/unistd.h> and <asm/signal.h> */
#define SIGFPE	8
#define __NR_SYSCALL_BASE	0x900000
#define __NR_getpid			(__NR_SYSCALL_BASE+ 20)
#define __NR_kill			(__NR_SYSCALL_BASE+ 37)
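
/* The code below amounts to kill (getpid (), SIGFPE) using the old ARM
   ABI syscall numbers; the CMN catches a negative errno value returned
   by getpid, in which case the handler simply returns without raising
   the signal.  */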

	.code	32
	FUNC_START div0

	stmfd	sp!, {r1, lr}
	swi	__NR_getpid
	cmn	r0, #1000
	RETLDM	r1 hs
	mov	r1, #SIGFPE
	swi	__NR_kill
	RETLDM	r1

	FUNC_END div0

#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined L_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of the function to be called is loaded into a register and
   then one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
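
/* A typical Thumb-mode call through one of these stubs (the target
   name is purely illustrative):

	ldr	r3, =target_function
	bl	_call_via_r3
*/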

	.text
	.align 0
	.force_thumb

.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */
/* ------------------------------------------------------------------------ */
/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined L_interwork_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return
   address into the link register with the bottom bit set, and the code
   here switches to the correct mode before executing the function.
   Unfortunately the target code cannot be relied upon to return via a BX
   instruction, so instead we have to store the return address on the
   stack and allow the called function to return here instead.  Upon
   return we recover the real return address and use a BX to get back to
   Thumb mode.  */
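
/* In outline: each stub drops into ARM state with `bx pc', tests the
   bottom bit of the target address to see which state the callee uses,
   and, for an ARM-state callee only, pushes the original lr and points
   lr at _arm_return, so that even a callee returning with `mov pc, lr'
   comes back through _arm_return, which performs an interworking return
   to the original Thumb caller.  */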

	.text
	.align 0

	.code	32
	.globl _arm_return
_arm_return:
	RETLDM
	.code	16

.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-4]!
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	.code	16

	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc
	nop

	.code	32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */

#include "ieee754-df.S"
#include "ieee754-sf.S"
