lib1funcs.asm revision 90075
1@ libgcc routines for ARM cpu.
2@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
3
4/* Copyright 1995, 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
5
6This file is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 2, or (at your option) any
9later version.
10
11In addition to the permissions in the GNU General Public License, the
12Free Software Foundation gives you unlimited permission to link the
13compiled version of this file into combinations with other programs,
14and to distribute those combinations without any restriction coming
15from the use of this file.  (The General Public License restrictions
16do apply in other respects; for example, they cover modification of
17the file, and distribution when not linked into a combine
18executable.)
19
20This file is distributed in the hope that it will be useful, but
21WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23General Public License for more details.
24
25You should have received a copy of the GNU General Public License
26along with this program; see the file COPYING.  If not, write to
27the Free Software Foundation, 59 Temple Place - Suite 330,
28Boston, MA 02111-1307, USA.  */
29/* ------------------------------------------------------------------------ */
30
31/* We need to know what prefix to add to function names.  */
32
33#ifndef __USER_LABEL_PREFIX__
34#error  __USER_LABEL_PREFIX__ not defined
35#endif
36
37/* ANSI concatenation macros.  */
/* CONCAT1 exists only to add one level of macro expansion: its arguments
   are fully expanded before CONCAT2 pastes them with ##, so a macro such
   as __USER_LABEL_PREFIX__ is replaced by its value instead of being
   pasted as a literal name.  */
38
39#define CONCAT1(a, b) CONCAT2(a, b)
40#define CONCAT2(a, b) a ## b
41
42/* Use the right prefix for global labels.  */
/* SYM(x) yields x with the value of __USER_LABEL_PREFIX__ glued on the
   front.  */
43
44#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
45
/* Object-format helpers.
   __PLT__  - text appended after the target of a `bl' in the ARM_LDIV0
              macros; (PLT) for ELF ARM code, empty otherwise.
   TYPE(x)  - for ELF, marks SYM(x) as a function symbol; empty otherwise.
   SIZE(x)  - for ELF, sets the size of SYM(x) to the distance from the
              symbol to the current location; empty otherwise.  */
46#ifdef __ELF__
47#ifdef __thumb__
48#define __PLT__  /* Not supported in Thumb assembler (for now).  */
49#else
50#define __PLT__ (PLT)
51#endif
52#define TYPE(x) .type SYM(x),function
53#define SIZE(x) .size SYM(x), . - SYM(x)
54#else
55#define __PLT__
56#define TYPE(x)
57#define SIZE(x)
58#endif
59
60/* Function end macros.  Variants for 26 bit APCS and interworking.  */
61
/* Three build variants are selected below:
     __APCS_26__           - return with `movs pc, lr' / `ldm ...^'.
     __THUMB_INTERWORK__   - return with `bx'.
     neither               - return by writing lr (or a popped value) to pc.
   For each variant:
     RET       - unconditional return instruction.
     RETc(x)   - the same return, executed under condition code x.
     RETCOND   - suffix appended to a load-multiple that restores pc
                 (`^' for APCS-26, empty otherwise).
     ARM_LDIV0 / THUMB_LDIV0
               - division-by-zero tail.  Each expansion defines the label
                 Ldiv0, saves lr, calls __div0, sets r0 to 0 and returns to
                 the caller of the division routine.  */
62#ifdef __APCS_26__
63# define RET		movs	pc, lr
64# define RETc(x)	mov##x##s	pc, lr
65# define RETCOND 	^
66.macro ARM_LDIV0
67Ldiv0:
68	str	lr, [sp, #-4]!
69	bl	SYM (__div0) __PLT__
70	mov	r0, #0			@ About as wrong as it could be.
71	ldmia	sp!, {pc}^
72.endm
73#else
74# ifdef __THUMB_INTERWORK__
75#  define RET		bx	lr
76#  define RETc(x)	bx##x	lr
77.macro THUMB_LDIV0
78Ldiv0:
79	push	{ lr }
80	bl	SYM (__div0)
81	mov	r0, #0			@ About as wrong as it could be.
	@ The saved return address is popped into r1 and reached with bx.
82	pop	{ r1 }
83	bx	r1
84.endm
85.macro ARM_LDIV0
86Ldiv0:
87	str	lr, [sp, #-4]!
88	bl	SYM (__div0) __PLT__
89	mov	r0, #0			@ About as wrong as it could be.
90	ldr	lr, [sp], #4
91	bx	lr
92.endm	
93# else
94#  define RET		mov	pc, lr
95#  define RETc(x)	mov##x	pc, lr
96.macro THUMB_LDIV0
97Ldiv0:
98	push	{ lr }
99	bl	SYM (__div0)
100	mov	r0, #0			@ About as wrong as it could be.
101	pop	{ pc }
102.endm
103.macro ARM_LDIV0
104Ldiv0:
105	str	lr, [sp, #-4]!
106	bl	SYM (__div0) __PLT__
107	mov	r0, #0			@ About as wrong as it could be.
108	ldmia	sp!, {pc}
109.endm	
110# endif
111# define RETCOND
112#endif
113
@ FUNC_END name
@   Closes a routine opened with FUNC_START: emits the division-by-zero
@   tail for the current instruction set and then the size of __<name>.
@
@   The Ldiv0 label that the routines branch to is defined by the first
@   line of THUMB_LDIV0 / ARM_LDIV0 themselves, so it must not be defined
@   here as well: a second definition of the same label is rejected by
@   the assembler.
114.macro FUNC_END name
116#ifdef __thumb__
117	THUMB_LDIV0
118#else
119	ARM_LDIV0
120#endif
121	SIZE (__\name)	
122.endm
123
@ THUMB_FUNC_START name
@   Declares SYM(name) as a global Thumb function entry point and defines
@   its label at the current location.  Unlike FUNC_START it adds no "__"
@   to the name and does not switch section or alignment.
124.macro THUMB_FUNC_START name
125	.globl	SYM (\name)
126	TYPE	(\name)
127	.thumb_func
128SYM (\name):
129.endm
130
131/* Function start macros.  Variants for ARM and Thumb.  */
132
/* THUMB_FUNC / THUMB_CODE expand to the Thumb directives when building
   for Thumb and to nothing otherwise, so FUNC_START can use them
   unconditionally.  */
133#ifdef __thumb__
134#define THUMB_FUNC .thumb_func
135#define THUMB_CODE .force_thumb
136#else
137#define THUMB_FUNC
138#define THUMB_CODE
139#endif
140	
@ FUNC_START name
@   Opens the global routine __<name> in .text: exports the symbol, marks
@   its type, and defines the label.  Pair with FUNC_END.
141.macro FUNC_START name
142	.text
143	.globl SYM (__\name)
144	TYPE (__\name)
145	.align 0
146	THUMB_CODE
147	THUMB_FUNC
148SYM (__\name):
149.endm
150		
151/* Register aliases.  */
152
@ dividend/divisor are the two incoming arguments (r0, r1).
@ overdone and result share r2: the modulo bodies use the name overdone,
@ the division bodies use the name result.
@ work is r4; the Thumb routines in this file push it before running
@ the body and pop it afterwards.
153work		.req	r4	@ XXXX is this safe ?
154dividend	.req	r0
155divisor		.req	r1
156overdone	.req	r2
157result		.req	r2
158curbit		.req	r3
159ip		.req	r12
160sp		.req	r13
161lr		.req	r14
162pc		.req	r15
163
164/* ------------------------------------------------------------------------ */
165/*		Bodies of the division and modulo routines.		    */
166/* ------------------------------------------------------------------------ */	
@ ARM_DIV_MOD_BODY modulo
@   Unsigned shift-and-subtract core shared by the ARM routines.
@   On entry:  dividend and divisor hold the unsigned operands and
@              curbit holds 1; for modulo == 0 the callers in this file
@              also clear result first.
@   On exit:   modulo == 0: result has the quotient bits OR-ed in.
@              modulo != 0: dividend holds the remainder.
@   Modifies:  dividend, divisor, curbit, r2 (result/overdone), flags,
@              and ip when modulo != 0.
@   The macro defines the plain labels Loop1, Lbignum, Loop3,
@   Lfixup_dividend (modulo only) and Lgot_result; callers branch to
@   Lgot_result to skip the body.
167.macro ARM_DIV_MOD_BODY modulo
168Loop1:
169	@ Unless the divisor is very big, shift it up in multiples of
170	@ four bits, since this is the amount of unwinding in the main
171	@ division loop.  Continue shifting until the divisor is 
172	@ larger than the dividend.
173	cmp	divisor, #0x10000000
174	cmplo	divisor, dividend
175	movlo	divisor, divisor, lsl #4
176	movlo	curbit,  curbit,  lsl #4
177	blo	Loop1
178
179Lbignum:
180	@ For very big divisors, we must shift it a bit at a time, or
181	@ we will be in danger of overflowing.
182	cmp	divisor, #0x80000000
183	cmplo	divisor, dividend
184	movlo	divisor, divisor, lsl #1
185	movlo	curbit,  curbit,  lsl #1
186	blo	Lbignum
187
188Loop3:
189	@ Test for possible subtractions.  On the final pass, this may 
190	@ subtract too much from the dividend ...
191	
192  .if \modulo
193	@ ... so keep track of which subtractions are done in OVERDONE.
194	@ We can fix them up afterwards.
195	mov	overdone, #0
196	cmp	dividend, divisor
197	subhs	dividend, dividend, divisor
198	cmp	dividend, divisor,  lsr #1
199	subhs	dividend, dividend, divisor, lsr #1
200	orrhs	overdone, overdone, curbit,  ror #1
201	cmp	dividend, divisor,  lsr #2
202	subhs	dividend, dividend, divisor, lsr #2
203	orrhs	overdone, overdone, curbit,  ror #2
204	cmp	dividend, divisor,  lsr #3
205	subhs	dividend, dividend, divisor, lsr #3
206	orrhs	overdone, overdone, curbit,  ror #3
	@ Remember this pass's bit position for the fix-up code below.
207	mov	ip,       curbit
208  .else
209	@ ... so keep track of which subtractions are done in RESULT.
210	@ The result will be ok, since the "bit" will have been 
211	@ shifted out at the bottom.
212	cmp	dividend, divisor
213	subhs	dividend, dividend, divisor
214	orrhs	result,   result,   curbit
215	cmp	dividend, divisor,  lsr #1
216	subhs	dividend, dividend, divisor, lsr #1
217	orrhs	result,   result,   curbit,  lsr #1
218	cmp	dividend, divisor,  lsr #2
219	subhs	dividend, dividend, divisor, lsr #2
220	orrhs	result,   result,   curbit,  lsr #2
221	cmp	dividend, divisor,  lsr #3
222	subhs	dividend, dividend, divisor, lsr #3
223	orrhs	result,   result,   curbit,  lsr #3
224  .endif
225
	@ Loop again only while the dividend is non-zero and curbit, moved
	@ down four places, is still non-zero (movnes updates the flags).
226	cmp	dividend, #0			@ Early termination?
227	movnes	curbit,   curbit,  lsr #4	@ No, any more bits to do?
228	movne	divisor,  divisor, lsr #4
229	bne	Loop3
230
231  .if \modulo
232Lfixup_dividend:	
233	@ Any subtractions that we should not have done will be recorded in
234	@ the top three bits of OVERDONE.  Exactly which were not needed
235	@ are governed by the position of the bit, stored in IP.
236	ands	overdone, overdone, #0xe0000000
237	@ If we terminated early, because dividend became zero, then the 
238	@ bit in ip will not be in the bottom nibble, and we should not
239	@ perform the additions below.  We must test for this though
240	@ (rather relying upon the TSTs to prevent the additions) since
241	@ the bit in ip could be in the top two bits which might then match
242	@ with one of the smaller RORs.
243	tstne	ip, #0x7
244	beq	Lgot_result
245	tst	overdone, ip, ror #3
246	addne	dividend, dividend, divisor, lsr #3
247	tst	overdone, ip, ror #2
248	addne	dividend, dividend, divisor, lsr #2
249	tst	overdone, ip, ror #1
250	addne	dividend, dividend, divisor, lsr #1
251  .endif
252
253Lgot_result:
254.endm
255/* ------------------------------------------------------------------------ */
@ THUMB_DIV_MOD_BODY modulo
@   Thumb counterpart of ARM_DIV_MOD_BODY, written with explicit
@   branches and the extra scratch register `work'.
@   On entry:  dividend and divisor hold the unsigned operands and
@              curbit holds 1; for modulo == 0 the callers in this file
@              also clear result first.
@   On exit:   modulo == 0: result has the quotient bits OR-ed in.
@              modulo != 0: dividend holds the remainder.
@   Modifies:  dividend, divisor, curbit, work, r2 (result/overdone),
@              flags, and ip when modulo != 0.
@   The macro defines the plain labels Loop1, Lbignum, Loop2, Loop3,
@   Lover1..Lover5 (plus Lover6/Lover7 for modulo) and Lgot_result;
@   callers branch to Lgot_result to skip the body.
256.macro THUMB_DIV_MOD_BODY modulo
257	@ Load the constant 0x10000000 into our work register.
258	mov	work, #1
259	lsl	work, #28
260Loop1:
261	@ Unless the divisor is very big, shift it up in multiples of
262	@ four bits, since this is the amount of unwinding in the main
263	@ division loop.  Continue shifting until the divisor is 
264	@ larger than the dividend.
265	cmp	divisor, work
266	bhs	Lbignum
267	cmp	divisor, dividend
268	bhs	Lbignum
269	lsl	divisor, #4
270	lsl	curbit,  #4
271	b	Loop1
272Lbignum:
273	@ Set work to 0x80000000
274	lsl	work, #3
275Loop2:
276	@ For very big divisors, we must shift it a bit at a time, or
277	@ we will be in danger of overflowing.
278	cmp	divisor, work
279	bhs	Loop3
280	cmp	divisor, dividend
281	bhs	Loop3
282	lsl	divisor, #1
283	lsl	curbit,  #1
284	b	Loop2
285Loop3:
286	@ Test for possible subtractions ...
287  .if \modulo
288	@ ... On the final pass, this may subtract too much from the dividend, 
289	@ so keep track of which subtractions are done, we can fix them up 
290	@ afterwards.
291	mov	overdone, #0
292	cmp	dividend, divisor
293	blo	Lover1
294	sub	dividend, dividend, divisor
295Lover1:
296	lsr	work, divisor, #1
297	cmp	dividend, work
298	blo	Lover2
299	sub	dividend, dividend, work
	@ Record curbit rotated right by 1 in overdone; curbit itself is
	@ parked in ip while it is rotated in place and restored afterwards.
300	mov	ip, curbit
301	mov	work, #1
302	ror	curbit, work
303	orr	overdone, curbit
304	mov	curbit, ip
305Lover2:
306	lsr	work, divisor, #2
307	cmp	dividend, work
308	blo	Lover3
309	sub	dividend, dividend, work
	@ Same as above with a rotation of 2.
310	mov	ip, curbit
311	mov	work, #2
312	ror	curbit, work
313	orr	overdone, curbit
314	mov	curbit, ip
315Lover3:
316	lsr	work, divisor, #3
317	cmp	dividend, work
318	blo	Lover4
319	sub	dividend, dividend, work
	@ Same as above with a rotation of 3.
320	mov	ip, curbit
321	mov	work, #3
322	ror	curbit, work
323	orr	overdone, curbit
324	mov	curbit, ip
325Lover4:
	@ Remember this pass's bit position for the fix-up code below.
326	mov	ip, curbit
327  .else
328	@ ... and note which bits are done in the result.  On the final pass,
329	@ this may subtract too much from the dividend, but the result will be ok,
330	@ since the "bit" will have been shifted out at the bottom.
331	cmp	dividend, divisor
332	blo	Lover1
333	sub	dividend, dividend, divisor
334	orr	result, result, curbit
335Lover1:
336	lsr	work, divisor, #1
337	cmp	dividend, work
338	blo	Lover2
339	sub	dividend, dividend, work
340	lsr	work, curbit, #1
341	orr	result, work
342Lover2:
343	lsr	work, divisor, #2
344	cmp	dividend, work
345	blo	Lover3
346	sub	dividend, dividend, work
347	lsr	work, curbit, #2
348	orr	result, work
349Lover3:
350	lsr	work, divisor, #3
351	cmp	dividend, work
352	blo	Lover4
353	sub	dividend, dividend, work
354	lsr	work, curbit, #3
355	orr	result, work
356Lover4:
357  .endif
358	
359	cmp	dividend, #0			@ Early termination?
360	beq	Lover5
361	lsr	curbit,  #4			@ No, any more bits to do?
362	beq	Lover5
363	lsr	divisor, #4
364	b	Loop3
365Lover5:
366  .if \modulo
367	@ Any subtractions that we should not have done will be recorded in
368	@ the top three bits of "overdone".  Exactly which were not needed
369	@ are governed by the position of the bit, stored in ip.
	@ work = 0xe << 28 = 0xe0000000, the mask for those three bits.
370	mov	work, #0xe
371	lsl	work, #28
372	and	overdone, work
373	beq	Lgot_result
374	
375	@ If we terminated early, because dividend became zero, then the 
376	@ bit in ip will not be in the bottom nibble, and we should not
377	@ perform the additions below.  We must test for this though
378	@ (rather relying upon the TSTs to prevent the additions) since
379	@ the bit in ip could be in the top two bits which might then match
380	@ with one of the smaller RORs.
381	mov	curbit, ip
382	mov	work, #0x7
383	tst	curbit, work
384	beq	Lgot_result
385	
	@ Undo, in turn, the lsr #3, lsr #2 and lsr #1 subtractions whose
	@ rotated bit is set in overdone.
386	mov	curbit, ip
387	mov	work, #3
388	ror	curbit, work
389	tst	overdone, curbit
390	beq	Lover6
391	lsr	work, divisor, #3
392	add	dividend, work
393Lover6:
394	mov	curbit, ip
395	mov	work, #2
396	ror	curbit, work
397	tst	overdone, curbit
398	beq	Lover7
399	lsr	work, divisor, #2
400	add	dividend, work
401Lover7:
402	mov	curbit, ip
403	mov	work, #1
404	ror	curbit, work
405	tst	overdone, curbit
406	beq	Lgot_result
407	lsr	work, divisor, #1
408	add	dividend, work
409  .endif
410Lgot_result:
411.endm	
412/* ------------------------------------------------------------------------ */
413/*		Start of the Real Functions				    */
414/* ------------------------------------------------------------------------ */
415#ifdef L_udivsi3
416
@ __udivsi3: unsigned division.
@   In:   r0 = dividend, r1 = divisor.
@   Out:  r0 = quotient.
@   A zero divisor branches to Ldiv0.  When dividend < divisor the body
@   is skipped and the pre-cleared result (0) is returned.
417	FUNC_START udivsi3
418
419#ifdef __thumb__
420
421	cmp	divisor, #0
422	beq	Ldiv0
423	mov	curbit, #1
424	mov	result, #0
425	
	@ work is pushed only after the Ldiv0 check, so the division-by-zero
	@ path runs with nothing extra on the stack.
426	push	{ work }
427	cmp	dividend, divisor
428	blo	Lgot_result
429
430	THUMB_DIV_MOD_BODY 0
431	
432	mov	r0, result
433	pop	{ work }
434	RET
435
436#else /* ARM version.  */
437	
438	cmp	divisor, #0
439	beq	Ldiv0
440	mov	curbit, #1
441	mov	result, #0
442	cmp	dividend, divisor
443	blo	Lgot_result
444	
445	ARM_DIV_MOD_BODY 0
446	
447	mov	r0, result
448	RET	
449
450#endif /* ARM version */
451
452	FUNC_END udivsi3
453
454#endif /* L_udivsi3 */
455/* ------------------------------------------------------------------------ */
456#ifdef L_umodsi3
457
@ __umodsi3: unsigned remainder.
@   In:   r0 = dividend, r1 = divisor.
@   Out:  r0 = remainder (computed in place in the dividend register).
@   A zero divisor branches to Ldiv0.
458	FUNC_START umodsi3
459
460#ifdef __thumb__
461
462	cmp	divisor, #0
463	beq	Ldiv0
464	mov	curbit, #1
465	cmp	dividend, divisor
466	bhs	Lover10
	@ dividend < divisor: the dividend is already the remainder.
467	RET	
468
469Lover10:
470	push	{ work }
471
472	THUMB_DIV_MOD_BODY 1
473	
474	pop	{ work }
475	RET
476	
477#else  /* ARM version.  */
478	
479	cmp	divisor, #0
480	beq	Ldiv0
	@ divisor == 1, or dividend == divisor: set dividend to 0.  The flags
	@ are then "equal", so the conditional return below is not taken and
	@ the general code runs with a zero dividend.
	@ dividend < divisor: return with the dividend unchanged.
481	cmp     divisor, #1
482	cmpne	dividend, divisor
483	moveq   dividend, #0
484	RETc(lo)
485	mov	curbit, #1
486
487	ARM_DIV_MOD_BODY 1
488	
489	RET	
490
491#endif /* ARM version.  */
492	
493	FUNC_END umodsi3
494
495#endif /* L_umodsi3 */
496/* ------------------------------------------------------------------------ */
497#ifdef L_divsi3
498
@ __divsi3: signed division.
@   In:   r0 = dividend, r1 = divisor.
@   Out:  r0 = quotient.
@   The operands are made non-negative, divided with the unsigned body,
@   and the quotient is negated when the operand signs differed (the
@   exclusive-or of the operands is kept in ip for that test).
@   A zero divisor branches to Ldiv0.
499	FUNC_START divsi3	
500
501#ifdef __thumb__
502	cmp	divisor, #0
503	beq	Ldiv0
504	
505	push	{ work }
506	mov	work, dividend
507	eor	work, divisor		@ Save the sign of the result.
508	mov	ip, work
509	mov	curbit, #1
510	mov	result, #0
511	cmp	divisor, #0
512	bpl	Lover10
513	neg	divisor, divisor	@ Loops below use unsigned.
514Lover10:
515	cmp	dividend, #0
516	bpl	Lover11
517	neg	dividend, dividend
518Lover11:
519	cmp	dividend, divisor
520	blo	Lgot_result
521
522	THUMB_DIV_MOD_BODY 0
523	
524	mov	r0, result
	@ Negate the quotient if the saved sign word in ip is negative.
525	mov	work, ip
526	cmp	work, #0
527	bpl	Lover12
528	neg	r0, r0
529Lover12:
530	pop	{ work }
531	RET
532
533#else /* ARM version.  */
534	
535	eor	ip, dividend, divisor		@ Save the sign of the result.
536	mov	curbit, #1
537	mov	result, #0
	@ One compare serves both the negation (mi) and the zero check (eq):
	@ rsb without the S suffix leaves the flags from cmp intact.
538	cmp	divisor, #0
539	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
540	beq	Ldiv0
541	cmp	dividend, #0
542	rsbmi	dividend, dividend, #0
543	cmp	dividend, divisor
544	blo	Lgot_result
545
546	ARM_DIV_MOD_BODY 0
547	
548	mov	r0, result
549	cmp	ip, #0
550	rsbmi	r0, r0, #0
551	RET	
552
553#endif /* ARM version */
554	
555	FUNC_END divsi3
556
557#endif /* L_divsi3 */
558/* ------------------------------------------------------------------------ */
559#ifdef L_modsi3
560
@ __modsi3: signed remainder.
@   In:   r0 = dividend, r1 = divisor.
@   Out:  r0 = remainder, negated when the original dividend was negative.
@   The original dividend is saved on the stack so its sign can be
@   tested after the unsigned body has run.
@   A zero divisor branches to Ldiv0 before anything is pushed.
561	FUNC_START modsi3
562
563#ifdef __thumb__
564
565	mov	curbit, #1
566	cmp	divisor, #0
567	beq	Ldiv0
568	bpl	Lover10
569	neg	divisor, divisor		@ Loops below use unsigned.
570Lover10:
571	push	{ work }
572	@ Need to save the sign of the dividend, unfortunately, we need
573	@ work later on.  Must do this after saving the original value of
574	@ the work register, because we will pop this value off first.
575	push	{ dividend }
576	cmp	dividend, #0
577	bpl	Lover11
578	neg	dividend, dividend
579Lover11:
580	cmp	dividend, divisor
581	blo	Lgot_result
582
583	THUMB_DIV_MOD_BODY 1
584		
	@ First pop: the saved original dividend, used only for its sign.
585	pop	{ work }
586	cmp	work, #0
587	bpl	Lover12
588	neg	dividend, dividend
589Lover12:
	@ Second pop: the caller's value of work.
590	pop	{ work }
591	RET	
592
593#else /* ARM version.  */
594	
595	cmp	divisor, #0
596	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
597	beq	Ldiv0
598	@ Need to save the sign of the dividend, unfortunately, we need
599	@ ip later on; this is faster than pushing lr and using that.
600	str	dividend, [sp, #-4]!
601	cmp	dividend, #0			@ Test dividend against zero
602	rsbmi	dividend, dividend, #0		@ If negative make positive
603	cmp	dividend, divisor		@ else if zero return zero
604	blo	Lgot_result			@ if smaller return dividend
605	mov	curbit, #1
606
607	ARM_DIV_MOD_BODY 1
608
	@ Reload the saved original dividend and apply its sign.
609	ldr	ip, [sp], #4
610	cmp	ip, #0
611	rsbmi	dividend, dividend, #0
612	RET	
613
614#endif /* ARM version */
615	
616	FUNC_END modsi3
617
618#endif /* L_modsi3 */
619/* ------------------------------------------------------------------------ */
620#ifdef L_dvmd_tls
621
@ __div0: division-by-zero handler that simply returns to its caller.
622	FUNC_START div0
623
624	RET
625
626	SIZE	(__div0)
627	
628#endif /* L_dvmd_tls */
629/* ------------------------------------------------------------------------ */
630#ifdef L_dvmd_lnx
631@ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls
@ It issues the getpid system call and then the kill system call with
@ r0 = the value getpid returned and r1 = SIGFPE.  r1 and lr are saved
@ on entry and restored on every return path.
632
633/* Constants taken from <asm/unistd.h> and <asm/signal.h> */
634#define SIGFPE	8
635#define __NR_SYSCALL_BASE	0x900000
636#define __NR_getpid			(__NR_SYSCALL_BASE+ 20)
637#define __NR_kill			(__NR_SYSCALL_BASE+ 37)
638
639	FUNC_START div0
640
641	stmfd	sp!, {r1, lr}
642	swi	__NR_getpid
	@ cmn adds 1000 to r0; the carry is set (hs) only when r0, taken as
	@ unsigned, is at least -1000, i.e. getpid came back with a small
	@ negative value.  In that case give up and return.
643	cmn	r0, #1000
644	ldmhsfd	sp!, {r1, pc}RETCOND	@ not much we can do
645	mov	r1, #SIGFPE
646	swi	__NR_kill
647#ifdef __THUMB_INTERWORK__
648	ldmfd	sp!, {r1, lr}
649	bx	lr
650#else
651	ldmfd	sp!, {r1, pc}RETCOND
652#endif
653
654	SIZE 	(__div0)
655	
656#endif /* L_dvmd_lnx */
657/* ------------------------------------------------------------------------ */
658/* These next two sections are here despite the fact that they contain Thumb 
659   assembler because their presence allows interworked code to be linked even
660   when the GCC library is this one.  */
661		
662/* Do not build the interworking functions when the target architecture does 
663   not support Thumb instructions.  (This can be a multilib option).  */
664#if defined L_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)
665
666/* These labels & instructions are used by the Arm/Thumb interworking code. 
667   The address of function to be called is loaded into a register and then 
668   one of these labels is called via a BL instruction.  This puts the 
669   return address into the link register with the bottom bit set, and the 
670   code here switches to the correct mode before executing the function.  */
671	
672	.text
673	.align 0
674        .force_thumb
675
@ call_via register
@   Emits the global Thumb function _call_via_<register>, whose body is a
@   single `bx <register>' followed by a nop.
676.macro call_via register
677	THUMB_FUNC_START _call_via_\register
678
679	bx	\register
680	nop
681
682	SIZE	(_call_via_\register)
683.endm
684
@ One stub per register that can hold the target address.
685	call_via r0
686	call_via r1
687	call_via r2
688	call_via r3
689	call_via r4
690	call_via r5
691	call_via r6
692	call_via r7
693	call_via r8
694	call_via r9
695	call_via sl
696	call_via fp
697	call_via ip
698	call_via sp
699	call_via lr
700
701#endif /* L_call_via_rX */
702/* ------------------------------------------------------------------------ */
703/* Do not build the interworking functions when the target architecture does 
704   not support Thumb instructions.  (This can be a multilib option).  */
705#if defined L_interwork_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)
706
707/* These labels & instructions are used by the Arm/Thumb interworking code,
708   when the target address is in an unknown instruction set.  The address 
709   of function to be called is loaded into a register and then one of these
710   labels is called via a BL instruction.  This puts the return address 
711   into the link register with the bottom bit set, and the code here 
712   switches to the correct mode before executing the function.  Unfortunately
713   the target code cannot be relied upon to return via a BX instruction, so
714   instead we have to store the return address on the stack and allow the
715   called function to return here instead.  Upon return we recover the real
716   return address and use a BX to get back to Thumb mode.  */
717	
718	.text
719	.align 0
720
@ _arm_return: ARM-mode landing pad.  Pops the return address that an
@ interwork stub pushed and branches to it with bx.
721	.code   32
722	.globl _arm_return
723_arm_return:		
724	ldmia 	r13!, {r12}
725	bx 	r12
726	.code   16
727
@ interwork register
@   Emits the global Thumb entry point _interwork_call_via_<register>.
@   The Thumb part is `bx pc; nop', which continues in the ARM code that
@   follows.  That code tests bit 0 of the target address: when it is
@   clear, lr is pushed and replaced by the address of _arm_return; in
@   either case the target is then entered with bx.
728.macro interwork register					
729	.code   16
730
731	THUMB_FUNC_START _interwork_call_via_\register
732
733	bx 	pc
734	nop
735	
736	.code   32
737	.globl .Lchange_\register
738.Lchange_\register:
739	tst	\register, #1
740	stmeqdb	r13!, {lr}
741	adreq	lr, _arm_return
742	bx	\register
743
744	SIZE	(_interwork_call_via_\register)
745.endm
746	
747	interwork r0
748	interwork r1
749	interwork r2
750	interwork r3
751	interwork r4
752	interwork r5
753	interwork r6
754	interwork r7
755	interwork r8
756	interwork r9
757	interwork sl
758	interwork fp
759	interwork ip
760	interwork sp
761	
762	/* The LR case has to be handled a little differently...  */
	/* Here the target address is in lr itself, so it is copied to ip
	   before lr is overwritten with the address of _arm_return, and
	   the final branch goes through ip.  */
763	.code 16
764
765	THUMB_FUNC_START _interwork_call_via_lr
766
767	bx 	pc
768	nop
769	
770	.code 32
771	.globl .Lchange_lr
772.Lchange_lr:
773	tst	lr, #1
774	stmeqdb	r13!, {lr}
775	mov	ip, lr
776	adreq	lr, _arm_return
777	bx	ip
778	
779	SIZE	(_interwork_call_via_lr)
780	
781#endif /* L_interwork_call_via_rX */
782