1@ libgcc1 routines for ARM cpu.
2@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
3
4/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
5
6This file is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 2, or (at your option) any
9later version.
10
11In addition to the permissions in the GNU General Public License, the
12Free Software Foundation gives you unlimited permission to link the
13compiled version of this file with other programs, and to distribute
14those programs without any restriction coming from the use of this
15file.  (The General Public License restrictions do apply in other
16respects; for example, they cover modification of the file, and
17distribution when not linked into another program.)
18
19This file is distributed in the hope that it will be useful, but
20WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22General Public License for more details.
23
24You should have received a copy of the GNU General Public License
25along with this program; see the file COPYING.  If not, write to
26the Free Software Foundation, 59 Temple Place - Suite 330,
27Boston, MA 02111-1307, USA.  */
28
29/* As a special exception, if you link this library with other files,
30   some of which are compiled with GCC, to produce an executable,
31   this library does not by itself cause the resulting executable
32   to be covered by the GNU General Public License.
33   This exception does not however invalidate any other reasons why
34   the executable file might be covered by the GNU General Public License.  */
35
	.code	 16		@ Assemble everything that follows as 16-bit Thumb code.
	
#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ELF targets get .type/.size annotations and PLT-indirect calls;
   other object formats (a.out/COFF) define these away.  */
#ifdef __elf__
#define __PLT__ (PLT)
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#endif

/* Simple return; interworking is not needed for same-mode callers.  */
#define RET	mov	pc, lr
	
/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

/* Scratch register used by every routine below; it is pushed/popped
   around use since r4 is callee-saved in the ARM ABI.  */
work		.req	r4	@ XXXX is this safe ?
64
#ifdef L_udivsi3

@ __udivsi3: unsigned 32-bit division for Thumb-1.
@ In:   r0 = dividend, r1 = divisor (both unsigned).
@ Out:  r0 = quotient.  On divide-by-zero, __div0 is called and 0
@       is returned.
@ Clobbers r1-r3; r4 (work) is preserved via the stack.
@ Algorithm: classic shift-and-subtract.  The divisor is shifted up
@ until it exceeds the dividend, with "curbit" tracking the quotient
@ bit that corresponds to the current shift; the main loop then
@ subtracts four shifted copies of the divisor per iteration.

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
	
	.text
	.globl	SYM (__udivsi3)
	TYPE 	(__udivsi3)
	.align	0
	.thumb_func
SYM (__udivsi3):
	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	mov	result, #0
	
	push	{ work }
	cmp	dividend, divisor
	bcc	Lgot_result		@ dividend < divisor: quotient is 0

	@ Load the constant 0x10000000 into our work register
	mov	work, #1
	lsl	work, #28
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is 
	@ larger than the dividend.
	cmp	divisor, work
	bcs     Lbignum
	cmp	divisor, dividend
	bcs     Lbignum
	lsl	divisor, #4
	lsl	curbit,  #4
	b	Loop1

Lbignum:
	@ Set work to 0x80000000
	lsl	work, #3
Loop2:		
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bcs	Loop3
	cmp	divisor, dividend
	bcs	Loop3
	lsl	divisor, #1
	lsl	curbit,  #1
	b	Loop2

Loop3:
	@ Test for possible subtractions, and note which bits
	@ are done in the result.  On the final pass, this may subtract
	@ too much from the dividend, but the result will be ok, since the
	@ "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	bcc     Over1
	sub	dividend, dividend, divisor
	orr	result, result, curbit
Over1:	
	lsr	work, divisor, #1	@ try divisor >> 1
	cmp	dividend, work
	bcc	Over2
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
Over2:	
	lsr	work, divisor, #2	@ try divisor >> 2
	cmp	dividend, work
	bcc	Over3
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
Over3:	
	lsr	work, divisor, #3	@ try divisor >> 3
	cmp	dividend, work
	bcc	Over4
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
Over4:	
	cmp	dividend, #0			@ Early termination?
	beq	Lgot_result
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	Lgot_result			@ Thumb LSR sets flags, so this tests curbit == 0
	lsr	divisor, #4
	b	Loop3
Lgot_result:
	mov	r0, result
	pop	{ work }
	RET

Ldiv0:
	push	{ lr }
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE	(__udivsi3)
	
#endif /* L_udivsi3 */
172
#ifdef L_umodsi3

@ __umodsi3: unsigned 32-bit modulus for Thumb-1.
@ In:   r0 = dividend, r1 = divisor (both unsigned).
@ Out:  r0 = remainder (dividend mod divisor).  On divide-by-zero,
@       __div0 is called and 0 is returned.
@ Clobbers r1-r3 and ip; r4 (work) is preserved via the stack.
@ Same shift-and-subtract scheme as __udivsi3, but instead of
@ accumulating a quotient, the "overdone" register records (in its
@ top bits, via rotated copies of curbit) which of the speculative
@ sub-shift subtractions in the final pass must be undone afterwards.

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
	
	.text
	.globl	SYM (__umodsi3)
	TYPE	(__umodsi3)
	.align	0
	.thumb_func
SYM (__umodsi3):
	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	cmp	dividend, divisor
	bcs	Over1
	RET				@ dividend < divisor: remainder is the dividend

Over1:	
	@ Load the constant 0x10000000 into our work register
	push	{ work }
	mov	work, #1
	lsl	work, #28
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is 
	@ larger than the dividend.
	cmp	divisor, work
	bcs	Lbignum
	cmp	divisor, dividend
	bcs	Lbignum
	lsl	divisor, #4
	lsl	curbit, #4
	b	Loop1

Lbignum:
	@ Set work to 0x80000000
	lsl	work, #3
Loop2:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bcs	Loop3
	cmp	divisor, dividend
	bcs	Loop3
	lsl	divisor, #1
	lsl	curbit, #1
	b	Loop2

Loop3:
	@ Test for possible subtractions.  On the final pass, this may 
	@ subtract too much from the dividend, so keep track of which
	@ subtractions are done, we can fix them up afterwards...
	mov	overdone, #0
	cmp	dividend, divisor
	bcc	Over2
	sub	dividend, dividend, divisor
Over2:
	lsr	work, divisor, #1	@ try divisor >> 1
	cmp	dividend, work
	bcc	Over3
	sub	dividend, dividend, work
	mov	ip, curbit		@ record this step: overdone |= curbit ror 1
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over3:
	lsr	work, divisor, #2	@ try divisor >> 2
	cmp	dividend, work
	bcc	Over4
	sub	dividend, dividend, work
	mov	ip, curbit		@ record this step: overdone |= curbit ror 2
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over4:
	lsr	work, divisor, #3	@ try divisor >> 3
	cmp	dividend, work
	bcc	Over5
	sub	dividend, dividend, work
	mov	ip, curbit		@ record this step: overdone |= curbit ror 3
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over5:
	mov	ip, curbit		@ keep final curbit for the fixup tests below
	cmp	dividend, #0			@ Early termination?
	beq	Over6
	lsr	curbit, #4			@ No, any more bits to do?
	beq	Over6
	lsr	divisor, #4
	b	Loop3

Over6:	
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	@ If we terminated early, because dividend became zero,
	@ then none of the below will match, since the bit in ip will not be
	@ in the bottom nibble.

	mov	work, #0xe
	lsl	work, #28	
	and	overdone, work		@ Thumb AND sets flags: Z set if no fixups
	bne	Over7
	pop	{ work }
	RET					@ No fixups needed
Over7:
	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit	@ was the divisor>>3 step overdone?
	beq	Over8
	lsr	work, divisor, #3
	add	dividend, dividend, work	@ undo it
Over8:
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit	@ was the divisor>>2 step overdone?
	beq	Over9
	lsr	work, divisor, #2
	add	dividend, dividend, work	@ undo it
Over9:
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit	@ was the divisor>>1 step overdone?
	beq	Over10
	lsr	work, divisor, #1
	add	dividend, dividend, work	@ undo it
Over10:
	pop	{ work }
	RET	

Ldiv0:
	push	{ lr }
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE	(__umodsi3)
	
#endif /* L_umodsi3 */
327
#ifdef L_divsi3

@ __divsi3: signed 32-bit division for Thumb-1.
@ In:   r0 = dividend, r1 = divisor (both signed).
@ Out:  r0 = quotient.  On divide-by-zero, __div0 is called and 0
@       is returned.
@ Clobbers r1-r3 and ip; r4 (work) is preserved via the stack.
@ Implementation: the sign of the result (dividend XOR divisor) is
@ stashed in ip, both operands are made non-negative, the unsigned
@ shift-and-subtract algorithm of __udivsi3 is run, and the result
@ is negated at the end if the saved sign was negative.
@ (Branch mnemonics were normalized to lowercase to match the rest
@ of this file; GAS treats mnemonic case as insignificant.)

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
	
	.text
	.globl	SYM (__divsi3)
	TYPE	(__divsi3)
	.align	0
	.thumb_func
SYM (__divsi3):
	cmp	divisor, #0
	beq	Ldiv0
	
	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	Over1
	neg	divisor, divisor	@ Loops below use unsigned.
Over1:	
	cmp	dividend, #0
	bpl	Over2
	neg	dividend, dividend
Over2:	
	cmp	dividend, divisor
	bcc	Lgot_result		@ |dividend| < |divisor|: quotient is 0

	mov	work, #1
	lsl	work, #28		@ work = 0x10000000
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is 
	@ larger than the dividend.
	cmp	divisor, work
	bcs	Lbignum
	cmp	divisor, dividend
	bcs	Lbignum
	lsl	divisor, #4
	lsl	curbit, #4
	b	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	lsl	work, #3		@ work = 0x80000000
Loop2:		
	cmp	divisor, work
	bcs	Loop3
	cmp	divisor, dividend
	bcs	Loop3
	lsl	divisor, #1
	lsl	curbit, #1
	b	Loop2

Loop3:
	@ Test for possible subtractions, and note which bits
	@ are done in the result.  On the final pass, this may subtract
	@ too much from the dividend, but the result will be ok, since the
	@ "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	bcc	Over3
	sub	dividend, dividend, divisor
	orr	result, result, curbit
Over3:
	lsr	work, divisor, #1	@ try divisor >> 1
	cmp	dividend, work
	bcc	Over4
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
Over4:	
	lsr	work, divisor, #2	@ try divisor >> 2
	cmp	dividend, work
	bcc	Over5
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, result, work
Over5:	
	lsr	work, divisor, #3	@ try divisor >> 3
	cmp	dividend, work
	bcc	Over6
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, result, work
Over6:	
	cmp	dividend, #0			@ Early termination?
	beq	Lgot_result
	lsr	curbit, #4			@ No, any more bits to do?
	beq	Lgot_result			@ Thumb LSR sets flags, so this tests curbit == 0
	lsr	divisor, #4
	b	Loop3
	
Lgot_result:
	mov	r0, result
	mov	work, ip		@ recover saved result sign
	cmp	work, #0
	bpl	Over7
	neg	r0, r0			@ operands had opposite signs
Over7:
	pop	{ work }
	RET	

Ldiv0:
	push	{ lr }
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE	(__divsi3)
	
#endif /* L_divsi3 */
450
#ifdef L_modsi3

@ __modsi3: signed 32-bit modulus for Thumb-1.
@ In:   r0 = dividend, r1 = divisor (both signed).
@ Out:  r0 = remainder; the result takes the sign of the dividend.
@       On divide-by-zero, __div0 is called and 0 is returned.
@ Clobbers r1-r3 and ip; r4 (work) is preserved via the stack.
@ Implementation: the original dividend is pushed so its sign
@ survives (ip is needed by the fixup code), both operands are made
@ non-negative, the unsigned remainder is computed as in __umodsi3
@ (including the "overdone" fixup pass), and the result is negated
@ if the original dividend was negative.
@ (Branch mnemonics were normalized to lowercase to match the rest
@ of this file; GAS treats mnemonic case as insignificant.)

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
	
	.text
	.globl	SYM (__modsi3)
	TYPE	(__modsi3)
	.align	0
	.thumb_func
SYM (__modsi3):
	mov	curbit, #1
	cmp	divisor, #0
	beq	Ldiv0
	bpl	Over1
	neg	divisor, divisor		@ Loops below use unsigned.
Over1:	
	push	{ work }
	@ Need to save the sign of the dividend, unfortunately, we need
	@ ip later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	Over2
	neg	dividend, dividend
Over2:	
	cmp	dividend, divisor
	bcc	Lgot_result		@ |dividend| < |divisor|: remainder is |dividend|
	mov	work, #1
	lsl	work, #28		@ work = 0x10000000
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is 
	@ larger than the dividend.
	cmp	divisor, work
	bcs	Lbignum
	cmp	divisor, dividend
	bcs	Lbignum
	lsl	divisor, #4
	lsl	curbit, #4
	b	Loop1

Lbignum:
	@ Set work to 0x80000000
	lsl	work, #3
Loop2:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bcs	Loop3
	cmp	divisor, dividend
	bcs	Loop3
	lsl	divisor, #1
	lsl	curbit, #1
	b	Loop2

Loop3:
	@ Test for possible subtractions.  On the final pass, this may 
	@ subtract too much from the dividend, so keep track of which
	@ subtractions are done, we can fix them up afterwards...
	mov	overdone, #0
	cmp	dividend, divisor
	bcc	Over3
	sub	dividend, dividend, divisor
Over3:
	lsr	work, divisor, #1	@ try divisor >> 1
	cmp	dividend, work
	bcc	Over4
	sub	dividend, dividend, work
	mov	ip, curbit		@ record this step: overdone |= curbit ror 1
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over4:
	lsr	work, divisor, #2	@ try divisor >> 2
	cmp	dividend, work
	bcc	Over5
	sub	dividend, dividend, work
	mov	ip, curbit		@ record this step: overdone |= curbit ror 2
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over5:
	lsr	work, divisor, #3	@ try divisor >> 3
	cmp	dividend, work
	bcc	Over6
	sub	dividend, dividend, work
	mov	ip, curbit		@ record this step: overdone |= curbit ror 3
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over6:
	mov	ip, curbit		@ keep final curbit for the fixup tests below
	cmp	dividend, #0			@ Early termination?
	beq	Over7
	lsr	curbit, #4			@ No, any more bits to do?
	beq	Over7
	lsr	divisor, #4
	b	Loop3

Over7:	
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	@ If we terminated early, because dividend became zero,
	@ then none of the below will match, since the bit in ip will not be
	@ in the bottom nibble.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work		@ Thumb AND sets flags: Z set if no fixups
	beq	Lgot_result
	
	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit	@ was the divisor>>3 step overdone?
	beq	Over8
	lsr	work, divisor, #3
	add	dividend, dividend, work	@ undo it
Over8:
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit	@ was the divisor>>2 step overdone?
	beq	Over9
	lsr	work, divisor, #2
	add	dividend, dividend, work	@ undo it
Over9:
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit	@ was the divisor>>1 step overdone?
	beq	Lgot_result
	lsr	work, divisor, #1
	add	dividend, dividend, work	@ undo it
Lgot_result:
	pop	{ work }		@ original dividend: recover its sign
	cmp	work, #0
	bpl	Over10
	neg	dividend, dividend	@ remainder takes the dividend's sign
Over10:
	pop	{ work }
	RET	

Ldiv0:
	push    { lr }
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }
	
	SIZE	(__modsi3)
		
#endif /* L_modsi3 */
614
#ifdef L_dvmd_tls

@ __div0: divide-by-zero handler invoked by the division routines
@ above.  This is the "tls" (trap-less) variant: it simply returns,
@ after which the caller produces 0 as the (meaningless) result.

	.globl	SYM (__div0)
	TYPE	(__div0)
	.align	0
	.thumb_func
SYM (__div0):
	RET	

	SIZE	(__div0)
	
#endif /* L_dvmd_tls */
627
628	
#ifdef L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code. 
   The address of function to be called is loaded into a register and then 
   one of these labels is called via a BL instruction.  This puts the 
   return address into the link register with the bottom bit set, and the 
   code here switches to the correct mode before executing the function.  */
	
	.text
	.align 0

/* Emit one trampoline per register: _call_via_<reg> is just a
   "bx <reg>".  The BL that reached here already placed the return
   address in lr, so the target routine returns straight to the
   original caller.  The trailing nop keeps the next entry aligned.  */
.macro call_via register
	.globl	SYM (_call_via_\register)
	TYPE	(_call_via_\register)
	.thumb_func
SYM (_call_via_\register):
	bx	\register
	nop
	
	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */
668
#ifdef L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address 
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address 
   into the link register with the bottom bit set, and the code here 
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.  */
	
	.text
	.align 0

	.code 32
	.globl _arm_return
@ ARM-mode return thunk: pop the caller's real (Thumb) return address
@ and BX back to it.
_arm_return:		
	ldmia 	r13!, {r12}
	bx 	r12
	
.macro interwork register					
	.code 16
	
	.globl	SYM (_interwork_call_via_\register)
	TYPE	(_interwork_call_via_\register)
	.thumb_func
SYM (_interwork_call_via_\register):
	bx 	pc			@ switch to ARM mode at .Lchange_\register
	nop				@ padding; bx pc targets pc + 4
	
	.code 32
	.globl .Lchange_\register
.Lchange_\register:
	@ Bit 0 of the target address selects the instruction set.
	@ If it is clear (EQ: ARM-mode target), the callee may not
	@ return with BX, so save the real return address and redirect
	@ lr through _arm_return.  A Thumb target (bit 0 set) returns
	@ normally via the unmodified lr.
	tst	\register, #1
	stmeqdb	r13!, {lr}
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)
.endm
	
	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The lr case has to be handled a little differently...*/
	.code 16
	.globl	SYM (_interwork_call_via_lr)
	TYPE	(_interwork_call_via_lr)
	.thumb_func
SYM (_interwork_call_via_lr):
	bx 	pc
	nop
	
	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	@ As above, but the target address itself lives in lr, so copy
	@ it to ip before lr is repointed at _arm_return.
	tst	lr, #1
	stmeqdb	r13!, {lr}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)
	
#endif /* L_interwork_call_via_rX */
748
749	
750