@ libgcc1 routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file with other programs, and to distribute
those programs without any restriction coming from the use of this
file.  (The General Public License restrictions do apply in other
respects; for example, they cover modification of the file, and
distribution when not linked into another program.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* As a special exception, if you link this library with other files,
   some of which are compiled with GCC, to produce an executable,
   this library does not by itself cause the resulting executable
   to be covered by the GNU General Public License.
   This exception does not however invalidate any other reasons why
   the executable file might be covered by the GNU General Public License.  */

#ifdef __APCS_26__
#define RET	movs
#define RETc(x)	mov##x##s
#define RETCOND ^
#else
#define RET	mov
#define RETc(x)	mov##x
#define RETCOND
#endif
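
/* Illustrative expansion only: under __APCS_26__ (26-bit APCS, where the
   flags live in the top bits of the PC), "RETc(eq) pc, lr" expands to
   "moveqs pc, lr", which also restores the caller's flags from lr, while
   without it the same macro gives plain "moveq pc, lr".  RETCOND is
   appended to LDM register lists (e.g. "ldmia sp!, {pc}^") for the same
   reason.  */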

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
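
/* Worked example (illustrative only): on a target where
   __USER_LABEL_PREFIX__ is "_" (a common a.out convention),
   SYM (__udivsi3) expands to the label ___udivsi3; on most ELF targets
   the prefix is empty and the label is just __udivsi3.  The two-level
   CONCAT is needed so that the prefix macro is expanded before the
   ## token pasting takes place.  */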

#ifdef __ELF__
#define __PLT__ (PLT)
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#endif

#ifdef L_udivsi3

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl	SYM (__udivsi3)
	TYPE 	(__udivsi3)
	.align	0

SYM (__udivsi3):
	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	mov	result, #0
	cmp	dividend, divisor
	bcc	Lgot_result
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #4
	movcc	curbit, curbit, lsl #4
	bcc	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #1
	movcc	curbit, curbit, lsl #1
	bcc	Lbignum

Loop3:
	@ Test for possible subtractions, and note which bits
	@ are done in the result.  On the final pass, this may subtract
	@ too much from the dividend, but the result will be ok, since the
	@ "bit" will have been shifted out at the bottom.  (A C outline of
	@ this algorithm follows the routine.)
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	orrcs	result, result, curbit
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	result, result, curbit, lsr #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	result, result, curbit, lsr #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	result, result, curbit, lsr #3
	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	bne	Loop3
Lgot_result:
	mov	r0, result
	RET	pc, lr

Ldiv0:
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	ldmia	sp!, {pc}RETCOND

	SIZE	(__udivsi3)
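
/* For reference only: a rough C outline of the algorithm used above.  The
   function name is invented for illustration, and the real loop above is
   unrolled to produce four quotient bits per pass through Loop3:

	unsigned
	udivsi3_sketch (unsigned dividend, unsigned divisor)
	{
	  unsigned result = 0, curbit = 1;

	  if (divisor == 0)
	    return 0;			// the real code branches to __div0

	  // Scale the divisor (and the quotient bit that goes with it)
	  // up until it is at least as big as the dividend.
	  while (divisor < dividend && !(divisor & 0x80000000))
	    {
	      divisor <<= 1;
	      curbit <<= 1;
	    }

	  // Shift back down, subtracting wherever the divisor still fits
	  // and recording the corresponding quotient bit.
	  do
	    {
	      if (dividend >= divisor)
		{
		  dividend -= divisor;
		  result |= curbit;
		}
	      divisor >>= 1;
	      curbit >>= 1;
	    }
	  while (curbit && dividend);

	  return result;
	}

   Each quotient bit comes from one compare-and-conditionally-subtract step;
   the assembler handles four such steps per iteration, which is why the
   divisor is first shifted up in multiples of four bits.  */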

#endif /* L_udivsi3 */

#ifdef L_umodsi3

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl	SYM (__umodsi3)
	TYPE	(__umodsi3)
	.align 0

SYM (__umodsi3):
	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	cmp	dividend, divisor
	RETc(cc)	pc, lr
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #4
	movcc	curbit, curbit, lsl #4
	bcc	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #1
	movcc	curbit, curbit, lsl #1
	bcc	Lbignum

Loop3:
	@ Test for possible subtractions.  On the final pass, this may
	@ subtract too much from the dividend, so keep track of which
	@ subtractions are done so that we can fix them up afterwards.
	@ (A C outline of this scheme follows the routine.)
	mov	overdone, #0
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	overdone, overdone, curbit, ror #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	overdone, overdone, curbit, ror #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	overdone, overdone, curbit, ror #3
	mov	ip, curbit
	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	bne	Loop3

	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which ones were not
	@ needed is governed by the position of the bit, stored in ip.
	@ If we terminated early, because the dividend became zero, then none
	@ of the tests below will match, since the bit in ip will not be in
	@ the bottom nibble.
	ands	overdone, overdone, #0xe0000000
	RETc(eq)	pc, lr				@ No fixups needed
	tst	overdone, ip, ror #3
	addne	dividend, dividend, divisor, lsr #3
	tst	overdone, ip, ror #2
	addne	dividend, dividend, divisor, lsr #2
	tst	overdone, ip, ror #1
	addne	dividend, dividend, divisor, lsr #1
	RET	pc, lr

Ldiv0:
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	ldmia	sp!, {pc}RETCOND

	SIZE	(__umodsi3)
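
/* For reference only: ignoring the four-way unrolling, the remainder is
   found by the same shift-and-subtract scheme as __udivsi3, except that
   no quotient is accumulated.  The function name below is invented:

	unsigned
	umodsi3_sketch (unsigned dividend, unsigned divisor)
	{
	  unsigned curbit = 1;

	  if (divisor == 0)
	    return 0;			// the real code branches to __div0

	  while (divisor < dividend && !(divisor & 0x80000000))
	    {
	      divisor <<= 1;
	      curbit <<= 1;
	    }
	  do
	    {
	      if (dividend >= divisor)
		dividend -= divisor;
	      divisor >>= 1;
	      curbit >>= 1;
	    }
	  while (curbit && dividend);

	  return dividend;
	}

   Because the real loop handles four bit positions per pass, the last pass
   may subtract divisor >> 1, >> 2 or >> 3 when the corresponding quotient
   bit lies below bit 0.  "overdone" records those steps (curbit rotated
   right wraps into the top bits), and the fixup code above adds the
   over-subtracted amounts back.  */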

#endif /* L_umodsi3 */

#ifdef L_divsi3

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl	SYM (__divsi3)
	TYPE	(__divsi3)
	.align 0

SYM (__divsi3):
	eor	ip, dividend, divisor		@ Save the sign of the result.
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
	beq	Ldiv0
	cmp	dividend, #0
	rsbmi	dividend, dividend, #0
	cmp	dividend, divisor
	bcc	Lgot_result

Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #4
	movcc	curbit, curbit, lsl #4
	bcc	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #1
	movcc	curbit, curbit, lsl #1
	bcc	Lbignum

Loop3:
	@ Test for possible subtractions, and note which bits
	@ are done in the result.  On the final pass, this may subtract
	@ too much from the dividend, but the result will be ok, since the
	@ "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	orrcs	result, result, curbit
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	result, result, curbit, lsr #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	result, result, curbit, lsr #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	result, result, curbit, lsr #3
	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	bne	Loop3
Lgot_result:
	mov	r0, result
	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET	pc, lr

Ldiv0:
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	ldmia	sp!, {pc}RETCOND

	SIZE	(__divsi3)
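
/* For reference only: the signed routine records whether the operand signs
   differ (the eor into ip), divides the magnitudes with the unsigned
   algorithm used by __udivsi3, and negates the quotient if needed.
   Roughly, reusing the invented udivsi3_sketch outline from earlier:

	int
	divsi3_sketch (int dividend, int divisor)
	{
	  int negative = (dividend ^ divisor) < 0;	// quotient sign
	  unsigned a = dividend < 0 ? -(unsigned) dividend : (unsigned) dividend;
	  unsigned b = divisor < 0 ? -(unsigned) divisor : (unsigned) divisor;
	  unsigned q = udivsi3_sketch (a, b);

	  return negative ? -(int) q : (int) q;
	}
*/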

#endif /* L_divsi3 */

#ifdef L_modsi3

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl	SYM (__modsi3)
	TYPE	(__modsi3)
	.align 0

SYM (__modsi3):
	mov	curbit, #1
	cmp	divisor, #0
	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
	beq	Ldiv0
	@ We need to save the sign of the dividend; unfortunately ip is
	@ needed later on, so save the original dividend on the stack
	@ instead (this is faster than pushing lr and using that).
	str	dividend, [sp, #-4]!
	cmp	dividend, #0
	rsbmi	dividend, dividend, #0
	cmp	dividend, divisor
	bcc	Lgot_result

Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #4
	movcc	curbit, curbit, lsl #4
	bcc	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #1
	movcc	curbit, curbit, lsl #1
	bcc	Lbignum

Loop3:
	@ Test for possible subtractions.  On the final pass, this may
	@ subtract too much from the dividend, so keep track of which
	@ subtractions are done so that we can fix them up afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	overdone, overdone, curbit, ror #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	overdone, overdone, curbit, ror #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	overdone, overdone, curbit, ror #3
	mov	ip, curbit
	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	bne	Loop3

	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which ones were not
	@ needed is governed by the position of the bit, stored in ip.
	@ If we terminated early, because the dividend became zero, then none
	@ of the tests below will match, since the bit in ip will not be in
	@ the bottom nibble.
	ands	overdone, overdone, #0xe0000000
	beq	Lgot_result
	tst	overdone, ip, ror #3
	addne	dividend, dividend, divisor, lsr #3
	tst	overdone, ip, ror #2
	addne	dividend, dividend, divisor, lsr #2
	tst	overdone, ip, ror #1
	addne	dividend, dividend, divisor, lsr #1
Lgot_result:
	ldr	ip, [sp], #4
	cmp	ip, #0
	rsbmi	dividend, dividend, #0
	RET	pc, lr

Ldiv0:
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	ldmia	sp!, {pc}RETCOND

	SIZE	(__modsi3)
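
/* For reference only: the remainder takes the sign of the dividend, which
   is why only the original dividend needs to be saved across the unsigned
   computation.  Roughly, reusing the invented umodsi3_sketch outline from
   earlier:

	int
	modsi3_sketch (int dividend, int divisor)
	{
	  unsigned a = dividend < 0 ? -(unsigned) dividend : (unsigned) dividend;
	  unsigned b = divisor < 0 ? -(unsigned) divisor : (unsigned) divisor;
	  unsigned r = umodsi3_sketch (a, b);

	  return dividend < 0 ? -(int) r : (int) r;	// remainder follows the dividend
	}
*/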

#endif /* L_modsi3 */

#ifdef L_dvmd_tls

	.globl	SYM (__div0)
	TYPE	(__div0)
	.align 0
SYM (__div0):
	RET	pc, lr

	SIZE	(__div0)

#endif /* L_dvmd_tls */

#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls.

#include <asm/unistd.h>

#define SIGFPE	8			@ can't use <asm/signal.h> as it
					@ contains too much C rubbish
	.globl	SYM (__div0)
	TYPE	(__div0)
	.align 0
SYM (__div0):
	stmfd	sp!, {r1, lr}
	swi	__NR_getpid
	cmn	r0, #1000
	ldmhsfd	sp!, {r1, pc}RETCOND	@ not much we can do
	mov	r1, #SIGFPE
	swi	__NR_kill
	ldmfd	sp!, {r1, pc}RETCOND

	SIZE 	(__div0)
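
/* For reference only: the sequence above is roughly the C call below (r1 is
   saved because the kill system call passes the signal number in it):

	#include <signal.h>
	#include <unistd.h>

	void div0_sketch (void)
	{
	  kill (getpid (), SIGFPE);
	}

   The cmn against #1000 bails out before the kill if getpid itself returned
   a negative error code, in which case there is not much we can do.  */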

#endif /* L_dvmd_lnx */

/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

#ifdef L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of the function to be called is loaded into a register and
   then one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
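
/* For example (illustrative only), a Thumb caller reaching a function whose
   address it holds in r3 would be compiled roughly as:

	ldr	r3, =function_address	@ bit 0 of the address selects Thumb
	bl	_call_via_r3		@ lr := return address, bit 0 set

   The BX in the stub branches to whichever instruction set the address in
   the register selects, and the target returns directly to the original
   Thumb caller through the address left in lr.  */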

	.text
	.align 0
	.code 16
.macro call_via register
	.globl	SYM (_call_via_\register)
	TYPE	(_call_via_\register)
	.thumb_func
SYM (_call_via_\register):
	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

#ifdef L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return
   address into the link register with the bottom bit set, and the code
   here switches to the correct mode before executing the function.
   Unfortunately the target code cannot be relied upon to return via a BX
   instruction, so instead we have to store the return address on the stack
   and allow the called function to return here instead.  Upon return we
   recover the real return address and use a BX to get back to Thumb mode.  */
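
/* For example (illustrative only), a Thumb caller compiled with interworking
   enabled might do:

	ldr	r2, =some_function	@ instruction set of the target unknown
	bl	_interwork_call_via_r2

   If the target turns out to be ARM code (bit 0 of r2 clear), the stub below
   saves the real return address on the stack and points lr at _arm_return,
   so that an ARM-style "mov pc, lr" return still comes back here;
   _arm_return then pops the saved address and uses BX to restore the
   caller's Thumb state.  A Thumb target (bit 0 set) is simply entered with
   lr unchanged and returns normally.  */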

	.text
	.align 0

	.code   32
	.globl _arm_return
_arm_return:
	ldmia 	r13!, {r12}
	bx 	r12
	.code   16

.macro interwork register
	.code   16
	.globl	SYM (_interwork_call_via_\register)
	TYPE	(_interwork_call_via_\register)
	.thumb_func
SYM (_interwork_call_via_\register):
	bx 	pc
	nop

	.code   32
	.globl .Lchange_\register
.Lchange_\register:
	tst	\register, #1
	stmeqdb	r13!, {lr}
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The lr case has to be handled a little differently...  */
	.code 16
	.globl	SYM (_interwork_call_via_lr)
	TYPE	(_interwork_call_via_lr)
	.thumb_func
SYM (_interwork_call_via_lr):
	bx 	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */