/* ieee754-df.S double-precision floating point support for ARM

   Copyright (C) 2003, 2004, 2005  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combined
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 * For slightly simpler code please see the single precision version
 * of this file.
 *
 * Only the default rounding mode is supported, for best performance.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performance.
 */


@ For FPA, float words are always big-endian.
@ For VFP, float words follow the memory system mode.
#if defined(__VFP_FP__) && !defined(__ARMEB__)
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#else
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#endif
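
/* Editorial note (illustrative, not part of the build): the macros above
   only decide which of r0/r1 (and r2/r3) holds the most significant word
   of each double.  A hedged C sketch of the layout, with a hypothetical
   union name:

	#include <stdint.h>

	typedef union {
		double d;
	#if defined(__VFP_FP__) && !defined(__ARMEB__)
		struct { uint32_t xl, xh; } w;	// low word first
	#else
		struct { uint32_t xh, xl; } w;	// high word first
	#endif
	} dwords;

   For example, 1.0 has xh = 0x3ff00000 and xl = 0x00000000.  */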


#ifdef L_negdf2

ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2

	@ flip sign bit
	eor	xh, xh, #0x80000000
	RET

	FUNC_END aeabi_dneg
	FUNC_END negdf2

#endif

#ifdef L_addsubdf3

ARM_FUNC_START aeabi_drsub

	eor	xh, xh, #0x80000000	@ flip sign bit of first arg
	b	1f

ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3

	eor	yh, yh, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3

1:	stmfd	sp!, {r4, r5, lr}

	@ Look for zeroes, equal values, INF, or NAN.
	mov	r4, xh, lsl #1
	mov	r5, yh, lsl #1
	teq	r4, r5
	teqeq	xl, yl
	orrnes	ip, r4, xl
	orrnes	ip, r5, yl
	mvnnes	ip, r4, asr #21
	mvnnes	ip, r5, asr #21
	beq	LSYM(Lad_s)
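
/* A note on the screen above (illustrative only): with r4 = xh << 1 the
   sign is discarded and the 11 exponent bits sit at the top, so an
   arithmetic shift right by 21 yields all-ones exactly when the exponent
   field is 0x7ff; MVNS of that sets Z for INF/NAN.  A hedged C
   equivalent of the whole filter, with hypothetical names:

	static int is_special(uint32_t xh, uint32_t xl,
			      uint32_t yh, uint32_t yl)
	{
		uint32_t r4 = xh << 1, r5 = yh << 1;
		if (r4 == r5 && xl == yl)		// x == +/- y
			return 1;
		if ((r4 | xl) == 0 || (r5 | yl) == 0)	// x or y is +/- 0
			return 1;
		if (~((int32_t)r4 >> 21) == 0		// x is INF/NAN
		    || ~((int32_t)r5 >> 21) == 0)	// y is INF/NAN
			return 1;
		return 0;
	}
*/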

	@ Compute exponent difference.  Make largest exponent in r4,
	@ corresponding arg in xh-xl, and positive exponent difference in r5.
	mov	r4, r4, lsr #21
	rsbs	r5, r4, r5, lsr #21
	rsblt	r5, r5, #0
	ble	1f
	add	r4, r4, r5
	eor	yl, xl, yl
	eor	yh, xh, yh
	eor	xl, yl, xl
	eor	xh, yh, xh
	eor	yl, xl, yl
	eor	yh, xh, yh
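
/* The six EORs above are the classic XOR swap applied to both word
   pairs, exchanging x and y without a scratch register so that the
   argument with the larger exponent always ends up in xh-xl.  A minimal
   C sketch of one swap (hypothetical helper name):

	static void xorswap(uint32_t *a, uint32_t *b)
	{
		*a ^= *b;
		*b ^= *a;
		*a ^= *b;
	}
*/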
1:
	@ If exponent difference is too large, return largest argument
	@ already in xh-xl.  We need up to 54 bits to handle proper rounding
	@ of 0x1p54 - 1.1.
	cmp	r5, #54
	RETLDM	"r4, r5" hi

	@ Convert mantissa to signed integer.
	tst	xh, #0x80000000
	mov	xh, xh, lsl #12
	mov	ip, #0x00100000
	orr	xh, ip, xh, lsr #12
	beq	1f
	rsbs	xl, xl, #0
	rsc	xh, xh, #0
1:
	tst	yh, #0x80000000
	mov	yh, yh, lsl #12
	orr	yh, ip, yh, lsr #12
	beq	1f
	rsbs	yl, yl, #0
	rsc	yh, yh, #0
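
/* The RSBS/RSC pairs above negate a 64-bit value: the low word is
   subtracted from zero with flags, and the high word then absorbs the
   borrow.  A hedged C model (hypothetical helper name):

	static void neg64(uint32_t *hi, uint32_t *lo)
	{
		uint32_t borrow = (*lo != 0);	// borrow iff low word non-zero
		*lo = 0 - *lo;
		*hi = 0 - *hi - borrow;
	}
*/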
1:
	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not a common case, rescale them off line.
	teq	r4, r5
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Compensate for the exponent overlapping the mantissa MSB added later
	sub	r4, r4, #1

	@ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
	rsbs	lr, r5, #32
	blt	1f
	mov	ip, yl, lsl lr
	adds	xl, xl, yl, lsr r5
	adc	xh, xh, #0
	adds	xl, xl, yh, lsl lr
	adcs	xh, xh, yh, asr r5
	b	2f
1:	sub	r5, r5, #32
	add	lr, lr, #32
	cmp	yl, #1
	mov	ip, yh, lsl lr
	orrcs	ip, ip, #2		@ 2 not 1, to allow lsr #1 later
	adds	xl, xl, yh, asr r5
	adcs	xh, xh, yh, asr #31
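
/* Both paths above implement one operation: shift the 64-bit addend
   right by r5 (0..54) and keep the shifted-out bits, left-aligned in a
   third word, so rounding can still see them.  A hedged C model; it
   keeps all dropped bits where the code collapses the lowest ones into
   the sticky bit (bit 1), which rounds identically:

	#include <stdint.h>

	static int64_t shift_keep(int64_t y, unsigned n, uint32_t *lost)
	{
		uint64_t dropped = n ? (uint64_t)y << (64 - n) : 0;
		*lost = (uint32_t)(dropped >> 32)
		      | ((uint32_t)dropped ? 2 : 0);	// sticky
		return y >> n;
	}
*/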
2:
	@ We now have a result in xh-xl-ip.
	@ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
	and	r5, xh, #0x80000000
	bpl	LSYM(Lad_p)
	rsbs	ip, ip, #0
	rscs	xl, xl, #0
	rsc	xh, xh, #0

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	xh, #0x00100000
	bcc	LSYM(Lad_a)
	cmp	xh, #0x00200000
	bcc	LSYM(Lad_e)

	@ Result needs to be shifted right.
	movs	xh, xh, lsr #1
	movs	xl, xl, rrx
	mov	ip, ip, rrx
	add	r4, r4, #1

	@ Make sure we did not bust our exponent.
	mov	r2, r4, lsl #21
	cmn	r2, #(2 << 21)
	bcs	LSYM(Lad_o)

	@ Our result is now properly aligned into xh-xl, remaining bits in ip.
	@ Round with MSB of ip. If halfway between two numbers, round towards
	@ LSB of xl = 0.
	@ Pack final result together.
LSYM(Lad_e):
	cmp	ip, #0x80000000
	moveqs	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	orr	xh, xh, r5
	RETLDM	"r4, r5"
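
/* The three lines before the exponent merge are round-to-nearest-even:
   the carry into ADCS is 1 when the discarded bits (ip) exceed half an
   ulp, and on an exact tie it becomes the LSB of xl, so the result is
   rounded up only when that makes it even.  A hedged C rendering
   (hypothetical helper name):

	static uint64_t round_nearest_even(uint64_t mant, uint32_t rest)
	{
		if (rest > 0x80000000u
		    || (rest == 0x80000000u && (mant & 1)))
			return mant + 1;	// may carry into the exponent,
		return mant;			// which is the desired behaviour
	}
*/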

	@ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
	movs	ip, ip, lsl #1
	adcs	xl, xl, xl
	adc	xh, xh, xh
	tst	xh, #0x00100000
	sub	r4, r4, #1
	bne	LSYM(Lad_e)

	@ No rounding necessary since ip will always be 0 at this point.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

	teq	xh, #0
	movne	r3, #20
	moveq	r3, #52
	moveq	xh, xl
	moveq	xl, #0
	mov	r2, xh
	cmp	r2, #(1 << 16)
	movhs	r2, r2, lsr #16
	subhs	r3, r3, #16
	cmp	r2, #(1 << 8)
	movhs	r2, r2, lsr #8
	subhs	r3, r3, #8
	cmp	r2, #(1 << 4)
	movhs	r2, r2, lsr #4
	subhs	r3, r3, #4
	cmp	r2, #(1 << 2)
	subhs	r3, r3, #2
	sublo	r3, r3, r2, lsr #1
	sub	r3, r3, r2, lsr #3

#else

	teq	xh, #0
	moveq	xh, xl
	moveq	xl, #0
	clz	r3, xh
	addeq	r3, r3, #32
	sub	r3, r3, #11

#endif
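
/* The pre-ARMv5 sequence above is a software CLZ: a binary search over
   16/8/4/2-bit halves, folded into the r3 offset needed by the code
   below.  A hedged C sketch of the underlying count:

	static int clz32(uint32_t x)	// also copes with x == 0
	{
		int n = 32;
		if (x >> 16) { x >>= 16; n -= 16; }
		if (x >> 8)  { x >>= 8;  n -= 8;  }
		if (x >> 4)  { x >>= 4;  n -= 4;  }
		if (x >> 2)  { x >>= 2;  n -= 2;  }
		if (x >> 1)  return n - 2;	// x is 2 or 3
		return n - x;			// x is 0 or 1
	}
*/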

	@ determine how to shift the value.
	subs	r2, r3, #32
	bge	2f
	adds	r2, r2, #12
	ble	1f

	@ shift value left 21 to 31 bits, or actually right 11 to 1 bits
	@ since a register switch happened above.
	add	ip, r2, #20
	rsb	r2, r2, #12
	mov	xl, xh, lsl ip
	mov	xh, xh, lsr r2
	b	3f

	@ actually shift value left 1 to 20 bits, which might also represent
	@ 32 to 52 bits if counting the register switch that happened earlier.
1:	add	r2, r2, #20
2:	rsble	ip, r2, #32
	mov	xh, xh, lsl r2
	orrle	xh, xh, xl, lsr ip
	movle	xl, xl, lsl r2

	@ adjust exponent accordingly.
3:	subs	r4, r4, r3
	addge	xh, xh, r4, lsl #20
	orrge	xh, xh, r5
	RETLDM	"r4, r5" ge

	@ Exponent too small, denormalize result.
	@ Find out proper shift value.
	mvn	r4, r4
	subs	r4, r4, #31
	bge	2f
	adds	r4, r4, #12
	bgt	1f
	@ shift result right by 1 to 20 bits, sign is in r5.
	add	r4, r4, #20
	rsb	r2, r4, #32
	mov	xl, xl, lsr r4
	orr	xl, xl, xh, lsl r2
	orr	xh, r5, xh, lsr r4
	RETLDM	"r4, r5"

	@ shift result right by 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl.
1:	rsb	r4, r4, #12
	rsb	r2, r4, #32
	mov	xl, xl, lsr r2
	orr	xl, xl, xh, lsl r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Shift value right by 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.
2:	mov	xl, xh, lsr r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Adjust exponents for denormalized arguments.
	@ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
	teq	r4, #0
	eor	yh, yh, #0x00100000
	eoreq	xh, xh, #0x00100000
	addeq	r4, r4, #1
	subne	r5, r5, #1
	b	LSYM(Lad_x)


LSYM(Lad_s):
	mvns	ip, r4, asr #21
	mvnnes	ip, r5, asr #21
	beq	LSYM(Lad_i)

	teq	r4, r5
	teqeq	xl, yl
	beq	1f

	@ Result is x + 0.0 = x or 0.0 + y = y.
	orrs	ip, r4, xl
	moveq	xh, yh
	moveq	xl, yl
	RETLDM	"r4, r5"

1:	teq	xh, yh

	@ Result is x - x = 0.
	movne	xh, #0
	movne	xl, #0
	RETLDM	"r4, r5" ne

	@ Result is x + x = 2x.
	movs	ip, r4, lsr #21
	bne	2f
	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	orrcs	xh, xh, #0x80000000
	RETLDM	"r4, r5"
2:	adds	r4, r4, #(2 << 21)
	addcc	xh, xh, #(1 << 20)
	RETLDM	"r4, r5" cc
	and	r5, xh, #0x80000000

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	xh, r5, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5"

	@ At least one of x or y is INF/NAN.
	@   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
	@   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
	@   if either is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
	mvns	ip, r4, asr #21
	movne	xh, yh
	movne	xl, yl
	mvneqs	ip, r5, asr #21
	movne	yh, xh
	movne	yl, xl
	orrs	r4, xl, xh, lsl #12
	orreqs	r5, yl, yh, lsl #12
	teqeq	xh, yh
	orrne	xh, xh, #0x00080000	@ quiet NAN
	RETLDM	"r4, r5"

	FUNC_END aeabi_dsub
	FUNC_END subdf3
	FUNC_END aeabi_dadd
	FUNC_END adddf3

ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf

	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	mov	r5, #0			@ sign bit is 0
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_ui2d
	FUNC_END floatunsidf
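
/* Why 0x432: the integer is parked in xl with the exponent preset to
   0x400 + 50 = 1074, and the Lad_l normalisation tail then subtracts
   the shift distance and adds the implicit-bit carry, ending at the
   correct biased exponent.  A hedged, self-contained C model of the
   overall conversion (not the actual code path):

	#include <stdint.h>

	static double ui2d(uint32_t u)
	{
		union { double d; uint64_t b; } r;
		int p = 31;
		if (u == 0)
			return 0.0;
		while (!(u >> p))		// position of the leading 1
			p--;
		r.b = ((uint64_t)(1023 + p) << 52)
		    | (((uint64_t)u << (52 - p)) & (((uint64_t)1 << 52) - 1));
		return r.d;
	}
*/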

ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf

	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	ands	r5, r0, #0x80000000	@ sign bit in r5
	rsbmi	r0, r0, #0		@ absolute value
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_i2d
	FUNC_END floatsidf

ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2

	movs	r2, r0, lsl #1		@ toss sign bit
	mov	xh, r2, asr #3		@ stretch exponent
	mov	xh, xh, rrx		@ retrieve sign bit
	mov	xl, r2, lsl #28		@ retrieve remaining bits
	andnes	r3, r2, #0xff000000	@ isolate exponent
	teqne	r3, #0xff000000		@ if not 0, check if INF or NAN
	eorne	xh, xh, #0x38000000	@ fixup exponent otherwise.
	RETc(ne)			@ and return it.

	teq	r2, #0			@ if actually 0
	teqne	r3, #0xff000000		@ or INF or NAN
	RETc(eq)			@ we are done already.

	@ value was denormalized.  We can normalize it now.
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x380		@ setup corresponding exponent
	and	r5, xh, #0x80000000	@ move sign bit in r5
	bic	xh, xh, #0x80000000
	b	LSYM(Lad_l)

	FUNC_END aeabi_f2d
	FUNC_END extendsfdf2
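
/* Widening mostly re-biases the exponent: the 8-bit field (bias 127)
   becomes an 11-bit field (bias 1023), i.e. e + 896 for normal values,
   which the EOR with 0x38000000 achieves after the ASR #3 stretch.  A
   hedged C model of the normal-value case (hypothetical name):

	static uint64_t f2d_bits(uint32_t f)	// normal values only
	{
		uint32_t sign = f & 0x80000000u;
		uint32_t exp  = (f >> 23) & 0xff;
		uint32_t frac = f & 0x007fffffu;
		return ((uint64_t)sign << 32)
		     | ((uint64_t)(exp - 127 + 1023) << 52)
		     | ((uint64_t)frac << 29);
	}
*/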

ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqd	f0, #0.0
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	stmfd	sp!, {r4, r5, ip, lr}
#else
	stmfd	sp!, {r4, r5, lr}
#endif

	mov	r5, #0
	b	2f

ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqd	f0, #0.0
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	stmfd	sp!, {r4, r5, ip, lr}
#else
	stmfd	sp!, {r4, r5, lr}
#endif

	ands	r5, ah, #0x80000000	@ sign bit in r5
	bpl	2f
	rsbs	al, al, #0
	rsc	ah, ah, #0
2:
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)

	@ FPA little-endian: must swap the word order.
	.ifnc	xh, ah
	mov	ip, al
	mov	xh, ah
	mov	xl, ip
	.endif

	movs	ip, xh, lsr #22
	beq	LSYM(Lad_p)

	@ The value is too big.  Scale it down a bit...
	mov	r2, #3
	movs	ip, ip, lsr #3
	addne	r2, r2, #3
	movs	ip, ip, lsr #3
	addne	r2, r2, #3
	add	r2, r2, ip, lsr #3

	rsb	r3, r2, #32
	mov	ip, xl, lsl r3
	mov	xl, xl, lsr r2
	orr	xl, xl, xh, lsl r3
	mov	xh, xh, lsr r2
	add	r4, r4, r2
	b	LSYM(Lad_p)
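
/* A 64-bit integer can carry up to 63 significant bits while the add
   path copes with at most 54, so larger values are pre-shifted right,
   the shift estimated in steps of 3, with the dropped bits parked in ip
   for rounding.  A hedged C model of the same idea:

	static uint64_t prescale(uint64_t v, int *exp, uint32_t *lost)
	{
		int n = 0;
		while ((v >> n) >> 54)		// same granularity as above
			n += 3;
		*lost = n ? (uint32_t)v << (32 - n) : 0;
		*exp += n;
		return v >> n;
	}
*/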

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

	@ Legacy code expects the result to be returned in f0.  Copy it
	@ there as well.
LSYM(f0_ret):
	stmfd	sp!, {r0, r1}
	ldfd	f0, [sp], #8
	RETLDM

#endif

	FUNC_END floatdidf
	FUNC_END aeabi_l2d
	FUNC_END floatundidf
	FUNC_END aeabi_ul2d

#endif /* L_addsubdf3 */

#ifdef L_muldivdf3

ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
	stmfd	sp!, {r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	andnes	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Lml_s)

	@ Add exponents together
	add	r4, r4, r5

	@ Determine final sign.
	eor	r6, xh, yh

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	bic	xh, xh, ip, lsl #21
	bic	yh, yh, ip, lsl #21
	orrs	r5, xl, xh, lsl #12
	orrnes	r5, yl, yh, lsl #12
	orr	xh, xh, #0x00100000
	orr	yh, yh, #0x00100000
	beq	LSYM(Lml_1)

#if __ARM_ARCH__ < 4

	@ Put sign bit in r6, which will be restored in yl later.
	and   r6, r6, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
	stmfd	sp!, {r6, r7, r8, r9, sl, fp}
	mov	r7, xl, lsr #16
	mov	r8, yl, lsr #16
	mov	r9, xh, lsr #16
	mov	sl, yh, lsr #16
	bic	xl, xl, r7, lsl #16
	bic	yl, yl, r8, lsl #16
	bic	xh, xh, r9, lsl #16
	bic	yh, yh, sl, lsl #16
	mul	ip, xl, yl
	mul	fp, xl, r8
	mov	lr, #0
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, r7, yl
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, xl, sl
	mov	r5, #0
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r7, yh
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, r8
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r9, yl
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, sl
	mul	r6, r9, sl
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, r9, yh
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, xl, yh
	adds	lr, lr, fp
	mul	fp, r7, sl
	adcs	r5, r5, fp
	mul	fp, xh, yl
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, r9, r8
	adcs	r5, r5, fp
	mul	fp, r7, r8
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, xh, yh
	adcs	r5, r5, fp
	adc	r6, r6, #0
	ldmfd	sp!, {yl, r7, r8, r9, sl, fp}

#else

	@ Here is the actual multiplication.
	umull	ip, lr, xl, yl
	mov	r5, #0
	umlal	lr, r5, xh, yl
	and	yl, r6, #0x80000000
	umlal	lr, r5, xl, yh
	mov	r6, #0
	umlal	r5, r6, xh, yh

#endif
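
/* Both variants compute the full 128-bit product of the two mantissas.
   Without UMULL it is done schoolbook-style from 16-bit halves, each
   MUL being an exact 32-bit product.  A hedged C sketch of the same
   scheme, using 32-bit halves for brevity:

	#include <stdint.h>

	static void mul64x64(uint64_t x, uint64_t y,
			     uint64_t *hi, uint64_t *lo)
	{
		uint64_t xl = (uint32_t)x, xh = x >> 32;
		uint64_t yl = (uint32_t)y, yh = y >> 32;
		uint64_t ll = xl * yl, lh = xl * yh;
		uint64_t hl = xh * yl, hh = xh * yh;
		uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;

		*lo = (uint32_t)ll | (mid << 32);
		*hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
	}
*/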

	@ The LSBs in ip are only significant for the final rounding.
	@ Fold them into lr.
	teq	ip, #0
	orrne	lr, lr, #1

	@ Adjust result upon the MSB position.
	sub	r4, r4, #0xff
	cmp	r6, #(1 << (20-11))
	sbc	r4, r4, #0x300
	bcs	1f
	movs	lr, lr, lsl #1
	adcs	r5, r5, r5
	adc	r6, r6, r6
1:
	@ Shift to final position, add sign to result.
	orr	xh, yl, r6, lsl #11
	orr	xh, xh, r5, lsr #21
	mov	xl, r5, lsl #11
	orr	xl, xl, lr, lsr #21
	mov	lr, lr, lsl #11

	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	lr, #0x80000000
	moveqs	lr, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
	and	r6, r6, #0x80000000
	orr	xh, r6, xh
	orr	xl, xl, yl
	eor	xh, xh, yh
	subs	r4, r4, ip, lsr #1
	rsbgts	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	@ Under/overflow: fix things up for the code below.
	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1

LSYM(Lml_u):
	@ Overflow?
	bgt	LSYM(Lml_o)

	@ Check if denormalized result is possible, otherwise return signed 0.
	cmn	r4, #(53 + 1)
	movle	xl, #0
	bicle	xh, xh, #0x7fffffff
	RETLDM	"r4, r5, r6" le

	@ Find out proper shift value.
	rsb	r4, r4, #0
	subs	r4, r4, #32
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right by 1 to 20 bits, preserve sign bit, round, etc.
	add	r4, r4, #20
	rsb	r5, r4, #32
	mov	r3, xl, lsl r5
	mov	xl, xl, lsr r4
	orr	xl, xl, xh, lsl r5
	and	r2, xh, #0x80000000
	bic	xh, xh, #0x80000000
	adds	xl, xl, r3, lsr #31
	adc	xh, r2, xh, lsr r4
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ shift result right by 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl. Then round.
1:	rsb	r4, r4, #12
	rsb	r5, r4, #32
	mov	r3, xl, lsl r4
	mov	xl, xl, lsr r5
	orr	xl, xl, xh, lsl r4
	bic	xh, xh, #0x7fffffff
	adds	xl, xl, r3, lsr #31
	adc	xh, xh, #0
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ Shift value right by 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
2:	rsb	r5, r4, #32
	orr	lr, lr, xl, lsl r5
	mov	r3, xl, lsr r4
	orr	r3, r3, xh, lsl r5
	mov	xl, xh, lsr r4
	bic	xh, xh, #0x7fffffff
	bic	xl, xl, xh, lsr r4
	add	xl, xl, r3, lsr #31
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
	teq	r4, #0
	bne	2f
	and	r6, xh, #0x80000000
1:	movs	xl, xl, lsl #1
	adc	xh, xh, xh
	tst	xh, #0x00100000
	subeq	r4, r4, #1
	beq	1b
	orr	xh, xh, r6
	teq	r5, #0
	movne	pc, lr
2:	and	r6, yh, #0x80000000
3:	movs	yl, yl, lsl #1
	adc	yh, yh, yh
	tst	yh, #0x00100000
	subeq	r5, r5, #1
	beq	3b
	orr	yh, yh, r6
	mov	pc, lr

LSYM(Lml_s):
	@ Isolate the INF and NAN cases away
	teq	r4, ip
	and	r5, ip, yh, lsr #20
	teqne	r5, ip
	beq	1f

	@ Here, one or more arguments are either denormalized or zero.
	orrs	r6, xl, xh, lsl #1
	orrnes	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
	eor	xh, xh, yh
	bic	xh, xh, #0x7fffffff
	mov	xl, #0
	RETLDM	"r4, r5, r6"

1:	@ One or both args are INF or NAN.
	orrs	r6, xl, xh, lsl #1
	moveq	xl, yl
	moveq	xh, yh
	orrnes	r6, yl, yh, lsl #1
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
	teq	r4, ip
	bne	1f
	orrs	r6, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
1:	teq	r5, ip
	bne	LSYM(Lml_i)
	orrs	r6, yl, yh, lsl #12
	movne	xl, yl
	movne	xh, yh
	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
	eor	xh, xh, yh

	@ Overflow: return INF (sign already in xh).
LSYM(Lml_o):
	and	xh, xh, #0x80000000
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5, r6"

	@ Return a quiet NAN.
LSYM(Lml_n):
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f80000
	RETLDM	"r4, r5, r6"

	FUNC_END aeabi_dmul
	FUNC_END muldf3

ARM_FUNC_START divdf3
ARM_FUNC_ALIAS aeabi_ddiv divdf3

	stmfd	sp!, {r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	andnes	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Ldv_s)

	@ Subtract the divisor exponent from the dividend's.
	sub	r4, r4, r5

	@ Preserve final sign into lr.
	eor	lr, xh, yh

	@ Convert mantissa to unsigned integer.
	@ Dividend -> r5-r6, divisor -> yh-yl.
	orrs	r5, yl, yh, lsl #12
	mov	xh, xh, lsl #12
	beq	LSYM(Ldv_1)
	mov	yh, yh, lsl #12
	mov	r5, #0x10000000
	orr	yh, r5, yh, lsr #4
	orr	yh, yh, yl, lsr #24
	mov	yl, yl, lsl #8
	orr	r5, r5, xh, lsr #4
	orr	r5, r5, xl, lsr #24
	mov	r6, xl, lsl #8

	@ Initialize xh with final sign bit.
	and	xh, lr, #0x80000000

	@ Ensure the result will land at a known bit position.
	@ Apply exponent bias accordingly.
	cmp	r5, yh
	cmpeq	r6, yl
	adc	r4, r4, #(255 - 2)
	add	r4, r4, #0x300
	bcs	1f
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
1:
	@ Perform the first subtraction to align the result to a nibble.
	subs	r6, r6, yl
	sbc	r5, r5, yh
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	mov	xl, #0x00100000
	mov	ip, #0x00080000

	@ The actual division loop.
1:	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #1
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #2
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #3

	orrs	lr, r5, r6
	beq	2f
	mov	r5, r5, lsl #4
	orr	r5, r5, r6, lsr #28
	mov	r6, r6, lsl #4
	mov	yh, yh, lsl #3
	orr	yh, yh, yl, lsr #29
	mov	yl, yl, lsl #3
	movs	ip, ip, lsr #4
	bne	1b

	@ We are done with a word of the result.
	@ Loop again for the low word if this pass was for the high word.
	tst	xh, #0x00100000
	bne	3f
	orr	xh, xh, xl
	mov	xl, #0
	mov	ip, #0x80000000
	b	1b
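
/* The loop above is classic restoring division: one quotient bit per
   trial subtraction, unrolled four bits per pass, with divisor and
   remainder renormalised a nibble at a time at the bottom.  A hedged C
   model of a single bit step (hypothetical helper):

	static void div_step(uint64_t *rem, uint64_t div,
			     uint64_t *quo, uint64_t bit)
	{
		if (*rem >= div) {	// trial subtraction succeeds
			*rem -= div;
			*quo |= bit;	// record this quotient bit
		}
		// caller then shifts div (or rem) and moves bit down
	}
*/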
2:
	@ Be sure result starts in the high word.
	tst	xh, #0x00100000
	orreq	xh, xh, xl
	moveq	xl, #0
3:
	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	subs	ip, r5, yh
	subeqs	ip, r6, yl
	moveqs	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
	and	lr, lr, #0x80000000
	orr	xh, lr, xh, lsr #12
	adds	r4, r4, ip, lsr #1
	rsbgts	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1
	b	LSYM(Lml_u)

	@ Result might need to be denormalized: put remainder bits
	@ in lr for rounding considerations.
LSYM(Ldv_u):
	orr	lr, r5, r6
	b	LSYM(Lml_u)

	@ One or both arguments are either INF, NAN or zero.
LSYM(Ldv_s):
	and	r5, ip, yh, lsr #20
	teq	r4, ip
	teqeq	r5, ip
	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
	teq	r4, ip
	bne	1f
	orrs	r4, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
	teq	r5, ip
	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
1:	teq	r5, ip
	bne	2f
	orrs	r5, yl, yh, lsl #12
	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
2:	@ If both are nonzero, we need to normalize and resume above.
	orrs	r6, xl, xh, lsl #1
	orrnes	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)
	@ One or both arguments are 0.
	orrs	r4, xl, xh, lsl #1
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	orrs	r5, yl, yh, lsl #1
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN

	FUNC_END aeabi_ddiv
	FUNC_END divdf3

#endif /* L_muldivdf3 */

#ifdef L_cmpdf2

@ Note: only r0 (return value) and ip are clobbered here.

ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
	mov	ip, #-1
	b	1f

ARM_FUNC_START ltdf2
ARM_FUNC_ALIAS ledf2 ltdf2
	mov	ip, #1
	b	1f

ARM_FUNC_START cmpdf2
ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
	mov	ip, #1			@ how should we specify unordered here?

1:	str	ip, [sp, #-4]

	@ Trap any INF/NAN first.
	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	mov	ip, yh, lsl #1
	mvnnes	ip, ip, asr #21
	beq	3f

	@ Test for equality.
	@ Note that 0.0 is equal to -0.0.
2:	orrs	ip, xl, xh, lsl #1	@ if x == 0.0 or -0.0
	orreqs	ip, yl, yh, lsl #1	@ and y == 0.0 or -0.0
	teqne	xh, yh			@ or xh == yh
	teqeq	xl, yl			@ and xl == yl
	moveq	r0, #0			@ then equal.
	RETc(eq)

	@ Clear C flag
	cmn	r0, #0

	@ Compare sign,
	teq	xh, yh

	@ Compare values if same sign
	cmppl	xh, yh
	cmpeq	xl, yl

	@ Result:
	movcs	r0, yh, asr #31
	mvncc	r0, yh, asr #31
	orr	r0, r0, #1
	RET
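
/* The fall-through comparison works because finite IEEE values of equal
   sign order exactly like their bit patterns.  A hedged C equivalent,
   operating on raw bit patterns with NANs assumed already filtered out:

	#include <stdint.h>

	static int dcmp(uint64_t xb, uint64_t yb)
	{
		if ((xb | yb) << 1 == 0) return 0;	// +0 == -0
		if (xb == yb) return 0;
		if ((xb ^ yb) >> 63)			// different signs
			return (xb >> 63) ? -1 : 1;
		if (xb >> 63)				// both negative:
			return xb > yb ? -1 : 1;	// order reverses
		return xb < yb ? -1 : 1;
	}
*/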

	@ Look for a NAN.
3:	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	4f
	orrs	ip, xl, xh, lsl #12
	bne	5f			@ x is NAN
4:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2b
	orrs	ip, yl, yh, lsl #12
	beq	2b			@ y is not NAN
5:	ldr	r0, [sp, #-4]		@ unordered return code
	RET

	FUNC_END gedf2
	FUNC_END gtdf2
	FUNC_END ledf2
	FUNC_END ltdf2
	FUNC_END nedf2
	FUNC_END eqdf2
	FUNC_END cmpdf2

ARM_FUNC_START aeabi_cdrcmple

	mov	ip, r0
	mov	r0, r2
	mov	r2, ip
	mov	ip, r1
	mov	r1, r3
	mov	r3, ip
	b	6f

ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	stmfd	sp!, {r0, lr}
	ARM_CALL cmpdf2
	@ Set the Z flag correctly, and the C flag unconditionally.
	cmp	 r0, #0
	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.
	cmnmi	 r0, #0
	RETLDM   "r0"

	FUNC_END aeabi_cdcmple
	FUNC_END aeabi_cdcmpeq
	FUNC_END aeabi_cdrcmple

ARM_FUNC_START	aeabi_dcmpeq

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	moveq	r0, #1	@ Equal to.
	movne	r0, #0	@ Less than, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpeq

ARM_FUNC_START	aeabi_dcmplt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	movcc	r0, #1	@ Less than.
	movcs	r0, #0	@ Equal to, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmplt

ARM_FUNC_START	aeabi_dcmple

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	movls	r0, #1  @ Less than or equal to.
	movhi	r0, #0	@ Greater than or unordered.
	RETLDM

	FUNC_END aeabi_dcmple

ARM_FUNC_START	aeabi_dcmpge

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdrcmple
	movls	r0, #1	@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0	@ Operand 2 greater than operand 1, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpge

ARM_FUNC_START	aeabi_dcmpgt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdrcmple
	movcc	r0, #1	@ Operand 2 is less than operand 1.
	movcs	r0, #0  @ Operand 2 is greater than or equal to operand 1,
			@ or they are unordered.
	RETLDM

	FUNC_END aeabi_dcmpgt

#endif /* L_cmpdf2 */

#ifdef L_unorddf2

ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2

	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	1f
	orrs	ip, xl, xh, lsl #12
	bne	3f			@ x is NAN
1:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2f
	orrs	ip, yl, yh, lsl #12
	bne	3f			@ y is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET

3:	mov	r0, #1			@ arguments are unordered.
	RET

	FUNC_END aeabi_dcmpun
	FUNC_END unorddf2

#endif /* L_unorddf2 */

#ifdef L_fixdfsi

ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi

	@ check exponent range.
	mov	r2, xh, lsl #1
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bls	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	tst	xh, #0x80000000		@ the sign bit
	mov	r0, r3, lsr r2
	rsbne	r0, r0, #0
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ x is NAN.
3:	ands	r0, xh, #0x80000000	@ the sign bit
	moveq	r0, #0x7fffffff		@ maximum signed positive si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2iz
	FUNC_END fixdfsi
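
/* What the in-range path above computes, as a hedged C model: extract
   the unbiased exponent, rebuild the top 32 mantissa bits with the
   implicit 1 at bit 31, and shift right by (31 - exponent):

	static int32_t d2iz(uint32_t xh, uint32_t xl)
	{
		int e = (int)((xh >> 20) & 0x7ff) - 1023;	// 0 <= e <= 30 here
		uint32_t m = 0x80000000u | (xh << 11) | (xl >> 21);
		int32_t v = (int32_t)(m >> (31 - e));
		return (xh & 0x80000000u) ? -v : v;
	}
*/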

#endif /* L_fixdfsi */

#ifdef L_fixunsdfsi

ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi

	@ check exponent range.
	movs	r2, xh, lsl #1
	bcs	1f			@ value is negative
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bmi	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	mov	r0, r3, lsr r2
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ value is NAN.
3:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2uiz
	FUNC_END fixunsdfsi

#endif /* L_fixunsdfsi */

#ifdef L_truncdfsf2

ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2

	@ check exponent range.
	mov	r2, xh, lsl #1
	subs	r3, r2, #((1023 - 127) << 21)
	subcss	ip, r3, #(1 << 21)
	rsbcss	ip, ip, #(254 << 21)
	bls	2f			@ value is out of range

1:	@ shift and round mantissa
	and	ip, xh, #0x80000000
	mov	r2, xl, lsl #3
	orr	xl, ip, xl, lsr #29
	cmp	r2, #0x80000000
	adc	r0, xl, r3, lsl #2
	biceq	r0, r0, #1
	RET

2:	@ either overflow or underflow
	tst	xh, #0x40000000
	bne	3f			@ overflow

	@ check if denormalized value is possible
	adds	r2, r3, #(23 << 21)
	andlt	r0, xh, #0x80000000	@ too small, return signed 0.
	RETc(lt)

	@ denormalize value so we can resume with the code above afterwards.
	orr	xh, xh, #0x00100000
	mov	r2, r2, lsr #21
	rsb	r2, r2, #24
	rsb	ip, r2, #32
	movs	r3, xl, lsl ip
	mov	xl, xl, lsr r2
	orrne	xl, xl, #1		@ fold r3 for rounding considerations.
	mov	r3, xh, lsl #11
	mov	r3, r3, lsr #11
	orr	xl, xl, r3, lsl ip
	mov	r3, r3, lsr r2
	mov	r3, r3, lsl #1
	b	1b

3:	@ check for NAN
	mvns	r3, r2, asr #21
	bne	5f			@ simple overflow
	orrs	r3, xl, xh, lsl #12
	movne	r0, #0x7f000000
	orrne	r0, r0, #0x00c00000
	RETc(ne)			@ return NAN

5:	@ return INF with sign
	and	r0, xh, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	FUNC_END aeabi_d2f
	FUNC_END truncdfsf2
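
/* Narrowing, as a hedged C model of the in-range path above: re-bias the
   exponent down by 896 (1023 - 127), keep the top 23 fraction bits, and
   round to nearest-even on the 29 discarded bits:

	static uint32_t d2f_bits(uint64_t d)	// normal, in-range input
	{
		uint32_t sign = (uint32_t)(d >> 32) & 0x80000000u;
		uint32_t exp  = (uint32_t)((d >> 52) & 0x7ff) - 1023 + 127;
		uint32_t frac = (uint32_t)(d >> 29) & 0x007fffffu;
		uint32_t rest = (uint32_t)d << 3;	// dropped bits, left-aligned
		uint32_t f = sign | (exp << 23) | frac;

		if (rest > 0x80000000u
		    || (rest == 0x80000000u && (f & 1)))
			f++;		// tie rounds to even, carry is correct
		return f;
	}
*/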

#endif /* L_truncdfsf2 */
