1/* Copyright (C) 2008-2020 Free Software Foundation, Inc.
2   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3		on behalf of Synopsys Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24<http://www.gnu.org/licenses/>.  */
25
26/*
27   - Calculate a 15..18 bit inverse using a table of approximating
28     polynomials.  Precision is higher for the polynomials used to evaluate
29     input with larger value.
30   - Do one Newton-Raphson iteration step to double the precision,
31     then multiply this with the divisor
32	-> more time to decide if dividend is subnormal
33     - The worst error propagation is on the side of the value range
34       with the least initial defect, thus giving us about 30 bits precision.
35 */
36#include "../arc-ieee-754.h"
/* Names for the multiplier result registers.  After a mulu64 sequence the
   code below reads the product through these: mhi where it wants the upper
   part (e.g. the rounding step) and mlo where it wants the lower part
   (e.g. the exact-rounding comparison).  */
37#define mlo acc2
38#define mhi acc1
/* mul64 / mulu64 each expand to two instructions on one source line,
   separated by a backquote.  Going by the mnemonics these are a low-word
   multiply followed by a high-word multiply-accumulate (unsigned forms for
   mulu64).  Both are written with destination 0, so the result is only
   picked up through mhi / mlo afterwards.  mul64 is not used in the code
   visible in this file.  */
39#define mul64(b,c) mullw 0,b,c` machlw 0,b,c
40#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c
41
/* Debug harness, compiled out by the #if 0 below.
   When enabled it becomes the public __divsf3: it calls __divsf3_c and
   __divsf3_asm with the same two arguments (r0, r1) and branches to abort
   when the two results differ.  The #define at the end renames the real
   implementation further down to __divsf3_asm.  */
42#if 0 /* DEBUG */
43	.global __divsf3
44	FUNC(__divsf3)
45	.balign 4
46__divsf3:
47	push_s blink
48	push_s r1
49	bl.d __divsf3_c
50	push_s r0 ; (delay slot) stack now holds r0, r1, blink from the top
51	ld_s r1,[sp,4] ; reload the saved second argument
52	st_s r0,[sp,4] ; park the __divsf3_c result in that stack slot
53	bl.d __divsf3_asm
54	pop_s r0 ; (delay slot) restore the first argument
55	pop_s r1 ; r1 = parked __divsf3_c result
56	pop_s blink
57	cmp r0,r1 ; compare the two results bit for bit
58#if 1
59	bne abort
60	jeq_s [blink] ; results agree: return
61	b abort ; only reached if neither conditional instruction above acted
62#else
63	bne abort
64	j_s [blink]
65#endif
66	ENDFUNC(__divsf3)
67#define __divsf3 __divsf3_asm
68#endif /* DEBUG */
69
/* .Ldivtab: 64-word lookup table for the initial reciprocal estimate.
   __divsf3 indexes it with a 6-bit value taken from the top fraction bits
   of the divisor (a bmsk with bit number 5 followed by a scaled ld.as).
   Each word is used twice: as a multiplicand, and - shifted left by 13,
   which drops its upper 13 bits - as the minuend of the subtract that
   yields the estimate.  So one word presumably packs the coefficients of
   one approximating polynomial; the exact bit layout is not spelled out in
   this file.
   .L7f800000 is the single-precision exponent mask that __divsf3 loads
   into r9 on entry.
   Both labels are reached from __divsf3 through hard-coded pc-relative
   offsets, so the table must keep its distance to the entry point.  */
70	FUNC(__divsf3)
71	.balign 4
72.Ldivtab:
73	.long 0xfc0ffff0
74	.long 0xf46ffefd
75	.long 0xed1ffd2a
76	.long 0xe627fa8e
77	.long 0xdf7ff73b
78	.long 0xd917f33b
79	.long 0xd2f7eea3
80	.long 0xcd1fe986
81	.long 0xc77fe3e7
82	.long 0xc21fdddb
83	.long 0xbcefd760
84	.long 0xb7f7d08c
85	.long 0xb32fc960
86	.long 0xae97c1ea
87	.long 0xaa27ba26
88	.long 0xa5e7b22e
89	.long 0xa1cfa9fe
90	.long 0x9ddfa1a0
91	.long 0x9a0f990c
92	.long 0x9667905d
93	.long 0x92df878a
94	.long 0x8f6f7e84
95	.long 0x8c27757e
96	.long 0x88f76c54
97	.long 0x85df630c
98	.long 0x82e759c5
99	.long 0x8007506d
100	.long 0x7d3f470a
101	.long 0x7a8f3da2
102	.long 0x77ef341e
103	.long 0x756f2abe
104	.long 0x72f7212d
105	.long 0x709717ad
106	.long 0x6e4f0e44
107	.long 0x6c1704d6
108	.long 0x69e6fb44
109	.long 0x67cef1d7
110	.long 0x65c6e872
111	.long 0x63cedf18
112	.long 0x61e6d5cd
113	.long 0x6006cc6d
114	.long 0x5e36c323
115	.long 0x5c76b9f3
116	.long 0x5abeb0b7
117	.long 0x5916a79b
118	.long 0x57769e77
119	.long 0x55de954d
120	.long 0x54568c4e
121	.long 0x52d6834d
122	.long 0x51667a7f
123	.long 0x4ffe71b5
124	.long 0x4e9e68f1
125	.long 0x4d466035
126	.long 0x4bf65784
127	.long 0x4aae4ede
128	.long 0x496e4646
129	.long 0x48363dbd
130	.long 0x47063547
131	.long 0x45de2ce5
132	.long 0x44be2498
133	.long 0x43a61c64
134	.long 0x4296144a
135	.long 0x41860c0e
136	.long 0x407e03ee
137.L7f800000:
138	.long 0x7f800000
/* Exit stubs shared by the special-operand paths of __divsf3.
   They sit between the lookup table and the __divsf3 entry point, so their
   total size is baked into the hard-coded pc-relative offsets used there:
   do not add or remove instructions here without updating those offsets.

   .Linf_NaN  in:  r0 = dividend, r1 = divisor, r9 = 0x7f800000
	      out: r0 = NaN if the dividend is +-0, otherwise an infinity
		   whose sign is the XOR of the operand signs.
   .Lret0     in:  r0 = dividend, r1 = divisor
	      out: r0 = zero whose sign is the XOR of the operand signs.
   Both overwrite r1.  The sub.eq in .Linf_NaN still tests the Z flag from
   the bclr.f at the top; none of the instructions in between is a
   flag-setting (.f) form.

   NOTE(review): only the zero test separates the two .Linf_NaN results, so
   a NaN dividend that is routed here (its bits 29 and 30 are set, which is
   what the caller in .Ldenorm_fp1 checks for) comes back as an infinity -
   confirm that this is intended.  */
139	.balign 4
140	.global __divsf3_support
141__divsf3_support:
142.Linf_NaN:
143	bclr.f 0,r0,31 ; 0/0 -> NaN: Z is set when the dividend is +-0
144	xor_s r0,r0,r1 ; bit 31 = sign of the result
145	bmsk r1,r0,30 ; r1 = low 31 bits of r0
146	bic_s r0,r0,r1 ; r0 = result sign bit only
147	sub.eq r0,r0,1 ; 0/0: turn every bit below bit 31 into a one
148	j_s.d [blink]
149	or r0,r0,r9 ; (delay slot) set the exponent field to all ones
150.Lret0:
151	xor_s r0,r0,r1 ; bit 31 = sign of the result
152	bmsk r1,r0,30 ; r1 = low 31 bits of r0
153	j_s.d [blink]
154	bic_s r0,r0,r1 ; (delay slot) r0 = signed zero
155/* N.B. the spacing between divtab and the sub3 to get its address must
156   be a multiple of 8.  */
/* __divsf3 -- divide two single-precision values held as raw bit patterns.
   In:   r0 = dividend (fp0), r1 = divisor (fp1), blink = return address.
   Out:  r0 = quotient.
   This part writes r2-r9, r11, r12, the flags and the multiplier result
   registers.

   Outline of the straight-line path (special operands branch away):
     1. r9 = 0x7f800000 and r3 = address of .Ldivtab.  Both come from
	hard-coded pc-relative offsets (9 words back, and 37 * 8 bytes
	back); the expressions they stand for are in the trailing comments
	of the first two instructions.  Changing the size of anything
	between the table and this entry point means updating both.
     2. Divisor bits 17..22 select a table entry, from which the first
	reciprocal estimate is formed in r7.
     3. The estimate is refined once (the subtract of mhi from r7) and
	multiplied with the dividend mantissa.
     4. The result exponent is formed in r2, the sign is applied, and the
	mantissa is rounded.  When the cheap rounding is inconclusive, the
	tail compares the dividend against mid-point * divisor.

   Scheduling notes: the flags set by "and.f r2,r0,r9" are not consumed
   until "beq.d .Ldenorm_fp0", and the carry from cmp_s is consumed by
   lsr.cc and by sbc; no flag-setting (.f) instruction sits in between in
   either case.  .Lpast_denorm_fp1 labels an instruction that is at the
   same time the delay slot of the breq.d in front of it.  */
157__divsf3:
158	ld.as r9,[pcl,-9]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
159	sub3 r3,pcl,37;(.-.Ldivtab) >> 3
160	lsr r2,r1,17 ; bring divisor bits 17..22 down to bits 0..5
161	and.f r11,r1,r9 ; r11 = divisor exponent field; Z if it is zero
162	bmsk r5,r2,5 ; r5 = 6-bit table index
163	beq.d .Ldenorm_fp1 ; divisor exponent is zero: zero or denormal
164	asl r6,r1,8 ; (delay slot) r6 = divisor << 8, fraction in bits 8..30
165	and.f r2,r0,r9 ; r2 = dividend exponent field; Z is tested at beq.d .Ldenorm_fp0
166	ld.as r5,[r3,r5] ; r5 = table entry (index scaled to words)
167	asl r4,r1,9 ; r4 = divisor fraction left aligned, without the leading one
168	bset r6,r6,31 ; r6 = divisor mantissa with the leading one at bit 31
169	breq.d r11,r9,.Linf_nan_fp1 ; divisor exponent all ones: Inf or NaN
170.Lpast_denorm_fp1: /* re-entry from .Ldenorm_fp1; also the delay slot above */
171	mululw 0,r5,r4
172	machulw r8,r5,r4 ; r8 = product term that is subtracted below
173	breq.d r2,r9,.Linf_nan_fp0 ; dividend exponent all ones: Inf or NaN
174	asl r5,r5,13 ; (delay slot) drops the upper 13 bits of the table entry
175	sub r7,r5,r8 ; r7 = first reciprocal estimate
176	mululw 0,r7,r6
177	machulw r8,r7,r6 ; r8 = estimate multiplied with the divisor mantissa
178	beq.d .Ldenorm_fp0 ; dividend exponent is zero (flags from and.f r2,r0,r9)
179	asl r12,r0,8 ; (delay slot) r12 = dividend << 8
180	mulu64 (r8,r7) ; correction term, read back through mhi below
181	bset r3,r12,31 ; r3 = dividend mantissa with the leading one at bit 31
182.Lpast_denorm_fp0:
183	cmp_s r3,r6 ; compare the two mantissas; carry set if dividend is smaller
184	lsr.cc r3,r3,1 ; dividend mantissa >= divisor mantissa: halve it
185	add_s r2,r2, /* wait for immediate */ \
186		0x3f000000 ; 126 << 23 added to the dividend exponent field
187	sub r7,r7,mhi ; u1.31 inverse, about 30 bit
188	mulu64 (r3,r7) ; quotient mantissa = dividend mantissa * inverse
189	sbc r2,r2,r11 ; subtract divisor exponent field and the carry from cmp_s
190	xor.f 0,r0,r1 ; N = sign of the result
191	and r0,r2,r9 ; r0 = exponent field of the result
192	bclr r3,r9,23 ; 0x7f000000
193	brhs.d r2,r3,.Linf_denorm ; r2 >= 0x7f000000 unsigned: overflow or underflow
194	bxor.mi r0,r0,31 ; (delay slot) apply the result sign
195.Lpast_denorm:
196	add r3,mhi,0x22 ; round to nearest or higher
197	tst r3,0x3c ; check if rounding was unsafe (Z if bits 2..5 are all clear)
198	lsr r3,r3,6 ; drop the six low bits
199	jne.d [blink] ; return if rounding was safe.
200	add_s r0,r0,r3 ; (delay slot, runs on both paths) add mantissa to sign and exponent
201        /* work out exact rounding if we fall through here.  */
202        /* We know that the exact result cannot be represented in single
203           precision.  Find the mid-point between the two nearest
204           representable values, multiply with the divisor, and check if
205           the result is larger than the dividend.  */
206        add_s r3,r3,r3
207        sub_s r3,r3,1 ; r3 = 2 * r3 - 1, the mid-point in half-ulp units
208        mulu64 (r3,r6) ; mid-point * divisor mantissa
/* NOTE(review): the asr.f below leaves bit 0 of r0 in the carry flag, but
   nothing reads the carry before the following sub.f replaces all flags
   (sub.f is a plain subtract).  As written the round-to-even tie-break
   looks inactive - confirm whether sbc.f was intended.  */
209	asr.f 0,r0,1 ; for round-to-even in case this is a denorm
210	rsub r2,r9,25 ; r2 = 25 - r9 (r9 is the denormal shift count when coming from .Ldenorm)
211        asl_s r12,r12,r2 ; scale the dividend bits for the comparison
212        sub.f 0,r12,mlo ; N set when the dividend term is below mid-point * divisor
213        j_s.d [blink]
214        sub.mi r0,r0,1 ; (delay slot) step down to the lower neighbour
/* Operands with an all-ones exponent.

   .Linf_nan_fp1: the divisor (r1) is Inf or NaN.  r2 holds the dividend
   exponent field and r9 the exponent mask.
     - divisor fraction non-zero (NaN)              -> return 0xffffffff
     - divisor is Inf, dividend exponent all ones   -> return 0xffffffff
     - divisor is Inf, any other dividend           -> signed zero
   The bne_s tests the flags set by bmsk.f; the asl_s in between is not a
   flag-setting form.

   .Lsigned0 / .Linf_nan_fp0: return r0 with its sign bit flipped when the
   divisor is negative.  Via .Lsigned0 r0 holds only the dividend sign, so
   the result is a signed zero; via .Linf_nan_fp0 r0 is still the original
   dividend (Inf or NaN).  */
215.Linf_nan_fp1:
216	lsr_s r0,r0,31 ; r0 = dividend sign in bit 0
217	bmsk.f 0,r1,22 ; Z if the divisor fraction is zero, i.e. the divisor is Inf
218	asl_s r0,r0,31 ; r0 = dividend sign bit only
219	bne_s 0f ; divisor fraction non-zero: x/nan -> nan
220	brne r2,r9,.Lsigned0 ; x/inf -> 0; falls through for inf/inf and nan/inf -> nan
2210:	j_s.d [blink]
222	mov r0,-1 ; (delay slot) all-ones NaN
223.Lsigned0:
224.Linf_nan_fp0:
225	tst_s r1,r1 ; N = divisor sign
226	j_s.d [blink]
227	bxor.mi r0,r0,31 ; (delay slot) flip the sign for a negative divisor
/* Result exponent outside the normal range.  Reached from the brhs.d in
   the main path with r2 = unmasked result exponent (>= 0x7f000000 when
   read as unsigned), r0 = masked exponent field plus sign, and
   r9 = 0x7f800000.
     .Linf     r2 below 0xc0000000: overflow, return signed infinity.
     .Ldenorm  otherwise the exponent wrapped below zero: reduce r0 to its
	       sign bit, put the right-shift count in r9, and either rejoin
	       the rounding code at .Lpast_denorm (count < 25) or return a
	       signed zero.  */
228	.balign 4
229	.global __divsf3
230/* For denormal results, it is possible that an exact result needs
231   rounding, and thus the round-to-even rule has to come into play.  */
232.Linf_denorm:
233	brlo r2,0xc0000000,.Linf ; not a wrapped negative exponent: overflow
234.Ldenorm:
235	asr_s r2,r2,23 ; r2 = result exponent as a signed number
236	bic r0,r0,r9 ; clear the exponent field, keeping the sign
237	neg r9,r2 ; r9 = right-shift count for the mantissa
238	brlo.d r9,25,.Lpast_denorm ; some mantissa bits survive: go round them
/* NOTE(review): .Lpast_denorm begins by writing r3 (add r3,mhi,0x22), and
   the fall-through return does not read r3, so the value computed in this
   delay slot does not appear to be used on either path - confirm how the
   denormal shift is meant to reach the rounding code.  */
239	lsr r3,mlo,r9 ; (delay slot)
240	/* Fall through: return +- 0 */
241	j_s [blink]
242.Linf:
243	j_s.d [blink]
244	or r0,r0,r9 ; (delay slot) exponent field all ones: signed infinity
/* Operands whose exponent field is zero (zero or denormal).

   .Ldenorm_fp1: divisor exponent is zero.  Entered from the beq.d near the
   entry point with r6 = divisor << 8, r3 = table address, r9 = exponent
   mask and r11 = 0.
     - divisor is zero, or the dividend has bits 29 and 30 both set
       -> .Linf_NaN.
     - otherwise the divisor mantissa is normalised, the table entry is
       looked up from the normalised fraction, the normalisation count is
       added (shifted to the exponent position) to the dividend exponent
       in r2, and the main path is rejoined at .Lpast_denorm_fp1 when the
       dividend exponent is non-zero.

   .Ldenorm_fp0: dividend exponent is zero.  Entered from the beq.d in the
   main path with r12 = dividend << 8; the mulu64 that the branch skipped
   is issued here instead.
     - dividend is zero, or the divisor has bits 29 and 30 both set
       -> .Lret0.
     - otherwise the dividend mantissa is normalised into r3 and the shift
       is compensated by raising the divisor exponent in r11 before
       rejoining at .Lpast_denorm_fp0.

   NOTE(review): when .Ldenorm_fp1 falls through into .Ldenorm_fp0 (both
   exponent fields zero), the estimate sequence at .Lpast_denorm_fp1 has
   not run, so r7 / r8 are not set up, and r12 holds the shifted
   normalisation count rather than the dividend - confirm that this
   combination is handled as intended.  */
245	.balign 4
246.Ldenorm_fp1:
247	norm.f r12,r6 ; flag for x/0 -> Inf check
248	add r6,r6,r6 ; r6 <<= 1
249	rsub r5,r12,16 ; r5 = 16 - normalisation count
250	ror r5,r1,r5 ; rotate the leading fraction bits down to bits 0..5
251	bmsk r5,r5,5 ; r5 = 6-bit table index
252	bic.ne.f 0, \
253		0x60000000,r0 ; large number / denorm -> Inf
254	ld.as r5,[r3,r5] ; r5 = table entry
255	asl r6,r6,r12 ; r6 = normalised divisor mantissa
256	beq.d .Linf_NaN ; divisor zero or dividend large
257	and.f r2,r0,r9 ; (delay slot) r2 = dividend exponent field; Z if it is zero
258	add r4,r6,r6 ; r4 = r6 << 1, the fraction without the leading one
259	asl_s r12,r12,23 ; normalisation count moved to the exponent position
260	bne.d .Lpast_denorm_fp1 ; dividend exponent non-zero: rejoin the main path
261	add_s r2,r2,r12 ; (delay slot) raise the dividend exponent accordingly
262.Ldenorm_fp0:
263	mulu64 (r8,r7) ; same multiply as the one skipped on the main path
264	bclr r12,r12,31
265	norm.f r3,r12 ; flag for 0/x -> 0 check
266	bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
267	beq_s .Lret0
268	asl_s r12,r12,r3 ; normalise the dividend mantissa
269	asl_s r3,r3,23 ; normalisation count moved to the exponent position
270	add_s r12,r12,r12 ; one further left shift
271	add r11,r11,r3 ; compensate by raising the divisor exponent
272	b.d .Lpast_denorm_fp0
273	mov_s r3,r12 ; (delay slot) r3 = normalised dividend mantissa
274	ENDFUNC(__divsf3)
275