1//===----------------------Hexagon builtin routine ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Double Precision Divide
10
// Operand register pairs. A lives in r1:0 and B in r3:2; the entry packet of
// __hexagon_divdf3 reads both, and the same pairs are later re-aliased as
// REM (r1:0) and DENOM (r3:2).
11#define A r1:0
12#define AH r1
13#define AL r0
14
15#define B r3:2
16#define BH r3
17#define BL r2
18
// Q accumulates the integer quotient that is finally passed to convert_d2df.
19#define Q r5:4
20#define QH r5
21#define QL r4
22
// PROD holds 64-bit multiply results and doubles as a general scratch pair.
23#define PROD r7:6
24#define PRODHI r7
25#define PRODLO r6
26
// Single-precision working registers for the reciprocal estimate
// (sfrecipa followed by sfmpy refinement steps).
27#define SFONE r8
28#define SFDEN r9
29#define SFERROR r10
30#define SFRECIP r11
31
// Initially the high words of B (r13) and A (r12), then their extracted
// exponent fields.
32#define EXPBA r13:12
33#define EXPB r13
34#define EXPA r12
35
// Second partial product subtracted from the remainder inside DIV_ITER1B.
36#define REMSUB2 r15:14
37
38
39
// XOR of the operands' high words; only its sign is tested (cmp.gt against #-1).
40#define SIGN r28
41
// Predicate roles:
//   Q_POSITIVE  - SIGN is non-negative
//   NORMAL      - both operands matched dfclass DFCLASS_NORMAL
//   NO_OVF_UNF  - exponent difference lies in (-DF_BIAS+1, DF_BIAS-1]
//   P_TMP       - scratch predicate
42#define Q_POSITIVE p3
43#define NORMAL p2
44#define NO_OVF_UNF p1
45#define P_TMP p0
46
// RECIPEST_SHIFT: bit offset at which the single-precision mantissa is
//                 inserted into RECIPEST.
// QADJ:           constant subtracted from the exponent difference before it
//                 is added to the exponent of the converted quotient.
47#define RECIPEST_SHIFT 3
48#define QADJ 61
49
// Class masks passed as the immediate operand of dfclass. NONZERO and
// NONINFINITE are DFCLASS_NUMBER with one class bit removed.
50#define DFCLASS_NORMAL 0x02
51#define DFCLASS_NUMBER 0x0F
52#define DFCLASS_INFINITE 0x08
53#define DFCLASS_ZERO 0x01
54#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
55#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
56
// Field widths of the double (DF) and single (SF) formats and the DF bias.
57#define DF_MANTBITS 52
58#define DF_EXPBITS 11
59#define SF_MANTBITS 23
60#define SF_EXPBITS 8
61#define DF_BIAS 0x3ff
62
// Bit offset inside USR of the 2-bit field that .Ldiv_ovf reads to pick
// between infinity and the largest finite value (the rounding mode, going by
// the comments there).
63#define SR_ROUND_OFF 22
64
// Q6_ALIAS / FAST_ALIAS / FAST2_ALIAS export __hexagon_<TAG> under the
// additional names __qdsp_<TAG>, __hexagon_fast_<TAG> and __hexagon_fast2_<TAG>.
// END(TAG) emits the .size directive for TAG.
65#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
66#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
67#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
68#define END(TAG) .size TAG,.-TAG
69
// Section, symbol type and exported aliases for __hexagon_divdf3.
70	.text
71	.global __hexagon_divdf3
72	.type __hexagon_divdf3,@function
73	Q6_ALIAS(divdf3)
74        FAST_ALIAS(divdf3)
75        FAST2_ALIAS(divdf3)
76	.p2align 5
// __hexagon_divdf3 -- double-precision divide.
//   In:   A = r1:0 (becomes the remainder REM), B = r3:2 (becomes DENOM);
//         the routine forms the quotient A / B.
//   Out:  r1:0, written before every `jumpr r31`.
//   Writes on this path: r0-r15, r28, p0-p3.
//
// Outline of the normal path:
//   1. Classify the operands; anything that is not "normal / normal" leaves
//      for .Ldiv_abnormal.
//   2. Build a single-precision reciprocal estimate of the denominator's
//      mantissa with sfrecipa plus two sfmpy refinement rounds.
//   3. Run four DIV_ITER1B steps that accumulate an integer quotient in Q
//      while updating the remainder REM.
//   4. Apply a final correction and a sticky bit, apply the sign, convert Q
//      with convert_d2df and add the exponent difference to the result.
77__hexagon_divdf3:
	// NORMAL is written twice in this packet; the code relies on the two
	// results combining (logical AND per Hexagon same-packet predicate
	// writes), so NORMAL means "A and B are both normal".
	// EXPBA captures both high words; SIGN's top bit is the result sign.
78	{
79		NORMAL = dfclass(A,#DFCLASS_NORMAL)
80		NORMAL = dfclass(B,#DFCLASS_NORMAL)
81		EXPBA = combine(BH,AH)
82		SIGN = xor(AH,BH)
83	}
// From here on r1:0 is the running remainder and r3:2 the denominator.
84#undef A
85#undef AH
86#undef AL
87#undef B
88#undef BH
89#undef BL
90#define REM r1:0
91#define REMHI r1
92#define REMLO r0
93#define DENOM r3:2
94#define DENOMHI r3
95#define DENOMLO r2
	// Leave for the slow path unless both operands are normal. PROD gets the
	// top SF_MANTBITS bits of the denominator mantissa; SFONE is the bit
	// pattern 0x3f800001 (single-precision 1.0 with the lsb set).
96	{
97		if (!NORMAL) jump .Ldiv_abnormal
98		PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
99		SFONE = ##0x3f800001
100	}
	// SFDEN = SFONE with the denominator's leading mantissa bits ORed in.
	// EXPA/EXPB are reduced from high words to their DF_EXPBITS-wide exponent
	// fields. Q_POSITIVE is true when the sign bit of SIGN is clear.
101	{
102		SFDEN = or(SFONE,PRODLO)
103		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
104		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
105		Q_POSITIVE = cmp.gt(SIGN,#-1)
106	}
// SIGN is dead now (its sign lives on in Q_POSITIVE); r28 is reused as ONE.
107#undef SIGN
108#define ONE r28
// Re-entry point for .Ldiv_abnormal once it has normalized its operands and
// rebuilt SFDEN, EXPA and EXPB.
109.Ldenorm_continue:
	// Initial reciprocal approximation of SFDEN. SFERROR = SFONE with the lsb
	// cleared (0x3f800000). EXPA becomes the exponent difference.
110	{
111		SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
112		SFERROR = and(SFONE,#-2)
113		ONE = #1
114		EXPA = sub(EXPA,EXPB)
115	}
// EXPB's value has been consumed; r13 now holds the fixed-point reciprocal.
116#undef EXPB
117#define RECIPEST r13
	// Refinement round 1, first half: SFERROR -= SFRECIP * SFDEN.
	// REMHI's top DF_EXPBITS+1 bits (sign + exponent) are replaced by the
	// value 1, i.e. sign/exponent cleared and the bit just above the stored
	// mantissa set. RECIPEST is preloaded with 0x00800000 << RECIPEST_SHIFT.
118	{
119		SFERROR -= sfmpy(SFRECIP,SFDEN):lib
120		REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
121		RECIPEST = ##0x00800000 << RECIPEST_SHIFT
122	}
	// Refinement round 1, second half: SFRECIP += SFRECIP * SFERROR.
	// DENOMHI gets the same sign/exponent replacement as REMHI; SFERROR is
	// reset for the second round.
123	{
124		SFRECIP += sfmpy(SFRECIP,SFERROR):lib
125		DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
126		SFERROR = and(SFONE,#-2)
127	}
	// Refinement round 2, first half. QH/QL are used as temporaries holding
	// the exponent-difference bounds for the range check below.
128	{
129		SFERROR -= sfmpy(SFRECIP,SFDEN):lib
130		QH = #-DF_BIAS+1
131		QL = #DF_BIAS-1
132	}
	// Refinement round 2, second half. NO_OVF_UNF = (EXPA > QH) and
	// not (EXPA > QL), again relying on same-packet predicate combining.
133	{
134		SFRECIP += sfmpy(SFRECIP,SFERROR):lib
135		NO_OVF_UNF = cmp.gt(EXPA,QH)
136		NO_OVF_UNF = !cmp.gt(EXPA,QL)
137	}
	// Move the low SF_MANTBITS bits of the refined reciprocal into RECIPEST at
	// bit RECIPEST_SHIFT, clear the quotient accumulator, and subtract QADJ
	// from the exponent difference.
138	{
139		RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
140		Q = #0
141		EXPA = add(EXPA,#-QADJ)
142	}
// The single-precision registers are free again; r10/r11 become scratch.
143#undef SFERROR
144#undef SFRECIP
145#define TMP r10
146#define TMP1 r11
	// Lower the estimate by 3 units at the inserted mantissa's lsb.
	// NOTE(review): presumably so the estimate never exceeds the true
	// reciprocal -- confirm against the algorithm's error analysis.
147	{
148		RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
149	}
150
// DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA): one quotient-refinement step.
//   packet 1: PROD = RECIPEST * REMHI (mpyu); REM is shifted left by REMSHIFT.
//   packet 2: PRODLO is cleared, leaving only PRODHI in PROD; the product
//             PRODHI * DENOMLO is subtracted from REM and PRODHI * DENOMHI is
//             parked in REMSUB2.
//   packet 3: Q += QSHIFTINSN(PROD,QSHIFT); REM -= REMSUB2 << 32; then EXTRA.
// Packets 2 and 3 together subtract PRODHI * DENOM from REM.
// EXTRA is pasted verbatim into the last packet (used below to zero PROD).
// Keep comments out of the macro body: every line ends in a continuation.
151#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
152	{ \
153		PROD = mpyu(RECIPEST,REMHI); \
154		REM = asl(REM,# ## ( REMSHIFT )); \
155	}; \
156	{ \
157		PRODLO = # ## 0; \
158		REM -= mpyu(PRODHI,DENOMLO); \
159		REMSUB2 = mpyu(PRODHI,DENOMHI); \
160	}; \
161	{ \
162		Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
163		REM -= asl(REMSUB2, # ## 32); \
164		EXTRA \
165	}
166
167
	// Four steps. Each shifts REM left by 15 and adds its chunk into Q 15 bit
	// positions lower than the previous step (<<14, >>1, >>16, >>31).
	// The last step also zeroes PROD for the correction packet that follows.
168	DIV_ITER1B(ASL,14,15,)
169	DIV_ITER1B(ASR,1,15,)
170	DIV_ITER1B(ASR,16,15,)
171	DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
172
// r15:14 becomes a general temporary pair; r13 returns to its EXPB role.
173#undef REMSUB2
174#define TMPPAIR r15:14
175#define TMPPAIRHI r15
176#define TMPPAIRLO r14
177#undef RECIPEST
178#define EXPB r13
	// Final correction: if REM >= DENOM (unsigned), PROD becomes 2 so that Q is
	// bumped by 2 in the next packet and REM is replaced by REM - DENOM.
179	{
180		// compare or sub with carry
181		TMPPAIR = sub(REM,DENOM)
182		P_TMP = cmp.gtu(DENOM,REM)
183		// set up amt to add to q
184		if (!P_TMP.new) PRODLO  = #2
185	}
186	{
187		Q = add(Q,PROD)
188		if (!P_TMP) REM = TMPPAIR
189		TMPPAIR = #0
190	}
	// Sticky bit: if any remainder is left, set bit 0 of Q (ONE == 1).
191	{
192		P_TMP = cmp.eq(REM,TMPPAIR)
193		if (!P_TMP.new) QL = or(QL,ONE)
194	}
	// Apply the sign: Q = -Q when the result is negative.
195	{
196		PROD = neg(Q)
197	}
198	{
199		if (!Q_POSITIVE) Q = PROD
200	}
// r1:0 / r3:2 go back to their A / B names for the result and the slow paths.
201#undef REM
202#undef REMHI
203#undef REMLO
204#undef DENOM
205#undef DENOMLO
206#undef DENOMHI
207#define A r1:0
208#define AH r1
209#define AL r0
210#define B r3:2
211#define BH r3
212#define BL r2
	// Convert the signed integer quotient to double. The conversion shares a
	// packet with the branch, so A holds the converted value on both paths.
213	{
214		A = convert_d2df(Q)
215		if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
216	}
	// In-range case: add the adjusted exponent difference into the exponent
	// field of the high word and return.
217	{
218		AH += asl(EXPA,#DF_MANTBITS-32)
219		jumpr r31
220	}
221
// Reached from the normal path when NO_OVF_UNF is false, i.e. the exponent
// difference was outside (-DF_BIAS+1, DF_BIAS-1]. On entry A holds
// convert_d2df(Q) and EXPA the exponent difference minus QADJ.
222.Ldiv_ovf_unf:
	// Apply the exponent adjustment to AH as the normal path would. EXPB takes
	// the exponent field of AH; packet sources are read before any write in
	// the packet, so this is the exponent produced by convert_d2df (the
	// "post-converted exponent" mentioned in the underflow comments below).
223	{
224		AH += asl(EXPA,#DF_MANTBITS-32)
225		EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
226	}
	// PROD = |Q|; EXPA = adjustment + converted exponent, i.e. the exponent the
	// result would carry if the exponent range were unbounded.
227	{
228		PROD = abs(Q)
229		EXPA = add(EXPA,EXPB)
230	}
231	{
232		P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)		// overflow
233		if (P_TMP.new) jump:nt .Ldiv_ovf
234	}
235	{
236		P_TMP = cmp.gt(EXPA,#0)
237		if (P_TMP.new) jump:nt .Lpossible_unf		// round up to normal possible...
238	}
239	// Underflow
240	// We know what the infinite range exponent should be (EXPA)
241	// Q is 2's complement, PROD is abs(Q)
242	// Normalize Q, shift right, add a high bit, convert, change exponent
243
244#define FUDGE1 7	// how much to shift right
245#define FUDGE2 4	// how many guard/round to keep at lsbs
246
	// EXPB = normalizing left-shift count for PROD; EXPA = FUDGE1 - EXPA is the
	// right-shift count; TMP = current USR; TMP1 = 63 caps the shift.
247	{
248		EXPB = add(clb(PROD),#-1)			// doesn't need to be added in since
249		EXPA = sub(#FUDGE1,EXPA)			// we extract post-converted exponent
250		TMP = USR
251		TMP1 = #63
252	}
	// PROD is normalized using the old EXPB (sources are read before the
	// packet's writes). EXPB becomes min(right-shift count, 63); TMP1 becomes
	// USR with bits 0x30 set (the Unf+Inexact flags, per the comments in
	// .Lpossible_unf); EXPA is zeroed so EXPBA can act as a width:offset pair.
253	{
254		EXPB = min(EXPA,TMP1)
255		TMP1 = or(TMP,#0x030)
256		PROD = asl(PROD,EXPB)
257		EXPA = #0
258	}
259	{
260		TMPPAIR = extractu(PROD,EXPBA)				// bits that will get shifted out
261		PROD = lsr(PROD,EXPB)					// shift out bits
262		B = #1
263	}
	// P_TMP is true when no set bits were shifted out (1 >u TMPPAIR);
	// otherwise OR a sticky 1 into PRODLO. Bit DF_MANTBITS-32+FUDGE2 of PRODHI
	// is the "high bit" added per the plan above.
264	{
265		P_TMP = cmp.gtu(B,TMPPAIR)
266		if (!P_TMP.new) PRODLO = or(BL,PRODLO)
267		PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
268	}
	// If any of the low FUDGE2 guard/round bits are set, select the USR value
	// that has the 0x30 flag bits set.
269	{
270		Q = neg(PROD)
271		P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
272		if (!P_TMP.new) TMP = TMP1
273	}
	// Write USR back, pick the signed value to convert, and load the exponent
	// correction that is applied after the conversion.
274	{
275		USR = TMP
276		if (Q_POSITIVE) Q = PROD
277		TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
278	}
279	{
280		A = convert_d2df(Q)
281	}
282	{
283		AH += asl(TMP,#DF_MANTBITS-32)
284		jumpr r31
285	}
286
287
// Reached from .Ldiv_ovf_unf when the combined exponent is in (0, 2*DF_BIAS].
// A already holds the exponent-adjusted result and PROD holds abs(Q).
288.Lpossible_unf:
289	// If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
290	// The answer is correct, but we need to raise Underflow
	// B = A with the sign bit dropped; TMPPAIR = smallest normal double;
	// TMP = mask of the low 15 bits tested in PRODHI.
291	{
292		B = extractu(A,#63,#0)
293		TMPPAIR = combine(##0x00100000,#0)		// min normal
294		TMP = #0x7FFF
295	}
	// Both tests write P_TMP in one packet; the code relies on both having to
	// hold (same-packet predicate writes combine).
296	{
297		P_TMP = dfcmp.eq(TMPPAIR,B)		// Is everything zero in the rounded value...
298		P_TMP = bitsset(PRODHI,TMP)		// but a bunch of bits set in the unrounded abs(quotient)?
299	}
300
// For __HEXAGON_ARCH__ == 60 the two instructions are issued outside a packet,
// in the opposite order to the packetized #else form.
301#if (__HEXAGON_ARCH__ == 60)
302		TMP = USR		// Read USR up front; it is only needed when we go on to set Unf+Inexact
303		if (!P_TMP) jumpr r31   // If not, just return
304					// Note that inexact is already set...
305#else
306	{
307		if (!P_TMP) jumpr r31			// If not, just return
308		TMP = USR				// Else, we want to set Unf+Inexact
309	}						// Note that inexact is already set...
310#endif
	// Set USR bits 0x30 (Unf+Inexact) and write USR back.
311	{
312		TMP = or(TMP,#0x30)
313	}
314	{
315		USR = TMP
316	}
	// NOTE(review): the self-compare result in p0 is not used afterwards;
	// presumably it is issued for its side effects after the USR write, like
	// the "get exceptions" compare in .Ldiv_ovf -- confirm.
317	{
318		p0 = dfcmp.eq(A,A)
319		jumpr r31
320	}
321
// Reached from .Ldiv_ovf_unf when the combined exponent exceeds 2*DF_BIAS.
322.Ldiv_ovf:
323
324	// Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
325
	// TMP = USR; B = 0x7fefffff_ffffffff (largest finite magnitude);
	// AH = 0 for a positive result, all ones for a negative one.
326	{
327		TMP = USR
328		B = combine(##0x7fefffff,#-1)
329		AH = mux(Q_POSITIVE,#0,#-1)
330	}
	// PROD = 0x7ff00000_00000000 (infinity); QH = the 2-bit USR field at
	// SR_ROUND_OFF; TMP gets bits 0x28 set.
	// NOTE(review): 0x28 is presumably Overflow+Inexact -- confirm against the
	// USR bit layout.
331	{
332		PROD = combine(##0x7ff00000,#0)
333		QH = extractu(TMP,#2,#SR_ROUND_OFF)
334		TMP = or(TMP,#0x28)
335	}
	// QL copies QH in the same packet as QH's update, so QL keeps the raw
	// field value while QH becomes field XOR sign (AH >> 31 is 1 when negative).
336	{
337		USR = TMP
338		QH ^= lsr(AH,#31)
339		QL = QH
340	}
341	{
342		p0 = !cmp.eq(QL,#1)		// if not round-to-zero
343		p0 = !cmp.eq(QH,#2)		// and not rounding the other way
344		if (p0.new) B = PROD		// go to inf
345		p0 = dfcmp.eq(B,B)		// get exceptions
346	}
	// Copy the low 63 bits of B into A; bit 63 keeps the sign set up in AH.
347	{
348		A = insert(B,#63,#0)
349		jumpr r31
350	}
351
// Aliases for the slow path: r28 is SIGN again (set in the entry packet, not
// yet overwritten because ONE is only loaded at .Ldenorm_continue), and p1/p2
// are reused for the infinity / zero tests.
352#undef ONE
353#define SIGN r28
354#undef NORMAL
355#undef NO_OVF_UNF
356#define P_INF p1
357#define P_ZERO p2
// Reached from the second packet of __hexagon_divdf3 when A or B is not of
// class NORMAL. It sorts the operands into NaN, invalid, zero-result,
// infinite-result, or "finite nonzero but not both normal"; the last case is
// normalized here and sent back to .Ldenorm_continue.
// Each predicate below is written twice in one packet; the code relies on the
// two dfclass results combining (both must hold).
358.Ldiv_abnormal:
	// P_TMP: both operands are of class NUMBER. Q_POSITIVE is computed here
	// because the packet that sets it on the fast path was skipped.
359	{
360		P_TMP = dfclass(A,#DFCLASS_NUMBER)
361		P_TMP = dfclass(B,#DFCLASS_NUMBER)
362		Q_POSITIVE = cmp.gt(SIGN,#-1)
363	}
	// P_INF: both operands infinite.
364	{
365		P_INF = dfclass(A,#DFCLASS_INFINITE)
366		P_INF = dfclass(B,#DFCLASS_INFINITE)
367	}
	// P_ZERO: both operands zero.
368	{
369		P_ZERO = dfclass(A,#DFCLASS_ZERO)
370		P_ZERO = dfclass(B,#DFCLASS_ZERO)
371	}
	// At least one operand is not a NUMBER -> .Ldiv_nan; inf/inf -> invalid.
372	{
373		if (!P_TMP) jump .Ldiv_nan
374		if (P_INF) jump .Ldiv_invalid
375	}
	// 0/0 -> invalid.
376	{
377		if (P_ZERO) jump .Ldiv_invalid
378	}
	// P_ZERO is reused: true when A is nonzero and B is non-infinite.
	// When false (A is zero or B is infinite) the result is a signed zero.
379	{
380		P_ZERO = dfclass(A,#DFCLASS_NONZERO)		// nonzero
381		P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)	// non-infinite
382	}
	// P_INF is reused: true when A is non-infinite and B is nonzero.
	// When false (A is infinite or B is zero) the result is a signed infinity.
383	{
384		P_INF = dfclass(A,#DFCLASS_NONINFINITE)	// non-infinite
385		P_INF = dfclass(B,#DFCLASS_NONZERO)	// nonzero
386	}
387	{
388		if (!P_ZERO) jump .Ldiv_zero_result
389		if (!P_INF) jump .Ldiv_inf_result
390	}
391	// Now we've narrowed it down to (de)normal / (de)normal
392	// Set up A/EXPA B/EXPB and go back
393#undef P_ZERO
394#undef P_INF
395#define P_TMP2 p1
	// P_TMP / P_TMP2: A / B is normal. TMP = bit DF_MANTBITS-32 of a high word.
396	{
397		P_TMP = dfclass(A,#DFCLASS_NORMAL)
398		P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
399		TMP = ##0x00100000
400	}
	// Save the original high words in EXPBA before the sign/exponent fields of
	// AH and BH are overwritten with the (zero) low 12 bits of TMP.
401	{
402		EXPBA = combine(BH,AH)
403		AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
404		BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
405	}
406	{
407		if (P_TMP) AH = or(AH,TMP)				// if normal, add back in hidden bit
408		if (P_TMP2) BH = or(BH,TMP)				// if normal, add back in hidden bit
409	}
	// QH / QL = left-shift counts used below to normalize A / B
	// (leading-bit count minus DF_EXPBITS). TMP = 1 for the exponent fix-up.
410	{
411		QH = add(clb(A),#-DF_EXPBITS)
412		QL = add(clb(B),#-DF_EXPBITS)
413		TMP = #1
414	}
	// Exponent fields taken from the saved high words.
415	{
416		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
417		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
418	}
	// Normalize both mantissas; a non-normal operand gets the exponent
	// 1 - shift count instead of its stored exponent field.
419	{
420		A = asl(A,QH)
421		B = asl(B,QL)
422		if (!P_TMP) EXPA = sub(TMP,QH)
423		if (!P_TMP2) EXPB = sub(TMP,QL)
424	}	// recreate values needed by resume code
	// Rebuild PROD and SFDEN exactly as the fast path does before
	// .Ldenorm_continue. SFONE is expected to still hold the constant loaded in
	// the packet that branched here.
425	{
426		PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
427	}
428	{
429		SFDEN = or(SFONE,PRODLO)
430		jump .Ldenorm_continue
431	}
432
// Reached from .Ldiv_abnormal when A is zero or B is infinite (and neither
// invalid case applied). Returns a zero whose sign is sign(A) XOR sign(B).
433.Ldiv_zero_result:
	// The top bit of AH becomes the result sign; B is cleared as a zero source.
434	{
435		AH = xor(AH,BH)
436		B = #0
437	}
	// Overwrite the low 63 bits of A with zero, keeping only the sign bit.
438	{
439		A = insert(B,#63,#0)
440		jumpr r31
441	}
// Reached from .Ldiv_abnormal when A is infinite or B is zero (and neither
// invalid case applied). Returns an infinity whose sign is sign(A) XOR
// sign(B); USR bit 0x04 is also set when B is zero and A is non-infinite.
442.Ldiv_inf_result:
	// p2 = B is zero and A is non-infinite (both tests must hold).
443	{
444		p2 = dfclass(B,#DFCLASS_ZERO)
445		p2 = dfclass(A,#DFCLASS_NONINFINITE)
446	}
	// The sign XOR and the USR read share the packet with the branch, so they
	// are done whether or not the flag update below is skipped.
447	{
448		TMP = USR
449		if (!p2) jump 1f
450		AH = xor(AH,BH)
451	}
452	{
453		TMP = or(TMP,#0x04)		// DBZ
454	}
455	{
456		USR = TMP
457	}
4581:
	// B = 0x7ff00000_00000000 (infinity). The dfcmp.uo in the same packet reads
	// B before this write (packet sources are read first); its p0 result is not
	// used afterwards.
459	{
460		B = combine(##0x7ff00000,#0)
461		p0 = dfcmp.uo(B,B)		// take possible exception
462	}
	// Copy the low 63 bits of B into A, keeping the sign bit computed above.
463	{
464		A = insert(B,#63,#0)
465		jumpr r31
466	}
// Reached from .Ldiv_abnormal when at least one operand is not of class
// NUMBER. Returns the all-ones bit pattern in A.
467.Ldiv_nan:
	// Whichever operand does not match dfclass mask 0x10 is replaced by the
	// other one, so both conversions below see operands of that class.
	// NOTE(review): 0x10 is presumably the NaN class bit (it is the one bit
	// not covered by DFCLASS_NUMBER) -- confirm.
468	{
469		p0 = dfclass(A,#0x10)
470		p1 = dfclass(B,#0x10)
471		if (!p0.new) A = B
472		if (!p1.new) B = A
473	}
	// The converted values in QH/QL are discarded; only the side effects of
	// the conversions are wanted.
474	{
475		QH = convert_df2sf(A)	// get possible invalid exceptions
476		QL = convert_df2sf(B)
477	}
	// Result: every bit of r1:0 set.
478	{
479		A = #-1
480		jumpr r31
481	}
482
// Reached from .Ldiv_abnormal for inf/inf and 0/0.
483.Ldiv_invalid:
	// TMP = single-precision bit pattern 0x7f800001: exponent all ones with a
	// nonzero mantissa whose top bit is clear.
484	{
485		TMP = ##0x7f800001
486	}
	// Converting that value both raises Invalid and produces the double NaN
	// that is returned.
487	{
488		A = convert_sf2df(TMP)		// get invalid, get DF qNaN
489		jumpr r31
490	}
// Record the symbol size of __hexagon_divdf3 (covers every path above).
491END(__hexagon_divdf3)
492