1336817Sdim//===----------------------Hexagon builtin routine ------------------------===//
2336817Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6336817Sdim//
7336817Sdim//===----------------------------------------------------------------------===//
8336817Sdim
9353358Sdim// Double Precision Divide
10336817Sdim
// Register aliases used by the divide routine.  Several names are #undef'd
// and re-#define'd further down as the routine moves between phases, so an
// alias is only meaningful between its #define and the matching #undef.

// First 64-bit operand (r1:0).  The main path later re-aliases this pair as
// REM, and the result is also returned in it.
11336817Sdim#define A r1:0
12336817Sdim#define AH r1
13336817Sdim#define AL r0
14336817Sdim
// Second 64-bit operand (r3:2).  The main path later re-aliases this pair as
// DENOM.
15336817Sdim#define B r3:2
16336817Sdim#define BH r3
17336817Sdim#define BL r2
18336817Sdim
// 64-bit quotient accumulator.
19336817Sdim#define Q r5:4
20336817Sdim#define QH r5
21336817Sdim#define QL r4
22336817Sdim
// 64-bit scratch pair that holds products and other temporaries.
23336817Sdim#define PROD r7:6
24336817Sdim#define PRODHI r7
25336817Sdim#define PRODLO r6
26336817Sdim
// 32-bit values fed to sfrecipa/sfmpy while building the reciprocal estimate.
27336817Sdim#define SFONE r8
28336817Sdim#define SFDEN r9
29336817Sdim#define SFERROR r10
30336817Sdim#define SFRECIP r11
31336817Sdim
// Exponent words of B (high) and A (low), also addressable as one pair.
32336817Sdim#define EXPBA r13:12
33336817Sdim#define EXPB r13
34336817Sdim#define EXPA r12
35336817Sdim
// Destination of the PRODHI*DENOMHI product inside DIV_ITER1B.
36336817Sdim#define REMSUB2 r15:14
37336817Sdim
38336817Sdim
39336817Sdim
// r28 first holds AH^BH (sign of the result); it is re-aliased as ONE later.
40336817Sdim#define SIGN r28
41336817Sdim
// Predicate register roles.
42336817Sdim#define Q_POSITIVE p3
43336817Sdim#define NORMAL p2
44336817Sdim#define NO_OVF_UNF p1
45336817Sdim#define P_TMP p0
46336817Sdim
// RECIPEST_SHIFT: bit offset at which the reciprocal mantissa is inserted
// into RECIPEST.  QADJ: constant subtracted from EXPA before the exponent is
// added back into the converted quotient.
47336817Sdim#define RECIPEST_SHIFT 3
48336817Sdim#define QADJ 61
49336817Sdim
// Class masks passed as the immediate operand of dfclass.  NUMBER is the OR
// of the four low class bits; NONZERO (0x0E) and NONINFINITE (0x07) are
// NUMBER with the ZERO or INFINITE bit removed.
// NOTE(review): bit 0x04 is not named here; presumably the subnormal class --
// confirm against the Hexagon ISA manual.
50336817Sdim#define DFCLASS_NORMAL 0x02
51336817Sdim#define DFCLASS_NUMBER 0x0F
52336817Sdim#define DFCLASS_INFINITE 0x08
53336817Sdim#define DFCLASS_ZERO 0x01
54336817Sdim#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
55336817Sdim#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
56336817Sdim
// Field widths of the double (DF) and single (SF) formats, and the DF
// exponent bias.
57336817Sdim#define DF_MANTBITS 52
58336817Sdim#define DF_EXPBITS 11
59336817Sdim#define SF_MANTBITS 23
60336817Sdim#define SF_EXPBITS 8
61336817Sdim#define DF_BIAS 0x3ff
62336817Sdim
// Bit offset of the 2-bit field that .Ldiv_ovf extracts from USR to pick the
// overflow result (the rounding mode, per the comments there).
63336817Sdim#define SR_ROUND_OFF 22
64336817Sdim
// Symbol helpers: each *_ALIAS(TAG) exports an additional global name that is
// .set equal to __hexagon_TAG; END(TAG) emits the ELF .size for TAG.
65336817Sdim#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
66336817Sdim#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
67336817Sdim#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
68336817Sdim#define END(TAG) .size TAG,.-TAG
69336817Sdim
// __hexagon_divdf3: double-precision divide.
//   In:   A = r1:0, B = r3:2 (IEEE double bit patterns)
//   Out:  r1:0; every exit is a jumpr r31
//   Also exported as __qdsp_divdf3, __hexagon_fast_divdf3 and
//   __hexagon_fast2_divdf3 through the alias macros below.
// This entry sequence classifies the operands, captures the high words
// (exponents) and the result sign, and starts building the single-precision
// image of the divisor used to seed the reciprocal estimate.
70336817Sdim	.text
71336817Sdim	.global __hexagon_divdf3
72336817Sdim	.type __hexagon_divdf3,@function
73336817Sdim	Q6_ALIAS(divdf3)
74336817Sdim        FAST_ALIAS(divdf3)
75336817Sdim        FAST2_ALIAS(divdf3)
76336817Sdim	.p2align 5
77336817Sdim__hexagon_divdf3:
78336817Sdim	{
		// Both dfclass results are written to NORMAL in one packet; the
		// branch below treats NORMAL as "A and B are both normal"
		// (same-packet predicate writes are ANDed per the Hexagon ISA).
79336817Sdim		NORMAL = dfclass(A,#DFCLASS_NORMAL)
80336817Sdim		NORMAL = dfclass(B,#DFCLASS_NORMAL)
		// EXPB = BH, EXPA = AH: raw high words, exponent extracted later
81336817Sdim		EXPBA = combine(BH,AH)
		// bit 31 of SIGN is the sign of the quotient
82336817Sdim		SIGN = xor(AH,BH)
83336817Sdim	}
// From here on r1:0 is treated as the running remainder and r3:2 as the
// denominator.
84336817Sdim#undef A
85336817Sdim#undef AH
86336817Sdim#undef AL
87336817Sdim#undef B
88336817Sdim#undef BH
89336817Sdim#undef BL
90336817Sdim#define REM r1:0
91336817Sdim#define REMHI r1
92336817Sdim#define REMLO r0
93336817Sdim#define DENOM r3:2
94336817Sdim#define DENOMHI r3
95336817Sdim#define DENOMLO r2
96336817Sdim	{
97336817Sdim		if (!NORMAL) jump .Ldiv_abnormal
		// top SF_MANTBITS bits of the divisor's mantissa (bits 29..51)
98336817Sdim		PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
		// SF 1.0 (0x3f800000) with the mantissa lsb set.
		// NOTE: .Ldiv_abnormal reads SFONE without reloading it, so it
		// relies on this write.
99336817Sdim		SFONE = ##0x3f800001
100336817Sdim	}
101336817Sdim	{
		// SF value in [1,2) carrying the divisor's leading mantissa bits
102336817Sdim		SFDEN = or(SFONE,PRODLO)
		// isolate the 11-bit biased exponent fields from the high words
103336817Sdim		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
104336817Sdim		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
		// true when the sign bit of AH^BH is clear
105336817Sdim		Q_POSITIVE = cmp.gt(SIGN,#-1)
106336817Sdim	}
// Main divide path.  Entered by fall-through for normal operands and by a
// jump from .Ldiv_abnormal once denormal operands have been normalised.
// On entry: SFONE/SFDEN are set up, EXPA/EXPB hold the operand exponents,
// Q_POSITIVE holds the result sign, and r1:0 / r3:2 hold the operands.
// r28 changes role from SIGN to the constant ONE here.
107336817Sdim#undef SIGN
108336817Sdim#define ONE r28
109336817Sdim.Ldenorm_continue:
110336817Sdim	{
		// seed the single-precision reciprocal estimate of SFDEN
111336817Sdim		SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
		// SFONE with its lsb cleared, i.e. exactly SF 1.0
112336817Sdim		SFERROR = and(SFONE,#-2)
113336817Sdim		ONE = #1
		// exponent of the quotient before normalisation adjustments
114336817Sdim		EXPA = sub(EXPA,EXPB)
115336817Sdim	}
116336817Sdim#undef EXPB
117336817Sdim#define RECIPEST r13
118336817Sdim	{
		// first refinement step: error = 1.0 - recip*den
119336817Sdim		SFERROR -= sfmpy(SFRECIP,SFDEN):lib
		// overwrite bits 20..31 of REMHI with 0x001: sign and exponent
		// cleared, bit 20 (the hidden mantissa bit) set
120336817Sdim		REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
		// leading one of the fixed-point reciprocal, pre-shifted
121336817Sdim		RECIPEST = ##0x00800000 << RECIPEST_SHIFT
122336817Sdim	}
123336817Sdim	{
		// recip += recip*error
124336817Sdim		SFRECIP += sfmpy(SFRECIP,SFERROR):lib
		// same hidden-bit treatment for the denominator
125336817Sdim		DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
126336817Sdim		SFERROR = and(SFONE,#-2)
127336817Sdim	}
128336817Sdim	{
		// second refinement step
129336817Sdim		SFERROR -= sfmpy(SFRECIP,SFDEN):lib
		// bounds used for the exponent range check below
130336817Sdim		QH = #-DF_BIAS+1
131336817Sdim		QL = #DF_BIAS-1
132336817Sdim	}
133336817Sdim	{
134336817Sdim		SFRECIP += sfmpy(SFRECIP,SFERROR):lib
		// NO_OVF_UNF: -DF_BIAS+1 < EXPA and EXPA <= DF_BIAS-1
135336817Sdim		NO_OVF_UNF = cmp.gt(EXPA,QH)
136336817Sdim		NO_OVF_UNF = !cmp.gt(EXPA,QL)
137336817Sdim	}
138336817Sdim	{
		// place the 23 SF mantissa bits of the reciprocal under the
		// pre-loaded leading one
139336817Sdim		RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
140336817Sdim		Q = #0
141336817Sdim		EXPA = add(EXPA,#-QADJ)
142336817Sdim	}
143336817Sdim#undef SFERROR
144336817Sdim#undef SFRECIP
145336817Sdim#define TMP r10
146336817Sdim#define TMP1 r11
147336817Sdim	{
		// lower the estimate by 3 units at the insertion position.
		// NOTE(review): presumably so the estimate never exceeds the true
		// reciprocal -- confirm.
148336817Sdim		RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
149336817Sdim	}
150336817Sdim
// DIV_ITER1B: one quotient-refinement iteration (three packets).
//   1. PROD = RECIPEST * REMHI, and REM is shifted left by REMSHIFT.
//   2. PRODLO is zeroed; PRODHI*DENOMLO is subtracted from REM and
//      PRODHI*DENOMHI is formed in REMSUB2.
//   3. PROD (now PRODHI:0) is shifted by QSHIFT with QSHIFTINSN and added to
//      Q; REMSUB2<<32 is subtracted from REM; EXTRA is appended verbatim.
151336817Sdim#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
152336817Sdim	{ \
153336817Sdim		PROD = mpyu(RECIPEST,REMHI); \
154336817Sdim		REM = asl(REM,# ## ( REMSHIFT )); \
155336817Sdim	}; \
156336817Sdim	{ \
157336817Sdim		PRODLO = # ## 0; \
158336817Sdim		REM -= mpyu(PRODHI,DENOMLO); \
159336817Sdim		REMSUB2 = mpyu(PRODHI,DENOMHI); \
160336817Sdim	}; \
161336817Sdim	{ \
162336817Sdim		Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
163336817Sdim		REM -= asl(REMSUB2, # ## 32); \
164336817Sdim		EXTRA \
165336817Sdim	}
166336817Sdim
167336817Sdim
// Four iterations; each accumulates its partial quotient 15 bits lower in Q
// than the previous one.  The last iteration also clears PROD for the fixup.
168336817Sdim	DIV_ITER1B(ASL,14,15,)
169336817Sdim	DIV_ITER1B(ASR,1,15,)
170336817Sdim	DIV_ITER1B(ASR,16,15,)
171336817Sdim	DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
172336817Sdim
173336817Sdim#undef REMSUB2
174336817Sdim#define TMPPAIR r15:14
175336817Sdim#define TMPPAIRHI r15
176336817Sdim#define TMPPAIRLO r14
177336817Sdim#undef RECIPEST
178336817Sdim#define EXPB r13
// Final correction: if the remainder is still >= DENOM, subtract DENOM once
// more and add 2 to Q.
179336817Sdim	{
180336817Sdim		// compare or sub with carry
181336817Sdim		TMPPAIR = sub(REM,DENOM)
182336817Sdim		P_TMP = cmp.gtu(DENOM,REM)
183336817Sdim		// set up amt to add to q
184336817Sdim		if (!P_TMP.new) PRODLO  = #2
185336817Sdim	}
186336817Sdim	{
		// PROD is 0 or 2 here
187336817Sdim		Q = add(Q,PROD)
188336817Sdim		if (!P_TMP) REM = TMPPAIR
189336817Sdim		TMPPAIR = #0
190336817Sdim	}
191336817Sdim	{
		// a non-zero remainder sets the lsb of Q (sticky bit)
192336817Sdim		P_TMP = cmp.eq(REM,TMPPAIR)
193336817Sdim		if (!P_TMP.new) QL = or(QL,ONE)
194336817Sdim	}
195336817Sdim	{
196336817Sdim		PROD = neg(Q)
197336817Sdim	}
198336817Sdim	{
		// apply the result sign to the integer quotient
199336817Sdim		if (!Q_POSITIVE) Q = PROD
200336817Sdim	}
// r1:0 / r3:2 go back to being called A / B for result construction.
201336817Sdim#undef REM
202336817Sdim#undef REMHI
203336817Sdim#undef REMLO
204336817Sdim#undef DENOM
205336817Sdim#undef DENOMLO
206336817Sdim#undef DENOMHI
207336817Sdim#define A r1:0
208336817Sdim#define AH r1
209336817Sdim#define AL r0
210336817Sdim#define B r3:2
211336817Sdim#define BH r3
212336817Sdim#define BL r2
213336817Sdim	{
		// signed 64-bit quotient -> double
214336817Sdim		A = convert_d2df(Q)
215336817Sdim		if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
216336817Sdim	}
217336817Sdim	{
		// in-range case: add EXPA into the exponent field and return
218336817Sdim		AH += asl(EXPA,#DF_MANTBITS-32)
219336817Sdim		jumpr r31
220336817Sdim	}
221336817Sdim
// Reached when the exponent range check (NO_OVF_UNF) failed.  A already
// holds convert_d2df(Q).  Computes the biased exponent the result would have
// and dispatches to overflow, possible-underflow, or falls into the
// underflow code below.
222336817Sdim.Ldiv_ovf_unf:
223336817Sdim	{
224336817Sdim		AH += asl(EXPA,#DF_MANTBITS-32)
		// exponent field of the converted quotient
225336817Sdim		EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
226336817Sdim	}
227336817Sdim	{
228336817Sdim		PROD = abs(Q)
		// exponent adjustment plus converted exponent
229336817Sdim		EXPA = add(EXPA,EXPB)
230336817Sdim	}
231336817Sdim	{
232336817Sdim		P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)		// overflow
233336817Sdim		if (P_TMP.new) jump:nt .Ldiv_ovf
234336817Sdim	}
235336817Sdim	{
236336817Sdim		P_TMP = cmp.gt(EXPA,#0)
237336817Sdim		if (P_TMP.new) jump:nt .Lpossible_unf		// round up to normal possible...
238336817Sdim	}
239353358Sdim	// Underflow
240353358Sdim	// We know what the infinite range exponent should be (EXPA)
241353358Sdim	// Q is 2's complement, PROD is abs(Q)
242353358Sdim	// Normalize Q, shift right, add a high bit, convert, change exponent
243336817Sdim
244336817Sdim#define FUDGE1 7	// how much to shift right
245336817Sdim#define FUDGE2 4	// how many guard/round to keep at lsbs
246336817Sdim
247336817Sdim	{
248336817Sdim		EXPB = add(clb(PROD),#-1)			// doesn't need to be added in since
249336817Sdim		EXPA = sub(#FUDGE1,EXPA)			// we extract post-converted exponent
		// TMP = current USR, updated below if the result is inexact
250336817Sdim		TMP = USR
		// upper bound for the right-shift amount
251336817Sdim		TMP1 = #63
252336817Sdim	}
253336817Sdim	{
		// right-shift amount, clamped to 63
254336817Sdim		EXPB = min(EXPA,TMP1)
		// USR value with bits 0x30 set (Unf+Inexact per the comments in
		// .Lpossible_unf)
255336817Sdim		TMP1 = or(TMP,#0x030)
		// normalise abs(Q) using the clb-derived count from the
		// previous packet
256336817Sdim		PROD = asl(PROD,EXPB)
		// EXPA becomes the low word of EXPBA for the extractu below
257336817Sdim		EXPA = #0
258336817Sdim	}
259336817Sdim	{
260336817Sdim		TMPPAIR = extractu(PROD,EXPBA)				// bits that will get shifted out
261336817Sdim		PROD = lsr(PROD,EXPB)					// shift out bits
262336817Sdim		B = #1
263336817Sdim	}
264336817Sdim	{
		// if any shifted-out bit was set, OR a sticky 1 into the lsb
265336817Sdim		P_TMP = cmp.gtu(B,TMPPAIR)
266336817Sdim		if (!P_TMP.new) PRODLO = or(BL,PRODLO)
		// the "add a high bit" step named in the comment above
267336817Sdim		PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
268336817Sdim	}
269336817Sdim	{
270336817Sdim		Q = neg(PROD)
		// any of the FUDGE2 guard bits set -> select the flagged USR
271336817Sdim		P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
272336817Sdim		if (!P_TMP.new) TMP = TMP1
273336817Sdim	}
274336817Sdim	{
275336817Sdim		USR = TMP
		// Q = +/- PROD according to the result sign
276336817Sdim		if (Q_POSITIVE) Q = PROD
		// exponent correction applied after the conversion
277336817Sdim		TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
278336817Sdim	}
279336817Sdim	{
280336817Sdim		A = convert_d2df(Q)
281336817Sdim	}
282336817Sdim	{
283336817Sdim		AH += asl(TMP,#DF_MANTBITS-32)
284336817Sdim		jumpr r31
285336817Sdim	}
286336817Sdim
287336817Sdim
// Reached from .Ldiv_ovf_unf when the computed exponent is in (0, 2*DF_BIAS].
// A holds the rounded result and PROD holds abs(Q).
288336817Sdim.Lpossible_unf:
289353358Sdim	// If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
290353358Sdim	// The answer is correct, but we need to raise Underflow
291336817Sdim	{
		// B = abs(A): low 63 bits of the result
292336817Sdim		B = extractu(A,#63,#0)
293336817Sdim		TMPPAIR = combine(##0x00100000,#0)		// min normal
		// mask tested against PRODHI below
294336817Sdim		TMP = #0x7FFF
295336817Sdim	}
296336817Sdim	{
297336817Sdim		P_TMP = dfcmp.eq(TMPPAIR,B)		// Is everything zero in the rounded value...
298336817Sdim		P_TMP = bitsset(PRODHI,TMP)		// but a bunch of bits set in the unrounded abs(quotient)?
299336817Sdim	}
300336817Sdim
// The v60 variant issues the same two instructions unpacketed, in the
// opposite order.
301336817Sdim#if (__HEXAGON_ARCH__ == 60)
302336817Sdim		TMP = USR		// Read USR now; only used if we fall through
303336817Sdim		if (!P_TMP) jumpr r31   // If not, just return. Else, we want to set Unf+Inexact
304336817Sdim					// Note that inexact is already set...
305336817Sdim#else
306336817Sdim	{
307336817Sdim		if (!P_TMP) jumpr r31			// If not, just return
308336817Sdim		TMP = USR				// Else, we want to set Unf+Inexact
309336817Sdim	}						// Note that inexact is already set...
310336817Sdim#endif
311336817Sdim	{
312336817Sdim		TMP = or(TMP,#0x30)
313336817Sdim	}
314336817Sdim	{
315336817Sdim		USR = TMP
316336817Sdim	}
317336817Sdim	{
		// NOTE(review): comparing A with itself looks like a way to make
		// the newly set flags take effect -- confirm against the ISA
		// manual.
318336817Sdim		p0 = dfcmp.eq(A,A)
319336817Sdim		jumpr r31
320336817Sdim	}
321336817Sdim
// Reached from .Ldiv_ovf_unf when the computed exponent exceeds 2*DF_BIAS.
322336817Sdim.Ldiv_ovf:
323353358Sdim
324353358Sdim	// Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
325353358Sdim
326336817Sdim	{
327336817Sdim		TMP = USR
		// largest finite double magnitude
328336817Sdim		B = combine(##0x7fefffff,#-1)
		// AH = 0 for a positive result, all ones for a negative one;
		// only its top bit survives the final insert
329336817Sdim		AH = mux(Q_POSITIVE,#0,#-1)
330336817Sdim	}
331336817Sdim	{
		// +infinity bit pattern
332336817Sdim		PROD = combine(##0x7ff00000,#0)
		// 2-bit field at SR_ROUND_OFF in USR
333336817Sdim		QH = extractu(TMP,#2,#SR_ROUND_OFF)
		// NOTE(review): 0x28 is presumably Overflow+Inexact -- confirm
		// the USR flag layout
334336817Sdim		TMP = or(TMP,#0x28)
335336817Sdim	}
336336817Sdim	{
337336817Sdim		USR = TMP
		// fold the result sign into the low bit of the mode field
338336817Sdim		QH ^= lsr(AH,#31)
		// QL keeps the mode field as extracted, without the sign fold
339336817Sdim		QL = QH
340336817Sdim	}
341336817Sdim	{
342336817Sdim		p0 = !cmp.eq(QL,#1)		// if not round-to-zero
343336817Sdim		p0 = !cmp.eq(QH,#2)		// and not rounding the other way
344336817Sdim		if (p0.new) B = PROD		// go to inf
345336817Sdim		p0 = dfcmp.eq(B,B)		// get exceptions
346336817Sdim	}
347336817Sdim	{
		// copy the low 63 bits of B into A, leaving A's sign bit intact
348336817Sdim		A = insert(B,#63,#0)
349336817Sdim		jumpr r31
350336817Sdim	}
351336817Sdim
// Slow path taken from the entry sequence when A and B are not both normal.
// Classifies the operands, dispatches NaN / invalid / zero / infinity cases
// to their own labels, and otherwise normalises denormal operands before
// rejoining the main path at .Ldenorm_continue.
// r28 is SIGN again here; p1 and p2 are reused as P_INF / P_ZERO.
352336817Sdim#undef ONE
353336817Sdim#define SIGN r28
354336817Sdim#undef NORMAL
355336817Sdim#undef NO_OVF_UNF
356336817Sdim#define P_INF p1
357336817Sdim#define P_ZERO p2
358336817Sdim.Ldiv_abnormal:
359336817Sdim	{
		// P_TMP: both operands are in one of the DFCLASS_NUMBER classes
360336817Sdim		P_TMP = dfclass(A,#DFCLASS_NUMBER)
361336817Sdim		P_TMP = dfclass(B,#DFCLASS_NUMBER)
		// true when the sign bit of SIGN (AH^BH) is clear
362336817Sdim		Q_POSITIVE = cmp.gt(SIGN,#-1)
363336817Sdim	}
364336817Sdim	{
		// P_INF: both operands infinite
365336817Sdim		P_INF = dfclass(A,#DFCLASS_INFINITE)
366336817Sdim		P_INF = dfclass(B,#DFCLASS_INFINITE)
367336817Sdim	}
368336817Sdim	{
		// P_ZERO: both operands zero
369336817Sdim		P_ZERO = dfclass(A,#DFCLASS_ZERO)
370336817Sdim		P_ZERO = dfclass(B,#DFCLASS_ZERO)
371336817Sdim	}
372336817Sdim	{
373336817Sdim		if (!P_TMP) jump .Ldiv_nan
374336817Sdim		if (P_INF) jump .Ldiv_invalid
375336817Sdim	}
376336817Sdim	{
377336817Sdim		if (P_ZERO) jump .Ldiv_invalid
378336817Sdim	}
	// P_ZERO / P_INF are now reused with different meanings:
	//   !P_ZERO  <=>  A is zero or B is infinite      -> zero result
	//   !P_INF   <=>  A is infinite or B is zero      -> infinite result
379336817Sdim	{
380336817Sdim		P_ZERO = dfclass(A,#DFCLASS_NONZERO)		// nonzero
381336817Sdim		P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)	// non-infinite
382336817Sdim	}
383336817Sdim	{
384336817Sdim		P_INF = dfclass(A,#DFCLASS_NONINFINITE)	// non-infinite
385336817Sdim		P_INF = dfclass(B,#DFCLASS_NONZERO)	// nonzero
386336817Sdim	}
387336817Sdim	{
388336817Sdim		if (!P_ZERO) jump .Ldiv_zero_result
389336817Sdim		if (!P_INF) jump .Ldiv_inf_result
390336817Sdim	}
391353358Sdim	// Now we've narrowed it down to (de)normal / (de)normal
392353358Sdim	// Set up A/EXPA B/EXPB and go back
393336817Sdim#undef P_ZERO
394336817Sdim#undef P_INF
395336817Sdim#define P_TMP2 p1
396336817Sdim	{
		// P_TMP: A is normal, P_TMP2: B is normal
397336817Sdim		P_TMP = dfclass(A,#DFCLASS_NORMAL)
398336817Sdim		P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
		// hidden-bit position in the high word (bit 20)
399336817Sdim		TMP = ##0x00100000
400336817Sdim	}
401336817Sdim	{
		// keep the raw high words for exponent extraction below
402336817Sdim		EXPBA = combine(BH,AH)
403336817Sdim		AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
404336817Sdim		BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
405336817Sdim	}
406336817Sdim	{
407336817Sdim		if (P_TMP) AH = or(AH,TMP)				// if normal, add back in hidden bit
408336817Sdim		if (P_TMP2) BH = or(BH,TMP)				// if normal, add back in hidden bit
409336817Sdim	}
410336817Sdim	{
		// left-shift counts that bring each leading one up to bit 52
411336817Sdim		QH = add(clb(A),#-DF_EXPBITS)
412336817Sdim		QL = add(clb(B),#-DF_EXPBITS)
413336817Sdim		TMP = #1
414336817Sdim	}
415336817Sdim	{
416336817Sdim		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
417336817Sdim		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
418336817Sdim	}
419336817Sdim	{
420336817Sdim		A = asl(A,QH)
421336817Sdim		B = asl(B,QL)
		// for a non-normal operand the exponent becomes 1 - shift count
422336817Sdim		if (!P_TMP) EXPA = sub(TMP,QH)
423336817Sdim		if (!P_TMP2) EXPB = sub(TMP,QL)
424336817Sdim	}	// recreate values needed by resume code
425336817Sdim	{
		// same divisor-mantissa extraction as the entry sequence
426336817Sdim		PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
427336817Sdim	}
428336817Sdim	{
		// SFONE still holds the value loaded in the entry sequence
429336817Sdim		SFDEN = or(SFONE,PRODLO)
430336817Sdim		jump .Ldenorm_continue
431336817Sdim	}
432336817Sdim
// Result is a zero carrying the sign of AH^BH.
433336817Sdim.Ldiv_zero_result:
434336817Sdim	{
		// bit 31 of AH becomes the result sign
435336817Sdim		AH = xor(AH,BH)
436336817Sdim		B = #0
437336817Sdim	}
438336817Sdim	{
		// clear the low 63 bits of A, keeping only the sign bit
439336817Sdim		A = insert(B,#63,#0)
440336817Sdim		jumpr r31
441336817Sdim	}
// Result is an infinity carrying the sign of AH^BH.  When B is zero and A is
// not infinite, the 0x04 (DBZ) bit is also set in USR.
442336817Sdim.Ldiv_inf_result:
443336817Sdim	{
		// p2: B is zero and A is not infinite
444336817Sdim		p2 = dfclass(B,#DFCLASS_ZERO)
445336817Sdim		p2 = dfclass(A,#DFCLASS_NONINFINITE)
446336817Sdim	}
447336817Sdim	{
448336817Sdim		TMP = USR
		// skip the flag update when p2 is false
449336817Sdim		if (!p2) jump 1f
		// result sign in bit 31 of AH; the code at 1: relies on this on
		// both paths
450336817Sdim		AH = xor(AH,BH)
451336817Sdim	}
452336817Sdim	{
453336817Sdim		TMP = or(TMP,#0x04)		// DBZ
454336817Sdim	}
455336817Sdim	{
456336817Sdim		USR = TMP
457336817Sdim	}
458336817Sdim1:
459336817Sdim	{
		// +infinity bit pattern
460336817Sdim		B = combine(##0x7ff00000,#0)
461336817Sdim		p0 = dfcmp.uo(B,B)		// take possible exception
462336817Sdim	}
463336817Sdim	{
		// copy the low 63 bits of B into A, leaving A's sign bit intact
464336817Sdim		A = insert(B,#63,#0)
465336817Sdim		jumpr r31
466336817Sdim	}
// Reached when at least one operand is outside the DFCLASS_NUMBER classes.
// Returns the all-ones bit pattern in A.
// NOTE(review): class mask 0x10 is presumably the NaN class -- confirm
// against the Hexagon ISA manual.
467336817Sdim.Ldiv_nan:
468336817Sdim	{
469336817Sdim		p0 = dfclass(A,#0x10)
470336817Sdim		p1 = dfclass(B,#0x10)
		// replace an operand that is not in class 0x10 with the other
		// operand
471336817Sdim		if (!p0.new) A = B
472336817Sdim		if (!p1.new) B = A
473336817Sdim	}
474336817Sdim	{
		// the converted values in QH/QL are not used afterwards
475336817Sdim		QH = convert_df2sf(A)	// get possible invalid exceptions
476336817Sdim		QL = convert_df2sf(B)
477336817Sdim	}
478336817Sdim	{
		// all-ones 64-bit pattern as the result
479336817Sdim		A = #-1
480336817Sdim		jumpr r31
481336817Sdim	}
482336817Sdim
// Reached for inf/inf and 0/0.  Produces the result by converting a
// single-precision NaN pattern to double.
483336817Sdim.Ldiv_invalid:
484336817Sdim	{
		// SF pattern with all exponent bits set and mantissa = 1
485336817Sdim		TMP = ##0x7f800001
486336817Sdim	}
487336817Sdim	{
488336817Sdim		A = convert_sf2df(TMP)		// get invalid, get DF qNaN
489336817Sdim		jumpr r31
490336817Sdim	}
// record the ELF symbol size of the routine
491336817SdimEND(__hexagon_divdf3)
492