1336817Sdim//===----------------------Hexagon builtin routine ------------------------===//
2336817Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6336817Sdim//
7336817Sdim//===----------------------------------------------------------------------===//
8336817Sdim
9353358Sdim// Double Precision Add/Subtract
10336817Sdim
// Register and constant aliases used by the add/subtract routines below.
//
// A and B are the two operand register pairs; AH/BH and AL/BL name their
// high and low 32-bit halves. The result is produced in A.
11336817Sdim#define A r1:0
12336817Sdim#define AH r1
13336817Sdim#define AL r0
14336817Sdim#define B r3:2
15336817Sdim#define BH r3
16336817Sdim#define BL r2
17336817Sdim
// Exponent fields of A and B. EXPB_A names both registers as one pair so
// that the two exponents can be exchanged with a single combine().
18336817Sdim#define EXPA r4
19336817Sdim#define EXPB r5
20336817Sdim#define EXPB_A r5:4
21336817Sdim
// Scratch pair that holds small constants (shift cap, sticky bit,
// exponent bounds).
22336817Sdim#define ZTMP r7:6
23336817Sdim#define ZTMPH r7
24336817Sdim#define ZTMPL r6
25336817Sdim
// 64-bit fixed-point working copy of A; also receives the sum.
26336817Sdim#define ATMP r13:12
27336817Sdim#define ATMPH r13
28336817Sdim#define ATMPL r12
29336817Sdim
// 64-bit fixed-point working copy of B.
30336817Sdim#define BTMP r9:8
31336817Sdim#define BTMPH r9
32336817Sdim#define BTMPL r8
33336817Sdim
// Second scratch pair (negated value, shifted-out bits, difference).
34336817Sdim#define ATMP2 r11:10
35336817Sdim#define ATMP2H r11
36336817Sdim#define ATMP2L r10
37336817Sdim
// EXPDIFF (r15) and EXTRACTOFF (r14) together form the pair EXTRACTAMT,
// which is passed as the register operand of extractu(). The same pair is
// later renamed ZERO.
38336817Sdim#define EXPDIFF r15
39336817Sdim#define EXTRACTOFF r14
40336817Sdim#define EXTRACTAMT r15:14
41336817Sdim
// General 32-bit scratch register.
42336817Sdim#define TMP r28
43336817Sdim
// Field widths and numeric constants. BIAS is used as #-BIAS-60 for the
// exponent adjustment and as ##BIAS+BIAS-2 for the overflow threshold.
// MANTISSA_TO_INT_BIAS and SR_BIT_INEXACT are not referenced by the code
// visible in this file.
44336817Sdim#define MANTBITS 52
45336817Sdim#define HI_MANTBITS 20
46336817Sdim#define EXPBITS 11
47336817Sdim#define BIAS 1024
48336817Sdim#define MANTISSA_TO_INT_BIAS 52
49336817Sdim#define SR_BIT_INEXACT 5
50336817Sdim
// Bit offset of the 2-bit rounding-mode field inside USR. It may be
// overridden from the command line; 22 is the default.
51336817Sdim#ifndef SR_ROUND_OFF
52336817Sdim#define SR_ROUND_OFF 22
53336817Sdim#endif
54336817Sdim
// Predicates used at function entry. Both names are #undef'd later and
// p3/p2 are reused as B_POS/A_POS.
55336817Sdim#define NORMAL p3
56336817Sdim#define BIGB p2
57336817Sdim
// Alias helpers: each one exports an extra global symbol (__qdsp_TAG,
// __hexagon_fast_TAG, __hexagon_fast2_TAG) set equal to __hexagon_TAG.
// END(TAG) emits the .size directive for symbol TAG.
58336817Sdim#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
59336817Sdim#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
60336817Sdim#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
61336817Sdim#define END(TAG) .size TAG,.-TAG
62336817Sdim
// Place the code in .text, export both entry points as function symbols,
// and publish the __qdsp_, __hexagon_fast_ and __hexagon_fast2_ aliases
// for each of them.
63336817Sdim	.text
64336817Sdim	.global __hexagon_adddf3
65336817Sdim	.global __hexagon_subdf3
66336817Sdim	.type __hexagon_adddf3, @function
67336817Sdim	.type __hexagon_subdf3, @function
68336817Sdim
69336817SdimQ6_ALIAS(adddf3)
70336817SdimFAST_ALIAS(adddf3)
71336817SdimFAST2_ALIAS(adddf3)
72336817SdimQ6_ALIAS(subdf3)
73336817SdimFAST_ALIAS(subdf3)
74336817SdimFAST2_ALIAS(subdf3)
75336817Sdim
// __hexagon_adddf3: double-precision addition, A = A + B.
//
// Inputs are the register pairs A and B; the result is written to A and
// control returns through r31. The fast path below handles two normal
// operands. Other cases branch away:
//   .Ladd_abnormal  - at least one operand is not a normal number
//   .Ladd_ovf_unf   - EXPA is outside the range checked before conversion
//   .Ladd_zero      - the fixed-point sum is exactly zero
76336817Sdim	.p2align 5
77336817Sdim__hexagon_adddf3:
78336817Sdim	{
79336817Sdim		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)	// exponent field of A
80336817Sdim		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)	// exponent field of B
81336817Sdim		ATMP = combine(##0x20000000,#0)		// bit 61 set, just above the mantissa slot
82336817Sdim	}
83336817Sdim	{
		// NORMAL is written twice in this packet.
		// NOTE(review): this relies on same-packet predicate writes being
		// ANDed, so NORMAL means that A and B are both normal -- confirm
		// against the Hexagon reference manual.
84336817Sdim		NORMAL = dfclass(A,#2)
85336817Sdim		NORMAL = dfclass(B,#2)
86336817Sdim		BTMP = ATMP				// same seed value for B
87336817Sdim		BIGB = cmp.gtu(EXPB,EXPA)			// Does B have the larger exponent field?
88336817Sdim	}
89336817Sdim	{
90336817Sdim		if (!NORMAL) jump .Ladd_abnormal		// If abnormal, go to special code
91336817Sdim		if (BIGB) A = B				// if B has the larger exponent, swap A and B
92336817Sdim		if (BIGB) B = A				// (second half of the swap, same packet)
93336817Sdim		if (BIGB) EXPB_A = combine(EXPA,EXPB)	// swap exponents
94336817Sdim	}
95336817Sdim	{
96336817Sdim		ATMP = insert(A,#MANTBITS,#EXPBITS-2)	// Q1.62
97336817Sdim		BTMP = insert(B,#MANTBITS,#EXPBITS-2)	// Q1.62
98336817Sdim		EXPDIFF = sub(EXPA,EXPB)			// alignment shift for B
99336817Sdim		ZTMP = combine(#62,#1)			// ZTMPH = 62 (shift cap), ZTMPL = 1 (sticky bit)
100336817Sdim	}
// From here on p3 and p2 are reused under new names, and p1 records
// whether any bits were lost when B was shifted.
101336817Sdim#undef BIGB
102336817Sdim#undef NORMAL
103336817Sdim#define B_POS p3
104336817Sdim#define A_POS p2
105336817Sdim#define NO_STICKIES p1
// Common tail. Also entered from .Ladd_abnormal, which prepares ATMP, BTMP,
// EXPA, EXPDIFF and ZTMP itself before jumping here.
106336817Sdim.Ladd_continue:
107336817Sdim	{
108336817Sdim		EXPDIFF = min(EXPDIFF,ZTMPH)		// If exponent difference >= ~60,
109336817Sdim							// will collapse to sticky bit
110336817Sdim		ATMP2 = neg(ATMP)			// negated A, used when A is negative
111336817Sdim		A_POS = cmp.gt(AH,#-1)			// sign bit of A clear?
112336817Sdim		EXTRACTOFF = #0
113336817Sdim	}
114336817Sdim	{
115336817Sdim		if (!A_POS) ATMP = ATMP2			// ATMP = signed fixed-point value of A
		// NOTE(review): presumably EXTRACTAMT supplies width = EXPDIFF and
		// offset = 0, i.e. the low bits of BTMP that the shift below
		// discards -- verify the register-operand layout of extractu.
116336817Sdim		ATMP2 = extractu(BTMP,EXTRACTAMT)
117336817Sdim		BTMP = ASR(BTMP,EXPDIFF)			// align B with A
		// r15:14 is renamed; it is cleared in this same packet for the
		// compare in the next packet.
118336817Sdim#undef EXTRACTAMT
119336817Sdim#undef EXPDIFF
120336817Sdim#undef EXTRACTOFF
121336817Sdim#define ZERO r15:14
122336817Sdim		ZERO = #0
123336817Sdim	}
124336817Sdim	{
125336817Sdim		NO_STICKIES = cmp.eq(ATMP2,ZERO)		// were all extracted bits zero?
126336817Sdim		if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)	// if not, OR 1 into BTMP as a sticky bit
127336817Sdim		EXPB = add(EXPA,#-BIAS-60)		// adjustment later added to the converted exponent
128336817Sdim		B_POS = cmp.gt(BH,#-1)			// sign bit of B clear?
129336817Sdim	}
130336817Sdim	{
131336817Sdim		ATMP = add(ATMP,BTMP)			// ADD!!!
132336817Sdim		ATMP2 = sub(ATMP,BTMP)			// Negate and ADD --> SUB!!!
133336817Sdim		ZTMP = combine(#54,##2045)		// ZTMPH = 54, ZTMPL = 2045: bounds tested on EXPA
134336817Sdim	}
135336817Sdim	{
136336817Sdim		p0 = cmp.gtu(EXPA,ZTMPH)		// must be pretty high in case of large cancellation
137336817Sdim		p0 = !cmp.gtu(EXPA,ZTMPL)		// and must not exceed 2045
138336817Sdim		if (!p0.new) jump:nt .Ladd_ovf_unf	// otherwise finish on the careful path
139336817Sdim		if (!B_POS) ATMP = ATMP2		// if B neg, pick difference
140336817Sdim	}
141336817Sdim	{
142336817Sdim		A = convert_d2df(ATMP)			// Convert to Double Precision, taking care of flags, etc.  So nice!
143336817Sdim		p0 = cmp.eq(ATMPH,#0)			// p0: high word of the sum is zero
144336817Sdim		p0 = cmp.eq(ATMPL,#0)			// and the low word is zero as well
145336817Sdim		if (p0.new) jump:nt .Ladd_zero		// or maybe conversion handles zero case correctly?
146336817Sdim	}
147336817Sdim	{
148336817Sdim		AH += asl(EXPB,#HI_MANTBITS)		// add the adjustment into the exponent field
149336817Sdim		jumpr r31				// return with the result in A
150336817Sdim	}
// __hexagon_subdf3: double-precision subtraction, A = A - B.
// Flips bit 31 of BH (the sign bit of B) and continues in the addition
// routine through its __qdsp_adddf3 alias, which Q6_ALIAS(adddf3) sets
// equal to __hexagon_adddf3.
151336817Sdim	.falign
152336817Sdim__hexagon_subdf3:
153336817Sdim	{
154336817Sdim		BH = togglebit(BH,#31)
155336817Sdim		jump __qdsp_adddf3
156336817Sdim	}
157336817Sdim
158336817Sdim
159336817Sdim	.falign
160336817Sdim.Ladd_zero:
161336817Sdim	// True zero, full cancellation
162336817Sdim	// +0 unless round towards negative infinity
	//
	// Reached when the fixed-point sum in ATMP is exactly zero. Returns
	// the zero in A through r31; clobbers TMP, BH and p0.
	// The rounding field is read at SR_ROUND_OFF, the same offset that
	// .Ladd_ovf and .Lzero_plus_zero use, so that a build which overrides
	// SR_ROUND_OFF is honored here as well.
163336817Sdim	{
164336817Sdim		TMP = USR				// fetch the user status register
165336817Sdim		A = #0					// default result: +0.0
166336817Sdim		BH = #1
167336817Sdim	}
168336817Sdim	{
169336817Sdim		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// 2-bit rounding mode
170336817Sdim		BH = asl(BH,#31)			// BH = 0x80000000, the sign bit
171336817Sdim	}
172336817Sdim	{
173336817Sdim		p0 = cmp.eq(TMP,#2)			// mode 2: the round-toward-negative-infinity case noted above
174336817Sdim		if (p0.new) AH = xor(AH,BH)		// turn +0.0 into -0.0
175336817Sdim		jumpr r31
176336817Sdim	}
177336817Sdim	.falign
178336817Sdim.Ladd_ovf_unf:
179336817Sdim	// Overflow or Denormal is possible
180336817Sdim	// Good news: Underflow flag is not possible!
181353358Sdim
182353358Sdim	// ATMP has 2's complement value
183353358Sdim	//
184353358Sdim	// EXPA has A's exponent, EXPB has EXPA-BIAS-60
185353358Sdim	//
186353358Sdim	// Convert, extract exponent, add adjustment.
187353358Sdim	// If > 2046, overflow
188353358Sdim	// If <= 0, denormal
189353358Sdim	//
190353358Sdim	// Note that we've not done our zero check yet, so do that too
191353358Sdim
192336817Sdim	{
193336817Sdim		A = convert_d2df(ATMP)			// provisional result, exponent not yet adjusted
194336817Sdim		p0 = cmp.eq(ATMPH,#0)			// p0: high word of the sum is zero
195336817Sdim		p0 = cmp.eq(ATMPL,#0)			// and the low word is zero as well
196336817Sdim		if (p0.new) jump:nt .Ladd_zero		// exact cancellation
197336817Sdim	}
198336817Sdim	{
199336817Sdim		TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)	// exponent field produced by the conversion
200336817Sdim		AH += asl(EXPB,#HI_MANTBITS)		// apply the adjustment; result is used only if in range
201336817Sdim	}
202336817Sdim	{
203336817Sdim		EXPB = add(EXPB,TMP)			// adjusted exponent as a plain integer
204336817Sdim		B = combine(##0x00100000,#0)		// only bit 52 set; survives the insert below
205336817Sdim	}
206336817Sdim	{
207336817Sdim		p0 = cmp.gt(EXPB,##BIAS+BIAS-2)		// above 2046?
208336817Sdim		if (p0.new) jump:nt .Ladd_ovf		// yes: overflow handling
209336817Sdim	}
210336817Sdim	{
211336817Sdim		p0 = cmp.gt(EXPB,#0)			// exponent still positive?
212336817Sdim		if (p0.new) jumpr:t r31			// yes: A from the packet above is the answer
213336817Sdim		TMP = sub(#1,EXPB)			// no: right-shift count for the denormal case
214336817Sdim	}
215336817Sdim	{
216336817Sdim		B = insert(A,#MANTBITS,#0)		// low 52 bits of A below the bit set in B
217336817Sdim		A = ATMP				// 2's complement value; its bit 63 is kept below
218336817Sdim	}
219336817Sdim	{
220336817Sdim		B = lsr(B,TMP)				// shift right by 1 - EXPB
221336817Sdim	}
222336817Sdim	{
223336817Sdim		A = insert(B,#63,#0)			// low 63 bits from B, bit 63 of A untouched
224336817Sdim		jumpr r31
225336817Sdim	}
226336817Sdim	.falign
227336817Sdim.Ladd_ovf:
228336817Sdim	// We get either max finite value or infinity.  Either way, overflow+inexact
	// Entered from .Ladd_ovf_unf with the 2's complement sum in ATMP.
	// ORs 0x28 into USR, then returns max-finite or infinity in A with the
	// top bit of the sum kept as the sign.
229336817Sdim	{
230336817Sdim		A = ATMP				// 2's complement value
231336817Sdim		TMP = USR
232336817Sdim		ATMP = combine(##0x7fefffff,#-1)	// positive max finite
233336817Sdim	}
234336817Sdim	{
235336817Sdim		EXPB = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding bits
236336817Sdim		TMP = or(TMP,#0x28)			// inexact + overflow
237336817Sdim		BTMP = combine(##0x7ff00000,#0)		// positive infinity
238336817Sdim	}
239336817Sdim	{
240336817Sdim		USR = TMP				// write the flags back
241336817Sdim		EXPB ^= lsr(AH,#31)			// Does sign match rounding?
242336817Sdim		TMP = EXPB				// unmodified rounding mode
243336817Sdim	}
244336817Sdim	{
245336817Sdim		p0 = !cmp.eq(TMP,#1)			// If not round-to-zero and
246336817Sdim		p0 = !cmp.eq(EXPB,#2)			// Not rounding the other way,
247336817Sdim		if (p0.new) ATMP = BTMP			// we should get infinity
248336817Sdim	}
249336817Sdim	{
250336817Sdim		A = insert(ATMP,#63,#0)			// insert inf/maxfinite, leave sign
251336817Sdim	}
252336817Sdim	{
		// NOTE(review): p0 is not read again before the return in the
		// visible code; the purpose of this compare is not evident here.
253336817Sdim		p0 = dfcmp.eq(A,A)
254336817Sdim		jumpr r31
255336817Sdim	}
256336817Sdim
// Slow path, taken from the function entry when NORMAL is false.
// Orders the operands so that A has the larger magnitude, then dispatches
// on NaN, infinity, zero and subnormal inputs.
257336817Sdim.Ladd_abnormal:
258336817Sdim	{
259336817Sdim		ATMP = extractu(A,#63,#0)		// strip off sign
260336817Sdim		BTMP = extractu(B,#63,#0)		// strip off sign
261336817Sdim	}
262336817Sdim	{
263336817Sdim		p3 = cmp.gtu(ATMP,BTMP)			// is the magnitude of A strictly larger?
264336817Sdim		if (!p3.new) A = B			// sort values
265336817Sdim		if (!p3.new) B = A			// sort values
266336817Sdim	}
267336817Sdim	{
268336817Sdim		// Any NaN --> NaN, possibly raise invalid if sNaN
269336817Sdim		p0 = dfclass(A,#0x0f)		// A not NaN?
270336817Sdim		if (!p0.new) jump:nt .Linvalid_nan_add
271336817Sdim		if (!p3) ATMP = BTMP		// keep the magnitudes in step with the swap above
272336817Sdim		if (!p3) BTMP = ATMP
273336817Sdim	}
274336817Sdim	{
275336817Sdim		// Infinity + non-infinity number is infinity
276336817Sdim		// Infinity + infinity --> inf or nan
277336817Sdim		p1 = dfclass(A,#0x08)		// A is infinity
278336817Sdim		if (p1.new) jump:nt .Linf_add
279336817Sdim	}
280336817Sdim	{
281336817Sdim		p2 = dfclass(B,#0x01)		// B is zero
282336817Sdim		if (p2.new) jump:nt .LB_zero	// so return A or special 0+0
283336817Sdim		ATMP = #0			// .LB_zero compares A against this zero
284336817Sdim	}
285336817Sdim	// We are left with adding one or more subnormals
286336817Sdim	{
287336817Sdim		p0 = dfclass(A,#4)		// A, the larger operand, is subnormal
288336817Sdim		if (p0.new) jump:nt .Ladd_two_subnormal
289336817Sdim		ATMP = combine(##0x20000000,#0)	// same seed as the fast path
290336817Sdim	}
	// Here A is normal and B is subnormal: build the same fixed-point
	// operands as the fast path, with B given exponent 1 and no bit
	// above its mantissa.
291336817Sdim	{
292336817Sdim		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
293336817Sdim		EXPB = #1
294336817Sdim		// BTMP already ABS(B)
295336817Sdim		BTMP = asl(BTMP,#EXPBITS-2)
296336817Sdim	}
// r15:14 returns to its original roles before rejoining .Ladd_continue.
297336817Sdim#undef ZERO
298336817Sdim#define EXTRACTOFF r14
299336817Sdim#define EXPDIFF r15
300336817Sdim	{
301336817Sdim		ATMP = insert(A,#MANTBITS,#EXPBITS-2)
302336817Sdim		EXPDIFF = sub(EXPA,EXPB)
303336817Sdim		ZTMP = combine(#62,#1)			// ZTMPH = 62, ZTMPL = 1, as on the fast path
304336817Sdim		jump .Ladd_continue
305336817Sdim	}
306336817Sdim
// A is subnormal, and B is no larger in magnitude and not zero.
// The sum is formed with 64-bit integer arithmetic on the signed
// magnitudes and converted back to sign-and-magnitude form in A.
307336817Sdim.Ladd_two_subnormal:
308336817Sdim	{
309336817Sdim		ATMP = extractu(A,#63,#0)		// magnitude of A
310336817Sdim		BTMP = extractu(B,#63,#0)		// magnitude of B
311336817Sdim	}
312336817Sdim	{
313336817Sdim		ATMP = neg(ATMP)			// negated magnitudes, kept for negative inputs
314336817Sdim		BTMP = neg(BTMP)
315336817Sdim		p0 = cmp.gt(AH,#-1)			// sign bit of A clear?
316336817Sdim		p1 = cmp.gt(BH,#-1)			// sign bit of B clear?
317336817Sdim	}
318336817Sdim	{
319336817Sdim		if (p0) ATMP = A			// non-negative input: its bit pattern is its magnitude
320336817Sdim		if (p1) BTMP = B
321336817Sdim	}
322336817Sdim	{
323336817Sdim		ATMP = add(ATMP,BTMP)			// signed integer sum
324336817Sdim	}
325336817Sdim	{
326336817Sdim		BTMP = neg(ATMP)			// magnitude to use if the sum is negative
327336817Sdim		p0 = cmp.gt(ATMPH,#-1)			// sum non-negative?
328336817Sdim		B = #0
329336817Sdim	}
330336817Sdim	{
331336817Sdim		if (!p0) A = BTMP			// negative sum: take the magnitude
332336817Sdim		if (p0) A = ATMP			// non-negative sum: take it as is
333336817Sdim		BH = ##0x80000000			// B now holds only the sign bit
334336817Sdim	}
335336817Sdim	{
336336817Sdim		if (!p0) AH = or(AH,BH)			// reattach the sign for a negative sum
337336817Sdim		p0 = dfcmp.eq(A,B)			// floating-point compare of the sum against B
338336817Sdim		if (p0.new) jump:nt .Lzero_plus_zero	// equal: let the zero handler choose the sign
339336817Sdim	}
340336817Sdim	{
341336817Sdim		jumpr r31
342336817Sdim	}
343336817Sdim
// A failed the not-NaN test in .Ladd_abnormal.
// Both operands are passed through convert_df2sf for its side effect
// (see the inline comments), and A is returned with all 64 bits set.
344336817Sdim.Linvalid_nan_add:
345336817Sdim	{
346336817Sdim		TMP = convert_df2sf(A)			// will generate invalid if sNaN
347336817Sdim		p0 = dfclass(B,#0x0f)			// if B is not NaN
348336817Sdim		if (p0.new) B = A 			// make it whatever A is
349336817Sdim	}
350336817Sdim	{
351336817Sdim		BL = convert_df2sf(B)			// will generate invalid if sNaN
352336817Sdim		A = #-1					// result: all-ones bit pattern
353336817Sdim		jumpr r31
354336817Sdim	}
// B is zero. .Ladd_abnormal clears ATMP in the packet that branches here,
// so the compare below tests A against zero.
355336817Sdim	.falign
356336817Sdim.LB_zero:
357336817Sdim	{
358336817Sdim		p0 = dfcmp.eq(ATMP,A)			// is A also zero?
359336817Sdim		if (!p0.new) jumpr:t r31		// If not, just return A
360336817Sdim	}
361336817Sdim	// 0 + 0 is special
362336817Sdim	// if equal integral values, they have the same sign, which is fine for all rounding
363336817Sdim	// modes.
364336817Sdim	// If unequal in sign, we get +0 for all rounding modes except round down
	// Also entered from .Ladd_two_subnormal, with B holding only the sign bit.
365336817Sdim.Lzero_plus_zero:
366336817Sdim	{
367336817Sdim		p0 = cmp.eq(A,B)			// 64-bit integer compare of the bit patterns
368336817Sdim		if (p0.new) jumpr:t r31			// identical: return A unchanged
369336817Sdim	}
370336817Sdim	{
371336817Sdim		TMP = USR
372336817Sdim	}
373336817Sdim	{
374336817Sdim		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// 2-bit rounding mode
375336817Sdim		A = #0					// default result: +0.0
376336817Sdim	}
377336817Sdim	{
378336817Sdim		p0 = cmp.eq(TMP,#2)			// mode 2: the round-down case described above
379336817Sdim		if (p0.new) AH = ##0x80000000		// then return -0.0 instead
380336817Sdim		jumpr r31
381336817Sdim	}
// A is infinite (tested in .Ladd_abnormal). A is returned unchanged
// unless B is also infinite and its high word differs from that of A.
382336817Sdim.Linf_add:
383336817Sdim	// adding infinities is only OK if they are equal
384336817Sdim	{
385336817Sdim		p0 = !cmp.eq(AH,BH)			// Do they have different signs
386336817Sdim		p0 = dfclass(B,#8)			// And is B also infinite?
387336817Sdim		if (!p0.new) jumpr:t r31		// If not, just a normal inf
388336817Sdim	}
389336817Sdim	{
390336817Sdim		BL = ##0x7f800001			// sNAN
391336817Sdim	}
392336817Sdim	{
393336817Sdim		A = convert_sf2df(BL)			// trigger invalid, set NaN
394336817Sdim		jumpr r31
395336817Sdim	}
// Emits the .size directive for __hexagon_adddf3, measured from its label
// to this point.
396336817SdimEND(__hexagon_adddf3)
397