dfaddsub.S revision 337136
1//===----------------------Hexagon builtin routine ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is dual licensed under the MIT and the University of Illinois Open
6// Source Licenses. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10/* Double Precision Add/Subtract */
11
// Register aliases and constants shared by the add/subtract routines below.
//
// A (r1:0) and B (r3:2) hold the two double-precision operands on entry; the
// result is left in A before each `jumpr r31`.  The *H / *L names select the
// high and low 32-bit halves of the corresponding register pair.
12#define A r1:0
13#define AH r1
14#define AL r0
15#define B r3:2
16#define BH r3
17#define BL r2
18
// Biased exponent fields of A and B; EXPB_A names the pair so both can be
// swapped with a single combine().
19#define EXPA r4
20#define EXPB r5
21#define EXPB_A r5:4
22
// Scratch pair used for small constant pairs (e.g. combine(#62,#1)).
23#define ZTMP r7:6
24#define ZTMPH r7
25#define ZTMPL r6
26
// 64-bit working copies of the operand mantissas (ATMP, BTMP) and a second
// temporary (ATMP2).
27#define ATMP r13:12
28#define ATMPH r13
29#define ATMPL r12
30
31#define BTMP r9:8
32#define BTMPH r9
33#define BTMPL r8
34
35#define ATMP2 r11:10
36#define ATMP2H r11
37#define ATMP2L r10
38
// EXPDIFF (r15) and EXTRACTOFF (r14) together form EXTRACTAMT (r15:14), the
// register-pair operand passed to extractu() in .Ladd_continue.
// These three names are #undef'd inside that packet and r15:14 is renamed ZERO.
39#define EXPDIFF r15
40#define EXTRACTOFF r14
41#define EXTRACTAMT r15:14
42
// General 32-bit scratch register.
43#define TMP r28
44
// Field widths/offsets used with extractu/insert/asl on the operands.
// BIAS is used below as `EXPA-BIAS-60` and as the `BIAS+BIAS-2` overflow limit.
// MANTISSA_TO_INT_BIAS and SR_BIT_INEXACT are not referenced in the code
// visible in this file section.
45#define MANTBITS 52
46#define HI_MANTBITS 20
47#define EXPBITS 11
48#define BIAS 1024
49#define MANTISSA_TO_INT_BIAS 52
50#define SR_BIT_INEXACT 5
51
// Bit offset of the 2-bit rounding-mode field extracted from USR; may be
// overridden by defining SR_ROUND_OFF before this point.
52#ifndef SR_ROUND_OFF
53#define SR_ROUND_OFF 22
54#endif
55
// Predicate aliases for the entry packets of __hexagon_adddf3.  Both are
// #undef'd after the entry sequence and p3/p2 are reused as B_POS/A_POS.
56#define NORMAL p3
57#define BIGB p2
58
// Each *_ALIAS(TAG) macro exports an additional global symbol and sets it equal
// to __hexagon_TAG.  END(TAG) emits the .size directive for symbol TAG.
59#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
60#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
61#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
62#define END(TAG) .size TAG,.-TAG
63
// Place the routines in the text section and export both entry points as
// function symbols.
64	.text
65	.global __hexagon_adddf3
66	.global __hexagon_subdf3
67	.type __hexagon_adddf3, @function
68	.type __hexagon_subdf3, @function
69
// Additional exported names for each entry point: __qdsp_*, __hexagon_fast_*
// and __hexagon_fast2_* are all set equal to the __hexagon_* symbol.
70Q6_ALIAS(adddf3)
71FAST_ALIAS(adddf3)
72FAST2_ALIAS(adddf3)
73Q6_ALIAS(subdf3)
74FAST_ALIAS(subdf3)
75FAST2_ALIAS(subdf3)
76
// __hexagon_adddf3: double-precision addition.
//   In:  A (r1:0), B (r3:2) - the two operands.
//   Out: A - the sum; returns with `jumpr r31`.
// Fast path for two normal operands.  Anything else branches to
// .Ladd_abnormal, which may re-enter at .Ladd_continue.  Results whose exponent
// may leave the normal range branch to .Ladd_ovf_unf, and an all-zero
// fixed-point sum branches to .Ladd_zero.
// Uses r4-r15 and r28 plus predicates p0-p3 as scratch.
77	.p2align 5
78__hexagon_adddf3:
79	{
		// Pull out both exponent fields and preload the constant that the
		// mantissas are inserted into below (only bit 61 of the pair is set).
80		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
81		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
82		ATMP = combine(##0x20000000,#0)
83	}
84	{
		// Both dfclass results target NORMAL in one packet; the branch in the
		// next packet is taken unless both A and B are class #2.
		// NOTE(review): relies on same-packet predicate writes being ANDed,
		// as the comments in .Linf_add also imply - confirm in the ISA manual.
85		NORMAL = dfclass(A,#2)
86		NORMAL = dfclass(B,#2)
87		BTMP = ATMP
88		BIGB = cmp.gtu(EXPB,EXPA)			// Is B substantially greater than A?
89	}
90	{
		// The two conditional moves read the pre-packet values, so together
		// they exchange A and B when BIGB is set.
91		if (!NORMAL) jump .Ladd_abnormal		// If abnormal, go to special code
92		if (BIGB) A = B				// if B >> A, swap A and B
93		if (BIGB) B = A				// If B >> A, swap A and B
94		if (BIGB) EXPB_A = combine(EXPA,EXPB)	// swap exponents
95	}
96	{
		// Insert each 52-bit mantissa at bit offset EXPBITS-2 under the preset
		// bit 61.  ZTMP = {62, 1}: ZTMPH caps the shift, ZTMPL is the sticky bit.
97		ATMP = insert(A,#MANTBITS,#EXPBITS-2)	// Q1.62
98		BTMP = insert(B,#MANTBITS,#EXPBITS-2)	// Q1.62
99		EXPDIFF = sub(EXPA,EXPB)
100		ZTMP = combine(#62,#1)
101	}
// From here on p3/p2/p1 are reused under new names.
102#undef BIGB
103#undef NORMAL
104#define B_POS p3
105#define A_POS p2
106#define NO_STICKIES p1
// Common tail, also entered from .Ladd_abnormal.  On entry the code reads
// ATMP/BTMP (aligned magnitudes), EXPA, EXPDIFF, ZTMP = {62, 1} and the
// sign bits of AH/BH.
107.Ladd_continue:
108	{
109		EXPDIFF = min(EXPDIFF,ZTMPH)		// If exponent difference >= ~60,
110							// will collapse to sticky bit
111		ATMP2 = neg(ATMP)
112		A_POS = cmp.gt(AH,#-1)
113		EXTRACTOFF = #0
114	}
115	{
		// If A is negative, replace its magnitude with the negated value.
		// ATMP2 receives the low EXPDIFF bits of BTMP (width r15, offset r14 = 0),
		// i.e. the bits the arithmetic shift below discards.
		// r15:14 is then renamed ZERO and cleared in this same packet.
116		if (!A_POS) ATMP = ATMP2
117		ATMP2 = extractu(BTMP,EXTRACTAMT)
118		BTMP = ASR(BTMP,EXPDIFF)
119#undef EXTRACTAMT
120#undef EXPDIFF
121#undef EXTRACTOFF
122#define ZERO r15:14
123		ZERO = #0
124	}
125	{
		// If any discarded bit was set, OR ZTMPL (= 1) into BTMP's low word
		// as a sticky bit.  EXPB is reused to hold EXPA-BIAS-60, which is added
		// into the exponent field after the conversion below.
126		NO_STICKIES = cmp.eq(ATMP2,ZERO)
127		if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
128		EXPB = add(EXPA,#-BIAS-60)
129		B_POS = cmp.gt(BH,#-1)
130	}
131	{
		// Compute both sum and difference; the one matching B's sign is picked
		// in the next packet.  ZTMP = {54, 2045} bounds EXPA for the fast path.
132		ATMP = add(ATMP,BTMP)			// ADD!!!
133		ATMP2 = sub(ATMP,BTMP)			// Negate and ADD --> SUB!!!
134		ZTMP = combine(#54,##2045)
135	}
136	{
		// Stay on the fast path only when 54 < EXPA <= 2045 (unsigned compares).
137		p0 = cmp.gtu(EXPA,ZTMPH)		// must be pretty high in case of large cancellation
138		p0 = !cmp.gtu(EXPA,ZTMPL)
139		if (!p0.new) jump:nt .Ladd_ovf_unf
140		if (!B_POS) ATMP = ATMP2		// if B neg, pick difference
141	}
142	{
		// p0 is set only when both halves of ATMP are zero, i.e. the sum is 0.
143		A = convert_d2df(ATMP)			// Convert to Double Precision, taking care of flags, etc.  So nice!
144		p0 = cmp.eq(ATMPH,#0)
145		p0 = cmp.eq(ATMPL,#0)
146		if (p0.new) jump:nt .Ladd_zero		// or maybe conversion handles zero case correctly?
147	}
148	{
		// Add the exponent adjustment directly into the exponent field of the
		// converted value and return.
149		AH += asl(EXPB,#HI_MANTBITS)
150		jumpr r31
151	}
152	.falign
// __hexagon_subdf3: double-precision subtraction, computed as A + (-B).
//   In:  A (r1:0), B (r3:2).
//   Out: A, produced by __hexagon_adddf3, which also performs the return.
153__hexagon_subdf3:
154	{
155		BH = togglebit(BH,#31)			// flip the sign bit of B
		// Tail-call the canonical entry point defined in this file rather than
		// the __qdsp_ compatibility alias, so this routine does not depend on
		// the Q6_ALIAS macro being present.
156		jump __hexagon_adddf3
157	}
158
159
160	.falign
// .Ladd_zero: the fixed-point sum was exactly zero.
//   Out: A = +0.0, or -0.0 when the USR rounding-mode field equals 2.
161.Ladd_zero:
162	// True zero, full cancellation
163	// +0 unless round towards negative infinity
164	{
165		TMP = USR
166		A = #0
167		BH = #1
168	}
169	{
		// Read the 2-bit rounding mode through SR_ROUND_OFF, as the other USR
		// readers in this file do, so an overridden offset is honoured here too.
170		TMP = extractu(TMP,#2,#SR_ROUND_OFF)
171		BH = asl(BH,#31)			// BH = sign-bit mask (bit 31)
172	}
173	{
174		p0 = cmp.eq(TMP,#2)
175		if (p0.new) AH = xor(AH,BH)		// AH is 0 here, so this sets the sign bit
176		jumpr r31
177	}
178	.falign
// .Ladd_ovf_unf: slow path taken when EXPA is outside (54, 2045], so the result
// exponent may overflow or fall into the denormal range.
//   Out: A = result; branches to .Ladd_zero or .Ladd_ovf where needed.
179.Ladd_ovf_unf:
180	// Overflow or Denormal is possible
181	// Good news: Underflow flag is not possible!
182	/*
183	 * ATMP has 2's complement value
184	 *
185	 * EXPA has A's exponent, EXPB has EXPA-BIAS-60
186	 *
187	 * Convert, extract exponent, add adjustment.
188	 * If > 2046, overflow
189	 * If <= 0, denormal
190	 *
191	 * Note that we've not done our zero check yet, so do that too
192	 *
193	 */
194	{
		// Same convert + zero test as the fast path.
195		A = convert_d2df(ATMP)
196		p0 = cmp.eq(ATMPH,#0)
197		p0 = cmp.eq(ATMPL,#0)
198		if (p0.new) jump:nt .Ladd_zero
199	}
200	{
		// TMP = exponent field produced by the conversion (read before the
		// adjustment in this packet is applied).
201		TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
202		AH += asl(EXPB,#HI_MANTBITS)
203	}
204	{
		// EXPB = final exponent as a signed integer.  B is preset with only
		// bit 52 set, the position just above the 52-bit mantissa field.
205		EXPB = add(EXPB,TMP)
206		B = combine(##0x00100000,#0)
207	}
208	{
209		p0 = cmp.gt(EXPB,##BIAS+BIAS-2)		// exponent > 2046: overflow
210		if (p0.new) jump:nt .Ladd_ovf
211	}
212	{
		// Exponent > 0: A already holds the adjusted normal result.
		// Otherwise TMP = 1 - EXPB is the right-shift count for the denormal.
213		p0 = cmp.gt(EXPB,#0)
214		if (p0.new) jumpr:t r31
215		TMP = sub(#1,EXPB)
216	}
217	{
		// B = bit 52 plus the result mantissa; A = the two's-complement sum,
		// whose bit 63 is kept as the sign by the final insert.
218		B = insert(A,#MANTBITS,#0)
219		A = ATMP
220	}
221	{
222		B = lsr(B,TMP)				// shift into denormal position
223	}
224	{
225		A = insert(B,#63,#0)			// replace the low 63 bits, keep bit 63
226		jumpr r31
227	}
228	.falign
// .Ladd_ovf: the result exponent overflowed.
//   In:  ATMP = two's-complement sum (its bit 63 supplies the result sign).
//   Out: A = max finite value or infinity with that sign, chosen from the USR
//        rounding mode; USR has bits 0x28 ORed in.
229.Ladd_ovf:
230	// We get either max finite value or infinity.  Either way, overflow+inexact
231	{
232		A = ATMP				// 2's complement value
233		TMP = USR
234		ATMP = combine(##0x7fefffff,#-1)	// positive max finite
235	}
236	{
237		EXPB = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding bits
238		TMP = or(TMP,#0x28)			// inexact + overflow
239		BTMP = combine(##0x7ff00000,#0)		// positive infinity
240	}
241	{
		// All three reads use pre-packet values: USR gets the flagged TMP,
		// TMP keeps the raw rounding mode, and EXPB becomes mode XOR sign bit.
242		USR = TMP
243		EXPB ^= lsr(AH,#31)			// Does sign match rounding?
244		TMP = EXPB				// unmodified rounding mode
245	}
246	{
247		p0 = !cmp.eq(TMP,#1)			// If not round-to-zero and
248		p0 = !cmp.eq(EXPB,#2)			// Not rounding the other way,
249		if (p0.new) ATMP = BTMP			// we should get infinity
250	}
251	{
252		A = insert(ATMP,#63,#0)			// insert inf/maxfinite, leave sign
253	}
254	{
		// NOTE(review): the p0 result is not consumed before returning;
		// presumably this compare is issued for its effect on FP status/exception
		// handling after the USR write - confirm against the ISA manual.
255		p0 = dfcmp.eq(A,A)
256		jumpr r31
257	}
258
// .Ladd_abnormal: reached when A and B are not both normal.
// Sorts the operands so that |A| >= |B|, then dispatches on class:
//   NaN       -> .Linvalid_nan_add
//   infinity  -> .Linf_add
//   B is zero -> .LB_zero
//   A is subnormal (so both are) -> .Ladd_two_subnormal
// Otherwise rebuilds the fixed-point operands and rejoins .Ladd_continue.
259.Ladd_abnormal:
260	{
261		ATMP = extractu(A,#63,#0)		// strip off sign
262		BTMP = extractu(B,#63,#0)		// strip off sign
263	}
264	{
		// Unless |A| > |B|, exchange A and B (both moves read old values).
265		p3 = cmp.gtu(ATMP,BTMP)
266		if (!p3.new) A = B			// sort values
267		if (!p3.new) B = A			// sort values
268	}
269	{
270		// Any NaN --> NaN, possibly raise invalid if sNaN
271		p0 = dfclass(A,#0x0f)		// A not NaN?
272		if (!p0.new) jump:nt .Linvalid_nan_add
		// Keep the magnitude copies in step with the swap above.
273		if (!p3) ATMP = BTMP
274		if (!p3) BTMP = ATMP
275	}
276	{
277		// Infinity + non-infinity number is infinity
278		// Infinity + infinity --> inf or nan
279		p1 = dfclass(A,#0x08)		// A is infinity
280		if (p1.new) jump:nt .Linf_add
281	}
282	{
283		p2 = dfclass(B,#0x01)		// B is zero
284		if (p2.new) jump:nt .LB_zero	// so return A or special 0+0
		// ATMP = 0 is what .LB_zero compares A against.
285		ATMP = #0
286	}
287	// We are left with adding one or more subnormals
288	{
289		p0 = dfclass(A,#4)
290		if (p0.new) jump:nt .Ladd_two_subnormal
		// Same constant as the fast path: only bit 61 set, above the mantissa.
291		ATMP = combine(##0x20000000,#0)
292	}
293	{
		// B's exponent is forced to 1 and its magnitude is shifted to the same
		// bit offset the fast path uses for mantissas.
294		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
295		EXPB = #1
296		// BTMP already ABS(B)
297		BTMP = asl(BTMP,#EXPBITS-2)
298	}
// Restore the r15/r14 names used by the first packet of .Ladd_continue; they
// were #undef'd there and r15:14 was renamed ZERO.
299#undef ZERO
300#define EXTRACTOFF r14
301#define EXPDIFF r15
302	{
		// Recreate the state the fast path has on reaching .Ladd_continue.
303		ATMP = insert(A,#MANTBITS,#EXPBITS-2)
304		EXPDIFF = sub(EXPA,EXPB)
305		ZTMP = combine(#62,#1)
306		jump .Ladd_continue
307	}
308
// .Ladd_two_subnormal: both operands are subnormal.  The sum is formed with
// 64-bit integer arithmetic on the signed magnitudes and then converted back
// to sign-and-magnitude form.
//   Out: A = result; an exactly-zero sum goes to .Lzero_plus_zero.
309.Ladd_two_subnormal:
310	{
311		ATMP = extractu(A,#63,#0)		// |A|
312		BTMP = extractu(B,#63,#0)		// |B|
313	}
314	{
		// Negate both magnitudes and record which operands are non-negative.
315		ATMP = neg(ATMP)
316		BTMP = neg(BTMP)
317		p0 = cmp.gt(AH,#-1)
318		p1 = cmp.gt(BH,#-1)
319	}
320	{
		// Non-negative operands use their raw bits instead of the negated
		// magnitude, giving two's-complement values for both.
321		if (p0) ATMP = A
322		if (p1) BTMP = B
323	}
324	{
325		ATMP = add(ATMP,BTMP)
326	}
327	{
		// p0 = sum is non-negative; BTMP = -sum for the negative case.
328		BTMP = neg(ATMP)
329		p0 = cmp.gt(ATMPH,#-1)
330		B = #0
331	}
332	{
		// A = |sum|.  B becomes 0x80000000:00000000 once BH is written.
333		if (!p0) A = BTMP
334		if (p0) A = ATMP
335		BH = ##0x80000000
336	}
337	{
		// Set the sign bit for a negative sum (uses the old p0).  The compare
		// reads the pre-packet A and routes a zero sum to the 0+0 handling.
338		if (!p0) AH = or(AH,BH)
339		p0 = dfcmp.eq(A,B)
340		if (p0.new) jump:nt .Lzero_plus_zero
341	}
342	{
343		jumpr r31
344	}
345
// .Linvalid_nan_add: at least one operand is NaN (A is NaN after sorting).
// Both operands are run through convert_df2sf only for its side effect (see
// the inline comments); the converted values are discarded.
//   Out: A = all ones (#-1).
346.Linvalid_nan_add:
347	{
348		TMP = convert_df2sf(A)			// will generate invalid if sNaN
349		p0 = dfclass(B,#0x0f)			// if B is not NaN
350		if (p0.new) B = A 			// make it whatever A is
351	}
352	{
353		BL = convert_df2sf(B)			// will generate invalid if sNaN
354		A = #-1
355		jumpr r31
356	}
357	.falign
// .LB_zero: B is zero.  ATMP was cleared by the caller packet in
// .Ladd_abnormal, so the compare below tests whether A is zero as well.
//   Out: A unchanged if it is non-zero; otherwise falls into .Lzero_plus_zero.
358.LB_zero:
359	{
360		p0 = dfcmp.eq(ATMP,A)			// is A also zero?
361		if (!p0.new) jumpr:t r31		// If not, just return A
362	}
363	// 0 + 0 is special
364	// if equal integral values, they have the same sign, which is fine for all rounding
365	// modes.
366	// If unequal in sign, we get +0 for all rounding modes except round down
// .Lzero_plus_zero: also entered from .Ladd_two_subnormal.
//   Out: A unchanged when A and B are bit-identical; otherwise +0.0, or -0.0
//        when the USR rounding-mode field equals 2.
367.Lzero_plus_zero:
368	{
369		p0 = cmp.eq(A,B)			// 64-bit integer compare: same bits
370		if (p0.new) jumpr:t r31
371	}
372	{
373		TMP = USR
374	}
375	{
376		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// 2-bit rounding mode
377		A = #0
378	}
379	{
380		p0 = cmp.eq(TMP,#2)
381		if (p0.new) AH = ##0x80000000		// sign bit only: -0.0
382		jumpr r31
383	}
// .Linf_add: A is infinite (|A| >= |B| after sorting).
//   Out: A unchanged unless B is also infinite with a different high word;
//        in that case A = NaN produced by converting a single-precision sNaN.
384.Linf_add:
385	// adding infinities is only OK if they are equal
386	{
		// p0 is set only if the high words differ AND B is infinite.
387		p0 = !cmp.eq(AH,BH)			// Do they have different signs
388		p0 = dfclass(B,#8)			// And is B also infinite?
389		if (!p0.new) jumpr:t r31		// If not, just a normal inf
390	}
391	{
392		BL = ##0x7f800001			// sNAN
393	}
394	{
395		A = convert_sf2df(BL)			// trigger invalid, set NaN
396		jumpr r31
397	}
// Emit the .size directive for __hexagon_adddf3, covering everything from its
// label to this point.
398END(__hexagon_adddf3)
399