1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
4 *
5 * Floating-point emulation code
6 *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
7 */
8/*
9 * BEGIN_DESC
10 *
11 *  File:
12 *	@(#)	pa/spmath/fmpyfadd.c		$Revision: 1.1 $
13 *
14 *  Purpose:
15 *	Double Floating-point Multiply Fused Add
16 *	Double Floating-point Multiply Negate Fused Add
17 *	Single Floating-point Multiply Fused Add
18 *	Single Floating-point Multiply Negate Fused Add
19 *
20 *  External Interfaces:
21 *	dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
22 *	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
23 *	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
24 *	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
25 *
26 *  Internal Interfaces:
27 *
28 *  Theory:
29 *	<<please update with a overview of the operation of this file>>
30 *
31 * END_DESC
32*/
33
34
35#include "float.h"
36#include "sgl_float.h"
37#include "dbl_float.h"
38
39
40/*
41 *  Double Floating-point Multiply Fused Add
42 */
43
44int
45dbl_fmpyfadd(
46	    dbl_floating_point *src1ptr,
47	    dbl_floating_point *src2ptr,
48	    dbl_floating_point *src3ptr,
49	    unsigned int *status,
50	    dbl_floating_point *dstptr)
51{
52	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
53	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
54	unsigned int rightp1, rightp2, rightp3, rightp4;
55	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
56	register int mpy_exponent, add_exponent, count;
57	boolean inexact = FALSE, is_tiny = FALSE;
58
59	unsigned int signlessleft1, signlessright1, save;
60	register int result_exponent, diff_exponent;
61	int sign_save, jumpsize;
62
63	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
64	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
65	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
66
67	/*
68	 * set sign bit of result of multiply
69	 */
70	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
71		Dbl_setnegativezerop1(resultp1);
72	else Dbl_setzerop1(resultp1);
73
74	/*
75	 * Generate multiply exponent
76	 */
77	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
78
79	/*
80	 * check first operand for NaN's or infinity
81	 */
82	if (Dbl_isinfinity_exponent(opnd1p1)) {
83		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
84			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
85			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
86				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
87					/*
88					 * invalid since operands are infinity
89					 * and zero
90					 */
91					if (Is_invalidtrap_enabled())
92						return(OPC_2E_INVALIDEXCEPTION);
93					Set_invalidflag();
94					Dbl_makequietnan(resultp1,resultp2);
95					Dbl_copytoptr(resultp1,resultp2,dstptr);
96					return(NOEXCEPTION);
97				}
98				/*
99				 * Check third operand for infinity with a
100				 *  sign opposite of the multiply result
101				 */
102				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
103				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
104					/*
105					 * invalid since attempting a magnitude
106					 * subtraction of infinities
107					 */
108					if (Is_invalidtrap_enabled())
109						return(OPC_2E_INVALIDEXCEPTION);
110					Set_invalidflag();
111					Dbl_makequietnan(resultp1,resultp2);
112					Dbl_copytoptr(resultp1,resultp2,dstptr);
113					return(NOEXCEPTION);
114				}
115
116				/*
117			 	 * return infinity
118			 	 */
119				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
120				Dbl_copytoptr(resultp1,resultp2,dstptr);
121				return(NOEXCEPTION);
122			}
123		}
124		else {
125			/*
126		 	 * is NaN; signaling or quiet?
127		 	 */
128			if (Dbl_isone_signaling(opnd1p1)) {
129				/* trap if INVALIDTRAP enabled */
130				if (Is_invalidtrap_enabled())
131			    		return(OPC_2E_INVALIDEXCEPTION);
132				/* make NaN quiet */
133				Set_invalidflag();
134				Dbl_set_quiet(opnd1p1);
135			}
136			/*
137			 * is second operand a signaling NaN?
138			 */
139			else if (Dbl_is_signalingnan(opnd2p1)) {
140				/* trap if INVALIDTRAP enabled */
141				if (Is_invalidtrap_enabled())
142			    		return(OPC_2E_INVALIDEXCEPTION);
143				/* make NaN quiet */
144				Set_invalidflag();
145				Dbl_set_quiet(opnd2p1);
146				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
147				return(NOEXCEPTION);
148			}
149			/*
150			 * is third operand a signaling NaN?
151			 */
152			else if (Dbl_is_signalingnan(opnd3p1)) {
153				/* trap if INVALIDTRAP enabled */
154				if (Is_invalidtrap_enabled())
155			    		return(OPC_2E_INVALIDEXCEPTION);
156				/* make NaN quiet */
157				Set_invalidflag();
158				Dbl_set_quiet(opnd3p1);
159				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
160				return(NOEXCEPTION);
161			}
162			/*
163		 	 * return quiet NaN
164		 	 */
165			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
166			return(NOEXCEPTION);
167		}
168	}
169
170	/*
171	 * check second operand for NaN's or infinity
172	 */
173	if (Dbl_isinfinity_exponent(opnd2p1)) {
174		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
175			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
176				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
177					/*
178					 * invalid since multiply operands are
179					 * zero & infinity
180					 */
181					if (Is_invalidtrap_enabled())
182						return(OPC_2E_INVALIDEXCEPTION);
183					Set_invalidflag();
184					Dbl_makequietnan(opnd2p1,opnd2p2);
185					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
186					return(NOEXCEPTION);
187				}
188
189				/*
190				 * Check third operand for infinity with a
191				 *  sign opposite of the multiply result
192				 */
193				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
194				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
195					/*
196					 * invalid since attempting a magnitude
197					 * subtraction of infinities
198					 */
199					if (Is_invalidtrap_enabled())
200				       		return(OPC_2E_INVALIDEXCEPTION);
201				       	Set_invalidflag();
202				       	Dbl_makequietnan(resultp1,resultp2);
203					Dbl_copytoptr(resultp1,resultp2,dstptr);
204					return(NOEXCEPTION);
205				}
206
207				/*
208				 * return infinity
209				 */
210				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
211				Dbl_copytoptr(resultp1,resultp2,dstptr);
212				return(NOEXCEPTION);
213			}
214		}
215		else {
216			/*
217			 * is NaN; signaling or quiet?
218			 */
219			if (Dbl_isone_signaling(opnd2p1)) {
220				/* trap if INVALIDTRAP enabled */
221				if (Is_invalidtrap_enabled())
222					return(OPC_2E_INVALIDEXCEPTION);
223				/* make NaN quiet */
224				Set_invalidflag();
225				Dbl_set_quiet(opnd2p1);
226			}
227			/*
228			 * is third operand a signaling NaN?
229			 */
230			else if (Dbl_is_signalingnan(opnd3p1)) {
231			       	/* trap if INVALIDTRAP enabled */
232			       	if (Is_invalidtrap_enabled())
233				   		return(OPC_2E_INVALIDEXCEPTION);
234			       	/* make NaN quiet */
235			       	Set_invalidflag();
236			       	Dbl_set_quiet(opnd3p1);
237				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
238		       		return(NOEXCEPTION);
239			}
240			/*
241			 * return quiet NaN
242			 */
243			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
244			return(NOEXCEPTION);
245		}
246	}
247
248	/*
249	 * check third operand for NaN's or infinity
250	 */
251	if (Dbl_isinfinity_exponent(opnd3p1)) {
252		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
253			/* return infinity */
254			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
255			return(NOEXCEPTION);
256		} else {
257			/*
258			 * is NaN; signaling or quiet?
259			 */
260			if (Dbl_isone_signaling(opnd3p1)) {
261				/* trap if INVALIDTRAP enabled */
262				if (Is_invalidtrap_enabled())
263					return(OPC_2E_INVALIDEXCEPTION);
264				/* make NaN quiet */
265				Set_invalidflag();
266				Dbl_set_quiet(opnd3p1);
267			}
268			/*
269			 * return quiet NaN
270 			 */
271			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
272			return(NOEXCEPTION);
273		}
274    	}
275
276	/*
277	 * Generate multiply mantissa
278	 */
279	if (Dbl_isnotzero_exponent(opnd1p1)) {
280		/* set hidden bit */
281		Dbl_clear_signexponent_set_hidden(opnd1p1);
282	}
283	else {
284		/* check for zero */
285		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
286			/*
287			 * Perform the add opnd3 with zero here.
288			 */
289			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
290				if (Is_rounding_mode(ROUNDMINUS)) {
291					Dbl_or_signs(opnd3p1,resultp1);
292				} else {
293					Dbl_and_signs(opnd3p1,resultp1);
294				}
295			}
296			/*
297			 * Now let's check for trapped underflow case.
298			 */
299			else if (Dbl_iszero_exponent(opnd3p1) &&
300			         Is_underflowtrap_enabled()) {
301                    		/* need to normalize results mantissa */
302                    		sign_save = Dbl_signextendedsign(opnd3p1);
303				result_exponent = 0;
304                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
305                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
306                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
307                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
308							unfl);
309                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
310                    		/* inexact = FALSE */
311                    		return(OPC_2E_UNDERFLOWEXCEPTION);
312			}
313			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
314			return(NOEXCEPTION);
315		}
316		/* is denormalized, adjust exponent */
317		Dbl_clear_signexponent(opnd1p1);
318		Dbl_leftshiftby1(opnd1p1,opnd1p2);
319		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
320	}
321	/* opnd2 needs to have hidden bit set with msb in hidden bit */
322	if (Dbl_isnotzero_exponent(opnd2p1)) {
323		Dbl_clear_signexponent_set_hidden(opnd2p1);
324	}
325	else {
326		/* check for zero */
327		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
328			/*
329			 * Perform the add opnd3 with zero here.
330			 */
331			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
332				if (Is_rounding_mode(ROUNDMINUS)) {
333					Dbl_or_signs(opnd3p1,resultp1);
334				} else {
335					Dbl_and_signs(opnd3p1,resultp1);
336				}
337			}
338			/*
339			 * Now let's check for trapped underflow case.
340			 */
341			else if (Dbl_iszero_exponent(opnd3p1) &&
342			    Is_underflowtrap_enabled()) {
343                    		/* need to normalize results mantissa */
344                    		sign_save = Dbl_signextendedsign(opnd3p1);
345				result_exponent = 0;
346                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
347                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
348                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
349                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
350							unfl);
351                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
352                    		/* inexact = FALSE */
353				return(OPC_2E_UNDERFLOWEXCEPTION);
354			}
355			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
356			return(NOEXCEPTION);
357		}
358		/* is denormalized; want to normalize */
359		Dbl_clear_signexponent(opnd2p1);
360		Dbl_leftshiftby1(opnd2p1,opnd2p2);
361		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
362	}
363
364	/* Multiply the first two source mantissas together */
365
366	/*
367	 * The intermediate result will be kept in tmpres,
368	 * which needs enough room for 106 bits of mantissa,
369	 * so lets call it a Double extended.
370	 */
371	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
372
373	/*
374	 * Four bits at a time are inspected in each loop, and a
375	 * simple shift and add multiply algorithm is used.
376	 */
377	for (count = DBL_P-1; count >= 0; count -= 4) {
378		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
379		if (Dbit28p2(opnd1p2)) {
380	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
381			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
382			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
383		}
384		if (Dbit29p2(opnd1p2)) {
385			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
386			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
387		}
388		if (Dbit30p2(opnd1p2)) {
389			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
390			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
391		}
392		if (Dbit31p2(opnd1p2)) {
393			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
394			 opnd2p1, opnd2p2, 0, 0);
395		}
396		Dbl_rightshiftby4(opnd1p1,opnd1p2);
397	}
398	if (Is_dexthiddenoverflow(tmpresp1)) {
399		/* result mantissa >= 2 (mantissa overflow) */
400		mpy_exponent++;
401		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
402	}
403
404	/*
405	 * Restore the sign of the mpy result which was saved in resultp1.
406	 * The exponent will continue to be kept in mpy_exponent.
407	 */
408	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
409
410	/*
411	 * No rounding is required, since the result of the multiply
412	 * is exact in the extended format.
413	 */
414
415	/*
416	 * Now we are ready to perform the add portion of the operation.
417	 *
418	 * The exponents need to be kept as integers for now, since the
419	 * multiply result might not fit into the exponent field.  We
420	 * can't overflow or underflow because of this yet, since the
421	 * add could bring the final result back into range.
422	 */
423	add_exponent = Dbl_exponent(opnd3p1);
424
425	/*
426	 * Check for denormalized or zero add operand.
427	 */
428	if (add_exponent == 0) {
429		/* check for zero */
430		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
431			/* right is zero */
432			/* Left can't be zero and must be result.
433			 *
434			 * The final result is now in tmpres and mpy_exponent,
435			 * and needs to be rounded and squeezed back into
436			 * double precision format from double extended.
437			 */
438			result_exponent = mpy_exponent;
439			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
440				resultp1,resultp2,resultp3,resultp4);
441			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
442			goto round;
443		}
444
445		/*
446		 * Neither are zeroes.
447		 * Adjust exponent and normalize add operand.
448		 */
449		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
450		Dbl_clear_signexponent(opnd3p1);
451		Dbl_leftshiftby1(opnd3p1,opnd3p2);
452		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
453		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
454	} else {
455		Dbl_clear_exponent_set_hidden(opnd3p1);
456	}
457	/*
458	 * Copy opnd3 to the double extended variable called right.
459	 */
460	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
461
462	/*
463	 * A zero "save" helps discover equal operands (for later),
464	 * and is used in swapping operands (if needed).
465	 */
466	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
467
468	/*
469	 * Compare magnitude of operands.
470	 */
471	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
472	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
473	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
474	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
475		/*
476		 * Set the left operand to the larger one by XOR swap.
477		 * First finish the first word "save".
478		 */
479		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
480		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
481		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
482			rightp2,rightp3,rightp4);
483		/* also setup exponents used in rest of routine */
484		diff_exponent = add_exponent - mpy_exponent;
485		result_exponent = add_exponent;
486	} else {
487		/* also setup exponents used in rest of routine */
488		diff_exponent = mpy_exponent - add_exponent;
489		result_exponent = mpy_exponent;
490	}
491	/* Invariant: left is not smaller than right. */
492
493	/*
494	 * Special case alignment of operands that would force alignment
495	 * beyond the extent of the extension.  A further optimization
496	 * could special case this but only reduces the path length for
497	 * this infrequent case.
498	 */
499	if (diff_exponent > DBLEXT_THRESHOLD) {
500		diff_exponent = DBLEXT_THRESHOLD;
501	}
502
503	/* Align right operand by shifting it to the right */
504	Dblext_clear_sign(rightp1);
505	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
506		/*shifted by*/diff_exponent);
507
508	/* Treat sum and difference of the operands separately. */
509	if ((int)save < 0) {
510		/*
511		 * Difference of the two operands.  Overflow can occur if the
512		 * multiply overflowed.  A borrow can occur out of the hidden
513		 * bit and force a post normalization phase.
514		 */
515		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
516			rightp1,rightp2,rightp3,rightp4,
517			resultp1,resultp2,resultp3,resultp4);
518		sign_save = Dbl_signextendedsign(resultp1);
519		if (Dbl_iszero_hidden(resultp1)) {
520			/* Handle normalization */
521		/* A straightforward algorithm would now shift the
522		 * result and extension left until the hidden bit
523		 * becomes one.  Not all of the extension bits need
524		 * participate in the shift.  Only the two most
525		 * significant bits (round and guard) are needed.
526		 * If only a single shift is needed then the guard
527		 * bit becomes a significant low order bit and the
528		 * extension must participate in the rounding.
529		 * If more than a single shift is needed, then all
530		 * bits to the right of the guard bit are zeros,
531		 * and the guard bit may or may not be zero. */
532			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
533				resultp4);
534
535			/* Need to check for a zero result.  The sign and
536			 * exponent fields have already been zeroed.  The more
537			 * efficient test of the full object can be used.
538			 */
539			 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
540				/* Must have been "x-x" or "x+(-x)". */
541				if (Is_rounding_mode(ROUNDMINUS))
542					Dbl_setone_sign(resultp1);
543				Dbl_copytoptr(resultp1,resultp2,dstptr);
544				return(NOEXCEPTION);
545			}
546			result_exponent--;
547
548			/* Look to see if normalization is finished. */
549			if (Dbl_isone_hidden(resultp1)) {
550				/* No further normalization is needed */
551				goto round;
552			}
553
554			/* Discover first one bit to determine shift amount.
555			 * Use a modified binary search.  We have already
556			 * shifted the result one position right and still
557			 * not found a one so the remainder of the extension
558			 * must be zero and simplifies rounding. */
559			/* Scan bytes */
560			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
561				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
562				result_exponent -= 8;
563			}
564			/* Now narrow it down to the nibble */
565			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
566				/* The lower nibble contains the
567				 * normalizing one */
568				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
569				result_exponent -= 4;
570			}
571			/* Select case where first bit is set (already
572			 * normalized) otherwise select the proper shift. */
573			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
574			if (jumpsize <= 7) switch(jumpsize) {
575			case 1:
576				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
577					resultp4);
578				result_exponent -= 3;
579				break;
580			case 2:
581			case 3:
582				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
583					resultp4);
584				result_exponent -= 2;
585				break;
586			case 4:
587			case 5:
588			case 6:
589			case 7:
590				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
591					resultp4);
592				result_exponent -= 1;
593				break;
594			}
595		} /* end if (hidden...)... */
596	/* Fall through and round */
597	} /* end if (save < 0)... */
598	else {
599		/* Add magnitudes */
600		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
601			rightp1,rightp2,rightp3,rightp4,
602			/*to*/resultp1,resultp2,resultp3,resultp4);
603		sign_save = Dbl_signextendedsign(resultp1);
604		if (Dbl_isone_hiddenoverflow(resultp1)) {
605	    		/* Prenormalization required. */
606	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
607				resultp4);
608	    		result_exponent++;
609		} /* end if hiddenoverflow... */
610	} /* end else ...add magnitudes... */
611
612	/* Round the result.  If the extension and lower two words are
613	 * all zeros, then the result is exact.  Otherwise round in the
614	 * correct direction.  Underflow is possible. If a postnormalization
615	 * is necessary, then the mantissa is all zeros so no shift is needed.
616	 */
617  round:
618	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
619		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
620			result_exponent,is_tiny);
621	}
622	Dbl_set_sign(resultp1,/*using*/sign_save);
623	if (Dblext_isnotzero_mantissap3(resultp3) ||
624	    Dblext_isnotzero_mantissap4(resultp4)) {
625		inexact = TRUE;
626		switch(Rounding_mode()) {
627		case ROUNDNEAREST: /* The default. */
628			if (Dblext_isone_highp3(resultp3)) {
629				/* at least 1/2 ulp */
630				if (Dblext_isnotzero_low31p3(resultp3) ||
631				    Dblext_isnotzero_mantissap4(resultp4) ||
632				    Dblext_isone_lowp2(resultp2)) {
633					/* either exactly half way and odd or
634					 * more than 1/2ulp */
635					Dbl_increment(resultp1,resultp2);
636				}
637			}
638	    		break;
639
640		case ROUNDPLUS:
641	    		if (Dbl_iszero_sign(resultp1)) {
642				/* Round up positive results */
643				Dbl_increment(resultp1,resultp2);
644			}
645			break;
646
647		case ROUNDMINUS:
648	    		if (Dbl_isone_sign(resultp1)) {
649				/* Round down negative results */
650				Dbl_increment(resultp1,resultp2);
651			}
652
653		case ROUNDZERO:;
654			/* truncate is simple */
655		} /* end switch... */
656		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
657	}
658	if (result_exponent >= DBL_INFINITY_EXPONENT) {
659                /* trap if OVERFLOWTRAP enabled */
660                if (Is_overflowtrap_enabled()) {
661                        /*
662                         * Adjust bias of result
663                         */
664                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
665                        Dbl_copytoptr(resultp1,resultp2,dstptr);
666                        if (inexact)
667                            if (Is_inexacttrap_enabled())
668                                return (OPC_2E_OVERFLOWEXCEPTION |
669					OPC_2E_INEXACTEXCEPTION);
670                            else Set_inexactflag();
671                        return (OPC_2E_OVERFLOWEXCEPTION);
672                }
673                inexact = TRUE;
674                Set_overflowflag();
675                /* set result to infinity or largest number */
676                Dbl_setoverflow(resultp1,resultp2);
677
678	} else if (result_exponent <= 0) {	/* underflow case */
679		if (Is_underflowtrap_enabled()) {
680                        /*
681                         * Adjust bias of result
682                         */
683                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
684			Dbl_copytoptr(resultp1,resultp2,dstptr);
685                        if (inexact)
686                            if (Is_inexacttrap_enabled())
687                                return (OPC_2E_UNDERFLOWEXCEPTION |
688					OPC_2E_INEXACTEXCEPTION);
689                            else Set_inexactflag();
690	    		return(OPC_2E_UNDERFLOWEXCEPTION);
691		}
692		else if (inexact && is_tiny) Set_underflowflag();
693	}
694	else Dbl_set_exponent(resultp1,result_exponent);
695	Dbl_copytoptr(resultp1,resultp2,dstptr);
696	if (inexact)
697		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
698		else Set_inexactflag();
699    	return(NOEXCEPTION);
700}
701
702/*
703 *  Double Floating-point Multiply Negate Fused Add
704 */
705
706dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
707
708dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
709unsigned int *status;
710{
711	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
712	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
713	unsigned int rightp1, rightp2, rightp3, rightp4;
714	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
715	register int mpy_exponent, add_exponent, count;
716	boolean inexact = FALSE, is_tiny = FALSE;
717
718	unsigned int signlessleft1, signlessright1, save;
719	register int result_exponent, diff_exponent;
720	int sign_save, jumpsize;
721
722	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
723	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
724	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
725
726	/*
727	 * set sign bit of result of multiply
728	 */
729	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
730		Dbl_setzerop1(resultp1);
731	else
732		Dbl_setnegativezerop1(resultp1);
733
734	/*
735	 * Generate multiply exponent
736	 */
737	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
738
739	/*
740	 * check first operand for NaN's or infinity
741	 */
742	if (Dbl_isinfinity_exponent(opnd1p1)) {
743		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
744			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
745			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
746				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
747					/*
748					 * invalid since operands are infinity
749					 * and zero
750					 */
751					if (Is_invalidtrap_enabled())
752						return(OPC_2E_INVALIDEXCEPTION);
753					Set_invalidflag();
754					Dbl_makequietnan(resultp1,resultp2);
755					Dbl_copytoptr(resultp1,resultp2,dstptr);
756					return(NOEXCEPTION);
757				}
758				/*
759				 * Check third operand for infinity with a
760				 *  sign opposite of the multiply result
761				 */
762				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
763				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
764					/*
765					 * invalid since attempting a magnitude
766					 * subtraction of infinities
767					 */
768					if (Is_invalidtrap_enabled())
769						return(OPC_2E_INVALIDEXCEPTION);
770					Set_invalidflag();
771					Dbl_makequietnan(resultp1,resultp2);
772					Dbl_copytoptr(resultp1,resultp2,dstptr);
773					return(NOEXCEPTION);
774				}
775
776				/*
777			 	 * return infinity
778			 	 */
779				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
780				Dbl_copytoptr(resultp1,resultp2,dstptr);
781				return(NOEXCEPTION);
782			}
783		}
784		else {
785			/*
786		 	 * is NaN; signaling or quiet?
787		 	 */
788			if (Dbl_isone_signaling(opnd1p1)) {
789				/* trap if INVALIDTRAP enabled */
790				if (Is_invalidtrap_enabled())
791			    		return(OPC_2E_INVALIDEXCEPTION);
792				/* make NaN quiet */
793				Set_invalidflag();
794				Dbl_set_quiet(opnd1p1);
795			}
796			/*
797			 * is second operand a signaling NaN?
798			 */
799			else if (Dbl_is_signalingnan(opnd2p1)) {
800				/* trap if INVALIDTRAP enabled */
801				if (Is_invalidtrap_enabled())
802			    		return(OPC_2E_INVALIDEXCEPTION);
803				/* make NaN quiet */
804				Set_invalidflag();
805				Dbl_set_quiet(opnd2p1);
806				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
807				return(NOEXCEPTION);
808			}
809			/*
810			 * is third operand a signaling NaN?
811			 */
812			else if (Dbl_is_signalingnan(opnd3p1)) {
813				/* trap if INVALIDTRAP enabled */
814				if (Is_invalidtrap_enabled())
815			    		return(OPC_2E_INVALIDEXCEPTION);
816				/* make NaN quiet */
817				Set_invalidflag();
818				Dbl_set_quiet(opnd3p1);
819				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
820				return(NOEXCEPTION);
821			}
822			/*
823		 	 * return quiet NaN
824		 	 */
825			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
826			return(NOEXCEPTION);
827		}
828	}
829
830	/*
831	 * check second operand for NaN's or infinity
832	 */
833	if (Dbl_isinfinity_exponent(opnd2p1)) {
834		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
835			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
836				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
837					/*
838					 * invalid since multiply operands are
839					 * zero & infinity
840					 */
841					if (Is_invalidtrap_enabled())
842						return(OPC_2E_INVALIDEXCEPTION);
843					Set_invalidflag();
844					Dbl_makequietnan(opnd2p1,opnd2p2);
845					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
846					return(NOEXCEPTION);
847				}
848
849				/*
850				 * Check third operand for infinity with a
851				 *  sign opposite of the multiply result
852				 */
853				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
854				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
855					/*
856					 * invalid since attempting a magnitude
857					 * subtraction of infinities
858					 */
859					if (Is_invalidtrap_enabled())
860				       		return(OPC_2E_INVALIDEXCEPTION);
861				       	Set_invalidflag();
862				       	Dbl_makequietnan(resultp1,resultp2);
863					Dbl_copytoptr(resultp1,resultp2,dstptr);
864					return(NOEXCEPTION);
865				}
866
867				/*
868				 * return infinity
869				 */
870				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
871				Dbl_copytoptr(resultp1,resultp2,dstptr);
872				return(NOEXCEPTION);
873			}
874		}
875		else {
876			/*
877			 * is NaN; signaling or quiet?
878			 */
879			if (Dbl_isone_signaling(opnd2p1)) {
880				/* trap if INVALIDTRAP enabled */
881				if (Is_invalidtrap_enabled())
882					return(OPC_2E_INVALIDEXCEPTION);
883				/* make NaN quiet */
884				Set_invalidflag();
885				Dbl_set_quiet(opnd2p1);
886			}
887			/*
888			 * is third operand a signaling NaN?
889			 */
890			else if (Dbl_is_signalingnan(opnd3p1)) {
891			       	/* trap if INVALIDTRAP enabled */
892			       	if (Is_invalidtrap_enabled())
893				   		return(OPC_2E_INVALIDEXCEPTION);
894			       	/* make NaN quiet */
895			       	Set_invalidflag();
896			       	Dbl_set_quiet(opnd3p1);
897				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
898		       		return(NOEXCEPTION);
899			}
900			/*
901			 * return quiet NaN
902			 */
903			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
904			return(NOEXCEPTION);
905		}
906	}
907
908	/*
909	 * check third operand for NaN's or infinity
910	 */
911	if (Dbl_isinfinity_exponent(opnd3p1)) {
912		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
913			/* return infinity */
914			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
915			return(NOEXCEPTION);
916		} else {
917			/*
918			 * is NaN; signaling or quiet?
919			 */
920			if (Dbl_isone_signaling(opnd3p1)) {
921				/* trap if INVALIDTRAP enabled */
922				if (Is_invalidtrap_enabled())
923					return(OPC_2E_INVALIDEXCEPTION);
924				/* make NaN quiet */
925				Set_invalidflag();
926				Dbl_set_quiet(opnd3p1);
927			}
928			/*
929			 * return quiet NaN
930 			 */
931			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
932			return(NOEXCEPTION);
933		}
934    	}
935
936	/*
937	 * Generate multiply mantissa
938	 */
939	if (Dbl_isnotzero_exponent(opnd1p1)) {
940		/* set hidden bit */
941		Dbl_clear_signexponent_set_hidden(opnd1p1);
942	}
943	else {
944		/* check for zero */
945		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
946			/*
947			 * Perform the add opnd3 with zero here.
948			 */
949			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
950				if (Is_rounding_mode(ROUNDMINUS)) {
951					Dbl_or_signs(opnd3p1,resultp1);
952				} else {
953					Dbl_and_signs(opnd3p1,resultp1);
954				}
955			}
956			/*
957			 * Now let's check for trapped underflow case.
958			 */
959			else if (Dbl_iszero_exponent(opnd3p1) &&
960			         Is_underflowtrap_enabled()) {
961                    		/* need to normalize results mantissa */
962                    		sign_save = Dbl_signextendedsign(opnd3p1);
963				result_exponent = 0;
964                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
965                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
966                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
967                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
968							unfl);
969                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
970                    		/* inexact = FALSE */
971                    		return(OPC_2E_UNDERFLOWEXCEPTION);
972			}
973			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
974			return(NOEXCEPTION);
975		}
976		/* is denormalized, adjust exponent */
977		Dbl_clear_signexponent(opnd1p1);
978		Dbl_leftshiftby1(opnd1p1,opnd1p2);
979		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
980	}
981	/* opnd2 needs to have hidden bit set with msb in hidden bit */
982	if (Dbl_isnotzero_exponent(opnd2p1)) {
983		Dbl_clear_signexponent_set_hidden(opnd2p1);
984	}
985	else {
986		/* check for zero */
987		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
988			/*
989			 * Perform the add opnd3 with zero here.
990			 */
991			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
992				if (Is_rounding_mode(ROUNDMINUS)) {
993					Dbl_or_signs(opnd3p1,resultp1);
994				} else {
995					Dbl_and_signs(opnd3p1,resultp1);
996				}
997			}
998			/*
999			 * Now let's check for trapped underflow case.
1000			 */
1001			else if (Dbl_iszero_exponent(opnd3p1) &&
1002			    Is_underflowtrap_enabled()) {
1003                    		/* need to normalize results mantissa */
1004                    		sign_save = Dbl_signextendedsign(opnd3p1);
1005				result_exponent = 0;
1006                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1007                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1008                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
1009                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1010							unfl);
1011                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1012                    		/* inexact = FALSE */
1013                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1014			}
1015			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1016			return(NOEXCEPTION);
1017		}
1018		/* is denormalized; want to normalize */
1019		Dbl_clear_signexponent(opnd2p1);
1020		Dbl_leftshiftby1(opnd2p1,opnd2p2);
1021		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1022	}
1023
1024	/* Multiply the first two source mantissas together */
1025
1026	/*
1027	 * The intermediate result will be kept in tmpres,
1028	 * which needs enough room for 106 bits of mantissa,
1029	 * so lets call it a Double extended.
1030	 */
1031	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1032
1033	/*
1034	 * Four bits at a time are inspected in each loop, and a
1035	 * simple shift and add multiply algorithm is used.
1036	 */
1037	for (count = DBL_P-1; count >= 0; count -= 4) {
1038		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1039		if (Dbit28p2(opnd1p2)) {
1040	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
1041			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1042			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1043		}
1044		if (Dbit29p2(opnd1p2)) {
1045			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1046			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1047		}
1048		if (Dbit30p2(opnd1p2)) {
1049			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1050			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1051		}
1052		if (Dbit31p2(opnd1p2)) {
1053			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1054			 opnd2p1, opnd2p2, 0, 0);
1055		}
1056		Dbl_rightshiftby4(opnd1p1,opnd1p2);
1057	}
1058	if (Is_dexthiddenoverflow(tmpresp1)) {
1059		/* result mantissa >= 2 (mantissa overflow) */
1060		mpy_exponent++;
1061		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1062	}
1063
1064	/*
1065	 * Restore the sign of the mpy result which was saved in resultp1.
1066	 * The exponent will continue to be kept in mpy_exponent.
1067	 */
1068	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1069
1070	/*
1071	 * No rounding is required, since the result of the multiply
1072	 * is exact in the extended format.
1073	 */
1074
1075	/*
1076	 * Now we are ready to perform the add portion of the operation.
1077	 *
1078	 * The exponents need to be kept as integers for now, since the
1079	 * multiply result might not fit into the exponent field.  We
1080	 * can't overflow or underflow because of this yet, since the
1081	 * add could bring the final result back into range.
1082	 */
1083	add_exponent = Dbl_exponent(opnd3p1);
1084
1085	/*
1086	 * Check for denormalized or zero add operand.
1087	 */
1088	if (add_exponent == 0) {
1089		/* check for zero */
1090		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1091			/* right is zero */
1092			/* Left can't be zero and must be result.
1093			 *
1094			 * The final result is now in tmpres and mpy_exponent,
1095			 * and needs to be rounded and squeezed back into
1096			 * double precision format from double extended.
1097			 */
1098			result_exponent = mpy_exponent;
1099			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1100				resultp1,resultp2,resultp3,resultp4);
1101			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1102			goto round;
1103		}
1104
1105		/*
1106		 * Neither are zeroes.
1107		 * Adjust exponent and normalize add operand.
1108		 */
1109		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
1110		Dbl_clear_signexponent(opnd3p1);
1111		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1112		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1113		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
1114	} else {
1115		Dbl_clear_exponent_set_hidden(opnd3p1);
1116	}
1117	/*
1118	 * Copy opnd3 to the double extended variable called right.
1119	 */
1120	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1121
1122	/*
1123	 * A zero "save" helps discover equal operands (for later),
1124	 * and is used in swapping operands (if needed).
1125	 */
1126	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1127
1128	/*
1129	 * Compare magnitude of operands.
1130	 */
1131	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1132	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1133	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1134	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1135		/*
1136		 * Set the left operand to the larger one by XOR swap.
1137		 * First finish the first word "save".
1138		 */
1139		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1140		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1141		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1142			rightp2,rightp3,rightp4);
1143		/* also setup exponents used in rest of routine */
1144		diff_exponent = add_exponent - mpy_exponent;
1145		result_exponent = add_exponent;
1146	} else {
1147		/* also setup exponents used in rest of routine */
1148		diff_exponent = mpy_exponent - add_exponent;
1149		result_exponent = mpy_exponent;
1150	}
1151	/* Invariant: left is not smaller than right. */
1152
1153	/*
1154	 * Special case alignment of operands that would force alignment
1155	 * beyond the extent of the extension.  A further optimization
1156	 * could special case this but only reduces the path length for
1157	 * this infrequent case.
1158	 */
1159	if (diff_exponent > DBLEXT_THRESHOLD) {
1160		diff_exponent = DBLEXT_THRESHOLD;
1161	}
1162
1163	/* Align right operand by shifting it to the right */
1164	Dblext_clear_sign(rightp1);
1165	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1166		/*shifted by*/diff_exponent);
1167
1168	/* Treat sum and difference of the operands separately. */
1169	if ((int)save < 0) {
1170		/*
1171		 * Difference of the two operands.  Overflow can occur if the
1172		 * multiply overflowed.  A borrow can occur out of the hidden
1173		 * bit and force a post normalization phase.
1174		 */
1175		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1176			rightp1,rightp2,rightp3,rightp4,
1177			resultp1,resultp2,resultp3,resultp4);
1178		sign_save = Dbl_signextendedsign(resultp1);
1179		if (Dbl_iszero_hidden(resultp1)) {
1180			/* Handle normalization */
1181		/* A straightforward algorithm would now shift the
1182		 * result and extension left until the hidden bit
1183		 * becomes one.  Not all of the extension bits need
1184		 * participate in the shift.  Only the two most
1185		 * significant bits (round and guard) are needed.
1186		 * If only a single shift is needed then the guard
1187		 * bit becomes a significant low order bit and the
1188		 * extension must participate in the rounding.
1189		 * If more than a single shift is needed, then all
1190		 * bits to the right of the guard bit are zeros,
1191		 * and the guard bit may or may not be zero. */
1192			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1193				resultp4);
1194
1195			/* Need to check for a zero result.  The sign and
1196			 * exponent fields have already been zeroed.  The more
1197			 * efficient test of the full object can be used.
1198			 */
1199			 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1200				/* Must have been "x-x" or "x+(-x)". */
1201				if (Is_rounding_mode(ROUNDMINUS))
1202					Dbl_setone_sign(resultp1);
1203				Dbl_copytoptr(resultp1,resultp2,dstptr);
1204				return(NOEXCEPTION);
1205			}
1206			result_exponent--;
1207
1208			/* Look to see if normalization is finished. */
1209			if (Dbl_isone_hidden(resultp1)) {
1210				/* No further normalization is needed */
1211				goto round;
1212			}
1213
1214			/* Discover first one bit to determine shift amount.
1215			 * Use a modified binary search.  We have already
1216			 * shifted the result one position right and still
1217			 * not found a one so the remainder of the extension
1218			 * must be zero and simplifies rounding. */
1219			/* Scan bytes */
1220			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1221				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1222				result_exponent -= 8;
1223			}
1224			/* Now narrow it down to the nibble */
1225			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1226				/* The lower nibble contains the
1227				 * normalizing one */
1228				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1229				result_exponent -= 4;
1230			}
1231			/* Select case where first bit is set (already
1232			 * normalized) otherwise select the proper shift. */
1233			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1234			if (jumpsize <= 7) switch(jumpsize) {
1235			case 1:
1236				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1237					resultp4);
1238				result_exponent -= 3;
1239				break;
1240			case 2:
1241			case 3:
1242				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1243					resultp4);
1244				result_exponent -= 2;
1245				break;
1246			case 4:
1247			case 5:
1248			case 6:
1249			case 7:
1250				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1251					resultp4);
1252				result_exponent -= 1;
1253				break;
1254			}
1255		} /* end if (hidden...)... */
1256	/* Fall through and round */
1257	} /* end if (save < 0)... */
1258	else {
1259		/* Add magnitudes */
1260		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1261			rightp1,rightp2,rightp3,rightp4,
1262			/*to*/resultp1,resultp2,resultp3,resultp4);
1263		sign_save = Dbl_signextendedsign(resultp1);
1264		if (Dbl_isone_hiddenoverflow(resultp1)) {
1265	    		/* Prenormalization required. */
1266	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1267				resultp4);
1268	    		result_exponent++;
1269		} /* end if hiddenoverflow... */
1270	} /* end else ...add magnitudes... */
1271
1272	/* Round the result.  If the extension and lower two words are
1273	 * all zeros, then the result is exact.  Otherwise round in the
1274	 * correct direction.  Underflow is possible. If a postnormalization
1275	 * is necessary, then the mantissa is all zeros so no shift is needed.
1276	 */
1277  round:
1278	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1279		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1280			result_exponent,is_tiny);
1281	}
1282	Dbl_set_sign(resultp1,/*using*/sign_save);
1283	if (Dblext_isnotzero_mantissap3(resultp3) ||
1284	    Dblext_isnotzero_mantissap4(resultp4)) {
1285		inexact = TRUE;
1286		switch(Rounding_mode()) {
1287		case ROUNDNEAREST: /* The default. */
1288			if (Dblext_isone_highp3(resultp3)) {
1289				/* at least 1/2 ulp */
1290				if (Dblext_isnotzero_low31p3(resultp3) ||
1291				    Dblext_isnotzero_mantissap4(resultp4) ||
1292				    Dblext_isone_lowp2(resultp2)) {
1293					/* either exactly half way and odd or
1294					 * more than 1/2ulp */
1295					Dbl_increment(resultp1,resultp2);
1296				}
1297			}
1298	    		break;
1299
1300		case ROUNDPLUS:
1301	    		if (Dbl_iszero_sign(resultp1)) {
1302				/* Round up positive results */
1303				Dbl_increment(resultp1,resultp2);
1304			}
1305			break;
1306
1307		case ROUNDMINUS:
1308	    		if (Dbl_isone_sign(resultp1)) {
1309				/* Round down negative results */
1310				Dbl_increment(resultp1,resultp2);
1311			}
1312
1313		case ROUNDZERO:;
1314			/* truncate is simple */
1315		} /* end switch... */
1316		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1317	}
1318	if (result_exponent >= DBL_INFINITY_EXPONENT) {
1319		/* Overflow */
1320		if (Is_overflowtrap_enabled()) {
1321                        /*
1322                         * Adjust bias of result
1323                         */
1324                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1325                        Dbl_copytoptr(resultp1,resultp2,dstptr);
1326                        if (inexact)
1327                            if (Is_inexacttrap_enabled())
1328                                return (OPC_2E_OVERFLOWEXCEPTION |
1329					OPC_2E_INEXACTEXCEPTION);
1330                            else Set_inexactflag();
1331                        return (OPC_2E_OVERFLOWEXCEPTION);
1332		}
1333		inexact = TRUE;
1334		Set_overflowflag();
1335		Dbl_setoverflow(resultp1,resultp2);
1336	} else if (result_exponent <= 0) {	/* underflow case */
1337		if (Is_underflowtrap_enabled()) {
1338                        /*
1339                         * Adjust bias of result
1340                         */
1341                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1342			Dbl_copytoptr(resultp1,resultp2,dstptr);
1343                        if (inexact)
1344                            if (Is_inexacttrap_enabled())
1345                                return (OPC_2E_UNDERFLOWEXCEPTION |
1346					OPC_2E_INEXACTEXCEPTION);
1347                            else Set_inexactflag();
1348	    		return(OPC_2E_UNDERFLOWEXCEPTION);
1349		}
1350		else if (inexact && is_tiny) Set_underflowflag();
1351	}
1352	else Dbl_set_exponent(resultp1,result_exponent);
1353	Dbl_copytoptr(resultp1,resultp2,dstptr);
1354	if (inexact)
1355		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1356		else Set_inexactflag();
1357    	return(NOEXCEPTION);
1358}
1359
1360/*
1361 *  Single Floating-point Multiply Fused Add
1362 */
1363
1364sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1365
1366sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1367unsigned int *status;
1368{
1369	unsigned int opnd1, opnd2, opnd3;
1370	register unsigned int tmpresp1, tmpresp2;
1371	unsigned int rightp1, rightp2;
1372	unsigned int resultp1, resultp2 = 0;
1373	register int mpy_exponent, add_exponent, count;
1374	boolean inexact = FALSE, is_tiny = FALSE;
1375
1376	unsigned int signlessleft1, signlessright1, save;
1377	register int result_exponent, diff_exponent;
1378	int sign_save, jumpsize;
1379
1380	Sgl_copyfromptr(src1ptr,opnd1);
1381	Sgl_copyfromptr(src2ptr,opnd2);
1382	Sgl_copyfromptr(src3ptr,opnd3);
1383
1384	/*
1385	 * set sign bit of result of multiply
1386	 */
1387	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1388		Sgl_setnegativezero(resultp1);
1389	else Sgl_setzero(resultp1);
1390
1391	/*
1392	 * Generate multiply exponent
1393	 */
1394	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1395
1396	/*
1397	 * check first operand for NaN's or infinity
1398	 */
1399	if (Sgl_isinfinity_exponent(opnd1)) {
1400		if (Sgl_iszero_mantissa(opnd1)) {
1401			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1402				if (Sgl_iszero_exponentmantissa(opnd2)) {
1403					/*
1404					 * invalid since operands are infinity
1405					 * and zero
1406					 */
1407					if (Is_invalidtrap_enabled())
1408						return(OPC_2E_INVALIDEXCEPTION);
1409					Set_invalidflag();
1410					Sgl_makequietnan(resultp1);
1411					Sgl_copytoptr(resultp1,dstptr);
1412					return(NOEXCEPTION);
1413				}
1414				/*
1415				 * Check third operand for infinity with a
1416				 *  sign opposite of the multiply result
1417				 */
1418				if (Sgl_isinfinity(opnd3) &&
1419				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1420					/*
1421					 * invalid since attempting a magnitude
1422					 * subtraction of infinities
1423					 */
1424					if (Is_invalidtrap_enabled())
1425						return(OPC_2E_INVALIDEXCEPTION);
1426					Set_invalidflag();
1427					Sgl_makequietnan(resultp1);
1428					Sgl_copytoptr(resultp1,dstptr);
1429					return(NOEXCEPTION);
1430				}
1431
1432				/*
1433			 	 * return infinity
1434			 	 */
1435				Sgl_setinfinity_exponentmantissa(resultp1);
1436				Sgl_copytoptr(resultp1,dstptr);
1437				return(NOEXCEPTION);
1438			}
1439		}
1440		else {
1441			/*
1442		 	 * is NaN; signaling or quiet?
1443		 	 */
1444			if (Sgl_isone_signaling(opnd1)) {
1445				/* trap if INVALIDTRAP enabled */
1446				if (Is_invalidtrap_enabled())
1447			    		return(OPC_2E_INVALIDEXCEPTION);
1448				/* make NaN quiet */
1449				Set_invalidflag();
1450				Sgl_set_quiet(opnd1);
1451			}
1452			/*
1453			 * is second operand a signaling NaN?
1454			 */
1455			else if (Sgl_is_signalingnan(opnd2)) {
1456				/* trap if INVALIDTRAP enabled */
1457				if (Is_invalidtrap_enabled())
1458			    		return(OPC_2E_INVALIDEXCEPTION);
1459				/* make NaN quiet */
1460				Set_invalidflag();
1461				Sgl_set_quiet(opnd2);
1462				Sgl_copytoptr(opnd2,dstptr);
1463				return(NOEXCEPTION);
1464			}
1465			/*
1466			 * is third operand a signaling NaN?
1467			 */
1468			else if (Sgl_is_signalingnan(opnd3)) {
1469				/* trap if INVALIDTRAP enabled */
1470				if (Is_invalidtrap_enabled())
1471			    		return(OPC_2E_INVALIDEXCEPTION);
1472				/* make NaN quiet */
1473				Set_invalidflag();
1474				Sgl_set_quiet(opnd3);
1475				Sgl_copytoptr(opnd3,dstptr);
1476				return(NOEXCEPTION);
1477			}
1478			/*
1479		 	 * return quiet NaN
1480		 	 */
1481			Sgl_copytoptr(opnd1,dstptr);
1482			return(NOEXCEPTION);
1483		}
1484	}
1485
1486	/*
1487	 * check second operand for NaN's or infinity
1488	 */
1489	if (Sgl_isinfinity_exponent(opnd2)) {
1490		if (Sgl_iszero_mantissa(opnd2)) {
1491			if (Sgl_isnotnan(opnd3)) {
1492				if (Sgl_iszero_exponentmantissa(opnd1)) {
1493					/*
1494					 * invalid since multiply operands are
1495					 * zero & infinity
1496					 */
1497					if (Is_invalidtrap_enabled())
1498						return(OPC_2E_INVALIDEXCEPTION);
1499					Set_invalidflag();
1500					Sgl_makequietnan(opnd2);
1501					Sgl_copytoptr(opnd2,dstptr);
1502					return(NOEXCEPTION);
1503				}
1504
1505				/*
1506				 * Check third operand for infinity with a
1507				 *  sign opposite of the multiply result
1508				 */
1509				if (Sgl_isinfinity(opnd3) &&
1510				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1511					/*
1512					 * invalid since attempting a magnitude
1513					 * subtraction of infinities
1514					 */
1515					if (Is_invalidtrap_enabled())
1516				       		return(OPC_2E_INVALIDEXCEPTION);
1517				       	Set_invalidflag();
1518				       	Sgl_makequietnan(resultp1);
1519					Sgl_copytoptr(resultp1,dstptr);
1520					return(NOEXCEPTION);
1521				}
1522
1523				/*
1524				 * return infinity
1525				 */
1526				Sgl_setinfinity_exponentmantissa(resultp1);
1527				Sgl_copytoptr(resultp1,dstptr);
1528				return(NOEXCEPTION);
1529			}
1530		}
1531		else {
1532			/*
1533			 * is NaN; signaling or quiet?
1534			 */
1535			if (Sgl_isone_signaling(opnd2)) {
1536				/* trap if INVALIDTRAP enabled */
1537				if (Is_invalidtrap_enabled())
1538					return(OPC_2E_INVALIDEXCEPTION);
1539				/* make NaN quiet */
1540				Set_invalidflag();
1541				Sgl_set_quiet(opnd2);
1542			}
1543			/*
1544			 * is third operand a signaling NaN?
1545			 */
1546			else if (Sgl_is_signalingnan(opnd3)) {
1547			       	/* trap if INVALIDTRAP enabled */
1548			       	if (Is_invalidtrap_enabled())
1549				   		return(OPC_2E_INVALIDEXCEPTION);
1550			       	/* make NaN quiet */
1551			       	Set_invalidflag();
1552			       	Sgl_set_quiet(opnd3);
1553				Sgl_copytoptr(opnd3,dstptr);
1554		       		return(NOEXCEPTION);
1555			}
1556			/*
1557			 * return quiet NaN
1558			 */
1559			Sgl_copytoptr(opnd2,dstptr);
1560			return(NOEXCEPTION);
1561		}
1562	}
1563
1564	/*
1565	 * check third operand for NaN's or infinity
1566	 */
1567	if (Sgl_isinfinity_exponent(opnd3)) {
1568		if (Sgl_iszero_mantissa(opnd3)) {
1569			/* return infinity */
1570			Sgl_copytoptr(opnd3,dstptr);
1571			return(NOEXCEPTION);
1572		} else {
1573			/*
1574			 * is NaN; signaling or quiet?
1575			 */
1576			if (Sgl_isone_signaling(opnd3)) {
1577				/* trap if INVALIDTRAP enabled */
1578				if (Is_invalidtrap_enabled())
1579					return(OPC_2E_INVALIDEXCEPTION);
1580				/* make NaN quiet */
1581				Set_invalidflag();
1582				Sgl_set_quiet(opnd3);
1583			}
1584			/*
1585			 * return quiet NaN
1586 			 */
1587			Sgl_copytoptr(opnd3,dstptr);
1588			return(NOEXCEPTION);
1589		}
1590    	}
1591
1592	/*
1593	 * Generate multiply mantissa
1594	 */
1595	if (Sgl_isnotzero_exponent(opnd1)) {
1596		/* set hidden bit */
1597		Sgl_clear_signexponent_set_hidden(opnd1);
1598	}
1599	else {
1600		/* check for zero */
1601		if (Sgl_iszero_mantissa(opnd1)) {
1602			/*
1603			 * Perform the add opnd3 with zero here.
1604			 */
1605			if (Sgl_iszero_exponentmantissa(opnd3)) {
1606				if (Is_rounding_mode(ROUNDMINUS)) {
1607					Sgl_or_signs(opnd3,resultp1);
1608				} else {
1609					Sgl_and_signs(opnd3,resultp1);
1610				}
1611			}
1612			/*
1613			 * Now let's check for trapped underflow case.
1614			 */
1615			else if (Sgl_iszero_exponent(opnd3) &&
1616			         Is_underflowtrap_enabled()) {
1617                    		/* need to normalize results mantissa */
1618                    		sign_save = Sgl_signextendedsign(opnd3);
1619				result_exponent = 0;
1620                    		Sgl_leftshiftby1(opnd3);
1621                    		Sgl_normalize(opnd3,result_exponent);
1622                    		Sgl_set_sign(opnd3,/*using*/sign_save);
1623                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
1624							unfl);
1625                    		Sgl_copytoptr(opnd3,dstptr);
1626                    		/* inexact = FALSE */
1627                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1628			}
1629			Sgl_copytoptr(opnd3,dstptr);
1630			return(NOEXCEPTION);
1631		}
1632		/* is denormalized, adjust exponent */
1633		Sgl_clear_signexponent(opnd1);
1634		Sgl_leftshiftby1(opnd1);
1635		Sgl_normalize(opnd1,mpy_exponent);
1636	}
1637	/* opnd2 needs to have hidden bit set with msb in hidden bit */
1638	if (Sgl_isnotzero_exponent(opnd2)) {
1639		Sgl_clear_signexponent_set_hidden(opnd2);
1640	}
1641	else {
1642		/* check for zero */
1643		if (Sgl_iszero_mantissa(opnd2)) {
1644			/*
1645			 * Perform the add opnd3 with zero here.
1646			 */
1647			if (Sgl_iszero_exponentmantissa(opnd3)) {
1648				if (Is_rounding_mode(ROUNDMINUS)) {
1649					Sgl_or_signs(opnd3,resultp1);
1650				} else {
1651					Sgl_and_signs(opnd3,resultp1);
1652				}
1653			}
1654			/*
1655			 * Now let's check for trapped underflow case.
1656			 */
1657			else if (Sgl_iszero_exponent(opnd3) &&
1658			    Is_underflowtrap_enabled()) {
1659                    		/* need to normalize results mantissa */
1660                    		sign_save = Sgl_signextendedsign(opnd3);
1661				result_exponent = 0;
1662                    		Sgl_leftshiftby1(opnd3);
1663                    		Sgl_normalize(opnd3,result_exponent);
1664                    		Sgl_set_sign(opnd3,/*using*/sign_save);
1665                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
1666							unfl);
1667                    		Sgl_copytoptr(opnd3,dstptr);
1668                    		/* inexact = FALSE */
1669                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1670			}
1671			Sgl_copytoptr(opnd3,dstptr);
1672			return(NOEXCEPTION);
1673		}
1674		/* is denormalized; want to normalize */
1675		Sgl_clear_signexponent(opnd2);
1676		Sgl_leftshiftby1(opnd2);
1677		Sgl_normalize(opnd2,mpy_exponent);
1678	}
1679
1680	/* Multiply the first two source mantissas together */
1681
1682	/*
1683	 * The intermediate result will be kept in tmpres,
1684	 * which needs enough room for 106 bits of mantissa,
1685	 * so lets call it a Double extended.
1686	 */
1687	Sglext_setzero(tmpresp1,tmpresp2);
1688
1689	/*
1690	 * Four bits at a time are inspected in each loop, and a
1691	 * simple shift and add multiply algorithm is used.
1692	 */
1693	for (count = SGL_P-1; count >= 0; count -= 4) {
1694		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1695		if (Sbit28(opnd1)) {
1696	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
1697			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1698		}
1699		if (Sbit29(opnd1)) {
1700			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1701		}
1702		if (Sbit30(opnd1)) {
1703			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1704		}
1705		if (Sbit31(opnd1)) {
1706			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1707		}
1708		Sgl_rightshiftby4(opnd1);
1709	}
1710	if (Is_sexthiddenoverflow(tmpresp1)) {
1711		/* result mantissa >= 2 (mantissa overflow) */
1712		mpy_exponent++;
1713		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1714	} else {
1715		Sglext_rightshiftby3(tmpresp1,tmpresp2);
1716	}
1717
1718	/*
1719	 * Restore the sign of the mpy result which was saved in resultp1.
1720	 * The exponent will continue to be kept in mpy_exponent.
1721	 */
1722	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1723
1724	/*
1725	 * No rounding is required, since the result of the multiply
1726	 * is exact in the extended format.
1727	 */
1728
1729	/*
1730	 * Now we are ready to perform the add portion of the operation.
1731	 *
1732	 * The exponents need to be kept as integers for now, since the
1733	 * multiply result might not fit into the exponent field.  We
1734	 * can't overflow or underflow because of this yet, since the
1735	 * add could bring the final result back into range.
1736	 */
1737	add_exponent = Sgl_exponent(opnd3);
1738
1739	/*
1740	 * Check for denormalized or zero add operand.
1741	 */
1742	if (add_exponent == 0) {
1743		/* check for zero */
1744		if (Sgl_iszero_mantissa(opnd3)) {
1745			/* right is zero */
1746			/* Left can't be zero and must be result.
1747			 *
1748			 * The final result is now in tmpres and mpy_exponent,
1749			 * and needs to be rounded and squeezed back into
1750			 * double precision format from double extended.
1751			 */
1752			result_exponent = mpy_exponent;
1753			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1754			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1755			goto round;
1756		}
1757
1758		/*
1759		 * Neither are zeroes.
1760		 * Adjust exponent and normalize add operand.
1761		 */
1762		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
1763		Sgl_clear_signexponent(opnd3);
1764		Sgl_leftshiftby1(opnd3);
1765		Sgl_normalize(opnd3,add_exponent);
1766		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
1767	} else {
1768		Sgl_clear_exponent_set_hidden(opnd3);
1769	}
1770	/*
1771	 * Copy opnd3 to the double extended variable called right.
1772	 */
1773	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1774
1775	/*
1776	 * A zero "save" helps discover equal operands (for later),
1777	 * and is used in swapping operands (if needed).
1778	 */
1779	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1780
1781	/*
1782	 * Compare magnitude of operands.
1783	 */
1784	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1785	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1786	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1787	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1788		/*
1789		 * Set the left operand to the larger one by XOR swap.
1790		 * First finish the first word "save".
1791		 */
1792		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1793		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1794		Sglext_swap_lower(tmpresp2,rightp2);
1795		/* also setup exponents used in rest of routine */
1796		diff_exponent = add_exponent - mpy_exponent;
1797		result_exponent = add_exponent;
1798	} else {
1799		/* also setup exponents used in rest of routine */
1800		diff_exponent = mpy_exponent - add_exponent;
1801		result_exponent = mpy_exponent;
1802	}
1803	/* Invariant: left is not smaller than right. */
1804
1805	/*
1806	 * Special case alignment of operands that would force alignment
1807	 * beyond the extent of the extension.  A further optimization
1808	 * could special case this but only reduces the path length for
1809	 * this infrequent case.
1810	 */
1811	if (diff_exponent > SGLEXT_THRESHOLD) {
1812		diff_exponent = SGLEXT_THRESHOLD;
1813	}
1814
1815	/* Align right operand by shifting it to the right */
1816	Sglext_clear_sign(rightp1);
1817	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1818
1819	/* Treat sum and difference of the operands separately. */
1820	if ((int)save < 0) {
1821		/*
1822		 * Difference of the two operands.  Overflow can occur if the
1823		 * multiply overflowed.  A borrow can occur out of the hidden
1824		 * bit and force a post normalization phase.
1825		 */
1826		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1827			resultp1,resultp2);
1828		sign_save = Sgl_signextendedsign(resultp1);
1829		if (Sgl_iszero_hidden(resultp1)) {
1830			/* Handle normalization */
1831		/* A straightforward algorithm would now shift the
1832		 * result and extension left until the hidden bit
1833		 * becomes one.  Not all of the extension bits need
1834		 * participate in the shift.  Only the two most
1835		 * significant bits (round and guard) are needed.
1836		 * If only a single shift is needed then the guard
1837		 * bit becomes a significant low order bit and the
1838		 * extension must participate in the rounding.
1839		 * If more than a single shift is needed, then all
1840		 * bits to the right of the guard bit are zeros,
1841		 * and the guard bit may or may not be zero. */
1842			Sglext_leftshiftby1(resultp1,resultp2);
1843
1844			/* Need to check for a zero result.  The sign and
1845			 * exponent fields have already been zeroed.  The more
1846			 * efficient test of the full object can be used.
1847			 */
1848			 if (Sglext_iszero(resultp1,resultp2)) {
1849				/* Must have been "x-x" or "x+(-x)". */
1850				if (Is_rounding_mode(ROUNDMINUS))
1851					Sgl_setone_sign(resultp1);
1852				Sgl_copytoptr(resultp1,dstptr);
1853				return(NOEXCEPTION);
1854			}
1855			result_exponent--;
1856
1857			/* Look to see if normalization is finished. */
1858			if (Sgl_isone_hidden(resultp1)) {
1859				/* No further normalization is needed */
1860				goto round;
1861			}
1862
1863			/* Discover first one bit to determine shift amount.
1864			 * Use a modified binary search.  We have already
1865			 * shifted the result one position right and still
1866			 * not found a one so the remainder of the extension
1867			 * must be zero and simplifies rounding. */
1868			/* Scan bytes */
1869			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1870				Sglext_leftshiftby8(resultp1,resultp2);
1871				result_exponent -= 8;
1872			}
1873			/* Now narrow it down to the nibble */
1874			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1875				/* The lower nibble contains the
1876				 * normalizing one */
1877				Sglext_leftshiftby4(resultp1,resultp2);
1878				result_exponent -= 4;
1879			}
1880			/* Select case where first bit is set (already
1881			 * normalized) otherwise select the proper shift. */
1882			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1883			if (jumpsize <= 7) switch(jumpsize) {
1884			case 1:
1885				Sglext_leftshiftby3(resultp1,resultp2);
1886				result_exponent -= 3;
1887				break;
1888			case 2:
1889			case 3:
1890				Sglext_leftshiftby2(resultp1,resultp2);
1891				result_exponent -= 2;
1892				break;
1893			case 4:
1894			case 5:
1895			case 6:
1896			case 7:
1897				Sglext_leftshiftby1(resultp1,resultp2);
1898				result_exponent -= 1;
1899				break;
1900			}
1901		} /* end if (hidden...)... */
1902	/* Fall through and round */
1903	} /* end if (save < 0)... */
1904	else {
1905		/* Add magnitudes */
1906		Sglext_addition(tmpresp1,tmpresp2,
1907			rightp1,rightp2, /*to*/resultp1,resultp2);
1908		sign_save = Sgl_signextendedsign(resultp1);
1909		if (Sgl_isone_hiddenoverflow(resultp1)) {
1910	    		/* Prenormalization required. */
1911	    		Sglext_arithrightshiftby1(resultp1,resultp2);
1912	    		result_exponent++;
1913		} /* end if hiddenoverflow... */
1914	} /* end else ...add magnitudes... */
1915
1916	/* Round the result.  If the extension and lower two words are
1917	 * all zeros, then the result is exact.  Otherwise round in the
1918	 * correct direction.  Underflow is possible. If a postnormalization
1919	 * is necessary, then the mantissa is all zeros so no shift is needed.
1920	 */
1921  round:
1922	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1923		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1924	}
1925	Sgl_set_sign(resultp1,/*using*/sign_save);
1926	if (Sglext_isnotzero_mantissap2(resultp2)) {
1927		inexact = TRUE;
1928		switch(Rounding_mode()) {
1929		case ROUNDNEAREST: /* The default. */
1930			if (Sglext_isone_highp2(resultp2)) {
1931				/* at least 1/2 ulp */
1932				if (Sglext_isnotzero_low31p2(resultp2) ||
1933				    Sglext_isone_lowp1(resultp1)) {
1934					/* either exactly half way and odd or
1935					 * more than 1/2ulp */
1936					Sgl_increment(resultp1);
1937				}
1938			}
1939	    		break;
1940
1941		case ROUNDPLUS:
1942	    		if (Sgl_iszero_sign(resultp1)) {
1943				/* Round up positive results */
1944				Sgl_increment(resultp1);
1945			}
1946			break;
1947
1948		case ROUNDMINUS:
1949	    		if (Sgl_isone_sign(resultp1)) {
1950				/* Round down negative results */
1951				Sgl_increment(resultp1);
1952			}
1953
1954		case ROUNDZERO:;
1955			/* truncate is simple */
1956		} /* end switch... */
1957		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1958	}
1959	if (result_exponent >= SGL_INFINITY_EXPONENT) {
1960		/* Overflow */
1961		if (Is_overflowtrap_enabled()) {
1962                        /*
1963                         * Adjust bias of result
1964                         */
1965                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1966                        Sgl_copytoptr(resultp1,dstptr);
1967                        if (inexact)
1968                            if (Is_inexacttrap_enabled())
1969                                return (OPC_2E_OVERFLOWEXCEPTION |
1970					OPC_2E_INEXACTEXCEPTION);
1971                            else Set_inexactflag();
1972                        return (OPC_2E_OVERFLOWEXCEPTION);
1973		}
1974		inexact = TRUE;
1975		Set_overflowflag();
1976		Sgl_setoverflow(resultp1);
1977	} else if (result_exponent <= 0) {	/* underflow case */
1978		if (Is_underflowtrap_enabled()) {
1979                        /*
1980                         * Adjust bias of result
1981                         */
1982                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1983			Sgl_copytoptr(resultp1,dstptr);
1984                        if (inexact)
1985                            if (Is_inexacttrap_enabled())
1986                                return (OPC_2E_UNDERFLOWEXCEPTION |
1987					OPC_2E_INEXACTEXCEPTION);
1988                            else Set_inexactflag();
1989	    		return(OPC_2E_UNDERFLOWEXCEPTION);
1990		}
1991		else if (inexact && is_tiny) Set_underflowflag();
1992	}
1993	else Sgl_set_exponent(resultp1,result_exponent);
1994	Sgl_copytoptr(resultp1,dstptr);
1995	if (inexact)
1996		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1997		else Set_inexactflag();
1998    	return(NOEXCEPTION);
1999}
2000
2001/*
2002 *  Single Floating-point Multiply Negate Fused Add
2003 */
2004
2005sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2006
2007sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2008unsigned int *status;
2009{
2010	unsigned int opnd1, opnd2, opnd3;
2011	register unsigned int tmpresp1, tmpresp2;
2012	unsigned int rightp1, rightp2;
2013	unsigned int resultp1, resultp2 = 0;
2014	register int mpy_exponent, add_exponent, count;
2015	boolean inexact = FALSE, is_tiny = FALSE;
2016
2017	unsigned int signlessleft1, signlessright1, save;
2018	register int result_exponent, diff_exponent;
2019	int sign_save, jumpsize;
2020
2021	Sgl_copyfromptr(src1ptr,opnd1);
2022	Sgl_copyfromptr(src2ptr,opnd2);
2023	Sgl_copyfromptr(src3ptr,opnd3);
2024
2025	/*
2026	 * set sign bit of result of multiply
2027	 */
2028	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2029		Sgl_setzero(resultp1);
2030	else
2031		Sgl_setnegativezero(resultp1);
2032
2033	/*
2034	 * Generate multiply exponent
2035	 */
2036	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2037
2038	/*
2039	 * check first operand for NaN's or infinity
2040	 */
2041	if (Sgl_isinfinity_exponent(opnd1)) {
2042		if (Sgl_iszero_mantissa(opnd1)) {
2043			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2044				if (Sgl_iszero_exponentmantissa(opnd2)) {
2045					/*
2046					 * invalid since operands are infinity
2047					 * and zero
2048					 */
2049					if (Is_invalidtrap_enabled())
2050						return(OPC_2E_INVALIDEXCEPTION);
2051					Set_invalidflag();
2052					Sgl_makequietnan(resultp1);
2053					Sgl_copytoptr(resultp1,dstptr);
2054					return(NOEXCEPTION);
2055				}
2056				/*
2057				 * Check third operand for infinity with a
2058				 *  sign opposite of the multiply result
2059				 */
2060				if (Sgl_isinfinity(opnd3) &&
2061				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2062					/*
2063					 * invalid since attempting a magnitude
2064					 * subtraction of infinities
2065					 */
2066					if (Is_invalidtrap_enabled())
2067						return(OPC_2E_INVALIDEXCEPTION);
2068					Set_invalidflag();
2069					Sgl_makequietnan(resultp1);
2070					Sgl_copytoptr(resultp1,dstptr);
2071					return(NOEXCEPTION);
2072				}
2073
2074				/*
2075			 	 * return infinity
2076			 	 */
2077				Sgl_setinfinity_exponentmantissa(resultp1);
2078				Sgl_copytoptr(resultp1,dstptr);
2079				return(NOEXCEPTION);
2080			}
2081		}
2082		else {
2083			/*
2084		 	 * is NaN; signaling or quiet?
2085		 	 */
2086			if (Sgl_isone_signaling(opnd1)) {
2087				/* trap if INVALIDTRAP enabled */
2088				if (Is_invalidtrap_enabled())
2089			    		return(OPC_2E_INVALIDEXCEPTION);
2090				/* make NaN quiet */
2091				Set_invalidflag();
2092				Sgl_set_quiet(opnd1);
2093			}
2094			/*
2095			 * is second operand a signaling NaN?
2096			 */
2097			else if (Sgl_is_signalingnan(opnd2)) {
2098				/* trap if INVALIDTRAP enabled */
2099				if (Is_invalidtrap_enabled())
2100			    		return(OPC_2E_INVALIDEXCEPTION);
2101				/* make NaN quiet */
2102				Set_invalidflag();
2103				Sgl_set_quiet(opnd2);
2104				Sgl_copytoptr(opnd2,dstptr);
2105				return(NOEXCEPTION);
2106			}
2107			/*
2108			 * is third operand a signaling NaN?
2109			 */
2110			else if (Sgl_is_signalingnan(opnd3)) {
2111				/* trap if INVALIDTRAP enabled */
2112				if (Is_invalidtrap_enabled())
2113			    		return(OPC_2E_INVALIDEXCEPTION);
2114				/* make NaN quiet */
2115				Set_invalidflag();
2116				Sgl_set_quiet(opnd3);
2117				Sgl_copytoptr(opnd3,dstptr);
2118				return(NOEXCEPTION);
2119			}
2120			/*
2121		 	 * return quiet NaN
2122		 	 */
2123			Sgl_copytoptr(opnd1,dstptr);
2124			return(NOEXCEPTION);
2125		}
2126	}
2127
2128	/*
2129	 * check second operand for NaN's or infinity
2130	 */
2131	if (Sgl_isinfinity_exponent(opnd2)) {
2132		if (Sgl_iszero_mantissa(opnd2)) {
2133			if (Sgl_isnotnan(opnd3)) {
2134				if (Sgl_iszero_exponentmantissa(opnd1)) {
2135					/*
2136					 * invalid since multiply operands are
2137					 * zero & infinity
2138					 */
2139					if (Is_invalidtrap_enabled())
2140						return(OPC_2E_INVALIDEXCEPTION);
2141					Set_invalidflag();
2142					Sgl_makequietnan(opnd2);
2143					Sgl_copytoptr(opnd2,dstptr);
2144					return(NOEXCEPTION);
2145				}
2146
2147				/*
2148				 * Check third operand for infinity with a
2149				 *  sign opposite of the multiply result
2150				 */
2151				if (Sgl_isinfinity(opnd3) &&
2152				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2153					/*
2154					 * invalid since attempting a magnitude
2155					 * subtraction of infinities
2156					 */
2157					if (Is_invalidtrap_enabled())
2158				       		return(OPC_2E_INVALIDEXCEPTION);
2159				       	Set_invalidflag();
2160				       	Sgl_makequietnan(resultp1);
2161					Sgl_copytoptr(resultp1,dstptr);
2162					return(NOEXCEPTION);
2163				}
2164
2165				/*
2166				 * return infinity
2167				 */
2168				Sgl_setinfinity_exponentmantissa(resultp1);
2169				Sgl_copytoptr(resultp1,dstptr);
2170				return(NOEXCEPTION);
2171			}
2172		}
2173		else {
2174			/*
2175			 * is NaN; signaling or quiet?
2176			 */
2177			if (Sgl_isone_signaling(opnd2)) {
2178				/* trap if INVALIDTRAP enabled */
2179				if (Is_invalidtrap_enabled())
2180					return(OPC_2E_INVALIDEXCEPTION);
2181				/* make NaN quiet */
2182				Set_invalidflag();
2183				Sgl_set_quiet(opnd2);
2184			}
2185			/*
2186			 * is third operand a signaling NaN?
2187			 */
2188			else if (Sgl_is_signalingnan(opnd3)) {
2189			       	/* trap if INVALIDTRAP enabled */
2190			       	if (Is_invalidtrap_enabled())
2191				   		return(OPC_2E_INVALIDEXCEPTION);
2192			       	/* make NaN quiet */
2193			       	Set_invalidflag();
2194			       	Sgl_set_quiet(opnd3);
2195				Sgl_copytoptr(opnd3,dstptr);
2196		       		return(NOEXCEPTION);
2197			}
2198			/*
2199			 * return quiet NaN
2200			 */
2201			Sgl_copytoptr(opnd2,dstptr);
2202			return(NOEXCEPTION);
2203		}
2204	}
2205
2206	/*
2207	 * check third operand for NaN's or infinity
2208	 */
2209	if (Sgl_isinfinity_exponent(opnd3)) {
2210		if (Sgl_iszero_mantissa(opnd3)) {
2211			/* return infinity */
2212			Sgl_copytoptr(opnd3,dstptr);
2213			return(NOEXCEPTION);
2214		} else {
2215			/*
2216			 * is NaN; signaling or quiet?
2217			 */
2218			if (Sgl_isone_signaling(opnd3)) {
2219				/* trap if INVALIDTRAP enabled */
2220				if (Is_invalidtrap_enabled())
2221					return(OPC_2E_INVALIDEXCEPTION);
2222				/* make NaN quiet */
2223				Set_invalidflag();
2224				Sgl_set_quiet(opnd3);
2225			}
2226			/*
2227			 * return quiet NaN
2228 			 */
2229			Sgl_copytoptr(opnd3,dstptr);
2230			return(NOEXCEPTION);
2231		}
2232    	}
2233
2234	/*
2235	 * Generate multiply mantissa
2236	 */
2237	if (Sgl_isnotzero_exponent(opnd1)) {
2238		/* set hidden bit */
2239		Sgl_clear_signexponent_set_hidden(opnd1);
2240	}
2241	else {
2242		/* check for zero */
2243		if (Sgl_iszero_mantissa(opnd1)) {
2244			/*
2245			 * Perform the add opnd3 with zero here.
2246			 */
2247			if (Sgl_iszero_exponentmantissa(opnd3)) {
2248				if (Is_rounding_mode(ROUNDMINUS)) {
2249					Sgl_or_signs(opnd3,resultp1);
2250				} else {
2251					Sgl_and_signs(opnd3,resultp1);
2252				}
2253			}
2254			/*
2255			 * Now let's check for trapped underflow case.
2256			 */
2257			else if (Sgl_iszero_exponent(opnd3) &&
2258			         Is_underflowtrap_enabled()) {
2259                    		/* need to normalize results mantissa */
2260                    		sign_save = Sgl_signextendedsign(opnd3);
2261				result_exponent = 0;
2262                    		Sgl_leftshiftby1(opnd3);
2263                    		Sgl_normalize(opnd3,result_exponent);
2264                    		Sgl_set_sign(opnd3,/*using*/sign_save);
2265                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
2266							unfl);
2267                    		Sgl_copytoptr(opnd3,dstptr);
2268                    		/* inexact = FALSE */
2269                    		return(OPC_2E_UNDERFLOWEXCEPTION);
2270			}
2271			Sgl_copytoptr(opnd3,dstptr);
2272			return(NOEXCEPTION);
2273		}
2274		/* is denormalized, adjust exponent */
2275		Sgl_clear_signexponent(opnd1);
2276		Sgl_leftshiftby1(opnd1);
2277		Sgl_normalize(opnd1,mpy_exponent);
2278	}
2279	/* opnd2 needs to have hidden bit set with msb in hidden bit */
2280	if (Sgl_isnotzero_exponent(opnd2)) {
2281		Sgl_clear_signexponent_set_hidden(opnd2);
2282	}
2283	else {
2284		/* check for zero */
2285		if (Sgl_iszero_mantissa(opnd2)) {
2286			/*
2287			 * Perform the add opnd3 with zero here.
2288			 */
2289			if (Sgl_iszero_exponentmantissa(opnd3)) {
2290				if (Is_rounding_mode(ROUNDMINUS)) {
2291					Sgl_or_signs(opnd3,resultp1);
2292				} else {
2293					Sgl_and_signs(opnd3,resultp1);
2294				}
2295			}
2296			/*
2297			 * Now let's check for trapped underflow case.
2298			 */
2299			else if (Sgl_iszero_exponent(opnd3) &&
2300			    Is_underflowtrap_enabled()) {
2301                    		/* need to normalize results mantissa */
2302                    		sign_save = Sgl_signextendedsign(opnd3);
2303				result_exponent = 0;
2304                    		Sgl_leftshiftby1(opnd3);
2305                    		Sgl_normalize(opnd3,result_exponent);
2306                    		Sgl_set_sign(opnd3,/*using*/sign_save);
2307                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
2308							unfl);
2309                    		Sgl_copytoptr(opnd3,dstptr);
2310                    		/* inexact = FALSE */
2311                    		return(OPC_2E_UNDERFLOWEXCEPTION);
2312			}
2313			Sgl_copytoptr(opnd3,dstptr);
2314			return(NOEXCEPTION);
2315		}
2316		/* is denormalized; want to normalize */
2317		Sgl_clear_signexponent(opnd2);
2318		Sgl_leftshiftby1(opnd2);
2319		Sgl_normalize(opnd2,mpy_exponent);
2320	}
2321
2322	/* Multiply the first two source mantissas together */
2323
2324	/*
2325	 * The intermediate result will be kept in tmpres,
2326	 * which needs enough room for 106 bits of mantissa,
2327	 * so lets call it a Double extended.
2328	 */
2329	Sglext_setzero(tmpresp1,tmpresp2);
2330
2331	/*
2332	 * Four bits at a time are inspected in each loop, and a
2333	 * simple shift and add multiply algorithm is used.
2334	 */
2335	for (count = SGL_P-1; count >= 0; count -= 4) {
2336		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2337		if (Sbit28(opnd1)) {
2338	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
2339			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2340		}
2341		if (Sbit29(opnd1)) {
2342			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2343		}
2344		if (Sbit30(opnd1)) {
2345			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2346		}
2347		if (Sbit31(opnd1)) {
2348			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2349		}
2350		Sgl_rightshiftby4(opnd1);
2351	}
2352	if (Is_sexthiddenoverflow(tmpresp1)) {
2353		/* result mantissa >= 2 (mantissa overflow) */
2354		mpy_exponent++;
2355		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2356	} else {
2357		Sglext_rightshiftby3(tmpresp1,tmpresp2);
2358	}
2359
2360	/*
2361	 * Restore the sign of the mpy result which was saved in resultp1.
2362	 * The exponent will continue to be kept in mpy_exponent.
2363	 */
2364	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2365
2366	/*
2367	 * No rounding is required, since the result of the multiply
2368	 * is exact in the extended format.
2369	 */
2370
2371	/*
2372	 * Now we are ready to perform the add portion of the operation.
2373	 *
2374	 * The exponents need to be kept as integers for now, since the
2375	 * multiply result might not fit into the exponent field.  We
2376	 * can't overflow or underflow because of this yet, since the
2377	 * add could bring the final result back into range.
2378	 */
2379	add_exponent = Sgl_exponent(opnd3);
2380
2381	/*
2382	 * Check for denormalized or zero add operand.
2383	 */
2384	if (add_exponent == 0) {
2385		/* check for zero */
2386		if (Sgl_iszero_mantissa(opnd3)) {
2387			/* right is zero */
2388			/* Left can't be zero and must be result.
2389			 *
2390			 * The final result is now in tmpres and mpy_exponent,
2391			 * and needs to be rounded and squeezed back into
2392			 * double precision format from double extended.
2393			 */
2394			result_exponent = mpy_exponent;
2395			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2396			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2397			goto round;
2398		}
2399
2400		/*
2401		 * Neither are zeroes.
2402		 * Adjust exponent and normalize add operand.
2403		 */
2404		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
2405		Sgl_clear_signexponent(opnd3);
2406		Sgl_leftshiftby1(opnd3);
2407		Sgl_normalize(opnd3,add_exponent);
2408		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
2409	} else {
2410		Sgl_clear_exponent_set_hidden(opnd3);
2411	}
2412	/*
2413	 * Copy opnd3 to the double extended variable called right.
2414	 */
2415	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2416
2417	/*
2418	 * A zero "save" helps discover equal operands (for later),
2419	 * and is used in swapping operands (if needed).
2420	 */
2421	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2422
2423	/*
2424	 * Compare magnitude of operands.
2425	 */
2426	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2427	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2428	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2429	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2430		/*
2431		 * Set the left operand to the larger one by XOR swap.
2432		 * First finish the first word "save".
2433		 */
2434		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2435		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2436		Sglext_swap_lower(tmpresp2,rightp2);
2437		/* also setup exponents used in rest of routine */
2438		diff_exponent = add_exponent - mpy_exponent;
2439		result_exponent = add_exponent;
2440	} else {
2441		/* also setup exponents used in rest of routine */
2442		diff_exponent = mpy_exponent - add_exponent;
2443		result_exponent = mpy_exponent;
2444	}
2445	/* Invariant: left is not smaller than right. */
2446
2447	/*
2448	 * Special case alignment of operands that would force alignment
2449	 * beyond the extent of the extension.  A further optimization
2450	 * could special case this but only reduces the path length for
2451	 * this infrequent case.
2452	 */
2453	if (diff_exponent > SGLEXT_THRESHOLD) {
2454		diff_exponent = SGLEXT_THRESHOLD;
2455	}
2456
2457	/* Align right operand by shifting it to the right */
2458	Sglext_clear_sign(rightp1);
2459	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2460
2461	/* Treat sum and difference of the operands separately. */
2462	if ((int)save < 0) {
2463		/*
2464		 * Difference of the two operands.  Overflow can occur if the
2465		 * multiply overflowed.  A borrow can occur out of the hidden
2466		 * bit and force a post normalization phase.
2467		 */
2468		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2469			resultp1,resultp2);
2470		sign_save = Sgl_signextendedsign(resultp1);
2471		if (Sgl_iszero_hidden(resultp1)) {
2472			/* Handle normalization */
2473		/* A straightforward algorithm would now shift the
2474		 * result and extension left until the hidden bit
2475		 * becomes one.  Not all of the extension bits need
2476		 * participate in the shift.  Only the two most
2477		 * significant bits (round and guard) are needed.
2478		 * If only a single shift is needed then the guard
2479		 * bit becomes a significant low order bit and the
2480		 * extension must participate in the rounding.
2481		 * If more than a single shift is needed, then all
2482		 * bits to the right of the guard bit are zeros,
2483		 * and the guard bit may or may not be zero. */
2484			Sglext_leftshiftby1(resultp1,resultp2);
2485
2486			/* Need to check for a zero result.  The sign and
2487			 * exponent fields have already been zeroed.  The more
2488			 * efficient test of the full object can be used.
2489			 */
2490			 if (Sglext_iszero(resultp1,resultp2)) {
2491				/* Must have been "x-x" or "x+(-x)". */
2492				if (Is_rounding_mode(ROUNDMINUS))
2493					Sgl_setone_sign(resultp1);
2494				Sgl_copytoptr(resultp1,dstptr);
2495				return(NOEXCEPTION);
2496			}
2497			result_exponent--;
2498
2499			/* Look to see if normalization is finished. */
2500			if (Sgl_isone_hidden(resultp1)) {
2501				/* No further normalization is needed */
2502				goto round;
2503			}
2504
2505			/* Discover first one bit to determine shift amount.
2506			 * Use a modified binary search.  We have already
2507			 * shifted the result one position right and still
2508			 * not found a one so the remainder of the extension
2509			 * must be zero and simplifies rounding. */
2510			/* Scan bytes */
2511			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2512				Sglext_leftshiftby8(resultp1,resultp2);
2513				result_exponent -= 8;
2514			}
2515			/* Now narrow it down to the nibble */
2516			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2517				/* The lower nibble contains the
2518				 * normalizing one */
2519				Sglext_leftshiftby4(resultp1,resultp2);
2520				result_exponent -= 4;
2521			}
2522			/* Select case where first bit is set (already
2523			 * normalized) otherwise select the proper shift. */
2524			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2525			if (jumpsize <= 7) switch(jumpsize) {
2526			case 1:
2527				Sglext_leftshiftby3(resultp1,resultp2);
2528				result_exponent -= 3;
2529				break;
2530			case 2:
2531			case 3:
2532				Sglext_leftshiftby2(resultp1,resultp2);
2533				result_exponent -= 2;
2534				break;
2535			case 4:
2536			case 5:
2537			case 6:
2538			case 7:
2539				Sglext_leftshiftby1(resultp1,resultp2);
2540				result_exponent -= 1;
2541				break;
2542			}
2543		} /* end if (hidden...)... */
2544	/* Fall through and round */
2545	} /* end if (save < 0)... */
2546	else {
2547		/* Add magnitudes */
2548		Sglext_addition(tmpresp1,tmpresp2,
2549			rightp1,rightp2, /*to*/resultp1,resultp2);
2550		sign_save = Sgl_signextendedsign(resultp1);
2551		if (Sgl_isone_hiddenoverflow(resultp1)) {
2552	    		/* Prenormalization required. */
2553	    		Sglext_arithrightshiftby1(resultp1,resultp2);
2554	    		result_exponent++;
2555		} /* end if hiddenoverflow... */
2556	} /* end else ...add magnitudes... */
2557
2558	/* Round the result.  If the extension and lower two words are
2559	 * all zeros, then the result is exact.  Otherwise round in the
2560	 * correct direction.  Underflow is possible. If a postnormalization
2561	 * is necessary, then the mantissa is all zeros so no shift is needed.
2562	 */
2563  round:
2564	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2565		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2566	}
2567	Sgl_set_sign(resultp1,/*using*/sign_save);
2568	if (Sglext_isnotzero_mantissap2(resultp2)) {
2569		inexact = TRUE;
2570		switch(Rounding_mode()) {
2571		case ROUNDNEAREST: /* The default. */
2572			if (Sglext_isone_highp2(resultp2)) {
2573				/* at least 1/2 ulp */
2574				if (Sglext_isnotzero_low31p2(resultp2) ||
2575				    Sglext_isone_lowp1(resultp1)) {
2576					/* either exactly half way and odd or
2577					 * more than 1/2ulp */
2578					Sgl_increment(resultp1);
2579				}
2580			}
2581	    		break;
2582
2583		case ROUNDPLUS:
2584	    		if (Sgl_iszero_sign(resultp1)) {
2585				/* Round up positive results */
2586				Sgl_increment(resultp1);
2587			}
2588			break;
2589
2590		case ROUNDMINUS:
2591	    		if (Sgl_isone_sign(resultp1)) {
2592				/* Round down negative results */
2593				Sgl_increment(resultp1);
2594			}
2595
2596		case ROUNDZERO:;
2597			/* truncate is simple */
2598		} /* end switch... */
2599		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2600	}
2601	if (result_exponent >= SGL_INFINITY_EXPONENT) {
2602		/* Overflow */
2603		if (Is_overflowtrap_enabled()) {
2604                        /*
2605                         * Adjust bias of result
2606                         */
2607                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2608                        Sgl_copytoptr(resultp1,dstptr);
2609                        if (inexact)
2610                            if (Is_inexacttrap_enabled())
2611                                return (OPC_2E_OVERFLOWEXCEPTION |
2612					OPC_2E_INEXACTEXCEPTION);
2613                            else Set_inexactflag();
2614                        return (OPC_2E_OVERFLOWEXCEPTION);
2615		}
2616		inexact = TRUE;
2617		Set_overflowflag();
2618		Sgl_setoverflow(resultp1);
2619	} else if (result_exponent <= 0) {	/* underflow case */
2620		if (Is_underflowtrap_enabled()) {
2621                        /*
2622                         * Adjust bias of result
2623                         */
2624                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2625			Sgl_copytoptr(resultp1,dstptr);
2626                        if (inexact)
2627                            if (Is_inexacttrap_enabled())
2628                                return (OPC_2E_UNDERFLOWEXCEPTION |
2629					OPC_2E_INEXACTEXCEPTION);
2630                            else Set_inexactflag();
2631	    		return(OPC_2E_UNDERFLOWEXCEPTION);
2632		}
2633		else if (inexact && is_tiny) Set_underflowflag();
2634	}
2635	else Sgl_set_exponent(resultp1,result_exponent);
2636	Sgl_copytoptr(resultp1,dstptr);
2637	if (inexact)
2638		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2639		else Set_inexactflag();
2640    	return(NOEXCEPTION);
2641}
2642
2643