1/*
2 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
3 *
4 * Floating-point emulation code
5 *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
6 *
7 *    This program is free software; you can redistribute it and/or modify
8 *    it under the terms of the GNU General Public License as published by
9 *    the Free Software Foundation; either version 2, or (at your option)
10 *    any later version.
11 *
12 *    This program is distributed in the hope that it will be useful,
13 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 *    GNU General Public License for more details.
16 *
17 *    You should have received a copy of the GNU General Public License
18 *    along with this program; if not, write to the Free Software
19 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20 */
21/*
22 * BEGIN_DESC
23 *
24 *  File:
25 *	@(#)	pa/spmath/fmpyfadd.c		$Revision: 1.1.1.1 $
26 *
27 *  Purpose:
28 *	Double Floating-point Multiply Fused Add
29 *	Double Floating-point Multiply Negate Fused Add
30 *	Single Floating-point Multiply Fused Add
31 *	Single Floating-point Multiply Negate Fused Add
32 *
33 *  External Interfaces:
34 *	dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
35 *	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
36 *	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
37 *	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
38 *
39 *  Internal Interfaces:
40 *
41 *  Theory:
42 *	<<please update with a overview of the operation of this file>>
43 *
44 * END_DESC
45*/
46
47
48#include "float.h"
49#include "sgl_float.h"
50#include "dbl_float.h"
51
52
53/*
54 *  Double Floating-point Multiply Fused Add
55 */
56
57int
58dbl_fmpyfadd(
59	    dbl_floating_point *src1ptr,
60	    dbl_floating_point *src2ptr,
61	    dbl_floating_point *src3ptr,
62	    unsigned int *status,
63	    dbl_floating_point *dstptr)
64{
65	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
66	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
67	unsigned int rightp1, rightp2, rightp3, rightp4;
68	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
69	register int mpy_exponent, add_exponent, count;
70	boolean inexact = FALSE, is_tiny = FALSE;
71
72	unsigned int signlessleft1, signlessright1, save;
73	register int result_exponent, diff_exponent;
74	int sign_save, jumpsize;
75
76	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
77	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
78	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
79
80	/*
81	 * set sign bit of result of multiply
82	 */
83	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
84		Dbl_setnegativezerop1(resultp1);
85	else Dbl_setzerop1(resultp1);
86
87	/*
88	 * Generate multiply exponent
89	 */
90	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
91
92	/*
93	 * check first operand for NaN's or infinity
94	 */
95	if (Dbl_isinfinity_exponent(opnd1p1)) {
96		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
97			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
98			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
99				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
100					/*
101					 * invalid since operands are infinity
102					 * and zero
103					 */
104					if (Is_invalidtrap_enabled())
105						return(OPC_2E_INVALIDEXCEPTION);
106					Set_invalidflag();
107					Dbl_makequietnan(resultp1,resultp2);
108					Dbl_copytoptr(resultp1,resultp2,dstptr);
109					return(NOEXCEPTION);
110				}
111				/*
112				 * Check third operand for infinity with a
113				 *  sign opposite of the multiply result
114				 */
115				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
116				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
117					/*
118					 * invalid since attempting a magnitude
119					 * subtraction of infinities
120					 */
121					if (Is_invalidtrap_enabled())
122						return(OPC_2E_INVALIDEXCEPTION);
123					Set_invalidflag();
124					Dbl_makequietnan(resultp1,resultp2);
125					Dbl_copytoptr(resultp1,resultp2,dstptr);
126					return(NOEXCEPTION);
127				}
128
129				/*
130			 	 * return infinity
131			 	 */
132				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
133				Dbl_copytoptr(resultp1,resultp2,dstptr);
134				return(NOEXCEPTION);
135			}
136		}
137		else {
138			/*
139		 	 * is NaN; signaling or quiet?
140		 	 */
141			if (Dbl_isone_signaling(opnd1p1)) {
142				/* trap if INVALIDTRAP enabled */
143				if (Is_invalidtrap_enabled())
144			    		return(OPC_2E_INVALIDEXCEPTION);
145				/* make NaN quiet */
146				Set_invalidflag();
147				Dbl_set_quiet(opnd1p1);
148			}
149			/*
150			 * is second operand a signaling NaN?
151			 */
152			else if (Dbl_is_signalingnan(opnd2p1)) {
153				/* trap if INVALIDTRAP enabled */
154				if (Is_invalidtrap_enabled())
155			    		return(OPC_2E_INVALIDEXCEPTION);
156				/* make NaN quiet */
157				Set_invalidflag();
158				Dbl_set_quiet(opnd2p1);
159				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
160				return(NOEXCEPTION);
161			}
162			/*
163			 * is third operand a signaling NaN?
164			 */
165			else if (Dbl_is_signalingnan(opnd3p1)) {
166				/* trap if INVALIDTRAP enabled */
167				if (Is_invalidtrap_enabled())
168			    		return(OPC_2E_INVALIDEXCEPTION);
169				/* make NaN quiet */
170				Set_invalidflag();
171				Dbl_set_quiet(opnd3p1);
172				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
173				return(NOEXCEPTION);
174			}
175			/*
176		 	 * return quiet NaN
177		 	 */
178			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
179			return(NOEXCEPTION);
180		}
181	}
182
183	/*
184	 * check second operand for NaN's or infinity
185	 */
186	if (Dbl_isinfinity_exponent(opnd2p1)) {
187		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
188			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
189				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
190					/*
191					 * invalid since multiply operands are
192					 * zero & infinity
193					 */
194					if (Is_invalidtrap_enabled())
195						return(OPC_2E_INVALIDEXCEPTION);
196					Set_invalidflag();
197					Dbl_makequietnan(opnd2p1,opnd2p2);
198					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
199					return(NOEXCEPTION);
200				}
201
202				/*
203				 * Check third operand for infinity with a
204				 *  sign opposite of the multiply result
205				 */
206				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
207				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
208					/*
209					 * invalid since attempting a magnitude
210					 * subtraction of infinities
211					 */
212					if (Is_invalidtrap_enabled())
213				       		return(OPC_2E_INVALIDEXCEPTION);
214				       	Set_invalidflag();
215				       	Dbl_makequietnan(resultp1,resultp2);
216					Dbl_copytoptr(resultp1,resultp2,dstptr);
217					return(NOEXCEPTION);
218				}
219
220				/*
221				 * return infinity
222				 */
223				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
224				Dbl_copytoptr(resultp1,resultp2,dstptr);
225				return(NOEXCEPTION);
226			}
227		}
228		else {
229			/*
230			 * is NaN; signaling or quiet?
231			 */
232			if (Dbl_isone_signaling(opnd2p1)) {
233				/* trap if INVALIDTRAP enabled */
234				if (Is_invalidtrap_enabled())
235					return(OPC_2E_INVALIDEXCEPTION);
236				/* make NaN quiet */
237				Set_invalidflag();
238				Dbl_set_quiet(opnd2p1);
239			}
240			/*
241			 * is third operand a signaling NaN?
242			 */
243			else if (Dbl_is_signalingnan(opnd3p1)) {
244			       	/* trap if INVALIDTRAP enabled */
245			       	if (Is_invalidtrap_enabled())
246				   		return(OPC_2E_INVALIDEXCEPTION);
247			       	/* make NaN quiet */
248			       	Set_invalidflag();
249			       	Dbl_set_quiet(opnd3p1);
250				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
251		       		return(NOEXCEPTION);
252			}
253			/*
254			 * return quiet NaN
255			 */
256			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
257			return(NOEXCEPTION);
258		}
259	}
260
261	/*
262	 * check third operand for NaN's or infinity
263	 */
264	if (Dbl_isinfinity_exponent(opnd3p1)) {
265		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
266			/* return infinity */
267			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
268			return(NOEXCEPTION);
269		} else {
270			/*
271			 * is NaN; signaling or quiet?
272			 */
273			if (Dbl_isone_signaling(opnd3p1)) {
274				/* trap if INVALIDTRAP enabled */
275				if (Is_invalidtrap_enabled())
276					return(OPC_2E_INVALIDEXCEPTION);
277				/* make NaN quiet */
278				Set_invalidflag();
279				Dbl_set_quiet(opnd3p1);
280			}
281			/*
282			 * return quiet NaN
283 			 */
284			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
285			return(NOEXCEPTION);
286		}
287    	}
288
289	/*
290	 * Generate multiply mantissa
291	 */
292	if (Dbl_isnotzero_exponent(opnd1p1)) {
293		/* set hidden bit */
294		Dbl_clear_signexponent_set_hidden(opnd1p1);
295	}
296	else {
297		/* check for zero */
298		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
299			/*
300			 * Perform the add opnd3 with zero here.
301			 */
302			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
303				if (Is_rounding_mode(ROUNDMINUS)) {
304					Dbl_or_signs(opnd3p1,resultp1);
305				} else {
306					Dbl_and_signs(opnd3p1,resultp1);
307				}
308			}
309			/*
310			 * Now let's check for trapped underflow case.
311			 */
312			else if (Dbl_iszero_exponent(opnd3p1) &&
313			         Is_underflowtrap_enabled()) {
314                    		/* need to normalize results mantissa */
315                    		sign_save = Dbl_signextendedsign(opnd3p1);
316				result_exponent = 0;
317                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
318                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
319                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
320                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
321							unfl);
322                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
323                    		/* inexact = FALSE */
324                    		return(OPC_2E_UNDERFLOWEXCEPTION);
325			}
326			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
327			return(NOEXCEPTION);
328		}
329		/* is denormalized, adjust exponent */
330		Dbl_clear_signexponent(opnd1p1);
331		Dbl_leftshiftby1(opnd1p1,opnd1p2);
332		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
333	}
334	/* opnd2 needs to have hidden bit set with msb in hidden bit */
335	if (Dbl_isnotzero_exponent(opnd2p1)) {
336		Dbl_clear_signexponent_set_hidden(opnd2p1);
337	}
338	else {
339		/* check for zero */
340		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
341			/*
342			 * Perform the add opnd3 with zero here.
343			 */
344			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
345				if (Is_rounding_mode(ROUNDMINUS)) {
346					Dbl_or_signs(opnd3p1,resultp1);
347				} else {
348					Dbl_and_signs(opnd3p1,resultp1);
349				}
350			}
351			/*
352			 * Now let's check for trapped underflow case.
353			 */
354			else if (Dbl_iszero_exponent(opnd3p1) &&
355			    Is_underflowtrap_enabled()) {
356                    		/* need to normalize results mantissa */
357                    		sign_save = Dbl_signextendedsign(opnd3p1);
358				result_exponent = 0;
359                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
360                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
361                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
362                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
363							unfl);
364                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
365                    		/* inexact = FALSE */
366				return(OPC_2E_UNDERFLOWEXCEPTION);
367			}
368			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
369			return(NOEXCEPTION);
370		}
371		/* is denormalized; want to normalize */
372		Dbl_clear_signexponent(opnd2p1);
373		Dbl_leftshiftby1(opnd2p1,opnd2p2);
374		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
375	}
376
377	/* Multiply the first two source mantissas together */
378
379	/*
380	 * The intermediate result will be kept in tmpres,
381	 * which needs enough room for 106 bits of mantissa,
382	 * so lets call it a Double extended.
383	 */
384	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
385
386	/*
387	 * Four bits at a time are inspected in each loop, and a
388	 * simple shift and add multiply algorithm is used.
389	 */
390	for (count = DBL_P-1; count >= 0; count -= 4) {
391		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
392		if (Dbit28p2(opnd1p2)) {
393	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
394			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
395			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
396		}
397		if (Dbit29p2(opnd1p2)) {
398			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
399			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
400		}
401		if (Dbit30p2(opnd1p2)) {
402			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
403			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
404		}
405		if (Dbit31p2(opnd1p2)) {
406			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
407			 opnd2p1, opnd2p2, 0, 0);
408		}
409		Dbl_rightshiftby4(opnd1p1,opnd1p2);
410	}
411	if (Is_dexthiddenoverflow(tmpresp1)) {
412		/* result mantissa >= 2 (mantissa overflow) */
413		mpy_exponent++;
414		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
415	}
416
417	/*
418	 * Restore the sign of the mpy result which was saved in resultp1.
419	 * The exponent will continue to be kept in mpy_exponent.
420	 */
421	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
422
423	/*
424	 * No rounding is required, since the result of the multiply
425	 * is exact in the extended format.
426	 */
427
428	/*
429	 * Now we are ready to perform the add portion of the operation.
430	 *
431	 * The exponents need to be kept as integers for now, since the
432	 * multiply result might not fit into the exponent field.  We
433	 * can't overflow or underflow because of this yet, since the
434	 * add could bring the final result back into range.
435	 */
436	add_exponent = Dbl_exponent(opnd3p1);
437
438	/*
439	 * Check for denormalized or zero add operand.
440	 */
441	if (add_exponent == 0) {
442		/* check for zero */
443		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
444			/* right is zero */
445			/* Left can't be zero and must be result.
446			 *
447			 * The final result is now in tmpres and mpy_exponent,
448			 * and needs to be rounded and squeezed back into
449			 * double precision format from double extended.
450			 */
451			result_exponent = mpy_exponent;
452			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
453				resultp1,resultp2,resultp3,resultp4);
454			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
455			goto round;
456		}
457
458		/*
459		 * Neither are zeroes.
460		 * Adjust exponent and normalize add operand.
461		 */
462		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
463		Dbl_clear_signexponent(opnd3p1);
464		Dbl_leftshiftby1(opnd3p1,opnd3p2);
465		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
466		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
467	} else {
468		Dbl_clear_exponent_set_hidden(opnd3p1);
469	}
470	/*
471	 * Copy opnd3 to the double extended variable called right.
472	 */
473	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
474
475	/*
476	 * A zero "save" helps discover equal operands (for later),
477	 * and is used in swapping operands (if needed).
478	 */
479	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
480
481	/*
482	 * Compare magnitude of operands.
483	 */
484	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
485	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
486	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
487	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
488		/*
489		 * Set the left operand to the larger one by XOR swap.
490		 * First finish the first word "save".
491		 */
492		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
493		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
494		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
495			rightp2,rightp3,rightp4);
496		/* also setup exponents used in rest of routine */
497		diff_exponent = add_exponent - mpy_exponent;
498		result_exponent = add_exponent;
499	} else {
500		/* also setup exponents used in rest of routine */
501		diff_exponent = mpy_exponent - add_exponent;
502		result_exponent = mpy_exponent;
503	}
504	/* Invariant: left is not smaller than right. */
505
506	/*
507	 * Special case alignment of operands that would force alignment
508	 * beyond the extent of the extension.  A further optimization
509	 * could special case this but only reduces the path length for
510	 * this infrequent case.
511	 */
512	if (diff_exponent > DBLEXT_THRESHOLD) {
513		diff_exponent = DBLEXT_THRESHOLD;
514	}
515
516	/* Align right operand by shifting it to the right */
517	Dblext_clear_sign(rightp1);
518	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
519		/*shifted by*/diff_exponent);
520
521	/* Treat sum and difference of the operands separately. */
522	if ((int)save < 0) {
523		/*
524		 * Difference of the two operands.  Overflow can occur if the
525		 * multiply overflowed.  A borrow can occur out of the hidden
526		 * bit and force a post normalization phase.
527		 */
528		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
529			rightp1,rightp2,rightp3,rightp4,
530			resultp1,resultp2,resultp3,resultp4);
531		sign_save = Dbl_signextendedsign(resultp1);
532		if (Dbl_iszero_hidden(resultp1)) {
533			/* Handle normalization */
534		/* A straight foward algorithm would now shift the
535		 * result and extension left until the hidden bit
536		 * becomes one.  Not all of the extension bits need
537		 * participate in the shift.  Only the two most
538		 * significant bits (round and guard) are needed.
539		 * If only a single shift is needed then the guard
540		 * bit becomes a significant low order bit and the
541		 * extension must participate in the rounding.
542		 * If more than a single shift is needed, then all
543		 * bits to the right of the guard bit are zeros,
544		 * and the guard bit may or may not be zero. */
545			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
546				resultp4);
547
548			/* Need to check for a zero result.  The sign and
549			 * exponent fields have already been zeroed.  The more
550			 * efficient test of the full object can be used.
551			 */
552			 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
553				/* Must have been "x-x" or "x+(-x)". */
554				if (Is_rounding_mode(ROUNDMINUS))
555					Dbl_setone_sign(resultp1);
556				Dbl_copytoptr(resultp1,resultp2,dstptr);
557				return(NOEXCEPTION);
558			}
559			result_exponent--;
560
561			/* Look to see if normalization is finished. */
562			if (Dbl_isone_hidden(resultp1)) {
563				/* No further normalization is needed */
564				goto round;
565			}
566
567			/* Discover first one bit to determine shift amount.
568			 * Use a modified binary search.  We have already
569			 * shifted the result one position right and still
570			 * not found a one so the remainder of the extension
571			 * must be zero and simplifies rounding. */
572			/* Scan bytes */
573			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
574				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
575				result_exponent -= 8;
576			}
577			/* Now narrow it down to the nibble */
578			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
579				/* The lower nibble contains the
580				 * normalizing one */
581				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
582				result_exponent -= 4;
583			}
584			/* Select case where first bit is set (already
585			 * normalized) otherwise select the proper shift. */
586			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
587			if (jumpsize <= 7) switch(jumpsize) {
588			case 1:
589				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
590					resultp4);
591				result_exponent -= 3;
592				break;
593			case 2:
594			case 3:
595				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
596					resultp4);
597				result_exponent -= 2;
598				break;
599			case 4:
600			case 5:
601			case 6:
602			case 7:
603				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
604					resultp4);
605				result_exponent -= 1;
606				break;
607			}
608		} /* end if (hidden...)... */
609	/* Fall through and round */
610	} /* end if (save < 0)... */
611	else {
612		/* Add magnitudes */
613		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
614			rightp1,rightp2,rightp3,rightp4,
615			/*to*/resultp1,resultp2,resultp3,resultp4);
616		sign_save = Dbl_signextendedsign(resultp1);
617		if (Dbl_isone_hiddenoverflow(resultp1)) {
618	    		/* Prenormalization required. */
619	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
620				resultp4);
621	    		result_exponent++;
622		} /* end if hiddenoverflow... */
623	} /* end else ...add magnitudes... */
624
625	/* Round the result.  If the extension and lower two words are
626	 * all zeros, then the result is exact.  Otherwise round in the
627	 * correct direction.  Underflow is possible. If a postnormalization
628	 * is necessary, then the mantissa is all zeros so no shift is needed.
629	 */
630  round:
631	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
632		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
633			result_exponent,is_tiny);
634	}
635	Dbl_set_sign(resultp1,/*using*/sign_save);
636	if (Dblext_isnotzero_mantissap3(resultp3) ||
637	    Dblext_isnotzero_mantissap4(resultp4)) {
638		inexact = TRUE;
639		switch(Rounding_mode()) {
640		case ROUNDNEAREST: /* The default. */
641			if (Dblext_isone_highp3(resultp3)) {
642				/* at least 1/2 ulp */
643				if (Dblext_isnotzero_low31p3(resultp3) ||
644				    Dblext_isnotzero_mantissap4(resultp4) ||
645				    Dblext_isone_lowp2(resultp2)) {
646					/* either exactly half way and odd or
647					 * more than 1/2ulp */
648					Dbl_increment(resultp1,resultp2);
649				}
650			}
651	    		break;
652
653		case ROUNDPLUS:
654	    		if (Dbl_iszero_sign(resultp1)) {
655				/* Round up positive results */
656				Dbl_increment(resultp1,resultp2);
657			}
658			break;
659
660		case ROUNDMINUS:
661	    		if (Dbl_isone_sign(resultp1)) {
662				/* Round down negative results */
663				Dbl_increment(resultp1,resultp2);
664			}
665
666		case ROUNDZERO:;
667			/* truncate is simple */
668		} /* end switch... */
669		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
670	}
671	if (result_exponent >= DBL_INFINITY_EXPONENT) {
672                /* trap if OVERFLOWTRAP enabled */
673                if (Is_overflowtrap_enabled()) {
674                        /*
675                         * Adjust bias of result
676                         */
677                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
678                        Dbl_copytoptr(resultp1,resultp2,dstptr);
679                        if (inexact)
680                            if (Is_inexacttrap_enabled())
681                                return (OPC_2E_OVERFLOWEXCEPTION |
682					OPC_2E_INEXACTEXCEPTION);
683                            else Set_inexactflag();
684                        return (OPC_2E_OVERFLOWEXCEPTION);
685                }
686                inexact = TRUE;
687                Set_overflowflag();
688                /* set result to infinity or largest number */
689                Dbl_setoverflow(resultp1,resultp2);
690
691	} else if (result_exponent <= 0) {	/* underflow case */
692		if (Is_underflowtrap_enabled()) {
693                        /*
694                         * Adjust bias of result
695                         */
696                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
697			Dbl_copytoptr(resultp1,resultp2,dstptr);
698                        if (inexact)
699                            if (Is_inexacttrap_enabled())
700                                return (OPC_2E_UNDERFLOWEXCEPTION |
701					OPC_2E_INEXACTEXCEPTION);
702                            else Set_inexactflag();
703	    		return(OPC_2E_UNDERFLOWEXCEPTION);
704		}
705		else if (inexact && is_tiny) Set_underflowflag();
706	}
707	else Dbl_set_exponent(resultp1,result_exponent);
708	Dbl_copytoptr(resultp1,resultp2,dstptr);
709	if (inexact)
710		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
711		else Set_inexactflag();
712    	return(NOEXCEPTION);
713}
714
715/*
716 *  Double Floating-point Multiply Negate Fused Add
717 */
718
719dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
720
721dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
722unsigned int *status;
723{
724	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
725	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
726	unsigned int rightp1, rightp2, rightp3, rightp4;
727	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
728	register int mpy_exponent, add_exponent, count;
729	boolean inexact = FALSE, is_tiny = FALSE;
730
731	unsigned int signlessleft1, signlessright1, save;
732	register int result_exponent, diff_exponent;
733	int sign_save, jumpsize;
734
735	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
736	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
737	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
738
739	/*
740	 * set sign bit of result of multiply
741	 */
742	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
743		Dbl_setzerop1(resultp1);
744	else
745		Dbl_setnegativezerop1(resultp1);
746
747	/*
748	 * Generate multiply exponent
749	 */
750	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
751
752	/*
753	 * check first operand for NaN's or infinity
754	 */
755	if (Dbl_isinfinity_exponent(opnd1p1)) {
756		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
757			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
758			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
759				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
760					/*
761					 * invalid since operands are infinity
762					 * and zero
763					 */
764					if (Is_invalidtrap_enabled())
765						return(OPC_2E_INVALIDEXCEPTION);
766					Set_invalidflag();
767					Dbl_makequietnan(resultp1,resultp2);
768					Dbl_copytoptr(resultp1,resultp2,dstptr);
769					return(NOEXCEPTION);
770				}
771				/*
772				 * Check third operand for infinity with a
773				 *  sign opposite of the multiply result
774				 */
775				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
776				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
777					/*
778					 * invalid since attempting a magnitude
779					 * subtraction of infinities
780					 */
781					if (Is_invalidtrap_enabled())
782						return(OPC_2E_INVALIDEXCEPTION);
783					Set_invalidflag();
784					Dbl_makequietnan(resultp1,resultp2);
785					Dbl_copytoptr(resultp1,resultp2,dstptr);
786					return(NOEXCEPTION);
787				}
788
789				/*
790			 	 * return infinity
791			 	 */
792				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
793				Dbl_copytoptr(resultp1,resultp2,dstptr);
794				return(NOEXCEPTION);
795			}
796		}
797		else {
798			/*
799		 	 * is NaN; signaling or quiet?
800		 	 */
801			if (Dbl_isone_signaling(opnd1p1)) {
802				/* trap if INVALIDTRAP enabled */
803				if (Is_invalidtrap_enabled())
804			    		return(OPC_2E_INVALIDEXCEPTION);
805				/* make NaN quiet */
806				Set_invalidflag();
807				Dbl_set_quiet(opnd1p1);
808			}
809			/*
810			 * is second operand a signaling NaN?
811			 */
812			else if (Dbl_is_signalingnan(opnd2p1)) {
813				/* trap if INVALIDTRAP enabled */
814				if (Is_invalidtrap_enabled())
815			    		return(OPC_2E_INVALIDEXCEPTION);
816				/* make NaN quiet */
817				Set_invalidflag();
818				Dbl_set_quiet(opnd2p1);
819				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
820				return(NOEXCEPTION);
821			}
822			/*
823			 * is third operand a signaling NaN?
824			 */
825			else if (Dbl_is_signalingnan(opnd3p1)) {
826				/* trap if INVALIDTRAP enabled */
827				if (Is_invalidtrap_enabled())
828			    		return(OPC_2E_INVALIDEXCEPTION);
829				/* make NaN quiet */
830				Set_invalidflag();
831				Dbl_set_quiet(opnd3p1);
832				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
833				return(NOEXCEPTION);
834			}
835			/*
836		 	 * return quiet NaN
837		 	 */
838			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
839			return(NOEXCEPTION);
840		}
841	}
842
843	/*
844	 * check second operand for NaN's or infinity
845	 */
846	if (Dbl_isinfinity_exponent(opnd2p1)) {
847		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
848			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
849				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
850					/*
851					 * invalid since multiply operands are
852					 * zero & infinity
853					 */
854					if (Is_invalidtrap_enabled())
855						return(OPC_2E_INVALIDEXCEPTION);
856					Set_invalidflag();
857					Dbl_makequietnan(opnd2p1,opnd2p2);
858					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
859					return(NOEXCEPTION);
860				}
861
862				/*
863				 * Check third operand for infinity with a
864				 *  sign opposite of the multiply result
865				 */
866				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
867				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
868					/*
869					 * invalid since attempting a magnitude
870					 * subtraction of infinities
871					 */
872					if (Is_invalidtrap_enabled())
873				       		return(OPC_2E_INVALIDEXCEPTION);
874				       	Set_invalidflag();
875				       	Dbl_makequietnan(resultp1,resultp2);
876					Dbl_copytoptr(resultp1,resultp2,dstptr);
877					return(NOEXCEPTION);
878				}
879
880				/*
881				 * return infinity
882				 */
883				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
884				Dbl_copytoptr(resultp1,resultp2,dstptr);
885				return(NOEXCEPTION);
886			}
887		}
888		else {
889			/*
890			 * is NaN; signaling or quiet?
891			 */
892			if (Dbl_isone_signaling(opnd2p1)) {
893				/* trap if INVALIDTRAP enabled */
894				if (Is_invalidtrap_enabled())
895					return(OPC_2E_INVALIDEXCEPTION);
896				/* make NaN quiet */
897				Set_invalidflag();
898				Dbl_set_quiet(opnd2p1);
899			}
900			/*
901			 * is third operand a signaling NaN?
902			 */
903			else if (Dbl_is_signalingnan(opnd3p1)) {
904			       	/* trap if INVALIDTRAP enabled */
905			       	if (Is_invalidtrap_enabled())
906				   		return(OPC_2E_INVALIDEXCEPTION);
907			       	/* make NaN quiet */
908			       	Set_invalidflag();
909			       	Dbl_set_quiet(opnd3p1);
910				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
911		       		return(NOEXCEPTION);
912			}
913			/*
914			 * return quiet NaN
915			 */
916			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
917			return(NOEXCEPTION);
918		}
919	}
920
921	/*
922	 * check third operand for NaN's or infinity
923	 */
924	if (Dbl_isinfinity_exponent(opnd3p1)) {
925		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
926			/* return infinity */
927			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
928			return(NOEXCEPTION);
929		} else {
930			/*
931			 * is NaN; signaling or quiet?
932			 */
933			if (Dbl_isone_signaling(opnd3p1)) {
934				/* trap if INVALIDTRAP enabled */
935				if (Is_invalidtrap_enabled())
936					return(OPC_2E_INVALIDEXCEPTION);
937				/* make NaN quiet */
938				Set_invalidflag();
939				Dbl_set_quiet(opnd3p1);
940			}
941			/*
942			 * return quiet NaN
943 			 */
944			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
945			return(NOEXCEPTION);
946		}
947    	}
948
949	/*
950	 * Generate multiply mantissa
951	 */
952	if (Dbl_isnotzero_exponent(opnd1p1)) {
953		/* set hidden bit */
954		Dbl_clear_signexponent_set_hidden(opnd1p1);
955	}
956	else {
957		/* check for zero */
958		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
959			/*
960			 * Perform the add opnd3 with zero here.
961			 */
962			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
963				if (Is_rounding_mode(ROUNDMINUS)) {
964					Dbl_or_signs(opnd3p1,resultp1);
965				} else {
966					Dbl_and_signs(opnd3p1,resultp1);
967				}
968			}
969			/*
970			 * Now let's check for trapped underflow case.
971			 */
972			else if (Dbl_iszero_exponent(opnd3p1) &&
973			         Is_underflowtrap_enabled()) {
974                    		/* need to normalize results mantissa */
975                    		sign_save = Dbl_signextendedsign(opnd3p1);
976				result_exponent = 0;
977                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
978                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
979                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
980                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
981							unfl);
982                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
983                    		/* inexact = FALSE */
984                    		return(OPC_2E_UNDERFLOWEXCEPTION);
985			}
986			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
987			return(NOEXCEPTION);
988		}
989		/* is denormalized, adjust exponent */
990		Dbl_clear_signexponent(opnd1p1);
991		Dbl_leftshiftby1(opnd1p1,opnd1p2);
992		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
993	}
994	/* opnd2 needs to have hidden bit set with msb in hidden bit */
995	if (Dbl_isnotzero_exponent(opnd2p1)) {
996		Dbl_clear_signexponent_set_hidden(opnd2p1);
997	}
998	else {
999		/* check for zero */
1000		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1001			/*
1002			 * Perform the add opnd3 with zero here.
1003			 */
1004			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005				if (Is_rounding_mode(ROUNDMINUS)) {
1006					Dbl_or_signs(opnd3p1,resultp1);
1007				} else {
1008					Dbl_and_signs(opnd3p1,resultp1);
1009				}
1010			}
1011			/*
1012			 * Now let's check for trapped underflow case.
1013			 */
1014			else if (Dbl_iszero_exponent(opnd3p1) &&
1015			    Is_underflowtrap_enabled()) {
1016                    		/* need to normalize results mantissa */
1017                    		sign_save = Dbl_signextendedsign(opnd3p1);
1018				result_exponent = 0;
1019                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023							unfl);
1024                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025                    		/* inexact = FALSE */
1026                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1027			}
1028			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029			return(NOEXCEPTION);
1030		}
1031		/* is denormalized; want to normalize */
1032		Dbl_clear_signexponent(opnd2p1);
1033		Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1035	}
1036
1037	/* Multiply the first two source mantissas together */
1038
1039	/*
1040	 * The intermediate result will be kept in tmpres,
1041	 * which needs enough room for 106 bits of mantissa,
1042	 * so lets call it a Double extended.
1043	 */
1044	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1045
1046	/*
1047	 * Four bits at a time are inspected in each loop, and a
1048	 * simple shift and add multiply algorithm is used.
1049	 */
1050	for (count = DBL_P-1; count >= 0; count -= 4) {
1051		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052		if (Dbit28p2(opnd1p2)) {
1053	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
1054			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1055			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1056		}
1057		if (Dbit29p2(opnd1p2)) {
1058			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1060		}
1061		if (Dbit30p2(opnd1p2)) {
1062			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1064		}
1065		if (Dbit31p2(opnd1p2)) {
1066			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067			 opnd2p1, opnd2p2, 0, 0);
1068		}
1069		Dbl_rightshiftby4(opnd1p1,opnd1p2);
1070	}
1071	if (Is_dexthiddenoverflow(tmpresp1)) {
1072		/* result mantissa >= 2 (mantissa overflow) */
1073		mpy_exponent++;
1074		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1075	}
1076
1077	/*
1078	 * Restore the sign of the mpy result which was saved in resultp1.
1079	 * The exponent will continue to be kept in mpy_exponent.
1080	 */
1081	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1082
1083	/*
1084	 * No rounding is required, since the result of the multiply
1085	 * is exact in the extended format.
1086	 */
1087
1088	/*
1089	 * Now we are ready to perform the add portion of the operation.
1090	 *
1091	 * The exponents need to be kept as integers for now, since the
1092	 * multiply result might not fit into the exponent field.  We
1093	 * can't overflow or underflow because of this yet, since the
1094	 * add could bring the final result back into range.
1095	 */
1096	add_exponent = Dbl_exponent(opnd3p1);
1097
1098	/*
1099	 * Check for denormalized or zero add operand.
1100	 */
1101	if (add_exponent == 0) {
1102		/* check for zero */
1103		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104			/* right is zero */
1105			/* Left can't be zero and must be result.
1106			 *
1107			 * The final result is now in tmpres and mpy_exponent,
1108			 * and needs to be rounded and squeezed back into
1109			 * double precision format from double extended.
1110			 */
1111			result_exponent = mpy_exponent;
1112			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113				resultp1,resultp2,resultp3,resultp4);
1114			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115			goto round;
1116		}
1117
1118		/*
1119		 * Neither are zeroes.
1120		 * Adjust exponent and normalize add operand.
1121		 */
1122		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
1123		Dbl_clear_signexponent(opnd3p1);
1124		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
1127	} else {
1128		Dbl_clear_exponent_set_hidden(opnd3p1);
1129	}
1130	/*
1131	 * Copy opnd3 to the double extended variable called right.
1132	 */
1133	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1134
1135	/*
1136	 * A zero "save" helps discover equal operands (for later),
1137	 * and is used in swapping operands (if needed).
1138	 */
1139	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1140
1141	/*
1142	 * Compare magnitude of operands.
1143	 */
1144	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1148		/*
1149		 * Set the left operand to the larger one by XOR swap.
1150		 * First finish the first word "save".
1151		 */
1152		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155			rightp2,rightp3,rightp4);
1156		/* also setup exponents used in rest of routine */
1157		diff_exponent = add_exponent - mpy_exponent;
1158		result_exponent = add_exponent;
1159	} else {
1160		/* also setup exponents used in rest of routine */
1161		diff_exponent = mpy_exponent - add_exponent;
1162		result_exponent = mpy_exponent;
1163	}
1164	/* Invariant: left is not smaller than right. */
1165
1166	/*
1167	 * Special case alignment of operands that would force alignment
1168	 * beyond the extent of the extension.  A further optimization
1169	 * could special case this but only reduces the path length for
1170	 * this infrequent case.
1171	 */
1172	if (diff_exponent > DBLEXT_THRESHOLD) {
1173		diff_exponent = DBLEXT_THRESHOLD;
1174	}
1175
1176	/* Align right operand by shifting it to the right */
1177	Dblext_clear_sign(rightp1);
1178	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179		/*shifted by*/diff_exponent);
1180
1181	/* Treat sum and difference of the operands separately. */
1182	if ((int)save < 0) {
1183		/*
1184		 * Difference of the two operands.  Overflow can occur if the
1185		 * multiply overflowed.  A borrow can occur out of the hidden
1186		 * bit and force a post normalization phase.
1187		 */
1188		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189			rightp1,rightp2,rightp3,rightp4,
1190			resultp1,resultp2,resultp3,resultp4);
1191		sign_save = Dbl_signextendedsign(resultp1);
1192		if (Dbl_iszero_hidden(resultp1)) {
1193			/* Handle normalization */
1194		/* A straight foward algorithm would now shift the
1195		 * result and extension left until the hidden bit
1196		 * becomes one.  Not all of the extension bits need
1197		 * participate in the shift.  Only the two most
1198		 * significant bits (round and guard) are needed.
1199		 * If only a single shift is needed then the guard
1200		 * bit becomes a significant low order bit and the
1201		 * extension must participate in the rounding.
1202		 * If more than a single shift is needed, then all
1203		 * bits to the right of the guard bit are zeros,
1204		 * and the guard bit may or may not be zero. */
1205			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206				resultp4);
1207
1208			/* Need to check for a zero result.  The sign and
1209			 * exponent fields have already been zeroed.  The more
1210			 * efficient test of the full object can be used.
1211			 */
1212			 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213				/* Must have been "x-x" or "x+(-x)". */
1214				if (Is_rounding_mode(ROUNDMINUS))
1215					Dbl_setone_sign(resultp1);
1216				Dbl_copytoptr(resultp1,resultp2,dstptr);
1217				return(NOEXCEPTION);
1218			}
1219			result_exponent--;
1220
1221			/* Look to see if normalization is finished. */
1222			if (Dbl_isone_hidden(resultp1)) {
1223				/* No further normalization is needed */
1224				goto round;
1225			}
1226
1227			/* Discover first one bit to determine shift amount.
1228			 * Use a modified binary search.  We have already
1229			 * shifted the result one position right and still
1230			 * not found a one so the remainder of the extension
1231			 * must be zero and simplifies rounding. */
1232			/* Scan bytes */
1233			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235				result_exponent -= 8;
1236			}
1237			/* Now narrow it down to the nibble */
1238			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239				/* The lower nibble contains the
1240				 * normalizing one */
1241				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242				result_exponent -= 4;
1243			}
1244			/* Select case where first bit is set (already
1245			 * normalized) otherwise select the proper shift. */
1246			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247			if (jumpsize <= 7) switch(jumpsize) {
1248			case 1:
1249				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250					resultp4);
1251				result_exponent -= 3;
1252				break;
1253			case 2:
1254			case 3:
1255				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256					resultp4);
1257				result_exponent -= 2;
1258				break;
1259			case 4:
1260			case 5:
1261			case 6:
1262			case 7:
1263				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264					resultp4);
1265				result_exponent -= 1;
1266				break;
1267			}
1268		} /* end if (hidden...)... */
1269	/* Fall through and round */
1270	} /* end if (save < 0)... */
1271	else {
1272		/* Add magnitudes */
1273		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274			rightp1,rightp2,rightp3,rightp4,
1275			/*to*/resultp1,resultp2,resultp3,resultp4);
1276		sign_save = Dbl_signextendedsign(resultp1);
1277		if (Dbl_isone_hiddenoverflow(resultp1)) {
1278	    		/* Prenormalization required. */
1279	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280				resultp4);
1281	    		result_exponent++;
1282		} /* end if hiddenoverflow... */
1283	} /* end else ...add magnitudes... */
1284
1285	/* Round the result.  If the extension and lower two words are
1286	 * all zeros, then the result is exact.  Otherwise round in the
1287	 * correct direction.  Underflow is possible. If a postnormalization
1288	 * is necessary, then the mantissa is all zeros so no shift is needed.
1289	 */
1290  round:
1291	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293			result_exponent,is_tiny);
1294	}
1295	Dbl_set_sign(resultp1,/*using*/sign_save);
1296	if (Dblext_isnotzero_mantissap3(resultp3) ||
1297	    Dblext_isnotzero_mantissap4(resultp4)) {
1298		inexact = TRUE;
1299		switch(Rounding_mode()) {
1300		case ROUNDNEAREST: /* The default. */
1301			if (Dblext_isone_highp3(resultp3)) {
1302				/* at least 1/2 ulp */
1303				if (Dblext_isnotzero_low31p3(resultp3) ||
1304				    Dblext_isnotzero_mantissap4(resultp4) ||
1305				    Dblext_isone_lowp2(resultp2)) {
1306					/* either exactly half way and odd or
1307					 * more than 1/2ulp */
1308					Dbl_increment(resultp1,resultp2);
1309				}
1310			}
1311	    		break;
1312
1313		case ROUNDPLUS:
1314	    		if (Dbl_iszero_sign(resultp1)) {
1315				/* Round up positive results */
1316				Dbl_increment(resultp1,resultp2);
1317			}
1318			break;
1319
1320		case ROUNDMINUS:
1321	    		if (Dbl_isone_sign(resultp1)) {
1322				/* Round down negative results */
1323				Dbl_increment(resultp1,resultp2);
1324			}
1325
1326		case ROUNDZERO:;
1327			/* truncate is simple */
1328		} /* end switch... */
1329		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1330	}
1331	if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332		/* Overflow */
1333		if (Is_overflowtrap_enabled()) {
1334                        /*
1335                         * Adjust bias of result
1336                         */
1337                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338                        Dbl_copytoptr(resultp1,resultp2,dstptr);
1339                        if (inexact)
1340                            if (Is_inexacttrap_enabled())
1341                                return (OPC_2E_OVERFLOWEXCEPTION |
1342					OPC_2E_INEXACTEXCEPTION);
1343                            else Set_inexactflag();
1344                        return (OPC_2E_OVERFLOWEXCEPTION);
1345		}
1346		inexact = TRUE;
1347		Set_overflowflag();
1348		Dbl_setoverflow(resultp1,resultp2);
1349	} else if (result_exponent <= 0) {	/* underflow case */
1350		if (Is_underflowtrap_enabled()) {
1351                        /*
1352                         * Adjust bias of result
1353                         */
1354                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355			Dbl_copytoptr(resultp1,resultp2,dstptr);
1356                        if (inexact)
1357                            if (Is_inexacttrap_enabled())
1358                                return (OPC_2E_UNDERFLOWEXCEPTION |
1359					OPC_2E_INEXACTEXCEPTION);
1360                            else Set_inexactflag();
1361	    		return(OPC_2E_UNDERFLOWEXCEPTION);
1362		}
1363		else if (inexact && is_tiny) Set_underflowflag();
1364	}
1365	else Dbl_set_exponent(resultp1,result_exponent);
1366	Dbl_copytoptr(resultp1,resultp2,dstptr);
1367	if (inexact)
1368		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369		else Set_inexactflag();
1370    	return(NOEXCEPTION);
1371}
1372
1373/*
1374 *  Single Floating-point Multiply Fused Add
1375 */
1376
1377sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1378
1379sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380unsigned int *status;
1381{
1382	unsigned int opnd1, opnd2, opnd3;
1383	register unsigned int tmpresp1, tmpresp2;
1384	unsigned int rightp1, rightp2;
1385	unsigned int resultp1, resultp2 = 0;
1386	register int mpy_exponent, add_exponent, count;
1387	boolean inexact = FALSE, is_tiny = FALSE;
1388
1389	unsigned int signlessleft1, signlessright1, save;
1390	register int result_exponent, diff_exponent;
1391	int sign_save, jumpsize;
1392
1393	Sgl_copyfromptr(src1ptr,opnd1);
1394	Sgl_copyfromptr(src2ptr,opnd2);
1395	Sgl_copyfromptr(src3ptr,opnd3);
1396
1397	/*
1398	 * set sign bit of result of multiply
1399	 */
1400	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1401		Sgl_setnegativezero(resultp1);
1402	else Sgl_setzero(resultp1);
1403
1404	/*
1405	 * Generate multiply exponent
1406	 */
1407	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1408
1409	/*
1410	 * check first operand for NaN's or infinity
1411	 */
1412	if (Sgl_isinfinity_exponent(opnd1)) {
1413		if (Sgl_iszero_mantissa(opnd1)) {
1414			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415				if (Sgl_iszero_exponentmantissa(opnd2)) {
1416					/*
1417					 * invalid since operands are infinity
1418					 * and zero
1419					 */
1420					if (Is_invalidtrap_enabled())
1421						return(OPC_2E_INVALIDEXCEPTION);
1422					Set_invalidflag();
1423					Sgl_makequietnan(resultp1);
1424					Sgl_copytoptr(resultp1,dstptr);
1425					return(NOEXCEPTION);
1426				}
1427				/*
1428				 * Check third operand for infinity with a
1429				 *  sign opposite of the multiply result
1430				 */
1431				if (Sgl_isinfinity(opnd3) &&
1432				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1433					/*
1434					 * invalid since attempting a magnitude
1435					 * subtraction of infinities
1436					 */
1437					if (Is_invalidtrap_enabled())
1438						return(OPC_2E_INVALIDEXCEPTION);
1439					Set_invalidflag();
1440					Sgl_makequietnan(resultp1);
1441					Sgl_copytoptr(resultp1,dstptr);
1442					return(NOEXCEPTION);
1443				}
1444
1445				/*
1446			 	 * return infinity
1447			 	 */
1448				Sgl_setinfinity_exponentmantissa(resultp1);
1449				Sgl_copytoptr(resultp1,dstptr);
1450				return(NOEXCEPTION);
1451			}
1452		}
1453		else {
1454			/*
1455		 	 * is NaN; signaling or quiet?
1456		 	 */
1457			if (Sgl_isone_signaling(opnd1)) {
1458				/* trap if INVALIDTRAP enabled */
1459				if (Is_invalidtrap_enabled())
1460			    		return(OPC_2E_INVALIDEXCEPTION);
1461				/* make NaN quiet */
1462				Set_invalidflag();
1463				Sgl_set_quiet(opnd1);
1464			}
1465			/*
1466			 * is second operand a signaling NaN?
1467			 */
1468			else if (Sgl_is_signalingnan(opnd2)) {
1469				/* trap if INVALIDTRAP enabled */
1470				if (Is_invalidtrap_enabled())
1471			    		return(OPC_2E_INVALIDEXCEPTION);
1472				/* make NaN quiet */
1473				Set_invalidflag();
1474				Sgl_set_quiet(opnd2);
1475				Sgl_copytoptr(opnd2,dstptr);
1476				return(NOEXCEPTION);
1477			}
1478			/*
1479			 * is third operand a signaling NaN?
1480			 */
1481			else if (Sgl_is_signalingnan(opnd3)) {
1482				/* trap if INVALIDTRAP enabled */
1483				if (Is_invalidtrap_enabled())
1484			    		return(OPC_2E_INVALIDEXCEPTION);
1485				/* make NaN quiet */
1486				Set_invalidflag();
1487				Sgl_set_quiet(opnd3);
1488				Sgl_copytoptr(opnd3,dstptr);
1489				return(NOEXCEPTION);
1490			}
1491			/*
1492		 	 * return quiet NaN
1493		 	 */
1494			Sgl_copytoptr(opnd1,dstptr);
1495			return(NOEXCEPTION);
1496		}
1497	}
1498
1499	/*
1500	 * check second operand for NaN's or infinity
1501	 */
1502	if (Sgl_isinfinity_exponent(opnd2)) {
1503		if (Sgl_iszero_mantissa(opnd2)) {
1504			if (Sgl_isnotnan(opnd3)) {
1505				if (Sgl_iszero_exponentmantissa(opnd1)) {
1506					/*
1507					 * invalid since multiply operands are
1508					 * zero & infinity
1509					 */
1510					if (Is_invalidtrap_enabled())
1511						return(OPC_2E_INVALIDEXCEPTION);
1512					Set_invalidflag();
1513					Sgl_makequietnan(opnd2);
1514					Sgl_copytoptr(opnd2,dstptr);
1515					return(NOEXCEPTION);
1516				}
1517
1518				/*
1519				 * Check third operand for infinity with a
1520				 *  sign opposite of the multiply result
1521				 */
1522				if (Sgl_isinfinity(opnd3) &&
1523				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1524					/*
1525					 * invalid since attempting a magnitude
1526					 * subtraction of infinities
1527					 */
1528					if (Is_invalidtrap_enabled())
1529				       		return(OPC_2E_INVALIDEXCEPTION);
1530				       	Set_invalidflag();
1531				       	Sgl_makequietnan(resultp1);
1532					Sgl_copytoptr(resultp1,dstptr);
1533					return(NOEXCEPTION);
1534				}
1535
1536				/*
1537				 * return infinity
1538				 */
1539				Sgl_setinfinity_exponentmantissa(resultp1);
1540				Sgl_copytoptr(resultp1,dstptr);
1541				return(NOEXCEPTION);
1542			}
1543		}
1544		else {
1545			/*
1546			 * is NaN; signaling or quiet?
1547			 */
1548			if (Sgl_isone_signaling(opnd2)) {
1549				/* trap if INVALIDTRAP enabled */
1550				if (Is_invalidtrap_enabled())
1551					return(OPC_2E_INVALIDEXCEPTION);
1552				/* make NaN quiet */
1553				Set_invalidflag();
1554				Sgl_set_quiet(opnd2);
1555			}
1556			/*
1557			 * is third operand a signaling NaN?
1558			 */
1559			else if (Sgl_is_signalingnan(opnd3)) {
1560			       	/* trap if INVALIDTRAP enabled */
1561			       	if (Is_invalidtrap_enabled())
1562				   		return(OPC_2E_INVALIDEXCEPTION);
1563			       	/* make NaN quiet */
1564			       	Set_invalidflag();
1565			       	Sgl_set_quiet(opnd3);
1566				Sgl_copytoptr(opnd3,dstptr);
1567		       		return(NOEXCEPTION);
1568			}
1569			/*
1570			 * return quiet NaN
1571			 */
1572			Sgl_copytoptr(opnd2,dstptr);
1573			return(NOEXCEPTION);
1574		}
1575	}
1576
1577	/*
1578	 * check third operand for NaN's or infinity
1579	 */
1580	if (Sgl_isinfinity_exponent(opnd3)) {
1581		if (Sgl_iszero_mantissa(opnd3)) {
1582			/* return infinity */
1583			Sgl_copytoptr(opnd3,dstptr);
1584			return(NOEXCEPTION);
1585		} else {
1586			/*
1587			 * is NaN; signaling or quiet?
1588			 */
1589			if (Sgl_isone_signaling(opnd3)) {
1590				/* trap if INVALIDTRAP enabled */
1591				if (Is_invalidtrap_enabled())
1592					return(OPC_2E_INVALIDEXCEPTION);
1593				/* make NaN quiet */
1594				Set_invalidflag();
1595				Sgl_set_quiet(opnd3);
1596			}
1597			/*
1598			 * return quiet NaN
1599 			 */
1600			Sgl_copytoptr(opnd3,dstptr);
1601			return(NOEXCEPTION);
1602		}
1603    	}
1604
1605	/*
1606	 * Generate multiply mantissa
1607	 */
1608	if (Sgl_isnotzero_exponent(opnd1)) {
1609		/* set hidden bit */
1610		Sgl_clear_signexponent_set_hidden(opnd1);
1611	}
1612	else {
1613		/* check for zero */
1614		if (Sgl_iszero_mantissa(opnd1)) {
1615			/*
1616			 * Perform the add opnd3 with zero here.
1617			 */
1618			if (Sgl_iszero_exponentmantissa(opnd3)) {
1619				if (Is_rounding_mode(ROUNDMINUS)) {
1620					Sgl_or_signs(opnd3,resultp1);
1621				} else {
1622					Sgl_and_signs(opnd3,resultp1);
1623				}
1624			}
1625			/*
1626			 * Now let's check for trapped underflow case.
1627			 */
1628			else if (Sgl_iszero_exponent(opnd3) &&
1629			         Is_underflowtrap_enabled()) {
1630                    		/* need to normalize results mantissa */
1631                    		sign_save = Sgl_signextendedsign(opnd3);
1632				result_exponent = 0;
1633                    		Sgl_leftshiftby1(opnd3);
1634                    		Sgl_normalize(opnd3,result_exponent);
1635                    		Sgl_set_sign(opnd3,/*using*/sign_save);
1636                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
1637							unfl);
1638                    		Sgl_copytoptr(opnd3,dstptr);
1639                    		/* inexact = FALSE */
1640                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1641			}
1642			Sgl_copytoptr(opnd3,dstptr);
1643			return(NOEXCEPTION);
1644		}
1645		/* is denormalized, adjust exponent */
1646		Sgl_clear_signexponent(opnd1);
1647		Sgl_leftshiftby1(opnd1);
1648		Sgl_normalize(opnd1,mpy_exponent);
1649	}
1650	/* opnd2 needs to have hidden bit set with msb in hidden bit */
1651	if (Sgl_isnotzero_exponent(opnd2)) {
1652		Sgl_clear_signexponent_set_hidden(opnd2);
1653	}
1654	else {
1655		/* check for zero */
1656		if (Sgl_iszero_mantissa(opnd2)) {
1657			/*
1658			 * Perform the add opnd3 with zero here.
1659			 */
1660			if (Sgl_iszero_exponentmantissa(opnd3)) {
1661				if (Is_rounding_mode(ROUNDMINUS)) {
1662					Sgl_or_signs(opnd3,resultp1);
1663				} else {
1664					Sgl_and_signs(opnd3,resultp1);
1665				}
1666			}
1667			/*
1668			 * Now let's check for trapped underflow case.
1669			 */
1670			else if (Sgl_iszero_exponent(opnd3) &&
1671			    Is_underflowtrap_enabled()) {
1672                    		/* need to normalize results mantissa */
1673                    		sign_save = Sgl_signextendedsign(opnd3);
1674				result_exponent = 0;
1675                    		Sgl_leftshiftby1(opnd3);
1676                    		Sgl_normalize(opnd3,result_exponent);
1677                    		Sgl_set_sign(opnd3,/*using*/sign_save);
1678                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
1679							unfl);
1680                    		Sgl_copytoptr(opnd3,dstptr);
1681                    		/* inexact = FALSE */
1682                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1683			}
1684			Sgl_copytoptr(opnd3,dstptr);
1685			return(NOEXCEPTION);
1686		}
1687		/* is denormalized; want to normalize */
1688		Sgl_clear_signexponent(opnd2);
1689		Sgl_leftshiftby1(opnd2);
1690		Sgl_normalize(opnd2,mpy_exponent);
1691	}
1692
1693	/* Multiply the first two source mantissas together */
1694
1695	/*
1696	 * The intermediate result will be kept in tmpres,
1697	 * which needs enough room for 106 bits of mantissa,
1698	 * so lets call it a Double extended.
1699	 */
1700	Sglext_setzero(tmpresp1,tmpresp2);
1701
1702	/*
1703	 * Four bits at a time are inspected in each loop, and a
1704	 * simple shift and add multiply algorithm is used.
1705	 */
1706	for (count = SGL_P-1; count >= 0; count -= 4) {
1707		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708		if (Sbit28(opnd1)) {
1709	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
1710			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1711		}
1712		if (Sbit29(opnd1)) {
1713			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1714		}
1715		if (Sbit30(opnd1)) {
1716			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1717		}
1718		if (Sbit31(opnd1)) {
1719			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1720		}
1721		Sgl_rightshiftby4(opnd1);
1722	}
1723	if (Is_sexthiddenoverflow(tmpresp1)) {
1724		/* result mantissa >= 2 (mantissa overflow) */
1725		mpy_exponent++;
1726		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727	} else {
1728		Sglext_rightshiftby3(tmpresp1,tmpresp2);
1729	}
1730
1731	/*
1732	 * Restore the sign of the mpy result which was saved in resultp1.
1733	 * The exponent will continue to be kept in mpy_exponent.
1734	 */
1735	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1736
1737	/*
1738	 * No rounding is required, since the result of the multiply
1739	 * is exact in the extended format.
1740	 */
1741
1742	/*
1743	 * Now we are ready to perform the add portion of the operation.
1744	 *
1745	 * The exponents need to be kept as integers for now, since the
1746	 * multiply result might not fit into the exponent field.  We
1747	 * can't overflow or underflow because of this yet, since the
1748	 * add could bring the final result back into range.
1749	 */
1750	add_exponent = Sgl_exponent(opnd3);
1751
1752	/*
1753	 * Check for denormalized or zero add operand.
1754	 */
1755	if (add_exponent == 0) {
1756		/* check for zero */
1757		if (Sgl_iszero_mantissa(opnd3)) {
1758			/* right is zero */
1759			/* Left can't be zero and must be result.
1760			 *
1761			 * The final result is now in tmpres and mpy_exponent,
1762			 * and needs to be rounded and squeezed back into
1763			 * double precision format from double extended.
1764			 */
1765			result_exponent = mpy_exponent;
1766			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768			goto round;
1769		}
1770
1771		/*
1772		 * Neither are zeroes.
1773		 * Adjust exponent and normalize add operand.
1774		 */
1775		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
1776		Sgl_clear_signexponent(opnd3);
1777		Sgl_leftshiftby1(opnd3);
1778		Sgl_normalize(opnd3,add_exponent);
1779		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
1780	} else {
1781		Sgl_clear_exponent_set_hidden(opnd3);
1782	}
1783	/*
1784	 * Copy opnd3 to the double extended variable called right.
1785	 */
1786	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1787
1788	/*
1789	 * A zero "save" helps discover equal operands (for later),
1790	 * and is used in swapping operands (if needed).
1791	 */
1792	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1793
1794	/*
1795	 * Compare magnitude of operands.
1796	 */
1797	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1801		/*
1802		 * Set the left operand to the larger one by XOR swap.
1803		 * First finish the first word "save".
1804		 */
1805		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807		Sglext_swap_lower(tmpresp2,rightp2);
1808		/* also setup exponents used in rest of routine */
1809		diff_exponent = add_exponent - mpy_exponent;
1810		result_exponent = add_exponent;
1811	} else {
1812		/* also setup exponents used in rest of routine */
1813		diff_exponent = mpy_exponent - add_exponent;
1814		result_exponent = mpy_exponent;
1815	}
1816	/* Invariant: left is not smaller than right. */
1817
1818	/*
1819	 * Special case alignment of operands that would force alignment
1820	 * beyond the extent of the extension.  A further optimization
1821	 * could special case this but only reduces the path length for
1822	 * this infrequent case.
1823	 */
1824	if (diff_exponent > SGLEXT_THRESHOLD) {
1825		diff_exponent = SGLEXT_THRESHOLD;
1826	}
1827
1828	/* Align right operand by shifting it to the right */
1829	Sglext_clear_sign(rightp1);
1830	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1831
1832	/* Treat sum and difference of the operands separately. */
1833	if ((int)save < 0) {
1834		/*
1835		 * Difference of the two operands.  Overflow can occur if the
1836		 * multiply overflowed.  A borrow can occur out of the hidden
1837		 * bit and force a post normalization phase.
1838		 */
1839		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840			resultp1,resultp2);
1841		sign_save = Sgl_signextendedsign(resultp1);
1842		if (Sgl_iszero_hidden(resultp1)) {
1843			/* Handle normalization */
1844		/* A straight foward algorithm would now shift the
1845		 * result and extension left until the hidden bit
1846		 * becomes one.  Not all of the extension bits need
1847		 * participate in the shift.  Only the two most
1848		 * significant bits (round and guard) are needed.
1849		 * If only a single shift is needed then the guard
1850		 * bit becomes a significant low order bit and the
1851		 * extension must participate in the rounding.
1852		 * If more than a single shift is needed, then all
1853		 * bits to the right of the guard bit are zeros,
1854		 * and the guard bit may or may not be zero. */
1855			Sglext_leftshiftby1(resultp1,resultp2);
1856
1857			/* Need to check for a zero result.  The sign and
1858			 * exponent fields have already been zeroed.  The more
1859			 * efficient test of the full object can be used.
1860			 */
1861			 if (Sglext_iszero(resultp1,resultp2)) {
1862				/* Must have been "x-x" or "x+(-x)". */
1863				if (Is_rounding_mode(ROUNDMINUS))
1864					Sgl_setone_sign(resultp1);
1865				Sgl_copytoptr(resultp1,dstptr);
1866				return(NOEXCEPTION);
1867			}
1868			result_exponent--;
1869
1870			/* Look to see if normalization is finished. */
1871			if (Sgl_isone_hidden(resultp1)) {
1872				/* No further normalization is needed */
1873				goto round;
1874			}
1875
1876			/* Discover first one bit to determine shift amount.
1877			 * Use a modified binary search.  We have already
1878			 * shifted the result one position right and still
1879			 * not found a one so the remainder of the extension
1880			 * must be zero and simplifies rounding. */
1881			/* Scan bytes */
1882			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883				Sglext_leftshiftby8(resultp1,resultp2);
1884				result_exponent -= 8;
1885			}
1886			/* Now narrow it down to the nibble */
1887			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888				/* The lower nibble contains the
1889				 * normalizing one */
1890				Sglext_leftshiftby4(resultp1,resultp2);
1891				result_exponent -= 4;
1892			}
1893			/* Select case where first bit is set (already
1894			 * normalized) otherwise select the proper shift. */
1895			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896			if (jumpsize <= 7) switch(jumpsize) {
1897			case 1:
1898				Sglext_leftshiftby3(resultp1,resultp2);
1899				result_exponent -= 3;
1900				break;
1901			case 2:
1902			case 3:
1903				Sglext_leftshiftby2(resultp1,resultp2);
1904				result_exponent -= 2;
1905				break;
1906			case 4:
1907			case 5:
1908			case 6:
1909			case 7:
1910				Sglext_leftshiftby1(resultp1,resultp2);
1911				result_exponent -= 1;
1912				break;
1913			}
1914		} /* end if (hidden...)... */
1915	/* Fall through and round */
1916	} /* end if (save < 0)... */
1917	else {
1918		/* Add magnitudes */
1919		Sglext_addition(tmpresp1,tmpresp2,
1920			rightp1,rightp2, /*to*/resultp1,resultp2);
1921		sign_save = Sgl_signextendedsign(resultp1);
1922		if (Sgl_isone_hiddenoverflow(resultp1)) {
1923	    		/* Prenormalization required. */
1924	    		Sglext_arithrightshiftby1(resultp1,resultp2);
1925	    		result_exponent++;
1926		} /* end if hiddenoverflow... */
1927	} /* end else ...add magnitudes... */
1928
1929	/* Round the result.  If the extension and lower two words are
1930	 * all zeros, then the result is exact.  Otherwise round in the
1931	 * correct direction.  Underflow is possible. If a postnormalization
1932	 * is necessary, then the mantissa is all zeros so no shift is needed.
1933	 */
1934  round:
1935	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1937	}
1938	Sgl_set_sign(resultp1,/*using*/sign_save);
1939	if (Sglext_isnotzero_mantissap2(resultp2)) {
1940		inexact = TRUE;
1941		switch(Rounding_mode()) {
1942		case ROUNDNEAREST: /* The default. */
1943			if (Sglext_isone_highp2(resultp2)) {
1944				/* at least 1/2 ulp */
1945				if (Sglext_isnotzero_low31p2(resultp2) ||
1946				    Sglext_isone_lowp1(resultp1)) {
1947					/* either exactly half way and odd or
1948					 * more than 1/2ulp */
1949					Sgl_increment(resultp1);
1950				}
1951			}
1952	    		break;
1953
1954		case ROUNDPLUS:
1955	    		if (Sgl_iszero_sign(resultp1)) {
1956				/* Round up positive results */
1957				Sgl_increment(resultp1);
1958			}
1959			break;
1960
1961		case ROUNDMINUS:
1962	    		if (Sgl_isone_sign(resultp1)) {
1963				/* Round down negative results */
1964				Sgl_increment(resultp1);
1965			}
1966
1967		case ROUNDZERO:;
1968			/* truncate is simple */
1969		} /* end switch... */
1970		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1971	}
1972	if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973		/* Overflow */
1974		if (Is_overflowtrap_enabled()) {
1975                        /*
1976                         * Adjust bias of result
1977                         */
1978                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979                        Sgl_copytoptr(resultp1,dstptr);
1980                        if (inexact)
1981                            if (Is_inexacttrap_enabled())
1982                                return (OPC_2E_OVERFLOWEXCEPTION |
1983					OPC_2E_INEXACTEXCEPTION);
1984                            else Set_inexactflag();
1985                        return (OPC_2E_OVERFLOWEXCEPTION);
1986		}
1987		inexact = TRUE;
1988		Set_overflowflag();
1989		Sgl_setoverflow(resultp1);
1990	} else if (result_exponent <= 0) {	/* underflow case */
1991		if (Is_underflowtrap_enabled()) {
1992                        /*
1993                         * Adjust bias of result
1994                         */
1995                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996			Sgl_copytoptr(resultp1,dstptr);
1997                        if (inexact)
1998                            if (Is_inexacttrap_enabled())
1999                                return (OPC_2E_UNDERFLOWEXCEPTION |
2000					OPC_2E_INEXACTEXCEPTION);
2001                            else Set_inexactflag();
2002	    		return(OPC_2E_UNDERFLOWEXCEPTION);
2003		}
2004		else if (inexact && is_tiny) Set_underflowflag();
2005	}
2006	else Sgl_set_exponent(resultp1,result_exponent);
2007	Sgl_copytoptr(resultp1,dstptr);
2008	if (inexact)
2009		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010		else Set_inexactflag();
2011    	return(NOEXCEPTION);
2012}
2013
2014/*
2015 *  Single Floating-point Multiply Negate Fused Add
2016 */
2017
2018sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2019
2020sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021unsigned int *status;
2022{
2023	unsigned int opnd1, opnd2, opnd3;
2024	register unsigned int tmpresp1, tmpresp2;
2025	unsigned int rightp1, rightp2;
2026	unsigned int resultp1, resultp2 = 0;
2027	register int mpy_exponent, add_exponent, count;
2028	boolean inexact = FALSE, is_tiny = FALSE;
2029
2030	unsigned int signlessleft1, signlessright1, save;
2031	register int result_exponent, diff_exponent;
2032	int sign_save, jumpsize;
2033
2034	Sgl_copyfromptr(src1ptr,opnd1);
2035	Sgl_copyfromptr(src2ptr,opnd2);
2036	Sgl_copyfromptr(src3ptr,opnd3);
2037
2038	/*
2039	 * set sign bit of result of multiply
2040	 */
2041	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2042		Sgl_setzero(resultp1);
2043	else
2044		Sgl_setnegativezero(resultp1);
2045
2046	/*
2047	 * Generate multiply exponent
2048	 */
2049	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2050
2051	/*
2052	 * check first operand for NaN's or infinity
2053	 */
2054	if (Sgl_isinfinity_exponent(opnd1)) {
2055		if (Sgl_iszero_mantissa(opnd1)) {
2056			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057				if (Sgl_iszero_exponentmantissa(opnd2)) {
2058					/*
2059					 * invalid since operands are infinity
2060					 * and zero
2061					 */
2062					if (Is_invalidtrap_enabled())
2063						return(OPC_2E_INVALIDEXCEPTION);
2064					Set_invalidflag();
2065					Sgl_makequietnan(resultp1);
2066					Sgl_copytoptr(resultp1,dstptr);
2067					return(NOEXCEPTION);
2068				}
2069				/*
2070				 * Check third operand for infinity with a
2071				 *  sign opposite of the multiply result
2072				 */
2073				if (Sgl_isinfinity(opnd3) &&
2074				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2075					/*
2076					 * invalid since attempting a magnitude
2077					 * subtraction of infinities
2078					 */
2079					if (Is_invalidtrap_enabled())
2080						return(OPC_2E_INVALIDEXCEPTION);
2081					Set_invalidflag();
2082					Sgl_makequietnan(resultp1);
2083					Sgl_copytoptr(resultp1,dstptr);
2084					return(NOEXCEPTION);
2085				}
2086
2087				/*
2088			 	 * return infinity
2089			 	 */
2090				Sgl_setinfinity_exponentmantissa(resultp1);
2091				Sgl_copytoptr(resultp1,dstptr);
2092				return(NOEXCEPTION);
2093			}
2094		}
2095		else {
2096			/*
2097		 	 * is NaN; signaling or quiet?
2098		 	 */
2099			if (Sgl_isone_signaling(opnd1)) {
2100				/* trap if INVALIDTRAP enabled */
2101				if (Is_invalidtrap_enabled())
2102			    		return(OPC_2E_INVALIDEXCEPTION);
2103				/* make NaN quiet */
2104				Set_invalidflag();
2105				Sgl_set_quiet(opnd1);
2106			}
2107			/*
2108			 * is second operand a signaling NaN?
2109			 */
2110			else if (Sgl_is_signalingnan(opnd2)) {
2111				/* trap if INVALIDTRAP enabled */
2112				if (Is_invalidtrap_enabled())
2113			    		return(OPC_2E_INVALIDEXCEPTION);
2114				/* make NaN quiet */
2115				Set_invalidflag();
2116				Sgl_set_quiet(opnd2);
2117				Sgl_copytoptr(opnd2,dstptr);
2118				return(NOEXCEPTION);
2119			}
2120			/*
2121			 * is third operand a signaling NaN?
2122			 */
2123			else if (Sgl_is_signalingnan(opnd3)) {
2124				/* trap if INVALIDTRAP enabled */
2125				if (Is_invalidtrap_enabled())
2126			    		return(OPC_2E_INVALIDEXCEPTION);
2127				/* make NaN quiet */
2128				Set_invalidflag();
2129				Sgl_set_quiet(opnd3);
2130				Sgl_copytoptr(opnd3,dstptr);
2131				return(NOEXCEPTION);
2132			}
2133			/*
2134		 	 * return quiet NaN
2135		 	 */
2136			Sgl_copytoptr(opnd1,dstptr);
2137			return(NOEXCEPTION);
2138		}
2139	}
2140
2141	/*
2142	 * check second operand for NaN's or infinity
2143	 */
2144	if (Sgl_isinfinity_exponent(opnd2)) {
2145		if (Sgl_iszero_mantissa(opnd2)) {
2146			if (Sgl_isnotnan(opnd3)) {
2147				if (Sgl_iszero_exponentmantissa(opnd1)) {
2148					/*
2149					 * invalid since multiply operands are
2150					 * zero & infinity
2151					 */
2152					if (Is_invalidtrap_enabled())
2153						return(OPC_2E_INVALIDEXCEPTION);
2154					Set_invalidflag();
2155					Sgl_makequietnan(opnd2);
2156					Sgl_copytoptr(opnd2,dstptr);
2157					return(NOEXCEPTION);
2158				}
2159
2160				/*
2161				 * Check third operand for infinity with a
2162				 *  sign opposite of the multiply result
2163				 */
2164				if (Sgl_isinfinity(opnd3) &&
2165				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2166					/*
2167					 * invalid since attempting a magnitude
2168					 * subtraction of infinities
2169					 */
2170					if (Is_invalidtrap_enabled())
2171				       		return(OPC_2E_INVALIDEXCEPTION);
2172				       	Set_invalidflag();
2173				       	Sgl_makequietnan(resultp1);
2174					Sgl_copytoptr(resultp1,dstptr);
2175					return(NOEXCEPTION);
2176				}
2177
2178				/*
2179				 * return infinity
2180				 */
2181				Sgl_setinfinity_exponentmantissa(resultp1);
2182				Sgl_copytoptr(resultp1,dstptr);
2183				return(NOEXCEPTION);
2184			}
2185		}
2186		else {
2187			/*
2188			 * is NaN; signaling or quiet?
2189			 */
2190			if (Sgl_isone_signaling(opnd2)) {
2191				/* trap if INVALIDTRAP enabled */
2192				if (Is_invalidtrap_enabled())
2193					return(OPC_2E_INVALIDEXCEPTION);
2194				/* make NaN quiet */
2195				Set_invalidflag();
2196				Sgl_set_quiet(opnd2);
2197			}
2198			/*
2199			 * is third operand a signaling NaN?
2200			 */
2201			else if (Sgl_is_signalingnan(opnd3)) {
2202			       	/* trap if INVALIDTRAP enabled */
2203			       	if (Is_invalidtrap_enabled())
2204				   		return(OPC_2E_INVALIDEXCEPTION);
2205			       	/* make NaN quiet */
2206			       	Set_invalidflag();
2207			       	Sgl_set_quiet(opnd3);
2208				Sgl_copytoptr(opnd3,dstptr);
2209		       		return(NOEXCEPTION);
2210			}
2211			/*
2212			 * return quiet NaN
2213			 */
2214			Sgl_copytoptr(opnd2,dstptr);
2215			return(NOEXCEPTION);
2216		}
2217	}
2218
2219	/*
2220	 * check third operand for NaN's or infinity
2221	 */
2222	if (Sgl_isinfinity_exponent(opnd3)) {
2223		if (Sgl_iszero_mantissa(opnd3)) {
2224			/* return infinity */
2225			Sgl_copytoptr(opnd3,dstptr);
2226			return(NOEXCEPTION);
2227		} else {
2228			/*
2229			 * is NaN; signaling or quiet?
2230			 */
2231			if (Sgl_isone_signaling(opnd3)) {
2232				/* trap if INVALIDTRAP enabled */
2233				if (Is_invalidtrap_enabled())
2234					return(OPC_2E_INVALIDEXCEPTION);
2235				/* make NaN quiet */
2236				Set_invalidflag();
2237				Sgl_set_quiet(opnd3);
2238			}
2239			/*
2240			 * return quiet NaN
2241 			 */
2242			Sgl_copytoptr(opnd3,dstptr);
2243			return(NOEXCEPTION);
2244		}
2245    	}
2246
2247	/*
2248	 * Generate multiply mantissa
2249	 */
2250	if (Sgl_isnotzero_exponent(opnd1)) {
2251		/* set hidden bit */
2252		Sgl_clear_signexponent_set_hidden(opnd1);
2253	}
2254	else {
2255		/* check for zero */
2256		if (Sgl_iszero_mantissa(opnd1)) {
2257			/*
2258			 * Perform the add opnd3 with zero here.
2259			 */
2260			if (Sgl_iszero_exponentmantissa(opnd3)) {
2261				if (Is_rounding_mode(ROUNDMINUS)) {
2262					Sgl_or_signs(opnd3,resultp1);
2263				} else {
2264					Sgl_and_signs(opnd3,resultp1);
2265				}
2266			}
2267			/*
2268			 * Now let's check for trapped underflow case.
2269			 */
2270			else if (Sgl_iszero_exponent(opnd3) &&
2271			         Is_underflowtrap_enabled()) {
2272                    		/* need to normalize results mantissa */
2273                    		sign_save = Sgl_signextendedsign(opnd3);
2274				result_exponent = 0;
2275                    		Sgl_leftshiftby1(opnd3);
2276                    		Sgl_normalize(opnd3,result_exponent);
2277                    		Sgl_set_sign(opnd3,/*using*/sign_save);
2278                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
2279							unfl);
2280                    		Sgl_copytoptr(opnd3,dstptr);
2281                    		/* inexact = FALSE */
2282                    		return(OPC_2E_UNDERFLOWEXCEPTION);
2283			}
2284			Sgl_copytoptr(opnd3,dstptr);
2285			return(NOEXCEPTION);
2286		}
2287		/* is denormalized, adjust exponent */
2288		Sgl_clear_signexponent(opnd1);
2289		Sgl_leftshiftby1(opnd1);
2290		Sgl_normalize(opnd1,mpy_exponent);
2291	}
2292	/* opnd2 needs to have hidden bit set with msb in hidden bit */
2293	if (Sgl_isnotzero_exponent(opnd2)) {
2294		Sgl_clear_signexponent_set_hidden(opnd2);
2295	}
2296	else {
2297		/* check for zero */
2298		if (Sgl_iszero_mantissa(opnd2)) {
2299			/*
2300			 * Perform the add opnd3 with zero here.
2301			 */
2302			if (Sgl_iszero_exponentmantissa(opnd3)) {
2303				if (Is_rounding_mode(ROUNDMINUS)) {
2304					Sgl_or_signs(opnd3,resultp1);
2305				} else {
2306					Sgl_and_signs(opnd3,resultp1);
2307				}
2308			}
2309			/*
2310			 * Now let's check for trapped underflow case.
2311			 */
2312			else if (Sgl_iszero_exponent(opnd3) &&
2313			    Is_underflowtrap_enabled()) {
2314                    		/* need to normalize results mantissa */
2315                    		sign_save = Sgl_signextendedsign(opnd3);
2316				result_exponent = 0;
2317                    		Sgl_leftshiftby1(opnd3);
2318                    		Sgl_normalize(opnd3,result_exponent);
2319                    		Sgl_set_sign(opnd3,/*using*/sign_save);
2320                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
2321							unfl);
2322                    		Sgl_copytoptr(opnd3,dstptr);
2323                    		/* inexact = FALSE */
2324                    		return(OPC_2E_UNDERFLOWEXCEPTION);
2325			}
2326			Sgl_copytoptr(opnd3,dstptr);
2327			return(NOEXCEPTION);
2328		}
2329		/* is denormalized; want to normalize */
2330		Sgl_clear_signexponent(opnd2);
2331		Sgl_leftshiftby1(opnd2);
2332		Sgl_normalize(opnd2,mpy_exponent);
2333	}
2334
2335	/* Multiply the first two source mantissas together */
2336
2337	/*
2338	 * The intermediate result will be kept in tmpres,
2339	 * which needs enough room for 106 bits of mantissa,
2340	 * so lets call it a Double extended.
2341	 */
2342	Sglext_setzero(tmpresp1,tmpresp2);
2343
2344	/*
2345	 * Four bits at a time are inspected in each loop, and a
2346	 * simple shift and add multiply algorithm is used.
2347	 */
2348	for (count = SGL_P-1; count >= 0; count -= 4) {
2349		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350		if (Sbit28(opnd1)) {
2351	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
2352			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2353		}
2354		if (Sbit29(opnd1)) {
2355			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2356		}
2357		if (Sbit30(opnd1)) {
2358			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2359		}
2360		if (Sbit31(opnd1)) {
2361			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2362		}
2363		Sgl_rightshiftby4(opnd1);
2364	}
2365	if (Is_sexthiddenoverflow(tmpresp1)) {
2366		/* result mantissa >= 2 (mantissa overflow) */
2367		mpy_exponent++;
2368		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369	} else {
2370		Sglext_rightshiftby3(tmpresp1,tmpresp2);
2371	}
2372
2373	/*
2374	 * Restore the sign of the mpy result which was saved in resultp1.
2375	 * The exponent will continue to be kept in mpy_exponent.
2376	 */
2377	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2378
2379	/*
2380	 * No rounding is required, since the result of the multiply
2381	 * is exact in the extended format.
2382	 */
2383
2384	/*
2385	 * Now we are ready to perform the add portion of the operation.
2386	 *
2387	 * The exponents need to be kept as integers for now, since the
2388	 * multiply result might not fit into the exponent field.  We
2389	 * can't overflow or underflow because of this yet, since the
2390	 * add could bring the final result back into range.
2391	 */
2392	add_exponent = Sgl_exponent(opnd3);
2393
2394	/*
2395	 * Check for denormalized or zero add operand.
2396	 */
2397	if (add_exponent == 0) {
2398		/* check for zero */
2399		if (Sgl_iszero_mantissa(opnd3)) {
2400			/* right is zero */
2401			/* Left can't be zero and must be result.
2402			 *
2403			 * The final result is now in tmpres and mpy_exponent,
2404			 * and needs to be rounded and squeezed back into
2405			 * double precision format from double extended.
2406			 */
2407			result_exponent = mpy_exponent;
2408			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410			goto round;
2411		}
2412
2413		/*
2414		 * Neither are zeroes.
2415		 * Adjust exponent and normalize add operand.
2416		 */
2417		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
2418		Sgl_clear_signexponent(opnd3);
2419		Sgl_leftshiftby1(opnd3);
2420		Sgl_normalize(opnd3,add_exponent);
2421		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
2422	} else {
2423		Sgl_clear_exponent_set_hidden(opnd3);
2424	}
2425	/*
2426	 * Copy opnd3 to the double extended variable called right.
2427	 */
2428	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2429
2430	/*
2431	 * A zero "save" helps discover equal operands (for later),
2432	 * and is used in swapping operands (if needed).
2433	 */
2434	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2435
2436	/*
2437	 * Compare magnitude of operands.
2438	 */
2439	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2442	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2443		/*
2444		 * Set the left operand to the larger one by XOR swap.
2445		 * First finish the first word "save".
2446		 */
2447		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449		Sglext_swap_lower(tmpresp2,rightp2);
2450		/* also setup exponents used in rest of routine */
2451		diff_exponent = add_exponent - mpy_exponent;
2452		result_exponent = add_exponent;
2453	} else {
2454		/* also setup exponents used in rest of routine */
2455		diff_exponent = mpy_exponent - add_exponent;
2456		result_exponent = mpy_exponent;
2457	}
2458	/* Invariant: left is not smaller than right. */
2459
2460	/*
2461	 * Special case alignment of operands that would force alignment
2462	 * beyond the extent of the extension.  A further optimization
2463	 * could special case this but only reduces the path length for
2464	 * this infrequent case.
2465	 */
2466	if (diff_exponent > SGLEXT_THRESHOLD) {
2467		diff_exponent = SGLEXT_THRESHOLD;
2468	}
2469
2470	/* Align right operand by shifting it to the right */
2471	Sglext_clear_sign(rightp1);
2472	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2473
2474	/* Treat sum and difference of the operands separately. */
2475	if ((int)save < 0) {
2476		/*
2477		 * Difference of the two operands.  Overflow can occur if the
2478		 * multiply overflowed.  A borrow can occur out of the hidden
2479		 * bit and force a post normalization phase.
2480		 */
2481		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482			resultp1,resultp2);
2483		sign_save = Sgl_signextendedsign(resultp1);
2484		if (Sgl_iszero_hidden(resultp1)) {
2485			/* Handle normalization */
2486		/* A straight foward algorithm would now shift the
2487		 * result and extension left until the hidden bit
2488		 * becomes one.  Not all of the extension bits need
2489		 * participate in the shift.  Only the two most
2490		 * significant bits (round and guard) are needed.
2491		 * If only a single shift is needed then the guard
2492		 * bit becomes a significant low order bit and the
2493		 * extension must participate in the rounding.
2494		 * If more than a single shift is needed, then all
2495		 * bits to the right of the guard bit are zeros,
2496		 * and the guard bit may or may not be zero. */
2497			Sglext_leftshiftby1(resultp1,resultp2);
2498
2499			/* Need to check for a zero result.  The sign and
2500			 * exponent fields have already been zeroed.  The more
2501			 * efficient test of the full object can be used.
2502			 */
2503			 if (Sglext_iszero(resultp1,resultp2)) {
2504				/* Must have been "x-x" or "x+(-x)". */
2505				if (Is_rounding_mode(ROUNDMINUS))
2506					Sgl_setone_sign(resultp1);
2507				Sgl_copytoptr(resultp1,dstptr);
2508				return(NOEXCEPTION);
2509			}
2510			result_exponent--;
2511
2512			/* Look to see if normalization is finished. */
2513			if (Sgl_isone_hidden(resultp1)) {
2514				/* No further normalization is needed */
2515				goto round;
2516			}
2517
2518			/* Discover first one bit to determine shift amount.
2519			 * Use a modified binary search.  We have already
2520			 * shifted the result one position right and still
2521			 * not found a one so the remainder of the extension
2522			 * must be zero and simplifies rounding. */
2523			/* Scan bytes */
2524			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525				Sglext_leftshiftby8(resultp1,resultp2);
2526				result_exponent -= 8;
2527			}
2528			/* Now narrow it down to the nibble */
2529			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530				/* The lower nibble contains the
2531				 * normalizing one */
2532				Sglext_leftshiftby4(resultp1,resultp2);
2533				result_exponent -= 4;
2534			}
2535			/* Select case where first bit is set (already
2536			 * normalized) otherwise select the proper shift. */
2537			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538			if (jumpsize <= 7) switch(jumpsize) {
2539			case 1:
2540				Sglext_leftshiftby3(resultp1,resultp2);
2541				result_exponent -= 3;
2542				break;
2543			case 2:
2544			case 3:
2545				Sglext_leftshiftby2(resultp1,resultp2);
2546				result_exponent -= 2;
2547				break;
2548			case 4:
2549			case 5:
2550			case 6:
2551			case 7:
2552				Sglext_leftshiftby1(resultp1,resultp2);
2553				result_exponent -= 1;
2554				break;
2555			}
2556		} /* end if (hidden...)... */
2557	/* Fall through and round */
2558	} /* end if (save < 0)... */
2559	else {
2560		/* Add magnitudes */
2561		Sglext_addition(tmpresp1,tmpresp2,
2562			rightp1,rightp2, /*to*/resultp1,resultp2);
2563		sign_save = Sgl_signextendedsign(resultp1);
2564		if (Sgl_isone_hiddenoverflow(resultp1)) {
2565	    		/* Prenormalization required. */
2566	    		Sglext_arithrightshiftby1(resultp1,resultp2);
2567	    		result_exponent++;
2568		} /* end if hiddenoverflow... */
2569	} /* end else ...add magnitudes... */
2570
2571	/* Round the result.  If the extension and lower two words are
2572	 * all zeros, then the result is exact.  Otherwise round in the
2573	 * correct direction.  Underflow is possible. If a postnormalization
2574	 * is necessary, then the mantissa is all zeros so no shift is needed.
2575	 */
2576  round:
2577	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2579	}
2580	Sgl_set_sign(resultp1,/*using*/sign_save);
2581	if (Sglext_isnotzero_mantissap2(resultp2)) {
2582		inexact = TRUE;
2583		switch(Rounding_mode()) {
2584		case ROUNDNEAREST: /* The default. */
2585			if (Sglext_isone_highp2(resultp2)) {
2586				/* at least 1/2 ulp */
2587				if (Sglext_isnotzero_low31p2(resultp2) ||
2588				    Sglext_isone_lowp1(resultp1)) {
2589					/* either exactly half way and odd or
2590					 * more than 1/2ulp */
2591					Sgl_increment(resultp1);
2592				}
2593			}
2594	    		break;
2595
2596		case ROUNDPLUS:
2597	    		if (Sgl_iszero_sign(resultp1)) {
2598				/* Round up positive results */
2599				Sgl_increment(resultp1);
2600			}
2601			break;
2602
2603		case ROUNDMINUS:
2604	    		if (Sgl_isone_sign(resultp1)) {
2605				/* Round down negative results */
2606				Sgl_increment(resultp1);
2607			}
2608
2609		case ROUNDZERO:;
2610			/* truncate is simple */
2611		} /* end switch... */
2612		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2613	}
2614	if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615		/* Overflow */
2616		if (Is_overflowtrap_enabled()) {
2617                        /*
2618                         * Adjust bias of result
2619                         */
2620                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621                        Sgl_copytoptr(resultp1,dstptr);
2622                        if (inexact)
2623                            if (Is_inexacttrap_enabled())
2624                                return (OPC_2E_OVERFLOWEXCEPTION |
2625					OPC_2E_INEXACTEXCEPTION);
2626                            else Set_inexactflag();
2627                        return (OPC_2E_OVERFLOWEXCEPTION);
2628		}
2629		inexact = TRUE;
2630		Set_overflowflag();
2631		Sgl_setoverflow(resultp1);
2632	} else if (result_exponent <= 0) {	/* underflow case */
2633		if (Is_underflowtrap_enabled()) {
2634                        /*
2635                         * Adjust bias of result
2636                         */
2637                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638			Sgl_copytoptr(resultp1,dstptr);
2639                        if (inexact)
2640                            if (Is_inexacttrap_enabled())
2641                                return (OPC_2E_UNDERFLOWEXCEPTION |
2642					OPC_2E_INEXACTEXCEPTION);
2643                            else Set_inexactflag();
2644	    		return(OPC_2E_UNDERFLOWEXCEPTION);
2645		}
2646		else if (inexact && is_tiny) Set_underflowflag();
2647	}
2648	else Sgl_set_exponent(resultp1,result_exponent);
2649	Sgl_copytoptr(resultp1,dstptr);
2650	if (inexact)
2651		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652		else Set_inexactflag();
2653    	return(NOEXCEPTION);
2654}
2655