!
! Copyright 2005 Sun Microsystems, Inc. All rights reserved.
! Use is subject to license terms.
!
! CDDL HEADER START
!
! The contents of this file are subject to the terms of the
! Common Development and Distribution License, Version 1.0 only
! (the "License"). You may not use this file except in compliance
! with the License.
!
! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
! or http://www.opensolaris.org/os/licensing.
! See the License for the specific language governing permissions
! and limitations under the License.
!
! When distributing Covered Code, include this CDDL HEADER in each
! file and include the License file at usr/src/OPENSOLARIS.LICENSE.
! If applicable, add the following below this CDDL HEADER, with the
! fields enclosed by brackets "[]" replaced with your own identifying
! information: Portions Copyright [yyyy] [name of copyright owner]
!
! CDDL HEADER END
!

	.ident	"%Z%%M% %I% %E% SMI"

! /*
!  * This file contains __quad_mag_add and __quad_mag_sub, the core
!  * of the quad precision add and subtract operations.
!  */
!
! SPARC V9 version, hand-coded in assembly to use 64-bit integer registers

	.file	"__quad_mag64.s"

#include <sys/asm_linkage.h>	/* header reconstructed; supplies ENTRY/SET_SIZE */

! union longdouble {
! 	struct {
! 		unsigned int	msw;
! 		unsigned int	frac2;
! 		unsigned int	frac3;
! 		unsigned int	frac4;
! 	} l;
! 	struct {
! 		unsigned long	msll;
! 		unsigned long	frac;
! 	} ll;
! 	long double	d;
! };
!
! /*
!  * __quad_mag_add(x, y, z, fsr)
!  *
!  * Sets *z = *x + *y, rounded according to the rounding mode in *fsr,
!  * and updates the current exceptions in *fsr.  This routine assumes
!  * *x and *y are finite, with the same sign (i.e., an addition of
!  * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
!  */
! void
! __quad_mag_add(const union longdouble *x, const union longdouble *y,
! 	union longdouble *z, unsigned int *fsr)
! {
! 	unsigned long	lx, ly, frac, sticky;
! 	unsigned int	ex, ey, round, rm;
! 	int		e, uflo;
!
! 	/* get the leading significand double-words and exponents */
! 	ex = (x->ll.msll >> 48) & 0x7fff;
! 	lx = x->ll.msll & ~0xffff000000000000ul;
! 	if (ex == 0)
! 		ex = 1;
! 	else
! 		lx |= 0x0001000000000000ul;
!
! 	ey = (y->ll.msll >> 48) & 0x7fff;
! 	ly = y->ll.msll & ~0xffff000000000000ul;
! 	if (ey == 0)
! 		ey = 1;
! 	else
! 		ly |= 0x0001000000000000ul;
!
! 	/* prenormalize y */
! 	e = (int) ex - (int) ey;
! 	round = sticky = 0;
! 	if (e >= 114) {
! 		frac = x->ll.frac;
! 		sticky = ly | y->ll.frac;
! 	} else {
! 		frac = y->ll.frac;
! 		if (e >= 64) {
! 			sticky = frac & 0x7ffffffffffffffful;
! 			round = frac >> 63;
! 			frac = ly;
! 			ly = 0;
! 			e -= 64;
! 		}
! 		if (e) {
! 			sticky |= round | (frac & ((1ul << (e - 1)) - 1));
! 			round = (frac >> (e - 1)) & 1;
! 			frac = (frac >> e) | (ly << (64 - e));
! 			ly >>= e;
! 		}
!
! 		/* add, propagating carries */
! 		frac += x->ll.frac;
! 		lx += ly;
! 		if (frac < x->ll.frac)
! 			lx++;
!
! 		/* postnormalize */
! 		if (lx >= 0x0002000000000000ul) {
! 			sticky |= round;
! 			round = frac & 1;
! 			frac = (frac >> 1) | (lx << 63);
! 			lx >>= 1;
! 			ex++;
! 		}
! 	}
!
! 	/* keep track of whether the result before rounding is tiny */
! 	uflo = (lx < 0x0001000000000000ul);
!
! 	/* get the rounding mode, fudging directed rounding modes
! 	   as though the result were positive */
! 	rm = *fsr >> 30;
! 	if (z->l.msw)
! 		rm ^= (rm >> 1);
!
! 	/* see if we need to round */
! 	if (round | sticky) {
! 		*fsr |= FSR_NXC;
!
! 		/* round up if necessary */
! 		if (rm == FSR_RP || (rm == FSR_RN && round &&
! 		    (sticky || (frac & 1)))) {
! 			if (++frac == 0)
! 				if (++lx >= 0x0002000000000000ul) {
! 					lx >>= 1;
! 					ex++;
! 				}
! 		}
! 	}
!
! 	/* check for overflow */
! 	if (ex >= 0x7fff) {
! 		/* store the default overflowed result */
! 		*fsr |= FSR_OFC | FSR_NXC;
! 		if (rm == FSR_RN || rm == FSR_RP) {
! 			z->l.msw |= 0x7fff0000;
! 			z->l.frac2 = 0;
! 			z->ll.frac = 0;
! 		} else {
! 			z->l.msw |= 0x7ffeffff;
! 			z->l.frac2 = 0xffffffff;
! 			z->ll.frac = 0xfffffffffffffffful;
! 		}
! 	} else {
! 		/* store the result */
! 		if (lx >= 0x0001000000000000ul)
! 			z->l.msw |= (ex << 16);
! 		z->l.msw |= (lx >> 32) & 0xffff;
! 		z->l.frac2 = (lx & 0xffffffff);
! 		z->ll.frac = frac;
!
! 		/* if the pre-rounded result was tiny and underflow trapping
! 		   is enabled, simulate underflow */
! 		if (uflo && (*fsr & FSR_UFM))
! 			*fsr |= FSR_UFC;
! 	}
! }
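!
! /*
!  * The rounding step above is the usual guard/sticky scheme for
!  * round-to-nearest-even.  As a minimal standalone sketch (the helper
!  * round_up_rn is hypothetical and only illustrates the decision), we
!  * round up exactly when the discarded bits exceed half an ulp, or
!  * equal half an ulp while the retained fraction is odd:
!  */
! #include <stdio.h>
!
! static int
! round_up_rn(unsigned long frac, unsigned int round, unsigned int sticky)
! {
! 	return (round && (sticky || (frac & 1)));
! }
!
! int
! main(void)
! {
! 	printf("%d\n", round_up_rn(0x10ul, 1, 0));	/* halfway, even: 0 */
! 	printf("%d\n", round_up_rn(0x10ul, 1, 1));	/* above halfway: 1 */
! 	return (0);
! }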

	ENTRY(__quad_mag_add)
	save	%sp,-SA(MINFRAME),%sp
	sethi	%hi(0xffff0000),%g1
	sllx	%g1,32,%g1		! g1 = 0xffff000000000000
	sethi	%hi(0x7fff),%l7
	or	%l7,%lo(0x7fff),%l7	! l7 = 0x7fff
	ldx	[%i0],%o0
	srlx	%o0,48,%l0
	andcc	%l0,%l7,%l0		! l0 = ex
	beq,pn	%icc,1f
	andn	%o0,%g1,%o0		! o0 = lx
	ba,pt	%icc,2f
	sub	%o0,%g1,%o0
1:	mov	1,%l0
2:	ldx	[%i1],%o1
	srlx	%o1,48,%l1
	andcc	%l1,%l7,%l1		! l1 = ey
	beq,pn	%icc,1f
	andn	%o1,%g1,%o1		! o1 = ly
	ba,pt	%icc,2f
	sub	%o1,%g1,%o1
1:	mov	1,%l1
2:	sub	%l0,%l1,%l1		! l1 = e = ex - ey
	cmp	%l1,114			! see if we need to prenormalize
	bge,pn	%icc,1f
	mov	0,%l6			! l6 = round
	mov	0,%o7			! o7 = sticky
	cmp	%l1,64
	bl,pt	%icc,3f
	ldx	[%i1+8],%o2		! o2 = frac
	sllx	%o2,1,%o7		! lop off high order bit
	srlx	%o2,63,%l6
	mov	%o1,%o2
	mov	0,%o1
	sub	%l1,64,%l1
3:	tst	%l1
	beq,pn	%icc,4f
	sub	%l1,1,%l2
	mov	1,%o3
	sllx	%o3,%l2,%o3
	sub	%o3,1,%o3
	and	%o3,%o2,%o3
	or	%o3,%l6,%o3
	or	%o7,%o3,%o7
	srlx	%o2,%l2,%o4
	and	%o4,1,%l6
	srlx	%o2,%l1,%o2
	mov	64,%l3
	sub	%l3,%l1,%l3
	sllx	%o1,%l3,%o5
	or	%o2,%o5,%o2
	srlx	%o1,%l1,%o1
4:	ldx	[%i0+8],%o3
	add	%o2,%o3,%o2		! add, propagating carry
	cmp	%o2,%o3
	bgeu,pt	%xcc,5f
	add	%o0,%o1,%o0
	add	%o0,1,%o0
5:	srlx	%o0,49,%o5		! if sum carried out, postnormalize
	tst	%o5
	beq,pt	%icc,2f
	nop
	or	%o7,%l6,%o7
	and	%o2,1,%l6
	srlx	%o2,1,%o2
	sllx	%o0,63,%o3
	or	%o2,%o3,%o2
	srlx	%o0,1,%o0
	ba,pt	%icc,2f
	add	%l0,1,%l0
1:	ldx	[%i0+8],%o2		! (full prenormalization shift case)
	ldx	[%i1+8],%o3
	or	%o1,%o3,%o7
2:	add	%o0,%g1,%o1		! see if sum is tiny
	srlx	%o1,63,%l2		! l2 = uflo
	ld	[%i3],%i4		! get the rounding mode
	srl	%i4,30,%l3		! l3 = rm
	ld	[%i2],%l4		! l4 = z->l.msw
	tst	%l4
	beq,pn	%icc,1f
	srl	%l3,1,%l5
	xor	%l3,%l5,%l3
1:	orcc	%o7,%l6,%g0		! see if we need to round
	beq,pn	%xcc,1f
	andcc	%l3,1,%g0
	or	%i4,1,%i4
	bne,pn	%icc,1f
	tst	%l3
	bne,pn	%icc,2f
	tst	%l6
	beq,pn	%icc,1f
	and	%o2,1,%o3
	orcc	%o3,%o7,%g0
	beq,pn	%xcc,1f
	nop
2:	addcc	%o2,1,%o2		! round up and check for carry out
	bne,pt	%xcc,1f
	nop
	add	%o0,1,%o0
	srlx	%o0,49,%o1
	tst	%o1
	beq,pt	%icc,1f
	nop
	srlx	%o0,1,%o0
	add	%l0,1,%l0
1:	cmp	%l0,%l7			! check for overflow
	bge,pn	%icc,1f
	addcc	%o0,%g1,%g0
	bl,pn	%xcc,2f
	sll	%l0,16,%l1
	or	%l4,%l1,%l4
2:	sllx	%o0,16,%o1
	srlx	%o1,48,%o1
	or	%l4,%o1,%l4
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	stx	%o2,[%i2+8]
	tst	%l2			! see if we need to raise underflow
	beq,pt	%icc,3f
	srl	%i4,23,%i5
	andcc	%i5,4,%i5
	ba,pt	%icc,3f
	or	%i4,%i5,%i4
1:	andcc	%l3,1,%g0
	bne,pn	%icc,2f
	or	%i4,9,%i4		! overflow
	sll	%l7,16,%l7		! 7fff00000...
	or	%l4,%l7,%l4
	st	%l4,[%i2]
	st	%g0,[%i2+4]
	ba,pt	%icc,3f
	stx	%g0,[%i2+8]
2:	mov	-1,%o0			! 7ffeffff...
	sll	%l7,16,%l7
	add	%o0,%l7,%l7
	or	%l4,%l7,%l4
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	stx	%o0,[%i2+8]
3:	st	%i4,[%i3]
	ret
	restore

	SET_SIZE(__quad_mag_add)
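!
! /*
!  * For reference, a standalone sketch of the operand unpacking used by
!  * both routines (illustrative only): the leading double-word splits
!  * into a 15-bit biased exponent and the top 48 fraction bits; the
!  * implicit integer bit is made explicit for normal operands, while
!  * subnormals keep a zero integer bit and are treated as having
!  * exponent 1:
!  */
! #include <stdio.h>
!
! int
! main(void)
! {
! 	unsigned long msll = 0x3fff800000000000ul;	/* 1.5L */
! 	unsigned int ex = (msll >> 48) & 0x7fff;
! 	unsigned long lx = msll & ~0xffff000000000000ul;
! 	if (ex == 0)
! 		ex = 1;				/* subnormal */
! 	else
! 		lx |= 0x0001000000000000ul;	/* implicit integer bit */
! 	printf("ex = %x, lx = %lx\n", ex, lx);	/* ex = 3fff, lx = 1800000000000 */
! 	return (0);
! }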

! /*
!  * __quad_mag_sub(x, y, z, fsr)
!  *
!  * Sets *z = *x - *y, rounded according to the rounding mode in *fsr,
!  * and updates the current exceptions in *fsr.  This routine assumes
!  * *x and *y are finite, with opposite signs (i.e., a subtraction of
!  * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
!  */
! void
! __quad_mag_sub(const union longdouble *x, const union longdouble *y,
! 	union longdouble *z, unsigned int *fsr)
! {
! 	unsigned long	lx, ly, frac, sticky;
! 	unsigned int	ex, ey, gr, borrow, rm;
! 	int		e;
!
! 	/* get the leading significand double-words and exponents */
! 	ex = (x->ll.msll >> 48) & 0x7fff;
! 	lx = x->ll.msll & ~0xffff000000000000ul;
! 	if (ex == 0)
! 		ex = 1;
! 	else
! 		lx |= 0x0001000000000000ul;
!
! 	ey = (y->ll.msll >> 48) & 0x7fff;
! 	ly = y->ll.msll & ~0xffff000000000000ul;
! 	if (ey == 0)
! 		ey = 1;
! 	else
! 		ly |= 0x0001000000000000ul;
!
! 	/* prenormalize y */
! 	e = (int) ex - (int) ey;
! 	gr = sticky = 0;
! 	if (e > 114) {
! 		sticky = ly | y->ll.frac;
! 		ly = frac = 0;
! 	} else {
! 		frac = y->ll.frac;
! 		if (e >= 64) {
! 			gr = frac >> 62;
! 			sticky = frac << 2;
! 			frac = ly;
! 			ly = 0;
! 			e -= 64;
! 		}
! 		if (e > 1) {
! 			sticky |= gr | (frac & ((1ul << (e - 2)) - 1));
! 			gr = (frac >> (e - 2)) & 3;
! 			frac = (frac >> e) | (ly << (64 - e));
! 			ly >>= e;
! 		} else if (e == 1) {
! 			sticky |= (gr & 1);
! 			gr = (gr >> 1) | ((frac & 1) << 1);
! 			frac = (frac >> 1) | (ly << 63);
! 			ly >>= 1;
! 		}
! 	}
!
! 	/* complement guard, round, and sticky as need be */
! 	gr <<= 1;
! 	if (sticky)
! 		gr |= 1;
! 	gr = (-gr & 7);
! 	if (gr)
! 		if (++frac == 0)
! 			ly++;
!
! 	/* subtract, propagating borrows */
! 	frac = x->ll.frac - frac;
! 	lx -= ly;
! 	if (frac > x->ll.frac)
! 		lx--;
!
! 	/* get the rounding mode */
! 	rm = *fsr >> 30;
!
! 	/* handle zero result */
! 	if (!(lx | frac | gr)) {
! 		z->l.msw = ((rm == FSR_RM)? 0x80000000 : 0);
! 		z->l.frac2 = z->l.frac3 = z->l.frac4 = 0;
! 		return;
! 	}
!
! 	/* postnormalize */
! 	if (lx < 0x0001000000000000ul) {
! 		/* if cancellation occurred or the exponent is 1,
! 		   the result is exact */
! 		if (lx < 0x0000800000000000ul || ex == 1) {
! 			if ((lx | (frac & 0xfffe000000000000ul)) == 0 &&
! 			    ex > 64) {
! 				lx = frac;
! 				frac = (unsigned long) gr << 61;
! 				gr = 0;
! 				ex -= 64;
! 			}
! 			while (lx < 0x0001000000000000ul && ex > 1) {
! 				lx = (lx << 1) | (frac >> 63);
! 				frac = (frac << 1) | (gr >> 2);
! 				gr = 0;
! 				ex--;
! 			}
! 			if (lx >= 0x0001000000000000ul)
! 				z->l.msw |= (ex << 16);
! 			z->l.msw |= ((lx >> 32) & 0xffff);
! 			z->l.frac2 = (lx & 0xffffffff);
! 			z->ll.frac = frac;
!
! 			/* if the result is tiny and underflow trapping is
! 			   enabled, simulate underflow */
! 			if (lx < 0x0001000000000000ul && (*fsr & FSR_UFM))
! 				*fsr |= FSR_UFC;
! 			return;
! 		}
!
! 		/* otherwise we only borrowed one place */
! 		lx = (lx << 1) | (frac >> 63);
! 		frac = (frac << 1) | (gr >> 2);
! 		gr &= 3;
! 		ex--;
! 	}
! 	else
! 		gr = (gr >> 1) | (gr & 1);
!
! 	/* fudge directed rounding modes as though the result were positive */
! 	if (z->l.msw)
! 		rm ^= (rm >> 1);
!
! 	/* see if we need to round */
! 	if (gr) {
! 		*fsr |= FSR_NXC;
!
! 		/* round up if necessary */
! 		if (rm == FSR_RP || (rm == FSR_RN && (gr & 2) &&
! 		    ((gr & 1) || (frac & 1)))) {
! 			if (++frac == 0)
! 				if (++lx >= 0x0002000000000000ul) {
! 					lx >>= 1;
! 					ex++;
! 				}
! 		}
! 	}
!
! 	/* store the result */
! 	z->l.msw |= (ex << 16) | ((lx >> 32) & 0xffff);
! 	z->l.frac2 = (lx & 0xffffffff);
! 	z->ll.frac = frac;
! }
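!
! /*
!  * The "complement guard, round, and sticky" step above avoids widening
!  * the datapath: subtracting y's bits that lie below frac's lsb is done
!  * by adding one ulp to the subtrahend and keeping the two's complement
!  * of the guard/round/sticky bits for the rounding decision.  A
!  * standalone sketch of the identity this relies on, scaled down to a
!  * 3-bit tail for readability (the values are arbitrary):
!  */
! #include <stdio.h>
!
! int
! main(void)
! {
! 	unsigned int x = 0xa0 << 3;		/* minuend, exact */
! 	unsigned int y = (0x41 << 3) | 5;	/* subtrahend, tail bits 101 */
! 	unsigned int lhs = x - y;
! 	/* subtract one ulp more, then add back the complemented tail */
! 	unsigned int rhs = (x - ((0x41 + 1) << 3)) + ((-5) & 7);
! 	printf("%x %x\n", lhs, rhs);		/* both print 2f3 */
! 	return (0);
! }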

	ENTRY(__quad_mag_sub)
	save	%sp,-SA(MINFRAME),%sp
	sethi	%hi(0xffff0000),%g1
	sllx	%g1,32,%g1		! g1 = 0xffff000000000000
	sethi	%hi(0x7fff),%l7
	or	%l7,%lo(0x7fff),%l7	! l7 = 0x7fff
	ldx	[%i0],%o0
	srlx	%o0,48,%l0
	andcc	%l0,%l7,%l0		! l0 = ex
	beq,pn	%icc,1f
	andn	%o0,%g1,%o0		! o0 = lx
	ba,pt	%icc,2f
	sub	%o0,%g1,%o0
1:	mov	1,%l0
2:	ldx	[%i1],%o1
	srlx	%o1,48,%l1
	andcc	%l1,%l7,%l1		! l1 = ey
	beq,pn	%icc,1f
	andn	%o1,%g1,%o1		! o1 = ly
	ba,pt	%icc,2f
	sub	%o1,%g1,%o1
1:	mov	1,%l1
2:	sub	%l0,%l1,%l1		! l1 = e = ex - ey
	cmp	%l1,114			! see if we need to prenormalize y
	bg,pn	%icc,1f
	mov	0,%l6			! l6 = gr
	mov	0,%o7			! o7 = sticky
	cmp	%l1,64
	bl,pt	%icc,3f
	ldx	[%i1+8],%o2		! o2 = frac
	srlx	%o2,62,%l6
	sllx	%o2,2,%o7		! lop off top two bits
	mov	%o1,%o2
	mov	0,%o1
	sub	%l1,64,%l1
3:	cmp	%l1,1
	ble,pn	%icc,4f
	sub	%l1,2,%l2		! shift more than one bit
	mov	1,%o3
	sllx	%o3,%l2,%o3
	sub	%o3,1,%o3
	and	%o3,%o2,%o3
	or	%o3,%l6,%o3
	or	%o7,%o3,%o7
	srlx	%o2,%l2,%o4
	and	%o4,3,%l6
	srlx	%o2,%l1,%o2
	mov	64,%l3
	sub	%l3,%l1,%l3
	sllx	%o1,%l3,%o5
	or	%o2,%o5,%o2
	ba,pt	%icc,2f
	srlx	%o1,%l1,%o1
4:	bne,pn	%icc,2f
	and	%l6,1,%o3		! shift one bit
	or	%o7,%o3,%o7
	and	%o2,1,%o4
	sllx	%o4,1,%o4
	srl	%l6,1,%l6
	or	%l6,%o4,%l6
	srlx	%o2,1,%o2
	sllx	%o1,63,%o5
	or	%o2,%o5,%o2
	ba,pt	%icc,2f
	srlx	%o1,1,%o1
1:	ldx	[%i1+8],%o3		! (full prenormalization shift case)
	or	%o1,%o3,%o7
	mov	0,%o1
	mov	0,%o2
2:	tst	%o7			! complement guard, round, and
	beq,pn	%xcc,1f			! sticky as need be
	sll	%l6,1,%l6
	or	%l6,1,%l6
1:	subcc	%g0,%l6,%l6
	beq,pn	%icc,1f
	and	%l6,7,%l6
	addcc	%o2,1,%o2
	beq,a,pn %xcc,1f
	add	%o1,1,%o1
1:	ldx	[%i0+8],%o3		! subtract, propagating borrows
	sub	%o3,%o2,%o2
	cmp	%o3,%o2
	bgeu,pt	%xcc,5f
	sub	%o0,%o1,%o0
	sub	%o0,1,%o0
5:	ld	[%i3],%i4		! get the rounding mode
	srl	%i4,30,%l3		! l3 = rm
	or	%o0,%o2,%o1		! look for zero result
	orcc	%o1,%l6,%g0
	bne,pt	%xcc,1f
	srl	%l3,1,%l4
	and	%l3,%l4,%l4
	sll	%l4,31,%l4
	st	%l4,[%i2]
	st	%g0,[%i2+4]
	stx	%g0,[%i2+8]
	ret
	restore
1:	addcc	%o0,%g1,%g0		! postnormalize
	bl,pt	%xcc,1f
	ld	[%i2],%l4		! l4 = z->l.msw
	and	%l6,1,%l5		! (no cancellation or borrow case)
	srl	%l6,1,%l6
	ba,pt	%icc,2f
	or	%l6,%l5,%l6
1:	srax	%g1,1,%o7
	addcc	%o0,%o7,%g0
	bl,pn	%xcc,1f
	cmp	%l0,1
	beq,pt	%icc,1f
	srlx	%o2,63,%o3		! borrowed one place
	sllx	%o0,1,%o0
	or	%o0,%o3,%o0
	srl	%l6,2,%o4
	sllx	%o2,1,%o2
	or	%o2,%o4,%o2
	and	%l6,3,%l6
	ba,pt	%icc,2f
	sub	%l0,1,%l0
1:	srlx	%o2,49,%o3		! cancellation or tiny result
	orcc	%o0,%o3,%g0
	bne,pt	%xcc,1f
	cmp	%l0,64
	ble,pn	%icc,1f
	nop
	mov	%o2,%o0
	sllx	%l6,61,%o2
	mov	0,%l6
	sub	%l0,64,%l0
1:	addcc	%o0,%g1,%g0		! normalization loop
	bge,pn	%xcc,1f
	cmp	%l0,1
	ble,pn	%icc,1f
	srl	%l6,2,%l6
	srlx	%o2,63,%o3
	sllx	%o0,1,%o0
	or	%o0,%o3,%o0
	sllx	%o2,1,%o2
	or	%o2,%l6,%o2
	ba,pt	%icc,1b
	sub	%l0,1,%l0
1:	sllx	%o0,16,%o1
	srlx	%o1,48,%l5
	or	%l4,%l5,%l4
	addcc	%o0,%g1,%g0		! see if result is tiny
	bl,pn	%xcc,1f
	sll	%l0,16,%l5
	or	%l4,%l5,%l4
1:	st	%l4,[%i2]
	st	%o0,[%i2+4]
	bge,pt	%xcc,1f
	stx	%o2,[%i2+8]
	srl	%i4,23,%i5
	andcc	%i5,4,%g0		! see if we need to raise underflow
	beq,pt	%icc,1f
	or	%i4,4,%i4
	st	%i4,[%i3]
1:	ret
	restore
2:	tst	%l4			! fudge directed rounding modes
	beq,pn	%icc,1f
	srl	%l3,1,%l5
	xor	%l3,%l5,%l3
1:	tst	%l6			! see if we need to round
	beq,pn	%icc,1f
	or	%i4,1,%i4
	st	%i4,[%i3]
	andcc	%l3,1,%g0
	bne,pn	%icc,1f
	tst	%l3
	bne,pn	%icc,2f
	andcc	%l6,2,%g0
	beq,pn	%icc,1f
	or	%l6,%o2,%o3
	andcc	%o3,1,%o3
	beq,pn	%xcc,1f
	nop
2:	addcc	%o2,1,%o2		! round up and check for carry
	bne,pt	%xcc,1f
	nop
	add	%o0,1,%o0
	srlx	%o0,49,%o1
	tst	%o1
	beq,pt	%icc,1f
	nop
	srlx	%o0,1,%o0
	add	%l0,1,%l0
1:	sllx	%o0,16,%o1
	srlx	%o1,48,%o1
	or	%l4,%o1,%l4
	sll	%l0,16,%l5
	or	%l4,%l5,%l4
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	stx	%o2,[%i2+8]
	ret
	restore

	SET_SIZE(__quad_mag_sub)
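!
! /*
!  * Both routines assume the caller has already ordered the operands so
!  * that |*x| >= |*y|.  For finite quads the magnitude order is just the
!  * unsigned order of the raw words with the sign bit cleared, so a
!  * caller can compare directly; mag_ge below is a hypothetical helper,
!  * not part of this file:
!  */
! #include <stdio.h>
!
! static int
! mag_ge(unsigned long amsll, unsigned long afrac,
!     unsigned long bmsll, unsigned long bfrac)
! {
! 	return (amsll > bmsll || (amsll == bmsll && afrac >= bfrac));
! }
!
! int
! main(void)
! {
! 	/* |2.0L| >= |1.5L| */
! 	printf("%d\n", mag_ge(0x4000000000000000ul, 0ul,
! 	    0x3fff800000000000ul, 0ul));	/* prints 1 */
! 	return (0);
! }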