/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
   for the sparc processor.

   These routines are derived from the SPARC Architecture Manual, version 8,
   slightly edited to match the desired calling convention, and also to
   optimize them for our purposes.  */

/* NOTE: this file is run through the C preprocessor.  Keep apostrophes out
   of `!' style comments, since cpp may treat them as character constants.  */

#ifdef L_mulsi3
/*
 * .umul -- 32 x 32 bit multiply built from mulscc multiply steps.
 *
 * Input:	%o0 = multiplier, %o1 = multiplicand.
 * Output:	%o0 = low 32 bits of the product.
 * Clobbers:	%o4, %o5, %y and the integer condition codes.
 *
 * If neither operand has a bit set above the low 12 bits, only 12 multiply
 * steps (plus the final shift step) are executed and the result is pieced
 * together from the partial product and %y; otherwise the full 32 steps
 * (plus the final shift step) are executed.
 */
.text
	.align 4
	.global .umul
	.proc 4
.umul:
	or	%o0, %o1, %o4	! logical or of multiplier and multiplicand
	mov	%o0, %y		! multiplier to Y register
	andncc	%o4, 0xfff, %o5	! mask out lower 12 bits
	be	mul_shortway	! can do it the short way
	andcc	%g0, %g0, %o4	! zero the partial product and clear NV cc
	!
	! long multiply
	!
	mulscc	%o4, %o1, %o4	! first iteration of 33
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4	! 32nd iteration
	mulscc	%o4, %g0, %o4	! last iteration only shifts
	! the upper 32 bits of product are wrong, but we do not care
	retl
	rd	%y, %o0		! (delay slot) low word of product
	!
	! short multiply
	!
mul_shortway:
	mulscc	%o4, %o1, %o4	! first iteration of 13
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4	! 12th iteration
	mulscc	%o4, %g0, %o4	! last iteration only shifts
	rd	%y, %o5
	sll	%o4, 12, %o4	! left shift partial product by 12 bits
	srl	%o5, 20, %o5	! right shift partial product by 20 bits
	retl
	or	%o5, %o4, %o0	! merge for true product
#endif

#ifdef L_divsi3
/*
 * Division and remainder, from Appendix E of the SPARC Version 8
 * Architecture Manual, with fixes from Gordon Irlam.
 */

/*
 * Input: dividend and divisor in %o0 and %o1 respectively.
 *
 * m4 parameters:
 *  .div	name of function to generate
 *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
 *  true	true=true => signed; true=false => unsigned
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 *
 * Register usage in the code below:
 *  %o2 = Q, %o3 = R, %o4 = ITER, %o5 = V
 *  %g1 = the constant 1 << (32 - 4 - 1) used to detect large operands
 *  %g2 = count of single-bit divide steps on the large-dividend path
 *  %g3 = sign indicator; the quotient is negated on exit when %g3 < 0
 *
 * Output: quotient in %o0.  A zero divisor raises trap 0x2 (ST_DIV0); if
 * the trap returns, 0 is returned.
 */
	.global .udiv
	.align 4
	.proc 4
	.text
.udiv:
	b	ready_to_divide
	mov	0, %g3		! result is always positive

	.global .div
	.align 4
	.proc 4
	.text
.div:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0	! either negative?
	bge	ready_to_divide	! no, go do the divide
	xor	%o1, %o0, %g3	! compute sign in any case
	tst	%o1
	bge	1f
	tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	ready_to_divide	! if %o0 not negative...
	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0	! make %o0 nonnegative


ready_to_divide:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5
	bne	1f
	mov	%o0, %o3

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	0x2		! ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5	! if %o1 exceeds %o0, done
	blu	got_result	! (and algorithm fails otherwise)
	clr	%o2
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big
	clr	%o4

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	mov	1, %g2
	sll	%o5, 4, %o5
	b	1b
	add	%o4, 1, %o4

	! Now compute %g2.
2:	addcc	%o5, %o5, %o5
	bcc	not_too_big
	add	%g2, 1, %g2

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1	! high order bit
	srl	%o5, 1, %o5	! rest of %o5
	add	%o5, %g1, %o5
	b	do_single_div
	sub	%g2, 1, %g2

not_too_big:
3:	cmp	%o5, %o3
	blu	2b
	nop
	be	do_single_div
	nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
do_single_div:
	subcc	%g2, 1, %g2
	bl	end_regular_divide
	nop
	sub	%o3, %o5, %o3
	mov	1, %o2
	b	end_single_divloop
	nop
single_divloop:
	sll	%o2, 1, %o2
	bl	1f
	srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3
	sub	%o2, 1, %o2
2:
end_single_divloop:
	subcc	%g2, 1, %g2
	bge	single_divloop
	tst	%o3
	b,a	end_regular_divide

not_really_big:
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4
	be	got_result
	sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
divloop:
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2

L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2


L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	tst	%o3
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	sub	%o2, 1, %o2	! (annulled unless the remainder is negative)


got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	sub	%g0, %o2, %o2	! (annulled unless %g3 is negative)
1:
	retl
	mov	%o2, %o0
#endif

#ifdef L_modsi3
/* This implementation was taken from glibc:
 *
 * Input: dividend and divisor in %o0 and %o1 respectively.
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 *
 * Register usage in the code below:
 *  %o2 = Q, %o3 = R, %o4 = ITER, %o5 = V
 *  %g1 = the constant 1 << (32 - 4 - 1) used to detect large operands
 *  %g2 = count of single-bit divide steps on the large-dividend path
 *  %g3 = sign indicator; the remainder is negated on exit when %g3 < 0
 *	  (.rem copies the dividend into %g3, .urem sets it to 0)
 *
 * Output: remainder in %o0.  A zero divisor raises trap 0x2 (ST_DIV0); if
 * the trap returns, 0 is returned.
 */
.text
	.align 4
	.global .urem
	.proc 4
.urem:
	b	divide
	mov	0, %g3		! result always positive

	.align 4
	.global .rem
	.proc 4
.rem:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0	! either negative?
	bge	2f		! no, go do the divide
	mov	%o0, %g3	! sign of remainder matches %o0
	tst	%o1
	bge	1f
	tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	2f		! if %o0 not negative...
	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0	! make %o0 nonnegative
2:

	! Ready to divide.  Compute size of quotient; scale comparand.
divide:
	orcc	%o1, %g0, %o5
	bne	1f
	mov	%o0, %o3

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	0x2		!ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5	! if %o1 exceeds %o0, done
	blu	got_result	! (and algorithm fails otherwise)
	clr	%o2
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big
	clr	%o4

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	mov	1, %g2
	sll	%o5, 4, %o5
	b	1b
	add	%o4, 1, %o4

	! Now compute %g2.
2:	addcc	%o5, %o5, %o5
	bcc	not_too_big
	add	%g2, 1, %g2

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1	! high order bit
	srl	%o5, 1, %o5	! rest of %o5
	add	%o5, %g1, %o5
	b	do_single_div
	sub	%g2, 1, %g2

not_too_big:
3:	cmp	%o5, %o3
	blu	2b
	nop
	be	do_single_div
	nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
do_single_div:
	subcc	%g2, 1, %g2
	bl	end_regular_divide
	nop
	sub	%o3, %o5, %o3
	mov	1, %o2
	b	end_single_divloop
	nop
single_divloop:
	sll	%o2, 1, %o2
	bl	1f
	srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3
	sub	%o2, 1, %o2
2:
end_single_divloop:
	subcc	%g2, 1, %g2
	bge	single_divloop
	tst	%o3
	b,a	end_regular_divide

not_really_big:
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4
	be	got_result
	sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
divloop:
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2
L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2

L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	tst	%o3
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	add	%o3, %o1, %o3	! (annulled unless the remainder is negative)

got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	sub	%g0, %o3, %o3	! (annulled unless %g3 is negative)
1:
	retl
	mov	%o3, %o0

#endif