/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
   for the sparc processor.

   These routines are derived from the SPARC Architecture Manual, version 8,
   slightly edited to match the desired calling convention, and also to
   optimize them for our purposes.

   The file is run through the C preprocessor; each routine group is guarded
   by its own L_* macro.  NOTE(review): the L_divsi3 and L_modsi3 sections
   define the same non-local labels (divloop, got_result, not_really_big,
   ...), so presumably only one L_* macro is defined per assembly - confirm
   against the build rules.  */

#ifdef L_mulsi3
/*
 * .umul - 32-bit multiply built from mulscc multiply-step instructions.
 *
 * Input:   %o0 = multiplier (copied to %y), %o1 = multiplicand.
 * Output:  %o0 = product.  On the long path this is the value read back
 *          from %y; the upper 32 bits of the product are not produced.
 * Scratch: %o4 (partial product), %o5, %y, integer condition codes.
 *
 * If neither operand has any bit set above the low 12 bits, a 12-step
 * short multiply is used instead of the full 32-step one.
 */
.text
	.align 4
	.global .umul
	.proc 4
.umul:
	or	%o0, %o1, %o4	! logical or of multiplier and multiplicand
	mov	%o0, %y		! multiplier to Y register
	andncc	%o4, 0xfff, %o5	! mask out lower 12 bits
	be	mul_shortway	! can do it the short way
	andcc	%g0, %g0, %o4	! zero the partial product and clear NV cc
				! (delay slot: executed on both paths)
	!
	! long multiply
	!
	mulscc	%o4, %o1, %o4	! first iteration of 33
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4	! 32nd iteration
	mulscc	%o4, %g0, %o4	! last iteration only shifts
	! the upper 32 bits of product are wrong, but we do not care
	retl
	rd	%y, %o0		! delay slot: result is taken from %y
	!
	! short multiply
	!
mul_shortway:
	mulscc	%o4, %o1, %o4	! first iteration of 13
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4	! 12th iteration
	mulscc	%o4, %g0, %o4	! last iteration only shifts
	! After 13 steps the product is split between %o4 and %y;
	! realign the two pieces and merge them.
	rd	%y, %o5
	sll	%o4, 12, %o4	! left shift partial product by 12 bits
	srl	%o5, 20, %o5	! right shift partial product by 20 bits
	retl
	or	%o5, %o4, %o0	! merge for true product (delay slot)
#endif

#ifdef L_divsi3
/*
 * Division and remainder, from Appendix E of the SPARC Version 8
 * Architecture Manual, with fixes from Gordon Irlam.
 */

/*
 * Input: dividend and divisor in %o0 and %o1 respectively.
 * Output: quotient in %o0.
 *
 * m4 parameters:
 *  .div	name of function to generate
 *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
 *  true	true=true => signed; true=false => unsigned
 * (NOTE(review): this parameter list looks like residue of an m4 template
 * in which the parameter names were themselves substituted; the code below
 * is already expanded and no m4 processing is visible in this file.)
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Register use in the code below:
 *  %o2 = Q, %o3 = R, %o4 = ITER, %o5 = V,
 *  %g1 = the constant 1 << (32 - 4 - 1),
 *  %g2 = number of single-bit divide steps on the large-dividend path,
 *  %g3 = sign word: the quotient is negated at the end if %g3 < 0.
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */
	.global .udiv
	.align 4
	.proc 4
	.text
.udiv:
	! Unsigned entry point: no sign fixup is needed.
	b	ready_to_divide
	mov	0, %g3		! result is always positive (delay slot)

	.global .div
	.align 4
	.proc 4
	.text
.div:
	! Signed entry point: record the sign of the result in %g3 and
	! replace negative operands by their negations.
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0	! either negative?
	bge	ready_to_divide	! no, go do the divide
	xor	%o1, %o0, %g3	! compute sign in any case
	tst	%o1
	bge	1f
	tst	%o0		! delay slot: sets cc for the bge below
	! %o1 is definitely negative; %o0 might also be negative
	bge	ready_to_divide	! if %o0 not negative...
	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0	! make %o0 nonnegative


ready_to_divide:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5	! V = divisor; also tests it for zero
	bne	1f
	mov	%o0, %o3	! R = dividend (delay slot)

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	0x2		! ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5	! if %o1 exceeds %o0, done
	blu	got_result	! (and algorithm fails otherwise)
	clr	%o2		! Q = 0 (delay slot)
	sethi	%hi(1 << (32 - 4 - 1)), %g1	! %g1 = 1 << 27
	cmp	%o3, %g1
	blu	not_really_big
	clr	%o4		! ITER = 0 (delay slot)

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
	1:
		cmp	%o5, %g1
		bgeu	3f
		mov	1, %g2		! delay slot: start the step count at 1
		sll	%o5, 4, %o5
		b	1b
		add	%o4, 1, %o4	! delay slot: one more N-bit iteration

	! Now compute %g2.
	2:	addcc	%o5, %o5, %o5	! V *= 2, carry set on overflow
		bcc	not_too_big
		add	%g2, 1, %g2

		! We get here if the %o1 overflowed while shifting
		! (that is, the shifted copy of %o1 held in %o5).
		! This means that %o3 has the high-order bit set.
		! Restore %o5 and subtract from %o3.
		sll	%g1, 4, %g1	! high order bit
		srl	%o5, 1, %o5	! rest of %o5
		add	%o5, %g1, %o5
		b	do_single_div
		sub	%g2, 1, %g2	! delay slot: undo the last count

	not_too_big:
	3:	cmp	%o5, %o3
		blu	2b
		nop
		be	do_single_div
		nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
	do_single_div:
		subcc	%g2, 1, %g2
		bl	end_regular_divide	! no single-bit steps to do
		nop
		sub	%o3, %o5, %o3		! first step: always subtract
		mov	1, %o2
		b	end_single_divloop
		nop
	single_divloop:
		! Entered with cc set from "tst %o3" in the delay slot of the
		! loop branch below.
		sll	%o2, 1, %o2
		bl	1f
		srl	%o5, 1, %o5	! delay slot: V /= 2 on both paths
		! %o3 >= 0
		sub	%o3, %o5, %o3
		b	2f
		add	%o2, 1, %o2
	1:	! %o3 < 0
		add	%o3, %o5, %o3
		sub	%o2, 1, %o2
	2:
	end_single_divloop:
		subcc	%g2, 1, %g2
		bge	single_divloop
		tst	%o3		! delay slot: cc = sign of R
		b,a	end_regular_divide

not_really_big:
	! Scale V up by 2^N per pass until it exceeds R, counting passes
	! in %o4.
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4	! delay slot: executed on every pass
	be	got_result
	sub	%o4, 1, %o4	! delay slot: loop below counts down to < 0

	tst	%o3		! set up for initial iteration
divloop:
	! One pass develops N = 4 quotient bits by non-restoring division.
	! At each depth the sign of R (cc from the previous subcc/addcc/tst)
	! selects subtracting or adding V, and V is halved in the branch
	! delay slot.  Each leaf adds an odd value in -15..15 to Q, which
	! has just been shifted left by 4.  Label Ld.k is the
	! negative-remainder target at depth d, with k = 16 + the bits
	! accumulated so far.
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2

L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2


L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

	9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	tst	%o3		! delay slot: cc = sign of R
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	! The annulled delay slot below runs only when the branch is taken,
	! that is when the final remainder is negative; otherwise execution
	! falls through to got_result with Q unchanged.
	sub	%o2, 1, %o2


got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	sub	%g0, %o2, %o2	! annulled slot: negate Q only if %g3 < 0
1:
	retl
	mov	%o2, %o0	! delay slot: return the quotient
#endif

#ifdef L_modsi3
/* This implementation was taken from glibc:
 *
 * Input: dividend and divisor in %o0 and %o1 respectively.
 * Output: remainder in %o0; for .rem it carries the sign of the dividend.
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Register use in the code below:
 *  %o2 = Q, %o3 = R, %o4 = ITER, %o5 = V,
 *  %g1 = the constant 1 << (32 - 4 - 1),
 *  %g2 = number of single-bit divide steps on the large-dividend path,
 *  %g3 = sign word: the remainder is negated at the end if %g3 < 0.
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */
.text
	.align 4
	.global .urem
	.proc 4
.urem:
	! Unsigned entry point: no sign fixup is needed.
	b	divide
	mov	0, %g3		! result always positive (delay slot)

	.align 4
	.global .rem
	.proc 4
.rem:
	! Signed entry point: record the sign of the dividend in %g3 and
	! replace negative operands by their negations.
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0	! either negative?
	bge	2f		! no, go do the divide
	mov	%o0, %g3	! sign of remainder matches %o0
	tst	%o1
	bge	1f
	tst	%o0		! delay slot: sets cc for the bge below
	! %o1 is definitely negative; %o0 might also be negative
	bge	2f		! if %o0 not negative...
	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0	! make %o0 nonnegative
2:

	! Ready to divide.  Compute size of quotient; scale comparand.
divide:
	orcc	%o1, %g0, %o5	! V = divisor; also tests it for zero
	bne	1f
	mov	%o0, %o3	! R = dividend (delay slot)

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	0x2		!ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5	! if %o1 exceeds %o0, done
	blu	got_result	! (and algorithm fails otherwise)
	clr	%o2		! Q = 0 (delay slot)
	sethi	%hi(1 << (32 - 4 - 1)), %g1	! %g1 = 1 << 27
	cmp	%o3, %g1
	blu	not_really_big
	clr	%o4		! ITER = 0 (delay slot)

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
	1:
		cmp	%o5, %g1
		bgeu	3f
		mov	1, %g2		! delay slot: start the step count at 1
		sll	%o5, 4, %o5
		b	1b
		add	%o4, 1, %o4	! delay slot: one more N-bit iteration

	! Now compute %g2.
	2:	addcc	%o5, %o5, %o5	! V *= 2, carry set on overflow
		bcc	not_too_big
		add	%g2, 1, %g2

		! We get here if the %o1 overflowed while shifting
		! (that is, the shifted copy of %o1 held in %o5).
		! This means that %o3 has the high-order bit set.
		! Restore %o5 and subtract from %o3.
		sll	%g1, 4, %g1	! high order bit
		srl	%o5, 1, %o5	! rest of %o5
		add	%o5, %g1, %o5
		b	do_single_div
		sub	%g2, 1, %g2	! delay slot: undo the last count

	not_too_big:
	3:	cmp	%o5, %o3
		blu	2b
		nop
		be	do_single_div
		nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
	do_single_div:
		subcc	%g2, 1, %g2
		bl	end_regular_divide	! no single-bit steps to do
		nop
		sub	%o3, %o5, %o3		! first step: always subtract
		mov	1, %o2
		b	end_single_divloop
		nop
	single_divloop:
		! Entered with cc set from "tst %o3" in the delay slot of the
		! loop branch below.
		sll	%o2, 1, %o2
		bl	1f
		srl	%o5, 1, %o5	! delay slot: V /= 2 on both paths
		! %o3 >= 0
		sub	%o3, %o5, %o3
		b	2f
		add	%o2, 1, %o2
	1:	! %o3 < 0
		add	%o3, %o5, %o3
		sub	%o2, 1, %o2
	2:
	end_single_divloop:
		subcc	%g2, 1, %g2
		bge	single_divloop
		tst	%o3		! delay slot: cc = sign of R
		b,a	end_regular_divide

not_really_big:
	! Scale V up by 2^N per pass until it exceeds R, counting passes
	! in %o4.
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4	! delay slot: executed on every pass
	be	got_result
	sub	%o4, 1, %o4	! delay slot: loop below counts down to < 0

	tst	%o3		! set up for initial iteration
divloop:
	! One pass performs N = 4 non-restoring divide steps.  At each depth
	! the sign of R (cc from the previous subcc/addcc/tst) selects
	! subtracting or adding V, and V is halved in the branch delay slot.
	! Each leaf adds an odd value in -15..15 to Q, which has just been
	! shifted left by 4.  Label Ld.k is the negative-remainder target at
	! depth d, with k = 16 + the bits accumulated so far.
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2
L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2

L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

	9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	tst	%o3		! delay slot: cc = sign of R
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	! The annulled delay slot below runs only when the branch is taken,
	! that is when the final remainder is negative: the divisor in %o1
	! is added back once.  Otherwise execution falls through with R
	! unchanged.
	add	%o3, %o1, %o3

got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	sub	%g0, %o3, %o3	! annulled slot: negate R only if %g3 < 0
1:
	retl
	mov	%o3, %o0	! delay slot: return the remainder

#endif
