#if !(defined (__mips_isa_rev) && (__mips_isa_rev >= 6))
.set	mips2
#endif
#include "mips_arch.h"

# Division helpers.  On the two R6 variants the two-operand divide macro
# expands to nothing and mfqt/mfrm expand to the three-operand divide or
# modulo instruction.  Otherwise the divide macro emits the classic
# instruction with $0 as dummy destination and mfqt/mfrm read the result
# back through mflo/mfhi.
# NOTE(review): multu, mflo and mfhi are invoked in call form throughout
# this file, so they are presumably macros from mips_arch.h - confirm.
#if defined(_MIPS_ARCH_MIPS64R6)
# define ddivu(rs,rt)
# define mfqt(rd,rs,rt) ddivu rd,rs,rt
# define mfrm(rd,rs,rt) dmodu rd,rs,rt
#elif defined(_MIPS_ARCH_MIPS32R6)
# define divu(rs,rt)
# define mfqt(rd,rs,rt) divu rd,rs,rt
# define mfrm(rd,rs,rt) modu rd,rs,rt
#else
# define divu(rs,rt) divu $0,rs,rt
# define mfqt(rd,rs,rt) mflo rd
# define mfrm(rd,rs,rt) mfhi rd
#endif

.rdata
.asciiz	"mips3.s, Version 1.2"
.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"

.text
# $1 is used as an ordinary scratch register by the code below, so the
# assembler must not use it behind our back.
.set	noat

# bn_mul_add_words: multiply a word array by one word and accumulate.
#   $4 = array that is read, accumulated into and written back
#   $5 = array of multiplicand words
#   $6 = number of words (signed); nothing is processed when <= 0
#   $7 = multiplier word
#   For every word: {carry, $4[i]} = $5[i] * $7 + $4[i] + carry.
#   Returns the final carry in $2; the same value is copied to $4.
# The public entry only checks the count.  The move in the branch delay
# slot clears the carry in $2 whether or not the branch is taken.
.align	5
.globl	bn_mul_add_words
.ent	bn_mul_add_words
bn_mul_add_words:
	.set	noreorder
	bgtz	$6,bn_mul_add_words_internal
	move	$2,$0			# (delay slot) carry = 0
	jr	$31
	move	$4,$2			# (delay slot) mirror result into $4
.end	bn_mul_add_words

# Worker for bn_mul_add_words, entered with $6 > 0 and $2 = 0.
# Scratch registers: $1, $3 (constant -4) and $8-$15.
# The main loop handles four words per pass for as long as ($6 & -4) is
# positive; the tail then handles the remaining one to three words.
.align	5
.ent	bn_mul_add_words_internal
bn_mul_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3		# $8 = count rounded down to a multiple of 4
	beqz	$8,.L_bn_mul_add_words_tail

.L_bn_mul_add_words_loop:
	lw	$12,0($5)
	multu	($12,$7)		# start first product while loads proceed
	lw	$13,0($4)
	lw	$14,4($5)
	lw	$15,4($4)
	lw	$8,2*4($5)
	lw	$9,2*4($4)
	addu	$13,$2			# add incoming carry to the old word
	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
				# values", but it seems to work fine
				# even on 64-bit registers.
# bn_mul_add_words_internal, main loop continued: the low and high halves
# of each product are folded into the running word and into the carry in
# $2, and the next multiplication is started before the word is stored.
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1			# word += low half of product
	addu	$2,$12			# carry += high half of product
	multu	($14,$7)
	sltu	$1,$13,$1		# carry out of the low-half addition
	sw	$13,0($4)
	addu	$2,$1

	lw	$10,3*4($5)
	lw	$11,3*4($4)
	addu	$15,$2
	sltu	$2,$15,$2
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	addu	$15,$1
	addu	$2,$14
	multu	($8,$7)
	sltu	$1,$15,$1
	sw	$15,4($4)
	addu	$2,$1

	subu	$6,4			# four words consumed
	addu	$4,4*4			# pointers advance early, so the last
	addu	$5,4*4			# two stores use negative offsets
	addu	$9,$2
	sltu	$2,$9,$2
	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	addu	$9,$1
	addu	$2,$8
	multu	($10,$7)
	sltu	$1,$9,$1
	sw	$9,-2*4($4)
	addu	$2,$1


	and	$8,$6,$3		# any full group of four left?
	addu	$11,$2
	sltu	$2,$11,$2
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	addu	$11,$1
	addu	$2,$10
	sltu	$1,$11,$1
	sw	$11,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_add_words_loop
	addu	$2,$1			# (delay slot) finish the carry

	beqz	$6,.L_bn_mul_add_words_return
	nop

# Tail: one to three remaining words, one at a time.
.L_bn_mul_add_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	($12,$7)
	lw	$13,0($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,0($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,4($5)
	multu	($12,$7)
	lw	$13,4($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,4($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,2*4($5)
	multu	($12,$7)
	lw	$13,2*4($4)
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,2*4($4)
	addu	$2,$1

.L_bn_mul_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2			# (delay slot) mirror carry into $4
.end	bn_mul_add_words_internal

# bn_mul_words: multiply a word array by one word.
#   $4 = destination array, $5 = source array,
#   $6 = number of words (signed; <= 0 returns at once), $7 = multiplier.
#   For every word: {carry, $4[i]} = $5[i] * $7 + carry.
#   Returns the final carry in $2; the same value is copied to $4.
.align	5
.globl	bn_mul_words
.ent	bn_mul_words
bn_mul_words:
	.set	noreorder
	bgtz	$6,bn_mul_words_internal
	move	$2,$0			# (delay slot) carry = 0
	jr	$31
	move	$4,$2
.end	bn_mul_words

# Worker for bn_mul_words, entered with $6 > 0 and $2 = 0.
# Scratch registers: $1, $3 (constant -4) and $8-$15.
# Same structure as the multiply-accumulate worker: four words per pass
# in the main loop, then a tail of up to three words.
.align	5
.ent	bn_mul_words_internal
bn_mul_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_words_tail

.L_bn_mul_words_loop:
	lw	$12,0($5)
	multu	($12,$7)
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1			# carry + low half = result word
	sltu	$13,$2,$1		# carry out of that addition
	multu	($14,$7)
	sw	$2,0($4)
	addu	$2,$13,$12		# new carry = high half + carry out

	subu	$6,4
	addu	$4,4*4
	addu	$5,4*4
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	addu	$2,$1
	sltu	$15,$2,$1
	multu	($8,$7)
	sw	$2,-3*4($4)
	addu	$2,$15,$14

	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	addu	$2,$1
	sltu	$9,$2,$1
	multu	($10,$7)
	sw	$2,-2*4($4)
	addu	$2,$9,$8

	and	$8,$6,$3		# any full group of four left?
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	addu	$2,$1
	sltu	$11,$2,$1
	sw	$2,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_words_loop
	addu	$2,$11,$10		# (delay slot) new carry

	beqz	$6,.L_bn_mul_words_return
	nop

.L_bn_mul_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,0($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,4($5)
	multu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,4($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,2*4($5)
	multu	($12,$7)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,2*4($4)
	addu	$2,$13,$12

.L_bn_mul_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_mul_words_internal

# bn_sqr_words: square every word of an array.
#   $4 = destination; two words are written per source word, low half
#        first and high half second
#   $5 = source array
#   $6 = number of source words (signed; <= 0 returns at once)
#   $2 is cleared here and never written by the worker, so the value
#   handed back in $2 and $4 is always 0.
.align	5
.globl	bn_sqr_words
.ent	bn_sqr_words
bn_sqr_words:
	.set	noreorder
	bgtz	$6,bn_sqr_words_internal
	move	$2,$0
	jr	$31
	move	$4,$2
.end	bn_sqr_words

# Worker for bn_sqr_words, entered with $6 > 0.
# Scratch registers: $3 (constant -4) and $8-$15.
.align	5
.ent	bn_sqr_words_internal
bn_sqr_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_sqr_words_tail

# Main loop of bn_sqr_words_internal: four source words are squared per
# pass and eight result words are stored.
.L_bn_sqr_words_loop:
	lw	$12,0($5)
	multu	($12,$12)
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,0($4)		# low half of the square
	sw	$12,4($4)		# high half of the square

	multu	($14,$14)
	subu	$6,4
	addu	$4,8*4			# destination advances by eight words,
	addu	$5,4*4			# source by four
	mflo	($15,$14,$14)
	mfhi	($14,$14,$14)
	sw	$15,-6*4($4)
	sw	$14,-5*4($4)

	multu	($8,$8)
	mflo	($9,$8,$8)
	mfhi	($8,$8,$8)
	sw	$9,-4*4($4)
	sw	$8,-3*4($4)


	multu	($10,$10)
	and	$8,$6,$3		# any full group of four left?
	mflo	($11,$10,$10)
	mfhi	($10,$10,$10)
	sw	$11,-2*4($4)

	.set	noreorder
	bgtz	$8,.L_bn_sqr_words_loop
	sw	$10,-4($4)		# (delay slot) last high half

	beqz	$6,.L_bn_sqr_words_return
	nop

# Tail: one to three remaining source words.
.L_bn_sqr_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,0($4)
	sw	$12,4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,4($5)
	multu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,2*4($4)
	sw	$12,3*4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,2*4($5)
	multu	($12,$12)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,4*4($4)
	sw	$12,5*4($4)

.L_bn_sqr_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2

.end	bn_sqr_words_internal

# bn_add_words: add two word arrays with carry propagation.
#   $4 = destination, $5 = first operand, $6 = second operand,
#   $7 = number of words (signed; <= 0 returns at once).
#   For every word: {carry, $4[i]} = $5[i] + $6[i] + carry.
#   Returns the final carry in $2; the same value is copied to $4.
.align	5
.globl	bn_add_words
.ent	bn_add_words
bn_add_words:
	.set	noreorder
	bgtz	$7,bn_add_words_internal
	move	$2,$0			# (delay slot) carry = 0
	jr	$31
	move	$4,$2
.end	bn_add_words

# Worker for bn_add_words, entered with $7 > 0 and $2 = 0.
# Scratch registers: $1, $3 (constant -4), $8-$15, $24 and $25.
# $1 carries the loop condition ($7 & -4) across the whole loop body,
# which is only safe because the file assembles under .set noat.
.align	5
.ent	bn_add_words_internal
bn_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_add_words_tail

.L_bn_add_words_loop:
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3		# loop condition for the branch at the end
	lw	$14,2*4($5)
	addu	$6,4*4
	lw	$15,3*4($5)
	addu	$4,4*4
	lw	$9,-3*4($6)		# negative offsets: pointers already advanced
	addu	$5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	addu	$8,$12			# sum of the two operand words
	sltu	$24,$8,$12		# carry out of that sum
	addu	$12,$8,$2		# add the incoming carry
	sltu	$2,$12,$8		# carry out of the carry addition
	sw	$12,-4*4($4)
# bn_add_words_internal, main loop continued: merge the two partial
# carries of the first word, then handle words two to four the same way.
	addu	$2,$24

	addu	$9,$13
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25

	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	addu	$11,$15
	sltu	$25,$11,$15
	addu	$15,$11,$2
	sltu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_add_words_loop
	addu	$2,$25			# (delay slot) merge the last carry

	beqz	$7,.L_bn_add_words_return
	nop

# Tail: one to three remaining words.
.L_bn_add_words_tail:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	addu	$8,$12
	subu	$7,1
	sltu	$24,$8,$12
	addu	$12,$8,$2
	sltu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_add_words_return

	lw	$13,4($5)
	lw	$9,4($6)
	addu	$9,$13
	subu	$7,1
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_add_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2

.end	bn_add_words_internal

# bn_sub_words: subtract word arrays with borrow propagation.
#   $4 = destination, $5 = minuend, $6 = subtrahend,
#   $7 = number of words (signed; <= 0 returns at once).
#   For every word: $4[i] = $5[i] - $6[i] - borrow.
#   Returns the final borrow in $2; the same value is copied to $4.
# The early-out path writes $0 to $4 directly, which equals the freshly
# cleared $2.
.align	5
.globl	bn_sub_words
.ent	bn_sub_words
bn_sub_words:
	.set	noreorder
	bgtz	$7,bn_sub_words_internal
	move	$2,$0			# (delay slot) borrow = 0
	jr	$31
	move	$4,$0
.end	bn_sub_words

# Worker for bn_sub_words, entered with $7 > 0 and $2 = 0.
# Scratch registers: $1, $3 (constant -4), $8-$15, $24 and $25.
# $1 carries the loop condition across the loop body (file-wide noat).
.align	5
.ent	bn_sub_words_internal
bn_sub_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_sub_words_tail

.L_bn_sub_words_loop:
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3		# loop condition for the branch at the end
	lw	$14,2*4($5)
	addu	$6,4*4
	lw	$15,3*4($5)
	addu	$4,4*4
	lw	$9,-3*4($6)
	addu	$5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	sltu	$24,$12,$8		# borrow when minuend < subtrahend
	subu	$8,$12,$8		# difference of the two words
	subu	$12,$8,$2		# minus the incoming borrow
	sgtu	$2,$12,$8		# borrow out of the borrow subtraction
	sw	$12,-4*4($4)
	addu	$2,$24			# merged borrow for the next word

	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25


# bn_sub_words_internal, main loop continued: third and fourth words.
	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	sltu	$25,$15,$11
	subu	$11,$15,$11
	subu	$15,$11,$2
	sgtu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_sub_words_loop
	addu	$2,$25			# (delay slot) merge the last borrow

	beqz	$7,.L_bn_sub_words_return
	nop

# Tail: one to three remaining words.
.L_bn_sub_words_tail:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,1
	sltu	$24,$12,$8
	subu	$8,$12,$8
	subu	$12,$8,$2
	sgtu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_sub_words_return

	lw	$13,4($5)
	subu	$7,1
	lw	$9,4($6)
	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_sub_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_sub_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_sub_words_internal

#if 0
/*
 * The bn_div_3_words entry point is re-used for constant-time interface.
 * Implementation is retained as historical reference.
 */
.align	5
.globl	bn_div_3_words
.ent	bn_div_3_words
bn_div_3_words:
	.set	noreorder
	move	$7,$4		# we know that bn_div_words does not
				# touch $7, $10, $11 and preserves $6
				# so that we can save two arguments
				# and return address in registers
				# instead of stack:-)

	lw	$4,($7)
	move	$10,$5
	bne	$4,$6,bn_div_3_words_internal
	lw	$5,-4($7)
	li	$2,-1
	jr	$31
	move	$4,$2
.end	bn_div_3_words

.align	5
.ent	bn_div_3_words_internal
bn_div_3_words_internal:
	.set	reorder
	move	$11,$31
	bal	bn_div_words_internal
	move	$31,$11
	multu	($10,$2)
	lw	$14,-2*4($7)
	move	$8,$0
	mfhi	($13,$10,$2)
	mflo	($12,$10,$2)
	sltu	$24,$13,$5
.L_bn_div_3_words_inner_loop:
	bnez	$24,.L_bn_div_3_words_inner_loop_done
	sgeu	$1,$14,$12
	seq	$25,$13,$5
	and	$1,$25
	sltu	$15,$12,$10
	addu	$5,$6
	subu	$13,$15
	subu	$12,$10
	sltu	$24,$13,$5
	sltu	$8,$5,$6
	or	$24,$8
	.set	noreorder
	beqz	$1,.L_bn_div_3_words_inner_loop
	subu	$2,1
	addu	$2,1
	.set	reorder
.L_bn_div_3_words_inner_loop_done:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_div_3_words_internal
#endif

# bn_div_words: divide the two-word value $4:$5 ($4 is the high word)
# by the word in $6.
#   Returns the quotient in $2 (copied to $4) and leaves the remainder
#   in $3 (copied to $5).  A zero divisor returns -1 without dividing.
.align	5
.globl	bn_div_words
.ent	bn_div_words
bn_div_words:
	.set	noreorder
	bnez	$6,bn_div_words_internal
	li	$2,-1		# I would rather signal div-by-zero
				# which can be done with 'break 7'
	jr	$31
	move	$4,$2
.end	bn_div_words

# Worker for bn_div_words, entered with $6 != 0.
# Steps: normalize the divisor so that its top bit is set (shift count
# kept in $25), shift the dividend by the same amount, then produce the
# quotient in two half-word rounds.  Each round estimates a half-word
# digit by dividing by the upper half of the divisor and then corrects
# the estimate downwards in the inner loop.
# Scratch registers: $1, $2, $3, $8, $9, $12-$15, $24, $25.
# The first instructions are still assembled under the .set noreorder
# left active by bn_div_words above, so every branch here is followed
# by an explicit delay slot and the .-4 / .+12 targets count raw
# instructions.
.align	5
.ent	bn_div_words_internal
bn_div_words_internal:
	move	$3,$0
	bltz	$6,.L_bn_div_words_body	# top bit already set, no shift needed
	move	$25,$3			# (delay slot) shift count = 0
	sll	$6,1			# shift the divisor left ...
	bgtz	$6,.-4			# ... back to the sll until top bit is set
	addu	$25,1			# (delay slot) count the shifts

	.set	reorder
	negu	$13,$25			# negated count, used as 32 - count
	li	$14,-1
	sll	$14,$13			# mask covering the top $25 bits
	and	$14,$4			# bits of $4 the shift would lose
	srl	$1,$5,$13		# bits of $5 that move up into $4
	.set	noreorder
	beqz	$14,.+12		# nothing lost: skip the nop and the break
	nop
	break	6		# signal overflow
	.set	reorder
	sll	$4,$25
	sll	$5,$25
	or	$4,$1
.L_bn_div_words_body:
	srl	$3,$6,4*4	# bits
	sgeu	$1,$4,$6
	.set	noreorder
	beqz	$1,.+12			# high word < divisor: skip the subtraction
	nop
	subu	$4,$6
	.set	reorder

	# First round: upper half of the quotient.
	li	$8,-1
	srl	$9,$4,4*4	# bits
	srl	$8,4*4	# q=0xffff, i.e. -1 >> 16 (all-ones half word)
	beq	$3,$9,.L_bn_div_words_skip_div1
	divu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div1:
	multu	($6,$8)
	sll	$15,$4,4*4	# bits
	srl	$1,$5,4*4	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
	# While the product q * divisor ($13:$12) exceeds the partial
	# dividend ($9:$15), take one divisor off the product and
	# decrement q.
.L_bn_div_words_inner_loop1:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$2,$12,$6		# borrow for the two-word subtraction
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop1_done
	subu	$13,$2			# (delay slot) runs on both paths
	subu	$12,$6
	b	.L_bn_div_words_inner_loop1
	subu	$8,1			# (delay slot) q = q - 1
	.set	reorder
.L_bn_div_words_inner_loop1_done:

	sll	$5,4*4	# bits
	subu	$4,$15,$12
	sll	$2,$8,4*4	# bits

	# Second round: lower half of the quotient.
	li	$8,-1
	srl	$9,$4,4*4	# bits
	srl	$8,4*4	# q=0xffff, i.e. -1 >> 16 (all-ones half word)
	beq	$3,$9,.L_bn_div_words_skip_div2
	divu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div2:
	multu	($6,$8)
	sll	$15,$4,4*4	# bits
	srl	$1,$5,4*4	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
.L_bn_div_words_inner_loop2:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$3,$12,$6		# borrow; $3 is free again in this round
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop2_done
	subu	$13,$3
	subu	$12,$6
	b	.L_bn_div_words_inner_loop2
	subu	$8,1
	.set	reorder
.L_bn_div_words_inner_loop2_done:

	subu	$4,$15,$12
	or	$2,$8			# combine both quotient halves
	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
	srl	$6,$25	# restore $6

	.set	noreorder
	move	$5,$3
	jr	$31
	move	$4,$2
.end	bn_div_words_internal

# bn_mul_comba8: 8x8-word multiplication, one result column at a time.
#   $4 = result r (16 words written), $5 = a (8 words), $6 = b (8 words).
#   Register roles once everything is loaded:
#     a[0..7]  = $12,$13,$14,$15,$16,$18,$20,$5
#     b[0..7]  = $8,$9,$10,$11,$17,$19,$21,$6
#     c1,c2,c3 = $2,$3,$7 (three-word column accumulator whose roles
#                rotate after every stored result word)
#     $24/$25  = low/high half of the current product
#     $1       = carry scratch
#   $5 and $6 are overwritten with a[7] and b[7], and $7 is clobbered.
#   $16-$21 are saved in a 24-byte stack frame and restored at the end.
#   Each multiplication is started before the previous product has been
#   fully accumulated; the mul_add_c comment on a multu names the
#   product being started, not the one being added.
.align	5
.globl	bn_mul_comba8
.ent	bn_mul_comba8
bn_mul_comba8:
	.set	noreorder
	.frame	$29,6*4,$31
	.mask	0x003f0000,-4
	subu	$29,6*4
	sw	$21,5*4($29)
	sw	$20,4*4($29)
	sw	$19,3*4($29)
	sw	$18,2*4($29)
	sw	$17,1*4($29)
	sw	$16,0*4($29)

	.set	reorder
	lw	$12,0($5)	# If compiled with -mips3 option on
				# R5000 box assembler barks on this
				# 1ine with "should not have mult/div
				# as last instruction in bb (R10K
				# bug)" warning. If anybody out there
				# has a clue about how to circumvent
				# this do send me a note.
				#					<appro@fy.chalmers.se>
				# NOTE(review): the spelling 1ine looks
				# deliberate. A comment line that begins
				# with the word line would be taken by
				# cpp as a directive. Confirm before
				# correcting it.

	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	($2,$12,$8)
	mfhi	($3,$12,$8)

	lw	$16,4*4($5)
	lw	$18,5*4($5)
	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
	lw	$20,6*4($5)
	lw	$5,7*4($5)		# $5 now holds a[7], no longer the pointer
	lw	$17,4*4($6)
	lw	$19,5*4($6)
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	lw	$21,6*4($6)
	lw	$6,7*4($6)		# $6 now holds b[7], no longer the pointer
	sw	$2,0($4)	# r[0]=c1;
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)	# r[1]=c2;

	mflo	($24,$14,$8)
	mfhi	($25,$14,$8)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	($24,$13,$9)
	mfhi	($25,$13,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$12,$10)
	mfhi	($25,$12,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)	# r[2]=c3;

	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
# bn_mul_comba8 continued, from the middle of result column r[3].
# Register roles set up by the loads at the top of the routine:
#     a[0..7]  = $12,$13,$14,$15,$16,$18,$20,$5
#     b[0..7]  = $8,$9,$10,$11,$17,$19,$21,$6
#     c1,c2,c3 = $2,$3,$7,  $24/$25 = product low/high,  $1 = carry
# Every product is added as: low half into the current column word,
# high half plus carry into the next word, carry out into the third.
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)	# r[3]=c1;

	mflo	($24,$16,$8)
	mfhi	($25,$16,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$12,$17)
	mfhi	($25,$12,$17)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)	# r[4]=c2;

	mflo	($24,$12,$19)
	mfhi	($25,$12,$19)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$13,$17)
	mfhi	($25,$13,$17)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$16,$9)
	mfhi	($25,$16,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$18,$8)
	mfhi	($25,$18,$8)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)	# r[5]=c3;

	mflo	($24,$20,$8)
	mfhi	($25,$20,$8)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$18,$9)
	mfhi	($25,$18,$9)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$16,$10)
	mfhi	($25,$16,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$14,$17)
	mfhi	($25,$14,$17)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$13,$19)
	mfhi	($25,$13,$19)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$12,$21)
	mfhi	($25,$12,$21)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,6*4($4)	# r[6]=c1;

	mflo	($24,$12,$6)
	mfhi	($25,$12,$6)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$13,$21)
	mfhi	($25,$13,$21)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$14,$19)
	mfhi	($25,$14,$19)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$15,$17)
	mfhi	($25,$15,$17)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$16,$11)
	mfhi	($25,$16,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$18,$10)
	mfhi	($25,$18,$10)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$20,$9)
	mfhi	($25,$20,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$5,$8)
	mfhi	($25,$5,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,7*4($4)	# r[7]=c2;

	mflo	($24,$5,$9)
	mfhi	($25,$5,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$20,$10)
	mfhi	($25,$20,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$18,$11)
	mfhi	($25,$18,$11)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$16,$17)
	mfhi	($25,$16,$17)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$15,$19)
	mfhi	($25,$15,$19)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$14,$21)
	mfhi	($25,$14,$21)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$13,$6)
	mfhi	($25,$13,$6)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,8*4($4)	# r[8]=c3;

	mflo	($24,$14,$6)
	mfhi	($25,$14,$6)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$15,$21)
	mfhi	($25,$15,$21)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$16,$19)
	mfhi	($25,$16,$19)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$18,$17)
	mfhi	($25,$18,$17)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$20,$11)
	mfhi	($25,$20,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$5,$10)
	mfhi	($25,$5,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,9*4($4)	# r[9]=c1;

	mflo	($24,$5,$11)
	mfhi	($25,$5,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$20,$17)
	mfhi	($25,$20,$17)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$18,$19)
	mfhi	($25,$18,$19)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$16,$21)
	mfhi	($25,$16,$21)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$15,$6)
	mfhi	($25,$15,$6)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,10*4($4)	# r[10]=c2;

	mflo	($24,$16,$6)
	mfhi	($25,$16,$6)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$18,$21)
	mfhi	($25,$18,$21)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$20,$19)
	mfhi	($25,$20,$19)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$5,$17)
	mfhi	($25,$5,$17)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,11*4($4)	# r[11]=c3;

	mflo	($24,$5,$19)
	mfhi	($25,$5,$19)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$20,$21)
	mfhi	($25,$20,$21)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$18,$6)
	mfhi	($25,$18,$6)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,12*4($4)	# r[12]=c1;

	mflo	($24,$20,$6)
	mfhi	($25,$20,$6)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$5,$21)
	mfhi	($25,$5,$21)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,13*4($4)	# r[13]=c2;

	mflo	($24,$5,$6)
	mfhi	($25,$5,$6)
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sw	$7,14*4($4)	# r[14]=c3;
	sw	$2,15*4($4)	# r[15]=c1;

	# Restore the saved registers; the stack pointer is popped in the
	# delay slot of the return.
	.set	noreorder
	lw	$21,5*4($29)
	lw	$20,4*4($29)
	lw	$19,3*4($29)
	lw	$18,2*4($29)
	lw	$17,1*4($29)
	lw	$16,0*4($29)
	jr	$31
	addu	$29,6*4
.end	bn_mul_comba8

.align	5
.globl	bn_mul_comba4
.ent	bn_mul_comba4
# bn_mul_comba4: 4x4-word multiplication, one result column at a time.
#   $4 = result r (8 words written), $5 = a (4 words), $6 = b (4 words).
#   Register roles:
#     a[0..3]  = $12,$13,$14,$15
#     b[0..3]  = $8,$9,$10,$11
#     c1,c2,c3 = $2,$3,$7 (three-word column accumulator whose roles
#                rotate after every stored result word)
#     $24/$25  = low/high half of the current product
#     $1       = carry scratch
#   Leaf routine without a stack frame; $7 is clobbered.
#   The mul_add_c comment on a multu names the product being started,
#   which is accumulated by the instructions that follow it.
bn_mul_comba4:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	($2,$12,$8)
	mfhi	($3,$12,$8)
	sw	$2,0($4)		# r[0]=c1;

	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)		# r[1]=c2;

	mflo	($24,$14,$8)
	mfhi	($25,$14,$8)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	($24,$13,$9)
	mfhi	($25,$13,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$12,$10)
	mfhi	($25,$12,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)		# r[2]=c3;

	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)		# r[3]=c1;

	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)		# r[4]=c2;

	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)		# r[5]=c3;

	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)		# r[6]=c1;
	sw	$3,7*4($4)		# r[7]=c2;

	.set	noreorder
	jr	$31
	nop
.end	bn_mul_comba4

.align	5
.globl	bn_sqr_comba8
.ent	bn_sqr_comba8
bn_sqr_comba8:
	.set	reorder
	lw	$12,0($5)
	lw	$13,4($5)
	lw	$14,2*4($5)
	lw	$15,3*4($5)

	multu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$8,4*4($5)
	lw	$9,5*4($5)
	lw	$10,6*4($5)
	lw	$11,7*4($5)
	mflo	($2,$12,$12)
	mfhi	($3,$12,$12)
	sw	$2,0($4)

	multu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$13)
	mfhi	($25,$12,$13)
	slt	$2,$25,$0
	sll	$25,1
	multu	($14,$12)		#
mul_add_c2(a[2],b[0],c3,c1,c2); 1569 slt $6,$24,$0 1570 addu $25,$6 1571 sll $24,1 1572 addu $3,$24 1573 sltu $1,$3,$24 1574 addu $7,$25,$1 1575 sw $3,4($4) 1576 mflo ($24,$14,$12) 1577 mfhi ($25,$14,$12) 1578 addu $7,$24 1579 sltu $1,$7,$24 1580 multu ($13,$13) # forward multiplication 1581 addu $7,$24 1582 addu $1,$25 1583 sltu $24,$7,$24 1584 addu $2,$1 1585 addu $25,$24 1586 sltu $3,$2,$1 1587 addu $2,$25 1588 sltu $25,$2,$25 1589 addu $3,$25 1590 mflo ($24,$13,$13) 1591 mfhi ($25,$13,$13) 1592 addu $7,$24 1593 sltu $1,$7,$24 1594 multu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); 1595 addu $25,$1 1596 addu $2,$25 1597 sltu $1,$2,$25 1598 addu $3,$1 1599 sw $7,2*4($4) 1600 mflo ($24,$12,$15) 1601 mfhi ($25,$12,$15) 1602 addu $2,$24 1603 sltu $1,$2,$24 1604 multu ($13,$14) # forward multiplication 1605 addu $2,$24 1606 addu $1,$25 1607 sltu $24,$2,$24 1608 addu $3,$1 1609 addu $25,$24 1610 sltu $7,$3,$1 1611 addu $3,$25 1612 sltu $25,$3,$25 1613 addu $7,$25 1614 mflo ($24,$13,$14) 1615 mfhi ($25,$13,$14) 1616 addu $2,$24 1617 sltu $1,$2,$24 1618 multu ($8,$12) # forward multiplication 1619 addu $2,$24 1620 addu $1,$25 1621 sltu $24,$2,$24 1622 addu $3,$1 1623 addu $25,$24 1624 sltu $1,$3,$1 1625 addu $3,$25 1626 addu $7,$1 1627 sltu $25,$3,$25 1628 addu $7,$25 1629 mflo ($24,$8,$12) 1630 mfhi ($25,$8,$12) 1631 sw $2,3*4($4) 1632 addu $3,$24 1633 sltu $1,$3,$24 1634 multu ($15,$13) # forward multiplication 1635 addu $3,$24 1636 addu $1,$25 1637 sltu $24,$3,$24 1638 addu $7,$1 1639 addu $25,$24 1640 sltu $2,$7,$1 1641 addu $7,$25 1642 sltu $25,$7,$25 1643 addu $2,$25 1644 mflo ($24,$15,$13) 1645 mfhi ($25,$15,$13) 1646 addu $3,$24 1647 sltu $1,$3,$24 1648 multu ($14,$14) # forward multiplication 1649 addu $3,$24 1650 addu $1,$25 1651 sltu $24,$3,$24 1652 addu $7,$1 1653 addu $25,$24 1654 sltu $1,$7,$1 1655 addu $7,$25 1656 addu $2,$1 1657 sltu $25,$7,$25 1658 addu $2,$25 1659 mflo ($24,$14,$14) 1660 mfhi ($25,$14,$14) 1661 addu $3,$24 1662 sltu $1,$3,$24 1663 multu 
($12,$9) # mul_add_c2(a[0],b[5],c3,c1,c2); 1664 addu $25,$1 1665 addu $7,$25 1666 sltu $1,$7,$25 1667 addu $2,$1 1668 sw $3,4*4($4) 1669 mflo ($24,$12,$9) 1670 mfhi ($25,$12,$9) 1671 addu $7,$24 1672 sltu $1,$7,$24 1673 multu ($13,$8) # forward multiplication 1674 addu $7,$24 1675 addu $1,$25 1676 sltu $24,$7,$24 1677 addu $2,$1 1678 addu $25,$24 1679 sltu $3,$2,$1 1680 addu $2,$25 1681 sltu $25,$2,$25 1682 addu $3,$25 1683 mflo ($24,$13,$8) 1684 mfhi ($25,$13,$8) 1685 addu $7,$24 1686 sltu $1,$7,$24 1687 multu ($14,$15) # forward multiplication 1688 addu $7,$24 1689 addu $1,$25 1690 sltu $24,$7,$24 1691 addu $2,$1 1692 addu $25,$24 1693 sltu $1,$2,$1 1694 addu $2,$25 1695 addu $3,$1 1696 sltu $25,$2,$25 1697 addu $3,$25 1698 mflo ($24,$14,$15) 1699 mfhi ($25,$14,$15) 1700 addu $7,$24 1701 sltu $1,$7,$24 1702 multu ($10,$12) # forward multiplication 1703 addu $7,$24 1704 addu $1,$25 1705 sltu $24,$7,$24 1706 addu $2,$1 1707 addu $25,$24 1708 sltu $1,$2,$1 1709 addu $2,$25 1710 addu $3,$1 1711 sltu $25,$2,$25 1712 addu $3,$25 1713 mflo ($24,$10,$12) 1714 mfhi ($25,$10,$12) 1715 sw $7,5*4($4) 1716 addu $2,$24 1717 sltu $1,$2,$24 1718 multu ($9,$13) # forward multiplication 1719 addu $2,$24 1720 addu $1,$25 1721 sltu $24,$2,$24 1722 addu $3,$1 1723 addu $25,$24 1724 sltu $7,$3,$1 1725 addu $3,$25 1726 sltu $25,$3,$25 1727 addu $7,$25 1728 mflo ($24,$9,$13) 1729 mfhi ($25,$9,$13) 1730 addu $2,$24 1731 sltu $1,$2,$24 1732 multu ($8,$14) # forward multiplication 1733 addu $2,$24 1734 addu $1,$25 1735 sltu $24,$2,$24 1736 addu $3,$1 1737 addu $25,$24 1738 sltu $1,$3,$1 1739 addu $3,$25 1740 addu $7,$1 1741 sltu $25,$3,$25 1742 addu $7,$25 1743 mflo ($24,$8,$14) 1744 mfhi ($25,$8,$14) 1745 addu $2,$24 1746 sltu $1,$2,$24 1747 multu ($15,$15) # forward multiplication 1748 addu $2,$24 1749 addu $1,$25 1750 sltu $24,$2,$24 1751 addu $3,$1 1752 addu $25,$24 1753 sltu $1,$3,$1 1754 addu $3,$25 1755 addu $7,$1 1756 sltu $25,$3,$25 1757 addu $7,$25 1758 mflo ($24,$15,$15) 1759 
mfhi ($25,$15,$15) 1760 addu $2,$24 1761 sltu $1,$2,$24 1762 multu ($12,$11) # mul_add_c2(a[0],b[7],c2,c3,c1); 1763 addu $25,$1 1764 addu $3,$25 1765 sltu $1,$3,$25 1766 addu $7,$1 1767 sw $2,6*4($4) 1768 mflo ($24,$12,$11) 1769 mfhi ($25,$12,$11) 1770 addu $3,$24 1771 sltu $1,$3,$24 1772 multu ($13,$10) # forward multiplication 1773 addu $3,$24 1774 addu $1,$25 1775 sltu $24,$3,$24 1776 addu $7,$1 1777 addu $25,$24 1778 sltu $2,$7,$1 1779 addu $7,$25 1780 sltu $25,$7,$25 1781 addu $2,$25 1782 mflo ($24,$13,$10) 1783 mfhi ($25,$13,$10) 1784 addu $3,$24 1785 sltu $1,$3,$24 1786 multu ($14,$9) # forward multiplication 1787 addu $3,$24 1788 addu $1,$25 1789 sltu $24,$3,$24 1790 addu $7,$1 1791 addu $25,$24 1792 sltu $1,$7,$1 1793 addu $7,$25 1794 addu $2,$1 1795 sltu $25,$7,$25 1796 addu $2,$25 1797 mflo ($24,$14,$9) 1798 mfhi ($25,$14,$9) 1799 addu $3,$24 1800 sltu $1,$3,$24 1801 multu ($15,$8) # forward multiplication 1802 addu $3,$24 1803 addu $1,$25 1804 sltu $24,$3,$24 1805 addu $7,$1 1806 addu $25,$24 1807 sltu $1,$7,$1 1808 addu $7,$25 1809 addu $2,$1 1810 sltu $25,$7,$25 1811 addu $2,$25 1812 mflo ($24,$15,$8) 1813 mfhi ($25,$15,$8) 1814 addu $3,$24 1815 sltu $1,$3,$24 1816 multu ($11,$13) # forward multiplication 1817 addu $3,$24 1818 addu $1,$25 1819 sltu $24,$3,$24 1820 addu $7,$1 1821 addu $25,$24 1822 sltu $1,$7,$1 1823 addu $7,$25 1824 addu $2,$1 1825 sltu $25,$7,$25 1826 addu $2,$25 1827 mflo ($24,$11,$13) 1828 mfhi ($25,$11,$13) 1829 sw $3,7*4($4) 1830 addu $7,$24 1831 sltu $1,$7,$24 1832 multu ($10,$14) # forward multiplication 1833 addu $7,$24 1834 addu $1,$25 1835 sltu $24,$7,$24 1836 addu $2,$1 1837 addu $25,$24 1838 sltu $3,$2,$1 1839 addu $2,$25 1840 sltu $25,$2,$25 1841 addu $3,$25 1842 mflo ($24,$10,$14) 1843 mfhi ($25,$10,$14) 1844 addu $7,$24 1845 sltu $1,$7,$24 1846 multu ($9,$15) # forward multiplication 1847 addu $7,$24 1848 addu $1,$25 1849 sltu $24,$7,$24 1850 addu $2,$1 1851 addu $25,$24 1852 sltu $1,$2,$1 1853 addu $2,$25 1854 addu 
$3,$1 1855 sltu $25,$2,$25 1856 addu $3,$25 1857 mflo ($24,$9,$15) 1858 mfhi ($25,$9,$15) 1859 addu $7,$24 1860 sltu $1,$7,$24 1861 multu ($8,$8) # forward multiplication 1862 addu $7,$24 1863 addu $1,$25 1864 sltu $24,$7,$24 1865 addu $2,$1 1866 addu $25,$24 1867 sltu $1,$2,$1 1868 addu $2,$25 1869 addu $3,$1 1870 sltu $25,$2,$25 1871 addu $3,$25 1872 mflo ($24,$8,$8) 1873 mfhi ($25,$8,$8) 1874 addu $7,$24 1875 sltu $1,$7,$24 1876 multu ($14,$11) # mul_add_c2(a[2],b[7],c1,c2,c3); 1877 addu $25,$1 1878 addu $2,$25 1879 sltu $1,$2,$25 1880 addu $3,$1 1881 sw $7,8*4($4) 1882 mflo ($24,$14,$11) 1883 mfhi ($25,$14,$11) 1884 addu $2,$24 1885 sltu $1,$2,$24 1886 multu ($15,$10) # forward multiplication 1887 addu $2,$24 1888 addu $1,$25 1889 sltu $24,$2,$24 1890 addu $3,$1 1891 addu $25,$24 1892 sltu $7,$3,$1 1893 addu $3,$25 1894 sltu $25,$3,$25 1895 addu $7,$25 1896 mflo ($24,$15,$10) 1897 mfhi ($25,$15,$10) 1898 addu $2,$24 1899 sltu $1,$2,$24 1900 multu ($8,$9) # forward multiplication 1901 addu $2,$24 1902 addu $1,$25 1903 sltu $24,$2,$24 1904 addu $3,$1 1905 addu $25,$24 1906 sltu $1,$3,$1 1907 addu $3,$25 1908 addu $7,$1 1909 sltu $25,$3,$25 1910 addu $7,$25 1911 mflo ($24,$8,$9) 1912 mfhi ($25,$8,$9) 1913 addu $2,$24 1914 sltu $1,$2,$24 1915 multu ($11,$15) # forward multiplication 1916 addu $2,$24 1917 addu $1,$25 1918 sltu $24,$2,$24 1919 addu $3,$1 1920 addu $25,$24 1921 sltu $1,$3,$1 1922 addu $3,$25 1923 addu $7,$1 1924 sltu $25,$3,$25 1925 addu $7,$25 1926 mflo ($24,$11,$15) 1927 mfhi ($25,$11,$15) 1928 sw $2,9*4($4) 1929 addu $3,$24 1930 sltu $1,$3,$24 1931 multu ($10,$8) # forward multiplication 1932 addu $3,$24 1933 addu $1,$25 1934 sltu $24,$3,$24 1935 addu $7,$1 1936 addu $25,$24 1937 sltu $2,$7,$1 1938 addu $7,$25 1939 sltu $25,$7,$25 1940 addu $2,$25 1941 mflo ($24,$10,$8) 1942 mfhi ($25,$10,$8) 1943 addu $3,$24 1944 sltu $1,$3,$24 1945 multu ($9,$9) # forward multiplication 1946 addu $3,$24 1947 addu $1,$25 1948 sltu $24,$3,$24 1949 addu $7,$1 1950 
addu $25,$24 1951 sltu $1,$7,$1 1952 addu $7,$25 1953 addu $2,$1 1954 sltu $25,$7,$25 1955 addu $2,$25 1956 mflo ($24,$9,$9) 1957 mfhi ($25,$9,$9) 1958 addu $3,$24 1959 sltu $1,$3,$24 1960 multu ($8,$11) # mul_add_c2(a[4],b[7],c3,c1,c2); 1961 addu $25,$1 1962 addu $7,$25 1963 sltu $1,$7,$25 1964 addu $2,$1 1965 sw $3,10*4($4) 1966 mflo ($24,$8,$11) 1967 mfhi ($25,$8,$11) 1968 addu $7,$24 1969 sltu $1,$7,$24 1970 multu ($9,$10) # forward multiplication 1971 addu $7,$24 1972 addu $1,$25 1973 sltu $24,$7,$24 1974 addu $2,$1 1975 addu $25,$24 1976 sltu $3,$2,$1 1977 addu $2,$25 1978 sltu $25,$2,$25 1979 addu $3,$25 1980 mflo ($24,$9,$10) 1981 mfhi ($25,$9,$10) 1982 addu $7,$24 1983 sltu $1,$7,$24 1984 multu ($11,$9) # forward multiplication 1985 addu $7,$24 1986 addu $1,$25 1987 sltu $24,$7,$24 1988 addu $2,$1 1989 addu $25,$24 1990 sltu $1,$2,$1 1991 addu $2,$25 1992 addu $3,$1 1993 sltu $25,$2,$25 1994 addu $3,$25 1995 mflo ($24,$11,$9) 1996 mfhi ($25,$11,$9) 1997 sw $7,11*4($4) 1998 addu $2,$24 1999 sltu $1,$2,$24 2000 multu ($10,$10) # forward multiplication 2001 addu $2,$24 2002 addu $1,$25 2003 sltu $24,$2,$24 2004 addu $3,$1 2005 addu $25,$24 2006 sltu $7,$3,$1 2007 addu $3,$25 2008 sltu $25,$3,$25 2009 addu $7,$25 2010 mflo ($24,$10,$10) 2011 mfhi ($25,$10,$10) 2012 addu $2,$24 2013 sltu $1,$2,$24 2014 multu ($10,$11) # mul_add_c2(a[6],b[7],c2,c3,c1); 2015 addu $25,$1 2016 addu $3,$25 2017 sltu $1,$3,$25 2018 addu $7,$1 2019 sw $2,12*4($4) 2020 mflo ($24,$10,$11) 2021 mfhi ($25,$10,$11) 2022 addu $3,$24 2023 sltu $1,$3,$24 2024 multu ($11,$11) # forward multiplication 2025 addu $3,$24 2026 addu $1,$25 2027 sltu $24,$3,$24 2028 addu $7,$1 2029 addu $25,$24 2030 sltu $2,$7,$1 2031 addu $7,$25 2032 sltu $25,$7,$25 2033 addu $2,$25 2034 mflo ($24,$11,$11) 2035 mfhi ($25,$11,$11) 2036 sw $3,13*4($4) 2037 2038 addu $7,$24 2039 sltu $1,$7,$24 2040 addu $25,$1 2041 addu $2,$25 2042 sw $7,14*4($4) 2043 sw $2,15*4($4) 2044 2045 .set noreorder 2046 jr $31 2047 nop 
2048.end bn_sqr_comba8 2049 2050.align 5 2051.globl bn_sqr_comba4 2052.ent bn_sqr_comba4 2053bn_sqr_comba4: 2054 .set reorder 2055 lw $12,0($5) 2056 lw $13,4($5) 2057 multu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3); 2058 lw $14,2*4($5) 2059 lw $15,3*4($5) 2060 mflo ($2,$12,$12) 2061 mfhi ($3,$12,$12) 2062 sw $2,0($4) 2063 2064 multu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); 2065 mflo ($24,$12,$13) 2066 mfhi ($25,$12,$13) 2067 slt $2,$25,$0 2068 sll $25,1 2069 multu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); 2070 slt $6,$24,$0 2071 addu $25,$6 2072 sll $24,1 2073 addu $3,$24 2074 sltu $1,$3,$24 2075 addu $7,$25,$1 2076 sw $3,4($4) 2077 mflo ($24,$14,$12) 2078 mfhi ($25,$14,$12) 2079 addu $7,$24 2080 sltu $1,$7,$24 2081 multu ($13,$13) # forward multiplication 2082 addu $7,$24 2083 addu $1,$25 2084 sltu $24,$7,$24 2085 addu $2,$1 2086 addu $25,$24 2087 sltu $3,$2,$1 2088 addu $2,$25 2089 sltu $25,$2,$25 2090 addu $3,$25 2091 mflo ($24,$13,$13) 2092 mfhi ($25,$13,$13) 2093 addu $7,$24 2094 sltu $1,$7,$24 2095 multu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); 2096 addu $25,$1 2097 addu $2,$25 2098 sltu $1,$2,$25 2099 addu $3,$1 2100 sw $7,2*4($4) 2101 mflo ($24,$12,$15) 2102 mfhi ($25,$12,$15) 2103 addu $2,$24 2104 sltu $1,$2,$24 2105 multu ($13,$14) # forward multiplication 2106 addu $2,$24 2107 addu $1,$25 2108 sltu $24,$2,$24 2109 addu $3,$1 2110 addu $25,$24 2111 sltu $7,$3,$1 2112 addu $3,$25 2113 sltu $25,$3,$25 2114 addu $7,$25 2115 mflo ($24,$13,$14) 2116 mfhi ($25,$13,$14) 2117 addu $2,$24 2118 sltu $1,$2,$24 2119 multu ($15,$13) # forward multiplication 2120 addu $2,$24 2121 addu $1,$25 2122 sltu $24,$2,$24 2123 addu $3,$1 2124 addu $25,$24 2125 sltu $1,$3,$1 2126 addu $3,$25 2127 addu $7,$1 2128 sltu $25,$3,$25 2129 addu $7,$25 2130 mflo ($24,$15,$13) 2131 mfhi ($25,$15,$13) 2132 sw $2,3*4($4) 2133 addu $3,$24 2134 sltu $1,$3,$24 2135 multu ($14,$14) # forward multiplication 2136 addu $3,$24 2137 addu $1,$25 2138 sltu $24,$3,$24 2139 addu $7,$1 2140 addu 
$25,$24 2141 sltu $2,$7,$1 2142 addu $7,$25 2143 sltu $25,$7,$25 2144 addu $2,$25 2145 mflo ($24,$14,$14) 2146 mfhi ($25,$14,$14) 2147 addu $3,$24 2148 sltu $1,$3,$24 2149 multu ($14,$15) # mul_add_c2(a[2],b[3],c3,c1,c2); 2150 addu $25,$1 2151 addu $7,$25 2152 sltu $1,$7,$25 2153 addu $2,$1 2154 sw $3,4*4($4) 2155 mflo ($24,$14,$15) 2156 mfhi ($25,$14,$15) 2157 addu $7,$24 2158 sltu $1,$7,$24 2159 multu ($15,$15) # forward multiplication 2160 addu $7,$24 2161 addu $1,$25 2162 sltu $24,$7,$24 2163 addu $2,$1 2164 addu $25,$24 2165 sltu $3,$2,$1 2166 addu $2,$25 2167 sltu $25,$2,$25 2168 addu $3,$25 2169 mflo ($24,$15,$15) 2170 mfhi ($25,$15,$15) 2171 sw $7,5*4($4) 2172 2173 addu $2,$24 2174 sltu $1,$2,$24 2175 addu $25,$1 2176 addu $3,$25 2177 sw $2,6*4($4) 2178 sw $3,7*4($4) 2179 2180 .set noreorder 2181 jr $31 2182 nop 2183.end bn_sqr_comba4 2184