#!/usr/local/bin/perl
# Generator for the DEC Alpha bignum assembler.
#
# The assembler template returned by data() is written with symbolic
# register names, which are easier to follow than raw register numbers.
# This script rewrites them into numbered Alpha registers and prints the
# resulting assembler source on standard output.

use strict;
use warnings;

# Symbolic register name => Alpha register number.
my %REGISTER_FOR = (
    CC => 0,
    R1 => 1,  R2 => 2,  R3 => 3,  R4 => 4,
    L1 => 5,  L2 => 6,  L3 => 7,  L4 => 8,
    O1 => 22, O2 => 23, O3 => 24, O4 => 25,
    A1 => 20, A2 => 21, A3 => 27, A4 => 28,
);

print convert_registers(data())
    or die "cannot write assembler output: $!\n";

# convert_registers($text)
#
# Returns a copy of $text in which every symbolic register reference
# ($CC, $R1-$R4, $L1-$L4, $O1-$O4, $A1-$A4) has been replaced by the
# numbered register from %REGISTER_FOR.
#
# Only names that directly follow a '$' and end at a word boundary are
# rewritten, so the same letters appearing in comments, labels or symbol
# names are left untouched.
sub convert_registers {
    my ($text) = @_;

    $text =~ s/\$(CC|[RLOA][1-4])\b/'$' . $REGISTER_FOR{$1}/ge;

    return $text;
}

# data()
#
# Returns the assembler template as a single string.  The template defines
# bn_mul_add_words, bn_mul_words, bn_sqr_words, bn_add_words, bn_sub_words
# and bn_div_words, and still contains symbolic register names; pass it
# through convert_registers() before assembling it.
#
# Notes for maintainers:
#   * bn_add_words, tail loop ($945): the "a + b" overflow compare is done
#     before the carry-in is added, exactly as in the unrolled loop ($901).
#     Doing it afterwards loses the carry when b is all ones and the
#     carry-in is 1, and counts it twice when a + b is all ones.
#   * bn_sqr_words: the loop counter update spells out its destination
#     register ("subq $18,4,$18") like every other instruction here.
#   * bn_sub_words: the unconditional "br $800" sends every call to the
#     one-word-at-a-time loop ($845).  The unrolled loop ($801) is therefore
#     unreachable; it is still a copy of the addition code (it uses addq),
#     so the branch must stay until that loop is rewritten as a subtraction.
sub data {
    my $template = <<'EOF';

 # DEC Alpha assember
 # The bn_div_words is actually gcc output but the other parts are hand done.
 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
 # bn_div_words.
 # I've gone back and re-done most of routines.
 # The key thing to remeber for the 164 CPU is that while a
 # multiply operation takes 8 cycles, another one can only be issued
 # after 4 cycles have elapsed. I've done modification to help
 # improve this. Also, normally, a ld instruction will not be available
 # for about 3 cycles.
	.file	1 "bn_asm.c"
	.set	noat
gcc2_compiled.:
__gnu_compiled_c:
	.text
	.align	3
	.globl	bn_mul_add_words
	.ent	bn_mul_add_words
bn_mul_add_words:
bn_mul_add_words..ng:
	.frame	$30,0,$26,0
	.prologue	0
	.align	5
	subq	$18,4,$18
	bis	$31,$31,$CC
	blt	$18,$43	# if we are -1, -2, -3 or -4 goto tail code
	ldq	$A1,0($17)	# 1 1
	ldq	$R1,0($16)	# 1 1
	.align	3
$42:
	mulq	$A1,$19,$L1	# 1 2 1 ######
	ldq	$A2,8($17)	# 2 1
	ldq	$R2,8($16)	# 2 1
	umulh	$A1,$19,$A1	# 1 2 ######
	ldq	$A3,16($17)	# 3 1
	ldq	$R3,16($16)	# 3 1
	mulq	$A2,$19,$L2	# 2 2 1 ######
	ldq	$A4,24($17)	# 4 1
	addq	$R1,$L1,$R1	# 1 2 2
	ldq	$R4,24($16)	# 4 1
	umulh	$A2,$19,$A2	# 2 2 ######
	cmpult	$R1,$L1,$O1	# 1 2 3 1
	addq	$A1,$O1,$A1	# 1 3 1
	addq	$R1,$CC,$R1	# 1 2 3 1
	mulq	$A3,$19,$L3	# 3 2 1 ######
	cmpult	$R1,$CC,$CC	# 1 2 3 2
	addq	$R2,$L2,$R2	# 2 2 2
	addq	$A1,$CC,$CC	# 1 3 2
	cmpult	$R2,$L2,$O2	# 2 2 3 1
	addq	$A2,$O2,$A2	# 2 3 1
	umulh	$A3,$19,$A3	# 3 2 ######
	addq	$R2,$CC,$R2	# 2 2 3 1
	cmpult	$R2,$CC,$CC	# 2 2 3 2
	subq	$18,4,$18
	mulq	$A4,$19,$L4	# 4 2 1 ######
	addq	$A2,$CC,$CC	# 2 3 2
	addq	$R3,$L3,$R3	# 3 2 2
	addq	$16,32,$16
	cmpult	$R3,$L3,$O3	# 3 2 3 1
	stq	$R1,-32($16)	# 1 2 4
	umulh	$A4,$19,$A4	# 4 2 ######
	addq	$A3,$O3,$A3	# 3 3 1
	addq	$R3,$CC,$R3	# 3 2 3 1
	stq	$R2,-24($16)	# 2 2 4
	cmpult	$R3,$CC,$CC	# 3 2 3 2
	stq	$R3,-16($16)	# 3 2 4
	addq	$R4,$L4,$R4	# 4 2 2
	addq	$A3,$CC,$CC	# 3 3 2
	cmpult	$R4,$L4,$O4	# 4 2 3 1
	addq	$17,32,$17
	addq	$A4,$O4,$A4	# 4 3 1
	addq	$R4,$CC,$R4	# 4 2 3 1
	cmpult	$R4,$CC,$CC	# 4 2 3 2
	stq	$R4,-8($16)	# 4 2 4
	addq	$A4,$CC,$CC	# 4 3 2
	blt	$18,$43

	ldq	$A1,0($17)	# 1 1
	ldq	$R1,0($16)	# 1 1

	br	$42

	.align	4
$45:
	ldq	$A1,0($17)	# 4 1
	ldq	$R1,0($16)	# 4 1
	mulq	$A1,$19,$L1	# 4 2 1
	subq	$18,1,$18
	addq	$16,8,$16
	addq	$17,8,$17
	umulh	$A1,$19,$A1	# 4 2
	addq	$R1,$L1,$R1	# 4 2 2
	cmpult	$R1,$L1,$O1	# 4 2 3 1
	addq	$A1,$O1,$A1	# 4 3 1
	addq	$R1,$CC,$R1	# 4 2 3 1
	cmpult	$R1,$CC,$CC	# 4 2 3 2
	addq	$A1,$CC,$CC	# 4 3 2
	stq	$R1,-8($16)	# 4 2 4
	bgt	$18,$45
	ret	$31,($26),1	# else exit

	.align	4
$43:
	addq	$18,4,$18
	bgt	$18,$45	# goto tail code
	ret	$31,($26),1	# else exit

	.end	bn_mul_add_words
	.align	3
	.globl	bn_mul_words
	.ent	bn_mul_words
bn_mul_words:
bn_mul_words..ng:
	.frame	$30,0,$26,0
	.prologue	0
	.align	5
	subq	$18,4,$18
	bis	$31,$31,$CC
	blt	$18,$143	# if we are -1, -2, -3 or -4 goto tail code
	ldq	$A1,0($17)	# 1 1
	.align	3
$142:

	mulq	$A1,$19,$L1	# 1 2 1 #####
	ldq	$A2,8($17)	# 2 1
	ldq	$A3,16($17)	# 3 1
	umulh	$A1,$19,$A1	# 1 2 #####
	ldq	$A4,24($17)	# 4 1
	mulq	$A2,$19,$L2	# 2 2 1 #####
	addq	$L1,$CC,$L1	# 1 2 3 1
	subq	$18,4,$18
	cmpult	$L1,$CC,$CC	# 1 2 3 2
	umulh	$A2,$19,$A2	# 2 2 #####
	addq	$A1,$CC,$CC	# 1 3 2
	addq	$17,32,$17
	addq	$L2,$CC,$L2	# 2 2 3 1
	mulq	$A3,$19,$L3	# 3 2 1 #####
	cmpult	$L2,$CC,$CC	# 2 2 3 2
	addq	$A2,$CC,$CC	# 2 3 2
	addq	$16,32,$16
	umulh	$A3,$19,$A3	# 3 2 #####
	stq	$L1,-32($16)	# 1 2 4
	mulq	$A4,$19,$L4	# 4 2 1 #####
	addq	$L3,$CC,$L3	# 3 2 3 1
	stq	$L2,-24($16)	# 2 2 4
	cmpult	$L3,$CC,$CC	# 3 2 3 2
	umulh	$A4,$19,$A4	# 4 2 #####
	addq	$A3,$CC,$CC	# 3 3 2
	stq	$L3,-16($16)	# 3 2 4
	addq	$L4,$CC,$L4	# 4 2 3 1
	cmpult	$L4,$CC,$CC	# 4 2 3 2

	addq	$A4,$CC,$CC	# 4 3 2

	stq	$L4,-8($16)	# 4 2 4

	blt	$18,$143

	ldq	$A1,0($17)	# 1 1

	br	$142

	.align	4
$145:
	ldq	$A1,0($17)	# 4 1
	mulq	$A1,$19,$L1	# 4 2 1
	subq	$18,1,$18
	umulh	$A1,$19,$A1	# 4 2
	addq	$L1,$CC,$L1	# 4 2 3 1
	addq	$16,8,$16
	cmpult	$L1,$CC,$CC	# 4 2 3 2
	addq	$17,8,$17
	addq	$A1,$CC,$CC	# 4 3 2
	stq	$L1,-8($16)	# 4 2 4

	bgt	$18,$145
	ret	$31,($26),1	# else exit

	.align	4
$143:
	addq	$18,4,$18
	bgt	$18,$145	# goto tail code
	ret	$31,($26),1	# else exit

	.end	bn_mul_words
	.align	3
	.globl	bn_sqr_words
	.ent	bn_sqr_words
bn_sqr_words:
bn_sqr_words..ng:
	.frame	$30,0,$26,0
	.prologue	0

	subq	$18,4,$18
	blt	$18,$543	# if we are -1, -2, -3 or -4 goto tail code
	ldq	$A1,0($17)	# 1 1
	.align	3
$542:
	mulq	$A1,$A1,$L1	######
	ldq	$A2,8($17)	# 1 1
	subq	$18,4,$18
	umulh	$A1,$A1,$R1	######
	ldq	$A3,16($17)	# 1 1
	mulq	$A2,$A2,$L2	######
	ldq	$A4,24($17)	# 1 1
	stq	$L1,0($16)	# r[0]
	umulh	$A2,$A2,$R2	######
	stq	$R1,8($16)	# r[1]
	mulq	$A3,$A3,$L3	######
	stq	$L2,16($16)	# r[0]
	umulh	$A3,$A3,$R3	######
	stq	$R2,24($16)	# r[1]
	mulq	$A4,$A4,$L4	######
	stq	$L3,32($16)	# r[0]
	umulh	$A4,$A4,$R4	######
	stq	$R3,40($16)	# r[1]

	addq	$16,64,$16
	addq	$17,32,$17
	stq	$L4,-16($16)	# r[0]
	stq	$R4,-8($16)	# r[1]

	blt	$18,$543
	ldq	$A1,0($17)	# 1 1
	br	$542

$442:
	ldq	$A1,0($17)	# a[0]
	mulq	$A1,$A1,$L1	# a[0]*w low part r2
	addq	$16,16,$16
	addq	$17,8,$17
	subq	$18,1,$18
	umulh	$A1,$A1,$R1	# a[0]*w high part r3
	stq	$L1,-16($16)	# r[0]
	stq	$R1,-8($16)	# r[1]

	bgt	$18,$442
	ret	$31,($26),1	# else exit

	.align	4
$543:
	addq	$18,4,$18
	bgt	$18,$442	# goto tail code
	ret	$31,($26),1	# else exit
	.end	bn_sqr_words

	.align	3
	.globl	bn_add_words
	.ent	bn_add_words
bn_add_words:
bn_add_words..ng:
	.frame	$30,0,$26,0
	.prologue	0

	subq	$19,4,$19
	bis	$31,$31,$CC	# carry = 0
	blt	$19,$900
	ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	.align	3
$901:
	addq	$R1,$L1,$R1	# r=a+b;
	ldq	$L2,8($17)	# a[1]
	cmpult	$R1,$L1,$O1	# did we overflow?
	ldq	$R2,8($18)	# b[1]
	addq	$R1,$CC,$R1	# c+= overflow
	ldq	$L3,16($17)	# a[2]
	cmpult	$R1,$CC,$CC	# overflow?
	ldq	$R3,16($18)	# b[2]
	addq	$CC,$O1,$CC
	ldq	$L4,24($17)	# a[3]
	addq	$R2,$L2,$R2	# r=a+b;
	ldq	$R4,24($18)	# b[3]
	cmpult	$R2,$L2,$O2	# did we overflow?
	addq	$R3,$L3,$R3	# r=a+b;
	addq	$R2,$CC,$R2	# c+= overflow
	cmpult	$R3,$L3,$O3	# did we overflow?
	cmpult	$R2,$CC,$CC	# overflow?
	addq	$R4,$L4,$R4	# r=a+b;
	addq	$CC,$O2,$CC
	cmpult	$R4,$L4,$O4	# did we overflow?
	addq	$R3,$CC,$R3	# c+= overflow
	stq	$R1,0($16)	# r[0]=c
	cmpult	$R3,$CC,$CC	# overflow?
	stq	$R2,8($16)	# r[1]=c
	addq	$CC,$O3,$CC
	stq	$R3,16($16)	# r[2]=c
	addq	$R4,$CC,$R4	# c+= overflow
	subq	$19,4,$19	# loop--
	cmpult	$R4,$CC,$CC	# overflow?
	addq	$17,32,$17	# a++
	addq	$CC,$O4,$CC
	stq	$R4,24($16)	# r[3]=c
	addq	$18,32,$18	# b++
	addq	$16,32,$16	# r++

	blt	$19,$900
	ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	br	$901
	.align	4
$945:
	ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	addq	$R1,$L1,$R1	# r=a+b;
	subq	$19,1,$19	# loop--
	cmpult	$R1,$L1,$O1	# did we overflow?
	addq	$17,8,$17	# a++
	addq	$R1,$CC,$R1	# c+= overflow
	cmpult	$R1,$CC,$CC	# overflow?
	addq	$18,8,$18	# b++
	stq	$R1,0($16)	# r[0]=c
	addq	$CC,$O1,$CC
	addq	$16,8,$16	# r++

	bgt	$19,$945
	ret	$31,($26),1	# else exit

$900:
	addq	$19,4,$19
	bgt	$19,$945	# goto tail code
	ret	$31,($26),1	# else exit
	.end	bn_add_words

	.align	3
	.globl	bn_sub_words
	.ent	bn_sub_words
bn_sub_words:
bn_sub_words..ng:
	.frame	$30,0,$26,0
	.prologue	0

	subq	$19,4,$19
	bis	$31,$31,$CC	# carry = 0
	br	$800
	blt	$19,$800
	ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	.align	3
$801:
	addq	$R1,$L1,$R1	# r=a+b;
	ldq	$L2,8($17)	# a[1]
	cmpult	$R1,$L1,$O1	# did we overflow?
	ldq	$R2,8($18)	# b[1]
	addq	$R1,$CC,$R1	# c+= overflow
	ldq	$L3,16($17)	# a[2]
	cmpult	$R1,$CC,$CC	# overflow?
	ldq	$R3,16($18)	# b[2]
	addq	$CC,$O1,$CC
	ldq	$L4,24($17)	# a[3]
	addq	$R2,$L2,$R2	# r=a+b;
	ldq	$R4,24($18)	# b[3]
	cmpult	$R2,$L2,$O2	# did we overflow?
	addq	$R3,$L3,$R3	# r=a+b;
	addq	$R2,$CC,$R2	# c+= overflow
	cmpult	$R3,$L3,$O3	# did we overflow?
	cmpult	$R2,$CC,$CC	# overflow?
	addq	$R4,$L4,$R4	# r=a+b;
	addq	$CC,$O2,$CC
	cmpult	$R4,$L4,$O4	# did we overflow?
	addq	$R3,$CC,$R3	# c+= overflow
	stq	$R1,0($16)	# r[0]=c
	cmpult	$R3,$CC,$CC	# overflow?
	stq	$R2,8($16)	# r[1]=c
	addq	$CC,$O3,$CC
	stq	$R3,16($16)	# r[2]=c
	addq	$R4,$CC,$R4	# c+= overflow
	subq	$19,4,$19	# loop--
	cmpult	$R4,$CC,$CC	# overflow?
	addq	$17,32,$17	# a++
	addq	$CC,$O4,$CC
	stq	$R4,24($16)	# r[3]=c
	addq	$18,32,$18	# b++
	addq	$16,32,$16	# r++

	blt	$19,$800
	ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	br	$801
	.align	4
$845:
	ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	cmpult	$L1,$R1,$O1	# will we borrow?
	subq	$L1,$R1,$R1	# r=a-b;
	subq	$19,1,$19	# loop--
	cmpult	$R1,$CC,$O2	# will we borrow?
	subq	$R1,$CC,$R1	# c+= overflow
	addq	$17,8,$17	# a++
	addq	$18,8,$18	# b++
	stq	$R1,0($16)	# r[0]=c
	addq	$O2,$O1,$CC
	addq	$16,8,$16	# r++

	bgt	$19,$845
	ret	$31,($26),1	# else exit

$800:
	addq	$19,4,$19
	bgt	$19,$845	# goto tail code
	ret	$31,($26),1	# else exit
	.end	bn_sub_words

 #
 # What follows was taken directly from the C compiler with a few
 # hacks to redo the lables.
 #
.text
	.align	3
	.globl	bn_div_words
	.ent	bn_div_words
bn_div_words:
	ldgp	$29,0($27)
bn_div_words..ng:
	lda	$30,-48($30)
	.frame	$30,48,$26,0
	stq	$26,0($30)
	stq	$9,8($30)
	stq	$10,16($30)
	stq	$11,24($30)
	stq	$12,32($30)
	stq	$13,40($30)
	.mask	0x4003e00,-48
	.prologue	1
	bis	$16,$16,$9
	bis	$17,$17,$10
	bis	$18,$18,$11
	bis	$31,$31,$13
	bis	$31,2,$12
	bne	$11,$119
	lda	$0,-1
	br	$31,$136
	.align	4
$119:
	bis	$11,$11,$16
	jsr	$26,BN_num_bits_word
	ldgp	$29,0($26)
	subq	$0,64,$1
	beq	$1,$120
	bis	$31,1,$1
	sll	$1,$0,$1
	cmpule	$9,$1,$1
	bne	$1,$120
 #	lda	$16,_IO_stderr_
 #	lda	$17,$C32
 #	bis	$0,$0,$18
 #	jsr	$26,fprintf
 #	ldgp	$29,0($26)
	jsr	$26,abort
	ldgp	$29,0($26)
	.align	4
$120:
	bis	$31,64,$3
	cmpult	$9,$11,$2
	subq	$3,$0,$1
	addl	$1,$31,$0
	subq	$9,$11,$1
	cmoveq	$2,$1,$9
	beq	$0,$122
	zapnot	$0,15,$2
	subq	$3,$0,$1
	sll	$11,$2,$11
	sll	$9,$2,$3
	srl	$10,$1,$1
	sll	$10,$2,$10
	bis	$3,$1,$9
$122:
	srl	$11,32,$5
	zapnot	$11,15,$6
	lda	$7,-1
	.align	5
$123:
	srl	$9,32,$1
	subq	$1,$5,$1
	bne	$1,$126
	zapnot	$7,15,$27
	br	$31,$127
	.align	4
$126:
	bis	$9,$9,$24
	bis	$5,$5,$25
	divqu	$24,$25,$27
$127:
	srl	$10,32,$4
	.align	5
$128:
	mulq	$27,$5,$1
	subq	$9,$1,$3
	zapnot	$3,240,$1
	bne	$1,$129
	mulq	$6,$27,$2
	sll	$3,32,$1
	addq	$1,$4,$1
	cmpule	$2,$1,$2
	bne	$2,$129
	subq	$27,1,$27
	br	$31,$128
	.align	4
$129:
	mulq	$27,$6,$1
	mulq	$27,$5,$4
	srl	$1,32,$3
	sll	$1,32,$1
	addq	$4,$3,$4
	cmpult	$10,$1,$2
	subq	$10,$1,$10
	addq	$2,$4,$2
	cmpult	$9,$2,$1
	bis	$2,$2,$4
	beq	$1,$134
	addq	$9,$11,$9
	subq	$27,1,$27
$134:
	subl	$12,1,$12
	subq	$9,$4,$9
	beq	$12,$124
	sll	$27,32,$13
	sll	$9,32,$2
	srl	$10,32,$1
	sll	$10,32,$10
	bis	$2,$1,$9
	br	$31,$123
	.align	4
$124:
	bis	$13,$27,$0
$136:
	ldq	$26,0($30)
	ldq	$9,8($30)
	ldq	$10,16($30)
	ldq	$11,24($30)
	ldq	$12,32($30)
	ldq	$13,40($30)
	addq	$30,48,$30
	ret	$31,($26),1
	.end	bn_div_words
EOF

    return $template;
}