/*
 * GHASH for Alpha, 4-bit table-driven variant.
 *
 * Syntax: GNU as for Alpha, run through the C preprocessor.
 *
 * Entry points:
 *   gcm_gmult_4bit   a0 = Xi, a1 = Htbl
 *   gcm_ghash_4bit   a0 = Xi, a1 = Htbl, a2 = inp, a3 = len
 *
 * Xi is read as two quadwords at 0(Xi) and 8(Xi) and rewritten in place.
 * Htbl is indexed by nibble*16; each entry is read as a quadword pair,
 * 0(entry) into Zhi and 8(entry) into Zlo.
 * NOTE(review): presumably these match the C-side GCM prototypes that
 * take (Xi, Htable) and (Xi, Htable, inp, len) - confirm against caller.
 */
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

/*
 * Symbolic register names.  Each alias expands to the regdef.h name on
 * its right, so the generated instructions are unchanged.
 */
#define cnt     v0      /* byte index into Xlo/Xhi, counts down      */
#define Thi0    t3      /* Htbl entry for the low nibble, 0(entry)   */
#define Tlo0    t4      /* Htbl entry for the low nibble, 8(entry)   */
#define Thi1    t5      /* Htbl entry for the high nibble, 0(entry)  */
#define Tlo1    t6      /* Htbl entry for the high nibble, 8(entry)  */
#define rem     t7      /* quadword fetched from the rem_4bit table  */
#define Xi      a0      /* argument: 16-byte state, updated in place */
#define Htbl    a1      /* argument: table of 16-byte entries        */
#define inp     a2      /* argument: input pointer (ghash only)      */
#define len     a3      /* argument: byte count (ghash only)         */
#define nlo     a4      /* low nibble * 16, then entry address       */
#define nhi     a5      /* high nibble * 16, then entry address      */
#define Zhi     t8      /* accumulator, quadword at offset 0         */
#define Zlo     t9      /* accumulator, quadword at offset 8         */
#define Xhi     t10     /* quadword at 0(Xi)                         */
#define Xlo     t11     /* quadword at 8(Xi)                         */
#define remp    t12     /* Zlo & 0xf, then address in rem_4bit       */
#define remtbl  AT      /* address of rem_4bit, set by picmeup       */
#define inhi    s0      /* input quadword at 0(inp) (ghash only)     */
#define inlo    s1      /* input quadword at 8(inp) (ghash only)     */

        .text

        .set    noat                    /* AT is used explicitly as remtbl */
        .set    noreorder

/*
 * gcm_gmult_4bit
 * In:    a0 = Xi, a1 = Htbl
 * Out:   Xi overwritten with the result
 * Clobb: v0, t0-t12, a4, a5, AT
 * Stack: none (leaf, frame size 0)
 */
        .globl  gcm_gmult_4bit
        .align  4
        .ent    gcm_gmult_4bit
gcm_gmult_4bit:
        .frame  sp,0,ra
        .prologue 0

        ldq     Xlo,8(Xi)
        ldq     Xhi,0(Xi)

        bsr     t0,picmeup              /* remtbl = &rem_4bit; links via t0, ra untouched */
        nop
        .align  4

        /*
         * Start with byte 7 of Xlo.  nhi = byte & 0xf0 and
         * nlo = (byte << 4) & 0xf0 are byte offsets into Htbl.
         * Z is seeded from the low-nibble entry.
         */
        extbl   Xlo,7,nlo
        and     nlo,0xf0,nhi
        sll     nlo,4,nlo
        and     nlo,0xf0,nlo

        addq    nlo,Htbl,nlo
        ldq     Zlo,8(nlo)
        addq    nhi,Htbl,nhi
        ldq     Zhi,0(nlo)

        /*
         * Every step below shifts Zhi:Zlo right by four bits (t0 carries
         * the four bits moving from Zhi into Zlo), XORs the rem_4bit entry
         * selected by the four bits shifted out of Zlo into Zhi, and XORs
         * in the next Htbl entry.  The steps are interleaved, so the
         * "xor rem" for one step is issued in the following group.
         */
        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        lda     cnt,6(zero)
        extbl   Xlo,6,nlo

        ldq     Tlo1,8(nhi)
        s8addq  remp,remtbl,remp        /* remp = remtbl + remp*8 */
        ldq     Thi1,0(nhi)
        srl     Zlo,4,Zlo

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo
        and     nlo,0xf0,nhi

        xor     Tlo1,Zlo,Zlo
        sll     nlo,4,nlo
        xor     Thi1,Zhi,Zhi
        and     nlo,0xf0,nlo

        addq    nlo,Htbl,nlo
        ldq     Tlo0,8(nlo)
        addq    nhi,Htbl,nhi
        ldq     Thi0,0(nlo)

        /* Bytes 6..0 of Xlo: two nibble steps per pass, exits when cnt == 0. */
.Looplo1:
        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        subq    cnt,1,cnt
        srl     Zlo,4,Zlo

        ldq     Tlo1,8(nhi)
        xor     rem,Zhi,Zhi
        ldq     Thi1,0(nhi)
        s8addq  remp,remtbl,remp

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo
        extbl   Xlo,cnt,nlo

        and     nlo,0xf0,nhi
        xor     Thi0,Zhi,Zhi
        xor     Tlo0,Zlo,Zlo
        sll     nlo,4,nlo


        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        and     nlo,0xf0,nlo
        srl     Zlo,4,Zlo

        s8addq  remp,remtbl,remp
        xor     rem,Zhi,Zhi
        addq    nlo,Htbl,nlo
        addq    nhi,Htbl,nhi

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        ldq     Tlo0,8(nlo)
        xor     t0,Zlo,Zlo

        xor     Tlo1,Zlo,Zlo
        xor     Thi1,Zhi,Zhi
        ldq     Thi0,0(nlo)
        bne     cnt,.Looplo1


        /* Switch from Xlo to Xhi: same pass shape, reading byte 7 of Xhi. */
        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        lda     cnt,7(zero)
        srl     Zlo,4,Zlo

        ldq     Tlo1,8(nhi)
        xor     rem,Zhi,Zhi
        ldq     Thi1,0(nhi)
        s8addq  remp,remtbl,remp

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo
        extbl   Xhi,cnt,nlo

        and     nlo,0xf0,nhi
        xor     Thi0,Zhi,Zhi
        xor     Tlo0,Zlo,Zlo
        sll     nlo,4,nlo

        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        and     nlo,0xf0,nlo
        srl     Zlo,4,Zlo

        s8addq  remp,remtbl,remp
        xor     rem,Zhi,Zhi
        addq    nlo,Htbl,nlo
        addq    nhi,Htbl,nhi

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        ldq     Tlo0,8(nlo)
        xor     t0,Zlo,Zlo

        xor     Tlo1,Zlo,Zlo
        xor     Thi1,Zhi,Zhi
        ldq     Thi0,0(nlo)
        unop


        /* Bytes 6..0 of Xhi. */
.Loophi1:
        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        subq    cnt,1,cnt
        srl     Zlo,4,Zlo

        ldq     Tlo1,8(nhi)
        xor     rem,Zhi,Zhi
        ldq     Thi1,0(nhi)
        s8addq  remp,remtbl,remp

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo
        extbl   Xhi,cnt,nlo

        and     nlo,0xf0,nhi
        xor     Thi0,Zhi,Zhi
        xor     Tlo0,Zlo,Zlo
        sll     nlo,4,nlo


        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        and     nlo,0xf0,nlo
        srl     Zlo,4,Zlo

        s8addq  remp,remtbl,remp
        xor     rem,Zhi,Zhi
        addq    nlo,Htbl,nlo
        addq    nhi,Htbl,nhi

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        ldq     Tlo0,8(nlo)
        xor     t0,Zlo,Zlo

        xor     Tlo1,Zlo,Zlo
        xor     Thi1,Zhi,Zhi
        ldq     Thi0,0(nlo)
        bne     cnt,.Loophi1


        /* Drain: the last two nibble steps, with no further byte to fetch. */
        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        srl     Zlo,4,Zlo

        ldq     Tlo1,8(nhi)
        xor     rem,Zhi,Zhi
        ldq     Thi1,0(nhi)
        s8addq  remp,remtbl,remp

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo

        xor     Tlo0,Zlo,Zlo
        xor     Thi0,Zhi,Zhi

        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        srl     Zlo,4,Zlo

        s8addq  remp,remtbl,remp
        xor     rem,Zhi,Zhi

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     Tlo1,Zlo,Zlo
        xor     Thi1,Zhi,Zhi
        xor     t0,Zlo,Zlo
        xor     rem,Zhi,Zhi

        /*
         * Byte swap.  The shift/zapnot groups reverse the four bytes inside
         * each 32-bit half; the closing srl/sll by 32 and the or exchange
         * the halves.  Zlo ends up reversed in Xlo, Zhi reversed in Xhi.
         */
        srl     Zlo,24,t0
        srl     Zlo,8,t1

        sll     Zlo,8,t2
        sll     Zlo,24,Zlo
        zapnot  t0,0x11,t0
        zapnot  t1,0x22,t1

        zapnot  Zlo,0x88,Zlo
        or      t0,t1,t0
        zapnot  t2,0x44,t2

        or      Zlo,t0,Zlo
        srl     Zhi,24,t0
        srl     Zhi,8,t1

        or      Zlo,t2,Zlo
        sll     Zhi,8,t2
        sll     Zhi,24,Zhi

        srl     Zlo,32,Xlo
        sll     Zlo,32,Zlo

        zapnot  t0,0x11,t0
        zapnot  t1,0x22,t1
        or      Zlo,Xlo,Xlo

        zapnot  Zhi,0x88,Zhi
        or      t0,t1,t0
        zapnot  t2,0x44,t2

        or      Zhi,t0,Zhi
        or      Zhi,t2,Zhi

        srl     Zhi,32,Xhi
        sll     Zhi,32,Zhi

        or      Zhi,Xhi,Xhi
        stq     Xlo,8(Xi)
        stq     Xhi,0(Xi)

        ret     (ra)
        .end    gcm_gmult_4bit

/*
 * gcm_ghash_4bit
 * In:    a0 = Xi, a1 = Htbl, a2 = inp, a3 = len
 * Out:   Xi overwritten with the result
 * Clobb: v0, t0-t12, a2-a5, AT (s0 and s1 are saved and restored)
 * Stack: 32-byte frame; ra at 0(sp), s0 at 8(sp), s1 at 16(sp)
 *
 * Each pass of .Louter XORs 16 input bytes into Xi and runs the same
 * nibble sequence as gcm_gmult_4bit.  inp may be unaligned: every input
 * quadword is assembled from two ldq_u loads with extql/extqh.
 * NOTE(review): the first pass runs before len is tested and the loop
 * exits only when len reaches exactly 0, so callers presumably pass a
 * non-zero multiple of 16 - confirm.
 */
        .globl  gcm_ghash_4bit
        .align  4
        .ent    gcm_ghash_4bit
gcm_ghash_4bit:
        lda     sp,-32(sp)
        stq     ra,0(sp)
        stq     s0,8(sp)
        stq     s1,16(sp)
        .mask   0x04000600,-32
        .frame  sp,32,ra
        .prologue 0

        /* Raw quadwords covering the first 16 input bytes. */
        ldq_u   inhi,0(inp)
        ldq_u   Thi0,7(inp)
        ldq_u   inlo,8(inp)
        ldq_u   Tlo0,15(inp)
        ldq     Xhi,0(Xi)
        ldq     Xlo,8(Xi)

        bsr     t0,picmeup              /* remtbl = &rem_4bit; links via t0, ra untouched */
        nop

.Louter:
        extql   inhi,inp,inhi
        extqh   Thi0,inp,Thi0
        or      inhi,Thi0,inhi
        lda     inp,16(inp)             /* adding 16 keeps the low three bits used by extql/extqh */

        extql   inlo,inp,inlo
        extqh   Tlo0,inp,Tlo0
        or      inlo,Tlo0,inlo
        subq    len,16,len

        xor     Xlo,inlo,Xlo
        xor     Xhi,inhi,Xhi
        .align  4

        /* Seed Z from the low nibble of byte 7 of Xlo. */
        extbl   Xlo,7,nlo
        and     nlo,0xf0,nhi
        sll     nlo,4,nlo
        and     nlo,0xf0,nlo

        addq    nlo,Htbl,nlo
        ldq     Zlo,8(nlo)
        addq    nhi,Htbl,nhi
        ldq     Zhi,0(nlo)

        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        lda     cnt,6(zero)
        extbl   Xlo,6,nlo

        ldq     Tlo1,8(nhi)
        s8addq  remp,remtbl,remp        /* remp = remtbl + remp*8 */
        ldq     Thi1,0(nhi)
        srl     Zlo,4,Zlo

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo
        and     nlo,0xf0,nhi

        xor     Tlo1,Zlo,Zlo
        sll     nlo,4,nlo
        xor     Thi1,Zhi,Zhi
        and     nlo,0xf0,nlo

        addq    nlo,Htbl,nlo
        ldq     Tlo0,8(nlo)
        addq    nhi,Htbl,nhi
        ldq     Thi0,0(nlo)

        /* Bytes 6..0 of Xlo: two nibble steps per pass, exits when cnt == 0. */
.Looplo2:
        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        subq    cnt,1,cnt
        srl     Zlo,4,Zlo

        ldq     Tlo1,8(nhi)
        xor     rem,Zhi,Zhi
        ldq     Thi1,0(nhi)
        s8addq  remp,remtbl,remp

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo
        extbl   Xlo,cnt,nlo

        and     nlo,0xf0,nhi
        xor     Thi0,Zhi,Zhi
        xor     Tlo0,Zlo,Zlo
        sll     nlo,4,nlo


        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        and     nlo,0xf0,nlo
        srl     Zlo,4,Zlo

        s8addq  remp,remtbl,remp
        xor     rem,Zhi,Zhi
        addq    nlo,Htbl,nlo
        addq    nhi,Htbl,nhi

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        ldq     Tlo0,8(nlo)
        xor     t0,Zlo,Zlo

        xor     Tlo1,Zlo,Zlo
        xor     Thi1,Zhi,Zhi
        ldq     Thi0,0(nlo)
        bne     cnt,.Looplo2


        /* Switch from Xlo to Xhi: same pass shape, reading byte 7 of Xhi. */
        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        lda     cnt,7(zero)
        srl     Zlo,4,Zlo

        ldq     Tlo1,8(nhi)
        xor     rem,Zhi,Zhi
        ldq     Thi1,0(nhi)
        s8addq  remp,remtbl,remp

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo
        extbl   Xhi,cnt,nlo

        and     nlo,0xf0,nhi
        xor     Thi0,Zhi,Zhi
        xor     Tlo0,Zlo,Zlo
        sll     nlo,4,nlo

        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        and     nlo,0xf0,nlo
        srl     Zlo,4,Zlo

        s8addq  remp,remtbl,remp
        xor     rem,Zhi,Zhi
        addq    nlo,Htbl,nlo
        addq    nhi,Htbl,nhi

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        ldq     Tlo0,8(nlo)
        xor     t0,Zlo,Zlo

        xor     Tlo1,Zlo,Zlo
        xor     Thi1,Zhi,Zhi
        ldq     Thi0,0(nlo)
        unop


        /* Bytes 6..0 of Xhi. */
.Loophi2:
        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        subq    cnt,1,cnt
        srl     Zlo,4,Zlo

        ldq     Tlo1,8(nhi)
        xor     rem,Zhi,Zhi
        ldq     Thi1,0(nhi)
        s8addq  remp,remtbl,remp

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo
        extbl   Xhi,cnt,nlo

        and     nlo,0xf0,nhi
        xor     Thi0,Zhi,Zhi
        xor     Tlo0,Zlo,Zlo
        sll     nlo,4,nlo


        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        and     nlo,0xf0,nlo
        srl     Zlo,4,Zlo

        s8addq  remp,remtbl,remp
        xor     rem,Zhi,Zhi
        addq    nlo,Htbl,nlo
        addq    nhi,Htbl,nhi

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        ldq     Tlo0,8(nlo)
        xor     t0,Zlo,Zlo

        xor     Tlo1,Zlo,Zlo
        xor     Thi1,Zhi,Zhi
        ldq     Thi0,0(nlo)
        bne     cnt,.Loophi2


        /* Drain: the last two nibble steps, with no further byte to fetch. */
        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        srl     Zlo,4,Zlo

        ldq     Tlo1,8(nhi)
        xor     rem,Zhi,Zhi
        ldq     Thi1,0(nhi)
        s8addq  remp,remtbl,remp

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     t0,Zlo,Zlo

        xor     Tlo0,Zlo,Zlo
        xor     Thi0,Zhi,Zhi

        and     Zlo,0x0f,remp
        sll     Zhi,60,t0
        srl     Zlo,4,Zlo

        s8addq  remp,remtbl,remp
        xor     rem,Zhi,Zhi

        ldq     rem,0(remp)
        srl     Zhi,4,Zhi
        xor     Tlo1,Zlo,Zlo
        xor     Thi1,Zhi,Zhi
        xor     t0,Zlo,Zlo
        xor     rem,Zhi,Zhi

        /*
         * Byte swap, as in gcm_gmult_4bit.  The second part is duplicated:
         * the fall-through copy is interleaved with the ldq_u loads for the
         * next 16 input bytes, the .Ldone copy is used when len == 0.
         */
        srl     Zlo,24,t0
        srl     Zlo,8,t1

        sll     Zlo,8,t2
        sll     Zlo,24,Zlo
        zapnot  t0,0x11,t0
        zapnot  t1,0x22,t1

        zapnot  Zlo,0x88,Zlo
        or      t0,t1,t0
        zapnot  t2,0x44,t2

        or      Zlo,t0,Zlo
        srl     Zhi,24,t0
        srl     Zhi,8,t1

        or      Zlo,t2,Zlo
        sll     Zhi,8,t2
        sll     Zhi,24,Zhi

        srl     Zlo,32,Xlo
        sll     Zlo,32,Zlo
        beq     len,.Ldone

        zapnot  t0,0x11,t0
        zapnot  t1,0x22,t1
        or      Zlo,Xlo,Xlo
        ldq_u   inhi,0(inp)

        zapnot  Zhi,0x88,Zhi
        or      t0,t1,t0
        zapnot  t2,0x44,t2
        ldq_u   Thi0,7(inp)

        or      Zhi,t0,Zhi
        or      Zhi,t2,Zhi
        ldq_u   inlo,8(inp)
        ldq_u   Tlo0,15(inp)

        srl     Zhi,32,Xhi
        sll     Zhi,32,Zhi

        or      Zhi,Xhi,Xhi
        br      zero,.Louter

.Ldone:
        zapnot  t0,0x11,t0
        zapnot  t1,0x22,t1
        or      Zlo,Xlo,Xlo

        zapnot  Zhi,0x88,Zhi
        or      t0,t1,t0
        zapnot  t2,0x44,t2

        or      Zhi,t0,Zhi
        or      Zhi,t2,Zhi

        srl     Zhi,32,Xhi
        sll     Zhi,32,Zhi

        or      Zhi,Xhi,Xhi

        stq     Xlo,8(Xi)
        stq     Xhi,0(Xi)

        .set    noreorder
        /*
         * ra is stored at 0(sp) in the prologue but is not reloaded here
         * (the original keeps "ldq ra,0(sp)" commented out): the only call
         * in this function links through t0, so ra still holds the return
         * address.
         */
        ldq     s0,8(sp)
        ldq     s1,16(sp)
        lda     sp,32(sp)
        ret     (ra)
        .end    gcm_ghash_4bit

/*
 * picmeup - load the address of rem_4bit into AT (remtbl).
 * Called with "bsr t0,picmeup" and returns through t0.
 * "br" writes the address of .Lpic into remtbl; rem_4bit starts three
 * instructions (lda, ret, nop = 12 bytes) past .Lpic.
 */
        .align  4
        .ent    picmeup
picmeup:
        .frame  sp,0,t0
        .prologue 0
        br      remtbl,.Lpic
.Lpic:  lda     remtbl,12(remtbl)
        ret     (t0)
        .end    picmeup
        nop

/*
 * Sixteen 8-byte entries, indexed by the low four bits of Zlo through
 * "s8addq remp,remtbl,remp".  Each entry is emitted as two .long words:
 * zero, then the 16-bit constant shifted left by 16.
 * NOTE(review): read with ldq this yields constant << 48, assuming the
 * little-endian quadword layout - confirm for the target.
 * The table sits in .text right behind picmeup because its address is
 * derived from the program counter there.
 */
rem_4bit:
        .long   0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16
        .long   0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16
        .long   0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16
        .long   0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16
        .ascii  "GHASH for Alpha, CRYPTOGAMS by <appro@openssl.org>"
        .align  4