1/* 2 * ==================================================================== 3 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 4 * project. Rights for redistribution and usage in source and binary 5 * forms are granted according to the OpenSSL license. 6 * ==================================================================== 7 * 8 * sha256/512_block procedure for x86_64. 9 * 10 * 40% improvement over compiler-generated code on Opteron. On EM64T 11 * sha256 was observed to run >80% faster and sha512 - >40%. No magical 12 * tricks, just straight implementation... I really wonder why gcc 13 * [being armed with inline assembler] fails to generate as fast code. 14 * The only thing which is cool about this module is that it's very 15 * same instruction sequence used for both SHA-256 and SHA-512. In 16 * former case the instructions operate on 32-bit operands, while in 17 * latter - on 64-bit ones. All I had to do is to get one flavor right, 18 * the other one passed the test right away:-) 19 * 20 * sha256_block runs in ~1005 cycles on Opteron, which gives you 21 * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock 22 * frequency in GHz. sha512_block runs in ~1275 cycles, which results 23 * in 128*1000/1275=100MBps per GHz. Is there room for improvement? 24 * Well, if you compare it to IA-64 implementation, which maintains 25 * X[16] in register bank[!], tends to 4 instructions per CPU clock 26 * cycle and runs in 1003 cycles, 1275 is very good result for 3-way 27 * issue Opteron pipeline and X[16] maintained in memory. So that *if* 28 * there is a way to improve it, *then* the only way would be to try to 29 * offload X[16] updates to SSE unit, but that would require "deeper" 30 * loop unroll, which in turn would naturally cause size blow-up, not 31 * to mention increased complexity! 
And once again, only *if* it's 32 * actually possible to noticeably improve overall ILP, instruction 33 * level parallelism, on a given CPU implementation in this case. 34 * 35 * Special note on Intel EM64T. While Opteron CPU exhibits perfect 36 * performance ratio of 1.5 between 64- and 32-bit flavors [see above], 37 * [currently available] EM64T CPUs apparently are far from it. On the 38 * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit 39 * sha256_block:-( This is presumably because 64-bit shifts/rotates 40 * apparently are not atomic instructions, but implemented in microcode. 41 */ 42 43/* 44 * OpenSolaris OS modifications 45 * 46 * Sun elects to use this software under the BSD license. 47 * 48 * This source originates from OpenSSL file sha512-x86_64.pl at 49 * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz 50 * (presumably for future OpenSSL release 0.9.8h), with these changes: 51 * 52 * 1. Added perl "use strict" and declared variables. 53 * 54 * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from 55 * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. 56 * 57 * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) 58 * assemblers). Replaced the .picmeup macro with assembler code. 59 * 60 * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", 61 * at the beginning of SHA2_CTX (the next field is 8-byte aligned). 62 */ 63 64/* 65 * This file was generated by a perl script (sha512-x86_64.pl) that was 66 * used to generate sha256 and sha512 variants from the same code base. 67 * The comments from the original file have been pasted above. 
68 */ 69 70 71#if defined(lint) || defined(__lint) 72#include <sys/stdint.h> 73#include <sha2/sha2.h> 74 75/* ARGSUSED */ 76void 77SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) 78{ 79} 80 81 82#else 83#define _ASM 84#include <sys/asm_linkage.h> 85 86ENTRY_NP(SHA512TransformBlocks) 87 push %rbx 88 push %rbp 89 push %r12 90 push %r13 91 push %r14 92 push %r15 93 mov %rsp,%rbp # copy %rsp 94 shl $4,%rdx # num*16 95 sub $16*8+4*8,%rsp 96 lea (%rsi,%rdx,8),%rdx # inp+num*16*8 97 and $-64,%rsp # align stack frame 98 add $8,%rdi # Skip OpenSolaris field, "algotype" 99 mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg 100 mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg 101 mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg 102 mov %rbp,16*8+3*8(%rsp) # save copy of %rsp 103 104 #.picmeup %rbp 105 # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts 106 # the address of the "next" instruction into the target register 107 # (%rbp). This generates these 2 instructions: 108 lea .Llea(%rip),%rbp 109 #nop # .picmeup generates a nop for mod 8 alignment--not needed here 110 111.Llea: 112 lea K512-.(%rbp),%rbp 113 114 mov 8*0(%rdi),%rax 115 mov 8*1(%rdi),%rbx 116 mov 8*2(%rdi),%rcx 117 mov 8*3(%rdi),%rdx 118 mov 8*4(%rdi),%r8 119 mov 8*5(%rdi),%r9 120 mov 8*6(%rdi),%r10 121 mov 8*7(%rdi),%r11 122 jmp .Lloop 123 124.align 16 125.Lloop: 126 xor %rdi,%rdi 127 mov 8*0(%rsi),%r12 128 bswap %r12 129 mov %r8,%r13 130 mov %r8,%r14 131 mov %r9,%r15 132 133 ror $14,%r13 134 ror $18,%r14 135 xor %r10,%r15 # f^g 136 137 xor %r14,%r13 138 ror $23,%r14 139 and %r8,%r15 # (f^g)&e 140 mov %r12,0(%rsp) 141 142 xor %r14,%r13 # Sigma1(e) 143 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g 144 add %r11,%r12 # T1+=h 145 146 mov %rax,%r11 147 add %r13,%r12 # T1+=Sigma1(e) 148 149 add %r15,%r12 # T1+=Ch(e,f,g) 150 mov %rax,%r13 151 mov %rax,%r14 152 153 ror $28,%r11 154 ror $34,%r13 155 mov %rax,%r15 156 add (%rbp,%rdi,8),%r12 # T1+=K[round] 157 158 xor %r13,%r11 159 ror $5,%r13 160 or 
%rcx,%r14 # a|c 161 162 xor %r13,%r11 # h=Sigma0(a) 163 and %rcx,%r15 # a&c 164 add %r12,%rdx # d+=T1 165 166 and %rbx,%r14 # (a|c)&b 167 add %r12,%r11 # h+=T1 168 169 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 170 lea 1(%rdi),%rdi # round++ 171 172 add %r14,%r11 # h+=Maj(a,b,c) 173 mov 8*1(%rsi),%r12 174 bswap %r12 175 mov %rdx,%r13 176 mov %rdx,%r14 177 mov %r8,%r15 178 179 ror $14,%r13 180 ror $18,%r14 181 xor %r9,%r15 # f^g 182 183 xor %r14,%r13 184 ror $23,%r14 185 and %rdx,%r15 # (f^g)&e 186 mov %r12,8(%rsp) 187 188 xor %r14,%r13 # Sigma1(e) 189 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g 190 add %r10,%r12 # T1+=h 191 192 mov %r11,%r10 193 add %r13,%r12 # T1+=Sigma1(e) 194 195 add %r15,%r12 # T1+=Ch(e,f,g) 196 mov %r11,%r13 197 mov %r11,%r14 198 199 ror $28,%r10 200 ror $34,%r13 201 mov %r11,%r15 202 add (%rbp,%rdi,8),%r12 # T1+=K[round] 203 204 xor %r13,%r10 205 ror $5,%r13 206 or %rbx,%r14 # a|c 207 208 xor %r13,%r10 # h=Sigma0(a) 209 and %rbx,%r15 # a&c 210 add %r12,%rcx # d+=T1 211 212 and %rax,%r14 # (a|c)&b 213 add %r12,%r10 # h+=T1 214 215 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 216 lea 1(%rdi),%rdi # round++ 217 218 add %r14,%r10 # h+=Maj(a,b,c) 219 mov 8*2(%rsi),%r12 220 bswap %r12 221 mov %rcx,%r13 222 mov %rcx,%r14 223 mov %rdx,%r15 224 225 ror $14,%r13 226 ror $18,%r14 227 xor %r8,%r15 # f^g 228 229 xor %r14,%r13 230 ror $23,%r14 231 and %rcx,%r15 # (f^g)&e 232 mov %r12,16(%rsp) 233 234 xor %r14,%r13 # Sigma1(e) 235 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g 236 add %r9,%r12 # T1+=h 237 238 mov %r10,%r9 239 add %r13,%r12 # T1+=Sigma1(e) 240 241 add %r15,%r12 # T1+=Ch(e,f,g) 242 mov %r10,%r13 243 mov %r10,%r14 244 245 ror $28,%r9 246 ror $34,%r13 247 mov %r10,%r15 248 add (%rbp,%rdi,8),%r12 # T1+=K[round] 249 250 xor %r13,%r9 251 ror $5,%r13 252 or %rax,%r14 # a|c 253 254 xor %r13,%r9 # h=Sigma0(a) 255 and %rax,%r15 # a&c 256 add %r12,%rbx # d+=T1 257 258 and %r11,%r14 # (a|c)&b 259 add %r12,%r9 # h+=T1 260 261 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 262 lea 
1(%rdi),%rdi # round++ 263 264 add %r14,%r9 # h+=Maj(a,b,c) 265 mov 8*3(%rsi),%r12 266 bswap %r12 267 mov %rbx,%r13 268 mov %rbx,%r14 269 mov %rcx,%r15 270 271 ror $14,%r13 272 ror $18,%r14 273 xor %rdx,%r15 # f^g 274 275 xor %r14,%r13 276 ror $23,%r14 277 and %rbx,%r15 # (f^g)&e 278 mov %r12,24(%rsp) 279 280 xor %r14,%r13 # Sigma1(e) 281 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g 282 add %r8,%r12 # T1+=h 283 284 mov %r9,%r8 285 add %r13,%r12 # T1+=Sigma1(e) 286 287 add %r15,%r12 # T1+=Ch(e,f,g) 288 mov %r9,%r13 289 mov %r9,%r14 290 291 ror $28,%r8 292 ror $34,%r13 293 mov %r9,%r15 294 add (%rbp,%rdi,8),%r12 # T1+=K[round] 295 296 xor %r13,%r8 297 ror $5,%r13 298 or %r11,%r14 # a|c 299 300 xor %r13,%r8 # h=Sigma0(a) 301 and %r11,%r15 # a&c 302 add %r12,%rax # d+=T1 303 304 and %r10,%r14 # (a|c)&b 305 add %r12,%r8 # h+=T1 306 307 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 308 lea 1(%rdi),%rdi # round++ 309 310 add %r14,%r8 # h+=Maj(a,b,c) 311 mov 8*4(%rsi),%r12 312 bswap %r12 313 mov %rax,%r13 314 mov %rax,%r14 315 mov %rbx,%r15 316 317 ror $14,%r13 318 ror $18,%r14 319 xor %rcx,%r15 # f^g 320 321 xor %r14,%r13 322 ror $23,%r14 323 and %rax,%r15 # (f^g)&e 324 mov %r12,32(%rsp) 325 326 xor %r14,%r13 # Sigma1(e) 327 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g 328 add %rdx,%r12 # T1+=h 329 330 mov %r8,%rdx 331 add %r13,%r12 # T1+=Sigma1(e) 332 333 add %r15,%r12 # T1+=Ch(e,f,g) 334 mov %r8,%r13 335 mov %r8,%r14 336 337 ror $28,%rdx 338 ror $34,%r13 339 mov %r8,%r15 340 add (%rbp,%rdi,8),%r12 # T1+=K[round] 341 342 xor %r13,%rdx 343 ror $5,%r13 344 or %r10,%r14 # a|c 345 346 xor %r13,%rdx # h=Sigma0(a) 347 and %r10,%r15 # a&c 348 add %r12,%r11 # d+=T1 349 350 and %r9,%r14 # (a|c)&b 351 add %r12,%rdx # h+=T1 352 353 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 354 lea 1(%rdi),%rdi # round++ 355 356 add %r14,%rdx # h+=Maj(a,b,c) 357 mov 8*5(%rsi),%r12 358 bswap %r12 359 mov %r11,%r13 360 mov %r11,%r14 361 mov %rax,%r15 362 363 ror $14,%r13 364 ror $18,%r14 365 xor %rbx,%r15 # f^g 366 367 
xor %r14,%r13 368 ror $23,%r14 369 and %r11,%r15 # (f^g)&e 370 mov %r12,40(%rsp) 371 372 xor %r14,%r13 # Sigma1(e) 373 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g 374 add %rcx,%r12 # T1+=h 375 376 mov %rdx,%rcx 377 add %r13,%r12 # T1+=Sigma1(e) 378 379 add %r15,%r12 # T1+=Ch(e,f,g) 380 mov %rdx,%r13 381 mov %rdx,%r14 382 383 ror $28,%rcx 384 ror $34,%r13 385 mov %rdx,%r15 386 add (%rbp,%rdi,8),%r12 # T1+=K[round] 387 388 xor %r13,%rcx 389 ror $5,%r13 390 or %r9,%r14 # a|c 391 392 xor %r13,%rcx # h=Sigma0(a) 393 and %r9,%r15 # a&c 394 add %r12,%r10 # d+=T1 395 396 and %r8,%r14 # (a|c)&b 397 add %r12,%rcx # h+=T1 398 399 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 400 lea 1(%rdi),%rdi # round++ 401 402 add %r14,%rcx # h+=Maj(a,b,c) 403 mov 8*6(%rsi),%r12 404 bswap %r12 405 mov %r10,%r13 406 mov %r10,%r14 407 mov %r11,%r15 408 409 ror $14,%r13 410 ror $18,%r14 411 xor %rax,%r15 # f^g 412 413 xor %r14,%r13 414 ror $23,%r14 415 and %r10,%r15 # (f^g)&e 416 mov %r12,48(%rsp) 417 418 xor %r14,%r13 # Sigma1(e) 419 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g 420 add %rbx,%r12 # T1+=h 421 422 mov %rcx,%rbx 423 add %r13,%r12 # T1+=Sigma1(e) 424 425 add %r15,%r12 # T1+=Ch(e,f,g) 426 mov %rcx,%r13 427 mov %rcx,%r14 428 429 ror $28,%rbx 430 ror $34,%r13 431 mov %rcx,%r15 432 add (%rbp,%rdi,8),%r12 # T1+=K[round] 433 434 xor %r13,%rbx 435 ror $5,%r13 436 or %r8,%r14 # a|c 437 438 xor %r13,%rbx # h=Sigma0(a) 439 and %r8,%r15 # a&c 440 add %r12,%r9 # d+=T1 441 442 and %rdx,%r14 # (a|c)&b 443 add %r12,%rbx # h+=T1 444 445 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 446 lea 1(%rdi),%rdi # round++ 447 448 add %r14,%rbx # h+=Maj(a,b,c) 449 mov 8*7(%rsi),%r12 450 bswap %r12 451 mov %r9,%r13 452 mov %r9,%r14 453 mov %r10,%r15 454 455 ror $14,%r13 456 ror $18,%r14 457 xor %r11,%r15 # f^g 458 459 xor %r14,%r13 460 ror $23,%r14 461 and %r9,%r15 # (f^g)&e 462 mov %r12,56(%rsp) 463 464 xor %r14,%r13 # Sigma1(e) 465 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g 466 add %rax,%r12 # T1+=h 467 468 mov %rbx,%rax 469 add 
%r13,%r12 # T1+=Sigma1(e) 470 471 add %r15,%r12 # T1+=Ch(e,f,g) 472 mov %rbx,%r13 473 mov %rbx,%r14 474 475 ror $28,%rax 476 ror $34,%r13 477 mov %rbx,%r15 478 add (%rbp,%rdi,8),%r12 # T1+=K[round] 479 480 xor %r13,%rax 481 ror $5,%r13 482 or %rdx,%r14 # a|c 483 484 xor %r13,%rax # h=Sigma0(a) 485 and %rdx,%r15 # a&c 486 add %r12,%r8 # d+=T1 487 488 and %rcx,%r14 # (a|c)&b 489 add %r12,%rax # h+=T1 490 491 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 492 lea 1(%rdi),%rdi # round++ 493 494 add %r14,%rax # h+=Maj(a,b,c) 495 mov 8*8(%rsi),%r12 496 bswap %r12 497 mov %r8,%r13 498 mov %r8,%r14 499 mov %r9,%r15 500 501 ror $14,%r13 502 ror $18,%r14 503 xor %r10,%r15 # f^g 504 505 xor %r14,%r13 506 ror $23,%r14 507 and %r8,%r15 # (f^g)&e 508 mov %r12,64(%rsp) 509 510 xor %r14,%r13 # Sigma1(e) 511 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g 512 add %r11,%r12 # T1+=h 513 514 mov %rax,%r11 515 add %r13,%r12 # T1+=Sigma1(e) 516 517 add %r15,%r12 # T1+=Ch(e,f,g) 518 mov %rax,%r13 519 mov %rax,%r14 520 521 ror $28,%r11 522 ror $34,%r13 523 mov %rax,%r15 524 add (%rbp,%rdi,8),%r12 # T1+=K[round] 525 526 xor %r13,%r11 527 ror $5,%r13 528 or %rcx,%r14 # a|c 529 530 xor %r13,%r11 # h=Sigma0(a) 531 and %rcx,%r15 # a&c 532 add %r12,%rdx # d+=T1 533 534 and %rbx,%r14 # (a|c)&b 535 add %r12,%r11 # h+=T1 536 537 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 538 lea 1(%rdi),%rdi # round++ 539 540 add %r14,%r11 # h+=Maj(a,b,c) 541 mov 8*9(%rsi),%r12 542 bswap %r12 543 mov %rdx,%r13 544 mov %rdx,%r14 545 mov %r8,%r15 546 547 ror $14,%r13 548 ror $18,%r14 549 xor %r9,%r15 # f^g 550 551 xor %r14,%r13 552 ror $23,%r14 553 and %rdx,%r15 # (f^g)&e 554 mov %r12,72(%rsp) 555 556 xor %r14,%r13 # Sigma1(e) 557 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g 558 add %r10,%r12 # T1+=h 559 560 mov %r11,%r10 561 add %r13,%r12 # T1+=Sigma1(e) 562 563 add %r15,%r12 # T1+=Ch(e,f,g) 564 mov %r11,%r13 565 mov %r11,%r14 566 567 ror $28,%r10 568 ror $34,%r13 569 mov %r11,%r15 570 add (%rbp,%rdi,8),%r12 # T1+=K[round] 571 572 xor 
%r13,%r10 573 ror $5,%r13 574 or %rbx,%r14 # a|c 575 576 xor %r13,%r10 # h=Sigma0(a) 577 and %rbx,%r15 # a&c 578 add %r12,%rcx # d+=T1 579 580 and %rax,%r14 # (a|c)&b 581 add %r12,%r10 # h+=T1 582 583 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 584 lea 1(%rdi),%rdi # round++ 585 586 add %r14,%r10 # h+=Maj(a,b,c) 587 mov 8*10(%rsi),%r12 588 bswap %r12 589 mov %rcx,%r13 590 mov %rcx,%r14 591 mov %rdx,%r15 592 593 ror $14,%r13 594 ror $18,%r14 595 xor %r8,%r15 # f^g 596 597 xor %r14,%r13 598 ror $23,%r14 599 and %rcx,%r15 # (f^g)&e 600 mov %r12,80(%rsp) 601 602 xor %r14,%r13 # Sigma1(e) 603 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g 604 add %r9,%r12 # T1+=h 605 606 mov %r10,%r9 607 add %r13,%r12 # T1+=Sigma1(e) 608 609 add %r15,%r12 # T1+=Ch(e,f,g) 610 mov %r10,%r13 611 mov %r10,%r14 612 613 ror $28,%r9 614 ror $34,%r13 615 mov %r10,%r15 616 add (%rbp,%rdi,8),%r12 # T1+=K[round] 617 618 xor %r13,%r9 619 ror $5,%r13 620 or %rax,%r14 # a|c 621 622 xor %r13,%r9 # h=Sigma0(a) 623 and %rax,%r15 # a&c 624 add %r12,%rbx # d+=T1 625 626 and %r11,%r14 # (a|c)&b 627 add %r12,%r9 # h+=T1 628 629 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 630 lea 1(%rdi),%rdi # round++ 631 632 add %r14,%r9 # h+=Maj(a,b,c) 633 mov 8*11(%rsi),%r12 634 bswap %r12 635 mov %rbx,%r13 636 mov %rbx,%r14 637 mov %rcx,%r15 638 639 ror $14,%r13 640 ror $18,%r14 641 xor %rdx,%r15 # f^g 642 643 xor %r14,%r13 644 ror $23,%r14 645 and %rbx,%r15 # (f^g)&e 646 mov %r12,88(%rsp) 647 648 xor %r14,%r13 # Sigma1(e) 649 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g 650 add %r8,%r12 # T1+=h 651 652 mov %r9,%r8 653 add %r13,%r12 # T1+=Sigma1(e) 654 655 add %r15,%r12 # T1+=Ch(e,f,g) 656 mov %r9,%r13 657 mov %r9,%r14 658 659 ror $28,%r8 660 ror $34,%r13 661 mov %r9,%r15 662 add (%rbp,%rdi,8),%r12 # T1+=K[round] 663 664 xor %r13,%r8 665 ror $5,%r13 666 or %r11,%r14 # a|c 667 668 xor %r13,%r8 # h=Sigma0(a) 669 and %r11,%r15 # a&c 670 add %r12,%rax # d+=T1 671 672 and %r10,%r14 # (a|c)&b 673 add %r12,%r8 # h+=T1 674 675 or %r15,%r14 # 
Maj(a,b,c)=((a|c)&b)|(a&c) 676 lea 1(%rdi),%rdi # round++ 677 678 add %r14,%r8 # h+=Maj(a,b,c) 679 mov 8*12(%rsi),%r12 680 bswap %r12 681 mov %rax,%r13 682 mov %rax,%r14 683 mov %rbx,%r15 684 685 ror $14,%r13 686 ror $18,%r14 687 xor %rcx,%r15 # f^g 688 689 xor %r14,%r13 690 ror $23,%r14 691 and %rax,%r15 # (f^g)&e 692 mov %r12,96(%rsp) 693 694 xor %r14,%r13 # Sigma1(e) 695 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g 696 add %rdx,%r12 # T1+=h 697 698 mov %r8,%rdx 699 add %r13,%r12 # T1+=Sigma1(e) 700 701 add %r15,%r12 # T1+=Ch(e,f,g) 702 mov %r8,%r13 703 mov %r8,%r14 704 705 ror $28,%rdx 706 ror $34,%r13 707 mov %r8,%r15 708 add (%rbp,%rdi,8),%r12 # T1+=K[round] 709 710 xor %r13,%rdx 711 ror $5,%r13 712 or %r10,%r14 # a|c 713 714 xor %r13,%rdx # h=Sigma0(a) 715 and %r10,%r15 # a&c 716 add %r12,%r11 # d+=T1 717 718 and %r9,%r14 # (a|c)&b 719 add %r12,%rdx # h+=T1 720 721 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 722 lea 1(%rdi),%rdi # round++ 723 724 add %r14,%rdx # h+=Maj(a,b,c) 725 mov 8*13(%rsi),%r12 726 bswap %r12 727 mov %r11,%r13 728 mov %r11,%r14 729 mov %rax,%r15 730 731 ror $14,%r13 732 ror $18,%r14 733 xor %rbx,%r15 # f^g 734 735 xor %r14,%r13 736 ror $23,%r14 737 and %r11,%r15 # (f^g)&e 738 mov %r12,104(%rsp) 739 740 xor %r14,%r13 # Sigma1(e) 741 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g 742 add %rcx,%r12 # T1+=h 743 744 mov %rdx,%rcx 745 add %r13,%r12 # T1+=Sigma1(e) 746 747 add %r15,%r12 # T1+=Ch(e,f,g) 748 mov %rdx,%r13 749 mov %rdx,%r14 750 751 ror $28,%rcx 752 ror $34,%r13 753 mov %rdx,%r15 754 add (%rbp,%rdi,8),%r12 # T1+=K[round] 755 756 xor %r13,%rcx 757 ror $5,%r13 758 or %r9,%r14 # a|c 759 760 xor %r13,%rcx # h=Sigma0(a) 761 and %r9,%r15 # a&c 762 add %r12,%r10 # d+=T1 763 764 and %r8,%r14 # (a|c)&b 765 add %r12,%rcx # h+=T1 766 767 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 768 lea 1(%rdi),%rdi # round++ 769 770 add %r14,%rcx # h+=Maj(a,b,c) 771 mov 8*14(%rsi),%r12 772 bswap %r12 773 mov %r10,%r13 774 mov %r10,%r14 775 mov %r11,%r15 776 777 ror $14,%r13 
778 ror $18,%r14 779 xor %rax,%r15 # f^g 780 781 xor %r14,%r13 782 ror $23,%r14 783 and %r10,%r15 # (f^g)&e 784 mov %r12,112(%rsp) 785 786 xor %r14,%r13 # Sigma1(e) 787 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g 788 add %rbx,%r12 # T1+=h 789 790 mov %rcx,%rbx 791 add %r13,%r12 # T1+=Sigma1(e) 792 793 add %r15,%r12 # T1+=Ch(e,f,g) 794 mov %rcx,%r13 795 mov %rcx,%r14 796 797 ror $28,%rbx 798 ror $34,%r13 799 mov %rcx,%r15 800 add (%rbp,%rdi,8),%r12 # T1+=K[round] 801 802 xor %r13,%rbx 803 ror $5,%r13 804 or %r8,%r14 # a|c 805 806 xor %r13,%rbx # h=Sigma0(a) 807 and %r8,%r15 # a&c 808 add %r12,%r9 # d+=T1 809 810 and %rdx,%r14 # (a|c)&b 811 add %r12,%rbx # h+=T1 812 813 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 814 lea 1(%rdi),%rdi # round++ 815 816 add %r14,%rbx # h+=Maj(a,b,c) 817 mov 8*15(%rsi),%r12 818 bswap %r12 819 mov %r9,%r13 820 mov %r9,%r14 821 mov %r10,%r15 822 823 ror $14,%r13 824 ror $18,%r14 825 xor %r11,%r15 # f^g 826 827 xor %r14,%r13 828 ror $23,%r14 829 and %r9,%r15 # (f^g)&e 830 mov %r12,120(%rsp) 831 832 xor %r14,%r13 # Sigma1(e) 833 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g 834 add %rax,%r12 # T1+=h 835 836 mov %rbx,%rax 837 add %r13,%r12 # T1+=Sigma1(e) 838 839 add %r15,%r12 # T1+=Ch(e,f,g) 840 mov %rbx,%r13 841 mov %rbx,%r14 842 843 ror $28,%rax 844 ror $34,%r13 845 mov %rbx,%r15 846 add (%rbp,%rdi,8),%r12 # T1+=K[round] 847 848 xor %r13,%rax 849 ror $5,%r13 850 or %rdx,%r14 # a|c 851 852 xor %r13,%rax # h=Sigma0(a) 853 and %rdx,%r15 # a&c 854 add %r12,%r8 # d+=T1 855 856 and %rcx,%r14 # (a|c)&b 857 add %r12,%rax # h+=T1 858 859 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 860 lea 1(%rdi),%rdi # round++ 861 862 add %r14,%rax # h+=Maj(a,b,c) 863 jmp .Lrounds_16_xx 864.align 16 865.Lrounds_16_xx: 866 mov 8(%rsp),%r13 867 mov 112(%rsp),%r12 868 869 mov %r13,%r15 870 871 shr $7,%r13 872 ror $1,%r15 873 874 xor %r15,%r13 875 ror $7,%r15 876 877 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 878 mov %r12,%r14 879 880 shr $6,%r12 881 ror $19,%r14 882 883 xor %r14,%r12 884 
ror $42,%r14 885 886 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 887 888 add %r13,%r12 889 890 add 72(%rsp),%r12 891 892 add 0(%rsp),%r12 893 mov %r8,%r13 894 mov %r8,%r14 895 mov %r9,%r15 896 897 ror $14,%r13 898 ror $18,%r14 899 xor %r10,%r15 # f^g 900 901 xor %r14,%r13 902 ror $23,%r14 903 and %r8,%r15 # (f^g)&e 904 mov %r12,0(%rsp) 905 906 xor %r14,%r13 # Sigma1(e) 907 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g 908 add %r11,%r12 # T1+=h 909 910 mov %rax,%r11 911 add %r13,%r12 # T1+=Sigma1(e) 912 913 add %r15,%r12 # T1+=Ch(e,f,g) 914 mov %rax,%r13 915 mov %rax,%r14 916 917 ror $28,%r11 918 ror $34,%r13 919 mov %rax,%r15 920 add (%rbp,%rdi,8),%r12 # T1+=K[round] 921 922 xor %r13,%r11 923 ror $5,%r13 924 or %rcx,%r14 # a|c 925 926 xor %r13,%r11 # h=Sigma0(a) 927 and %rcx,%r15 # a&c 928 add %r12,%rdx # d+=T1 929 930 and %rbx,%r14 # (a|c)&b 931 add %r12,%r11 # h+=T1 932 933 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 934 lea 1(%rdi),%rdi # round++ 935 936 add %r14,%r11 # h+=Maj(a,b,c) 937 mov 16(%rsp),%r13 938 mov 120(%rsp),%r12 939 940 mov %r13,%r15 941 942 shr $7,%r13 943 ror $1,%r15 944 945 xor %r15,%r13 946 ror $7,%r15 947 948 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 949 mov %r12,%r14 950 951 shr $6,%r12 952 ror $19,%r14 953 954 xor %r14,%r12 955 ror $42,%r14 956 957 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 958 959 add %r13,%r12 960 961 add 80(%rsp),%r12 962 963 add 8(%rsp),%r12 964 mov %rdx,%r13 965 mov %rdx,%r14 966 mov %r8,%r15 967 968 ror $14,%r13 969 ror $18,%r14 970 xor %r9,%r15 # f^g 971 972 xor %r14,%r13 973 ror $23,%r14 974 and %rdx,%r15 # (f^g)&e 975 mov %r12,8(%rsp) 976 977 xor %r14,%r13 # Sigma1(e) 978 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g 979 add %r10,%r12 # T1+=h 980 981 mov %r11,%r10 982 add %r13,%r12 # T1+=Sigma1(e) 983 984 add %r15,%r12 # T1+=Ch(e,f,g) 985 mov %r11,%r13 986 mov %r11,%r14 987 988 ror $28,%r10 989 ror $34,%r13 990 mov %r11,%r15 991 add (%rbp,%rdi,8),%r12 # T1+=K[round] 992 993 xor %r13,%r10 994 ror $5,%r13 995 or %rbx,%r14 # a|c 996 997 xor %r13,%r10 
# h=Sigma0(a) 998 and %rbx,%r15 # a&c 999 add %r12,%rcx # d+=T1 1000 1001 and %rax,%r14 # (a|c)&b 1002 add %r12,%r10 # h+=T1 1003 1004 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1005 lea 1(%rdi),%rdi # round++ 1006 1007 add %r14,%r10 # h+=Maj(a,b,c) 1008 mov 24(%rsp),%r13 1009 mov 0(%rsp),%r12 1010 1011 mov %r13,%r15 1012 1013 shr $7,%r13 1014 ror $1,%r15 1015 1016 xor %r15,%r13 1017 ror $7,%r15 1018 1019 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1020 mov %r12,%r14 1021 1022 shr $6,%r12 1023 ror $19,%r14 1024 1025 xor %r14,%r12 1026 ror $42,%r14 1027 1028 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1029 1030 add %r13,%r12 1031 1032 add 88(%rsp),%r12 1033 1034 add 16(%rsp),%r12 1035 mov %rcx,%r13 1036 mov %rcx,%r14 1037 mov %rdx,%r15 1038 1039 ror $14,%r13 1040 ror $18,%r14 1041 xor %r8,%r15 # f^g 1042 1043 xor %r14,%r13 1044 ror $23,%r14 1045 and %rcx,%r15 # (f^g)&e 1046 mov %r12,16(%rsp) 1047 1048 xor %r14,%r13 # Sigma1(e) 1049 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g 1050 add %r9,%r12 # T1+=h 1051 1052 mov %r10,%r9 1053 add %r13,%r12 # T1+=Sigma1(e) 1054 1055 add %r15,%r12 # T1+=Ch(e,f,g) 1056 mov %r10,%r13 1057 mov %r10,%r14 1058 1059 ror $28,%r9 1060 ror $34,%r13 1061 mov %r10,%r15 1062 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1063 1064 xor %r13,%r9 1065 ror $5,%r13 1066 or %rax,%r14 # a|c 1067 1068 xor %r13,%r9 # h=Sigma0(a) 1069 and %rax,%r15 # a&c 1070 add %r12,%rbx # d+=T1 1071 1072 and %r11,%r14 # (a|c)&b 1073 add %r12,%r9 # h+=T1 1074 1075 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1076 lea 1(%rdi),%rdi # round++ 1077 1078 add %r14,%r9 # h+=Maj(a,b,c) 1079 mov 32(%rsp),%r13 1080 mov 8(%rsp),%r12 1081 1082 mov %r13,%r15 1083 1084 shr $7,%r13 1085 ror $1,%r15 1086 1087 xor %r15,%r13 1088 ror $7,%r15 1089 1090 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1091 mov %r12,%r14 1092 1093 shr $6,%r12 1094 ror $19,%r14 1095 1096 xor %r14,%r12 1097 ror $42,%r14 1098 1099 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1100 1101 add %r13,%r12 1102 1103 add 96(%rsp),%r12 1104 1105 add 24(%rsp),%r12 1106 
mov %rbx,%r13 1107 mov %rbx,%r14 1108 mov %rcx,%r15 1109 1110 ror $14,%r13 1111 ror $18,%r14 1112 xor %rdx,%r15 # f^g 1113 1114 xor %r14,%r13 1115 ror $23,%r14 1116 and %rbx,%r15 # (f^g)&e 1117 mov %r12,24(%rsp) 1118 1119 xor %r14,%r13 # Sigma1(e) 1120 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g 1121 add %r8,%r12 # T1+=h 1122 1123 mov %r9,%r8 1124 add %r13,%r12 # T1+=Sigma1(e) 1125 1126 add %r15,%r12 # T1+=Ch(e,f,g) 1127 mov %r9,%r13 1128 mov %r9,%r14 1129 1130 ror $28,%r8 1131 ror $34,%r13 1132 mov %r9,%r15 1133 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1134 1135 xor %r13,%r8 1136 ror $5,%r13 1137 or %r11,%r14 # a|c 1138 1139 xor %r13,%r8 # h=Sigma0(a) 1140 and %r11,%r15 # a&c 1141 add %r12,%rax # d+=T1 1142 1143 and %r10,%r14 # (a|c)&b 1144 add %r12,%r8 # h+=T1 1145 1146 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1147 lea 1(%rdi),%rdi # round++ 1148 1149 add %r14,%r8 # h+=Maj(a,b,c) 1150 mov 40(%rsp),%r13 1151 mov 16(%rsp),%r12 1152 1153 mov %r13,%r15 1154 1155 shr $7,%r13 1156 ror $1,%r15 1157 1158 xor %r15,%r13 1159 ror $7,%r15 1160 1161 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1162 mov %r12,%r14 1163 1164 shr $6,%r12 1165 ror $19,%r14 1166 1167 xor %r14,%r12 1168 ror $42,%r14 1169 1170 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1171 1172 add %r13,%r12 1173 1174 add 104(%rsp),%r12 1175 1176 add 32(%rsp),%r12 1177 mov %rax,%r13 1178 mov %rax,%r14 1179 mov %rbx,%r15 1180 1181 ror $14,%r13 1182 ror $18,%r14 1183 xor %rcx,%r15 # f^g 1184 1185 xor %r14,%r13 1186 ror $23,%r14 1187 and %rax,%r15 # (f^g)&e 1188 mov %r12,32(%rsp) 1189 1190 xor %r14,%r13 # Sigma1(e) 1191 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g 1192 add %rdx,%r12 # T1+=h 1193 1194 mov %r8,%rdx 1195 add %r13,%r12 # T1+=Sigma1(e) 1196 1197 add %r15,%r12 # T1+=Ch(e,f,g) 1198 mov %r8,%r13 1199 mov %r8,%r14 1200 1201 ror $28,%rdx 1202 ror $34,%r13 1203 mov %r8,%r15 1204 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1205 1206 xor %r13,%rdx 1207 ror $5,%r13 1208 or %r10,%r14 # a|c 1209 1210 xor %r13,%rdx # h=Sigma0(a) 1211 and %r10,%r15 # 
a&c 1212 add %r12,%r11 # d+=T1 1213 1214 and %r9,%r14 # (a|c)&b 1215 add %r12,%rdx # h+=T1 1216 1217 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1218 lea 1(%rdi),%rdi # round++ 1219 1220 add %r14,%rdx # h+=Maj(a,b,c) 1221 mov 48(%rsp),%r13 1222 mov 24(%rsp),%r12 1223 1224 mov %r13,%r15 1225 1226 shr $7,%r13 1227 ror $1,%r15 1228 1229 xor %r15,%r13 1230 ror $7,%r15 1231 1232 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1233 mov %r12,%r14 1234 1235 shr $6,%r12 1236 ror $19,%r14 1237 1238 xor %r14,%r12 1239 ror $42,%r14 1240 1241 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1242 1243 add %r13,%r12 1244 1245 add 112(%rsp),%r12 1246 1247 add 40(%rsp),%r12 1248 mov %r11,%r13 1249 mov %r11,%r14 1250 mov %rax,%r15 1251 1252 ror $14,%r13 1253 ror $18,%r14 1254 xor %rbx,%r15 # f^g 1255 1256 xor %r14,%r13 1257 ror $23,%r14 1258 and %r11,%r15 # (f^g)&e 1259 mov %r12,40(%rsp) 1260 1261 xor %r14,%r13 # Sigma1(e) 1262 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g 1263 add %rcx,%r12 # T1+=h 1264 1265 mov %rdx,%rcx 1266 add %r13,%r12 # T1+=Sigma1(e) 1267 1268 add %r15,%r12 # T1+=Ch(e,f,g) 1269 mov %rdx,%r13 1270 mov %rdx,%r14 1271 1272 ror $28,%rcx 1273 ror $34,%r13 1274 mov %rdx,%r15 1275 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1276 1277 xor %r13,%rcx 1278 ror $5,%r13 1279 or %r9,%r14 # a|c 1280 1281 xor %r13,%rcx # h=Sigma0(a) 1282 and %r9,%r15 # a&c 1283 add %r12,%r10 # d+=T1 1284 1285 and %r8,%r14 # (a|c)&b 1286 add %r12,%rcx # h+=T1 1287 1288 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1289 lea 1(%rdi),%rdi # round++ 1290 1291 add %r14,%rcx # h+=Maj(a,b,c) 1292 mov 56(%rsp),%r13 1293 mov 32(%rsp),%r12 1294 1295 mov %r13,%r15 1296 1297 shr $7,%r13 1298 ror $1,%r15 1299 1300 xor %r15,%r13 1301 ror $7,%r15 1302 1303 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1304 mov %r12,%r14 1305 1306 shr $6,%r12 1307 ror $19,%r14 1308 1309 xor %r14,%r12 1310 ror $42,%r14 1311 1312 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1313 1314 add %r13,%r12 1315 1316 add 120(%rsp),%r12 1317 1318 add 48(%rsp),%r12 1319 mov %r10,%r13 1320 mov 
%r10,%r14 1321 mov %r11,%r15 1322 1323 ror $14,%r13 1324 ror $18,%r14 1325 xor %rax,%r15 # f^g 1326 1327 xor %r14,%r13 1328 ror $23,%r14 1329 and %r10,%r15 # (f^g)&e 1330 mov %r12,48(%rsp) 1331 1332 xor %r14,%r13 # Sigma1(e) 1333 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g 1334 add %rbx,%r12 # T1+=h 1335 1336 mov %rcx,%rbx 1337 add %r13,%r12 # T1+=Sigma1(e) 1338 1339 add %r15,%r12 # T1+=Ch(e,f,g) 1340 mov %rcx,%r13 1341 mov %rcx,%r14 1342 1343 ror $28,%rbx 1344 ror $34,%r13 1345 mov %rcx,%r15 1346 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1347 1348 xor %r13,%rbx 1349 ror $5,%r13 1350 or %r8,%r14 # a|c 1351 1352 xor %r13,%rbx # h=Sigma0(a) 1353 and %r8,%r15 # a&c 1354 add %r12,%r9 # d+=T1 1355 1356 and %rdx,%r14 # (a|c)&b 1357 add %r12,%rbx # h+=T1 1358 1359 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1360 lea 1(%rdi),%rdi # round++ 1361 1362 add %r14,%rbx # h+=Maj(a,b,c) 1363 mov 64(%rsp),%r13 1364 mov 40(%rsp),%r12 1365 1366 mov %r13,%r15 1367 1368 shr $7,%r13 1369 ror $1,%r15 1370 1371 xor %r15,%r13 1372 ror $7,%r15 1373 1374 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1375 mov %r12,%r14 1376 1377 shr $6,%r12 1378 ror $19,%r14 1379 1380 xor %r14,%r12 1381 ror $42,%r14 1382 1383 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1384 1385 add %r13,%r12 1386 1387 add 0(%rsp),%r12 1388 1389 add 56(%rsp),%r12 1390 mov %r9,%r13 1391 mov %r9,%r14 1392 mov %r10,%r15 1393 1394 ror $14,%r13 1395 ror $18,%r14 1396 xor %r11,%r15 # f^g 1397 1398 xor %r14,%r13 1399 ror $23,%r14 1400 and %r9,%r15 # (f^g)&e 1401 mov %r12,56(%rsp) 1402 1403 xor %r14,%r13 # Sigma1(e) 1404 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g 1405 add %rax,%r12 # T1+=h 1406 1407 mov %rbx,%rax 1408 add %r13,%r12 # T1+=Sigma1(e) 1409 1410 add %r15,%r12 # T1+=Ch(e,f,g) 1411 mov %rbx,%r13 1412 mov %rbx,%r14 1413 1414 ror $28,%rax 1415 ror $34,%r13 1416 mov %rbx,%r15 1417 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1418 1419 xor %r13,%rax 1420 ror $5,%r13 1421 or %rdx,%r14 # a|c 1422 1423 xor %r13,%rax # h=Sigma0(a) 1424 and %rdx,%r15 # a&c 1425 add 
%r12,%r8 # d+=T1 1426 1427 and %rcx,%r14 # (a|c)&b 1428 add %r12,%rax # h+=T1 1429 1430 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1431 lea 1(%rdi),%rdi # round++ 1432 1433 add %r14,%rax # h+=Maj(a,b,c) 1434 mov 72(%rsp),%r13 1435 mov 48(%rsp),%r12 1436 1437 mov %r13,%r15 1438 1439 shr $7,%r13 1440 ror $1,%r15 1441 1442 xor %r15,%r13 1443 ror $7,%r15 1444 1445 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1446 mov %r12,%r14 1447 1448 shr $6,%r12 1449 ror $19,%r14 1450 1451 xor %r14,%r12 1452 ror $42,%r14 1453 1454 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1455 1456 add %r13,%r12 1457 1458 add 8(%rsp),%r12 1459 1460 add 64(%rsp),%r12 1461 mov %r8,%r13 1462 mov %r8,%r14 1463 mov %r9,%r15 1464 1465 ror $14,%r13 1466 ror $18,%r14 1467 xor %r10,%r15 # f^g 1468 1469 xor %r14,%r13 1470 ror $23,%r14 1471 and %r8,%r15 # (f^g)&e 1472 mov %r12,64(%rsp) 1473 1474 xor %r14,%r13 # Sigma1(e) 1475 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g 1476 add %r11,%r12 # T1+=h 1477 1478 mov %rax,%r11 1479 add %r13,%r12 # T1+=Sigma1(e) 1480 1481 add %r15,%r12 # T1+=Ch(e,f,g) 1482 mov %rax,%r13 1483 mov %rax,%r14 1484 1485 ror $28,%r11 1486 ror $34,%r13 1487 mov %rax,%r15 1488 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1489 1490 xor %r13,%r11 1491 ror $5,%r13 1492 or %rcx,%r14 # a|c 1493 1494 xor %r13,%r11 # h=Sigma0(a) 1495 and %rcx,%r15 # a&c 1496 add %r12,%rdx # d+=T1 1497 1498 and %rbx,%r14 # (a|c)&b 1499 add %r12,%r11 # h+=T1 1500 1501 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1502 lea 1(%rdi),%rdi # round++ 1503 1504 add %r14,%r11 # h+=Maj(a,b,c) 1505 mov 80(%rsp),%r13 1506 mov 56(%rsp),%r12 1507 1508 mov %r13,%r15 1509 1510 shr $7,%r13 1511 ror $1,%r15 1512 1513 xor %r15,%r13 1514 ror $7,%r15 1515 1516 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1517 mov %r12,%r14 1518 1519 shr $6,%r12 1520 ror $19,%r14 1521 1522 xor %r14,%r12 1523 ror $42,%r14 1524 1525 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1526 1527 add %r13,%r12 1528 1529 add 16(%rsp),%r12 1530 1531 add 72(%rsp),%r12 1532 mov %rdx,%r13 1533 mov %rdx,%r14 1534 mov 
%r8,%r15 1535 1536 ror $14,%r13 1537 ror $18,%r14 1538 xor %r9,%r15 # f^g 1539 1540 xor %r14,%r13 1541 ror $23,%r14 1542 and %rdx,%r15 # (f^g)&e 1543 mov %r12,72(%rsp) 1544 1545 xor %r14,%r13 # Sigma1(e) 1546 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g 1547 add %r10,%r12 # T1+=h 1548 1549 mov %r11,%r10 1550 add %r13,%r12 # T1+=Sigma1(e) 1551 1552 add %r15,%r12 # T1+=Ch(e,f,g) 1553 mov %r11,%r13 1554 mov %r11,%r14 1555 1556 ror $28,%r10 1557 ror $34,%r13 1558 mov %r11,%r15 1559 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1560 1561 xor %r13,%r10 1562 ror $5,%r13 1563 or %rbx,%r14 # a|c 1564 1565 xor %r13,%r10 # h=Sigma0(a) 1566 and %rbx,%r15 # a&c 1567 add %r12,%rcx # d+=T1 1568 1569 and %rax,%r14 # (a|c)&b 1570 add %r12,%r10 # h+=T1 1571 1572 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1573 lea 1(%rdi),%rdi # round++ 1574 1575 add %r14,%r10 # h+=Maj(a,b,c) 1576 mov 88(%rsp),%r13 1577 mov 64(%rsp),%r12 1578 1579 mov %r13,%r15 1580 1581 shr $7,%r13 1582 ror $1,%r15 1583 1584 xor %r15,%r13 1585 ror $7,%r15 1586 1587 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1588 mov %r12,%r14 1589 1590 shr $6,%r12 1591 ror $19,%r14 1592 1593 xor %r14,%r12 1594 ror $42,%r14 1595 1596 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1597 1598 add %r13,%r12 1599 1600 add 24(%rsp),%r12 1601 1602 add 80(%rsp),%r12 1603 mov %rcx,%r13 1604 mov %rcx,%r14 1605 mov %rdx,%r15 1606 1607 ror $14,%r13 1608 ror $18,%r14 1609 xor %r8,%r15 # f^g 1610 1611 xor %r14,%r13 1612 ror $23,%r14 1613 and %rcx,%r15 # (f^g)&e 1614 mov %r12,80(%rsp) 1615 1616 xor %r14,%r13 # Sigma1(e) 1617 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g 1618 add %r9,%r12 # T1+=h 1619 1620 mov %r10,%r9 1621 add %r13,%r12 # T1+=Sigma1(e) 1622 1623 add %r15,%r12 # T1+=Ch(e,f,g) 1624 mov %r10,%r13 1625 mov %r10,%r14 1626 1627 ror $28,%r9 1628 ror $34,%r13 1629 mov %r10,%r15 1630 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1631 1632 xor %r13,%r9 1633 ror $5,%r13 1634 or %rax,%r14 # a|c 1635 1636 xor %r13,%r9 # h=Sigma0(a) 1637 and %rax,%r15 # a&c 1638 add %r12,%rbx # d+=T1 1639 1640 
and %r11,%r14 # (a|c)&b 1641 add %r12,%r9 # h+=T1 1642 1643 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1644 lea 1(%rdi),%rdi # round++ 1645 1646 add %r14,%r9 # h+=Maj(a,b,c) 1647 mov 96(%rsp),%r13 1648 mov 72(%rsp),%r12 1649 1650 mov %r13,%r15 1651 1652 shr $7,%r13 1653 ror $1,%r15 1654 1655 xor %r15,%r13 1656 ror $7,%r15 1657 1658 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1659 mov %r12,%r14 1660 1661 shr $6,%r12 1662 ror $19,%r14 1663 1664 xor %r14,%r12 1665 ror $42,%r14 1666 1667 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1668 1669 add %r13,%r12 1670 1671 add 32(%rsp),%r12 1672 1673 add 88(%rsp),%r12 1674 mov %rbx,%r13 1675 mov %rbx,%r14 1676 mov %rcx,%r15 1677 1678 ror $14,%r13 1679 ror $18,%r14 1680 xor %rdx,%r15 # f^g 1681 1682 xor %r14,%r13 1683 ror $23,%r14 1684 and %rbx,%r15 # (f^g)&e 1685 mov %r12,88(%rsp) 1686 1687 xor %r14,%r13 # Sigma1(e) 1688 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g 1689 add %r8,%r12 # T1+=h 1690 1691 mov %r9,%r8 1692 add %r13,%r12 # T1+=Sigma1(e) 1693 1694 add %r15,%r12 # T1+=Ch(e,f,g) 1695 mov %r9,%r13 1696 mov %r9,%r14 1697 1698 ror $28,%r8 1699 ror $34,%r13 1700 mov %r9,%r15 1701 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1702 1703 xor %r13,%r8 1704 ror $5,%r13 1705 or %r11,%r14 # a|c 1706 1707 xor %r13,%r8 # h=Sigma0(a) 1708 and %r11,%r15 # a&c 1709 add %r12,%rax # d+=T1 1710 1711 and %r10,%r14 # (a|c)&b 1712 add %r12,%r8 # h+=T1 1713 1714 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1715 lea 1(%rdi),%rdi # round++ 1716 1717 add %r14,%r8 # h+=Maj(a,b,c) 1718 mov 104(%rsp),%r13 1719 mov 80(%rsp),%r12 1720 1721 mov %r13,%r15 1722 1723 shr $7,%r13 1724 ror $1,%r15 1725 1726 xor %r15,%r13 1727 ror $7,%r15 1728 1729 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1730 mov %r12,%r14 1731 1732 shr $6,%r12 1733 ror $19,%r14 1734 1735 xor %r14,%r12 1736 ror $42,%r14 1737 1738 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1739 1740 add %r13,%r12 1741 1742 add 40(%rsp),%r12 1743 1744 add 96(%rsp),%r12 1745 mov %rax,%r13 1746 mov %rax,%r14 1747 mov %rbx,%r15 1748 1749 ror $14,%r13 
1750 ror $18,%r14 1751 xor %rcx,%r15 # f^g 1752 1753 xor %r14,%r13 1754 ror $23,%r14 1755 and %rax,%r15 # (f^g)&e 1756 mov %r12,96(%rsp) 1757 1758 xor %r14,%r13 # Sigma1(e) 1759 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g 1760 add %rdx,%r12 # T1+=h 1761 1762 mov %r8,%rdx 1763 add %r13,%r12 # T1+=Sigma1(e) 1764 1765 add %r15,%r12 # T1+=Ch(e,f,g) 1766 mov %r8,%r13 1767 mov %r8,%r14 1768 1769 ror $28,%rdx 1770 ror $34,%r13 1771 mov %r8,%r15 1772 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1773 1774 xor %r13,%rdx 1775 ror $5,%r13 1776 or %r10,%r14 # a|c 1777 1778 xor %r13,%rdx # h=Sigma0(a) 1779 and %r10,%r15 # a&c 1780 add %r12,%r11 # d+=T1 1781 1782 and %r9,%r14 # (a|c)&b 1783 add %r12,%rdx # h+=T1 1784 1785 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1786 lea 1(%rdi),%rdi # round++ 1787 1788 add %r14,%rdx # h+=Maj(a,b,c) 1789 mov 112(%rsp),%r13 1790 mov 88(%rsp),%r12 1791 1792 mov %r13,%r15 1793 1794 shr $7,%r13 1795 ror $1,%r15 1796 1797 xor %r15,%r13 1798 ror $7,%r15 1799 1800 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1801 mov %r12,%r14 1802 1803 shr $6,%r12 1804 ror $19,%r14 1805 1806 xor %r14,%r12 1807 ror $42,%r14 1808 1809 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1810 1811 add %r13,%r12 1812 1813 add 48(%rsp),%r12 1814 1815 add 104(%rsp),%r12 1816 mov %r11,%r13 1817 mov %r11,%r14 1818 mov %rax,%r15 1819 1820 ror $14,%r13 1821 ror $18,%r14 1822 xor %rbx,%r15 # f^g 1823 1824 xor %r14,%r13 1825 ror $23,%r14 1826 and %r11,%r15 # (f^g)&e 1827 mov %r12,104(%rsp) 1828 1829 xor %r14,%r13 # Sigma1(e) 1830 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g 1831 add %rcx,%r12 # T1+=h 1832 1833 mov %rdx,%rcx 1834 add %r13,%r12 # T1+=Sigma1(e) 1835 1836 add %r15,%r12 # T1+=Ch(e,f,g) 1837 mov %rdx,%r13 1838 mov %rdx,%r14 1839 1840 ror $28,%rcx 1841 ror $34,%r13 1842 mov %rdx,%r15 1843 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1844 1845 xor %r13,%rcx 1846 ror $5,%r13 1847 or %r9,%r14 # a|c 1848 1849 xor %r13,%rcx # h=Sigma0(a) 1850 and %r9,%r15 # a&c 1851 add %r12,%r10 # d+=T1 1852 1853 and %r8,%r14 # (a|c)&b 
1854 add %r12,%rcx # h+=T1 1855 1856 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1857 lea 1(%rdi),%rdi # round++ 1858 1859 add %r14,%rcx # h+=Maj(a,b,c) 1860 mov 120(%rsp),%r13 1861 mov 96(%rsp),%r12 1862 1863 mov %r13,%r15 1864 1865 shr $7,%r13 1866 ror $1,%r15 1867 1868 xor %r15,%r13 1869 ror $7,%r15 1870 1871 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1872 mov %r12,%r14 1873 1874 shr $6,%r12 1875 ror $19,%r14 1876 1877 xor %r14,%r12 1878 ror $42,%r14 1879 1880 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1881 1882 add %r13,%r12 1883 1884 add 56(%rsp),%r12 1885 1886 add 112(%rsp),%r12 1887 mov %r10,%r13 1888 mov %r10,%r14 1889 mov %r11,%r15 1890 1891 ror $14,%r13 1892 ror $18,%r14 1893 xor %rax,%r15 # f^g 1894 1895 xor %r14,%r13 1896 ror $23,%r14 1897 and %r10,%r15 # (f^g)&e 1898 mov %r12,112(%rsp) 1899 1900 xor %r14,%r13 # Sigma1(e) 1901 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g 1902 add %rbx,%r12 # T1+=h 1903 1904 mov %rcx,%rbx 1905 add %r13,%r12 # T1+=Sigma1(e) 1906 1907 add %r15,%r12 # T1+=Ch(e,f,g) 1908 mov %rcx,%r13 1909 mov %rcx,%r14 1910 1911 ror $28,%rbx 1912 ror $34,%r13 1913 mov %rcx,%r15 1914 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1915 1916 xor %r13,%rbx 1917 ror $5,%r13 1918 or %r8,%r14 # a|c 1919 1920 xor %r13,%rbx # h=Sigma0(a) 1921 and %r8,%r15 # a&c 1922 add %r12,%r9 # d+=T1 1923 1924 and %rdx,%r14 # (a|c)&b 1925 add %r12,%rbx # h+=T1 1926 1927 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1928 lea 1(%rdi),%rdi # round++ 1929 1930 add %r14,%rbx # h+=Maj(a,b,c) 1931 mov 0(%rsp),%r13 1932 mov 104(%rsp),%r12 1933 1934 mov %r13,%r15 1935 1936 shr $7,%r13 1937 ror $1,%r15 1938 1939 xor %r15,%r13 1940 ror $7,%r15 1941 1942 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) 1943 mov %r12,%r14 1944 1945 shr $6,%r12 1946 ror $19,%r14 1947 1948 xor %r14,%r12 1949 ror $42,%r14 1950 1951 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) 1952 1953 add %r13,%r12 1954 1955 add 64(%rsp),%r12 1956 1957 add 120(%rsp),%r12 1958 mov %r9,%r13 1959 mov %r9,%r14 1960 mov %r10,%r15 1961 1962 ror $14,%r13 1963 ror 
$18,%r14 1964 xor %r11,%r15 # f^g 1965 1966 xor %r14,%r13 1967 ror $23,%r14 1968 and %r9,%r15 # (f^g)&e 1969 mov %r12,120(%rsp) 1970 1971 xor %r14,%r13 # Sigma1(e) 1972 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g 1973 add %rax,%r12 # T1+=h 1974 1975 mov %rbx,%rax 1976 add %r13,%r12 # T1+=Sigma1(e) 1977 1978 add %r15,%r12 # T1+=Ch(e,f,g) 1979 mov %rbx,%r13 1980 mov %rbx,%r14 1981 1982 ror $28,%rax 1983 ror $34,%r13 1984 mov %rbx,%r15 1985 add (%rbp,%rdi,8),%r12 # T1+=K[round] 1986 1987 xor %r13,%rax 1988 ror $5,%r13 1989 or %rdx,%r14 # a|c 1990 1991 xor %r13,%rax # h=Sigma0(a) 1992 and %rdx,%r15 # a&c 1993 add %r12,%r8 # d+=T1 1994 1995 and %rcx,%r14 # (a|c)&b 1996 add %r12,%rax # h+=T1 1997 1998 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 1999 lea 1(%rdi),%rdi # round++ 2000 2001 add %r14,%rax # h+=Maj(a,b,c) 2002 cmp $80,%rdi 2003 jb .Lrounds_16_xx 2004 2005 mov 16*8+0*8(%rsp),%rdi 2006 lea 16*8(%rsi),%rsi 2007 2008 add 8*0(%rdi),%rax 2009 add 8*1(%rdi),%rbx 2010 add 8*2(%rdi),%rcx 2011 add 8*3(%rdi),%rdx 2012 add 8*4(%rdi),%r8 2013 add 8*5(%rdi),%r9 2014 add 8*6(%rdi),%r10 2015 add 8*7(%rdi),%r11 2016 2017 cmp 16*8+2*8(%rsp),%rsi 2018 2019 mov %rax,8*0(%rdi) 2020 mov %rbx,8*1(%rdi) 2021 mov %rcx,8*2(%rdi) 2022 mov %rdx,8*3(%rdi) 2023 mov %r8,8*4(%rdi) 2024 mov %r9,8*5(%rdi) 2025 mov %r10,8*6(%rdi) 2026 mov %r11,8*7(%rdi) 2027 jb .Lloop 2028 2029 mov 16*8+3*8(%rsp),%rsp 2030 pop %r15 2031 pop %r14 2032 pop %r13 2033 pop %r12 2034 pop %rbp 2035 pop %rbx 2036 2037 ret 2038SET_SIZE(SHA512TransformBlocks) 2039 /* K512: table of 80 64-bit constants (40 .quad pairs), aligned to 64 bytes and typed as a data object. The values are the SHA-512 round constants K[0..79] from FIPS 180-4. The round code indexes K[round] as (%rbp,%rdi,8); presumably %rbp holds the address of this table, but that setup is outside this view. NOTE(review): nothing visible here writes to the table, so it looks like a candidate for .rodata rather than .data; confirm how K512 is addressed before moving it. */ 2040.data 2041.align 64 2042.type K512,@object 2043K512: 2044 .quad 0x428a2f98d728ae22,0x7137449123ef65cd 2045 .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc 2046 .quad 0x3956c25bf348b538,0x59f111f1b605d019 2047 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 2048 .quad 0xd807aa98a3030242,0x12835b0145706fbe 2049 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 2050 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 2051 .quad 0x9bdc06a725c71235,0xc19bf174cf692694 2052 .quad 
0xe49b69c19ef14ad2,0xefbe4786384f25e3 2053 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 2054 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 2055 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 2056 .quad 0x983e5152ee66dfab,0xa831c66d2db43210 2057 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 2058 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 2059 .quad 0x06ca6351e003826f,0x142929670a0e6e70 2060 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 2061 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df 2062 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 2063 .quad 0x81c2c92e47edaee6,0x92722c851482353b 2064 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 2065 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 2066 .quad 0xd192e819d6ef5218,0xd69906245565a910 2067 .quad 0xf40e35855771202a,0x106aa07032bbd1b8 2068 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 2069 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 2070 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb 2071 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 2072 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 2073 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec 2074 .quad 0x90befffa23631e28,0xa4506cebde82bde9 2075 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b 2076 .quad 0xca273eceea26619c,0xd186b8c721c0c207 2077 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 2078 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 2079 .quad 0x113f9804bef90dae,0x1b710b35131c471b 2080 .quad 0x28db77f523047d84,0x32caab7b40c72493 2081 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c 2082 .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a 2083 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 2084#endif /* !lint && !__lint */ 2085 /* When __ELF__ is defined, emit an empty .note.GNU-stack section with no flags; by GNU toolchain convention this marks the object as not requiring an executable stack. */ 2086#ifdef __ELF__ 2087.section .note.GNU-stack,"",%progbits 2088#endif 2089