1! des_enc.m4 2! des_enc.S (generated from des_enc.m4) 3! 4! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file. 5! 6! Version 1.0. 32-bit version. 7! 8! June 8, 2000. 9! 10! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation 11! by Andy Polyakov. 12! 13! January 1, 2003. 14! 15! Assembler version: Copyright Svend Olaf Mikkelsen. 16! 17! Original C code: Copyright Eric A. Young. 18! 19! This code can be freely used by LibDES/SSLeay/OpenSSL users. 20! 21! The LibDES/SSLeay/OpenSSL copyright notices must be respected. 22! 23! This version can be redistributed. 24! 25! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S 26! 27! Global registers 1 to 5 are used. This is the same as done by the 28! cc compiler. The UltraSPARC load/store little endian feature is used. 29! 30! Instruction grouping often refers to one CPU cycle. 31! 32! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S 33! 34! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S 35! 36! Performance improvement according to './apps/openssl speed des' 37! 38! 32-bit build: 39! 23% faster than cc-5.2 -xarch=v8plus -xO5 40! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 41! 64-bit build: 42! 50% faster than cc-5.2 -xarch=v9 -xO5 43! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 44! 
.ident "des_enc.m4 2.1"
.file  "des_enc-sparc.S"

#include <openssl/opensslconf.h>

! ABI selection: ABI64 is set when the compiler reports a 64-bit SPARC
! target on its command line.

#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64	/* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64	/* They've said -m64 at command line */
#endif

! FRAME is the stack adjustment given to save. BIAS, ARG0 and ARGSZ are
! combined as [%sp+BIAS+ARG0+n*ARGSZ] to address stack slots. LDPTR and
! STPTR are the load/store mnemonics used for pointer-sized values.

#ifdef ABI64
  .register	%g2,#scratch
  .register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
# define	LDPTR	ldx
# define	STPTR	stx
# define	ARG0	128
# define	ARGSZ	8
# ifndef OPENSSL_SYSNAME_ULTRASPARC
# define OPENSSL_SYSNAME_ULTRASPARC
# endif
#else
# define	FRAME	-96
# define	BIAS	0
# define	LDPTR	ld
# define	STPTR	st
# define	ARG0	68
# define	ARGSZ	4
#endif

! Initial value of the loop counter (out4) for the two-round loop.

#define LOOPS 7

! Symbolic register names used by all macros below.

#define global0 %g0
#define global1 %g1
#define global2 %g2
#define global3 %g3
#define global4 %g4
#define global5 %g5

! NOTE: the last two names are crossed: local7 is %l6 and local6 is %l7.

#define local0 %l0
#define local1 %l1
#define local2 %l2
#define local3 %l3
#define local4 %l4
#define local5 %l5
#define local7 %l6
#define local6 %l7

#define in0 %i0
#define in1 %i1
#define in2 %i2
#define in3 %i3
#define in4 %i4
#define in5 %i5
#define in6 %i6
#define in7 %i7

#define out0 %o0
#define out1 %o1
#define out2 %o2
#define out3 %o3
#define out4 %o4
#define out5 %o5
#define out6 %o6
#define out7 %o7

! Byte store mnemonic used by the store macros.

#define stub stb

! From here on m4 quotes are the brace characters.

changequote({,})


! Macro definitions:


! {ip_macro}
!
! The logic used in initial and final permutations is the same as in
! the C code. The permutations are done with a clever shift/xor/and
! technique.
!
! The macro also loads address sbox 1 to 5 to global 1 to 5, address
! sbox 6 to local6, and address sbox 8 to out3.
!
! Rotates the halves 3 left to bring the sbox bits in convenient positions.
!
! Loads key first round from address in parameter 5 to out0, out1.
!
! After the original LibDES initial permutation, the resulting left
! is in the variable initially used for right and vice versa. The macro
! implements the possibility to keep the halves in the original registers.
!
! Register expectations (as used by the body): global1 holds the address
! of sbox 1 and out2 is the base of a constant table; the permutation
! masks are read from [out2+256], [out2+260], [out2+264] and [out2+272],
! the loop counter from [out2+280] into out4, and, when parameter 6 is
! 1 or 2, the 0x0000FC00 mask from [out2+284] into local5.
!
! parameter 1   left
! parameter 2   right
! parameter 3   result left (modify in first round)
! parameter 4   result right (use in first round)
! parameter 5   key address
! parameter 6   1/2 for include encryption/decryption
!               (1 ends with a call to .des_enc.1, 2 with a call to
!               .des_dec.1; any other value includes neither)
! parameter 7   1 for move in1 to in3
! parameter 8   1 for move in3 to in4, 2 for move in4 to in3
! parameter 9   1 for load ks3 and ks2 to in4 and in3

define(ip_macro, {

! {ip_macro}
! $1 $2 $4 $3 $5 $6 $7 $8 $9

	ld	[out2+256], local1
	srl	$2, 4, local4

	xor	local4, $1, local4
	ifelse($7,1,{mov in1, in3},{nop})

	ld	[out2+260], local2
	and	local4, local1, local4
	ifelse($8,1,{mov in3, in4},{})
	ifelse($8,2,{mov in4, in3},{})

	ld	[out2+280], out4           ! loop counter
	sll	local4, 4, local1
	xor	$1, local4, $1

	ld	[out2+264], local3
	srl	$1, 16, local4
	xor	$2, local1, $2

	ifelse($9,1,{LDPTR KS3, in4},{})
	xor	local4, $2, local4
	nop	!sethi	%hi(DES_SPtrans), global1 ! sbox addr

	ifelse($9,1,{LDPTR KS2, in3},{})
	and	local4, local2, local4
	nop	!or	global1, %lo(DES_SPtrans), global1   ! sbox addr

	sll	local4, 16, local1
	xor	$2, local4, $2

	srl	$2, 2, local4
	xor	$1, local1, $1

	! local5 = 16711680 + 255 = 0x00FF00FF, built inline
	sethi	%hi(16711680), local5
	xor	local4, $1, local4

	and	local4, local3, local4
	or	local5, 255, local5

	sll	local4, 2, local2
	xor	$1, local4, $1

	srl	$1, 8, local4
	xor	$2, local2, $2

	xor	local4, $2, local4
	add	global1, 768, global4      ! address sbox 4

	and	local4, local5, local4
	add	global1, 1024, global5     ! address sbox 5

	ld	[out2+272], local7
	sll	local4, 8, local1
	xor	$2, local4, $2

	srl	$2, 1, local4
	xor	$1, local1, $1

	ld	[$5], out0                 ! key 7531
	xor	local4, $1, local4
	add	global1, 256, global2      ! address sbox 2

	ld	[$5+4], out1               ! key 8642
	and	local4, local7, local4
	add	global1, 512, global3      ! address sbox 3

	sll	local4, 1, local1
	xor	$1, local4, $1

	! rotate both halves 3 left (sll 3 combined with srl 29)
	sll	$1, 3, local3
	xor	$2, local1, $2

	sll	$2, 3, local2
	add	global1, 1280, local6      ! address sbox 6

	srl	$1, 29, local4
	add	global1, 1792, out3        ! address sbox 8

	srl	$2, 29, local1
	or	local4, local3, $4

	or	local2, local1, $3

	ifelse($6, 1, {

		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
		or	local2, local1, $3     ! same or as just above
		xor	$4, out0, local1

		call .des_enc.1
		and	local1, 252, local1    ! executes in the call delay slot

	},{})

	ifelse($6, 2, {

		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
		or	local2, local1, $3     ! same or as just above
		xor	$4, out0, local1

		call .des_dec.1
		and	local1, 252, local1    ! executes in the call delay slot

	},{})
})


! {rounds_macro}
!
! The logic used in the DES rounds is the same as in the C code,
! except that calculations for sbox 1 and sbox 5 begin before
! the previous round is finished.
!
! In each round one half (work) is modified based on key and the
! other half (use).
!
! In this version we do two rounds in a loop repeated 7 times
! and two rounds separately.
!
! One half has the bits for the sboxes in the following positions:
!
!	777777xx555555xx333333xx111111xx
!
!	88xx666666xx444444xx222222xx8888
!
! The bits for each sbox are xor-ed with the key bits for that box.
! The above xx bits are cleared, and the result used for lookup in
! the sbox table. Each sbox entry contains the 4 output bits permuted
! into 32 bits according to the P permutation.
!
! In the description of DES, left and right are switched after
! each round, except after last round. In this code the original
! left and right are kept in the same register in all rounds, meaning
! that after the 16 rounds the result for right is in the register
! originally used for left.
!
! parameter 1  first work (left in first round)
! parameter 2  first use (right in first round)
! parameter 3  enc/dec  1/-1 (used as multiplier for the key offsets)
! parameter 4  loop label
! parameter 5  key address register
! parameter 6  optional address for key next encryption/decryption
! parameter 7  not empty for include retl
!
! also compares in2 to 8
!
! On exit the first work half is additionally left in local3 (shifted
! right 3) and local4 (shifted left 29), which is the form the final
! permutation macros expect.

define(rounds_macro, {

! {rounds_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	xor	$2, out0, local1

	ld	[out2+284], local5        ! 0x0000FC00
	ba	$4
	and	local1, 252, local1       ! delay slot: sbox 1 offset for first round

	.align 32

$4:
	! local6 is address sbox 6
	! out3   is address sbox 8
	! out4   is loop counter

	ld	[global1+local1], local1
	xor	$2, out1, out1            ! 8642
	xor	$2, out0, out0            ! 7531
	! fmovs	%f0, %f0                  ! fxor used for alignment

	srl	out1, 4, local0           ! rotate 4 right
	and	out0, local5, local3      ! 3
	! fmovs	%f0, %f0

	ld	[$5+$3*8], local7         ! key 7531 next round
	srl	local3, 8, local3         ! 3
	and	local0, 252, local2       ! 2
	! fmovs	%f0, %f0

	ld	[global3+local3],local3   ! 3
	sll	out1, 28, out1            ! rotate
	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2  ! 2
	srl	out0, 24, local1          ! 7
	or	out1, local0, out1        ! rotate

	ldub	[out2+local1], local1     ! 7 (and 0xFC)
	srl	out1, 24, local0          ! 8
	and	out1, local5, local4      ! 4

	ldub	[out2+local0], local0     ! 8 (and 0xFC)
	srl	local4, 8, local4         ! 4
	xor	$1, local2, $1            ! 2 finished local2 now sbox 6

	ld	[global4+local4],local4   ! 4
	srl	out1, 16, local2          ! 6
	xor	$1, local3, $1            ! 3 finished local3 now sbox 5

	ld	[out3+local0],local0      ! 8
	and	local2, 252, local2       ! 6
	add	global1, 1536, local5     ! address sbox 7

	ld	[local6+local2], local2   ! 6
	srl	out0, 16, local3          ! 5
	xor	$1, local4, $1            ! 4 finished

	ld	[local5+local1],local1    ! 7
	and	local3, 252, local3       ! 5
	xor	$1, local0, $1            ! 8 finished

	ld	[global5+local3],local3   ! 5
	xor	$1, local2, $1            ! 6 finished
	subcc	out4, 1, out4             ! loop counter; flags tested by bne below

	ld	[$5+$3*8+4], out0         ! key 8642 next round
	xor	$1, local7, local2        ! sbox 5 next round
	xor	$1, local1, $1            ! 7 finished

	srl	local2, 16, local2        ! sbox 5 next round
	xor	$1, local3, $1            ! 5 finished

	ld	[$5+$3*16+4], out1        ! key 8642 next round again
	and	local2, 252, local2       ! sbox5 next round
! next round
	xor	$1, local7, local7        ! 7531

	ld	[global5+local2], local2  ! 5
	srl	local7, 24, local3        ! 7
	xor	$1, out0, out0            ! 8642

	ldub	[out2+local3], local3     ! 7 (and 0xFC)
	srl	out0, 4, local0           ! rotate 4 right
	and	local7, 252, local1       ! 1

	sll	out0, 28, out0            ! rotate
	xor	$2, local2, $2            ! 5 finished local2 used

	srl	local0, 8, local4         ! 4
	and	local0, 252, local2       ! 2
	ld	[local5+local3], local3   ! 7

	srl	local0, 16, local5        ! 6
	or	out0, local0, out0        ! rotate
	ld	[global2+local2], local2  ! 2

	srl	out0, 24, local0
	ld	[$5+$3*16], out0          ! key 7531 next round
	and	local4, 252, local4       ! 4

	and	local5, 252, local5       ! 6
	ld	[global4+local4], local4  ! 4
	xor	$2, local3, $2            ! 7 finished local3 used

	and	local0, 252, local0       ! 8
	ld	[local6+local5], local5   ! 6
	xor	$2, local2, $2            ! 2 finished local2 now sbox 3

	srl	local7, 8, local2         ! 3 start
	ld	[out3+local0], local0     ! 8
	xor	$2, local4, $2            ! 4 finished

	and	local2, 252, local2       ! 3
	ld	[global1+local1], local1  ! 1
	xor	$2, local5, $2            ! 6 finished local5 used

	ld	[global3+local2], local2  ! 3
	xor	$2, local0, $2            ! 8 finished
	add	$5, $3*16, $5             ! key pointer: enc add 16, dec add -16 (two rounds)

	ld	[out2+284], local5        ! 0x0000FC00
	xor	$2, out0, local4          ! sbox 1 next round
	xor	$2, local1, $2            ! 1 finished

	xor	$2, local2, $2            ! 3 finished
#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bne,pt	%icc, $4
#else
	bne	$4
#endif
	and	local4, 252, local1       ! sbox 1 next round

! two rounds more:

	ld	[global1+local1], local1
	xor	$2, out1, out1
	xor	$2, out0, out0

	srl	out1, 4, local0           ! rotate
	and	out0, local5, local3

	ld	[$5+$3*8], local7         ! key 7531
	srl	local3, 8, local3
	and	local0, 252, local2

	ld	[global3+local3],local3
	sll	out1, 28, out1            ! rotate
	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2
	srl	out0, 24, local1
	or	out1, local0, out1        ! rotate

	ldub	[out2+local1], local1
	srl	out1, 24, local0
	and	out1, local5, local4

	ldub	[out2+local0], local0
	srl	local4, 8, local4
	xor	$1, local2, $1            ! 2 finished local2 now sbox 6

	ld	[global4+local4],local4
	srl	out1, 16, local2
	xor	$1, local3, $1            ! 3 finished local3 now sbox 5

	ld	[out3+local0],local0
	and	local2, 252, local2
	add	global1, 1536, local5     ! address sbox 7

	ld	[local6+local2], local2
	srl	out0, 16, local3
	xor	$1, local4, $1            ! 4 finished

	ld	[local5+local1],local1
	and	local3, 252, local3
	xor	$1, local0, $1

	ld	[global5+local3],local3
	xor	$1, local2, $1            ! 6 finished
	cmp	in2, 8                    ! flags are left set for the code after the macro

	ifelse($6,{}, {}, {ld	[out2+280], out4})  ! loop counter
	xor	$1, local7, local2        ! sbox 5 next round
	xor	$1, local1, $1            ! 7 finished

	ld	[$5+$3*8+4], out0
	srl	local2, 16, local2        ! sbox 5 next round
	xor	$1, local3, $1            ! 5 finished

	and	local2, 252, local2
! next round (two rounds more)
	xor	$1, local7, local7        ! 7531

	ld	[global5+local2], local2
	srl	local7, 24, local3
	xor	$1, out0, out0            ! 8642

	ldub	[out2+local3], local3
	srl	out0, 4, local0           ! rotate
	and	local7, 252, local1

	sll	out0, 28, out0            ! rotate
	xor	$2, local2, $2            ! 5 finished local2 used

	srl	local0, 8, local4
	and	local0, 252, local2
	ld	[local5+local3], local3

	srl	local0, 16, local5
	or	out0, local0, out0        ! rotate
	ld	[global2+local2], local2

	srl	out0, 24, local0
	ifelse($6,{}, {}, {ld	[$6], out0})  ! key next encryption/decryption
	and	local4, 252, local4

	and	local5, 252, local5
	ld	[global4+local4], local4
	xor	$2, local3, $2            ! 7 finished local3 used

	and	local0, 252, local0
	ld	[local6+local5], local5
	xor	$2, local2, $2            ! 2 finished local2 now sbox 3

	srl	local7, 8, local2         ! 3 start
	ld	[out3+local0], local0
	xor	$2, local4, $2

	and	local2, 252, local2
	ld	[global1+local1], local1
	xor	$2, local5, $2            ! 6 finished local5 used

	ld	[global3+local2], local2
	srl	$1, 3, local3             ! first work half shifted 3 right
	xor	$2, local0, $2

	ifelse($6,{}, {}, {ld	[$6+4], out1})  ! key next encryption/decryption
	sll	$1, 29, local4            ! first work half shifted 29 left
	xor	$2, local1, $2

	ifelse($7,{}, {}, {retl})
	xor	$2, local2, $2            ! in the retl delay slot when retl is included
})


! {fp_macro}
!
! parameter 1   right (original left)
! parameter 2   left (original right)
! parameter 3   1 for optional store to [in0]
! parameter 4   1 for load input/output address to local5/7
!
! The final permutation logic switches the halves, meaning that
! left and right end up in the registers originally used.
!
! With parameter 3 set, parameter 1 is stored to [in0] and parameter 2
! to [in0+4].

define(fp_macro, {

! {fp_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! initially undo the rotate 3 left done after initial permutation
	! original left is received shifted 3 right and 29 left in local3/4

	sll	$2, 29, local1
	or	local3, local4, $1

	srl	$2, 3, $2
	sethi	%hi(0x55555555), local2

	or	$2, local1, $2
	or	local2, %lo(0x55555555), local2

	srl	$2, 1, local3
	sethi	%hi(0x00ff00ff), local1
	xor	local3, $1, local3
	or	local1, %lo(0x00ff00ff), local1
	and	local3, local2, local3
	sethi	%hi(0x33333333), local4
	sll	local3, 1, local2

	xor	$1, local3, $1

	srl	$1, 8, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x33333333), local4
	and	local3, local1, local3
	sethi	%hi(0x0000ffff), local1
	sll	local3, 8, local2

	xor	$2, local3, $2

	srl	$2, 2, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	or	local1, %lo(0x0000ffff), local1
	and	local3, local4, local3
	sethi	%hi(0x0f0f0f0f), local4
	sll	local3, 2, local2

	ifelse($4,1, {LDPTR INPUT, local5})
	xor	$1, local3, $1

	ifelse($4,1, {LDPTR OUTPUT, local7})
	srl	$1, 16, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x0f0f0f0f), local4
	and	local3, local1, local3
	sll	local3, 16, local2

	xor	$2, local3, local1

	srl	local1, 4, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	and	local3, local4, local3
	sll	local3, 4, local2

	xor	$1, local3, $1

	! optional store:

	ifelse($3,1, {st $1, [in0]})

	xor	local1, local2, $2

	ifelse($3,1, {st $2, [in0+4]})

})


! {fp_ip_macro}
!
! Does initial permutation for next block mixed with
! final permutation for current block.
!
! parameter 1   original left
! parameter 2   original right
! parameter 3   left ip
! parameter 4   right ip
! parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
!               2: mov in4 to in3
!
! also adds -8 to length in2 and loads loop counter to out4
!
! The current block is taken from local3/local4 (see the first
! instructions of the body) and from out5.

define(fp_ip_macro, {

! {fp_ip_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	define({temp1},{out4})
	define({temp2},{local3})

	define({ip1},{local1})
	define({ip2},{local2})
	define({ip4},{local4})
	define({ip5},{local5})

	! $1 in local3, local4

	ld	[out2+256], ip1
	sll	out5, 29, temp1
	or	local3, local4, $1

	srl	out5, 3, $2
	ifelse($5,2,{mov in4, in3})

	ld	[out2+272], ip5
	srl	$4, 4, local0
	or	$2, temp1, $2

	srl	$2, 1, temp1
	xor	temp1, $1, temp1

	and	temp1, ip5, temp1
	xor	local0, $3, local0

	sll	temp1, 1, temp2
	xor	$1, temp1, $1

	and	local0, ip1, local0
	add	in2, -8, in2

	sll	local0, 4, local7
	xor	$3, local0, $3

	ld	[out2+268], ip4
	srl	$1, 8, temp1
	xor	$2, temp2, $2
	ld	[out2+260], ip2
	srl	$3, 16, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip4, temp1
	and	local0, ip2, local0
	sll	temp1, 8, temp2
	xor	$2, temp1, $2
	sll	local0, 16, local7
	xor	$4, local0, $4

	srl	$2, 2, temp1
	xor	$1, temp2, $1

	ld	[out2+264], temp2         ! ip3
	srl	$4, 2, local0
	xor	$3, local7, $3
	xor	temp1, $1, temp1
	xor	local0, $3, local0
	and	temp1, temp2, temp1
	and	local0, temp2, local0
	sll	temp1, 2, temp2
	xor	$1, temp1, $1
	sll	local0, 2, local7
	xor	$3, local0, $3

	srl	$1, 16, temp1
	xor	$2, temp2, $2
	srl	$3, 8, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip2, temp1
	and	local0, ip4, local0
	sll	temp1, 16, temp2
	xor	$2, temp1, local4
	sll	local0, 8, local7
	xor	$4, local0, $4

	srl	$4, 1, local0
	xor	$3, local7, $3

	srl	local4, 4, temp1
	xor	local0, $3, local0

	xor	$1, temp2, $1
	and	local0, ip5, local0

	sll	local0, 1, local7
	xor	temp1, $1, temp1

	xor	$3, local0, $3
	xor	$4, local7, $4

	sll	$3, 3, local5
	and	temp1, ip1, temp1

	sll	temp1, 4, temp2
	xor	$1, temp1, $1

	ifelse($5,1,{LDPTR	KS2, in4})
	sll	$4, 3, local2
	xor	local4, temp2, $2

	! reload since used as temporary:

	ld	[out2+280], out4          ! loop counter

	srl	$3, 29, local0
	ifelse($5,1,{add in4, 120, in4})

	ifelse($5,1,{LDPTR	KS1, in3})
	srl	$4, 29, local7

	or	local0, local5, $4
	or	local2, local7, $3

})



! {load_little_endian}
!
! parameter 1  address
! parameter 2  destination left
! parameter 3  destination right
! parameter 4  temporary
! parameter 5  label
!
! Loads the 4 bytes at the address into parameter 2 and the following
! 4 bytes into parameter 3, first byte in memory as least significant.
! When OPENSSL_SYSNAME_ULTRASPARC is set and the address is a multiple
! of 4, two lda instructions with ASI 0x88 are used; otherwise the bytes
! are loaded one at a time. Parameter 5 and parameter 5 followed by the
! letter a are emitted as labels, so parameter 5 must differ for every
! use of the macro.

define(load_little_endian, {

! {load_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	andcc	$1, 3, global0
	bne,pn	%icc, $5
	nop

	lda	[$1] 0x88, $2
	add	$1, 4, $4

	ba,pt	%icc, $5a
	lda	[$4] 0x88, $3
#endif

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2


	ldub	[$1+3+4], $3

	ldub	[$1+2+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})


! {load_little_endian_inc}
!
! parameter 1  address
! parameter 2  destination left
! parameter 3  destination right
! parameter 4  temporary
! parameter 5  label
!
! adds 8 to address
!
! Same loads as {load_little_endian}; in addition the address register
! itself is advanced by 8 on both the aligned and the byte-wise path.

define(load_little_endian_inc, {

! {load_little_endian_inc}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	andcc	$1, 3, global0
	bne,pn	%icc, $5
	nop

	lda	[$1] 0x88, $2
	add	$1, 4, $1

	lda	[$1] 0x88, $3
	ba,pt	%icc, $5a
	add	$1, 4, $1
#endif

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+3+4], $3
	add	$1, 8, $1                 ! address advanced here; offsets below compensate with -8

	ldub	[$1+2+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})


! {load_n_bytes}
!
! Loads 1 to 7 bytes little endian
! Remaining bytes are zeroed.
!
! parameter 1  address
! parameter 2  length
! parameter 3  destination register left
! parameter 4  destination register right
! parameter 5  temp
! parameter 6  temp2
! parameter 7  label
! parameter 8  return label
!
! As coded, bytes 0 to 3 are assembled in parameter 4 and bytes 4 to 6
! in parameter 3. The entry point is selected through a jump table
! addressed relative to %o7, which the call instruction overwrites.
! The table entry for length 0 is 0, which points back at the start of
! the macro, so the length must be in the documented range 1 to 7.

define(load_n_bytes, {

! {load_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8
	sll	$2, 2, $6                 ! delay slot: table offset = length * 4

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5
	mov	0, $4

	ld	[$5], $5

	jmp	%o7+$5
	mov	0, $3

$7.7:
	ldub	[$1+6], $5
	sll	$5, 16, $5
	or	$3, $5, $3
$7.6:
	ldub	[$1+5], $5
	sll	$5, 8, $5
	or	$3, $5, $3
$7.5:
	ldub	[$1+4], $5
	or	$3, $5, $3
$7.4:
	ldub	[$1+3], $5
	sll	$5, 24, $5
	or	$4, $5, $4
$7.3:
	ldub	[$1+2], $5
	sll	$5, 16, $5
	or	$4, $5, $4
$7.2:
	ldub	[$1+1], $5
	sll	$5, 8, $5
	or	$4, $5, $4
$7.1:
	ldub	[$1+0], $5
	ba	$8
	or	$4, $5, $4

	.align 4

$7.jmp.table:
	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})


! {store_little_endian}
!
! parameter 1  address
! parameter 2  source left
! parameter 3  source right
! parameter 4  temporary
! parameter 5  label
!
! Stores parameter 2 to the 4 bytes at the address and parameter 3 to
! the following 4 bytes, least significant byte first. When
! OPENSSL_SYSNAME_ULTRASPARC is set and the address is a multiple of 4,
! two sta instructions with ASI 0x88 are used; otherwise single bytes
! are stored. Parameter 5 and parameter 5 followed by the letter a are
! emitted as labels, so parameter 5 must differ for every use.

define(store_little_endian, {

! {store_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! rightmost in register to first in memory

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	andcc	$1, 3, global0
	bne,pn	%icc, $5
	nop

	sta	$2, [$1] 0x88
	add	$1, 4, $4

	ba,pt	%icc, $5a
	sta	$3, [$4] 0x88
#endif

$5:
	and	$2, 255, $4
	stub	$4, [$1+0]

	srl	$2, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1]

	srl	$2, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2]

	srl	$2, 24, $4
	stub	$4, [$1+3]


	and	$3, 255, $4
	stub	$4, [$1+0+4]

	srl	$3, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1+4]

	srl	$3, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2+4]

	srl	$3, 24, $4
	stub	$4, [$1+3+4]

$5a:

})


! {store_n_bytes}
!
! Stores 1 to 7 bytes little endian
!
! parameter 1  address
! parameter 2  length
! parameter 3  source register left
! parameter 4  source register right
! parameter 5  temp
! parameter 6  temp2
! parameter 7  label
! parameter 8  return label
!
! As coded, bytes 0 to 3 are taken from parameter 4 and bytes 4 to 6
! from parameter 3. The entry point is selected through a jump table
! addressed relative to %o7, which the call instruction overwrites.
! The table entry for length 0 is 0, which points back at the start of
! the macro, so the length must be in the documented range 1 to 7.

define(store_n_bytes, {

! {store_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8
	sll	$2, 2, $6                 ! delay slot: table offset = length * 4

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5

	ld	[$5], $5

	jmp	%o7+$5
	nop

$7.7:
	srl	$3, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+6]
$7.6:
	srl	$3, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+5]
$7.5:
	and	$3, 0xff, $5
	stub	$5, [$1+4]
$7.4:
	srl	$4, 24, $5
	stub	$5, [$1+3]
$7.3:
	srl	$4, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+2]
$7.2:
	srl	$4, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+1]
$7.1:
	and	$4, 0xff, $5


	ba	$8
	stub	$5, [$1]                  ! delay slot: byte 0

	.align 4

$7.jmp.table:

	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})


define(testvalue,{1})

define(register_init, {

! For test purposes:
! puts {testvalue} in local0, copies it to each non-empty parameter 1 to 8
! and then to the fixed list of working registers below.

	sethi	%hi(testvalue), local0
	or	local0, %lo(testvalue), local0

	ifelse($1,{},{}, {mov	local0, $1})
	ifelse($2,{},{}, {mov	local0, $2})
	ifelse($3,{},{}, {mov	local0, $3})
	ifelse($4,{},{}, {mov	local0, $4})
	ifelse($5,{},{}, {mov	local0, $5})
	ifelse($6,{},{}, {mov	local0, $6})
	ifelse($7,{},{}, {mov	local0, $7})
	ifelse($8,{},{}, {mov	local0, $8})

	mov	local0, local1
	mov	local0, local2
	mov	local0, local3
	mov	local0, local4
	mov	local0, local5
	mov	local0, local7
	mov	local0, local6
	mov	local0, out0
	mov	local0, out1
	mov	local0, out2
	mov	local0, out3
	mov	local0, out4
	mov	local0, out5
	mov	local0, global1
	mov	local0, global2
	mov	local0, global3
	mov	local0, global4
	mov	local0, global5

})

.section	".text"

	.align 32

! Internal subroutine: 16 encryption rounds on in5 (work) / out5 (use).
! Returns with retl. The loop label .des_enc.1 is also a call target
! of {ip_macro} when its parameter 6 is 1.

.des_enc:

	! key address in3
	! loads key next encryption/decryption first round from [in4]

	rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)


	.align 32

! Internal subroutine: 16 decryption rounds (key offsets multiplied
! by -1). Returns with retl. The loop label .des_dec.1 is also a call
! target of {ip_macro} when its parameter 6 is 2.

.des_dec:

	! implemented with out5 as first parameter to avoid
	! register exchange in ede modes

	! key address in4
	! loads key next encryption/decryption first round from [in3]

	rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)



! void DES_encrypt1(data, ks, enc)
! *******************************
!
! in0 = data (two 32-bit words, rewritten in place), in1 = ks,
! in2 = enc (0 selects the decryption path at .encrypt.dec).

	.align 32
	.global DES_encrypt1
	.type	 DES_encrypt1,#function

DES_encrypt1:

	save	%sp, FRAME, %sp

	! position independent: global1 = address of .PIC.DES_SPtrans
	! (call leaves the address of label 1 in %o7), out2 = address of .des_and
	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5                ! left
	cmp	in2, 0                    ! enc

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	be,pn	%icc, .encrypt.dec        ! enc/dec
#else
	be	.encrypt.dec
#endif
	ld	[in0+4], out5             ! right

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for move in1 to in3
	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3

	ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)

	rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used

	fp_macro(in5, out5, 1)            ! 1 for store to [in0]

	ret
	restore

.encrypt.dec:

	add	in1, 120, in3             ! use last subkey for first round

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for move in1 to in3
	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec,  ks in4

	fp_macro(out5, in5, 1)            ! 1 for store to [in0]

	ret
	restore

.DES_encrypt1.end:
	.size	 DES_encrypt1,.DES_encrypt1.end-DES_encrypt1


! void DES_encrypt2(data, ks, enc)
!*********************************
!
! in0 = data (two 32-bit words, rewritten in place), in1 = ks,
! in2 = enc (0 selects the decryption path at .encrypt2.dec).

	! encrypts/decrypts without initial/final permutation

	.align 32
	.global DES_encrypt2
	.type	 DES_encrypt2,#function

DES_encrypt2:

	save	%sp, FRAME, %sp

	! position independent: global1 = address of .PIC.DES_SPtrans
	! (call leaves the address of label 1 in %o7), out2 = address of .des_and
	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	! Set sbox address 1 to 6 and rotate halves 3 left
	! Errors caught by destest? Yes. Still? *NO*

	!sethi	%hi(DES_SPtrans), global1 ! address sbox 1

	!or	global1, %lo(DES_SPtrans), global1  ! sbox 1

	add	global1, 256, global2     ! sbox 2
	add	global1, 512, global3     ! sbox 3

	ld	[in0], out5               ! right
	add	global1, 768, global4     ! sbox 4
	add	global1, 1024, global5    ! sbox 5

	ld	[in0+4], in5              ! left
	add	global1, 1280, local6     ! sbox 6
	add	global1, 1792, out3       ! sbox 8

	! rotate

	sll	in5, 3, local5
	mov	in1, in3                  ! key address to in3

	sll	out5, 3, local7
	srl	in5, 29, in5

	srl	out5, 29, out5
	add	in5, local5, in5

	add	out5, local7, out5
	cmp	in2, 0

	! we use our own stackframe
	! (in0 is saved there because it is reused as a temporary below)

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	be,pn	%icc, .encrypt2.dec       ! decryption
#else
	be	.encrypt2.dec
#endif
	STPTR	in0, [%sp+BIAS+ARG0+0*ARGSZ]

	ld	[in3], out0               ! key 7531 first round
	mov	LOOPS, out4               ! loop counter

	ld	[in3+4], out1             ! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	call .des_enc
	mov	in3, in4

	! rotate
	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
	add	out5, in1, out5
	st	in5, [in0]
	st	out5, [in0+4]

	ret
	restore


.encrypt2.dec:

	add	in3, 120, in4

	ld	[in4], out0               ! key 7531 first round
	mov	LOOPS, out4               ! loop counter

	ld	[in4+4], out1             ! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	mov	in5, local1               ! left expected in out5
	mov	out5, in5

	call .des_dec
	mov	local1, out5

.encrypt2.finish:

	! rotate
	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
	add	out5, in1, out5
	st	out5, [in0]
	st	in5, [in0+4]

	ret
	restore

.DES_encrypt2.end:
	.size	 DES_encrypt2, .DES_encrypt2.end-DES_encrypt2


! void DES_encrypt3(data, ks1, ks2, ks3)
! **************************************
!
! in0 = data (two 32-bit words, rewritten in place), in1 = ks1,
! in2 = ks2, in3 = ks3.
! Sequence as coded: encryption rounds with ks1 (entered from
! {ip_macro}), decryption rounds with ks2, encryption rounds with ks3.

	.align 32
	.global DES_encrypt3
	.type	 DES_encrypt3,#function

DES_encrypt3:

	save	%sp, FRAME, %sp

	! position independent: global1 = address of .PIC.DES_SPtrans
	! (call leaves the address of label 1 in %o7), out2 = address of .des_and
	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5                ! left
	add	in2, 120, in4             ! ks2

	ld	[in0+4], out5             ! right
	mov	in3, in2                  ! save ks3

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)

	call	.des_dec
	mov	in2, in3                  ! preload ks3

	call	.des_enc
	nop

	fp_macro(in5, out5, 1)

	ret
	restore

.DES_encrypt3.end:
	.size	 DES_encrypt3,.DES_encrypt3.end-DES_encrypt3


! void DES_decrypt3(data, ks1, ks2, ks3)
! **************************************
!
! in0 = data (two 32-bit words, rewritten in place), in1 = ks1,
! in2 = ks2, in3 = ks3.
! Sequence as coded: decryption rounds with ks3 (entered from
! {ip_macro}), encryption rounds with ks2, decryption rounds with ks1.

	.align 32
	.global DES_decrypt3
	.type	 DES_decrypt3,#function

DES_decrypt3:

	save	%sp, FRAME, %sp

	! position independent: global1 = address of .PIC.DES_SPtrans
	! (call leaves the address of label 1 in %o7), out2 = address of .des_and
	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5                ! left
	add	in3, 120, in4             ! ks3

	ld	[in0+4], out5             ! right
	mov	in2, in3                  ! ks2

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)

	call	.des_enc
	add	in1, 120, in4             ! preload ks1

	call	.des_dec
	nop

	fp_macro(out5, in5, 1)

	ret
	restore

.DES_decrypt3.end:
	.size	 DES_decrypt3,.DES_decrypt3.end-DES_decrypt3

! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
! *****************************************************************


	.align 32
	.global DES_ncbc_encrypt
	.type	 DES_ncbc_encrypt,#function

DES_ncbc_encrypt:

	save	%sp, FRAME, %sp

	define({INPUT},  { [%sp+BIAS+ARG0+0*ARGSZ] })
	define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
	define({IVEC},   { [%sp+BIAS+ARG0+4*ARGSZ] })

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	cmp	in5, 0                    ! enc

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	be,pn	%icc, .ncbc.dec
#else
	be	.ncbc.dec
#endif
	STPTR	in4, IVEC

	! addr  left  right  temp  label
	load_little_endian(in4, in5, out5, local3, .LLE1)  ! iv

	addcc	in2, -8, in2              ! bytes missing when first block done

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bl,pn	%icc, .ncbc.enc.seven.or.less
#else
	bl	.ncbc.enc.seven.or.less
#endif
	mov	in3, in4                  ! schedule

.ncbc.enc.next.block:

	load_little_endian(in0, out4, global4, local3, .LLE2)  ! block

.ncbc.enc.next.block_1:

	xor	in5, out4, in5            ! iv xor
	xor	out5, global4, out5       ! iv xor

	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
	ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)

.ncbc.enc.next.block_2:

!//	call .des_enc                     ! compares in2 to 8
!	rounds inlined for alignment purposes

	add	global1, 768, global4     ! address sbox 4 since register used below

	rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4)  !
include encryption ks in3 1505 1506#ifdef OPENSSL_SYSNAME_ULTRASPARC 1507 bl,pn %icc, .ncbc.enc.next.block_fp 1508#else 1509 bl .ncbc.enc.next.block_fp 1510#endif 1511 add in0, 8, in0 ! input address 1512 1513 ! If 8 or more bytes are to be encrypted after this block, 1514 ! we combine final permutation for this block with initial 1515 ! permutation for next block. Load next block: 1516 1517 load_little_endian(in0, global3, global4, local5, .LLE12) 1518 1519 ! parameter 1 original left 1520 ! parameter 2 original right 1521 ! parameter 3 left ip 1522 ! parameter 4 right ip 1523 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 1524 ! 2: mov in4 to in3 1525 ! 1526 ! also adds -8 to length in2 and loads loop counter to out4 1527 1528 fp_ip_macro(out0, out1, global3, global4, 2) 1529 1530 store_little_endian(in1, out0, out1, local3, .SLE10) ! block 1531 1532 ld [in3], out0 ! key 7531 first round next block 1533 mov in5, local1 1534 xor global3, out5, in5 ! iv xor next block 1535 1536 ld [in3+4], out1 ! key 8642 1537 add global1, 512, global3 ! address sbox 3 since register used 1538 xor global4, local1, out5 ! iv xor next block 1539 1540 ba .ncbc.enc.next.block_2 1541 add in1, 8, in1 ! output adress 1542 1543.ncbc.enc.next.block_fp: 1544 1545 fp_macro(in5, out5) 1546 1547 store_little_endian(in1, in5, out5, local3, .SLE1) ! block 1548 1549 addcc in2, -8, in2 ! bytes missing when next block done 1550 1551#ifdef OPENSSL_SYSNAME_ULTRASPARC 1552 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0 1553#else 1554 bpos .ncbc.enc.next.block 1555#endif 1556 add in1, 8, in1 1557 1558.ncbc.enc.seven.or.less: 1559 1560 cmp in2, -8 1561 1562#ifdef OPENSSL_SYSNAME_ULTRASPARC 1563 ble,pt %icc, .ncbc.enc.finish 1564#else 1565 ble .ncbc.enc.finish 1566#endif 1567 nop 1568 1569 add in2, 8, local1 ! bytes to load 1570 1571 ! 
addr, length, dest left, dest right, temp, temp2, label, ret label 1572 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1) 1573 1574 ! Loads 1 to 7 bytes little endian to global4, out4 1575 1576 1577.ncbc.enc.finish: 1578 1579 LDPTR IVEC, local4 1580 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec 1581 1582 ret 1583 restore 1584 1585 1586.ncbc.dec: 1587 1588 STPTR in0, INPUT 1589 cmp in2, 0 ! length 1590 add in3, 120, in3 1591 1592 LDPTR IVEC, local7 ! ivec 1593#ifdef OPENSSL_SYSNAME_ULTRASPARC 1594 ble,pn %icc, .ncbc.dec.finish 1595#else 1596 ble .ncbc.dec.finish 1597#endif 1598 mov in3, in4 ! schedule 1599 1600 STPTR in1, OUTPUT 1601 mov in0, local5 ! input 1602 1603 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec 1604 1605.ncbc.dec.next.block: 1606 1607 load_little_endian(local5, in5, out5, local3, .LLE4) ! block 1608 1609 ! parameter 6 1/2 for include encryption/decryption 1610 ! parameter 7 1 for mov in1 to in3 1611 ! parameter 8 1 for mov in3 to in4 1612 1613 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryprion ks in4 1614 1615 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7 1616 1617 ! in2 is bytes left to be stored 1618 ! in2 is compared to 8 in the rounds 1619 1620 xor out5, in0, out4 ! iv xor 1621#ifdef OPENSSL_SYSNAME_ULTRASPARC 1622 bl,pn %icc, .ncbc.dec.seven.or.less 1623#else 1624 bl .ncbc.dec.seven.or.less 1625#endif 1626 xor in5, in1, global4 ! iv xor 1627 1628 ! Load ivec next block now, since input and output address might be the same. 1629 1630 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! 
iv 1631 1632 store_little_endian(local7, out4, global4, local3, .SLE3) 1633 1634 STPTR local5, INPUT 1635 add local7, 8, local7 1636 addcc in2, -8, in2 1637 1638#ifdef OPENSSL_SYSNAME_ULTRASPARC 1639 bg,pt %icc, .ncbc.dec.next.block 1640#else 1641 bg .ncbc.dec.next.block 1642#endif 1643 STPTR local7, OUTPUT 1644 1645 1646.ncbc.dec.store.iv: 1647 1648 LDPTR IVEC, local4 ! ivec 1649 store_little_endian(local4, in0, in1, local5, .SLE4) 1650 1651.ncbc.dec.finish: 1652 1653 ret 1654 restore 1655 1656.ncbc.dec.seven.or.less: 1657 1658 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec 1659 1660 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) 1661 1662 1663.DES_ncbc_encrypt.end: 1664 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt 1665 1666 1667! void DES_ede3_cbc_encrypt(input, output, lenght, ks1, ks2, ks3, ivec, enc) 1668! ************************************************************************** 1669 1670 1671 .align 32 1672 .global DES_ede3_cbc_encrypt 1673 .type DES_ede3_cbc_encrypt,#function 1674 1675DES_ede3_cbc_encrypt: 1676 1677 save %sp, FRAME, %sp 1678 1679 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] }) 1680 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] }) 1681 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] }) 1682 1683 sethi %hi(.PIC.DES_SPtrans-1f),global1 1684 or global1,%lo(.PIC.DES_SPtrans-1f),global1 16851: call .+8 1686 add %o7,global1,global1 1687 sub global1,.PIC.DES_SPtrans-.des_and,out2 1688 1689 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc 1690 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1691 cmp local3, 0 ! enc 1692 1693#ifdef OPENSSL_SYSNAME_ULTRASPARC 1694 be,pn %icc, .ede3.dec 1695#else 1696 be .ede3.dec 1697#endif 1698 STPTR in4, KS2 1699 1700 STPTR in5, KS3 1701 1702 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec 1703 1704 addcc in2, -8, in2 ! 
bytes missing after next block 1705 1706#ifdef OPENSSL_SYSNAME_ULTRASPARC 1707 bl,pn %icc, .ede3.enc.seven.or.less 1708#else 1709 bl .ede3.enc.seven.or.less 1710#endif 1711 STPTR in3, KS1 1712 1713.ede3.enc.next.block: 1714 1715 load_little_endian(in0, out4, global4, local3, .LLE7) 1716 1717.ede3.enc.next.block_1: 1718 1719 LDPTR KS2, in4 1720 xor in5, out4, in5 ! iv xor 1721 xor out5, global4, out5 ! iv xor 1722 1723 LDPTR KS1, in3 1724 add in4, 120, in4 ! for decryption we use last subkey first 1725 nop 1726 1727 ip_macro(in5, out5, in5, out5, in3) 1728 1729.ede3.enc.next.block_2: 1730 1731 call .des_enc ! ks1 in3 1732 nop 1733 1734 call .des_dec ! ks2 in4 1735 LDPTR KS3, in3 1736 1737 call .des_enc ! ks3 in3 compares in2 to 8 1738 nop 1739 1740#ifdef OPENSSL_SYSNAME_ULTRASPARC 1741 bl,pn %icc, .ede3.enc.next.block_fp 1742#else 1743 bl .ede3.enc.next.block_fp 1744#endif 1745 add in0, 8, in0 1746 1747 ! If 8 or more bytes are to be encrypted after this block, 1748 ! we combine final permutation for this block with initial 1749 ! permutation for next block. Load next block: 1750 1751 load_little_endian(in0, global3, global4, local5, .LLE11) 1752 1753 ! parameter 1 original left 1754 ! parameter 2 original right 1755 ! parameter 3 left ip 1756 ! parameter 4 right ip 1757 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 1758 ! 2: mov in4 to in3 1759 ! 1760 ! also adds -8 to length in2 and loads loop counter to out4 1761 1762 fp_ip_macro(out0, out1, global3, global4, 1) 1763 1764 store_little_endian(in1, out0, out1, local3, .SLE9) ! block 1765 1766 mov in5, local1 1767 xor global3, out5, in5 ! iv xor next block 1768 1769 ld [in3], out0 ! key 7531 1770 add global1, 512, global3 ! address sbox 3 1771 xor global4, local1, out5 ! iv xor next block 1772 1773 ld [in3+4], out1 ! key 8642 1774 add global1, 768, global4 ! 
address sbox 4 1775 ba .ede3.enc.next.block_2 1776 add in1, 8, in1 1777 1778.ede3.enc.next.block_fp: 1779 1780 fp_macro(in5, out5) 1781 1782 store_little_endian(in1, in5, out5, local3, .SLE5) ! block 1783 1784 addcc in2, -8, in2 ! bytes missing when next block done 1785 1786#ifdef OPENSSL_SYSNAME_ULTRASPARC 1787 bpos,pt %icc, .ede3.enc.next.block 1788#else 1789 bpos .ede3.enc.next.block 1790#endif 1791 add in1, 8, in1 1792 1793.ede3.enc.seven.or.less: 1794 1795 cmp in2, -8 1796 1797#ifdef OPENSSL_SYSNAME_ULTRASPARC 1798 ble,pt %icc, .ede3.enc.finish 1799#else 1800 ble .ede3.enc.finish 1801#endif 1802 nop 1803 1804 add in2, 8, local1 ! bytes to load 1805 1806 ! addr, length, dest left, dest right, temp, temp2, label, ret label 1807 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1) 1808 1809.ede3.enc.finish: 1810 1811 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1812 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec 1813 1814 ret 1815 restore 1816 1817.ede3.dec: 1818 1819 STPTR in0, INPUT 1820 add in5, 120, in5 1821 1822 STPTR in1, OUTPUT 1823 mov in0, local5 1824 add in3, 120, in3 1825 1826 STPTR in3, KS1 1827 cmp in2, 0 1828 1829#ifdef OPENSSL_SYSNAME_ULTRASPARC 1830 ble %icc, .ede3.dec.finish 1831#else 1832 ble .ede3.dec.finish 1833#endif 1834 STPTR in5, KS3 1835 1836 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv 1837 load_little_endian(local7, in0, in1, local3, .LLE8) 1838 1839.ede3.dec.next.block: 1840 1841 load_little_endian(local5, in5, out5, local3, .LLE9) 1842 1843 ! parameter 6 1/2 for include encryption/decryption 1844 ! parameter 7 1 for mov in1 to in3 1845 ! parameter 8 1 for mov in3 to in4 1846 ! parameter 9 1 for load ks3 and ks2 to in4 and in3 1847 1848 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4 1849 1850 call .des_enc ! ks2 in3 1851 LDPTR KS1, in4 1852 1853 call .des_dec ! ks1 in4 1854 nop 1855 1856 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7 1857 1858 ! 
in2 is bytes left to be stored 1859 ! in2 is compared to 8 in the rounds 1860 1861 xor out5, in0, out4 1862#ifdef OPENSSL_SYSNAME_ULTRASPARC 1863 bl,pn %icc, .ede3.dec.seven.or.less 1864#else 1865 bl .ede3.dec.seven.or.less 1866#endif 1867 xor in5, in1, global4 1868 1869 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block 1870 1871 store_little_endian(local7, out4, global4, local3, .SLE7) ! block 1872 1873 STPTR local5, INPUT 1874 addcc in2, -8, in2 1875 add local7, 8, local7 1876 1877#ifdef OPENSSL_SYSNAME_ULTRASPARC 1878 bg,pt %icc, .ede3.dec.next.block 1879#else 1880 bg .ede3.dec.next.block 1881#endif 1882 STPTR local7, OUTPUT 1883 1884.ede3.dec.store.iv: 1885 1886 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1887 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec 1888 1889.ede3.dec.finish: 1890 1891 ret 1892 restore 1893 1894.ede3.dec.seven.or.less: 1895 1896 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv 1897 1898 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv) 1899 1900 1901.DES_ede3_cbc_encrypt.end: 1902 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt 1903 1904 .align 256 1905 .type .des_and,#object 1906 .size .des_and,284 1907 1908.des_and: 1909 1910! This table is used for AND 0xFC when it is known that register 1911! bits 8-31 are zero. Makes it possible to do three arithmetic 1912! operations in one cycle. 

        ! 256 lookup bytes: the byte at offset i holds i AND 0xFC.

        .byte  0, 0, 0, 0, 4, 4, 4, 4
        .byte  8, 8, 8, 8, 12, 12, 12, 12
        .byte  16, 16, 16, 16, 20, 20, 20, 20
        .byte  24, 24, 24, 24, 28, 28, 28, 28
        .byte  32, 32, 32, 32, 36, 36, 36, 36
        .byte  40, 40, 40, 40, 44, 44, 44, 44
        .byte  48, 48, 48, 48, 52, 52, 52, 52
        .byte  56, 56, 56, 56, 60, 60, 60, 60
        .byte  64, 64, 64, 64, 68, 68, 68, 68
        .byte  72, 72, 72, 72, 76, 76, 76, 76
        .byte  80, 80, 80, 80, 84, 84, 84, 84
        .byte  88, 88, 88, 88, 92, 92, 92, 92
        .byte  96, 96, 96, 96, 100, 100, 100, 100
        .byte  104, 104, 104, 104, 108, 108, 108, 108
        .byte  112, 112, 112, 112, 116, 116, 116, 116
        .byte  120, 120, 120, 120, 124, 124, 124, 124
        .byte  128, 128, 128, 128, 132, 132, 132, 132
        .byte  136, 136, 136, 136, 140, 140, 140, 140
        .byte  144, 144, 144, 144, 148, 148, 148, 148
        .byte  152, 152, 152, 152, 156, 156, 156, 156
        .byte  160, 160, 160, 160, 164, 164, 164, 164
        .byte  168, 168, 168, 168, 172, 172, 172, 172
        .byte  176, 176, 176, 176, 180, 180, 180, 180
        .byte  184, 184, 184, 184, 188, 188, 188, 188
        .byte  192, 192, 192, 192, 196, 196, 196, 196
        .byte  200, 200, 200, 200, 204, 204, 204, 204
        .byte  208, 208, 208, 208, 212, 212, 212, 212
        .byte  216, 216, 216, 216, 220, 220, 220, 220
        .byte  224, 224, 224, 224, 228, 228, 228, 228
        .byte  232, 232, 232, 232, 236, 236, 236, 236
        .byte  240, 240, 240, 240, 244, 244, 244, 244
        .byte  248, 248, 248, 248, 252, 252, 252, 252

        ! 5 numbers for initial/final permutation
        ! (the offsets in the comments below are relative to .des_and)

        .word   0x0f0f0f0f                ! offset 256
        .word   0x0000ffff                ! 260
        .word   0x33333333                ! 264
        .word   0x00ff00ff                ! 268
        .word   0x55555555                ! 272

        .word   0                         ! 276
        .word   LOOPS                     ! 280
        .word   0x0000FC00                ! 284

        ! Combined S/P lookup table: 8 groups of 64 words (256 bytes each),
        ! 2048 bytes in total as declared by the .size directive below.
        ! The code earlier in this file addresses byte offset 512 as sbox 3
        ! and byte offset 768 as sbox 4, so group n here is sbox n+1 there.

        .global DES_SPtrans
        .type    DES_SPtrans,#object
        .size    DES_SPtrans,2048
.align 64
DES_SPtrans:
.PIC.DES_SPtrans:
        ! nibble 0 (byte offset 0)
        .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
        .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
        .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
        .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
        .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
        .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
        .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
        .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
        .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
        .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
        .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
        .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
        .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
        .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
        .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
        .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
        ! nibble 1 (byte offset 256)
        .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
        .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
        .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
        .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
        .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
        .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
        .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
        .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
        .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
        .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
        .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
        .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
        .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
        .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
        .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
        .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
        ! nibble 2 (byte offset 512)
        .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
        .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
        .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
        .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
        .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
        .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
        .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
        .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
        .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
        .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
        .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
        .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
        .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
        .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
        .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
        .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
        ! nibble 3 (byte offset 768)
        .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
        .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
        .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
        .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
        .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
        .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
        .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
        .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
        .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
        .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
        .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
        .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
        .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
        .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
        .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
        .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
        ! nibble 4 (byte offset 1024)
        .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
        .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
        .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
        .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
        .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
        .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
        .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
        .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
        .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
        .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
        .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
        .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
        .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
        .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
        .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
        .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
        ! nibble 5 (byte offset 1280)
        .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
        .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
        .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
        .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
        .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
        .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
        .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
        .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
        .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
        .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
        .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
        .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
        .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
        .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
        .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
        .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
        ! nibble 6 (byte offset 1536)
        .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
        .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
        .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
        .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
        .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
        .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
        .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
        .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
        .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
        .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
        .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
        .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
        .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
        .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
        .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
        .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
        ! nibble 7 (byte offset 1792)
        .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
        .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
        .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
        .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
        .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
        .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
        .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
        .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
        .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
        .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
        .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
        .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
        .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
        .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
        .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
        .word 0x20000000, 0x20800080, 0x00020000, 0x00820080
