#!/usr/bin/env perl

# ====================================================================
# Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
# <appro@openssl.org>. The module is licensed under 2-clause BSD
# license. October 2012. All rights reserved.
# ====================================================================

######################################################################
# Camellia for SPARC T4.
#
# As with AES, the results below [for aligned data] are virtually
# identical to critical path lengths for 3-cycle instruction latency:
#
#		128-bit key	192/256-
# CBC encrypt	4.14/4.21(*)	5.46/5.52
#			 (*) numbers after slash are for
#			     misaligned data.
#
# As with Intel AES-NI, the question is whether it's possible to improve
# performance of parallelizable modes by interleaving round
# instructions. In Camellia every instruction is dependent on the
# previous one, which means that there is room for 2 additional ones
# in between two dependent instructions. Can we expect 3x performance
# improvement? At least one can argue that it should be possible to
# break the 2x barrier... For some reason not even 2x appears to be
# possible:
#
#		128-bit key	192/256-
# CBC decrypt	2.21/2.74	2.99/3.40
# CTR		2.15/2.68(*)	2.93/3.34
#			 (*) numbers after slash are for
#			     misaligned data.
#
# This is for 2x interleave. But compared to 1x interleave CBC decrypt
# improved by ... 0% for 128-bit key, and 11% for 192/256-bit one.
# So out-of-order execution logic can take non-interleaved code
# to 1.87x, but can't take the 2x interleaved one any further. There
# surely is some explanation... As a result 3x interleave was not even
# attempted. Instead an effort was made to share specific modes
# implementations with the AES module (hence sparcv9_modes.pl).
#
# To anchor to something else, software C implementation processes
# one byte in 38 cycles with 128-bit key on same processor.
# Directory part of the script path (including the trailing separator),
# or "" when the script is invoked without any directory component.
# The match result is checked explicitly so that $1 is never read after
# a failed match.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

&asm_init(@ARGV);

$::evp=1;	# if $evp is set to 0, script generates module with
# Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
# entry points. These are fully compatible with openssl/camellia.h.

######################################################################
# single-round subroutines
#
# cmll_t4_encrypt and cmll_t4_decrypt each transform one 16-byte block.
# Arguments arrive in %o0 (input), %o1 (output) and %o2 (key schedule).
# Misaligned input is handled by loading three 64-bit words and
# shifting; misaligned output by faligndata plus partial stores.
# The grand-round count is read from offset 272 (0x110) of the key
# schedule, the same offset cmll_t4_set_key writes it to. Decryption
# starts at key + 64*grandRounds and walks the schedule backwards.
#
{
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));

$code=<<___;
.text

.globl	cmll_t4_encrypt
.align	32
cmll_t4_encrypt:
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		$inp, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[$key + 272], $rounds	! grandRounds, 3 or 4
	ldd		[$key + 16], %f12
	ldd		[$key + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	ldd		[$key + 32], %f16
	ldd		[$key + 40], %f18
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	ldd		[$key + 48], %f20
	ldd		[$key + 56], %f22
	sub		$rounds, 1, $rounds
	ldd		[$key + 64], %f24
	ldd		[$key + 72], %f26
	add		$key, 80, $key

.Lenc:
	camellia_f	%f12, %f2, %f0, %f2
	ldd		[$key + 0], %f12
	sub		$rounds,1,$rounds
	camellia_f	%f14, %f0, %f2, %f0
	ldd		[$key + 8], %f14
	camellia_f	%f16, %f2, %f0, %f2
	ldd		[$key + 16], %f16
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	ldd		[$key + 32], %f20
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key + 40], %f22
	camellia_fl	%f24, %f0, %f0
	ldd		[$key + 48], %f24
	camellia_fli	%f26, %f2, %f2
	ldd		[$key + 56], %f26
	brnz,pt		$rounds, .Lenc
	add		$key, 64, $key

	andcc		$out, 7, $tmp		! is output aligned?
	camellia_f	%f12, %f2, %f0, %f2
	camellia_f	%f14, %f0, %f2, %f0
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f20, %f2, %f0, %f4
	camellia_f	%f22, %f0, %f4, %f2
	fxor		%f24, %f4, %f0
	fxor		%f26, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	cmll_t4_encrypt,#function
.size	cmll_t4_encrypt,.-cmll_t4_encrypt

.globl	cmll_t4_decrypt
.align	32
cmll_t4_decrypt:
	ld		[$key + 272], $rounds	! grandRounds, 3 or 4
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	sll		$rounds, 6, $rounds
	add		$rounds, $key, $key

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
	sll		%g1, 3, %g1
	sub		%g0, %g1, %g4
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %g4, %o5
	srlx		$inp, %g4, %g4
	or		%o5, %o4, %o4
	or		%g4, %g1, %o5
1:
	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5
	ldd		[$key - 8], %f12
	ldd		[$key - 16], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	ldd		[$key - 24], %f16
	ldd		[$key - 32], %f18
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	ldd		[$key - 40], %f20
	ldd		[$key - 48], %f22
	sub		$rounds, 64, $rounds
	ldd		[$key - 56], %f24
	ldd		[$key - 64], %f26
	sub		$key, 64, $key

.Ldec:
	camellia_f	%f12, %f2, %f0, %f2
	ldd		[$key - 8], %f12
	sub		$rounds, 64, $rounds
	camellia_f	%f14, %f0, %f2, %f0
	ldd		[$key - 16], %f14
	camellia_f	%f16, %f2, %f0, %f2
	ldd		[$key - 24], %f16
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key - 32], %f18
	camellia_f	%f20, %f2, %f0, %f2
	ldd		[$key - 40], %f20
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key - 48], %f22
	camellia_fl	%f24, %f0, %f0
	ldd		[$key - 56], %f24
	camellia_fli	%f26, %f2, %f2
	ldd		[$key - 64], %f26
	brnz,pt		$rounds, .Ldec
	sub		$key, 64, $key

	andcc		$out, 7, $tmp		! is output aligned?
	camellia_f	%f12, %f2, %f0, %f2
	camellia_f	%f14, %f0, %f2, %f0
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f20, %f2, %f0, %f4
	camellia_f	%f22, %f0, %f4, %f2
	fxor		%f26, %f4, %f0
	fxor		%f24, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	cmll_t4_decrypt,#function
.size	cmll_t4_decrypt,.-cmll_t4_decrypt
___
}

######################################################################
# key setup subroutines
#
# cmll_t4_set_key takes the user key in %o0, the key length in bits in
# %o1 and the output key schedule in %o2. The length is compared with
# 192 once; "less" selects the 128-bit path, "equal" the 192-bit path
# and anything else the 256-bit path. The same condition codes are
# re-tested later by the bge that selects .L256key. The routine stores
# the grand-round count (3 or 4) at offset 0x110 of the schedule and
# returns 0 in %o0.
#
{
# ROTL128($rot) returns the assembly text that rotates the 128-bit
# value held in %o4 (high half) and %o5 (low half) left by $rot bits.
# %g4 and %g5 are used as scratch registers. Because the sequence
# shifts right by 64-$rot, $rot must be in the range 1..63; every call
# site in this file satisfies that.
sub ROTL128 {
  my $rot = shift;

	"srlx	%o4, 64-$rot, %g4\n\t".
	"sllx	%o4, $rot, %o4\n\t".
	"srlx	%o5, 64-$rot, %g5\n\t".
	"sllx	%o5, $rot, %o5\n\t".
	"or	%o4, %g5, %o4\n\t".
	"or	%o5, %g4, %o5";
}

# Only four names are bound; the map yields six registers and the last
# two (%o4, %o5) are used by their literal names in the code below.
my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
$code.=<<___;
.globl	cmll_t4_set_key
.align	32
cmll_t4_set_key:
	and		$inp, 7, $tmp
	alignaddr	$inp, %g0, $inp
	cmp		$bits, 192
	ldd		[$inp + 0], %f0
	bl,pt		%icc,.L128
	ldd		[$inp + 8], %f2

	be,pt		%icc,.L192
	ldd		[$inp + 16], %f4

	brz,pt		$tmp, .L256aligned
	ldd		[$inp + 24], %f6

	ldd		[$inp + 32], %f8
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	b		.L256aligned
	faligndata	%f6, %f8, %f6

.align	16
.L192:
	brz,a,pt	$tmp, .L256aligned
	fnot2		%f4, %f6

	ldd		[$inp + 24], %f6
	nop
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	fnot2		%f4, %f6

.L256aligned:
	std		%f0, [$out + 0]		! k[0, 1]
	fsrc2		%f0, %f28
	std		%f2, [$out + 8]		! k[2, 3]
	fsrc2		%f2, %f30
	fxor		%f4, %f0, %f0
	b		.L128key
	fxor		%f6, %f2, %f2

.align	16
.L128:
	brz,pt		$tmp, .L128aligned
	nop

	ldd		[$inp + 16], %f4
	nop
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2

.L128aligned:
	std		%f0, [$out + 0]		! k[0, 1]
	fsrc2		%f0, %f28
	std		%f2, [$out + 8]		! k[2, 3]
	fsrc2		%f2, %f30

.L128key:
	mov		%o7, %o5
1:	call		.+8
	add		%o7, SIGMA-1b, %o4
	mov		%o5, %o7

	ldd		[%o4 + 0], %f16
	ldd		[%o4 + 8], %f18
	ldd		[%o4 + 16], %f20
	ldd		[%o4 + 24], %f22

	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	fxor		%f28, %f0, %f0
	fxor		%f30, %f2, %f2
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0

	bge,pn		%icc, .L256key
	nop
	std	%f0, [$out + 0x10]	! k[ 4,  5]
	std	%f2, [$out + 0x18]	! k[ 6,  7]

	movdtox	%f0, %o4
	movdtox	%f2, %o5
	`&ROTL128(15)`
	stx	%o4, [$out + 0x30]	! k[12, 13]
	stx	%o5, [$out + 0x38]	! k[14, 15]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x40]	! k[16, 17]
	stx	%o5, [$out + 0x48]	! k[18, 19]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x60]	! k[24, 25]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x70]	! k[28, 29]
	stx	%o5, [$out + 0x78]	! k[30, 31]
	`&ROTL128(34)`
	stx	%o4, [$out + 0xa0]	! k[40, 41]
	stx	%o5, [$out + 0xa8]	! k[42, 43]
	`&ROTL128(17)`
	stx	%o4, [$out + 0xc0]	! k[48, 49]
	stx	%o5, [$out + 0xc8]	! k[50, 51]

	movdtox	%f28, %o4		! k[ 0,  1]
	movdtox	%f30, %o5		! k[ 2,  3]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x20]	! k[ 8,  9]
	stx	%o5, [$out + 0x28]	! k[10, 11]
	`&ROTL128(30)`
	stx	%o4, [$out + 0x50]	! k[20, 21]
	stx	%o5, [$out + 0x58]	! k[22, 23]
	`&ROTL128(15)`
	stx	%o5, [$out + 0x68]	! k[26, 27]
	`&ROTL128(17)`
	stx	%o4, [$out + 0x80]	! k[32, 33]
	stx	%o5, [$out + 0x88]	! k[34, 35]
	`&ROTL128(17)`
	stx	%o4, [$out + 0x90]	! k[36, 37]
	stx	%o5, [$out + 0x98]	! k[38, 39]
	`&ROTL128(17)`
	stx	%o4, [$out + 0xb0]	! k[44, 45]
	stx	%o5, [$out + 0xb8]	! k[46, 47]

	mov		3, $tmp
	st		$tmp, [$out + 0x110]
	retl
	xor		%o0, %o0, %o0

.align	16
.L256key:
	ldd		[%o4 + 32], %f24
	ldd		[%o4 + 40], %f26

	std		%f0, [$out + 0x30]	! k[12, 13]
	std		%f2, [$out + 0x38]	! k[14, 15]

	fxor		%f4, %f0, %f0
	fxor		%f6, %f2, %f2
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0

	std	%f0, [$out + 0x10]	! k[ 4,  5]
	std	%f2, [$out + 0x18]	! k[ 6,  7]

	movdtox	%f0, %o4
	movdtox	%f2, %o5
	`&ROTL128(30)`
	stx	%o4, [$out + 0x50]	! k[20, 21]
	stx	%o5, [$out + 0x58]	! k[22, 23]
	`&ROTL128(30)`
	stx	%o4, [$out + 0xa0]	! k[40, 41]
	stx	%o5, [$out + 0xa8]	! k[42, 43]
	`&ROTL128(51)`
	stx	%o4, [$out + 0x100]	! k[64, 65]
	stx	%o5, [$out + 0x108]	! k[66, 67]

	movdtox	%f4, %o4		! k[ 8,  9]
	movdtox	%f6, %o5		! k[10, 11]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x20]	! k[ 8,  9]
	stx	%o5, [$out + 0x28]	! k[10, 11]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x40]	! k[16, 17]
	stx	%o5, [$out + 0x48]	! k[18, 19]
	`&ROTL128(30)`
	stx	%o4, [$out + 0x90]	! k[36, 37]
	stx	%o5, [$out + 0x98]	! k[38, 39]
	`&ROTL128(34)`
	stx	%o4, [$out + 0xd0]	! k[52, 53]
	stx	%o5, [$out + 0xd8]	! k[54, 55]
	ldx	[$out + 0x30], %o4	! k[12, 13]
	ldx	[$out + 0x38], %o5	! k[14, 15]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x30]	! k[12, 13]
	stx	%o5, [$out + 0x38]	! k[14, 15]
	`&ROTL128(30)`
	stx	%o4, [$out + 0x70]	! k[28, 29]
	stx	%o5, [$out + 0x78]	! k[30, 31]
	srlx	%o4, 32, %g4
	srlx	%o5, 32, %g5
	st	%o4, [$out + 0xc0]	! k[48]
	st	%g5, [$out + 0xc4]	! k[49]
	st	%o5, [$out + 0xc8]	! k[50]
	st	%g4, [$out + 0xcc]	! k[51]
	`&ROTL128(49)`
	stx	%o4, [$out + 0xe0]	! k[56, 57]
	stx	%o5, [$out + 0xe8]	! k[58, 59]

	movdtox	%f28, %o4		! k[ 0,  1]
	movdtox	%f30, %o5		! k[ 2,  3]
	`&ROTL128(45)`
	stx	%o4, [$out + 0x60]	! k[24, 25]
	stx	%o5, [$out + 0x68]	! k[26, 27]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x80]	! k[32, 33]
	stx	%o5, [$out + 0x88]	! k[34, 35]
	`&ROTL128(17)`
	stx	%o4, [$out + 0xb0]	! k[44, 45]
	stx	%o5, [$out + 0xb8]	! k[46, 47]
	`&ROTL128(34)`
	stx	%o4, [$out + 0xf0]	! k[60, 61]
	stx	%o5, [$out + 0xf8]	! k[62, 63]

	mov		4, $tmp
	st		$tmp, [$out + 0x110]
	retl
	xor		%o0, %o0, %o0
.type	cmll_t4_set_key,#function
.size	cmll_t4_set_key,.-cmll_t4_set_key
.align	32
SIGMA:
	.long	0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
	.long	0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
	.long	0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
.type	SIGMA,#object
.size	SIGMA,.-SIGMA
.asciz	"Camellia for SPARC T4, David S. Miller, Andy Polyakov"
___
}

######################################################################
# helpers for the shared mode implementations
#
# This section emits the internal _cmll{128,256}_load_{enc,dec}key and
# _cmll{128,256}_{en,de}crypt_{1x,2x} routines and then invokes the
# alg_*_implement generators from sparcv9_modes.pl.
# NOTE(review): presumably those generators emit code that calls the
# helpers by these names; confirm against sparcv9_modes.pl.
#
# The 1x routines transform one block held in %f0:%f2; the 2x routines
# transform two blocks, in %f0:%f2 and %f4:%f6, with their instructions
# interleaved. The 256-bit variants reuse %f16-%f30 for the part of the
# key schedule that is not preloaded and reload the original contents
# of those registers before returning.
#
{{{
my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
# Not referenced by the code visible in this file.
my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));

$code.=<<___;
.align	32
_cmll128_load_enckey:
	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5
___
# key + 16 .. key + 200 go to %f16 .. %f62, in ascending order
for ($i=2; $i<26;$i++) {			# load key schedule
    $code.=<<___;
	ldd		[$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
	retl
	nop
.type	_cmll128_load_enckey,#function
.size	_cmll128_load_enckey,.-_cmll128_load_enckey
_cmll256_load_enckey=_cmll128_load_enckey

.align	32
_cmll256_load_deckey:
	ldd		[$key + 64], %f62
	ldd		[$key + 72], %f60
	b		.Load_deckey
	add		$key, 64, $key
_cmll128_load_deckey:
	ldd		[$key + 0], %f60
	ldd		[$key + 8], %f62
.Load_deckey:
___
# key + 16 .. key + 184 go to %f58 .. %f16, i.e. in descending order
for ($i=2; $i<24;$i++) {			# load key schedule
    $code.=<<___;
	ldd		[$key + `8*$i`], %f`62-2*$i`
___
}
$code.=<<___;
	ldx		[$key + 192], %g4
	retl
	ldx		[$key + 200], %g5
.type	_cmll256_load_deckey,#function
.size	_cmll256_load_deckey,.-_cmll256_load_deckey

.align	32
_cmll128_encrypt_1x:
___
# Three groups of rounds; the first two are followed by the
# camellia_fl/camellia_fli pair, the last one is finished by hand below.
for ($i=0; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
___
$code.=<<___ if ($i<2);
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0,      %f0
	camellia_fli	%f`16+16*$i+14`, %f2,      %f2
___
}
$code.=<<___;
	camellia_f	%f56, %f2, %f0, %f4
	camellia_f	%f58, %f0, %f4, %f2
	fxor		%f60, %f4, %f0
	retl
	fxor		%f62, %f2, %f2
.type	_cmll128_encrypt_1x,#function
.size	_cmll128_encrypt_1x,.-_cmll128_encrypt_1x
_cmll128_decrypt_1x=_cmll128_encrypt_1x

.align	32
_cmll128_encrypt_2x:
___
for ($i=0; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
___
$code.=<<___ if ($i<2);
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0,      %f0
	camellia_fl	%f`16+16*$i+12`, %f4,      %f4
	camellia_fli	%f`16+16*$i+14`, %f2,      %f2
	camellia_fli	%f`16+16*$i+14`, %f6,      %f6
___
}
$code.=<<___;
	camellia_f	%f56, %f2, %f0, %f8
	camellia_f	%f56, %f6, %f4, %f10
	camellia_f	%f58, %f0, %f8, %f2
	camellia_f	%f58, %f4, %f10, %f6
	fxor		%f60, %f8, %f0
	fxor		%f60, %f10, %f4
	fxor		%f62, %f2, %f2
	retl
	fxor		%f62, %f6, %f6
.type	_cmll128_encrypt_2x,#function
.size	_cmll128_encrypt_2x,.-_cmll128_encrypt_2x
_cmll128_decrypt_2x=_cmll128_encrypt_2x

.align	32
_cmll256_encrypt_1x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0
	ldd		[$key + 240], %f24
	ldd		[$key + 248], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fli	%f30, %f2, %f2
	ldd		[$key + 256], %f28
	ldd		[$key + 264], %f30
___
for ($i=1; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0,      %f0
	camellia_fli	%f`16+16*$i+14`, %f2,      %f2
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key + 32], %f20
	ldd		[$key + 40], %f22
	camellia_f	%f24, %f2, %f0, %f4
	camellia_f	%f26, %f0, %f4, %f2
	ldd		[$key + 48], %f24
	ldd		[$key + 56], %f26
	fxor		%f28, %f4, %f0
	fxor		%f30, %f2, %f2
	ldd		[$key + 64], %f28
	retl
	ldd		[$key + 72], %f30
.type	_cmll256_encrypt_1x,#function
.size	_cmll256_encrypt_1x,.-_cmll256_encrypt_1x

.align	32
_cmll256_encrypt_2x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f24, %f6, %f4, %f6
	camellia_f	%f26, %f0, %f2, %f0
	camellia_f	%f26, %f4, %f6, %f4
	ldd		[$key + 240], %f24
	ldd		[$key + 248], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fl	%f28, %f4, %f4
	camellia_fli	%f30, %f2, %f2
	camellia_fli	%f30, %f6, %f6
	ldd		[$key + 256], %f28
	ldd		[$key + 264], %f30
___
for ($i=1; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0,      %f0
	camellia_fl	%f`16+16*$i+12`, %f4,      %f4
	camellia_fli	%f`16+16*$i+14`, %f2,      %f2
	camellia_fli	%f`16+16*$i+14`, %f6,      %f6
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd		[$key + 32], %f20
	ldd		[$key + 40], %f22
	camellia_f	%f24, %f2, %f0, %f8
	camellia_f	%f24, %f6, %f4, %f10
	camellia_f	%f26, %f0, %f8, %f2
	camellia_f	%f26, %f4, %f10, %f6
	ldd		[$key + 48], %f24
	ldd		[$key + 56], %f26
	fxor		%f28, %f8, %f0
	fxor		%f28, %f10, %f4
	fxor		%f30, %f2, %f2
	fxor		%f30, %f6, %f6
	ldd		[$key + 64], %f28
	retl
	ldd		[$key + 72], %f30
.type	_cmll256_encrypt_2x,#function
.size	_cmll256_encrypt_2x,.-_cmll256_encrypt_2x

.align	32
_cmll256_decrypt_1x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key - 8], %f16
	ldd		[$key - 16], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key - 24], %f20
	ldd		[$key - 32], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0
	ldd		[$key - 40], %f24
	ldd		[$key - 48], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fli	%f30, %f2, %f2
	ldd		[$key - 56], %f28
	ldd		[$key - 64], %f30
___
for ($i=1; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0,      %f0
	camellia_fli	%f`16+16*$i+14`, %f2,      %f2
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key + 184], %f16
	ldd		[$key + 176], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key + 168], %f20
	ldd		[$key + 160], %f22
	camellia_f	%f24, %f2, %f0, %f4
	camellia_f	%f26, %f0, %f4, %f2
	ldd		[$key + 152], %f24
	ldd		[$key + 144], %f26
	fxor		%f30, %f4, %f0
	fxor		%f28, %f2, %f2
	ldd		[$key + 136], %f28
	retl
	ldd		[$key + 128], %f30
.type	_cmll256_decrypt_1x,#function
.size	_cmll256_decrypt_1x,.-_cmll256_decrypt_1x

.align	32
_cmll256_decrypt_2x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd		[$key - 8], %f16
	ldd		[$key - 16], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd		[$key - 24], %f20
	ldd		[$key - 32], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f24, %f6, %f4, %f6
	camellia_f	%f26, %f0, %f2, %f0
	camellia_f	%f26, %f4, %f6, %f4
	ldd		[$key - 40], %f24
	ldd		[$key - 48], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fl	%f28, %f4, %f4
	camellia_fli	%f30, %f2, %f2
	camellia_fli	%f30, %f6, %f6
	ldd		[$key - 56], %f28
	ldd		[$key - 64], %f30
___
for ($i=1; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0,      %f0
	camellia_fl	%f`16+16*$i+12`, %f4,      %f4
	camellia_fli	%f`16+16*$i+14`, %f2,      %f2
	camellia_fli	%f`16+16*$i+14`, %f6,      %f6
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd		[$key + 184], %f16
	ldd		[$key + 176], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd		[$key + 168], %f20
	ldd		[$key + 160], %f22
	camellia_f	%f24, %f2, %f0, %f8
	camellia_f	%f24, %f6, %f4, %f10
	camellia_f	%f26, %f0, %f8, %f2
	camellia_f	%f26, %f4, %f10, %f6
	ldd		[$key + 152], %f24
	ldd		[$key + 144], %f26
	fxor		%f30, %f8, %f0
	fxor		%f30, %f10, %f4
	fxor		%f28, %f2, %f2
	fxor		%f28, %f6, %f6
	ldd		[$key + 136], %f28
	retl
	ldd		[$key + 128], %f30
.type	_cmll256_decrypt_2x,#function
.size	_cmll256_decrypt_2x,.-_cmll256_decrypt_2x
___

&alg_cbc_encrypt_implement("cmll",128);
&alg_cbc_encrypt_implement("cmll",256);

&alg_cbc_decrypt_implement("cmll",128);
&alg_cbc_decrypt_implement("cmll",256);

if ($::evp) {
    &alg_ctr32_implement("cmll",128);
    &alg_ctr32_implement("cmll",256);
}
}}}

######################################################################
# openssl/camellia.h compatible entry points, emitted only when $::evp
# is 0.
#
# Camellia_set_key returns -1 when %o0 or %o2 is zero or when %o2 is
# not 8-byte aligned, and -2 when %o1 has bits set outside 0x1c0 or is
# below 128; otherwise it branches to cmll_t4_set_key.
#
# Camellia_cbc_encrypt reads the grand-round count at key + 272 and
# dispatches on it (3 selects the 128-bit routine, anything else the
# 256-bit one) and on whether the sixth argument is zero (decrypt).
# NOTE(review): the cmll{128,256}_t4_cbc_{en,de}crypt targets are not
# defined in this file; presumably the alg_cbc_*_implement calls above
# emit them - confirm against sparcv9_modes.pl.
#
if (!$::evp) {
$code.=<<___;
.global	Camellia_encrypt
Camellia_encrypt=cmll_t4_encrypt
.global	Camellia_decrypt
Camellia_decrypt=cmll_t4_decrypt
.global	Camellia_set_key
.align	32
Camellia_set_key:
	andcc		%o2, 7, %g0		! double-check alignment
	bnz,a,pn	%icc, 1f
	mov		-1, %o0
	brz,a,pn	%o0, 1f
	mov		-1, %o0
	brz,a,pn	%o2, 1f
	mov		-1, %o0
	andncc		%o1, 0x1c0, %g0
	bnz,a,pn	%icc, 1f
	mov		-2, %o0
	cmp		%o1, 128
	bl,a,pn		%icc, 1f
	mov		-2, %o0
	b		cmll_t4_set_key
	nop
1:	retl
	nop
.type	Camellia_set_key,#function
.size	Camellia_set_key,.-Camellia_set_key
___

my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));

$code.=<<___;
.globl	Camellia_cbc_encrypt
.align	32
Camellia_cbc_encrypt:
	ld		[$key + 272], %g1
	nop
	brz		$enc, .Lcbc_decrypt
	cmp		%g1, 3

	be,pt		%icc, cmll128_t4_cbc_encrypt
	nop
	ba		cmll256_t4_cbc_encrypt
	nop

.Lcbc_decrypt:
	be,pt		%icc, cmll128_t4_cbc_decrypt
	nop
	ba		cmll256_t4_cbc_decrypt
	nop
.type	Camellia_cbc_encrypt,#function
.size	Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
___
}

&emit_assembler();

# Buffered write errors (full disk, closed pipe) only surface when the
# handle is closed. Fail loudly instead of leaving truncated assembly
# behind with a zero exit status.
close STDOUT or die "error closing STDOUT: $!";