#!/usr/bin/env perl

# ====================================================================
# Written by David S. Miller <davem@devemloft.net> and Andy Polyakov
# <appro@openssl.org>. The module is licensed under 2-clause BSD
# license. March 2013. All rights reserved.
# ====================================================================

######################################################################
# DES for SPARC T4.
#
# As with other hardware-assisted ciphers CBC encrypt results [for
# aligned data] are virtually identical to critical path lengths:
#
#		DES		Triple-DES
# CBC encrypt	4.14/4.15(*)	11.7/11.7
# CBC decrypt	1.77/4.11(**)	6.42/7.47
#
#			 (*)	numbers after slash are for
#				misaligned data;
#			 (**)	this is result for largest
#				block size, unlike all other
#				cases smaller blocks results
#				are better[?];

# Locate the directory this script lives in so that the shared perlasm
# helper can be found either next to it or in ../../perlasm.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# asm_init() and emit_assembler() are not defined in this file; they are
# presumably provided by sparcv9_modes.pl (as are $::abibits and
# $::size_t_cc, which are read below) -- confirm against that module.
&asm_init(@ARGV);

# In 64-bit builds %g2 and %g3 are declared as scratch registers; %g2
# and %g3 are used below as $iright and $omask.
$code.=<<___ if ($::abibits==64);
.register	%g2,#scratch
.register	%g3,#scratch
___

$code.=<<___;
.text
___

# des_t4_key_expand(inp=%o0, out=%o1)
#
# Loads one 8-byte key from inp (an input that is not 8-byte aligned is
# handled with alignaddr + faligndata over two doubleword loads), runs
# the des_kexpand chain and stores 16 doublewords (offsets 0x00..0x78,
# i.e. 128 bytes) at out. out is written with plain std, so it is
# presumably expected to be 8-byte aligned -- confirm against callers.
{ my ($inp,$out)=("%o0","%o1");

$code.=<<___;
.align	32
.globl	des_t4_key_expand
.type	des_t4_key_expand,#function
des_t4_key_expand:
	andcc		$inp, 0x7, %g0
	alignaddr	$inp, %g0, $inp
	bz,pt		%icc, 1f
	ldd		[$inp + 0x00], %f0
	ldd		[$inp + 0x08], %f2
	faligndata	%f0, %f2, %f0
1:	des_kexpand	%f0, 0, %f0
	des_kexpand	%f0, 1, %f2
	std		%f0, [$out + 0x00]
	des_kexpand	%f2, 3, %f6
	std		%f2, [$out + 0x08]
	des_kexpand	%f2, 2, %f4
	des_kexpand	%f6, 3, %f10
	std		%f6, [$out + 0x18]
	des_kexpand	%f6, 2, %f8
	std		%f4, [$out + 0x10]
	des_kexpand	%f10, 3, %f14
	std		%f10, [$out + 0x28]
	des_kexpand	%f10, 2, %f12
	std		%f8, [$out + 0x20]
	des_kexpand	%f14, 1, %f16
	std		%f14, [$out + 0x38]
	des_kexpand	%f16, 3, %f20
	std		%f12, [$out + 0x30]
	des_kexpand	%f16, 2, %f18
	std		%f16, [$out + 0x40]
	des_kexpand	%f20, 3, %f24
	std		%f20, [$out + 0x50]
	des_kexpand	%f20, 2, %f22
	std		%f18, [$out + 0x48]
	des_kexpand	%f24, 3, %f28
	std		%f24, [$out + 0x60]
	des_kexpand	%f24, 2, %f26
	std		%f22, [$out + 0x58]
	des_kexpand	%f28, 1, %f30
	std		%f28, [$out + 0x70]
	std		%f26, [$out + 0x68]
	retl
	std		%f30, [$out + 0x78]
.size	des_t4_key_expand,.-des_t4_key_expand
___
}

# CBC routines. All four share the same register interface:
#
#	inp=%o0, out=%o1, len=%o2, key=%o3, ivec=%o4
#
# Common behaviour visible in the code below:
#  - len == 0 returns immediately through .Lcbc_abort (the label is
#    defined inside des_t4_cbc_encrypt and reused by the other three);
#  - len is shifted right by 3 to obtain the number of 8-byte blocks,
#    so callers are presumably expected to pass a non-zero multiple of
#    8 -- confirm against callers;
#  - the 8-byte ivec is loaded on entry and the updated chaining value
#    is written back to ivec before returning;
#  - inp is rounded down to an 8-byte boundary; for misaligned input
#    each block is assembled from two ldx loads using the bit shifts
#    kept in $ileft/$iright;
#  - for misaligned output ($omask != 0) the result is rotated with
#    faligndata and written with two partial stores (stda ... 0xc0);
#    that path re-enters the loop at "<loop label>+4", i.e. one
#    instruction past the label, because its own ldxa has already
#    loaded the next input doubleword into %g4.
#
# des_t4_cbc_encrypt loads the 16 key-schedule doublewords at
# key+0x00..0x78 in ascending order into %f4..%f34;
# des_t4_cbc_decrypt loads the same 16 doublewords in descending order.
{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  my ($ileft,$iright,$omask) = map("%g$_",(1..3));

$code.=<<___;
.globl	des_t4_cbc_encrypt
.align	32
des_t4_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.Lcbc_abort:
	retl
	nop

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_cbc_encrypt,#function
.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt

.globl	des_t4_cbc_decrypt
.align	32
des_t4_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x78], %f4	! load key schedule
	ldd		[$key + 0x70], %f6
	ldd		[$key + 0x68], %f8
	ldd		[$key + 0x60], %f10
	ldd		[$key + 0x58], %f12
	ldd		[$key + 0x50], %f14
	ldd		[$key + 0x48], %f16
	ldd		[$key + 0x40], %f18
	ldd		[$key + 0x38], %f20
	ldd		[$key + 0x30], %f22
	ldd		[$key + 0x28], %f24
	ldd		[$key + 0x20], %f26
	ldd		[$key + 0x18], %f28
	ldd		[$key + 0x10], %f30
	ldd		[$key + 0x08], %f32
	ldd		[$key + 0x00], %f34

.Ldes_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_cbc_decrypt,#function
.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

# One might wonder why does one have back-to-back des_iip/des_ip
# pairs between EDE passes. Indeed, aren't they inverse of each other?
# They almost are. Outcome of the pair is 32-bit words being swapped
# in target register. Consider pair of des_iip/des_ip as a way to
# perform the due swap, it's actually fastest way in this case.

# Triple-DES (EDE3) CBC routines. They read a key area of 0x180 bytes,
# used as three 16-doubleword schedules at key+0x000, key+0x080 and
# key+0x100:
#  - encrypt walks the first schedule in ascending order, the second
#    in descending order (0x100-0x08 down to 0x100-0x80) and the third
#    in ascending order;
#  - decrypt walks the third schedule in descending order, the second
#    in ascending order and the first in descending order.
# Only the first-pass schedule is loaded ahead of the loop (into
# %f4..%f34); the loads for the other two passes are interleaved with
# the des_round instructions inside the loop and are therefore
# repeated for every block.

$code.=<<___;
.globl	des_t4_ede3_cbc_encrypt
.align	32
des_t4_ede3_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_ede3_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x100-0x08], %f36
	ldd		[$key + 0x100-0x10], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x100-0x18], %f40
	ldd		[$key + 0x100-0x20], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x100-0x28], %f44
	ldd		[$key + 0x100-0x30], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x100-0x38], %f48
	ldd		[$key + 0x100-0x40], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x100-0x48], %f52
	ldd		[$key + 0x100-0x50], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x100-0x58], %f56
	ldd		[$key + 0x100-0x60], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x100-0x68], %f60
	ldd		[$key + 0x100-0x70], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100-0x78], %f36
	ldd		[$key + 0x100-0x80], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x100+0x00], %f40
	ldd		[$key + 0x100+0x08], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x100+0x10], %f44
	ldd		[$key + 0x100+0x18], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x100+0x20], %f48
	ldd		[$key + 0x100+0x28], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x100+0x30], %f52
	ldd		[$key + 0x100+0x38], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100+0x40], %f56
	ldd		[$key + 0x100+0x48], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x100+0x50], %f60
	ldd		[$key + 0x100+0x58], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x100+0x60], %f36
	ldd		[$key + 0x100+0x68], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x100+0x70], %f40
	ldd		[$key + 0x100+0x78], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~2x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_ede3_cbc_encrypt,#function
.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt

.globl	des_t4_ede3_cbc_decrypt
.align	32
des_t4_ede3_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x100+0x78], %f4	! load key schedule
	ldd		[$key + 0x100+0x70], %f6
	ldd		[$key + 0x100+0x68], %f8
	ldd		[$key + 0x100+0x60], %f10
	ldd		[$key + 0x100+0x58], %f12
	ldd		[$key + 0x100+0x50], %f14
	ldd		[$key + 0x100+0x48], %f16
	ldd		[$key + 0x100+0x40], %f18
	ldd		[$key + 0x100+0x38], %f20
	ldd		[$key + 0x100+0x30], %f22
	ldd		[$key + 0x100+0x28], %f24
	ldd		[$key + 0x100+0x20], %f26
	ldd		[$key + 0x100+0x18], %f28
	ldd		[$key + 0x100+0x10], %f30
	ldd		[$key + 0x100+0x08], %f32
	ldd		[$key + 0x100+0x00], %f34

.Ldes_ede3_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x80+0x00], %f36
	ldd		[$key + 0x80+0x08], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x80+0x10], %f40
	ldd		[$key + 0x80+0x18], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x80+0x20], %f44
	ldd		[$key + 0x80+0x28], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x80+0x30], %f48
	ldd		[$key + 0x80+0x38], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x80+0x40], %f52
	ldd		[$key + 0x80+0x48], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x80+0x50], %f56
	ldd		[$key + 0x80+0x58], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x80+0x60], %f60
	ldd		[$key + 0x80+0x68], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80+0x70], %f36
	ldd		[$key + 0x80+0x78], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x80-0x08], %f40
	ldd		[$key + 0x80-0x10], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x80-0x18], %f44
	ldd		[$key + 0x80-0x20], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x80-0x28], %f48
	ldd		[$key + 0x80-0x30], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x80-0x38], %f52
	ldd		[$key + 0x80-0x40], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80-0x48], %f56
	ldd		[$key + 0x80-0x50], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x80-0x58], %f60
	ldd		[$key + 0x80-0x60], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x80-0x68], %f36
	ldd		[$key + 0x80-0x70], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x80-0x78], %f40
	ldd		[$key + 0x80-0x80], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_ede3_cbc_decrypt,#function
.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
}
$code.=<<___;
.asciz  "DES for SPARC T4, David S. Miller, Andy Polyakov"
.align  4
___

&emit_assembler();

# Buffered write errors (disk full, closed pipe) only surface when the
# handle is closed; fail loudly instead of leaving a truncated output.
close STDOUT or die "error closing STDOUT: $!";