1#!/usr/bin/env perl 2# 3# ==================================================================== 4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 5# project. The module is, however, dual licensed under OpenSSL and 6# CRYPTOGAMS licenses depending on where you obtain it. For further 7# details see http://www.openssl.org/~appro/cryptogams/. 8# ==================================================================== 9# 10# 11# AES-NI-CTR+GHASH stitch. 12# 13# February 2013 14# 15# OpenSSL GCM implementation is organized in such way that its 16# performance is rather close to the sum of its streamed components, 17# in the context parallelized AES-NI CTR and modulo-scheduled 18# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation 19# was observed to perform significantly better than the sum of the 20# components on contemporary CPUs, the effort was deemed impossible to 21# justify. This module is based on combination of Intel submissions, 22# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max 23# Locktyukhin of Intel Corp. who verified that it reduces shuffles 24# pressure with notable relative improvement, achieving 1.0 cycle per 25# byte processed with 128-bit key on Haswell processor, and 0.74 - 26# on Broadwell. [Mentioned results are raw profiled measurements for 27# favourable packet size, one divisible by 96. Applications using the 28# EVP interface will observe a few percent worse performance.] 
29# 30# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest 31# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf 32 33$flavour = shift; 34$output = shift; 35if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } 36 37$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); 38 39$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 40( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or 41( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or 42die "can't locate x86_64-xlate.pl"; 43 44if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` 45 =~ /GNU assembler version ([2-9]\.[0-9]+)/) { 46 $avx = ($1>=2.19) + ($1>=2.22); 47} 48 49if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && 50 `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { 51 $avx = ($1>=2.09) + ($1>=2.10); 52} 53 54if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && 55 `ml64 2>&1` =~ /Version ([0-9]+)\./) { 56 $avx = ($1>=10) + ($1>=11); 57} 58 59if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { 60 $avx = ($2>=3.0) + ($2>3.0); 61} 62 63open OUT,"| \"$^X\" $xlate $flavour $output"; 64*STDOUT=*OUT; 65 66if ($avx>1) {{{ 67 68($inp,$out,$len,$key,$ivp,$Xip)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9"); 69 70($Ii,$T1,$T2,$Hkey, 71 $Z0,$Z1,$Z2,$Z3,$Xi) = map("%xmm$_",(0..8)); 72 73($inout0,$inout1,$inout2,$inout3,$inout4,$inout5,$rndkey) = map("%xmm$_",(9..15)); 74 75($counter,$rounds,$ret,$const,$in0,$end0)=("%ebx","%ebp","%r10","%r11","%r14","%r15"); 76 77$code=<<___; 78.text 79 80.type _aesni_ctr32_ghash_6x,\@abi-omnipotent 81.align 32 82_aesni_ctr32_ghash_6x: 83 vmovdqu 0x20($const),$T2 # borrow $T2, .Lone_msb 84 sub \$6,$len 85 vpxor $Z0,$Z0,$Z0 # $Z0 = 0 86 vmovdqu 0x00-0x80($key),$rndkey 87 vpaddb $T2,$T1,$inout1 88 vpaddb $T2,$inout1,$inout2 89 vpaddb $T2,$inout2,$inout3 90 vpaddb $T2,$inout3,$inout4 91 vpaddb 
$T2,$inout4,$inout5 92 vpxor $rndkey,$T1,$inout0 93 vmovdqu $Z0,16+8(%rsp) # "$Z3" = 0 94 jmp .Loop6x 95 96.align 32 97.Loop6x: 98 add \$`6<<24`,$counter 99 jc .Lhandle_ctr32 # discard $inout[1-5]? 100 vmovdqu 0x00-0x20($Xip),$Hkey # $Hkey^1 101 vpaddb $T2,$inout5,$T1 # next counter value 102 vpxor $rndkey,$inout1,$inout1 103 vpxor $rndkey,$inout2,$inout2 104 105.Lresume_ctr32: 106 vmovdqu $T1,($ivp) # save next counter value 107 vpclmulqdq \$0x10,$Hkey,$Z3,$Z1 108 vpxor $rndkey,$inout3,$inout3 109 vmovups 0x10-0x80($key),$T2 # borrow $T2 for $rndkey 110 vpclmulqdq \$0x01,$Hkey,$Z3,$Z2 111 xor %r12,%r12 112 cmp $in0,$end0 113 114 vaesenc $T2,$inout0,$inout0 115 vmovdqu 0x30+8(%rsp),$Ii # I[4] 116 vpxor $rndkey,$inout4,$inout4 117 vpclmulqdq \$0x00,$Hkey,$Z3,$T1 118 vaesenc $T2,$inout1,$inout1 119 vpxor $rndkey,$inout5,$inout5 120 setnc %r12b 121 vpclmulqdq \$0x11,$Hkey,$Z3,$Z3 122 vaesenc $T2,$inout2,$inout2 123 vmovdqu 0x10-0x20($Xip),$Hkey # $Hkey^2 124 neg %r12 125 vaesenc $T2,$inout3,$inout3 126 vpxor $Z1,$Z2,$Z2 127 vpclmulqdq \$0x00,$Hkey,$Ii,$Z1 128 vpxor $Z0,$Xi,$Xi # modulo-scheduled 129 vaesenc $T2,$inout4,$inout4 130 vpxor $Z1,$T1,$Z0 131 and \$0x60,%r12 132 vmovups 0x20-0x80($key),$rndkey 133 vpclmulqdq \$0x10,$Hkey,$Ii,$T1 134 vaesenc $T2,$inout5,$inout5 135 136 vpclmulqdq \$0x01,$Hkey,$Ii,$T2 137 lea ($in0,%r12),$in0 138 vaesenc $rndkey,$inout0,$inout0 139 vpxor 16+8(%rsp),$Xi,$Xi # modulo-scheduled [vpxor $Z3,$Xi,$Xi] 140 vpclmulqdq \$0x11,$Hkey,$Ii,$Hkey 141 vmovdqu 0x40+8(%rsp),$Ii # I[3] 142 vaesenc $rndkey,$inout1,$inout1 143 movbe 0x58($in0),%r13 144 vaesenc $rndkey,$inout2,$inout2 145 movbe 0x50($in0),%r12 146 vaesenc $rndkey,$inout3,$inout3 147 mov %r13,0x20+8(%rsp) 148 vaesenc $rndkey,$inout4,$inout4 149 mov %r12,0x28+8(%rsp) 150 vmovdqu 0x30-0x20($Xip),$Z1 # borrow $Z1 for $Hkey^3 151 vaesenc $rndkey,$inout5,$inout5 152 153 vmovups 0x30-0x80($key),$rndkey 154 vpxor $T1,$Z2,$Z2 155 vpclmulqdq \$0x00,$Z1,$Ii,$T1 156 vaesenc 
$rndkey,$inout0,$inout0 157 vpxor $T2,$Z2,$Z2 158 vpclmulqdq \$0x10,$Z1,$Ii,$T2 159 vaesenc $rndkey,$inout1,$inout1 160 vpxor $Hkey,$Z3,$Z3 161 vpclmulqdq \$0x01,$Z1,$Ii,$Hkey 162 vaesenc $rndkey,$inout2,$inout2 163 vpclmulqdq \$0x11,$Z1,$Ii,$Z1 164 vmovdqu 0x50+8(%rsp),$Ii # I[2] 165 vaesenc $rndkey,$inout3,$inout3 166 vaesenc $rndkey,$inout4,$inout4 167 vpxor $T1,$Z0,$Z0 168 vmovdqu 0x40-0x20($Xip),$T1 # borrow $T1 for $Hkey^4 169 vaesenc $rndkey,$inout5,$inout5 170 171 vmovups 0x40-0x80($key),$rndkey 172 vpxor $T2,$Z2,$Z2 173 vpclmulqdq \$0x00,$T1,$Ii,$T2 174 vaesenc $rndkey,$inout0,$inout0 175 vpxor $Hkey,$Z2,$Z2 176 vpclmulqdq \$0x10,$T1,$Ii,$Hkey 177 vaesenc $rndkey,$inout1,$inout1 178 movbe 0x48($in0),%r13 179 vpxor $Z1,$Z3,$Z3 180 vpclmulqdq \$0x01,$T1,$Ii,$Z1 181 vaesenc $rndkey,$inout2,$inout2 182 movbe 0x40($in0),%r12 183 vpclmulqdq \$0x11,$T1,$Ii,$T1 184 vmovdqu 0x60+8(%rsp),$Ii # I[1] 185 vaesenc $rndkey,$inout3,$inout3 186 mov %r13,0x30+8(%rsp) 187 vaesenc $rndkey,$inout4,$inout4 188 mov %r12,0x38+8(%rsp) 189 vpxor $T2,$Z0,$Z0 190 vmovdqu 0x60-0x20($Xip),$T2 # borrow $T2 for $Hkey^5 191 vaesenc $rndkey,$inout5,$inout5 192 193 vmovups 0x50-0x80($key),$rndkey 194 vpxor $Hkey,$Z2,$Z2 195 vpclmulqdq \$0x00,$T2,$Ii,$Hkey 196 vaesenc $rndkey,$inout0,$inout0 197 vpxor $Z1,$Z2,$Z2 198 vpclmulqdq \$0x10,$T2,$Ii,$Z1 199 vaesenc $rndkey,$inout1,$inout1 200 movbe 0x38($in0),%r13 201 vpxor $T1,$Z3,$Z3 202 vpclmulqdq \$0x01,$T2,$Ii,$T1 203 vpxor 0x70+8(%rsp),$Xi,$Xi # accumulate I[0] 204 vaesenc $rndkey,$inout2,$inout2 205 movbe 0x30($in0),%r12 206 vpclmulqdq \$0x11,$T2,$Ii,$T2 207 vaesenc $rndkey,$inout3,$inout3 208 mov %r13,0x40+8(%rsp) 209 vaesenc $rndkey,$inout4,$inout4 210 mov %r12,0x48+8(%rsp) 211 vpxor $Hkey,$Z0,$Z0 212 vmovdqu 0x70-0x20($Xip),$Hkey # $Hkey^6 213 vaesenc $rndkey,$inout5,$inout5 214 215 vmovups 0x60-0x80($key),$rndkey 216 vpxor $Z1,$Z2,$Z2 217 vpclmulqdq \$0x10,$Hkey,$Xi,$Z1 218 vaesenc $rndkey,$inout0,$inout0 219 vpxor $T1,$Z2,$Z2 220 
vpclmulqdq \$0x01,$Hkey,$Xi,$T1 221 vaesenc $rndkey,$inout1,$inout1 222 movbe 0x28($in0),%r13 223 vpxor $T2,$Z3,$Z3 224 vpclmulqdq \$0x00,$Hkey,$Xi,$T2 225 vaesenc $rndkey,$inout2,$inout2 226 movbe 0x20($in0),%r12 227 vpclmulqdq \$0x11,$Hkey,$Xi,$Xi 228 vaesenc $rndkey,$inout3,$inout3 229 mov %r13,0x50+8(%rsp) 230 vaesenc $rndkey,$inout4,$inout4 231 mov %r12,0x58+8(%rsp) 232 vpxor $Z1,$Z2,$Z2 233 vaesenc $rndkey,$inout5,$inout5 234 vpxor $T1,$Z2,$Z2 235 236 vmovups 0x70-0x80($key),$rndkey 237 vpslldq \$8,$Z2,$Z1 238 vpxor $T2,$Z0,$Z0 239 vmovdqu 0x10($const),$Hkey # .Lpoly 240 241 vaesenc $rndkey,$inout0,$inout0 242 vpxor $Xi,$Z3,$Z3 243 vaesenc $rndkey,$inout1,$inout1 244 vpxor $Z1,$Z0,$Z0 245 movbe 0x18($in0),%r13 246 vaesenc $rndkey,$inout2,$inout2 247 movbe 0x10($in0),%r12 248 vpalignr \$8,$Z0,$Z0,$Ii # 1st phase 249 vpclmulqdq \$0x10,$Hkey,$Z0,$Z0 250 mov %r13,0x60+8(%rsp) 251 vaesenc $rndkey,$inout3,$inout3 252 mov %r12,0x68+8(%rsp) 253 vaesenc $rndkey,$inout4,$inout4 254 vmovups 0x80-0x80($key),$T1 # borrow $T1 for $rndkey 255 vaesenc $rndkey,$inout5,$inout5 256 257 vaesenc $T1,$inout0,$inout0 258 vmovups 0x90-0x80($key),$rndkey 259 vaesenc $T1,$inout1,$inout1 260 vpsrldq \$8,$Z2,$Z2 261 vaesenc $T1,$inout2,$inout2 262 vpxor $Z2,$Z3,$Z3 263 vaesenc $T1,$inout3,$inout3 264 vpxor $Ii,$Z0,$Z0 265 movbe 0x08($in0),%r13 266 vaesenc $T1,$inout4,$inout4 267 movbe 0x00($in0),%r12 268 vaesenc $T1,$inout5,$inout5 269 vmovups 0xa0-0x80($key),$T1 270 cmp \$11,$rounds 271 jb .Lenc_tail # 128-bit key 272 273 vaesenc $rndkey,$inout0,$inout0 274 vaesenc $rndkey,$inout1,$inout1 275 vaesenc $rndkey,$inout2,$inout2 276 vaesenc $rndkey,$inout3,$inout3 277 vaesenc $rndkey,$inout4,$inout4 278 vaesenc $rndkey,$inout5,$inout5 279 280 vaesenc $T1,$inout0,$inout0 281 vaesenc $T1,$inout1,$inout1 282 vaesenc $T1,$inout2,$inout2 283 vaesenc $T1,$inout3,$inout3 284 vaesenc $T1,$inout4,$inout4 285 vmovups 0xb0-0x80($key),$rndkey 286 vaesenc $T1,$inout5,$inout5 287 vmovups 
0xc0-0x80($key),$T1 288 je .Lenc_tail # 192-bit key 289 290 vaesenc $rndkey,$inout0,$inout0 291 vaesenc $rndkey,$inout1,$inout1 292 vaesenc $rndkey,$inout2,$inout2 293 vaesenc $rndkey,$inout3,$inout3 294 vaesenc $rndkey,$inout4,$inout4 295 vaesenc $rndkey,$inout5,$inout5 296 297 vaesenc $T1,$inout0,$inout0 298 vaesenc $T1,$inout1,$inout1 299 vaesenc $T1,$inout2,$inout2 300 vaesenc $T1,$inout3,$inout3 301 vaesenc $T1,$inout4,$inout4 302 vmovups 0xd0-0x80($key),$rndkey 303 vaesenc $T1,$inout5,$inout5 304 vmovups 0xe0-0x80($key),$T1 305 jmp .Lenc_tail # 256-bit key 306 307.align 32 308.Lhandle_ctr32: 309 vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask 310 vpshufb $Ii,$T1,$Z2 # byte-swap counter 311 vmovdqu 0x30($const),$Z1 # borrow $Z1, .Ltwo_lsb 312 vpaddd 0x40($const),$Z2,$inout1 # .Lone_lsb 313 vpaddd $Z1,$Z2,$inout2 314 vmovdqu 0x00-0x20($Xip),$Hkey # $Hkey^1 315 vpaddd $Z1,$inout1,$inout3 316 vpshufb $Ii,$inout1,$inout1 317 vpaddd $Z1,$inout2,$inout4 318 vpshufb $Ii,$inout2,$inout2 319 vpxor $rndkey,$inout1,$inout1 320 vpaddd $Z1,$inout3,$inout5 321 vpshufb $Ii,$inout3,$inout3 322 vpxor $rndkey,$inout2,$inout2 323 vpaddd $Z1,$inout4,$T1 # byte-swapped next counter value 324 vpshufb $Ii,$inout4,$inout4 325 vpshufb $Ii,$inout5,$inout5 326 vpshufb $Ii,$T1,$T1 # next counter value 327 jmp .Lresume_ctr32 328 329.align 32 330.Lenc_tail: 331 vaesenc $rndkey,$inout0,$inout0 332 vmovdqu $Z3,16+8(%rsp) # postpone vpxor $Z3,$Xi,$Xi 333 vpalignr \$8,$Z0,$Z0,$Xi # 2nd phase 334 vaesenc $rndkey,$inout1,$inout1 335 vpclmulqdq \$0x10,$Hkey,$Z0,$Z0 336 vpxor 0x00($inp),$T1,$T2 337 vaesenc $rndkey,$inout2,$inout2 338 vpxor 0x10($inp),$T1,$Ii 339 vaesenc $rndkey,$inout3,$inout3 340 vpxor 0x20($inp),$T1,$Z1 341 vaesenc $rndkey,$inout4,$inout4 342 vpxor 0x30($inp),$T1,$Z2 343 vaesenc $rndkey,$inout5,$inout5 344 vpxor 0x40($inp),$T1,$Z3 345 vpxor 0x50($inp),$T1,$Hkey 346 vmovdqu ($ivp),$T1 # load next counter value 347 348 vaesenclast $T2,$inout0,$inout0 349 vmovdqu 
0x20($const),$T2 # borrow $T2, .Lone_msb 350 vaesenclast $Ii,$inout1,$inout1 351 vpaddb $T2,$T1,$Ii 352 mov %r13,0x70+8(%rsp) 353 lea 0x60($inp),$inp 354 vaesenclast $Z1,$inout2,$inout2 355 vpaddb $T2,$Ii,$Z1 356 mov %r12,0x78+8(%rsp) 357 lea 0x60($out),$out 358 vmovdqu 0x00-0x80($key),$rndkey 359 vaesenclast $Z2,$inout3,$inout3 360 vpaddb $T2,$Z1,$Z2 361 vaesenclast $Z3, $inout4,$inout4 362 vpaddb $T2,$Z2,$Z3 363 vaesenclast $Hkey,$inout5,$inout5 364 vpaddb $T2,$Z3,$Hkey 365 366 add \$0x60,$ret 367 sub \$0x6,$len 368 jc .L6x_done 369 370 vmovups $inout0,-0x60($out) # save output 371 vpxor $rndkey,$T1,$inout0 372 vmovups $inout1,-0x50($out) 373 vmovdqa $Ii,$inout1 # 0 latency 374 vmovups $inout2,-0x40($out) 375 vmovdqa $Z1,$inout2 # 0 latency 376 vmovups $inout3,-0x30($out) 377 vmovdqa $Z2,$inout3 # 0 latency 378 vmovups $inout4,-0x20($out) 379 vmovdqa $Z3,$inout4 # 0 latency 380 vmovups $inout5,-0x10($out) 381 vmovdqa $Hkey,$inout5 # 0 latency 382 vmovdqu 0x20+8(%rsp),$Z3 # I[5] 383 jmp .Loop6x 384 385.L6x_done: 386 vpxor 16+8(%rsp),$Xi,$Xi # modulo-scheduled 387 vpxor $Z0,$Xi,$Xi # modulo-scheduled 388 389 ret 390.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x 391___ 392###################################################################### 393# 394# size_t aesni_gcm_[en|de]crypt(const void *inp, void *out, size_t len, 395# const AES_KEY *key, unsigned char iv[16], 396# struct { u128 Xi,H,Htbl[9]; } *Xip); 397$code.=<<___; 398.globl aesni_gcm_decrypt 399.type aesni_gcm_decrypt,\@function,6 400.align 32 401aesni_gcm_decrypt: 402 xor $ret,$ret 403 cmp \$0x60,$len # minimal accepted length 404 jb .Lgcm_dec_abort 405 406 lea (%rsp),%rax # save stack pointer 407 push %rbx 408 push %rbp 409 push %r12 410 push %r13 411 push %r14 412 push %r15 413___ 414$code.=<<___ if ($win64); 415 lea -0xa8(%rsp),%rsp 416 movaps %xmm6,-0xd8(%rax) 417 movaps %xmm7,-0xc8(%rax) 418 movaps %xmm8,-0xb8(%rax) 419 movaps %xmm9,-0xa8(%rax) 420 movaps %xmm10,-0x98(%rax) 421 movaps 
%xmm11,-0x88(%rax) 422 movaps %xmm12,-0x78(%rax) 423 movaps %xmm13,-0x68(%rax) 424 movaps %xmm14,-0x58(%rax) 425 movaps %xmm15,-0x48(%rax) 426.Lgcm_dec_body: 427___ 428$code.=<<___; 429 vzeroupper 430 431 vmovdqu ($ivp),$T1 # input counter value 432 add \$-128,%rsp 433 mov 12($ivp),$counter 434 lea .Lbswap_mask(%rip),$const 435 lea -0x80($key),$in0 # borrow $in0 436 mov \$0xf80,$end0 # borrow $end0 437 vmovdqu ($Xip),$Xi # load Xi 438 and \$-128,%rsp # ensure stack alignment 439 vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask 440 lea 0x80($key),$key # size optimization 441 lea 0x20+0x20($Xip),$Xip # size optimization 442 mov 0xf0-0x80($key),$rounds 443 vpshufb $Ii,$Xi,$Xi 444 445 and $end0,$in0 446 and %rsp,$end0 447 sub $in0,$end0 448 jc .Ldec_no_key_aliasing 449 cmp \$768,$end0 450 jnc .Ldec_no_key_aliasing 451 sub $end0,%rsp # avoid aliasing with key 452.Ldec_no_key_aliasing: 453 454 vmovdqu 0x50($inp),$Z3 # I[5] 455 lea ($inp),$in0 456 vmovdqu 0x40($inp),$Z0 457 lea -0xc0($inp,$len),$end0 458 vmovdqu 0x30($inp),$Z1 459 shr \$4,$len 460 xor $ret,$ret 461 vmovdqu 0x20($inp),$Z2 462 vpshufb $Ii,$Z3,$Z3 # passed to _aesni_ctr32_ghash_6x 463 vmovdqu 0x10($inp),$T2 464 vpshufb $Ii,$Z0,$Z0 465 vmovdqu ($inp),$Hkey 466 vpshufb $Ii,$Z1,$Z1 467 vmovdqu $Z0,0x30(%rsp) 468 vpshufb $Ii,$Z2,$Z2 469 vmovdqu $Z1,0x40(%rsp) 470 vpshufb $Ii,$T2,$T2 471 vmovdqu $Z2,0x50(%rsp) 472 vpshufb $Ii,$Hkey,$Hkey 473 vmovdqu $T2,0x60(%rsp) 474 vmovdqu $Hkey,0x70(%rsp) 475 476 call _aesni_ctr32_ghash_6x 477 478 vmovups $inout0,-0x60($out) # save output 479 vmovups $inout1,-0x50($out) 480 vmovups $inout2,-0x40($out) 481 vmovups $inout3,-0x30($out) 482 vmovups $inout4,-0x20($out) 483 vmovups $inout5,-0x10($out) 484 485 vpshufb ($const),$Xi,$Xi # .Lbswap_mask 486 vmovdqu $Xi,-0x40($Xip) # output Xi 487 488 vzeroupper 489___ 490$code.=<<___ if ($win64); 491 movaps -0xd8(%rax),%xmm6 492 movaps -0xc8(%rax),%xmm7 # restore %xmm7 from its own save slot (was -0xd8, %xmm6's slot) 493 movaps -0xb8(%rax),%xmm8 494 movaps -0xa8(%rax),%xmm9 495 movaps
-0x98(%rax),%xmm10 496 movaps -0x88(%rax),%xmm11 497 movaps -0x78(%rax),%xmm12 498 movaps -0x68(%rax),%xmm13 499 movaps -0x58(%rax),%xmm14 500 movaps -0x48(%rax),%xmm15 501___ 502$code.=<<___; 503 mov -48(%rax),%r15 504 mov -40(%rax),%r14 505 mov -32(%rax),%r13 506 mov -24(%rax),%r12 507 mov -16(%rax),%rbp 508 mov -8(%rax),%rbx 509 lea (%rax),%rsp # restore %rsp 510.Lgcm_dec_abort: 511 mov $ret,%rax # return value 512 ret 513.size aesni_gcm_decrypt,.-aesni_gcm_decrypt 514___ 515 516$code.=<<___; 517.type _aesni_ctr32_6x,\@abi-omnipotent 518.align 32 519_aesni_ctr32_6x: 520 vmovdqu 0x00-0x80($key),$Z0 # borrow $Z0 for $rndkey 521 vmovdqu 0x20($const),$T2 # borrow $T2, .Lone_msb 522 lea -1($rounds),%r13 523 vmovups 0x10-0x80($key),$rndkey 524 lea 0x20-0x80($key),%r12 525 vpxor $Z0,$T1,$inout0 526 add \$`6<<24`,$counter 527 jc .Lhandle_ctr32_2 528 vpaddb $T2,$T1,$inout1 529 vpaddb $T2,$inout1,$inout2 530 vpxor $Z0,$inout1,$inout1 531 vpaddb $T2,$inout2,$inout3 532 vpxor $Z0,$inout2,$inout2 533 vpaddb $T2,$inout3,$inout4 534 vpxor $Z0,$inout3,$inout3 535 vpaddb $T2,$inout4,$inout5 536 vpxor $Z0,$inout4,$inout4 537 vpaddb $T2,$inout5,$T1 538 vpxor $Z0,$inout5,$inout5 539 jmp .Loop_ctr32 540 541.align 16 542.Loop_ctr32: 543 vaesenc $rndkey,$inout0,$inout0 544 vaesenc $rndkey,$inout1,$inout1 545 vaesenc $rndkey,$inout2,$inout2 546 vaesenc $rndkey,$inout3,$inout3 547 vaesenc $rndkey,$inout4,$inout4 548 vaesenc $rndkey,$inout5,$inout5 549 vmovups (%r12),$rndkey 550 lea 0x10(%r12),%r12 551 dec %r13d 552 jnz .Loop_ctr32 553 554 vmovdqu (%r12),$Hkey # last round key 555 vaesenc $rndkey,$inout0,$inout0 556 vpxor 0x00($inp),$Hkey,$Z0 557 vaesenc $rndkey,$inout1,$inout1 558 vpxor 0x10($inp),$Hkey,$Z1 559 vaesenc $rndkey,$inout2,$inout2 560 vpxor 0x20($inp),$Hkey,$Z2 561 vaesenc $rndkey,$inout3,$inout3 562 vpxor 0x30($inp),$Hkey,$Xi 563 vaesenc $rndkey,$inout4,$inout4 564 vpxor 0x40($inp),$Hkey,$T2 565 vaesenc $rndkey,$inout5,$inout5 566 vpxor 0x50($inp),$Hkey,$Hkey 567 lea 
0x60($inp),$inp 568 569 vaesenclast $Z0,$inout0,$inout0 570 vaesenclast $Z1,$inout1,$inout1 571 vaesenclast $Z2,$inout2,$inout2 572 vaesenclast $Xi,$inout3,$inout3 573 vaesenclast $T2,$inout4,$inout4 574 vaesenclast $Hkey,$inout5,$inout5 575 vmovups $inout0,0x00($out) 576 vmovups $inout1,0x10($out) 577 vmovups $inout2,0x20($out) 578 vmovups $inout3,0x30($out) 579 vmovups $inout4,0x40($out) 580 vmovups $inout5,0x50($out) 581 lea 0x60($out),$out 582 583 ret 584.align 32 585.Lhandle_ctr32_2: 586 vpshufb $Ii,$T1,$Z2 # byte-swap counter 587 vmovdqu 0x30($const),$Z1 # borrow $Z1, .Ltwo_lsb 588 vpaddd 0x40($const),$Z2,$inout1 # .Lone_lsb 589 vpaddd $Z1,$Z2,$inout2 590 vpaddd $Z1,$inout1,$inout3 591 vpshufb $Ii,$inout1,$inout1 592 vpaddd $Z1,$inout2,$inout4 593 vpshufb $Ii,$inout2,$inout2 594 vpxor $Z0,$inout1,$inout1 595 vpaddd $Z1,$inout3,$inout5 596 vpshufb $Ii,$inout3,$inout3 597 vpxor $Z0,$inout2,$inout2 598 vpaddd $Z1,$inout4,$T1 # byte-swapped next counter value 599 vpshufb $Ii,$inout4,$inout4 600 vpxor $Z0,$inout3,$inout3 601 vpshufb $Ii,$inout5,$inout5 602 vpxor $Z0,$inout4,$inout4 603 vpshufb $Ii,$T1,$T1 # next counter value 604 vpxor $Z0,$inout5,$inout5 605 jmp .Loop_ctr32 606.size _aesni_ctr32_6x,.-_aesni_ctr32_6x 607 608.globl aesni_gcm_encrypt 609.type aesni_gcm_encrypt,\@function,6 610.align 32 611aesni_gcm_encrypt: 612 xor $ret,$ret 613 cmp \$0x60*3,$len # minimal accepted length 614 jb .Lgcm_enc_abort 615 616 lea (%rsp),%rax # save stack pointer 617 push %rbx 618 push %rbp 619 push %r12 620 push %r13 621 push %r14 622 push %r15 623___ 624$code.=<<___ if ($win64); 625 lea -0xa8(%rsp),%rsp 626 movaps %xmm6,-0xd8(%rax) 627 movaps %xmm7,-0xc8(%rax) 628 movaps %xmm8,-0xb8(%rax) 629 movaps %xmm9,-0xa8(%rax) 630 movaps %xmm10,-0x98(%rax) 631 movaps %xmm11,-0x88(%rax) 632 movaps %xmm12,-0x78(%rax) 633 movaps %xmm13,-0x68(%rax) 634 movaps %xmm14,-0x58(%rax) 635 movaps %xmm15,-0x48(%rax) 636.Lgcm_enc_body: 637___ 638$code.=<<___; 639 vzeroupper 640 641 vmovdqu 
($ivp),$T1 # input counter value 642 add \$-128,%rsp 643 mov 12($ivp),$counter 644 lea .Lbswap_mask(%rip),$const 645 lea -0x80($key),$in0 # borrow $in0 646 mov \$0xf80,$end0 # borrow $end0 647 lea 0x80($key),$key # size optimization 648 vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask 649 and \$-128,%rsp # ensure stack alignment 650 mov 0xf0-0x80($key),$rounds 651 652 and $end0,$in0 653 and %rsp,$end0 654 sub $in0,$end0 655 jc .Lenc_no_key_aliasing 656 cmp \$768,$end0 657 jnc .Lenc_no_key_aliasing 658 sub $end0,%rsp # avoid aliasing with key 659.Lenc_no_key_aliasing: 660 661 lea ($out),$in0 662 lea -0xc0($out,$len),$end0 663 shr \$4,$len 664 665 call _aesni_ctr32_6x 666 vpshufb $Ii,$inout0,$Xi # save bswapped output on stack 667 vpshufb $Ii,$inout1,$T2 668 vmovdqu $Xi,0x70(%rsp) 669 vpshufb $Ii,$inout2,$Z0 670 vmovdqu $T2,0x60(%rsp) 671 vpshufb $Ii,$inout3,$Z1 672 vmovdqu $Z0,0x50(%rsp) 673 vpshufb $Ii,$inout4,$Z2 674 vmovdqu $Z1,0x40(%rsp) 675 vpshufb $Ii,$inout5,$Z3 # passed to _aesni_ctr32_ghash_6x 676 vmovdqu $Z2,0x30(%rsp) 677 678 call _aesni_ctr32_6x 679 680 vmovdqu ($Xip),$Xi # load Xi 681 lea 0x20+0x20($Xip),$Xip # size optimization 682 sub \$12,$len 683 mov \$0x60*2,$ret 684 vpshufb $Ii,$Xi,$Xi 685 686 call _aesni_ctr32_ghash_6x 687 vmovdqu 0x20(%rsp),$Z3 # I[5] 688 vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask 689 vmovdqu 0x00-0x20($Xip),$Hkey # $Hkey^1 690 vpunpckhqdq $Z3,$Z3,$T1 691 vmovdqu 0x20-0x20($Xip),$rndkey # borrow $rndkey for $HK 692 vmovups $inout0,-0x60($out) # save output 693 vpshufb $Ii,$inout0,$inout0 # but keep bswapped copy 694 vpxor $Z3,$T1,$T1 695 vmovups $inout1,-0x50($out) 696 vpshufb $Ii,$inout1,$inout1 697 vmovups $inout2,-0x40($out) 698 vpshufb $Ii,$inout2,$inout2 699 vmovups $inout3,-0x30($out) 700 vpshufb $Ii,$inout3,$inout3 701 vmovups $inout4,-0x20($out) 702 vpshufb $Ii,$inout4,$inout4 703 vmovups $inout5,-0x10($out) 704 vpshufb $Ii,$inout5,$inout5 705 vmovdqu $inout0,0x10(%rsp) # free $inout0 706___ 707{ my 
($HK,$T3)=($rndkey,$inout0); 708 709$code.=<<___; 710 vmovdqu 0x30(%rsp),$Z2 # I[4] 711 vmovdqu 0x10-0x20($Xip),$Ii # borrow $Ii for $Hkey^2 712 vpunpckhqdq $Z2,$Z2,$T2 713 vpclmulqdq \$0x00,$Hkey,$Z3,$Z1 714 vpxor $Z2,$T2,$T2 715 vpclmulqdq \$0x11,$Hkey,$Z3,$Z3 716 vpclmulqdq \$0x00,$HK,$T1,$T1 717 718 vmovdqu 0x40(%rsp),$T3 # I[3] 719 vpclmulqdq \$0x00,$Ii,$Z2,$Z0 720 vmovdqu 0x30-0x20($Xip),$Hkey # $Hkey^3 721 vpxor $Z1,$Z0,$Z0 722 vpunpckhqdq $T3,$T3,$Z1 723 vpclmulqdq \$0x11,$Ii,$Z2,$Z2 724 vpxor $T3,$Z1,$Z1 725 vpxor $Z3,$Z2,$Z2 726 vpclmulqdq \$0x10,$HK,$T2,$T2 727 vmovdqu 0x50-0x20($Xip),$HK 728 vpxor $T1,$T2,$T2 729 730 vmovdqu 0x50(%rsp),$T1 # I[2] 731 vpclmulqdq \$0x00,$Hkey,$T3,$Z3 732 vmovdqu 0x40-0x20($Xip),$Ii # borrow $Ii for $Hkey^4 733 vpxor $Z0,$Z3,$Z3 734 vpunpckhqdq $T1,$T1,$Z0 735 vpclmulqdq \$0x11,$Hkey,$T3,$T3 736 vpxor $T1,$Z0,$Z0 737 vpxor $Z2,$T3,$T3 738 vpclmulqdq \$0x00,$HK,$Z1,$Z1 739 vpxor $T2,$Z1,$Z1 740 741 vmovdqu 0x60(%rsp),$T2 # I[1] 742 vpclmulqdq \$0x00,$Ii,$T1,$Z2 743 vmovdqu 0x60-0x20($Xip),$Hkey # $Hkey^5 744 vpxor $Z3,$Z2,$Z2 745 vpunpckhqdq $T2,$T2,$Z3 746 vpclmulqdq \$0x11,$Ii,$T1,$T1 747 vpxor $T2,$Z3,$Z3 748 vpxor $T3,$T1,$T1 749 vpclmulqdq \$0x10,$HK,$Z0,$Z0 750 vmovdqu 0x80-0x20($Xip),$HK 751 vpxor $Z1,$Z0,$Z0 752 753 vpxor 0x70(%rsp),$Xi,$Xi # accumulate I[0] 754 vpclmulqdq \$0x00,$Hkey,$T2,$Z1 755 vmovdqu 0x70-0x20($Xip),$Ii # borrow $Ii for $Hkey^6 756 vpunpckhqdq $Xi,$Xi,$T3 757 vpxor $Z2,$Z1,$Z1 758 vpclmulqdq \$0x11,$Hkey,$T2,$T2 759 vpxor $Xi,$T3,$T3 760 vpxor $T1,$T2,$T2 761 vpclmulqdq \$0x00,$HK,$Z3,$Z3 762 vpxor $Z0,$Z3,$Z0 763 764 vpclmulqdq \$0x00,$Ii,$Xi,$Z2 765 vmovdqu 0x00-0x20($Xip),$Hkey # $Hkey^1 766 vpunpckhqdq $inout5,$inout5,$T1 767 vpclmulqdq \$0x11,$Ii,$Xi,$Xi 768 vpxor $inout5,$T1,$T1 769 vpxor $Z1,$Z2,$Z1 770 vpclmulqdq \$0x10,$HK,$T3,$T3 771 vmovdqu 0x20-0x20($Xip),$HK 772 vpxor $T2,$Xi,$Z3 773 vpxor $Z0,$T3,$Z2 774 775 vmovdqu 0x10-0x20($Xip),$Ii # borrow $Ii for $Hkey^2 776 vpxor 
$Z1,$Z3,$T3 # aggregated Karatsuba post-processing 777 vpclmulqdq \$0x00,$Hkey,$inout5,$Z0 778 vpxor $T3,$Z2,$Z2 779 vpunpckhqdq $inout4,$inout4,$T2 780 vpclmulqdq \$0x11,$Hkey,$inout5,$inout5 781 vpxor $inout4,$T2,$T2 782 vpslldq \$8,$Z2,$T3 783 vpclmulqdq \$0x00,$HK,$T1,$T1 784 vpxor $T3,$Z1,$Xi 785 vpsrldq \$8,$Z2,$Z2 786 vpxor $Z2,$Z3,$Z3 787 788 vpclmulqdq \$0x00,$Ii,$inout4,$Z1 789 vmovdqu 0x30-0x20($Xip),$Hkey # $Hkey^3 790 vpxor $Z0,$Z1,$Z1 791 vpunpckhqdq $inout3,$inout3,$T3 792 vpclmulqdq \$0x11,$Ii,$inout4,$inout4 793 vpxor $inout3,$T3,$T3 794 vpxor $inout5,$inout4,$inout4 795 vpalignr \$8,$Xi,$Xi,$inout5 # 1st phase 796 vpclmulqdq \$0x10,$HK,$T2,$T2 797 vmovdqu 0x50-0x20($Xip),$HK 798 vpxor $T1,$T2,$T2 799 800 vpclmulqdq \$0x00,$Hkey,$inout3,$Z0 801 vmovdqu 0x40-0x20($Xip),$Ii # borrow $Ii for $Hkey^4 802 vpxor $Z1,$Z0,$Z0 803 vpunpckhqdq $inout2,$inout2,$T1 804 vpclmulqdq \$0x11,$Hkey,$inout3,$inout3 805 vpxor $inout2,$T1,$T1 806 vpxor $inout4,$inout3,$inout3 807 vxorps 0x10(%rsp),$Z3,$Z3 # accumulate $inout0 808 vpclmulqdq \$0x00,$HK,$T3,$T3 809 vpxor $T2,$T3,$T3 810 811 vpclmulqdq \$0x10,0x10($const),$Xi,$Xi 812 vxorps $inout5,$Xi,$Xi 813 814 vpclmulqdq \$0x00,$Ii,$inout2,$Z1 815 vmovdqu 0x60-0x20($Xip),$Hkey # $Hkey^5 816 vpxor $Z0,$Z1,$Z1 817 vpunpckhqdq $inout1,$inout1,$T2 818 vpclmulqdq \$0x11,$Ii,$inout2,$inout2 819 vpxor $inout1,$T2,$T2 820 vpalignr \$8,$Xi,$Xi,$inout5 # 2nd phase 821 vpxor $inout3,$inout2,$inout2 822 vpclmulqdq \$0x10,$HK,$T1,$T1 823 vmovdqu 0x80-0x20($Xip),$HK 824 vpxor $T3,$T1,$T1 825 826 vxorps $Z3,$inout5,$inout5 827 vpclmulqdq \$0x10,0x10($const),$Xi,$Xi 828 vxorps $inout5,$Xi,$Xi 829 830 vpclmulqdq \$0x00,$Hkey,$inout1,$Z0 831 vmovdqu 0x70-0x20($Xip),$Ii # borrow $Ii for $Hkey^6 832 vpxor $Z1,$Z0,$Z0 833 vpunpckhqdq $Xi,$Xi,$T3 834 vpclmulqdq \$0x11,$Hkey,$inout1,$inout1 835 vpxor $Xi,$T3,$T3 836 vpxor $inout2,$inout1,$inout1 837 vpclmulqdq \$0x00,$HK,$T2,$T2 838 vpxor $T1,$T2,$T2 839 840 vpclmulqdq \$0x00,$Ii,$Xi,$Z1 
841 vpclmulqdq \$0x11,$Ii,$Xi,$Z3 842 vpxor $Z0,$Z1,$Z1 843 vpclmulqdq \$0x10,$HK,$T3,$Z2 844 vpxor $inout1,$Z3,$Z3 845 vpxor $T2,$Z2,$Z2 846 847 vpxor $Z1,$Z3,$Z0 # aggregated Karatsuba post-processing 848 vpxor $Z0,$Z2,$Z2 849 vpslldq \$8,$Z2,$T1 850 vmovdqu 0x10($const),$Hkey # .Lpoly 851 vpsrldq \$8,$Z2,$Z2 852 vpxor $T1,$Z1,$Xi 853 vpxor $Z2,$Z3,$Z3 854 855 vpalignr \$8,$Xi,$Xi,$T2 # 1st phase 856 vpclmulqdq \$0x10,$Hkey,$Xi,$Xi 857 vpxor $T2,$Xi,$Xi 858 859 vpalignr \$8,$Xi,$Xi,$T2 # 2nd phase 860 vpclmulqdq \$0x10,$Hkey,$Xi,$Xi 861 vpxor $Z3,$T2,$T2 862 vpxor $T2,$Xi,$Xi 863___ 864} 865$code.=<<___; 866 vpshufb ($const),$Xi,$Xi # .Lbswap_mask 867 vmovdqu $Xi,-0x40($Xip) # output Xi 868 869 vzeroupper 870___ 871$code.=<<___ if ($win64); 872 movaps -0xd8(%rax),%xmm6 873 movaps -0xc8(%rax),%xmm7 874 movaps -0xb8(%rax),%xmm8 875 movaps -0xa8(%rax),%xmm9 876 movaps -0x98(%rax),%xmm10 877 movaps -0x88(%rax),%xmm11 878 movaps -0x78(%rax),%xmm12 879 movaps -0x68(%rax),%xmm13 880 movaps -0x58(%rax),%xmm14 881 movaps -0x48(%rax),%xmm15 882___ 883$code.=<<___; 884 mov -48(%rax),%r15 885 mov -40(%rax),%r14 886 mov -32(%rax),%r13 887 mov -24(%rax),%r12 888 mov -16(%rax),%rbp 889 mov -8(%rax),%rbx 890 lea (%rax),%rsp # restore %rsp 891.Lgcm_enc_abort: 892 mov $ret,%rax # return value 893 ret 894.size aesni_gcm_encrypt,.-aesni_gcm_encrypt 895___ 896 897$code.=<<___; 898.align 64 899.Lbswap_mask: 900 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 901.Lpoly: 902 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 903.Lone_msb: 904 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 905.Ltwo_lsb: 906 .byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 907.Lone_lsb: 908 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 909.asciz "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro\@openssl.org>" 910.align 64 911___ 912if ($win64) { 913$rec="%rcx"; 914$frame="%rdx"; 915$context="%r8"; 916$disp="%r9"; 917 918$code.=<<___ 919.extern __imp_RtlVirtualUnwind 920.type gcm_se_handler,\@abi-omnipotent 921.align 16 922gcm_se_handler: 923 
push %rsi 924 push %rdi 925 push %rbx 926 push %rbp 927 push %r12 928 push %r13 929 push %r14 930 push %r15 931 pushfq 932 sub \$64,%rsp 933 934 mov 120($context),%rax # pull context->Rax 935 mov 248($context),%rbx # pull context->Rip 936 937 mov 8($disp),%rsi # disp->ImageBase 938 mov 56($disp),%r11 # disp->HandlerData 939 940 mov 0(%r11),%r10d # HandlerData[0] 941 lea (%rsi,%r10),%r10 # prologue label 942 cmp %r10,%rbx # context->Rip<prologue label 943 jb .Lcommon_seh_tail 944 945 mov 152($context),%rax # pull context->Rsp 946 947 mov 4(%r11),%r10d # HandlerData[1] 948 lea (%rsi,%r10),%r10 # epilogue label 949 cmp %r10,%rbx # context->Rip>=epilogue label 950 jae .Lcommon_seh_tail 951 952 mov 120($context),%rax # pull context->Rax 953 954 mov -48(%rax),%r15 955 mov -40(%rax),%r14 956 mov -32(%rax),%r13 957 mov -24(%rax),%r12 958 mov -16(%rax),%rbp 959 mov -8(%rax),%rbx 960 mov %r15,240($context) 961 mov %r14,232($context) 962 mov %r13,224($context) 963 mov %r12,216($context) 964 mov %rbp,160($context) 965 mov %rbx,144($context) 966 967 lea -0xd8(%rax),%rsi # %xmm save area 968 lea 512($context),%rdi # & context.Xmm6 969 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) 970 .long 0xa548f3fc # cld; rep movsq 971 972.Lcommon_seh_tail: 973 mov 8(%rax),%rdi 974 mov 16(%rax),%rsi 975 mov %rax,152($context) # restore context->Rsp 976 mov %rsi,168($context) # restore context->Rsi 977 mov %rdi,176($context) # restore context->Rdi 978 979 mov 40($disp),%rdi # disp->ContextRecord 980 mov $context,%rsi # context 981 mov \$154,%ecx # sizeof(CONTEXT) 982 .long 0xa548f3fc # cld; rep movsq 983 984 mov $disp,%rsi 985 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER 986 mov 8(%rsi),%rdx # arg2, disp->ImageBase 987 mov 0(%rsi),%r8 # arg3, disp->ControlPc 988 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry 989 mov 40(%rsi),%r10 # disp->ContextRecord 990 lea 56(%rsi),%r11 # &disp->HandlerData 991 lea 24(%rsi),%r12 # &disp->EstablisherFrame 992 mov %r10,32(%rsp) # arg5 993 mov %r11,40(%rsp) # arg6 994 
mov %r12,48(%rsp) # arg7 995 mov %rcx,56(%rsp) # arg8, (NULL) 996 call *__imp_RtlVirtualUnwind(%rip) 997 998 mov \$1,%eax # ExceptionContinueSearch 999 add \$64,%rsp 1000 popfq 1001 pop %r15 1002 pop %r14 1003 pop %r13 1004 pop %r12 1005 pop %rbp 1006 pop %rbx 1007 pop %rdi 1008 pop %rsi 1009 ret 1010.size gcm_se_handler,.-gcm_se_handler 1011 1012.section .pdata 1013.align 4 1014 .rva .LSEH_begin_aesni_gcm_decrypt 1015 .rva .LSEH_end_aesni_gcm_decrypt 1016 .rva .LSEH_gcm_dec_info 1017 1018 .rva .LSEH_begin_aesni_gcm_encrypt 1019 .rva .LSEH_end_aesni_gcm_encrypt 1020 .rva .LSEH_gcm_enc_info 1021.section .xdata 1022.align 8 1023.LSEH_gcm_dec_info: 1024 .byte 9,0,0,0 1025 .rva gcm_se_handler 1026 .rva .Lgcm_dec_body,.Lgcm_dec_abort 1027.LSEH_gcm_enc_info: 1028 .byte 9,0,0,0 1029 .rva gcm_se_handler 1030 .rva .Lgcm_enc_body,.Lgcm_enc_abort 1031___ 1032} 1033}}} else {{{ 1034$code=<<___; # assembler is too old 1035.text 1036 1037.globl aesni_gcm_encrypt 1038.type aesni_gcm_encrypt,\@abi-omnipotent 1039aesni_gcm_encrypt: 1040 xor %eax,%eax 1041 ret 1042.size aesni_gcm_encrypt,.-aesni_gcm_encrypt 1043 1044.globl aesni_gcm_decrypt 1045.type aesni_gcm_decrypt,\@abi-omnipotent 1046aesni_gcm_decrypt: 1047 xor %eax,%eax 1048 ret 1049.size aesni_gcm_decrypt,.-aesni_gcm_decrypt 1050___ 1051}}} 1052 1053$code =~ s/\`([^\`]*)\`/eval($1)/gem; 1054 1055print $code; 1056 1057close STDOUT or die "error closing STDOUT: $!"; 1058