1// gcm.cpp - written and placed in the public domain by Wei Dai 2 3// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code 4 5#include "pch.h" 6 7#ifndef CRYPTOPP_IMPORTS 8#ifndef CRYPTOPP_GENERATE_X64_MASM 9 10#include "gcm.h" 11#include "cpu.h" 12 13#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 14#include <emmintrin.h> 15#endif 16 17NAMESPACE_BEGIN(CryptoPP) 18 19word16 GCM_Base::s_reductionTable[256]; 20bool GCM_Base::s_reductionTableInitialized = false; 21 22void GCM_Base::GCTR::IncrementCounterBy256() 23{ 24 IncrementCounterByOne(m_counterArray+BlockSize()-4, 3); 25} 26 27#if 0 28// preserved for testing 29void gcm_gf_mult(const unsigned char *a, const unsigned char *b, unsigned char *c) 30{ 31 word64 Z0=0, Z1=0, V0, V1; 32 33 typedef BlockGetAndPut<word64, BigEndian> Block; 34 Block::Get(a)(V0)(V1); 35 36 for (int i=0; i<16; i++) 37 { 38 for (int j=0x80; j!=0; j>>=1) 39 { 40 int x = b[i] & j; 41 Z0 ^= x ? V0 : 0; 42 Z1 ^= x ? V1 : 0; 43 x = (int)V1 & 1; 44 V1 = (V1>>1) | (V0<<63); 45 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0); 46 } 47 } 48 Block::Put(NULL, c)(Z0)(Z1); 49} 50#endif 51 52#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 53inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c) 54{ 55#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 56 *(__m128i *)a = _mm_xor_si128(*(__m128i *)b, *(__m128i *)c); 57#else 58 asm ("movdqa %1, %%xmm0; pxor %2, %%xmm0; movdqa %%xmm0, %0;" : "=m" (a[0]) : "m"(b[0]), "m"(c[0])); 59#endif 60} 61#endif 62 63inline static void Xor16(byte *a, const byte *b, const byte *c) 64{ 65 ((word64 *)a)[0] = ((word64 *)b)[0] ^ ((word64 *)c)[0]; 66 ((word64 *)a)[1] = ((word64 *)b)[1] ^ ((word64 *)c)[1]; 67} 68 69void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs ¶ms) 70{ 71 BlockCipher &blockCipher = AccessBlockCipher(); 72 blockCipher.SetKey(userKey, keylength, params); 73 74 if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE) 75 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16"); 76 77 int tableSize; 78 if (params.GetIntValue(Name::TableSize(), tableSize)) 79 tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024; 80 else 81 tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024; 82 83#if defined(_MSC_VER) && (_MSC_VER >= 1300 && _MSC_VER < 1400) 84 // VC 2003 workaround: compiler generates bad code for 64K tables 85 tableSize = 2*1024; 86#endif 87 88 m_buffer.resize(3*REQUIRED_BLOCKSIZE + tableSize); 89 byte *hashKey = HashKey(); 90 memset(hashKey, 0, REQUIRED_BLOCKSIZE); 91 blockCipher.ProcessBlock(hashKey); 92 93 byte *table = MulTable(); 94 int i, j, k; 95 word64 V0, V1; 96 97 typedef BlockGetAndPut<word64, BigEndian> Block; 98 Block::Get(hashKey)(V0)(V1); 99 100 if (tableSize == 64*1024) 101 { 102 for (i=0; i<128; i++) 103 { 104 k = i%8; 105 Block::Put(NULL, table+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1); 106 107 int x = (int)V1 & 1; 108 V1 = (V1>>1) | (V0<<63); 109 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0); 110 } 111 112 for (i=0; i<16; i++) 113 { 114 memset(table+i*256*16, 0, 16); 115#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 116 if (HasSSE2()) 117 for (j=2; j<=0x80; j*=2) 118 for (k=1; k<j; k++) 119 SSE2_Xor16(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16); 120 else 121#endif 122 for (j=2; j<=0x80; j*=2) 123 for (k=1; k<j; k++) 124 Xor16(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16); 125 } 126 } 127 else 128 { 129 if (!s_reductionTableInitialized) 130 { 131 s_reductionTable[0] = 0; 132 word16 x = 0x01c2; 133 s_reductionTable[1] = ByteReverse(x); 134 for (int i=2; i<=0x80; i*=2) 135 { 136 x <<= 1; 137 s_reductionTable[i] = ByteReverse(x); 138 for (int j=1; j<i; j++) 139 s_reductionTable[i+j] = s_reductionTable[i] ^ s_reductionTable[j]; 140 } 141 s_reductionTableInitialized = true; 142 } 143 144 for (i=0; i<128-24; i++) 145 { 146 k = i%32; 147 if (k < 4) 148 Block::Put(NULL, table+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1); 149 else if (k < 8) 150 Block::Put(NULL, table+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1); 151 152 int x = (int)V1 & 1; 153 V1 = (V1>>1) | (V0<<63); 154 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0); 155 } 156 157 for (i=0; i<4; i++) 158 { 159 memset(table+i*256, 0, 16); 160 memset(table+1024+i*256, 0, 16); 161#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 162 if (HasSSE2()) 163 for (j=2; j<=8; j*=2) 164 for (k=1; k<j; k++) 165 { 166 SSE2_Xor16(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16); 167 SSE2_Xor16(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16); 168 } 169 else 170#endif 171 for (j=2; j<=8; j*=2) 172 for (k=1; k<j; k++) 173 { 174 Xor16(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16); 175 Xor16(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16); 176 } 177 } 178 } 179} 180 181void GCM_Base::Resync(const byte *iv, size_t len) 182{ 183 BlockCipher &cipher = AccessBlockCipher(); 184 byte *hashBuffer = HashBuffer(); 185 186 if (len == 12) 187 { 188 memcpy(hashBuffer, iv, len); 189 memset(hashBuffer+len, 0, 3); 190 hashBuffer[len+3] = 1; 191 } 192 else 193 { 194 size_t origLen = len; 195 memset(hashBuffer, 0, HASH_BLOCKSIZE); 196 197 if (len >= HASH_BLOCKSIZE) 198 { 199 len = GCM_Base::AuthenticateBlocks(iv, len); 200 iv += (origLen - len); 201 } 202 203 if (len > 0) 204 { 205 memcpy(m_buffer, iv, len); 206 memset(m_buffer+len, 0, HASH_BLOCKSIZE-len); 207 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE); 208 } 209 210 PutBlock<word64, BigEndian, true>(NULL, m_buffer)(0)(origLen*8); 211 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE); 212 } 213 214 if (m_state >= State_IVSet) 215 m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE); 216 else 217 m_ctr.SetCipherWithIV(cipher, hashBuffer); 218 219 m_ctr.Seek(HASH_BLOCKSIZE); 220 221 memset(hashBuffer, 0, HASH_BLOCKSIZE); 222} 223 224unsigned int GCM_Base::OptimalDataAlignment() const 225{ 226 return HasSSE2() ? 16 : GetBlockCipher().OptimalDataAlignment(); 227} 228 229#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code 230 231#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM 232 233#ifdef CRYPTOPP_X64_MASM_AVAILABLE 234extern "C" { 235void GCM_AuthenticateBlocks_2K(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable); 236void GCM_AuthenticateBlocks_64K(const byte *data, size_t blocks, word64 *hashBuffer); 237} 238#endif 239 240#ifndef CRYPTOPP_GENERATE_X64_MASM 241 242size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len) 243{ 244 typedef BlockGetAndPut<word64, NativeByteOrder> Block; 245 word64 *hashBuffer = (word64 *)HashBuffer(); 246 247 switch (2*(m_buffer.size()>=64*1024) 248#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) 249 + HasSSE2() 250#endif 251 ) 252 { 253 case 0: // non-SSE2 and 2K tables 254 { 255 byte *table = MulTable(); 256 word64 x0 = hashBuffer[0], x1 = hashBuffer[1]; 257 258 do 259 { 260 word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1; 261 Block::Get(data)(y0)(y1); 262 x0 ^= y0; 263 x1 ^= y1; 264 265 data += HASH_BLOCKSIZE; 266 len -= HASH_BLOCKSIZE; 267 268 #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(table+(a*1024)+(b*256)+c+d*8) 269 270 #ifdef IS_LITTLE_ENDIAN 271 #if CRYPTOPP_BOOL_SLOW_WORD64 272 word32 z0 = (word32)x0; 273 word32 z1 = (word32)(x0>>32); 274 word32 z2 = (word32)x1; 275 word32 z3 = (word32)(x1>>32); 276 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e) 277 #else 278 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e) 279 #endif 280 #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8) 281 #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8; 282 #else 283 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e) 284 #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff) 285 #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8; 286 #endif 287 288 #define GF_MUL_32BY128(op, a, b, c) \ 289 a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0);\ 290 a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1);\ 291 b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0);\ 292 b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1);\ 293 c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0);\ 294 c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1);\ 295 d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0);\ 296 d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1);\ 297 298 GF_MUL_32BY128(=, 0, 0, 0) 299 GF_MUL_32BY128(^=, 0, 1, 1) 300 GF_MUL_32BY128(^=, 1, 0, 2) 301 GF_MUL_32BY128(^=, 1, 1, 3) 302 303 word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16; 304 GF_SHIFT_8(d) 305 c0 ^= d0; c1 ^= d1; 306 r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8; 307 GF_SHIFT_8(c) 308 b0 ^= c0; b1 ^= c1; 309 r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)]; 310 GF_SHIFT_8(b) 311 a0 ^= b0; a1 ^= b1; 312 a0 ^= ConditionalByteReverse<word64>(LITTLE_ENDIAN_ORDER, r); 313 x0 = a0; x1 = a1; 314 } 315 while (len >= HASH_BLOCKSIZE); 316 317 hashBuffer[0] = x0; hashBuffer[1] = x1; 318 return len; 319 } 320 321 case 2: // non-SSE2 and 64K tables 322 { 323 byte *table = MulTable(); 324 word64 x0 = hashBuffer[0], x1 = hashBuffer[1]; 325 326 do 327 { 328 word64 y0, y1, a0, a1; 329 Block::Get(data)(y0)(y1); 330 x0 ^= y0; 331 x1 ^= y1; 332 333 data += HASH_BLOCKSIZE; 334 len -= HASH_BLOCKSIZE; 335 336 #undef READ_TABLE_WORD64_COMMON 337 #undef READ_TABLE_WORD64 338 339 #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(table+(a)*256*16+(c)+(d)*8) 340 341 #ifdef IS_LITTLE_ENDIAN 342 #if CRYPTOPP_BOOL_SLOW_WORD64 343 word32 z0 = (word32)x0; 344 word32 z1 = (word32)(x0>>32); 345 word32 z2 = (word32)x1; 346 word32 z3 = (word32)(x1>>32); 347 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e) 348 #else 349 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e) 350 #endif 351 #else 352 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e) 353 #endif 354 355 #define GF_MUL_8BY128(op, b, c, d) \ 356 a0 op READ_TABLE_WORD64(b, c, d, 0);\ 357 a1 op READ_TABLE_WORD64(b, c, d, 1);\ 358 359 GF_MUL_8BY128(=, 0, 0, 0) 360 GF_MUL_8BY128(^=, 0, 0, 1) 361 GF_MUL_8BY128(^=, 0, 0, 2) 362 GF_MUL_8BY128(^=, 0, 0, 3) 363 GF_MUL_8BY128(^=, 0, 1, 0) 364 GF_MUL_8BY128(^=, 0, 1, 1) 365 GF_MUL_8BY128(^=, 0, 1, 2) 366 GF_MUL_8BY128(^=, 0, 1, 3) 367 GF_MUL_8BY128(^=, 1, 2, 0) 368 GF_MUL_8BY128(^=, 1, 2, 1) 369 GF_MUL_8BY128(^=, 1, 2, 2) 370 GF_MUL_8BY128(^=, 1, 2, 3) 371 GF_MUL_8BY128(^=, 1, 3, 0) 372 GF_MUL_8BY128(^=, 1, 3, 1) 373 GF_MUL_8BY128(^=, 1, 3, 2) 374 GF_MUL_8BY128(^=, 1, 3, 3) 375 376 x0 = a0; x1 = a1; 377 } 378 while (len >= HASH_BLOCKSIZE); 379 380 hashBuffer[0] = x0; hashBuffer[1] = x1; 381 return len; 382 } 383#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM 384 385#ifdef CRYPTOPP_X64_MASM_AVAILABLE 386 case 1: // SSE2 and 2K tables 387 GCM_AuthenticateBlocks_2K(data, len/16, hashBuffer, s_reductionTable); 388 return len % 16; 389 case 3: // SSE2 and 64K tables 390 GCM_AuthenticateBlocks_64K(data, len/16, hashBuffer); 391 return len % 16; 392#endif 393 394#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 395 case 1: // SSE2 and 2K tables 396 { 397 #ifdef __GNUC__ 398 __asm__ __volatile__ 399 ( 400 ".intel_syntax noprefix;" 401 #elif defined(CRYPTOPP_GENERATE_X64_MASM) 402 ALIGN 8 403 GCM_AuthenticateBlocks_2K PROC FRAME 404 rex_push_reg rsi 405 push_reg rdi 406 push_reg rbx 407 .endprolog 408 mov rsi, r8 409 mov r11, r9 410 #else 411 AS2( mov WORD_REG(cx), data ) 412 AS2( mov WORD_REG(dx), len ) 413 AS2( mov WORD_REG(si), hashBuffer ) 414 AS2( shr WORD_REG(dx), 4 ) 415 #endif 416 417 #if !defined(_MSC_VER) || (_MSC_VER < 1400) 418 AS_PUSH_IF86( bx) 419 #endif 420 AS_PUSH_IF86( bp) 421 422 #ifdef __GNUC__ 423 AS2( mov AS_REG_7, WORD_REG(di)) 424 #elif CRYPTOPP_BOOL_X86 425 AS2( lea AS_REG_7, s_reductionTable) 426 #endif 427 428 AS2( movdqa xmm0, [WORD_REG(si)] ) 429 430 #define MUL_TABLE_0 WORD_REG(si) + 32 431 #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024 432 #define RED_TABLE AS_REG_7 433 434 ASL(0) 435 AS2( movdqu xmm4, [WORD_REG(cx)] ) 436 AS2( pxor xmm0, xmm4 ) 437 438 AS2( movd ebx, xmm0 ) 439 AS2( mov eax, AS_HEX(f0f0f0f0) ) 440 AS2( and eax, ebx ) 441 AS2( shl ebx, 4 ) 442 AS2( and ebx, AS_HEX(f0f0f0f0) ) 443 AS2( movzx edi, ah ) 444 AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] ) 445 AS2( movzx edi, al ) 446 AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] ) 447 AS2( shr eax, 16 ) 448 AS2( movzx edi, ah ) 449 AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] ) 450 AS2( movzx edi, al ) 451 AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] ) 452 453 #define SSE2_MUL_32BITS(i) \ 454 AS2( psrldq xmm0, 4 )\ 455 AS2( movd eax, xmm0 )\ 456 AS2( and eax, AS_HEX(f0f0f0f0) )\ 457 AS2( movzx edi, bh )\ 458 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\ 459 AS2( movzx edi, bl )\ 460 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\ 461 AS2( shr ebx, 16 )\ 462 AS2( movzx edi, bh )\ 463 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\ 464 AS2( movzx edi, bl )\ 465 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\ 466 AS2( movd ebx, xmm0 )\ 467 AS2( shl ebx, 4 )\ 468 AS2( and ebx, AS_HEX(f0f0f0f0) )\ 469 AS2( movzx edi, ah )\ 470 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\ 471 AS2( movzx edi, al )\ 472 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\ 473 AS2( shr eax, 16 )\ 474 AS2( movzx edi, ah )\ 475 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\ 476 AS2( movzx edi, al )\ 477 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\ 478 479 SSE2_MUL_32BITS(1) 480 SSE2_MUL_32BITS(2) 481 SSE2_MUL_32BITS(3) 482 483 AS2( movzx edi, bh ) 484 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] ) 485 AS2( movzx edi, bl ) 486 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] ) 487 AS2( shr ebx, 16 ) 488 AS2( movzx edi, bh ) 489 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] ) 490 AS2( movzx edi, bl ) 491 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] ) 492 493 AS2( movdqa xmm0, xmm3 ) 494 AS2( pslldq xmm3, 1 ) 495 AS2( pxor xmm2, xmm3 ) 496 AS2( movdqa xmm1, xmm2 ) 497 AS2( pslldq xmm2, 1 ) 498 AS2( pxor xmm5, xmm2 ) 499 500 AS2( psrldq xmm0, 15 ) 501 AS2( movd WORD_REG(di), xmm0 ) 502 AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] ) 503 AS2( shl eax, 8 ) 504 505 AS2( movdqa xmm0, xmm5 ) 506 AS2( pslldq xmm5, 1 ) 507 AS2( pxor xmm4, xmm5 ) 508 509 AS2( psrldq xmm1, 15 ) 510 AS2( movd WORD_REG(di), xmm1 ) 511 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] ) 512 AS2( shl eax, 8 ) 513 514 AS2( psrldq xmm0, 15 ) 515 AS2( movd WORD_REG(di), xmm0 ) 516 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] ) 517 518 AS2( movd xmm0, eax ) 519 AS2( pxor xmm0, xmm4 ) 520 521 AS2( add WORD_REG(cx), 16 ) 522 AS2( sub WORD_REG(dx), 1 ) 523 ASJ( jnz, 0, b ) 524 AS2( movdqa [WORD_REG(si)], xmm0 ) 525 526 AS_POP_IF86( bp) 527 #if !defined(_MSC_VER) || (_MSC_VER < 1400) 528 AS_POP_IF86( bx) 529 #endif 530 531 #ifdef __GNUC__ 532 ".att_syntax prefix;" 533 : 534 : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable) 535 : "memory", "cc", "%eax" 536 #if CRYPTOPP_BOOL_X64 537 , "%ebx", "%r11" 538 #endif 539 ); 540 #elif defined(CRYPTOPP_GENERATE_X64_MASM) 541 pop rbx 542 pop rdi 543 pop rsi 544 ret 545 GCM_AuthenticateBlocks_2K ENDP 546 #endif 547 548 return len%16; 549 } 550 case 3: // SSE2 and 64K tables 551 { 552 #ifdef __GNUC__ 553 __asm__ __volatile__ 554 ( 555 ".intel_syntax noprefix;" 556 #elif defined(CRYPTOPP_GENERATE_X64_MASM) 557 ALIGN 8 558 GCM_AuthenticateBlocks_64K PROC FRAME 559 rex_push_reg rsi 560 push_reg rdi 561 .endprolog 562 mov rsi, r8 563 #else 564 AS2( mov WORD_REG(cx), data ) 565 AS2( mov WORD_REG(dx), len ) 566 AS2( mov WORD_REG(si), hashBuffer ) 567 AS2( shr WORD_REG(dx), 4 ) 568 #endif 569 570 AS2( movdqa xmm0, [WORD_REG(si)] ) 571 572 #undef MUL_TABLE 573 #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16 574 575 ASL(1) 576 AS2( movdqu xmm1, [WORD_REG(cx)] ) 577 AS2( pxor xmm1, xmm0 ) 578 AS2( pxor xmm0, xmm0 ) 579 580 #undef SSE2_MUL_32BITS 581 #define SSE2_MUL_32BITS(i) \ 582 AS2( movd eax, xmm1 )\ 583 AS2( psrldq xmm1, 4 )\ 584 AS2( movzx edi, al )\ 585 AS2( add WORD_REG(di), WORD_REG(di) )\ 586 AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\ 587 AS2( movzx edi, ah )\ 588 AS2( add WORD_REG(di), WORD_REG(di) )\ 589 AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\ 590 AS2( shr eax, 16 )\ 591 AS2( movzx edi, al )\ 592 AS2( add WORD_REG(di), WORD_REG(di) )\ 593 AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\ 594 AS2( movzx edi, ah )\ 595 AS2( add WORD_REG(di), WORD_REG(di) )\ 596 AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\ 597 598 SSE2_MUL_32BITS(0) 599 SSE2_MUL_32BITS(1) 600 SSE2_MUL_32BITS(2) 601 SSE2_MUL_32BITS(3) 602 603 AS2( add WORD_REG(cx), 16 ) 604 AS2( sub WORD_REG(dx), 1 ) 605 ASJ( jnz, 1, b ) 606 AS2( movdqa [WORD_REG(si)], xmm0 ) 607 608 #ifdef __GNUC__ 609 ".att_syntax prefix;" 610 : 611 : "c" (data), "d" (len/16), "S" (hashBuffer) 612 : "memory", "cc", "%edi", "%eax" 613 ); 614 #elif defined(CRYPTOPP_GENERATE_X64_MASM) 615 pop rdi 616 pop rsi 617 ret 618 GCM_AuthenticateBlocks_64K ENDP 619 #endif 620 621 return len%16; 622 } 623#endif 624#ifndef CRYPTOPP_GENERATE_X64_MASM 625 } 626 627 return len%16; 628} 629 630void GCM_Base::AuthenticateLastHeaderBlock() 631{ 632 if (m_bufferedDataLength > 0) 633 { 634 memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength); 635 m_bufferedDataLength = 0; 636 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE); 637 } 638} 639 640void GCM_Base::AuthenticateLastConfidentialBlock() 641{ 642 GCM_Base::AuthenticateLastHeaderBlock(); 643 PutBlock<word64, BigEndian, true>(NULL, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8); 644 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE); 645} 646 647void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize) 648{ 649 m_ctr.Seek(0); 650 m_ctr.ProcessData(mac, HashBuffer(), macSize); 651} 652 653NAMESPACE_END 654 655#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM 656#endif 657