1/* 2 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the Apache License 2.0 (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10#include <string.h> 11#include <openssl/crypto.h> 12#include "internal/cryptlib.h" 13#include "internal/endian.h" 14#include "crypto/modes.h" 15 16#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) 17typedef size_t size_t_aX __attribute((__aligned__(1))); 18#else 19typedef size_t size_t_aX; 20#endif 21 22#if defined(BSWAP4) && defined(STRICT_ALIGNMENT) 23/* redefine, because alignment is ensured */ 24# undef GETU32 25# define GETU32(p) BSWAP4(*(const u32 *)(p)) 26# undef PUTU32 27# define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) 28#endif 29 30#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) 31#define REDUCE1BIT(V) do { \ 32 if (sizeof(size_t)==8) { \ 33 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ 34 V.lo = (V.hi<<63)|(V.lo>>1); \ 35 V.hi = (V.hi>>1 )^T; \ 36 } \ 37 else { \ 38 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ 39 V.lo = (V.hi<<63)|(V.lo>>1); \ 40 V.hi = (V.hi>>1 )^((u64)T<<32); \ 41 } \ 42} while(0) 43 44/*- 45 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should 46 * never be set to 8. 8 is effectively reserved for testing purposes. 47 * TABLE_BITS>1 are lookup-table-driven implementations referred to as 48 * "Shoup's" in GCM specification. In other words OpenSSL does not cover 49 * whole spectrum of possible table driven implementations. Why? In 50 * non-"Shoup's" case memory access pattern is segmented in such manner, 51 * that it's trivial to see that cache timing information can reveal 52 * fair portion of intermediate hash value. 
Given that ciphertext is
 * always available to an attacker, it's possible to attempt to
 * deduce the secret parameter H and, if successful, tamper with messages
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
 * not as trivial, but there is no reason to believe that it's resistant
 * to cache-timing attack. And the thing about the "8-bit" implementation is
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
 * key + 1KB shared. Well, on the pros side it should be twice as fast as
 * the "4-bit" version. And for gcc-generated x86[_64] code, the "8-bit"
 * version was observed to run ~75% faster, closer to 100% for commercial
 * compilers... Yet the "4-bit" procedure is preferred, because it's
 * believed to provide a better security-performance balance and adequate
 * all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free
 *   results in VM working set trimming, meaning that a consequent
 *   malloc would immediately incur working set expansion);
 * - a larger table has a larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from the same
 *   thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
77 */ 78#if TABLE_BITS==8 79 80static void gcm_init_8bit(u128 Htable[256], u64 H[2]) 81{ 82 int i, j; 83 u128 V; 84 85 Htable[0].hi = 0; 86 Htable[0].lo = 0; 87 V.hi = H[0]; 88 V.lo = H[1]; 89 90 for (Htable[128] = V, i = 64; i > 0; i >>= 1) { 91 REDUCE1BIT(V); 92 Htable[i] = V; 93 } 94 95 for (i = 2; i < 256; i <<= 1) { 96 u128 *Hi = Htable + i, H0 = *Hi; 97 for (j = 1; j < i; ++j) { 98 Hi[j].hi = H0.hi ^ Htable[j].hi; 99 Hi[j].lo = H0.lo ^ Htable[j].lo; 100 } 101 } 102} 103 104static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) 105{ 106 u128 Z = { 0, 0 }; 107 const u8 *xi = (const u8 *)Xi + 15; 108 size_t rem, n = *xi; 109 DECLARE_IS_ENDIAN; 110 static const size_t rem_8bit[256] = { 111 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), 112 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E), 113 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56), 114 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E), 115 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66), 116 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E), 117 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076), 118 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E), 119 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06), 120 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E), 121 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416), 122 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E), 123 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626), 124 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E), 125 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836), 126 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E), 127 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6), 128 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE), 129 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6), 130 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE), 131 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6), 132 PACK(0x6BA8), PACK(0x6A6A), 
PACK(0x682C), PACK(0x69EE), 133 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6), 134 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE), 135 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86), 136 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E), 137 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496), 138 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E), 139 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6), 140 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE), 141 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6), 142 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE), 143 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346), 144 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E), 145 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56), 146 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E), 147 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66), 148 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E), 149 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176), 150 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E), 151 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06), 152 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E), 153 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516), 154 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E), 155 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726), 156 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E), 157 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936), 158 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E), 159 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6), 160 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE), 161 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6), 162 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE), 163 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6), 164 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE), 165 PACK(0x83B0), PACK(0x8272), PACK(0x8034), 
PACK(0x81F6), 166 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE), 167 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86), 168 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E), 169 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596), 170 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E), 171 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6), 172 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE), 173 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6), 174 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) 175 }; 176 177 while (1) { 178 Z.hi ^= Htable[n].hi; 179 Z.lo ^= Htable[n].lo; 180 181 if ((u8 *)Xi == xi) 182 break; 183 184 n = *(--xi); 185 186 rem = (size_t)Z.lo & 0xff; 187 Z.lo = (Z.hi << 56) | (Z.lo >> 8); 188 Z.hi = (Z.hi >> 8); 189 if (sizeof(size_t) == 8) 190 Z.hi ^= rem_8bit[rem]; 191 else 192 Z.hi ^= (u64)rem_8bit[rem] << 32; 193 } 194 195 if (IS_LITTLE_ENDIAN) { 196# ifdef BSWAP8 197 Xi[0] = BSWAP8(Z.hi); 198 Xi[1] = BSWAP8(Z.lo); 199# else 200 u8 *p = (u8 *)Xi; 201 u32 v; 202 v = (u32)(Z.hi >> 32); 203 PUTU32(p, v); 204 v = (u32)(Z.hi); 205 PUTU32(p + 4, v); 206 v = (u32)(Z.lo >> 32); 207 PUTU32(p + 8, v); 208 v = (u32)(Z.lo); 209 PUTU32(p + 12, v); 210# endif 211 } else { 212 Xi[0] = Z.hi; 213 Xi[1] = Z.lo; 214 } 215} 216 217# define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) 218 219#elif TABLE_BITS==4 220 221static void gcm_init_4bit(u128 Htable[16], u64 H[2]) 222{ 223 u128 V; 224# if defined(OPENSSL_SMALL_FOOTPRINT) 225 int i; 226# endif 227 228 Htable[0].hi = 0; 229 Htable[0].lo = 0; 230 V.hi = H[0]; 231 V.lo = H[1]; 232 233# if defined(OPENSSL_SMALL_FOOTPRINT) 234 for (Htable[8] = V, i = 4; i > 0; i >>= 1) { 235 REDUCE1BIT(V); 236 Htable[i] = V; 237 } 238 239 for (i = 2; i < 16; i <<= 1) { 240 u128 *Hi = Htable + i; 241 int j; 242 for (V = *Hi, j = 1; j < i; ++j) { 243 Hi[j].hi = V.hi ^ Htable[j].hi; 244 Hi[j].lo = V.lo ^ Htable[j].lo; 245 } 246 } 247# else 248 Htable[8] = V; 249 REDUCE1BIT(V); 
250 Htable[4] = V; 251 REDUCE1BIT(V); 252 Htable[2] = V; 253 REDUCE1BIT(V); 254 Htable[1] = V; 255 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo; 256 V = Htable[4]; 257 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo; 258 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo; 259 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo; 260 V = Htable[8]; 261 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo; 262 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo; 263 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo; 264 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo; 265 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo; 266 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo; 267 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo; 268# endif 269# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) 270 /* 271 * ARM assembler expects specific dword order in Htable. 
272 */ 273 { 274 int j; 275 DECLARE_IS_ENDIAN; 276 277 if (IS_LITTLE_ENDIAN) 278 for (j = 0; j < 16; ++j) { 279 V = Htable[j]; 280 Htable[j].hi = V.lo; 281 Htable[j].lo = V.hi; 282 } else 283 for (j = 0; j < 16; ++j) { 284 V = Htable[j]; 285 Htable[j].hi = V.lo << 32 | V.lo >> 32; 286 Htable[j].lo = V.hi << 32 | V.hi >> 32; 287 } 288 } 289# endif 290} 291 292# ifndef GHASH_ASM 293static const size_t rem_4bit[16] = { 294 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), 295 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), 296 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), 297 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) 298}; 299 300static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) 301{ 302 u128 Z; 303 int cnt = 15; 304 size_t rem, nlo, nhi; 305 DECLARE_IS_ENDIAN; 306 307 nlo = ((const u8 *)Xi)[15]; 308 nhi = nlo >> 4; 309 nlo &= 0xf; 310 311 Z.hi = Htable[nlo].hi; 312 Z.lo = Htable[nlo].lo; 313 314 while (1) { 315 rem = (size_t)Z.lo & 0xf; 316 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 317 Z.hi = (Z.hi >> 4); 318 if (sizeof(size_t) == 8) 319 Z.hi ^= rem_4bit[rem]; 320 else 321 Z.hi ^= (u64)rem_4bit[rem] << 32; 322 323 Z.hi ^= Htable[nhi].hi; 324 Z.lo ^= Htable[nhi].lo; 325 326 if (--cnt < 0) 327 break; 328 329 nlo = ((const u8 *)Xi)[cnt]; 330 nhi = nlo >> 4; 331 nlo &= 0xf; 332 333 rem = (size_t)Z.lo & 0xf; 334 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 335 Z.hi = (Z.hi >> 4); 336 if (sizeof(size_t) == 8) 337 Z.hi ^= rem_4bit[rem]; 338 else 339 Z.hi ^= (u64)rem_4bit[rem] << 32; 340 341 Z.hi ^= Htable[nlo].hi; 342 Z.lo ^= Htable[nlo].lo; 343 } 344 345 if (IS_LITTLE_ENDIAN) { 346# ifdef BSWAP8 347 Xi[0] = BSWAP8(Z.hi); 348 Xi[1] = BSWAP8(Z.lo); 349# else 350 u8 *p = (u8 *)Xi; 351 u32 v; 352 v = (u32)(Z.hi >> 32); 353 PUTU32(p, v); 354 v = (u32)(Z.hi); 355 PUTU32(p + 4, v); 356 v = (u32)(Z.lo >> 32); 357 PUTU32(p + 8, v); 358 v = (u32)(Z.lo); 359 PUTU32(p + 12, v); 360# endif 361 } else { 362 Xi[0] = Z.hi; 363 Xi[1] = Z.lo; 364 } 365} 366 
367# if !defined(OPENSSL_SMALL_FOOTPRINT) 368/* 369 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for 370 * details... Compiler-generated code doesn't seem to give any 371 * performance improvement, at least not on x86[_64]. It's here 372 * mostly as reference and a placeholder for possible future 373 * non-trivial optimization[s]... 374 */ 375static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], 376 const u8 *inp, size_t len) 377{ 378 u128 Z; 379 int cnt; 380 size_t rem, nlo, nhi; 381 DECLARE_IS_ENDIAN; 382 383# if 1 384 do { 385 cnt = 15; 386 nlo = ((const u8 *)Xi)[15]; 387 nlo ^= inp[15]; 388 nhi = nlo >> 4; 389 nlo &= 0xf; 390 391 Z.hi = Htable[nlo].hi; 392 Z.lo = Htable[nlo].lo; 393 394 while (1) { 395 rem = (size_t)Z.lo & 0xf; 396 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 397 Z.hi = (Z.hi >> 4); 398 if (sizeof(size_t) == 8) 399 Z.hi ^= rem_4bit[rem]; 400 else 401 Z.hi ^= (u64)rem_4bit[rem] << 32; 402 403 Z.hi ^= Htable[nhi].hi; 404 Z.lo ^= Htable[nhi].lo; 405 406 if (--cnt < 0) 407 break; 408 409 nlo = ((const u8 *)Xi)[cnt]; 410 nlo ^= inp[cnt]; 411 nhi = nlo >> 4; 412 nlo &= 0xf; 413 414 rem = (size_t)Z.lo & 0xf; 415 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 416 Z.hi = (Z.hi >> 4); 417 if (sizeof(size_t) == 8) 418 Z.hi ^= rem_4bit[rem]; 419 else 420 Z.hi ^= (u64)rem_4bit[rem] << 32; 421 422 Z.hi ^= Htable[nlo].hi; 423 Z.lo ^= Htable[nlo].lo; 424 } 425# else 426 /* 427 * Extra 256+16 bytes per-key plus 512 bytes shared tables 428 * [should] give ~50% improvement... One could have PACK()-ed 429 * the rem_8bit even here, but the priority is to minimize 430 * cache footprint... 
431 */ 432 u128 Hshr4[16]; /* Htable shifted right by 4 bits */ 433 u8 Hshl4[16]; /* Htable shifted left by 4 bits */ 434 static const unsigned short rem_8bit[256] = { 435 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, 436 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, 437 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, 438 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, 439 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, 440 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, 441 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, 442 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, 443 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, 444 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, 445 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, 446 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, 447 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, 448 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, 449 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, 450 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, 451 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, 452 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, 453 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, 454 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, 455 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, 456 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, 457 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, 458 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, 459 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, 460 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, 461 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 
0x88EE, 462 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, 463 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, 464 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, 465 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, 466 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE 467 }; 468 /* 469 * This pre-processing phase slows down procedure by approximately 470 * same time as it makes each loop spin faster. In other words 471 * single block performance is approximately same as straightforward 472 * "4-bit" implementation, and then it goes only faster... 473 */ 474 for (cnt = 0; cnt < 16; ++cnt) { 475 Z.hi = Htable[cnt].hi; 476 Z.lo = Htable[cnt].lo; 477 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4); 478 Hshr4[cnt].hi = (Z.hi >> 4); 479 Hshl4[cnt] = (u8)(Z.lo << 4); 480 } 481 482 do { 483 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) { 484 nlo = ((const u8 *)Xi)[cnt]; 485 nlo ^= inp[cnt]; 486 nhi = nlo >> 4; 487 nlo &= 0xf; 488 489 Z.hi ^= Htable[nlo].hi; 490 Z.lo ^= Htable[nlo].lo; 491 492 rem = (size_t)Z.lo & 0xff; 493 494 Z.lo = (Z.hi << 56) | (Z.lo >> 8); 495 Z.hi = (Z.hi >> 8); 496 497 Z.hi ^= Hshr4[nhi].hi; 498 Z.lo ^= Hshr4[nhi].lo; 499 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48; 500 } 501 502 nlo = ((const u8 *)Xi)[0]; 503 nlo ^= inp[0]; 504 nhi = nlo >> 4; 505 nlo &= 0xf; 506 507 Z.hi ^= Htable[nlo].hi; 508 Z.lo ^= Htable[nlo].lo; 509 510 rem = (size_t)Z.lo & 0xf; 511 512 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 513 Z.hi = (Z.hi >> 4); 514 515 Z.hi ^= Htable[nhi].hi; 516 Z.lo ^= Htable[nhi].lo; 517 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48; 518# endif 519 520 if (IS_LITTLE_ENDIAN) { 521# ifdef BSWAP8 522 Xi[0] = BSWAP8(Z.hi); 523 Xi[1] = BSWAP8(Z.lo); 524# else 525 u8 *p = (u8 *)Xi; 526 u32 v; 527 v = (u32)(Z.hi >> 32); 528 PUTU32(p, v); 529 v = (u32)(Z.hi); 530 PUTU32(p + 4, v); 531 v = (u32)(Z.lo >> 32); 532 PUTU32(p + 8, v); 533 v = (u32)(Z.lo); 534 PUTU32(p + 12, v); 535# endif 536 } else { 
537 Xi[0] = Z.hi; 538 Xi[1] = Z.lo; 539 } 540 } while (inp += 16, len -= 16); 541} 542# endif 543# else 544void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]); 545void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, 546 size_t len); 547# endif 548 549# define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) 550# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) 551# define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) 552/* 553 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing 554 * effect. In other words idea is to hash data while it's still in L1 cache 555 * after encryption pass... 556 */ 557# define GHASH_CHUNK (3*1024) 558# endif 559 560#else /* TABLE_BITS */ 561 562static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) 563{ 564 u128 V, Z = { 0, 0 }; 565 long X; 566 int i, j; 567 const long *xi = (const long *)Xi; 568 DECLARE_IS_ENDIAN; 569 570 V.hi = H[0]; /* H is in host byte order, no byte swapping */ 571 V.lo = H[1]; 572 573 for (j = 0; j < 16 / sizeof(long); ++j) { 574 if (IS_LITTLE_ENDIAN) { 575 if (sizeof(long) == 8) { 576# ifdef BSWAP8 577 X = (long)(BSWAP8(xi[j])); 578# else 579 const u8 *p = (const u8 *)(xi + j); 580 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4)); 581# endif 582 } else { 583 const u8 *p = (const u8 *)(xi + j); 584 X = (long)GETU32(p); 585 } 586 } else 587 X = xi[j]; 588 589 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) { 590 u64 M = (u64)(X >> (8 * sizeof(long) - 1)); 591 Z.hi ^= V.hi & M; 592 Z.lo ^= V.lo & M; 593 594 REDUCE1BIT(V); 595 } 596 } 597 598 if (IS_LITTLE_ENDIAN) { 599# ifdef BSWAP8 600 Xi[0] = BSWAP8(Z.hi); 601 Xi[1] = BSWAP8(Z.lo); 602# else 603 u8 *p = (u8 *)Xi; 604 u32 v; 605 v = (u32)(Z.hi >> 32); 606 PUTU32(p, v); 607 v = (u32)(Z.hi); 608 PUTU32(p + 4, v); 609 v = (u32)(Z.lo >> 32); 610 PUTU32(p + 8, v); 611 v = (u32)(Z.lo); 612 PUTU32(p + 12, v); 613# endif 614 } else { 615 Xi[0] = Z.hi; 616 Xi[1] = Z.lo; 617 } 618} 619 620# define 
GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) 621 622#endif 623 624#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) 625# if !defined(I386_ONLY) && \ 626 (defined(__i386) || defined(__i386__) || \ 627 defined(__x86_64) || defined(__x86_64__) || \ 628 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) 629# define GHASH_ASM_X86_OR_64 630# define GCM_FUNCREF_4BIT 631 632void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); 633void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); 634void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, 635 size_t len); 636 637# if defined(__i386) || defined(__i386__) || defined(_M_IX86) 638# define gcm_init_avx gcm_init_clmul 639# define gcm_gmult_avx gcm_gmult_clmul 640# define gcm_ghash_avx gcm_ghash_clmul 641# else 642void gcm_init_avx(u128 Htable[16], const u64 Xi[2]); 643void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]); 644void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 645 size_t len); 646# endif 647 648# if defined(__i386) || defined(__i386__) || defined(_M_IX86) 649# define GHASH_ASM_X86 650void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); 651void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 652 size_t len); 653 654void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); 655void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, 656 size_t len); 657# endif 658# elif (defined(__arm__) || defined(__arm) || defined(__aarch64__)) && defined(GHASH_ASM) 659# include "arm_arch.h" 660# if __ARM_MAX_ARCH__>=7 661# define GHASH_ASM_ARM 662# define GCM_FUNCREF_4BIT 663# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) 664# if defined(__arm__) || defined(__arm) 665# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) 666# endif 667void gcm_init_neon(u128 Htable[16], const u64 Xi[2]); 668void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]); 669void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, 
670 size_t len); 671void gcm_init_v8(u128 Htable[16], const u64 Xi[2]); 672void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]); 673void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 674 size_t len); 675# endif 676# elif defined(__sparc__) || defined(__sparc) 677# include "crypto/sparc_arch.h" 678# if defined(__arch64__) 679# define GHASH_ASM_SPARC 680# define GCM_FUNCREF_4BIT 681extern unsigned int OPENSSL_sparcv9cap_P[]; 682void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]); 683void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]); 684void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp, 685 size_t len); 686# endif /* __arch64__ */ 687# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) 688# include "crypto/ppc_arch.h" 689# define GHASH_ASM_PPC 690# define GCM_FUNCREF_4BIT 691void gcm_init_p8(u128 Htable[16], const u64 Xi[2]); 692void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]); 693void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 694 size_t len); 695# endif 696#endif 697 698#ifdef GCM_FUNCREF_4BIT 699# undef GCM_MUL 700# define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) 701# ifdef GHASH 702# undef GHASH 703# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) 704# endif 705#endif 706 707void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) 708{ 709 DECLARE_IS_ENDIAN; 710 711 memset(ctx, 0, sizeof(*ctx)); 712 ctx->block = block; 713 ctx->key = key; 714 715 (*block) (ctx->H.c, ctx->H.c, key); 716 717 if (IS_LITTLE_ENDIAN) { 718 /* H is stored in host byte order */ 719#ifdef BSWAP8 720 ctx->H.u[0] = BSWAP8(ctx->H.u[0]); 721 ctx->H.u[1] = BSWAP8(ctx->H.u[1]); 722#else 723 u8 *p = ctx->H.c; 724 u64 hi, lo; 725 hi = (u64)GETU32(p) << 32 | GETU32(p + 4); 726 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); 727 ctx->H.u[0] = hi; 728 ctx->H.u[1] = lo; 729#endif 730 } 731#if TABLE_BITS==8 732 gcm_init_8bit(ctx->Htable, 
ctx->H.u); 733#elif TABLE_BITS==4 734# if defined(GHASH) 735# define CTX__GHASH(f) (ctx->ghash = (f)) 736# else 737# define CTX__GHASH(f) (ctx->ghash = NULL) 738# endif 739# if defined(GHASH_ASM_X86_OR_64) 740# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) 741 if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ 742 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */ 743 gcm_init_avx(ctx->Htable, ctx->H.u); 744 ctx->gmult = gcm_gmult_avx; 745 CTX__GHASH(gcm_ghash_avx); 746 } else { 747 gcm_init_clmul(ctx->Htable, ctx->H.u); 748 ctx->gmult = gcm_gmult_clmul; 749 CTX__GHASH(gcm_ghash_clmul); 750 } 751 return; 752 } 753# endif 754 gcm_init_4bit(ctx->Htable, ctx->H.u); 755# if defined(GHASH_ASM_X86) /* x86 only */ 756# if defined(OPENSSL_IA32_SSE2) 757 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */ 758# else 759 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */ 760# endif 761 ctx->gmult = gcm_gmult_4bit_mmx; 762 CTX__GHASH(gcm_ghash_4bit_mmx); 763 } else { 764 ctx->gmult = gcm_gmult_4bit_x86; 765 CTX__GHASH(gcm_ghash_4bit_x86); 766 } 767# else 768 ctx->gmult = gcm_gmult_4bit; 769 CTX__GHASH(gcm_ghash_4bit); 770# endif 771# elif defined(GHASH_ASM_ARM) 772# ifdef PMULL_CAPABLE 773 if (PMULL_CAPABLE) { 774 gcm_init_v8(ctx->Htable, ctx->H.u); 775 ctx->gmult = gcm_gmult_v8; 776 CTX__GHASH(gcm_ghash_v8); 777 } else 778# endif 779# ifdef NEON_CAPABLE 780 if (NEON_CAPABLE) { 781 gcm_init_neon(ctx->Htable, ctx->H.u); 782 ctx->gmult = gcm_gmult_neon; 783 CTX__GHASH(gcm_ghash_neon); 784 } else 785# endif 786 { 787 gcm_init_4bit(ctx->Htable, ctx->H.u); 788 ctx->gmult = gcm_gmult_4bit; 789 CTX__GHASH(gcm_ghash_4bit); 790 } 791# elif defined(GHASH_ASM_SPARC) 792 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) { 793 gcm_init_vis3(ctx->Htable, ctx->H.u); 794 ctx->gmult = gcm_gmult_vis3; 795 CTX__GHASH(gcm_ghash_vis3); 796 } else { 797 gcm_init_4bit(ctx->Htable, ctx->H.u); 798 ctx->gmult = gcm_gmult_4bit; 799 
CTX__GHASH(gcm_ghash_4bit); 800 } 801# elif defined(GHASH_ASM_PPC) 802 if (OPENSSL_ppccap_P & PPC_CRYPTO207) { 803 gcm_init_p8(ctx->Htable, ctx->H.u); 804 ctx->gmult = gcm_gmult_p8; 805 CTX__GHASH(gcm_ghash_p8); 806 } else { 807 gcm_init_4bit(ctx->Htable, ctx->H.u); 808 ctx->gmult = gcm_gmult_4bit; 809 CTX__GHASH(gcm_ghash_4bit); 810 } 811# else 812 gcm_init_4bit(ctx->Htable, ctx->H.u); 813# endif 814# undef CTX__GHASH 815#endif 816} 817 818void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, 819 size_t len) 820{ 821 DECLARE_IS_ENDIAN; 822 unsigned int ctr; 823#ifdef GCM_FUNCREF_4BIT 824 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 825#endif 826 827 ctx->len.u[0] = 0; /* AAD length */ 828 ctx->len.u[1] = 0; /* message length */ 829 ctx->ares = 0; 830 ctx->mres = 0; 831 832 if (len == 12) { 833 memcpy(ctx->Yi.c, iv, 12); 834 ctx->Yi.c[12] = 0; 835 ctx->Yi.c[13] = 0; 836 ctx->Yi.c[14] = 0; 837 ctx->Yi.c[15] = 1; 838 ctr = 1; 839 } else { 840 size_t i; 841 u64 len0 = len; 842 843 /* Borrow ctx->Xi to calculate initial Yi */ 844 ctx->Xi.u[0] = 0; 845 ctx->Xi.u[1] = 0; 846 847 while (len >= 16) { 848 for (i = 0; i < 16; ++i) 849 ctx->Xi.c[i] ^= iv[i]; 850 GCM_MUL(ctx); 851 iv += 16; 852 len -= 16; 853 } 854 if (len) { 855 for (i = 0; i < len; ++i) 856 ctx->Xi.c[i] ^= iv[i]; 857 GCM_MUL(ctx); 858 } 859 len0 <<= 3; 860 if (IS_LITTLE_ENDIAN) { 861#ifdef BSWAP8 862 ctx->Xi.u[1] ^= BSWAP8(len0); 863#else 864 ctx->Xi.c[8] ^= (u8)(len0 >> 56); 865 ctx->Xi.c[9] ^= (u8)(len0 >> 48); 866 ctx->Xi.c[10] ^= (u8)(len0 >> 40); 867 ctx->Xi.c[11] ^= (u8)(len0 >> 32); 868 ctx->Xi.c[12] ^= (u8)(len0 >> 24); 869 ctx->Xi.c[13] ^= (u8)(len0 >> 16); 870 ctx->Xi.c[14] ^= (u8)(len0 >> 8); 871 ctx->Xi.c[15] ^= (u8)(len0); 872#endif 873 } else { 874 ctx->Xi.u[1] ^= len0; 875 } 876 877 GCM_MUL(ctx); 878 879 if (IS_LITTLE_ENDIAN) 880#ifdef BSWAP4 881 ctr = BSWAP4(ctx->Xi.d[3]); 882#else 883 ctr = GETU32(ctx->Xi.c + 12); 884#endif 885 else 886 ctr = 
ctx->Xi.d[3]; 887 888 /* Copy borrowed Xi to Yi */ 889 ctx->Yi.u[0] = ctx->Xi.u[0]; 890 ctx->Yi.u[1] = ctx->Xi.u[1]; 891 } 892 893 ctx->Xi.u[0] = 0; 894 ctx->Xi.u[1] = 0; 895 896 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key); 897 ++ctr; 898 if (IS_LITTLE_ENDIAN) 899#ifdef BSWAP4 900 ctx->Yi.d[3] = BSWAP4(ctr); 901#else 902 PUTU32(ctx->Yi.c + 12, ctr); 903#endif 904 else 905 ctx->Yi.d[3] = ctr; 906} 907 908int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, 909 size_t len) 910{ 911 size_t i; 912 unsigned int n; 913 u64 alen = ctx->len.u[0]; 914#ifdef GCM_FUNCREF_4BIT 915 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 916# ifdef GHASH 917 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 918 const u8 *inp, size_t len) = ctx->ghash; 919# endif 920#endif 921 922 if (ctx->len.u[1]) 923 return -2; 924 925 alen += len; 926 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len)) 927 return -1; 928 ctx->len.u[0] = alen; 929 930 n = ctx->ares; 931 if (n) { 932 while (n && len) { 933 ctx->Xi.c[n] ^= *(aad++); 934 --len; 935 n = (n + 1) % 16; 936 } 937 if (n == 0) 938 GCM_MUL(ctx); 939 else { 940 ctx->ares = n; 941 return 0; 942 } 943 } 944#ifdef GHASH 945 if ((i = (len & (size_t)-16))) { 946 GHASH(ctx, aad, i); 947 aad += i; 948 len -= i; 949 } 950#else 951 while (len >= 16) { 952 for (i = 0; i < 16; ++i) 953 ctx->Xi.c[i] ^= aad[i]; 954 GCM_MUL(ctx); 955 aad += 16; 956 len -= 16; 957 } 958#endif 959 if (len) { 960 n = (unsigned int)len; 961 for (i = 0; i < len; ++i) 962 ctx->Xi.c[i] ^= aad[i]; 963 } 964 965 ctx->ares = n; 966 return 0; 967} 968 969int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, 970 const unsigned char *in, unsigned char *out, 971 size_t len) 972{ 973 DECLARE_IS_ENDIAN; 974 unsigned int n, ctr, mres; 975 size_t i; 976 u64 mlen = ctx->len.u[1]; 977 block128_f block = ctx->block; 978 void *key = ctx->key; 979#ifdef GCM_FUNCREF_4BIT 980 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 981# if 
defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 982 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 983 const u8 *inp, size_t len) = ctx->ghash; 984# endif 985#endif 986 987 mlen += len; 988 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 989 return -1; 990 ctx->len.u[1] = mlen; 991 992 mres = ctx->mres; 993 994 if (ctx->ares) { 995 /* First call to encrypt finalizes GHASH(AAD) */ 996#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 997 if (len == 0) { 998 GCM_MUL(ctx); 999 ctx->ares = 0; 1000 return 0; 1001 } 1002 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1003 ctx->Xi.u[0] = 0; 1004 ctx->Xi.u[1] = 0; 1005 mres = sizeof(ctx->Xi); 1006#else 1007 GCM_MUL(ctx); 1008#endif 1009 ctx->ares = 0; 1010 } 1011 1012 if (IS_LITTLE_ENDIAN) 1013#ifdef BSWAP4 1014 ctr = BSWAP4(ctx->Yi.d[3]); 1015#else 1016 ctr = GETU32(ctx->Yi.c + 12); 1017#endif 1018 else 1019 ctr = ctx->Yi.d[3]; 1020 1021 n = mres % 16; 1022#if !defined(OPENSSL_SMALL_FOOTPRINT) 1023 if (16 % sizeof(size_t) == 0) { /* always true actually */ 1024 do { 1025 if (n) { 1026# if defined(GHASH) 1027 while (n && len) { 1028 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; 1029 --len; 1030 n = (n + 1) % 16; 1031 } 1032 if (n == 0) { 1033 GHASH(ctx, ctx->Xn, mres); 1034 mres = 0; 1035 } else { 1036 ctx->mres = mres; 1037 return 0; 1038 } 1039# else 1040 while (n && len) { 1041 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; 1042 --len; 1043 n = (n + 1) % 16; 1044 } 1045 if (n == 0) { 1046 GCM_MUL(ctx); 1047 mres = 0; 1048 } else { 1049 ctx->mres = n; 1050 return 0; 1051 } 1052# endif 1053 } 1054# if defined(STRICT_ALIGNMENT) 1055 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) 1056 break; 1057# endif 1058# if defined(GHASH) 1059 if (len >= 16 && mres) { 1060 GHASH(ctx, ctx->Xn, mres); 1061 mres = 0; 1062 } 1063# if defined(GHASH_CHUNK) 1064 while (len >= GHASH_CHUNK) { 1065 size_t j = GHASH_CHUNK; 1066 1067 while (j) { 1068 size_t_aX *out_t = (size_t_aX *)out; 1069 
const size_t_aX *in_t = (const size_t_aX *)in; 1070 1071 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1072 ++ctr; 1073 if (IS_LITTLE_ENDIAN) 1074# ifdef BSWAP4 1075 ctx->Yi.d[3] = BSWAP4(ctr); 1076# else 1077 PUTU32(ctx->Yi.c + 12, ctr); 1078# endif 1079 else 1080 ctx->Yi.d[3] = ctr; 1081 for (i = 0; i < 16 / sizeof(size_t); ++i) 1082 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1083 out += 16; 1084 in += 16; 1085 j -= 16; 1086 } 1087 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK); 1088 len -= GHASH_CHUNK; 1089 } 1090# endif 1091 if ((i = (len & (size_t)-16))) { 1092 size_t j = i; 1093 1094 while (len >= 16) { 1095 size_t_aX *out_t = (size_t_aX *)out; 1096 const size_t_aX *in_t = (const size_t_aX *)in; 1097 1098 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1099 ++ctr; 1100 if (IS_LITTLE_ENDIAN) 1101# ifdef BSWAP4 1102 ctx->Yi.d[3] = BSWAP4(ctr); 1103# else 1104 PUTU32(ctx->Yi.c + 12, ctr); 1105# endif 1106 else 1107 ctx->Yi.d[3] = ctr; 1108 for (i = 0; i < 16 / sizeof(size_t); ++i) 1109 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1110 out += 16; 1111 in += 16; 1112 len -= 16; 1113 } 1114 GHASH(ctx, out - j, j); 1115 } 1116# else 1117 while (len >= 16) { 1118 size_t *out_t = (size_t *)out; 1119 const size_t *in_t = (const size_t *)in; 1120 1121 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1122 ++ctr; 1123 if (IS_LITTLE_ENDIAN) 1124# ifdef BSWAP4 1125 ctx->Yi.d[3] = BSWAP4(ctr); 1126# else 1127 PUTU32(ctx->Yi.c + 12, ctr); 1128# endif 1129 else 1130 ctx->Yi.d[3] = ctr; 1131 for (i = 0; i < 16 / sizeof(size_t); ++i) 1132 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1133 GCM_MUL(ctx); 1134 out += 16; 1135 in += 16; 1136 len -= 16; 1137 } 1138# endif 1139 if (len) { 1140 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1141 ++ctr; 1142 if (IS_LITTLE_ENDIAN) 1143# ifdef BSWAP4 1144 ctx->Yi.d[3] = BSWAP4(ctr); 1145# else 1146 PUTU32(ctx->Yi.c + 12, ctr); 1147# endif 1148 else 1149 ctx->Yi.d[3] = ctr; 1150# if defined(GHASH) 1151 while (len--) { 1152 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; 1153 ++n; 1154 
                }
# else
                /* No GHASH(): fold each ciphertext byte straight into Xi. */
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    /*
     * Generic one-byte-at-a-time path (also the OPENSSL_SMALL_FOOTPRINT
     * path): a fresh keystream block EKi is generated whenever the
     * in-block offset n wraps to 0.
     */
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        /* Ciphertext is collected in Xn and hashed in batches. */
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

/*-
 * Decrypt |len| bytes from |in| to |out| and accumulate the ciphertext
 * into the GHASH state.  The only ordering difference from
 * CRYPTO_gcm128_encrypt() is that here each ciphertext byte/block is
 * folded into the hash *before* it is XORed with the keystream.
 * Returns 0 on success, or -1 when the running message length would
 * exceed the limit enforced below.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    /*
     * Enforce the GCM message-length limit of 2^36 - 32 bytes
     * (2^39 - 256 bits, per NIST SP 800-38D) and reject a wrapped
     * 64-bit byte counter.
     */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /* Defer the AAD's final multiply: move Xi into the Xn batch
         * buffer so it is hashed together with upcoming ciphertext. */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Extract the low 32 bits of the counter (big-endian in Yi). */
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            /* Finish any partial block left over from a previous call. */
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            /* Unaligned buffers: fall through to the byte loop below. */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                /* Flush pending batched data before bulk hashing. */
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            /* Bulk path: hash GHASH_CHUNK bytes of ciphertext at a time,
             * then decrypt them block by block. */
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            /* Remaining whole blocks (< GHASH_CHUNK). */
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            /* No GHASH(): one GCM_MUL per block, Xi updated inline. */
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            /* Trailing partial block: residue is carried in ctx->mres. */
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    /* Byte-at-a-time fallback (small footprint / unaligned buffers). */
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        /* Note: in[i] is read before out[i] is written, so exact
         * in-place operation (out == in) appears safe here. */
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

/*-
 * Encrypt |len| bytes using a caller-supplied ctr128_f |stream| routine
 * that produces many counter-mode blocks per call.  Falls back to
 * CRYPTO_gcm128_encrypt() in OPENSSL_SMALL_FOOTPRINT builds.
 * Returns 0 on success, -1 if the message-length limit is exceeded.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    /* Same NIST SP 800-38D length limit as the non-stream entry points. */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    /* Complete a partial block carried over from a previous call. */
    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            /* Block still incomplete: stash residue for the next call. */
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        /* Flush batched GHASH input before the bulk path. */
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    /* Bulk path: encrypt a whole chunk via |stream|, then hash the
     * resulting ciphertext (encrypt-then-hash ordering). */
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    /* Remaining whole blocks. */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    /* Trailing partial block: one keystream block, residue in mres. */
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}

/*-
 * Decrypt |len| bytes using a caller-supplied ctr128_f |stream| routine.
 * Mirrors CRYPTO_gcm128_encrypt_ctr32() except that the ciphertext is
 * folded into the GHASH state *before* it is decrypted.  Falls back to
 * CRYPTO_gcm128_decrypt() in OPENSSL_SMALL_FOOTPRINT builds.
 * Returns 0 on success, -1 if the message-length limit is exceeded.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    /* NIST SP 800-38D limit: at most 2^36 - 32 bytes per message. */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    /* Complete a partial block carried over from a previous call. */
    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    /* Bulk path: hash the incoming ciphertext first, then decrypt it. */
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    /* Remaining whole blocks. */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        /* Hash ciphertext block by block, then rewind |in| so the
         * stream routine can decrypt the same bytes. */
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    /* Trailing partial block. */
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}

/*-
 * Finish the GCM computation: pad any pending GHASH input, hash in the
 * 64-bit lengths (in bits) of the AAD and the ciphertext, and XOR with
 * EK0 to form the authentication tag in ctx->Xi.
 * If |tag| is non-NULL and |len| <= 16, compares it against the computed
 * tag with CRYPTO_memcmp() (constant-time) and returns the result
 * (0 == match); otherwise returns -1.
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    u64 alen = ctx->len.u[0] << 3;  /* AAD length in bits */
    u64 clen = ctx->len.u[1] << 3;  /* ciphertext length in bits */
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        /* Zero-pad the batched data up to a 16-byte boundary. */
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    /* Convert the length words to big-endian representation. */
    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* Append the length block and hash everything still pending. */
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    /* Tag = GHASH output XOR E(K, Y0). */
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}

/*
 * Finalize and copy out (at most 16 bytes of) the authentication tag.
 */
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

/*
 * Allocate and initialize a GCM128_CONTEXT for |key|/|block|.
 * Returns NULL on allocation failure.  Caller owns the result and must
 * release it with CRYPTO_gcm128_release().
 */
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

/*
 * Scrub (key material lives in the context) and free a context
 * allocated by CRYPTO_gcm128_new().
 */
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}