1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in
13 *    the documentation and/or other materials provided with the
14 *    distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 *    software must display the following acknowledgment:
18 *    "This product includes software developed by the OpenSSL Project
19 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 *    endorse or promote products derived from this software without
23 *    prior written permission. For written permission, please contact
24 *    openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 *    nor may "OpenSSL" appear in their names without prior written
28 *    permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 *    acknowledgment:
32 *    "This product includes software developed by the OpenSSL Project
33 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50#define OPENSSL_FIPSAPI
51
52#include <openssl/crypto.h>
53#include "modes_lcl.h"
54#include <string.h>
55
56#ifndef MODES_DEBUG
57# ifndef NDEBUG
58#  define NDEBUG
59# endif
60#endif
61#include <assert.h>
62
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine GETU32/PUTU32 as one 32-bit access through a u32 pointer
 * combined with BSWAP4. Per the original note this is acceptable in this
 * file because alignment is ensured.
 *
 * PUTU32 expands to a fully parenthesized assignment expression, so
 * operators surrounding an invocation cannot regroup it.
 */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     (*(u32 *)(p) = BSWAP4(v))
#endif
70
/* Move a 16-bit value into the 16 most significant bits of a size_t. */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))

/*
 * Shift the 128-bit value V (an lvalue with u64 members .hi and .lo) right
 * by one bit. If the bit shifted out of V.lo was set, 0xE1 is XORed into the
 * most significant byte of V.hi. The mask T is built arithmetically, so no
 * branch depends on the data.
 *
 * V is evaluated several times; pass an expression without side effects.
 * Every use of V is parenthesized so that arguments such as *ptr or
 * table[i] expand correctly.
 */
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^((u64)T<<32); \
        } \
} while(0)
84
85/*-
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
106 *
107 * - shorter setup time effectively improves overall timing for
108 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows large enough free
 *   results in VM working set trimming, meaning that subsequent
 *   malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 *   performance of other code paths (not necessarily even from same
115 *   thread in Hyper-Threading world);
116 *
117 * Value of 1 is not appropriate for performance reasons.
118 */
119#if     TABLE_BITS==8
120
121static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122{
123    int i, j;
124    u128 V;
125
126    Htable[0].hi = 0;
127    Htable[0].lo = 0;
128    V.hi = H[0];
129    V.lo = H[1];
130
131    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
132        REDUCE1BIT(V);
133        Htable[i] = V;
134    }
135
136    for (i = 2; i < 256; i <<= 1) {
137        u128 *Hi = Htable + i, H0 = *Hi;
138        for (j = 1; j < i; ++j) {
139            Hi[j].hi = H0.hi ^ Htable[j].hi;
140            Hi[j].lo = H0.lo ^ Htable[j].lo;
141        }
142    }
143}
144
145static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146{
147    u128 Z = { 0, 0 };
148    const u8 *xi = (const u8 *)Xi + 15;
149    size_t rem, n = *xi;
150    const union {
151        long one;
152        char little;
153    } is_endian = {
154        1
155    };
156    static const size_t rem_8bit[256] = {
157        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
158        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
159        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
160        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
161        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
162        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
163        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
164        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
165        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
166        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
167        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
168        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
169        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
170        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
171        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
172        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
173        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
174        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
175        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
176        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
177        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
178        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
179        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
180        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
181        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
182        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
183        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
184        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
185        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
186        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
187        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
188        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
189        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
190        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
191        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
192        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
193        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
194        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
195        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
196        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
197        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
198        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
199        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
200        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
201        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
202        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
203        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
204        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
205        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
206        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
207        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
208        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
209        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
210        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
211        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
212        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
213        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
214        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
215        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
216        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
217        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
218        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
219        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
220        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
221    };
222
223    while (1) {
224        Z.hi ^= Htable[n].hi;
225        Z.lo ^= Htable[n].lo;
226
227        if ((u8 *)Xi == xi)
228            break;
229
230        n = *(--xi);
231
232        rem = (size_t)Z.lo & 0xff;
233        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
234        Z.hi = (Z.hi >> 8);
235        if (sizeof(size_t) == 8)
236            Z.hi ^= rem_8bit[rem];
237        else
238            Z.hi ^= (u64)rem_8bit[rem] << 32;
239    }
240
241    if (is_endian.little) {
242# ifdef BSWAP8
243        Xi[0] = BSWAP8(Z.hi);
244        Xi[1] = BSWAP8(Z.lo);
245# else
246        u8 *p = (u8 *)Xi;
247        u32 v;
248        v = (u32)(Z.hi >> 32);
249        PUTU32(p, v);
250        v = (u32)(Z.hi);
251        PUTU32(p + 4, v);
252        v = (u32)(Z.lo >> 32);
253        PUTU32(p + 8, v);
254        v = (u32)(Z.lo);
255        PUTU32(p + 12, v);
256# endif
257    } else {
258        Xi[0] = Z.hi;
259        Xi[1] = Z.lo;
260    }
261}
262
/*
 * Multiply one 128-bit field of the context in place with the 8-bit table
 * routine. The second macro argument is pasted in as the member name, so
 * GCM_MUL(ctx,Yi) operates on ctx->Yi and GCM_MUL(ctx,Xi) on ctx->Xi.
 * ctx is parenthesized so any pointer expression may be passed.
 */
# define GCM_MUL(ctx,Xi)   gcm_gmult_8bit((ctx)->Xi.u,(ctx)->Htable)
264
265#elif   TABLE_BITS==4
266
267static void gcm_init_4bit(u128 Htable[16], u64 H[2])
268{
269    u128 V;
270# if defined(OPENSSL_SMALL_FOOTPRINT)
271    int i;
272# endif
273
274    Htable[0].hi = 0;
275    Htable[0].lo = 0;
276    V.hi = H[0];
277    V.lo = H[1];
278
279# if defined(OPENSSL_SMALL_FOOTPRINT)
280    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
281        REDUCE1BIT(V);
282        Htable[i] = V;
283    }
284
285    for (i = 2; i < 16; i <<= 1) {
286        u128 *Hi = Htable + i;
287        int j;
288        for (V = *Hi, j = 1; j < i; ++j) {
289            Hi[j].hi = V.hi ^ Htable[j].hi;
290            Hi[j].lo = V.lo ^ Htable[j].lo;
291        }
292    }
293# else
294    Htable[8] = V;
295    REDUCE1BIT(V);
296    Htable[4] = V;
297    REDUCE1BIT(V);
298    Htable[2] = V;
299    REDUCE1BIT(V);
300    Htable[1] = V;
301    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
302    V = Htable[4];
303    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
304    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
305    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
306    V = Htable[8];
307    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
308    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
309    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
310    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
311    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
312    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
313    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
314# endif
315# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
316    /*
317     * ARM assembler expects specific dword order in Htable.
318     */
319    {
320        int j;
321        const union {
322            long one;
323            char little;
324        } is_endian = {
325            1
326        };
327
328        if (is_endian.little)
329            for (j = 0; j < 16; ++j) {
330                V = Htable[j];
331                Htable[j].hi = V.lo;
332                Htable[j].lo = V.hi;
333        } else
334            for (j = 0; j < 16; ++j) {
335                V = Htable[j];
336                Htable[j].hi = V.lo << 32 | V.lo >> 32;
337                Htable[j].lo = V.hi << 32 | V.hi >> 32;
338            }
339    }
340# endif
341}
342
343# ifndef GHASH_ASM
344static const size_t rem_4bit[16] = {
345    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
346    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
347    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
348    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
349};
350
351static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
352{
353    u128 Z;
354    int cnt = 15;
355    size_t rem, nlo, nhi;
356    const union {
357        long one;
358        char little;
359    } is_endian = {
360        1
361    };
362
363    nlo = ((const u8 *)Xi)[15];
364    nhi = nlo >> 4;
365    nlo &= 0xf;
366
367    Z.hi = Htable[nlo].hi;
368    Z.lo = Htable[nlo].lo;
369
370    while (1) {
371        rem = (size_t)Z.lo & 0xf;
372        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
373        Z.hi = (Z.hi >> 4);
374        if (sizeof(size_t) == 8)
375            Z.hi ^= rem_4bit[rem];
376        else
377            Z.hi ^= (u64)rem_4bit[rem] << 32;
378
379        Z.hi ^= Htable[nhi].hi;
380        Z.lo ^= Htable[nhi].lo;
381
382        if (--cnt < 0)
383            break;
384
385        nlo = ((const u8 *)Xi)[cnt];
386        nhi = nlo >> 4;
387        nlo &= 0xf;
388
389        rem = (size_t)Z.lo & 0xf;
390        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
391        Z.hi = (Z.hi >> 4);
392        if (sizeof(size_t) == 8)
393            Z.hi ^= rem_4bit[rem];
394        else
395            Z.hi ^= (u64)rem_4bit[rem] << 32;
396
397        Z.hi ^= Htable[nlo].hi;
398        Z.lo ^= Htable[nlo].lo;
399    }
400
401    if (is_endian.little) {
402#  ifdef BSWAP8
403        Xi[0] = BSWAP8(Z.hi);
404        Xi[1] = BSWAP8(Z.lo);
405#  else
406        u8 *p = (u8 *)Xi;
407        u32 v;
408        v = (u32)(Z.hi >> 32);
409        PUTU32(p, v);
410        v = (u32)(Z.hi);
411        PUTU32(p + 4, v);
412        v = (u32)(Z.lo >> 32);
413        PUTU32(p + 8, v);
414        v = (u32)(Z.lo);
415        PUTU32(p + 12, v);
416#  endif
417    } else {
418        Xi[0] = Z.hi;
419        Xi[1] = Z.lo;
420    }
421}
422
423#  if !defined(OPENSSL_SMALL_FOOTPRINT)
424/*
425 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
426 * details... Compiler-generated code doesn't seem to give any
427 * performance improvement, at least not on x86[_64]. It's here
428 * mostly as reference and a placeholder for possible future
429 * non-trivial optimization[s]...
430 */
431static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
432                           const u8 *inp, size_t len)
433{
434    u128 Z;
435    int cnt;
436    size_t rem, nlo, nhi;
437    const union {
438        long one;
439        char little;
440    } is_endian = {
441        1
442    };
443
444#   if 1
445    do {
446        cnt = 15;
447        nlo = ((const u8 *)Xi)[15];
448        nlo ^= inp[15];
449        nhi = nlo >> 4;
450        nlo &= 0xf;
451
452        Z.hi = Htable[nlo].hi;
453        Z.lo = Htable[nlo].lo;
454
455        while (1) {
456            rem = (size_t)Z.lo & 0xf;
457            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
458            Z.hi = (Z.hi >> 4);
459            if (sizeof(size_t) == 8)
460                Z.hi ^= rem_4bit[rem];
461            else
462                Z.hi ^= (u64)rem_4bit[rem] << 32;
463
464            Z.hi ^= Htable[nhi].hi;
465            Z.lo ^= Htable[nhi].lo;
466
467            if (--cnt < 0)
468                break;
469
470            nlo = ((const u8 *)Xi)[cnt];
471            nlo ^= inp[cnt];
472            nhi = nlo >> 4;
473            nlo &= 0xf;
474
475            rem = (size_t)Z.lo & 0xf;
476            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
477            Z.hi = (Z.hi >> 4);
478            if (sizeof(size_t) == 8)
479                Z.hi ^= rem_4bit[rem];
480            else
481                Z.hi ^= (u64)rem_4bit[rem] << 32;
482
483            Z.hi ^= Htable[nlo].hi;
484            Z.lo ^= Htable[nlo].lo;
485        }
486#   else
487    /*
488     * Extra 256+16 bytes per-key plus 512 bytes shared tables
489     * [should] give ~50% improvement... One could have PACK()-ed
490     * the rem_8bit even here, but the priority is to minimize
491     * cache footprint...
492     */
493    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
494    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
495    static const unsigned short rem_8bit[256] = {
496        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
497        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
498        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
499        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
500        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
501        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
502        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
503        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
504        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
505        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
506        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
507        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
508        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
509        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
510        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
511        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
512        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
513        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
514        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
515        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
516        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
517        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
518        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
519        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
520        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
521        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
522        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
523        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
524        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
525        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
526        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
527        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
528    };
529    /*
530     * This pre-processing phase slows down procedure by approximately
531     * same time as it makes each loop spin faster. In other words
532     * single block performance is approximately same as straightforward
533     * "4-bit" implementation, and then it goes only faster...
534     */
535    for (cnt = 0; cnt < 16; ++cnt) {
536        Z.hi = Htable[cnt].hi;
537        Z.lo = Htable[cnt].lo;
538        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
539        Hshr4[cnt].hi = (Z.hi >> 4);
540        Hshl4[cnt] = (u8)(Z.lo << 4);
541    }
542
543    do {
544        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
545            nlo = ((const u8 *)Xi)[cnt];
546            nlo ^= inp[cnt];
547            nhi = nlo >> 4;
548            nlo &= 0xf;
549
550            Z.hi ^= Htable[nlo].hi;
551            Z.lo ^= Htable[nlo].lo;
552
553            rem = (size_t)Z.lo & 0xff;
554
555            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
556            Z.hi = (Z.hi >> 8);
557
558            Z.hi ^= Hshr4[nhi].hi;
559            Z.lo ^= Hshr4[nhi].lo;
560            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
561        }
562
563        nlo = ((const u8 *)Xi)[0];
564        nlo ^= inp[0];
565        nhi = nlo >> 4;
566        nlo &= 0xf;
567
568        Z.hi ^= Htable[nlo].hi;
569        Z.lo ^= Htable[nlo].lo;
570
571        rem = (size_t)Z.lo & 0xf;
572
573        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
574        Z.hi = (Z.hi >> 4);
575
576        Z.hi ^= Htable[nhi].hi;
577        Z.lo ^= Htable[nhi].lo;
578        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
579#   endif
580
581        if (is_endian.little) {
582#   ifdef BSWAP8
583            Xi[0] = BSWAP8(Z.hi);
584            Xi[1] = BSWAP8(Z.lo);
585#   else
586            u8 *p = (u8 *)Xi;
587            u32 v;
588            v = (u32)(Z.hi >> 32);
589            PUTU32(p, v);
590            v = (u32)(Z.hi);
591            PUTU32(p + 4, v);
592            v = (u32)(Z.lo >> 32);
593            PUTU32(p + 8, v);
594            v = (u32)(Z.lo);
595            PUTU32(p + 12, v);
596#   endif
597        } else {
598            Xi[0] = Z.hi;
599            Xi[1] = Z.lo;
600        }
601    } while (inp += 16, len -= 16);
602}
603#  endif
604# else
605void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
606void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
607                    size_t len);
608# endif
609
/*
 * GCM_MUL multiplies one 128-bit field of the context in place with the
 * 4-bit table routine. The second argument is pasted in as the member
 * name, so GCM_MUL(ctx,Yi) operates on ctx->Yi. ctx is parenthesized in
 * both macros so any pointer expression may be passed.
 */
# define GCM_MUL(ctx,Xi)   gcm_gmult_4bit((ctx)->Xi.u,(ctx)->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH folds len bytes starting at in into ctx->Xi. */
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" meant to mitigate the cache thrashing
 * effect. In other words the idea is to hash data while it's still in L1
 * cache after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
620
621#else                           /* TABLE_BITS */
622
623static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
624{
625    u128 V, Z = { 0, 0 };
626    long X;
627    int i, j;
628    const long *xi = (const long *)Xi;
629    const union {
630        long one;
631        char little;
632    } is_endian = {
633        1
634    };
635
636    V.hi = H[0];                /* H is in host byte order, no byte swapping */
637    V.lo = H[1];
638
639    for (j = 0; j < 16 / sizeof(long); ++j) {
640        if (is_endian.little) {
641            if (sizeof(long) == 8) {
642# ifdef BSWAP8
643                X = (long)(BSWAP8(xi[j]));
644# else
645                const u8 *p = (const u8 *)(xi + j);
646                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
647# endif
648            } else {
649                const u8 *p = (const u8 *)(xi + j);
650                X = (long)GETU32(p);
651            }
652        } else
653            X = xi[j];
654
655        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
656            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
657            Z.hi ^= V.hi & M;
658            Z.lo ^= V.lo & M;
659
660            REDUCE1BIT(V);
661        }
662    }
663
664    if (is_endian.little) {
665# ifdef BSWAP8
666        Xi[0] = BSWAP8(Z.hi);
667        Xi[1] = BSWAP8(Z.lo);
668# else
669        u8 *p = (u8 *)Xi;
670        u32 v;
671        v = (u32)(Z.hi >> 32);
672        PUTU32(p, v);
673        v = (u32)(Z.hi);
674        PUTU32(p + 4, v);
675        v = (u32)(Z.lo >> 32);
676        PUTU32(p + 8, v);
677        v = (u32)(Z.lo);
678        PUTU32(p + 12, v);
679# endif
680    } else {
681        Xi[0] = Z.hi;
682        Xi[1] = Z.lo;
683    }
684}
685
/*
 * Multiply one 128-bit field of the context in place with the bit-serial
 * routine. The second macro argument is pasted in as the member name, so
 * GCM_MUL(ctx,Yi) operates on ctx->Yi. ctx is parenthesized so any pointer
 * expression may be passed.
 */
# define GCM_MUL(ctx,Xi)   gcm_gmult_1bit((ctx)->Xi.u,(ctx)->H.u)
687
688#endif
689
/*
 * Per-architecture alternatives to the generic 4-bit routines. Every
 * branch that applies defines GCM_FUNCREF_4BIT together with a
 * GHASH_ASM_* marker and declares its init/gmult/ghash triple; the
 * functions themselves are not defined in this file.
 */
#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
/* ---- x86 and x86_64 ---- */
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[];

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
                     const u8 *inp, size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/* On 32-bit x86 the "avx" names are aliases for the clmul routines. */
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16],
                   const u8 *inp, size_t len);
#  endif

#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
/* Additional 4-bit variants that exist for 32-bit x86 only. */
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
                        const u8 *inp, size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
                        const u8 *inp, size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
/* ---- ARM and AArch64 ---- */
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
/* NEON_CAPABLE is defined for 32-bit ARM only. */
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16],
                    const u8 *inp, size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16],
                  const u8 *inp, size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
/* ---- SPARC ---- */
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16],
                    const u8 *inp, size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
/* ---- PowerPC ---- */
#  include "ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16],
                  const u8 *inp, size_t len);
# endif
#endif
762
#ifdef GCM_FUNCREF_4BIT
/*
 * When an implementation is selected at run time, GCM_MUL and GHASH call
 * through the pointers gcm_gmult_p and gcm_ghash_p, which must be visible
 * at the point of use. As before, the second GCM_MUL argument is pasted
 * in as the member name. ctx is parenthesized so any pointer expression
 * may be passed.
 */
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)((ctx)->Xi.u,(ctx)->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)((ctx)->Xi.u,(ctx)->Htable,in,len)
# endif
#endif
771
772void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
773{
774    const union {
775        long one;
776        char little;
777    } is_endian = {
778        1
779    };
780
781    memset(ctx, 0, sizeof(*ctx));
782    ctx->block = block;
783    ctx->key = key;
784
785    (*block) (ctx->H.c, ctx->H.c, key);
786
787    if (is_endian.little) {
788        /* H is stored in host byte order */
789#ifdef BSWAP8
790        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
791        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
792#else
793        u8 *p = ctx->H.c;
794        u64 hi, lo;
795        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
796        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
797        ctx->H.u[0] = hi;
798        ctx->H.u[1] = lo;
799#endif
800    }
801#if     TABLE_BITS==8
802    gcm_init_8bit(ctx->Htable, ctx->H.u);
803#elif   TABLE_BITS==4
804# if    defined(GHASH_ASM_X86_OR_64)
805#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
806    if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
807        OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
808        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
809            gcm_init_avx(ctx->Htable, ctx->H.u);
810            ctx->gmult = gcm_gmult_avx;
811            ctx->ghash = gcm_ghash_avx;
812        } else {
813            gcm_init_clmul(ctx->Htable, ctx->H.u);
814            ctx->gmult = gcm_gmult_clmul;
815            ctx->ghash = gcm_ghash_clmul;
816        }
817        return;
818    }
819#  endif
820    gcm_init_4bit(ctx->Htable, ctx->H.u);
821#  if   defined(GHASH_ASM_X86)  /* x86 only */
822#   if  defined(OPENSSL_IA32_SSE2)
823    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
824#   else
825    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
826#   endif
827        ctx->gmult = gcm_gmult_4bit_mmx;
828        ctx->ghash = gcm_ghash_4bit_mmx;
829    } else {
830        ctx->gmult = gcm_gmult_4bit_x86;
831        ctx->ghash = gcm_ghash_4bit_x86;
832    }
833#  else
834    ctx->gmult = gcm_gmult_4bit;
835    ctx->ghash = gcm_ghash_4bit;
836#  endif
837# elif  defined(GHASH_ASM_ARM)
838#  ifdef PMULL_CAPABLE
839    if (PMULL_CAPABLE) {
840        gcm_init_v8(ctx->Htable, ctx->H.u);
841        ctx->gmult = gcm_gmult_v8;
842        ctx->ghash = gcm_ghash_v8;
843    } else
844#  endif
845#  ifdef NEON_CAPABLE
846    if (NEON_CAPABLE) {
847        gcm_init_neon(ctx->Htable, ctx->H.u);
848        ctx->gmult = gcm_gmult_neon;
849        ctx->ghash = gcm_ghash_neon;
850    } else
851#  endif
852    {
853        gcm_init_4bit(ctx->Htable, ctx->H.u);
854        ctx->gmult = gcm_gmult_4bit;
855#  if defined(GHASH)
856        ctx->ghash = gcm_ghash_4bit;
857#  else
858        ctx->ghash = NULL;
859#  endif
860    }
861# elif  defined(GHASH_ASM_SPARC)
862    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
863        gcm_init_vis3(ctx->Htable, ctx->H.u);
864        ctx->gmult = gcm_gmult_vis3;
865        ctx->ghash = gcm_ghash_vis3;
866    } else {
867        gcm_init_4bit(ctx->Htable, ctx->H.u);
868        ctx->gmult = gcm_gmult_4bit;
869        ctx->ghash = gcm_ghash_4bit;
870    }
871# elif  defined(GHASH_ASM_PPC)
872    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
873        gcm_init_p8(ctx->Htable, ctx->H.u);
874        ctx->gmult = gcm_gmult_p8;
875        ctx->ghash = gcm_ghash_p8;
876    } else {
877        gcm_init_4bit(ctx->Htable, ctx->H.u);
878        ctx->gmult = gcm_gmult_4bit;
879#  if defined(GHASH)
880        ctx->ghash = gcm_ghash_4bit;
881#  else
882        ctx->ghash = NULL;
883#  endif
884    }
885# else
886    gcm_init_4bit(ctx->Htable, ctx->H.u);
887# endif
888#endif
889}
890
891void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
892                         size_t len)
893{
894    const union {
895        long one;
896        char little;
897    } is_endian = {
898        1
899    };
900    unsigned int ctr;
901#ifdef GCM_FUNCREF_4BIT
902    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
903#endif
904
905    ctx->Yi.u[0] = 0;
906    ctx->Yi.u[1] = 0;
907    ctx->Xi.u[0] = 0;
908    ctx->Xi.u[1] = 0;
909    ctx->len.u[0] = 0;          /* AAD length */
910    ctx->len.u[1] = 0;          /* message length */
911    ctx->ares = 0;
912    ctx->mres = 0;
913
914    if (len == 12) {
915        memcpy(ctx->Yi.c, iv, 12);
916        ctx->Yi.c[15] = 1;
917        ctr = 1;
918    } else {
919        size_t i;
920        u64 len0 = len;
921
922        while (len >= 16) {
923            for (i = 0; i < 16; ++i)
924                ctx->Yi.c[i] ^= iv[i];
925            GCM_MUL(ctx, Yi);
926            iv += 16;
927            len -= 16;
928        }
929        if (len) {
930            for (i = 0; i < len; ++i)
931                ctx->Yi.c[i] ^= iv[i];
932            GCM_MUL(ctx, Yi);
933        }
934        len0 <<= 3;
935        if (is_endian.little) {
936#ifdef BSWAP8
937            ctx->Yi.u[1] ^= BSWAP8(len0);
938#else
939            ctx->Yi.c[8] ^= (u8)(len0 >> 56);
940            ctx->Yi.c[9] ^= (u8)(len0 >> 48);
941            ctx->Yi.c[10] ^= (u8)(len0 >> 40);
942            ctx->Yi.c[11] ^= (u8)(len0 >> 32);
943            ctx->Yi.c[12] ^= (u8)(len0 >> 24);
944            ctx->Yi.c[13] ^= (u8)(len0 >> 16);
945            ctx->Yi.c[14] ^= (u8)(len0 >> 8);
946            ctx->Yi.c[15] ^= (u8)(len0);
947#endif
948        } else
949            ctx->Yi.u[1] ^= len0;
950
951        GCM_MUL(ctx, Yi);
952
953        if (is_endian.little)
954#ifdef BSWAP4
955            ctr = BSWAP4(ctx->Yi.d[3]);
956#else
957            ctr = GETU32(ctx->Yi.c + 12);
958#endif
959        else
960            ctr = ctx->Yi.d[3];
961    }
962
963    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
964    ++ctr;
965    if (is_endian.little)
966#ifdef BSWAP4
967        ctx->Yi.d[3] = BSWAP4(ctr);
968#else
969        PUTU32(ctx->Yi.c + 12, ctr);
970#endif
971    else
972        ctx->Yi.d[3] = ctr;
973}
974
975int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
976                      size_t len)
977{
978    size_t i;
979    unsigned int n;
980    u64 alen = ctx->len.u[0];
981#ifdef GCM_FUNCREF_4BIT
982    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
983# ifdef GHASH
984    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
985                         const u8 *inp, size_t len) = ctx->ghash;
986# endif
987#endif
988
989    if (ctx->len.u[1])
990        return -2;
991
992    alen += len;
993    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
994        return -1;
995    ctx->len.u[0] = alen;
996
997    n = ctx->ares;
998    if (n) {
999        while (n && len) {
1000            ctx->Xi.c[n] ^= *(aad++);
1001            --len;
1002            n = (n + 1) % 16;
1003        }
1004        if (n == 0)
1005            GCM_MUL(ctx, Xi);
1006        else {
1007            ctx->ares = n;
1008            return 0;
1009        }
1010    }
1011#ifdef GHASH
1012    if ((i = (len & (size_t)-16))) {
1013        GHASH(ctx, aad, i);
1014        aad += i;
1015        len -= i;
1016    }
1017#else
1018    while (len >= 16) {
1019        for (i = 0; i < 16; ++i)
1020            ctx->Xi.c[i] ^= aad[i];
1021        GCM_MUL(ctx, Xi);
1022        aad += 16;
1023        len -= 16;
1024    }
1025#endif
1026    if (len) {
1027        n = (unsigned int)len;
1028        for (i = 0; i < len; ++i)
1029            ctx->Xi.c[i] ^= aad[i];
1030    }
1031
1032    ctx->ares = n;
1033    return 0;
1034}
1035
1036int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
1037                          const unsigned char *in, unsigned char *out,
1038                          size_t len)
1039{
1040    const union {
1041        long one;
1042        char little;
1043    } is_endian = {
1044        1
1045    };
1046    unsigned int n, ctr;
1047    size_t i;
1048    u64 mlen = ctx->len.u[1];
1049    block128_f block = ctx->block;
1050    void *key = ctx->key;
1051#ifdef GCM_FUNCREF_4BIT
1052    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1053# ifdef GHASH
1054    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1055                         const u8 *inp, size_t len) = ctx->ghash;
1056# endif
1057#endif
1058
1059#if 0
1060    n = (unsigned int)mlen % 16; /* alternative to ctx->mres */
1061#endif
1062    mlen += len;
1063    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1064        return -1;
1065    ctx->len.u[1] = mlen;
1066
1067    if (ctx->ares) {
1068        /* First call to encrypt finalizes GHASH(AAD) */
1069        GCM_MUL(ctx, Xi);
1070        ctx->ares = 0;
1071    }
1072
1073    if (is_endian.little)
1074#ifdef BSWAP4
1075        ctr = BSWAP4(ctx->Yi.d[3]);
1076#else
1077        ctr = GETU32(ctx->Yi.c + 12);
1078#endif
1079    else
1080        ctr = ctx->Yi.d[3];
1081
1082    n = ctx->mres;
1083#if !defined(OPENSSL_SMALL_FOOTPRINT)
1084    if (16 % sizeof(size_t) == 0) { /* always true actually */
1085        do {
1086            if (n) {
1087                while (n && len) {
1088                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1089                    --len;
1090                    n = (n + 1) % 16;
1091                }
1092                if (n == 0)
1093                    GCM_MUL(ctx, Xi);
1094                else {
1095                    ctx->mres = n;
1096                    return 0;
1097                }
1098            }
1099# if defined(STRICT_ALIGNMENT)
1100            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1101                break;
1102# endif
1103# if defined(GHASH) && defined(GHASH_CHUNK)
1104            while (len >= GHASH_CHUNK) {
1105                size_t j = GHASH_CHUNK;
1106
1107                while (j) {
1108                    size_t *out_t = (size_t *)out;
1109                    const size_t *in_t = (const size_t *)in;
1110
1111                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1112                    ++ctr;
1113                    if (is_endian.little)
1114#  ifdef BSWAP4
1115                        ctx->Yi.d[3] = BSWAP4(ctr);
1116#  else
1117                        PUTU32(ctx->Yi.c + 12, ctr);
1118#  endif
1119                    else
1120                        ctx->Yi.d[3] = ctr;
1121                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1122                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1123                    out += 16;
1124                    in += 16;
1125                    j -= 16;
1126                }
1127                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1128                len -= GHASH_CHUNK;
1129            }
1130            if ((i = (len & (size_t)-16))) {
1131                size_t j = i;
1132
1133                while (len >= 16) {
1134                    size_t *out_t = (size_t *)out;
1135                    const size_t *in_t = (const size_t *)in;
1136
1137                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1138                    ++ctr;
1139                    if (is_endian.little)
1140#  ifdef BSWAP4
1141                        ctx->Yi.d[3] = BSWAP4(ctr);
1142#  else
1143                        PUTU32(ctx->Yi.c + 12, ctr);
1144#  endif
1145                    else
1146                        ctx->Yi.d[3] = ctr;
1147                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1148                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1149                    out += 16;
1150                    in += 16;
1151                    len -= 16;
1152                }
1153                GHASH(ctx, out - j, j);
1154            }
1155# else
1156            while (len >= 16) {
1157                size_t *out_t = (size_t *)out;
1158                const size_t *in_t = (const size_t *)in;
1159
1160                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1161                ++ctr;
1162                if (is_endian.little)
1163#  ifdef BSWAP4
1164                    ctx->Yi.d[3] = BSWAP4(ctr);
1165#  else
1166                    PUTU32(ctx->Yi.c + 12, ctr);
1167#  endif
1168                else
1169                    ctx->Yi.d[3] = ctr;
1170                for (i = 0; i < 16 / sizeof(size_t); ++i)
1171                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1172                GCM_MUL(ctx, Xi);
1173                out += 16;
1174                in += 16;
1175                len -= 16;
1176            }
1177# endif
1178            if (len) {
1179                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1180                ++ctr;
1181                if (is_endian.little)
1182# ifdef BSWAP4
1183                    ctx->Yi.d[3] = BSWAP4(ctr);
1184# else
1185                    PUTU32(ctx->Yi.c + 12, ctr);
1186# endif
1187                else
1188                    ctx->Yi.d[3] = ctr;
1189                while (len--) {
1190                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1191                    ++n;
1192                }
1193            }
1194
1195            ctx->mres = n;
1196            return 0;
1197        } while (0);
1198    }
1199#endif
1200    for (i = 0; i < len; ++i) {
1201        if (n == 0) {
1202            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1203            ++ctr;
1204            if (is_endian.little)
1205#ifdef BSWAP4
1206                ctx->Yi.d[3] = BSWAP4(ctr);
1207#else
1208                PUTU32(ctx->Yi.c + 12, ctr);
1209#endif
1210            else
1211                ctx->Yi.d[3] = ctr;
1212        }
1213        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1214        n = (n + 1) % 16;
1215        if (n == 0)
1216            GCM_MUL(ctx, Xi);
1217    }
1218
1219    ctx->mres = n;
1220    return 0;
1221}
1222
1223int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1224                          const unsigned char *in, unsigned char *out,
1225                          size_t len)
1226{
1227    const union {
1228        long one;
1229        char little;
1230    } is_endian = {
1231        1
1232    };
1233    unsigned int n, ctr;
1234    size_t i;
1235    u64 mlen = ctx->len.u[1];
1236    block128_f block = ctx->block;
1237    void *key = ctx->key;
1238#ifdef GCM_FUNCREF_4BIT
1239    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1240# ifdef GHASH
1241    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1242                         const u8 *inp, size_t len) = ctx->ghash;
1243# endif
1244#endif
1245
1246    mlen += len;
1247    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1248        return -1;
1249    ctx->len.u[1] = mlen;
1250
1251    if (ctx->ares) {
1252        /* First call to decrypt finalizes GHASH(AAD) */
1253        GCM_MUL(ctx, Xi);
1254        ctx->ares = 0;
1255    }
1256
1257    if (is_endian.little)
1258#ifdef BSWAP4
1259        ctr = BSWAP4(ctx->Yi.d[3]);
1260#else
1261        ctr = GETU32(ctx->Yi.c + 12);
1262#endif
1263    else
1264        ctr = ctx->Yi.d[3];
1265
1266    n = ctx->mres;
1267#if !defined(OPENSSL_SMALL_FOOTPRINT)
1268    if (16 % sizeof(size_t) == 0) { /* always true actually */
1269        do {
1270            if (n) {
1271                while (n && len) {
1272                    u8 c = *(in++);
1273                    *(out++) = c ^ ctx->EKi.c[n];
1274                    ctx->Xi.c[n] ^= c;
1275                    --len;
1276                    n = (n + 1) % 16;
1277                }
1278                if (n == 0)
1279                    GCM_MUL(ctx, Xi);
1280                else {
1281                    ctx->mres = n;
1282                    return 0;
1283                }
1284            }
1285# if defined(STRICT_ALIGNMENT)
1286            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1287                break;
1288# endif
1289# if defined(GHASH) && defined(GHASH_CHUNK)
1290            while (len >= GHASH_CHUNK) {
1291                size_t j = GHASH_CHUNK;
1292
1293                GHASH(ctx, in, GHASH_CHUNK);
1294                while (j) {
1295                    size_t *out_t = (size_t *)out;
1296                    const size_t *in_t = (const size_t *)in;
1297
1298                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1299                    ++ctr;
1300                    if (is_endian.little)
1301#  ifdef BSWAP4
1302                        ctx->Yi.d[3] = BSWAP4(ctr);
1303#  else
1304                        PUTU32(ctx->Yi.c + 12, ctr);
1305#  endif
1306                    else
1307                        ctx->Yi.d[3] = ctr;
1308                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1309                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1310                    out += 16;
1311                    in += 16;
1312                    j -= 16;
1313                }
1314                len -= GHASH_CHUNK;
1315            }
1316            if ((i = (len & (size_t)-16))) {
1317                GHASH(ctx, in, i);
1318                while (len >= 16) {
1319                    size_t *out_t = (size_t *)out;
1320                    const size_t *in_t = (const size_t *)in;
1321
1322                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1323                    ++ctr;
1324                    if (is_endian.little)
1325#  ifdef BSWAP4
1326                        ctx->Yi.d[3] = BSWAP4(ctr);
1327#  else
1328                        PUTU32(ctx->Yi.c + 12, ctr);
1329#  endif
1330                    else
1331                        ctx->Yi.d[3] = ctr;
1332                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1333                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1334                    out += 16;
1335                    in += 16;
1336                    len -= 16;
1337                }
1338            }
1339# else
1340            while (len >= 16) {
1341                size_t *out_t = (size_t *)out;
1342                const size_t *in_t = (const size_t *)in;
1343
1344                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1345                ++ctr;
1346                if (is_endian.little)
1347#  ifdef BSWAP4
1348                    ctx->Yi.d[3] = BSWAP4(ctr);
1349#  else
1350                    PUTU32(ctx->Yi.c + 12, ctr);
1351#  endif
1352                else
1353                    ctx->Yi.d[3] = ctr;
1354                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1355                    size_t c = in[i];
1356                    out[i] = c ^ ctx->EKi.t[i];
1357                    ctx->Xi.t[i] ^= c;
1358                }
1359                GCM_MUL(ctx, Xi);
1360                out += 16;
1361                in += 16;
1362                len -= 16;
1363            }
1364# endif
1365            if (len) {
1366                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1367                ++ctr;
1368                if (is_endian.little)
1369# ifdef BSWAP4
1370                    ctx->Yi.d[3] = BSWAP4(ctr);
1371# else
1372                    PUTU32(ctx->Yi.c + 12, ctr);
1373# endif
1374                else
1375                    ctx->Yi.d[3] = ctr;
1376                while (len--) {
1377                    u8 c = in[n];
1378                    ctx->Xi.c[n] ^= c;
1379                    out[n] = c ^ ctx->EKi.c[n];
1380                    ++n;
1381                }
1382            }
1383
1384            ctx->mres = n;
1385            return 0;
1386        } while (0);
1387    }
1388#endif
1389    for (i = 0; i < len; ++i) {
1390        u8 c;
1391        if (n == 0) {
1392            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1393            ++ctr;
1394            if (is_endian.little)
1395#ifdef BSWAP4
1396                ctx->Yi.d[3] = BSWAP4(ctr);
1397#else
1398                PUTU32(ctx->Yi.c + 12, ctr);
1399#endif
1400            else
1401                ctx->Yi.d[3] = ctr;
1402        }
1403        c = in[i];
1404        out[i] = c ^ ctx->EKi.c[n];
1405        ctx->Xi.c[n] ^= c;
1406        n = (n + 1) % 16;
1407        if (n == 0)
1408            GCM_MUL(ctx, Xi);
1409    }
1410
1411    ctx->mres = n;
1412    return 0;
1413}
1414
1415int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1416                                const unsigned char *in, unsigned char *out,
1417                                size_t len, ctr128_f stream)
1418{
1419    const union {
1420        long one;
1421        char little;
1422    } is_endian = {
1423        1
1424    };
1425    unsigned int n, ctr;
1426    size_t i;
1427    u64 mlen = ctx->len.u[1];
1428    void *key = ctx->key;
1429#ifdef GCM_FUNCREF_4BIT
1430    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1431# ifdef GHASH
1432    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1433                         const u8 *inp, size_t len) = ctx->ghash;
1434# endif
1435#endif
1436
1437    mlen += len;
1438    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1439        return -1;
1440    ctx->len.u[1] = mlen;
1441
1442    if (ctx->ares) {
1443        /* First call to encrypt finalizes GHASH(AAD) */
1444        GCM_MUL(ctx, Xi);
1445        ctx->ares = 0;
1446    }
1447
1448    if (is_endian.little)
1449#ifdef BSWAP4
1450        ctr = BSWAP4(ctx->Yi.d[3]);
1451#else
1452        ctr = GETU32(ctx->Yi.c + 12);
1453#endif
1454    else
1455        ctr = ctx->Yi.d[3];
1456
1457    n = ctx->mres;
1458    if (n) {
1459        while (n && len) {
1460            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1461            --len;
1462            n = (n + 1) % 16;
1463        }
1464        if (n == 0)
1465            GCM_MUL(ctx, Xi);
1466        else {
1467            ctx->mres = n;
1468            return 0;
1469        }
1470    }
1471#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1472    while (len >= GHASH_CHUNK) {
1473        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1474        ctr += GHASH_CHUNK / 16;
1475        if (is_endian.little)
1476# ifdef BSWAP4
1477            ctx->Yi.d[3] = BSWAP4(ctr);
1478# else
1479            PUTU32(ctx->Yi.c + 12, ctr);
1480# endif
1481        else
1482            ctx->Yi.d[3] = ctr;
1483        GHASH(ctx, out, GHASH_CHUNK);
1484        out += GHASH_CHUNK;
1485        in += GHASH_CHUNK;
1486        len -= GHASH_CHUNK;
1487    }
1488#endif
1489    if ((i = (len & (size_t)-16))) {
1490        size_t j = i / 16;
1491
1492        (*stream) (in, out, j, key, ctx->Yi.c);
1493        ctr += (unsigned int)j;
1494        if (is_endian.little)
1495#ifdef BSWAP4
1496            ctx->Yi.d[3] = BSWAP4(ctr);
1497#else
1498            PUTU32(ctx->Yi.c + 12, ctr);
1499#endif
1500        else
1501            ctx->Yi.d[3] = ctr;
1502        in += i;
1503        len -= i;
1504#if defined(GHASH)
1505        GHASH(ctx, out, i);
1506        out += i;
1507#else
1508        while (j--) {
1509            for (i = 0; i < 16; ++i)
1510                ctx->Xi.c[i] ^= out[i];
1511            GCM_MUL(ctx, Xi);
1512            out += 16;
1513        }
1514#endif
1515    }
1516    if (len) {
1517        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1518        ++ctr;
1519        if (is_endian.little)
1520#ifdef BSWAP4
1521            ctx->Yi.d[3] = BSWAP4(ctr);
1522#else
1523            PUTU32(ctx->Yi.c + 12, ctr);
1524#endif
1525        else
1526            ctx->Yi.d[3] = ctr;
1527        while (len--) {
1528            ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1529            ++n;
1530        }
1531    }
1532
1533    ctx->mres = n;
1534    return 0;
1535}
1536
1537int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1538                                const unsigned char *in, unsigned char *out,
1539                                size_t len, ctr128_f stream)
1540{
1541    const union {
1542        long one;
1543        char little;
1544    } is_endian = {
1545        1
1546    };
1547    unsigned int n, ctr;
1548    size_t i;
1549    u64 mlen = ctx->len.u[1];
1550    void *key = ctx->key;
1551#ifdef GCM_FUNCREF_4BIT
1552    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1553# ifdef GHASH
1554    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1555                         const u8 *inp, size_t len) = ctx->ghash;
1556# endif
1557#endif
1558
1559    mlen += len;
1560    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1561        return -1;
1562    ctx->len.u[1] = mlen;
1563
1564    if (ctx->ares) {
1565        /* First call to decrypt finalizes GHASH(AAD) */
1566        GCM_MUL(ctx, Xi);
1567        ctx->ares = 0;
1568    }
1569
1570    if (is_endian.little)
1571#ifdef BSWAP4
1572        ctr = BSWAP4(ctx->Yi.d[3]);
1573#else
1574        ctr = GETU32(ctx->Yi.c + 12);
1575#endif
1576    else
1577        ctr = ctx->Yi.d[3];
1578
1579    n = ctx->mres;
1580    if (n) {
1581        while (n && len) {
1582            u8 c = *(in++);
1583            *(out++) = c ^ ctx->EKi.c[n];
1584            ctx->Xi.c[n] ^= c;
1585            --len;
1586            n = (n + 1) % 16;
1587        }
1588        if (n == 0)
1589            GCM_MUL(ctx, Xi);
1590        else {
1591            ctx->mres = n;
1592            return 0;
1593        }
1594    }
1595#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1596    while (len >= GHASH_CHUNK) {
1597        GHASH(ctx, in, GHASH_CHUNK);
1598        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1599        ctr += GHASH_CHUNK / 16;
1600        if (is_endian.little)
1601# ifdef BSWAP4
1602            ctx->Yi.d[3] = BSWAP4(ctr);
1603# else
1604            PUTU32(ctx->Yi.c + 12, ctr);
1605# endif
1606        else
1607            ctx->Yi.d[3] = ctr;
1608        out += GHASH_CHUNK;
1609        in += GHASH_CHUNK;
1610        len -= GHASH_CHUNK;
1611    }
1612#endif
1613    if ((i = (len & (size_t)-16))) {
1614        size_t j = i / 16;
1615
1616#if defined(GHASH)
1617        GHASH(ctx, in, i);
1618#else
1619        while (j--) {
1620            size_t k;
1621            for (k = 0; k < 16; ++k)
1622                ctx->Xi.c[k] ^= in[k];
1623            GCM_MUL(ctx, Xi);
1624            in += 16;
1625        }
1626        j = i / 16;
1627        in -= i;
1628#endif
1629        (*stream) (in, out, j, key, ctx->Yi.c);
1630        ctr += (unsigned int)j;
1631        if (is_endian.little)
1632#ifdef BSWAP4
1633            ctx->Yi.d[3] = BSWAP4(ctr);
1634#else
1635            PUTU32(ctx->Yi.c + 12, ctr);
1636#endif
1637        else
1638            ctx->Yi.d[3] = ctr;
1639        out += i;
1640        in += i;
1641        len -= i;
1642    }
1643    if (len) {
1644        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1645        ++ctr;
1646        if (is_endian.little)
1647#ifdef BSWAP4
1648            ctx->Yi.d[3] = BSWAP4(ctr);
1649#else
1650            PUTU32(ctx->Yi.c + 12, ctr);
1651#endif
1652        else
1653            ctx->Yi.d[3] = ctr;
1654        while (len--) {
1655            u8 c = in[n];
1656            ctx->Xi.c[n] ^= c;
1657            out[n] = c ^ ctx->EKi.c[n];
1658            ++n;
1659        }
1660    }
1661
1662    ctx->mres = n;
1663    return 0;
1664}
1665
1666int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1667                         size_t len)
1668{
1669    const union {
1670        long one;
1671        char little;
1672    } is_endian = {
1673        1
1674    };
1675    u64 alen = ctx->len.u[0] << 3;
1676    u64 clen = ctx->len.u[1] << 3;
1677#ifdef GCM_FUNCREF_4BIT
1678    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1679#endif
1680
1681    if (ctx->mres || ctx->ares)
1682        GCM_MUL(ctx, Xi);
1683
1684    if (is_endian.little) {
1685#ifdef BSWAP8
1686        alen = BSWAP8(alen);
1687        clen = BSWAP8(clen);
1688#else
1689        u8 *p = ctx->len.c;
1690
1691        ctx->len.u[0] = alen;
1692        ctx->len.u[1] = clen;
1693
1694        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1695        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1696#endif
1697    }
1698
1699    ctx->Xi.u[0] ^= alen;
1700    ctx->Xi.u[1] ^= clen;
1701    GCM_MUL(ctx, Xi);
1702
1703    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1704    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1705
1706    if (tag && len <= sizeof(ctx->Xi))
1707        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1708    else
1709        return -1;
1710}
1711
1712void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1713{
1714    CRYPTO_gcm128_finish(ctx, NULL, 0);
1715    memcpy(tag, ctx->Xi.c,
1716           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1717}
1718
1719GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1720{
1721    GCM128_CONTEXT *ret;
1722
1723    if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1724        CRYPTO_gcm128_init(ret, key, block);
1725
1726    return ret;
1727}
1728
1729void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1730{
1731    if (ctx) {
1732        OPENSSL_cleanse(ctx, sizeof(*ctx));
1733        OPENSSL_free(ctx);
1734    }
1735}
1736
1737#if defined(SELFTEST)
1738# include <stdio.h>
1739# include <openssl/aes.h>
1740
1741/* Test Case 1 */
1742static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1743static const u8 T1[] = {
1744    0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1745    0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
1746};
1747
1748/* Test Case 2 */
1749# define K2 K1
1750# define A2 A1
1751# define IV2 IV1
1752static const u8 P2[16];
1753static const u8 C2[] = {
1754    0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1755    0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1756};
1757
1758static const u8 T2[] = {
1759    0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1760    0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
1761};
1762
1763/* Test Case 3 */
1764# define A3 A2
1765static const u8 K3[] = {
1766    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1767    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1768};
1769
1770static const u8 P3[] = {
1771    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1772    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1773    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1774    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1775    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1776    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1777    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1778    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1779};
1780
1781static const u8 IV3[] = {
1782    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1783    0xde, 0xca, 0xf8, 0x88
1784};
1785
1786static const u8 C3[] = {
1787    0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1788    0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1789    0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1790    0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1791    0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1792    0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1793    0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1794    0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1795};
1796
1797static const u8 T3[] = {
1798    0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1799    0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
1800};
1801
1802/* Test Case 4 */
1803# define K4 K3
1804# define IV4 IV3
1805static const u8 P4[] = {
1806    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1807    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1808    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1809    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1810    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1811    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1812    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1813    0xba, 0x63, 0x7b, 0x39
1814};
1815
1816static const u8 A4[] = {
1817    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1818    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1819    0xab, 0xad, 0xda, 0xd2
1820};
1821
1822static const u8 C4[] = {
1823    0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1824    0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1825    0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1826    0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1827    0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1828    0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1829    0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1830    0x3d, 0x58, 0xe0, 0x91
1831};
1832
1833static const u8 T4[] = {
1834    0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1835    0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
1836};
1837
1838/* Test Case 5 */
1839# define K5 K4
1840# define P5 P4
1841# define A5 A4
1842static const u8 IV5[] = {
1843    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1844};
1845
1846static const u8 C5[] = {
1847    0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1848    0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1849    0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1850    0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1851    0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1852    0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1853    0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1854    0xc2, 0x3f, 0x45, 0x98
1855};
1856
1857static const u8 T5[] = {
1858    0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1859    0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
1860};
1861
1862/* Test Case 6 */
1863# define K6 K5
1864# define P6 P5
1865# define A6 A5
1866static const u8 IV6[] = {
1867    0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1868    0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1869    0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1870    0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1871    0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1872    0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1873    0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1874    0xa6, 0x37, 0xb3, 0x9b
1875};
1876
1877static const u8 C6[] = {
1878    0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1879    0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1880    0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1881    0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1882    0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1883    0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1884    0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1885    0x4c, 0x34, 0xae, 0xe5
1886};
1887
1888static const u8 T6[] = {
1889    0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1890    0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
1891};
1892
1893/* Test Case 7 */
1894static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1895static const u8 T7[] = {
1896    0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1897    0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
1898};
1899
/*
 * Test Case 8: reuses the zero key and zero IV of Test Case 7 and its NULL
 * AAD; the plaintext is 16 zero bytes (static storage, no initializer).
 */
# define K8 K7
# define IV8 IV7
# define A8 A7
static const u8 P8[16];
/* Expected ciphertext (16 bytes) */
static const u8 C8[] = {
    0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
    0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
};

/* Expected 16-byte tag */
static const u8 T8[] = {
    0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
    0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
};
1914
/*
 * Test Case 9: 24-byte key, 64-byte plaintext, 12-byte IV.  A9 resolves
 * through A8 to A7, which is NULL, so TEST_CASE supplies no AAD.
 */
# define A9 A8
/* 24-byte key (192 bits as passed to AES_set_encrypt_key by TEST_CASE) */
static const u8 K9[] = {
    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
};

/* Plaintext (64 bytes) */
static const u8 P9[] = {
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
};

/* 12-byte IV */
static const u8 IV9[] = {
    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
    0xde, 0xca, 0xf8, 0x88
};

/* Expected ciphertext (64 bytes) */
static const u8 C9[] = {
    0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
    0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
    0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
    0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
    0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
    0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
    0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
    0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
};

/* Expected 16-byte tag */
static const u8 T9[] = {
    0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
    0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
};
1954
/*
 * Test Case 10: key and IV are those of Test Case 9; the plaintext is 60
 * bytes (not a multiple of 16) and 20 bytes of AAD are supplied.
 */
# define K10 K9
# define IV10 IV9
/* Plaintext (60 bytes) */
static const u8 P10[] = {
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
    0xba, 0x63, 0x7b, 0x39
};

/* Additional authenticated data (20 bytes) */
static const u8 A10[] = {
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
    0xab, 0xad, 0xda, 0xd2
};

/* Expected ciphertext (60 bytes) */
static const u8 C10[] = {
    0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
    0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
    0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
    0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
    0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
    0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
    0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
    0xcc, 0xda, 0x27, 0x10
};

/* Expected 16-byte tag */
static const u8 T10[] = {
    0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
    0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
};
1990
/*
 * Test Case 11: key, plaintext and AAD are those of Test Case 10; the IV
 * is shortened to 8 bytes.
 */
# define K11 K10
# define P11 P10
# define A11 A10
/* 8-byte IV */
static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };

/* Expected ciphertext (60 bytes) */
static const u8 C11[] = {
    0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
    0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
    0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
    0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
    0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
    0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
    0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
    0xa0, 0xf0, 0x62, 0xf7
};

/* Expected 16-byte tag */
static const u8 T11[] = {
    0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
    0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
};
2012
/*
 * Test Case 12: key, plaintext and AAD are those of Test Case 11; the IV
 * is 60 bytes long.
 */
# define K12 K11
# define P12 P11
# define A12 A11
/* 60-byte IV */
static const u8 IV12[] = {
    0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
    0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
    0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
    0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
    0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
    0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
    0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
    0xa6, 0x37, 0xb3, 0x9b
};

/* Expected ciphertext (60 bytes) */
static const u8 C12[] = {
    0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
    0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
    0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
    0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
    0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
    0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
    0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
    0xe9, 0xb7, 0x37, 0x3b
};

/* Expected 16-byte tag */
static const u8 T12[] = {
    0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
    0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
};
2043
/*
 * Test Case 13: 32-byte key (TEST_CASE passes sizeof(K13) * 8 = 256 as the
 * key size in bits) and 12-byte IV, both zero because they have static
 * storage and no initializer.  P13, A13 and C13 are NULL pointers, so
 * TEST_CASE skips the AAD, encrypt/decrypt and data-comparison steps and
 * only the tag is checked.
 */
static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
/* Expected 16-byte tag */
static const u8 T13[] = {
    0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
    0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
};
2050
/*
 * Test Case 14: reuses the zero key and NULL AAD of Test Case 13; the
 * plaintext is 16 zero bytes and the IV 12 zero bytes (static storage, no
 * initializer).
 */
# define K14 K13
# define A14 A13
static const u8 P14[16], IV14[12];
/* Expected ciphertext (16 bytes) */
static const u8 C14[] = {
    0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
    0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
};

/* Expected 16-byte tag */
static const u8 T14[] = {
    0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
    0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
};
2064
/*
 * Test Case 15: 32-byte key, 64-byte plaintext, 12-byte IV.  A15 resolves
 * through A14 to A13, which is NULL, so TEST_CASE supplies no AAD.
 */
# define A15 A14
/* 32-byte key (256 bits as passed to AES_set_encrypt_key by TEST_CASE) */
static const u8 K15[] = {
    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
};

/* Plaintext (64 bytes) */
static const u8 P15[] = {
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
};

/* 12-byte IV */
static const u8 IV15[] = {
    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
    0xde, 0xca, 0xf8, 0x88
};

/* Expected ciphertext (64 bytes) */
static const u8 C15[] = {
    0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
    0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
    0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
    0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
    0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
    0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
    0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
    0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
};

/* Expected 16-byte tag */
static const u8 T15[] = {
    0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
    0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
};
2105
/*
 * Test Case 16: key and IV are those of Test Case 15; the plaintext is 60
 * bytes (not a multiple of 16) and 20 bytes of AAD are supplied.
 */
# define K16 K15
# define IV16 IV15
/* Plaintext (60 bytes) */
static const u8 P16[] = {
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
    0xba, 0x63, 0x7b, 0x39
};

/* Additional authenticated data (20 bytes) */
static const u8 A16[] = {
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
    0xab, 0xad, 0xda, 0xd2
};

/* Expected ciphertext (60 bytes) */
static const u8 C16[] = {
    0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
    0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
    0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
    0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
    0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
    0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
    0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
    0xbc, 0xc9, 0xf6, 0x62
};

/* Expected 16-byte tag */
static const u8 T16[] = {
    0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
    0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
};
2141
/*
 * Test Case 17: key, plaintext and AAD are those of Test Case 16; the IV
 * is shortened to 8 bytes.
 */
# define K17 K16
# define P17 P16
# define A17 A16
/* 8-byte IV */
static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };

/* Expected ciphertext (60 bytes) */
static const u8 C17[] = {
    0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
    0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
    0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
    0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
    0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
    0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
    0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
    0xf4, 0x7c, 0x9b, 0x1f
};

/* Expected 16-byte tag */
static const u8 T17[] = {
    0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
    0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
};
2163
/*
 * Test Case 18: key, plaintext and AAD are those of Test Case 17; the IV
 * is 60 bytes long.
 */
# define K18 K17
# define P18 P17
# define A18 A17
/* 60-byte IV */
static const u8 IV18[] = {
    0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
    0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
    0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
    0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
    0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
    0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
    0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
    0xa6, 0x37, 0xb3, 0x9b
};

/* Expected ciphertext (60 bytes) */
static const u8 C18[] = {
    0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
    0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
    0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
    0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
    0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
    0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
    0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
    0x44, 0xae, 0x7e, 0x3f
};

/* Expected 16-byte tag */
static const u8 T18[] = {
    0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
    0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
};
2194
/*
 * Test Case 19: key, plaintext, IV and expected ciphertext are the Test
 * Case 1 objects (aliased below); only the AAD and the tag are specific to
 * this case.
 */
# define K19 K1
# define P19 P1
# define IV19 IV1
# define C19 C1
/*
 * Additional authenticated data (128 bytes).  The first 64 bytes have the
 * same values as P15 and the last 64 bytes the same values as C15.
 */
static const u8 A19[] = {
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
    0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
    0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
    0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
    0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
    0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
    0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
    0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
    0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
};

/* Expected 16-byte tag */
static const u8 T19[] = {
    0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
    0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
};
2223
/*
 * Test Case 20: key and AAD are the Test Case 1 objects (aliased below).
 * The IV is 64 bytes, of which only the first four are set (to 0xff); the
 * remaining 60 are implicitly zero.  The plaintext is 288 zero bytes.
 */
# define K20 K1
# define A20 A1
/* this results in 0xff in counter LSB */
static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };

/* Plaintext: 288 zero bytes (static storage, no initializer) */
static const u8 P20[288];
/* Expected ciphertext (288 bytes) */
static const u8 C20[] = {
    0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
    0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
    0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
    0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
    0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
    0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
    0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
    0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
    0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
    0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
    0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
    0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
    0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
    0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
    0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
    0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
    0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
    0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
    0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
    0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
    0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
    0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
    0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
    0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
    0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
    0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
    0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
    0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
    0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
    0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
    0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
    0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
    0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
    0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
    0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
    0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
};

/* Expected 16-byte tag */
static const u8 T20[] = {
    0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
    0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
};
2274
/*
 * TEST_CASE(n) runs test vector n in both directions.  It expects |ctx|
 * (GCM128_CONTEXT), |key| (AES_KEY) and |ret| (int) to be in scope at the
 * point of use.
 *
 * Encrypt pass: key the cipher from K<n>, set IV<n>, feed A<n> when it is
 * non-NULL, encrypt P<n> into a zeroed scratch buffer when it is non-NULL,
 * then require CRYPTO_gcm128_finish() to return 0 for T<n> and, when C<n>
 * is non-NULL, the buffer to equal C<n>.
 *
 * Decrypt pass: the same with C<n> as input and P<n> as the reference; the
 * context is only re-seeded with the IV, the key schedule is reused.
 *
 * Each failed pass increments |ret| and prints a diagnostic.
 */
# define TEST_CASE(n)    do {                                            \
        u8 out[sizeof(P##n)];                                           \
                                                                        \
        memset(out, 0, sizeof(out));                                    \
        AES_set_encrypt_key(K##n, sizeof(K##n) * 8, &key);              \
        CRYPTO_gcm128_init(&ctx, &key, (block128_f)AES_encrypt);        \
        CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n));                \
        if (A##n != NULL)                                               \
            CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n));                \
        if (P##n != NULL)                                               \
            CRYPTO_gcm128_encrypt(&ctx, P##n, out, sizeof(out));        \
        if (CRYPTO_gcm128_finish(&ctx, T##n, 16) != 0                   \
            || (C##n != NULL && memcmp(out, C##n, sizeof(out)) != 0)) { \
            ret++;                                                      \
            printf ("encrypt test#%d failed.\n", n);                    \
        }                                                               \
                                                                        \
        CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n));                \
        memset(out, 0, sizeof(out));                                    \
        if (A##n != NULL)                                               \
            CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n));                \
        if (C##n != NULL)                                               \
            CRYPTO_gcm128_decrypt(&ctx, C##n, out, sizeof(out));        \
        if (CRYPTO_gcm128_finish(&ctx, T##n, 16) != 0                   \
            || (P##n != NULL && memcmp(out, P##n, sizeof(out)) != 0)) { \
            ret++;                                                      \
            printf ("decrypt test#%d failed.\n", n);                    \
        }                                                               \
        } while(0)
2294
2295int main()
2296{
2297    GCM128_CONTEXT ctx;
2298    AES_KEY key;
2299    int ret = 0;
2300
2301    TEST_CASE(1);
2302    TEST_CASE(2);
2303    TEST_CASE(3);
2304    TEST_CASE(4);
2305    TEST_CASE(5);
2306    TEST_CASE(6);
2307    TEST_CASE(7);
2308    TEST_CASE(8);
2309    TEST_CASE(9);
2310    TEST_CASE(10);
2311    TEST_CASE(11);
2312    TEST_CASE(12);
2313    TEST_CASE(13);
2314    TEST_CASE(14);
2315    TEST_CASE(15);
2316    TEST_CASE(16);
2317    TEST_CASE(17);
2318    TEST_CASE(18);
2319    TEST_CASE(19);
2320    TEST_CASE(20);
2321
2322# ifdef OPENSSL_CPUID_OBJ
2323    {
2324        size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
2325        union {
2326            u64 u;
2327            u8 c[1024];
2328        } buf;
2329        int i;
2330
2331        AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2332        CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2333        CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2334
2335        CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2336        start = OPENSSL_rdtsc();
2337        CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2338        gcm_t = OPENSSL_rdtsc() - start;
2339
2340        CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2341                              &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2342                              (block128_f) AES_encrypt);
2343        start = OPENSSL_rdtsc();
2344        CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2345                              &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2346                              (block128_f) AES_encrypt);
2347        ctr_t = OPENSSL_rdtsc() - start;
2348
2349        printf("%.2f-%.2f=%.2f\n",
2350               gcm_t / (double)sizeof(buf),
2351               ctr_t / (double)sizeof(buf),
2352               (gcm_t - ctr_t) / (double)sizeof(buf));
2353#  ifdef GHASH
2354        {
2355            void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2356                                 const u8 *inp, size_t len) = ctx.ghash;
2357
2358            GHASH((&ctx), buf.c, sizeof(buf));
2359            start = OPENSSL_rdtsc();
2360            for (i = 0; i < 100; ++i)
2361                GHASH((&ctx), buf.c, sizeof(buf));
2362            gcm_t = OPENSSL_rdtsc() - start;
2363            printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);
2364        }
2365#  endif
2366    }
2367# endif
2368
2369    return ret;
2370}
2371#endif
2372