aesni_wrap.c revision 267815
1210409Skib/*- 2247061Spjd * Copyright (C) 2008 Damien Miller <djm@mindrot.org> 3210409Skib * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> 4226839Spjd * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> 5255187Sjmg * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org> 6210409Skib * All rights reserved. 7210409Skib * 8210409Skib * Redistribution and use in source and binary forms, with or without 9210409Skib * modification, are permitted provided that the following conditions 10210409Skib * are met: 11210409Skib * 1. Redistributions of source code must retain the above copyright 12210409Skib * notice, this list of conditions and the following disclaimer. 13210409Skib * 2. Redistributions in binary form must reproduce the above copyright 14210409Skib * notice, this list of conditions and the following disclaimer in the 15210409Skib * documentation and/or other materials provided with the distribution. 16210409Skib * 17210409Skib * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18210409Skib * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19210409Skib * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20210409Skib * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21210409Skib * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22210409Skib * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23210409Skib * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24210409Skib * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25210409Skib * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26210409Skib * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27210409Skib * SUCH DAMAGE. 28210409Skib */ 29210409Skib 30210409Skib#include <sys/cdefs.h> 31210409Skib__FBSDID("$FreeBSD: head/sys/crypto/aesni/aesni_wrap.c 267815 2014-06-24 06:55:49Z kib $"); 32255187Sjmg 33210409Skib#include <sys/param.h> 34210409Skib#include <sys/libkern.h> 35210409Skib#include <sys/malloc.h> 36210409Skib#include <sys/proc.h> 37210409Skib#include <sys/systm.h> 38210409Skib#include <crypto/aesni/aesni.h> 39255187Sjmg 40255187Sjmg#include "aesencdec.h" 41210409Skib 42210409SkibMALLOC_DECLARE(M_AESNI); 43210409Skib 44257757Sjmgstruct blocks8 { 45257757Sjmg __m128i blk[8]; 46257757Sjmg} __packed; 47257757Sjmg 48210409Skibvoid 49210409Skibaesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len, 50210409Skib const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]) 51210409Skib{ 52255187Sjmg __m128i tot, ivreg; 53210409Skib size_t i; 54210409Skib 55210409Skib len /= AES_BLOCK_LEN; 56255187Sjmg ivreg = _mm_loadu_si128((const __m128i *)iv); 57210409Skib for (i = 0; i < len; i++) { 58255187Sjmg tot = aesni_enc(rounds - 1, key_schedule, 59255187Sjmg _mm_loadu_si128((const __m128i *)from) ^ ivreg); 60255187Sjmg ivreg = tot; 61255187Sjmg _mm_storeu_si128((__m128i *)to, tot); 62210409Skib from += AES_BLOCK_LEN; 63210409Skib to += AES_BLOCK_LEN; 64210409Skib } 65210409Skib} 66210409Skib 67210409Skibvoid 68255187Sjmgaesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len, 69255187Sjmg uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN]) 70255187Sjmg{ 71255187Sjmg __m128i blocks[8]; 72257757Sjmg struct blocks8 *blks; 73255187Sjmg __m128i ivreg, nextiv; 74255187Sjmg size_t i, j, cnt; 75255187Sjmg 76255187Sjmg ivreg = _mm_loadu_si128((const __m128i *)iv); 77255187Sjmg cnt = len / AES_BLOCK_LEN / 8; 78255187Sjmg for (i = 0; i < cnt; i++) { 79257757Sjmg blks = (struct blocks8 *)buf; 80257757Sjmg aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1], 81257757Sjmg blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5], 82257757Sjmg blks->blk[6], blks->blk[7], &blocks[0]); 83255187Sjmg for (j = 0; j < 8; j++) { 84257757Sjmg nextiv = blks->blk[j]; 85257757Sjmg blks->blk[j] = blocks[j] ^ ivreg; 86255187Sjmg ivreg = nextiv; 87255187Sjmg } 88255187Sjmg buf += AES_BLOCK_LEN * 8; 89255187Sjmg } 90255187Sjmg i *= 8; 91255187Sjmg cnt = len / AES_BLOCK_LEN; 92255187Sjmg for (; i < cnt; i++) { 93257757Sjmg nextiv = _mm_loadu_si128((void *)buf); 94257757Sjmg _mm_storeu_si128((void *)buf, 95257757Sjmg aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg); 96255187Sjmg ivreg = nextiv; 97255187Sjmg buf += AES_BLOCK_LEN; 98255187Sjmg } 99255187Sjmg} 100255187Sjmg 101255187Sjmgvoid 102210409Skibaesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len, 103255187Sjmg const uint8_t *from, uint8_t *to) 104210409Skib{ 105255187Sjmg __m128i tot; 106257757Sjmg __m128i tout[8]; 107257757Sjmg struct blocks8 *top; 108257757Sjmg const struct blocks8 *blks; 109255187Sjmg size_t i, cnt; 110210409Skib 111255187Sjmg cnt = len / AES_BLOCK_LEN / 8; 112255187Sjmg for (i = 0; i < cnt; i++) { 113257757Sjmg blks = (const struct blocks8 *)from; 114257757Sjmg top = (struct blocks8 *)to; 115257757Sjmg aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1], 116257757Sjmg blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5], 117257757Sjmg blks->blk[6], blks->blk[7], tout); 118257757Sjmg top->blk[0] = tout[0]; 119257757Sjmg top->blk[1] = tout[1]; 120257757Sjmg top->blk[2] = tout[2]; 121257757Sjmg top->blk[3] = tout[3]; 122257757Sjmg top->blk[4] = tout[4]; 123257757Sjmg top->blk[5] = tout[5]; 124257757Sjmg top->blk[6] = tout[6]; 125257757Sjmg top->blk[7] = tout[7]; 126255187Sjmg from += AES_BLOCK_LEN * 8; 127255187Sjmg to += AES_BLOCK_LEN * 8; 128255187Sjmg } 129255187Sjmg i *= 8; 130255187Sjmg cnt = len / AES_BLOCK_LEN; 131255187Sjmg for (; i < cnt; i++) { 132255187Sjmg tot = aesni_enc(rounds - 1, key_schedule, 133255187Sjmg _mm_loadu_si128((const __m128i *)from)); 134255187Sjmg _mm_storeu_si128((__m128i *)to, tot); 135210409Skib from += AES_BLOCK_LEN; 136210409Skib to += AES_BLOCK_LEN; 137210409Skib } 138210409Skib} 139210409Skib 140210409Skibvoid 141210409Skibaesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, 142210409Skib const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]) 143210409Skib{ 144255187Sjmg __m128i tot; 145257757Sjmg __m128i tout[8]; 146257757Sjmg const struct blocks8 *blks; 147257757Sjmg struct blocks8 *top; 148255187Sjmg size_t i, cnt; 149210409Skib 150255187Sjmg cnt = len / AES_BLOCK_LEN / 8; 151255187Sjmg for (i = 0; i < cnt; i++) { 152257757Sjmg blks = (const struct blocks8 *)from; 153257757Sjmg top = (struct blocks8 *)to; 154257757Sjmg aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1], 155257757Sjmg blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5], 156257757Sjmg blks->blk[6], blks->blk[7], tout); 157257757Sjmg top->blk[0] = tout[0]; 158257757Sjmg top->blk[1] = tout[1]; 159257757Sjmg top->blk[2] = tout[2]; 160257757Sjmg top->blk[3] = tout[3]; 161257757Sjmg top->blk[4] = tout[4]; 162257757Sjmg top->blk[5] = tout[5]; 163257757Sjmg top->blk[6] = tout[6]; 164257757Sjmg top->blk[7] = tout[7]; 165255187Sjmg from += AES_BLOCK_LEN * 8; 166255187Sjmg to += AES_BLOCK_LEN * 8; 167255187Sjmg } 168255187Sjmg i *= 8; 169255187Sjmg cnt = len / AES_BLOCK_LEN; 170255187Sjmg for (; i < cnt; i++) { 171255187Sjmg tot = aesni_dec(rounds - 1, key_schedule, 172255187Sjmg _mm_loadu_si128((const __m128i *)from)); 173255187Sjmg _mm_storeu_si128((__m128i *)to, tot); 174210409Skib from += AES_BLOCK_LEN; 175210409Skib to += AES_BLOCK_LEN; 176210409Skib } 177210409Skib} 178210409Skib 179213069Spjd#define AES_XTS_BLOCKSIZE 16 180213069Spjd#define AES_XTS_IVSIZE 8 181213069Spjd#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ 182213069Spjd 183255187Sjmgstatic inline __m128i 184255187Sjmgxts_crank_lfsr(__m128i inp) 185255187Sjmg{ 186255187Sjmg const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA); 187255187Sjmg __m128i xtweak, ret; 188255187Sjmg 189255187Sjmg /* set up xor mask */ 190255187Sjmg xtweak = _mm_shuffle_epi32(inp, 0x93); 191255187Sjmg xtweak = _mm_srai_epi32(xtweak, 31); 192255187Sjmg xtweak &= alphamask; 193255187Sjmg 194255187Sjmg /* next term */ 195255187Sjmg ret = _mm_slli_epi32(inp, 1); 196255187Sjmg ret ^= xtweak; 197255187Sjmg 198255187Sjmg return ret; 199255187Sjmg} 200255187Sjmg 201213069Spjdstatic void 202257757Sjmgaesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak, 203257757Sjmg const uint8_t *from, uint8_t *to, int do_encrypt) 204213069Spjd{ 205255187Sjmg __m128i block; 206213069Spjd 207257757Sjmg block = _mm_loadu_si128((const __m128i *)from) ^ *tweak; 208213069Spjd 209213069Spjd if (do_encrypt) 210255187Sjmg block = aesni_enc(rounds - 1, key_schedule, block); 211213069Spjd else 212255187Sjmg block = aesni_dec(rounds - 1, key_schedule, block); 213213069Spjd 214257757Sjmg _mm_storeu_si128((__m128i *)to, block ^ *tweak); 215213069Spjd 216255187Sjmg *tweak = xts_crank_lfsr(*tweak); 217255187Sjmg} 218226837Spjd 219255187Sjmgstatic void 220257757Sjmgaesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak, 221257757Sjmg const uint8_t *from, uint8_t *to, int do_encrypt) 222255187Sjmg{ 223255187Sjmg __m128i tmptweak; 224255187Sjmg __m128i a, b, c, d, e, f, g, h; 225255187Sjmg __m128i tweaks[8]; 226255187Sjmg __m128i tmp[8]; 227257757Sjmg __m128i *top; 228257757Sjmg const __m128i *fromp; 229255187Sjmg 230255187Sjmg tmptweak = *tweak; 231255187Sjmg 232255187Sjmg /* 233255187Sjmg * unroll the loop. This lets gcc put values directly in the 234255187Sjmg * register and saves memory accesses. 235255187Sjmg */ 236257757Sjmg fromp = (const __m128i *)from; 237255187Sjmg#define PREPINP(v, pos) \ 238255187Sjmg do { \ 239255187Sjmg tweaks[(pos)] = tmptweak; \ 240257757Sjmg (v) = _mm_loadu_si128(&fromp[pos]) ^ \ 241257757Sjmg tmptweak; \ 242255187Sjmg tmptweak = xts_crank_lfsr(tmptweak); \ 243255187Sjmg } while (0) 244255187Sjmg PREPINP(a, 0); 245255187Sjmg PREPINP(b, 1); 246255187Sjmg PREPINP(c, 2); 247255187Sjmg PREPINP(d, 3); 248255187Sjmg PREPINP(e, 4); 249255187Sjmg PREPINP(f, 5); 250255187Sjmg PREPINP(g, 6); 251255187Sjmg PREPINP(h, 7); 252255187Sjmg *tweak = tmptweak; 253255187Sjmg 254255187Sjmg if (do_encrypt) 255255187Sjmg aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h, 256255187Sjmg tmp); 257255187Sjmg else 258255187Sjmg aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h, 259255187Sjmg tmp); 260255187Sjmg 261257757Sjmg top = (__m128i *)to; 262257757Sjmg _mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]); 263257757Sjmg _mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]); 264257757Sjmg _mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]); 265257757Sjmg _mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]); 266257757Sjmg _mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]); 267257757Sjmg _mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]); 268257757Sjmg _mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]); 269257757Sjmg _mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]); 270213069Spjd} 271213069Spjd 272213069Spjdstatic void 273257757Sjmgaesni_crypt_xts(int rounds, const __m128i *data_schedule, 274257757Sjmg const __m128i *tweak_schedule, size_t len, const uint8_t *from, 275257757Sjmg uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt) 276213069Spjd{ 277255187Sjmg __m128i tweakreg; 278255187Sjmg uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16); 279255187Sjmg size_t i, cnt; 280213069Spjd 281213069Spjd /* 282213069Spjd * Prepare tweak as E_k2(IV). IV is specified as LE representation 283213069Spjd * of a 64-bit block number which we allow to be passed in directly. 284213069Spjd */ 285226837Spjd#if BYTE_ORDER == LITTLE_ENDIAN 286226837Spjd bcopy(iv, tweak, AES_XTS_IVSIZE); 287213069Spjd /* Last 64 bits of IV are always zero. */ 288213069Spjd bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE); 289226837Spjd#else 290226837Spjd#error Only LITTLE_ENDIAN architectures are supported. 291226837Spjd#endif 292255187Sjmg tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]); 293255187Sjmg tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg); 294213069Spjd 295255187Sjmg cnt = len / AES_XTS_BLOCKSIZE / 8; 296255187Sjmg for (i = 0; i < cnt; i++) { 297255187Sjmg aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg, 298257757Sjmg from, to, do_encrypt); 299255187Sjmg from += AES_XTS_BLOCKSIZE * 8; 300255187Sjmg to += AES_XTS_BLOCKSIZE * 8; 301255187Sjmg } 302255187Sjmg i *= 8; 303255187Sjmg cnt = len / AES_XTS_BLOCKSIZE; 304255187Sjmg for (; i < cnt; i++) { 305255187Sjmg aesni_crypt_xts_block(rounds, data_schedule, &tweakreg, 306257757Sjmg from, to, do_encrypt); 307213069Spjd from += AES_XTS_BLOCKSIZE; 308213069Spjd to += AES_XTS_BLOCKSIZE; 309213069Spjd } 310213069Spjd} 311213069Spjd 312255187Sjmgvoid 313213069Spjdaesni_encrypt_xts(int rounds, const void *data_schedule, 314213069Spjd const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, 315213069Spjd const uint8_t iv[AES_BLOCK_LEN]) 316213069Spjd{ 317213069Spjd 318213069Spjd aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to, 319213069Spjd iv, 1); 320213069Spjd} 321213069Spjd 322255187Sjmgvoid 323213069Spjdaesni_decrypt_xts(int rounds, const void *data_schedule, 324213069Spjd const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, 325213069Spjd const uint8_t iv[AES_BLOCK_LEN]) 326213069Spjd{ 327213069Spjd 328213069Spjd aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to, 329213069Spjd iv, 0); 330213069Spjd} 331213069Spjd 332267815Skibint 333213066Spjdaesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, 334213066Spjd int keylen) 335210409Skib{ 336210409Skib 337213069Spjd switch (ses->algo) { 338213069Spjd case CRYPTO_AES_CBC: 339213069Spjd switch (keylen) { 340213069Spjd case 128: 341213069Spjd ses->rounds = AES128_ROUNDS; 342213069Spjd break; 343213069Spjd case 192: 344213069Spjd ses->rounds = AES192_ROUNDS; 345213069Spjd break; 346213069Spjd case 256: 347213069Spjd ses->rounds = AES256_ROUNDS; 348213069Spjd break; 349213069Spjd default: 350213069Spjd return (EINVAL); 351213069Spjd } 352210409Skib break; 353213069Spjd case CRYPTO_AES_XTS: 354213069Spjd switch (keylen) { 355213069Spjd case 256: 356213069Spjd ses->rounds = AES128_ROUNDS; 357213069Spjd break; 358213069Spjd case 512: 359213069Spjd ses->rounds = AES256_ROUNDS; 360213069Spjd break; 361213069Spjd default: 362213069Spjd return (EINVAL); 363213069Spjd } 364210409Skib break; 365210409Skib default: 366210409Skib return (EINVAL); 367210409Skib } 368213069Spjd 369213066Spjd aesni_set_enckey(key, ses->enc_schedule, ses->rounds); 370213066Spjd aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds); 371213166Spjd if (ses->algo == CRYPTO_AES_CBC) 372213069Spjd arc4rand(ses->iv, sizeof(ses->iv), 0); 373213069Spjd else /* if (ses->algo == CRYPTO_AES_XTS) */ { 374213069Spjd aesni_set_enckey(key + keylen / 16, ses->xts_schedule, 375213069Spjd ses->rounds); 376213069Spjd } 377210409Skib 378213066Spjd return (0); 379210409Skib} 380