1/* $NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $"); 31 32#ifdef _KERNEL 33#include <sys/systm.h> 34#include <lib/libkern/libkern.h> 35#else 36#include <assert.h> 37#include <inttypes.h> 38#include <stdio.h> 39#define KASSERT assert 40#endif 41 42#include "aes_ssse3_impl.h" 43 44static inline __m128i 45loadblock(const void *in) 46{ 47 return _mm_loadu_epi8(in); 48} 49 50static inline void 51storeblock(void *out, __m128i block) 52{ 53 _mm_storeu_epi8(out, block); 54} 55 56void 57aes_ssse3_enc(const struct aesenc *enc, const uint8_t in[static 16], 58 uint8_t out[static 16], uint32_t nrounds) 59{ 60 __m128i block; 61 62 block = loadblock(in); 63 block = aes_ssse3_enc1(enc, block, nrounds); 64 storeblock(out, block); 65} 66 67void 68aes_ssse3_dec(const struct aesdec *dec, const uint8_t in[static 16], 69 uint8_t out[static 16], uint32_t nrounds) 70{ 71 __m128i block; 72 73 block = loadblock(in); 74 block = aes_ssse3_dec1(dec, block, nrounds); 75 storeblock(out, block); 76} 77 78void 79aes_ssse3_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16], 80 uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 81 uint32_t nrounds) 82{ 83 __m128i cv; 84 85 KASSERT(nbytes); 86 87 cv = loadblock(iv); 88 for (; nbytes; nbytes -= 16, in += 16, out += 16) { 89 cv ^= loadblock(in); 90 cv = aes_ssse3_enc1(enc, cv, nrounds); 91 storeblock(out, cv); 92 } 93 storeblock(iv, cv); 94} 95 96void 97aes_ssse3_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16], 98 uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 99 uint32_t nrounds) 100{ 101 __m128i iv0, cv, b; 102 103 KASSERT(nbytes); 104 KASSERT(nbytes % 16 == 0); 105 106 iv0 = loadblock(iv); 107 cv = loadblock(in + nbytes - 16); 108 storeblock(iv, cv); 109 110 for (;;) { 111 b = aes_ssse3_dec1(dec, cv, nrounds); 112 if ((nbytes -= 16) == 0) 113 break; 114 cv = loadblock(in + nbytes - 16); 115 storeblock(out + nbytes, b ^ cv); 116 } 117 storeblock(out, b ^ iv0); 118} 119 120static inline __m128i 121aes_ssse3_xts_update(__m128i t) 122{ 123 const __m128i one = _mm_set_epi64x(1, 1); 124 __m128i s, m, c; 125 126 s = _mm_srli_epi64(t, 63); /* 1 if high bit set else 0 */ 127 m = _mm_sub_epi64(s, one); /* 0 if high bit set else -1 */ 128 m = _mm_shuffle_epi32(m, 0x4e); /* swap halves */ 129 c = _mm_set_epi64x(1, 0x87); /* carry */ 130 131 return _mm_slli_epi64(t, 1) ^ (c & ~m); 132} 133 134static int 135aes_ssse3_xts_update_selftest(void) 136{ 137 static const struct { 138 uint32_t in[4], out[4]; 139 } cases[] = { 140 [0] = { {1}, {2} }, 141 [1] = { {0x80000000U,0,0,0}, {0,1,0,0} }, 142 [2] = { {0,0x80000000U,0,0}, {0,0,1,0} }, 143 [3] = { {0,0,0x80000000U,0}, {0,0,0,1} }, 144 [4] = { {0,0,0,0x80000000U}, {0x87,0,0,0} }, 145 [5] = { {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} }, 146 }; 147 unsigned i; 148 uint32_t t[4]; 149 int result = 0; 150 151 for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { 152 t[0] = cases[i].in[0]; 153 t[1] = cases[i].in[1]; 154 t[2] = cases[i].in[2]; 155 t[3] = cases[i].in[3]; 156 storeblock(t, aes_ssse3_xts_update(loadblock(t))); 157 if (t[0] != cases[i].out[0] || 158 t[1] != cases[i].out[1] || 159 t[2] != cases[i].out[2] || 160 t[3] != cases[i].out[3]) { 161 printf("%s %u:" 162 " %"PRIx32" %"PRIx32" %"PRIx32" %"PRIx32"\n", 163 __func__, i, t[0], t[1], t[2], t[3]); 164 result = -1; 165 } 166 } 167 168 return result; 169} 170 171void 172aes_ssse3_xts_enc(const struct aesenc *enc, const uint8_t in[static 16], 173 uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 174 uint32_t nrounds) 175{ 176 __m128i t, b; 177 178 KASSERT(nbytes); 179 KASSERT(nbytes % 16 == 0); 180 181 t = loadblock(tweak); 182 for (; nbytes; nbytes -= 16, in += 16, out += 16) { 183 b = t ^ loadblock(in); 184 b = aes_ssse3_enc1(enc, b, nrounds); 185 storeblock(out, t ^ b); 186 t = aes_ssse3_xts_update(t); 187 } 188 storeblock(tweak, t); 189} 190 191void 192aes_ssse3_xts_dec(const struct aesdec *dec, const uint8_t in[static 16], 193 uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 194 uint32_t nrounds) 195{ 196 __m128i t, b; 197 198 KASSERT(nbytes); 199 KASSERT(nbytes % 16 == 0); 200 201 t = loadblock(tweak); 202 for (; nbytes; nbytes -= 16, in += 16, out += 16) { 203 b = t ^ loadblock(in); 204 b = aes_ssse3_dec1(dec, b, nrounds); 205 storeblock(out, t ^ b); 206 t = aes_ssse3_xts_update(t); 207 } 208 storeblock(tweak, t); 209} 210 211void 212aes_ssse3_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], 213 size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds) 214{ 215 __m128i auth; 216 217 KASSERT(nbytes); 218 KASSERT(nbytes % 16 == 0); 219 220 auth = loadblock(auth0); 221 for (; nbytes; nbytes -= 16, in += 16) 222 auth = aes_ssse3_enc1(enc, auth ^ loadblock(in), nrounds); 223 storeblock(auth0, auth); 224} 225 226void 227aes_ssse3_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16], 228 uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], 229 uint32_t nrounds) 230{ 231 const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0); 232 const __m128i bs32 = 233 _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); 234 __m128i auth, ctr_be, ctr, ptxt; 235 236 KASSERT(nbytes); 237 KASSERT(nbytes % 16 == 0); 238 239 auth = loadblock(authctr); 240 ctr_be = loadblock(authctr + 16); 241 ctr = _mm_shuffle_epi8(ctr_be, bs32); 242 for (; nbytes; nbytes -= 16, in += 16, out += 16) { 243 ptxt = loadblock(in); 244 auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds); 245 ctr = _mm_add_epi32(ctr, ctr32_inc); 246 ctr_be = _mm_shuffle_epi8(ctr, bs32); 247 storeblock(out, ptxt ^ aes_ssse3_enc1(enc, ctr_be, nrounds)); 248 } 249 storeblock(authctr, auth); 250 storeblock(authctr + 16, ctr_be); 251} 252 253void 254aes_ssse3_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16], 255 uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], 256 uint32_t nrounds) 257{ 258 const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0); 259 const __m128i bs32 = 260 _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); 261 __m128i auth, ctr_be, ctr, ptxt; 262 263 KASSERT(nbytes); 264 KASSERT(nbytes % 16 == 0); 265 266 auth = loadblock(authctr); 267 ctr_be = loadblock(authctr + 16); 268 ctr = _mm_shuffle_epi8(ctr_be, bs32); 269 for (; nbytes; nbytes -= 16, in += 16, out += 16) { 270 ctr = _mm_add_epi32(ctr, ctr32_inc); 271 ctr_be = _mm_shuffle_epi8(ctr, bs32); 272 ptxt = loadblock(in) ^ aes_ssse3_enc1(enc, ctr_be, nrounds); 273 storeblock(out, ptxt); 274 auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds); 275 } 276 storeblock(authctr, auth); 277 storeblock(authctr + 16, ctr_be); 278} 279 280int 281aes_ssse3_selftest(void) 282{ 283 284 if (aes_ssse3_xts_update_selftest()) 285 return -1; 286 287 return 0; 288} 289