1/*-
2 * Copyright (c) 2014 The FreeBSD Foundation
3 * Copyright (c) 2018 iXsystems, Inc
4 * All rights reserved.
5 *
6 * This software was developed by John-Mark Gurney under
7 * the sponsorship of the FreeBSD Foundation and
8 * Rubicon Communications, LLC (Netgate).
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1.  Redistributions of source code must retain the above copyright
13 *     notice, this list of conditions and the following disclaimer.
14 * 2.  Redistributions in binary form must reproduce the above copyright
15 *     notice, this list of conditions and the following disclaimer in the
16 *     documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 *
31 *	$FreeBSD$
32 *
33 * This file implements AES-CCM+CBC-MAC, as described
34 * at https://tools.ietf.org/html/rfc3610, using Intel's
35 * AES-NI instructions.
36 *
37 */
38
39#include <sys/types.h>
40#include <sys/endian.h>
41#include <sys/param.h>
42
43#include <sys/systm.h>
44#include <crypto/aesni/aesni.h>
45#include <crypto/aesni/aesni_os.h>
46#include <crypto/aesni/aesencdec.h>
/*
 * Encrypt the 128-bit block d with key material k by calling
 * aesni_enc(); the round argument handed to aesni_enc() is nr - 1.
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. "rounds + 1" or "keys + off") expand with the intended
 * precedence.
 */
#define AESNI_ENC(d, k, nr)	aesni_enc((nr) - 1, (const __m128i *)(k), (d))
48
49#include <wmmintrin.h>
50#include <emmintrin.h>
51#include <smmintrin.h>
52
53/*
54 * Encrypt a single 128-bit block after
55 * doing an xor.  This is also used to
56 * decrypt (yay symmetric encryption).
57 */
58static inline __m128i
59xor_and_encrypt(__m128i a, __m128i b, const unsigned char *k, int nr)
60{
61	__m128i retval = _mm_xor_si128(a, b);
62
63	retval = AESNI_ENC(retval, k, nr);
64	return (retval);
65}
66
/*
 * Store the low-order bytes of value, most significant byte first,
 * in the tail of block.  Bytes are written from the last byte of the
 * block backwards, and writing stops before index
 * sizeof(*block) - offset is reached; for an offset between 1 and
 * sizeof(*block) that stores offset - 1 bytes.  Everything in front
 * of that region is left untouched.
 */
static void
append_int(size_t value, __m128i *block, size_t offset)
{
	uint8_t *bytes = (uint8_t *)block;
	/* Highest index that must NOT be written (unsigned arithmetic). */
	const size_t stop = sizeof(*block) - offset;
	size_t pos;

	for (pos = sizeof(*block) - 1; pos > stop; pos--) {
		bytes[pos] = value & 0xff;
		value >>= 8;
	}
}
84
85/*
86 * Start the CBC-MAC process.  This handles the auth data.
87 */
88static __m128i
89cbc_mac_start(const unsigned char *auth_data, size_t auth_len,
90	     const unsigned char *nonce, size_t nonce_len,
91	     const unsigned char *key, int nr,
92	     size_t data_len, size_t tag_len)
93{
94	__m128i cbc_block, staging_block;
95	uint8_t *byte_ptr;
96	/* This defines where the message length goes */
97	int L = sizeof(__m128i) - 1 - nonce_len;
98
99	/*
100	 * Set up B0 here.  This has the flags byte,
101	 * followed by the nonce, followed by the
102	 * length of the message.
103	 */
104	cbc_block = _mm_setzero_si128();
105	byte_ptr = (uint8_t*)&cbc_block;
106	byte_ptr[0] = ((auth_len > 0) ? 1 : 0) * 64 |
107		(((tag_len - 2) / 2) * 8) |
108		(L - 1);
109	bcopy(nonce, byte_ptr + 1, nonce_len);
110	append_int(data_len, &cbc_block, L+1);
111	cbc_block = AESNI_ENC(cbc_block, key, nr);
112
113	if (auth_len != 0) {
114		/*
115		 * We need to start by appending the length descriptor.
116		 */
117		uint32_t auth_amt;
118		size_t copy_amt;
119		const uint8_t *auth_ptr = auth_data;
120
121		staging_block = _mm_setzero_si128();
122
123		/*
124		 * The current OCF calling convention means that
125		 * there can never be more than 4g of authentication
126		 * data, so we don't handle the 0xffff case.
127		 */
128		KASSERT(auth_len < (1ULL << 32),
129		    ("%s: auth_len (%zu) larger than 4GB",
130			__FUNCTION__, auth_len));
131
132		if (auth_len < ((1 << 16) - (1 << 8))) {
133			/*
134			 * If the auth data length is less than
135			 * 0xff00, we don't need to encode a length
136			 * specifier, just the length of the auth
137			 * data.
138			 */
139			be16enc(&staging_block, auth_len);
140			auth_amt = 2;
141		} else if (auth_len < (1ULL << 32)) {
142			/*
143			 * Two bytes for the length prefix, and then
144			 * four bytes for the length.  This makes a total
145			 * of 6 bytes to describe the auth data length.
146			 */
147			be16enc(&staging_block, 0xfffe);
148			be32enc((char*)&staging_block + 2, auth_len);
149			auth_amt = 6;
150		} else
151			panic("%s: auth len too large", __FUNCTION__);
152
153		/*
154		 * Need to copy abytes into blocks.  The first block is
155		 * already partially filled, by auth_amt, so we need
156		 * to handle that.  The last block needs to be zero padded.
157		 */
158		copy_amt = MIN(auth_len,
159		    sizeof(staging_block) - auth_amt);
160		byte_ptr = (uint8_t*)&staging_block;
161		bcopy(auth_ptr, &byte_ptr[auth_amt], copy_amt);
162		auth_ptr += copy_amt;
163
164		cbc_block = xor_and_encrypt(cbc_block, staging_block, key, nr);
165
166		while (auth_ptr < auth_data + auth_len) {
167			copy_amt = MIN((auth_data + auth_len) - auth_ptr,
168			    sizeof(staging_block));
169			if (copy_amt < sizeof(staging_block))
170				bzero(&staging_block, sizeof(staging_block));
171			bcopy(auth_ptr, &staging_block, copy_amt);
172			cbc_block = xor_and_encrypt(cbc_block, staging_block,
173			    key, nr);
174			auth_ptr += copy_amt;
175		}
176	}
177	return (cbc_block);
178}
179
180/*
181 * Implement AES CCM+CBC-MAC encryption and authentication.
182 *
183 * A couple of notes:
184 * The specification allows for a different number of tag lengths;
185 * however, they're always truncated from 16 bytes, and the tag
186 * length isn't passed in.  (This could be fixed by changing the
187 * code in aesni.c:aesni_cipher_crypt().)
188 * Similarly, although the nonce length is passed in, the
189 * OpenCrypto API that calls us doesn't have a way to set the nonce
190 * other than by having different crypto algorithm types.  As a result,
191 * this is currently always called with nlen=12; this means that we
192 * also have a maximum message length of 16 megabytes.  And similarly,
193 * since abytes is limited to a 32 bit value here, the AAD is
194 * limited to 4 gigabytes or less.
195 */
196void
197AES_CCM_encrypt(const unsigned char *in, unsigned char *out,
198		const unsigned char *addt, const unsigned char *nonce,
199		unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
200		const unsigned char *key, int nr)
201{
202	static const int tag_length = 16;	/* 128 bits */
203	int L;
204	int counter = 1;	/* S0 has 0, S1 has 1 */
205	size_t copy_amt, total = 0;
206	uint8_t *byte_ptr;
207	__m128i s0, rolling_mac, s_x, staging_block;
208
209	if (nbytes == 0 && abytes == 0)
210		return;
211
212	/* NIST 800-38c section A.1 says n is [7, 13]. */
213	if (nlen < 7 || nlen > 13)
214		panic("%s: bad nonce length %d", __FUNCTION__, nlen);
215
216	/*
217	 * We need to know how many bytes to use to describe
218	 * the length of the data.  Normally, nlen should be
219	 * 12, which leaves us 3 bytes to do that -- 16mbytes of
220	 * data to encrypt.  But it can be longer or shorter;
221	 * this impacts the length of the message.
222	 */
223	L = sizeof(__m128i) - 1 - nlen;
224
225	/*
226	 * Now, this shouldn't happen, but let's make sure that
227	 * the data length isn't too big.
228	 */
229	KASSERT(nbytes <= ((1 << (8 * L)) - 1),
230	    ("%s: nbytes is %u, but length field is %d bytes",
231		__FUNCTION__, nbytes, L));
232
233	/*
234	 * Clear out the blocks
235	 */
236	s0 = _mm_setzero_si128();
237
238	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
239	    key, nr, nbytes, tag_length);
240
241	/* s0 has flags, nonce, and then 0 */
242	byte_ptr = (uint8_t*)&s0;
243	byte_ptr[0] = L - 1;	/* but the flags byte only has L' */
244	bcopy(nonce, &byte_ptr[1], nlen);
245
246	/*
247	 * Now to cycle through the rest of the data.
248	 */
249	bcopy(&s0, &s_x, sizeof(s0));
250
251	while (total < nbytes) {
252		/*
253		 * Copy the plain-text data into staging_block.
254		 * This may need to be zero-padded.
255		 */
256		copy_amt = MIN(nbytes - total, sizeof(staging_block));
257		bcopy(in+total, &staging_block, copy_amt);
258		if (copy_amt < sizeof(staging_block)) {
259			byte_ptr = (uint8_t*)&staging_block;
260			bzero(&byte_ptr[copy_amt],
261			    sizeof(staging_block) - copy_amt);
262		}
263		rolling_mac = xor_and_encrypt(rolling_mac, staging_block,
264		    key, nr);
265		/* Put the counter into the s_x block */
266		append_int(counter++, &s_x, L+1);
267		/* Encrypt that */
268		__m128i X = AESNI_ENC(s_x, key, nr);
269		/* XOR the plain-text with the encrypted counter block */
270		staging_block = _mm_xor_si128(staging_block, X);
271		/* And copy it out */
272		bcopy(&staging_block, out+total, copy_amt);
273		total += copy_amt;
274	}
275	/*
276	 * Allegedly done with it!  Except for the tag.
277	 */
278	s0 = AESNI_ENC(s0, key, nr);
279	staging_block = _mm_xor_si128(s0, rolling_mac);
280	bcopy(&staging_block, tag, tag_length);
281	explicit_bzero(&s0, sizeof(s0));
282	explicit_bzero(&staging_block, sizeof(staging_block));
283	explicit_bzero(&s_x, sizeof(s_x));
284	explicit_bzero(&rolling_mac, sizeof(rolling_mac));
285}
286
287/*
288 * Implement AES CCM+CBC-MAC decryption and authentication.
289 * Returns 0 on failure, 1 on success.
290 *
291 * The primary difference here is that each encrypted block
292 * needs to be hashed&encrypted after it is decrypted (since
293 * the CBC-MAC is based on the plain text).  This means that
294 * we do the decryption twice -- first to verify the tag,
295 * and second to decrypt and copy it out.
296 *
297 * To avoid annoying code copying, we implement the main
298 * loop as a separate function.
299 *
300 * Call with out as NULL to not store the decrypted results;
301 * call with hashp as NULL to not run the authentication.
302 * Calling with neither as NULL does the decryption and
303 * authentication as a single pass (which is not allowed
304 * per the specification, really).
305 *
306 * If hashp is non-NULL, it points to the post-AAD computed
307 * checksum.
308 */
309static void
310decrypt_loop(const unsigned char *in, unsigned char *out, size_t nbytes,
311    __m128i s0, size_t nonce_length, __m128i *macp,
312    const unsigned char *key, int nr)
313{
314	size_t total = 0;
315	__m128i s_x = s0, mac_block;
316	int counter = 1;
317	const size_t L = sizeof(__m128i) - 1 - nonce_length;
318	__m128i pad_block, staging_block;
319
320	/*
321	 * The starting mac (post AAD, if any).
322	 */
323	if (macp != NULL)
324		mac_block = *macp;
325
326	while (total < nbytes) {
327		size_t copy_amt = MIN(nbytes - total, sizeof(staging_block));
328
329		if (copy_amt < sizeof(staging_block)) {
330			staging_block = _mm_setzero_si128();
331		}
332		bcopy(in+total, &staging_block, copy_amt);
333
334		/*
335		 * staging_block has the current block of input data,
336		 * zero-padded if necessary.  This is used in computing
337		 * both the decrypted data, and the authentication tag.
338		 */
339		append_int(counter++, &s_x, L+1);
340		/*
341		 * The tag is computed based on the decrypted data.
342		 */
343		pad_block = AESNI_ENC(s_x, key, nr);
344		if (copy_amt < sizeof(staging_block)) {
345			/*
346			 * Need to pad out pad_block with 0.
347			 * (staging_block was set to 0's above.)
348			 */
349			uint8_t *end_of_buffer = (uint8_t*)&pad_block;
350			bzero(end_of_buffer + copy_amt,
351			    sizeof(pad_block) - copy_amt);
352		}
353		staging_block = _mm_xor_si128(staging_block, pad_block);
354
355		if (out)
356			bcopy(&staging_block, out+total, copy_amt);
357
358		if (macp)
359			mac_block = xor_and_encrypt(mac_block, staging_block,
360			    key, nr);
361		total += copy_amt;
362	}
363
364	if (macp)
365		*macp = mac_block;
366
367	explicit_bzero(&pad_block, sizeof(pad_block));
368	explicit_bzero(&staging_block, sizeof(staging_block));
369	explicit_bzero(&mac_block, sizeof(mac_block));
370}
371
372/*
373 * The exposed decryption routine.  This is practically a
374 * copy of the encryption routine, except that the order
375 * in which the tag is created is changed.
376 * XXX combine the two functions at some point!
377 */
378int
379AES_CCM_decrypt(const unsigned char *in, unsigned char *out,
380		const unsigned char *addt, const unsigned char *nonce,
381		const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
382		const unsigned char *key, int nr)
383{
384	static const int tag_length = 16;	/* 128 bits */
385	int L;
386	__m128i s0, rolling_mac, staging_block;
387	uint8_t *byte_ptr;
388
389	if (nbytes == 0 && abytes == 0)
390		return (1);	// No message means no decryption!
391	if (nlen < 0 || nlen > 15)
392		panic("%s: bad nonce length %d", __FUNCTION__, nlen);
393
394	/*
395	 * We need to know how many bytes to use to describe
396	 * the length of the data.  Normally, nlen should be
397	 * 12, which leaves us 3 bytes to do that -- 16mbytes of
398	 * data to encrypt.  But it can be longer or shorter.
399	 */
400	L = sizeof(__m128i) - 1 - nlen;
401
402	/*
403	 * Now, this shouldn't happen, but let's make sure that
404	 * the data length isn't too big.
405	 */
406	if (nbytes > ((1 << (8 * L)) - 1))
407		panic("%s: nbytes is %u, but length field is %d bytes",
408		      __FUNCTION__, nbytes, L);
409	/*
410	 * Clear out the blocks
411	 */
412	s0 = _mm_setzero_si128();
413
414	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
415	    key, nr, nbytes, tag_length);
416	/* s0 has flags, nonce, and then 0 */
417	byte_ptr = (uint8_t*)&s0;
418	byte_ptr[0] = L-1;	/* but the flags byte only has L' */
419	bcopy(nonce, &byte_ptr[1], nlen);
420
421	/*
422	 * Now to cycle through the rest of the data.
423	 */
424	decrypt_loop(in, NULL, nbytes, s0, nlen, &rolling_mac, key, nr);
425
426	/*
427	 * Compare the tag.
428	 */
429	staging_block = _mm_xor_si128(AESNI_ENC(s0, key, nr), rolling_mac);
430	if (timingsafe_bcmp(&staging_block, tag, tag_length) != 0) {
431		return (0);
432	}
433
434	/*
435	 * Push out the decryption results this time.
436	 */
437	decrypt_loop(in, out, nbytes, s0, nlen, NULL, key, nr);
438	return (1);
439}
440