1/*
2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "inner.h"
26
27/*
28 * This is a "reference" implementation of Poly1305 that uses the
29 * generic "i15" code for big integers. It is slow, but it handles all
30 * big-integer operations with generic code, thereby avoiding most
31 * tricky situations with carry propagation and modular reduction.
32 */
33
/*
 * The Poly1305 modulus p = 2^130-5, as an "i15" big integer: one
 * header word followed by nine value words of 15 bits each, least
 * significant word first. The header (0x008A) is the same value that
 * is passed to br_i15_zero() to set up integers of matching size.
 */
static const uint16_t P1305[10] = {
	/* header word */
	0x008A,
	/* low word: 2^15 - 5 */
	0x7FFB,
	/* seven all-ones words */
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
	/* top word: the remaining 10 bits (130 = 8*15 + 10) */
	0x03FF
};
41
/*
 * -1/p mod 2^15, i.e. the value such that p * P0I = -1 mod 2^15
 * (the low word of p is 0x7FFB = -5 mod 2^15, and
 * 5 * 0x4CCD = 98305 = 3 * 2^15 + 1). This is the Montgomery
 * constant passed as last argument to br_i15_montymul().
 */
#define P0I   0x4CCD
46
/*
 * R^2 mod p, where R = 2^135 is the Montgomery radix (nine words of
 * 15 bits each). A Montgomery multiplication by this constant converts
 * a value to Montgomery representation.
 *
 * Since 2^130 = 5 mod p, we get R^2 = 2^270 = 25 * 2^10 = 0x6400 mod p;
 * that value fits in the first value word, and all upper words are
 * zero (they are left to implicit zero-initialisation).
 */
static const uint16_t R2[10] = {
	0x008A,     /* header word, same as for the modulus */
	0x6400      /* low word; the eight remaining words are 0 */
};
55
56/*
57 * Perform the inner processing of blocks for Poly1305. The "r" array
58 * is in Montgomery representation, while the "a" array is not.
59 */
60static void
61poly1305_inner(uint16_t *a, const uint16_t *r, const void *data, size_t len)
62{
63	const unsigned char *buf;
64
65	buf = data;
66	while (len > 0) {
67		unsigned char tmp[16], rev[16];
68		uint16_t b[10];
69		uint32_t ctl;
70		int i;
71
72		/*
73		 * If there is a partial block, right-pad it with zeros.
74		 */
75		if (len < 16) {
76			memset(tmp, 0, sizeof tmp);
77			memcpy(tmp, buf, len);
78			buf = tmp;
79			len = 16;
80		}
81
82		/*
83		 * Decode next block and apply the "high bit". Since
84		 * decoding is little-endian, we must byte-swap the buffer.
85		 */
86		for (i = 0; i < 16; i ++) {
87			rev[i] = buf[15 - i];
88		}
89		br_i15_decode_mod(b, rev, sizeof rev, P1305);
90		b[9] |= 0x0100;
91
92		/*
93		 * Add the accumulator to the decoded block (modular
94		 * addition).
95		 */
96		ctl = br_i15_add(b, a, 1);
97		ctl |= NOT(br_i15_sub(b, P1305, 0));
98		br_i15_sub(b, P1305, ctl);
99
100		/*
101		 * Multiply by r, result is the new accumulator value.
102		 */
103		br_i15_montymul(a, b, r, P1305, P0I);
104
105		buf += 16;
106		len -= 16;
107	}
108}
109
/*
 * Reverse, in place, the order of the 16 bytes starting at buf (this
 * converts a 128-bit value between little-endian and big-endian
 * encodings).
 */
static void
byteswap16(unsigned char *buf)
{
	unsigned char *lo, *hi;

	/*
	 * Two cursors start at both ends and walk toward each other,
	 * exchanging bytes until they cross in the middle (8 swaps).
	 */
	lo = buf;
	hi = buf + 15;
	while (lo < hi) {
		unsigned char t;

		t = *lo;
		*lo ++ = *hi;
		*hi -- = t;
	}
}
126
127/* see bearssl_block.h */
128void
129br_poly1305_i15_run(const void *key, const void *iv,
130	void *data, size_t len, const void *aad, size_t aad_len,
131	void *tag, br_chacha20_run ichacha, int encrypt)
132{
133	unsigned char pkey[32], foot[16];
134	uint16_t t[10], r[10], acc[10];
135
136	/*
137	 * Compute the MAC key. The 'r' value is the first 16 bytes of
138	 * pkey[].
139	 */
140	memset(pkey, 0, sizeof pkey);
141	ichacha(key, iv, 0, pkey, sizeof pkey);
142
143	/*
144	 * If encrypting, ChaCha20 must run first, followed by Poly1305.
145	 * When decrypting, the operations are reversed.
146	 */
147	if (encrypt) {
148		ichacha(key, iv, 1, data, len);
149	}
150
151	/*
152	 * Run Poly1305. We must process the AAD, then ciphertext, then
153	 * the footer (with the lengths). Note that the AAD and ciphertext
154	 * are meant to be padded with zeros up to the next multiple of 16,
155	 * and the length of the footer is 16 bytes as well.
156	 */
157
158	/*
159	 * Apply the "clamping" operation on the encoded 'r' value.
160	 */
161	pkey[ 3] &= 0x0F;
162	pkey[ 7] &= 0x0F;
163	pkey[11] &= 0x0F;
164	pkey[15] &= 0x0F;
165	pkey[ 4] &= 0xFC;
166	pkey[ 8] &= 0xFC;
167	pkey[12] &= 0xFC;
168
169	/*
170	 * Decode the clamped 'r' value. Decoding should use little-endian
171	 * so we must byteswap the value first.
172	 */
173	byteswap16(pkey);
174	br_i15_decode_mod(t, pkey, 16, P1305);
175
176	/*
177	 * Convert 'r' to Montgomery representation.
178	 */
179	br_i15_montymul(r, t, R2, P1305, P0I);
180
181	/*
182	 * Accumulator is 0.
183	 */
184	br_i15_zero(acc, 0x8A);
185
186	/*
187	 * Process the additional authenticated data, ciphertext, and
188	 * footer in due order.
189	 */
190	br_enc64le(foot, (uint64_t)aad_len);
191	br_enc64le(foot + 8, (uint64_t)len);
192	poly1305_inner(acc, r, aad, aad_len);
193	poly1305_inner(acc, r, data, len);
194	poly1305_inner(acc, r, foot, sizeof foot);
195
196	/*
197	 * Decode the value 's'. Again, a byteswap is needed.
198	 */
199	byteswap16(pkey + 16);
200	br_i15_decode_mod(t, pkey + 16, 16, P1305);
201
202	/*
203	 * Add the value 's' to the accumulator. That addition is done
204	 * modulo 2^128, so we just ignore the carry.
205	 */
206	br_i15_add(acc, t, 1);
207
208	/*
209	 * Encode the result (128 low bits) to the tag. Encoding should
210	 * be little-endian.
211	 */
212	br_i15_encode(tag, 16, acc);
213	byteswap16(tag);
214
215	/*
216	 * If decrypting, then ChaCha20 runs _after_ Poly1305.
217	 */
218	if (!encrypt) {
219		ichacha(key, iv, 1, data, len);
220	}
221}
222