1/*
2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#define BR_POWER_ASM_MACROS   1
26#include "inner.h"
27
28#if BR_POWER8
29
30/* see bearssl_block.h */
31void
32br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
33	const void *key, size_t len)
34{
35	ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
36	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
37}
38
/*
 * CBC encryption with AES-128 (10 rounds). 'sk' points to the expanded
 * key schedule (11 subkeys of 16 bytes each), 'iv' to the 16-byte IV,
 * and 'buf' to the data, which is encrypted in place. Exactly len >> 4
 * full 16-byte blocks are processed; the caller is expected to provide
 * a non-zero 'len' that is a multiple of 16 (CBC API contract). The IV
 * buffer itself is not modified; the caller (br_aes_pwr8_cbcenc_run)
 * updates it with the last ciphertext block.
 */
static void
cbcenc_128(const unsigned char *sk,
	const unsigned char *iv, unsigned char *buf, size_t len)
{
	long cc;

#if BR_POWER8_LE
	/*
	 * vperm control vector that byte-swaps each 32-bit word, used on
	 * little-endian systems to convert to/from the big-endian word
	 * order expected by the AES opcodes.
	 */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	cc = 0;
	asm volatile (

		/*
		 * Load subkeys into v0..v10. VSX registers 32..63 alias
		 * vector registers v0..v31, hence the "32 + n" numbering
		 * in the lxvw4x opcodes below.
		 */
		lxvw4x(32, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(33, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(34, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(35, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(36, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(37, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(38, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(39, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(40, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(41, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(42, %[cc], %[sk])

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * Load IV into v16. v16 holds the running CBC chaining
		 * value for the whole loop.
		 */
		lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
#endif

		/* One loop iteration per 16-byte block (CTR + bdnz). */
		mtctr(%[num_blocks])
	label(loop)
		/*
		 * Load next plaintext block (v17) and XOR with current
		 * chaining value.
		 */
		lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
		vperm(17, 17, 17, 15)
#endif
		vxor(16, 16, 17)

		/*
		 * Encrypt the block: initial AddRoundKey, nine middle
		 * rounds, then the final round (vcipherlast).
		 */
		vxor(16, 16, 0)
		vcipher(16, 16, 1)
		vcipher(16, 16, 2)
		vcipher(16, 16, 3)
		vcipher(16, 16, 4)
		vcipher(16, 16, 5)
		vcipher(16, 16, 6)
		vcipher(16, 16, 7)
		vcipher(16, 16, 8)
		vcipher(16, 16, 9)
		vcipherlast(16, 16, 10)

		/*
		 * Store back result (with byteswap on little-endian;
		 * v16 is kept unswapped as the next chaining value).
		 */
#if BR_POWER8_LE
		vperm(17, 16, 16, 15)
		stxvw4x(49, 0, %[buf])
#else
		stxvw4x(48, 0, %[buf])
#endif
		addi(%[buf], %[buf], 16)

		bdnz(loop)

: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "ctr", "memory"
	);
}
142
/*
 * CBC encryption with AES-192 (12 rounds). Same structure as
 * cbcenc_128, with 13 subkeys (v0..v12) and two extra middle rounds.
 * 'buf' is encrypted in place, len >> 4 blocks; the IV buffer is not
 * modified here (the caller updates it).
 */
static void
cbcenc_192(const unsigned char *sk,
	const unsigned char *iv, unsigned char *buf, size_t len)
{
	long cc;

#if BR_POWER8_LE
	/*
	 * vperm control vector that byte-swaps each 32-bit word
	 * (little-endian systems only).
	 */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	cc = 0;
	asm volatile (

		/*
		 * Load subkeys into v0..v12 (VSX registers 32..44 alias
		 * vector registers v0..v12).
		 */
		lxvw4x(32, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(33, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(34, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(35, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(36, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(37, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(38, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(39, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(40, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(41, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(42, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(43, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(44, %[cc], %[sk])

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * Load IV into v16 (running CBC chaining value).
		 */
		lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
#endif

		/* One loop iteration per 16-byte block (CTR + bdnz). */
		mtctr(%[num_blocks])
	label(loop)
		/*
		 * Load next plaintext block (v17) and XOR with current
		 * chaining value.
		 */
		lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
		vperm(17, 17, 17, 15)
#endif
		vxor(16, 16, 17)

		/*
		 * Encrypt the block: initial AddRoundKey, eleven middle
		 * rounds, then the final round (vcipherlast).
		 */
		vxor(16, 16, 0)
		vcipher(16, 16, 1)
		vcipher(16, 16, 2)
		vcipher(16, 16, 3)
		vcipher(16, 16, 4)
		vcipher(16, 16, 5)
		vcipher(16, 16, 6)
		vcipher(16, 16, 7)
		vcipher(16, 16, 8)
		vcipher(16, 16, 9)
		vcipher(16, 16, 10)
		vcipher(16, 16, 11)
		vcipherlast(16, 16, 12)

		/*
		 * Store back result (with byteswap on little-endian;
		 * v16 is kept unswapped as the next chaining value).
		 */
#if BR_POWER8_LE
		vperm(17, 16, 16, 15)
		stxvw4x(49, 0, %[buf])
#else
		stxvw4x(48, 0, %[buf])
#endif
		addi(%[buf], %[buf], 16)

		bdnz(loop)

: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "ctr", "memory"
	);
}
252
/*
 * CBC encryption with AES-256 (14 rounds). Same structure as
 * cbcenc_128, with 15 subkeys (v0..v14) and four extra middle rounds.
 * 'buf' is encrypted in place, len >> 4 blocks; the IV buffer is not
 * modified here (the caller updates it).
 */
static void
cbcenc_256(const unsigned char *sk,
	const unsigned char *iv, unsigned char *buf, size_t len)
{
	long cc;

#if BR_POWER8_LE
	/*
	 * vperm control vector that byte-swaps each 32-bit word
	 * (little-endian systems only).
	 */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	cc = 0;
	asm volatile (

		/*
		 * Load subkeys into v0..v14 (VSX registers 32..46 alias
		 * vector registers v0..v14).
		 */
		lxvw4x(32, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(33, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(34, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(35, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(36, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(37, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(38, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(39, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(40, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(41, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(42, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(43, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(44, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(45, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(46, %[cc], %[sk])

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * Load IV into v16 (running CBC chaining value).
		 */
		lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
#endif

		/* One loop iteration per 16-byte block (CTR + bdnz). */
		mtctr(%[num_blocks])
	label(loop)
		/*
		 * Load next plaintext block (v17) and XOR with current
		 * chaining value.
		 */
		lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
		vperm(17, 17, 17, 15)
#endif
		vxor(16, 16, 17)

		/*
		 * Encrypt the block: initial AddRoundKey, thirteen middle
		 * rounds, then the final round (vcipherlast).
		 */
		vxor(16, 16, 0)
		vcipher(16, 16, 1)
		vcipher(16, 16, 2)
		vcipher(16, 16, 3)
		vcipher(16, 16, 4)
		vcipher(16, 16, 5)
		vcipher(16, 16, 6)
		vcipher(16, 16, 7)
		vcipher(16, 16, 8)
		vcipher(16, 16, 9)
		vcipher(16, 16, 10)
		vcipher(16, 16, 11)
		vcipher(16, 16, 12)
		vcipher(16, 16, 13)
		vcipherlast(16, 16, 14)

		/*
		 * Store back result (with byteswap on little-endian;
		 * v16 is kept unswapped as the next chaining value).
		 */
#if BR_POWER8_LE
		vperm(17, 16, 16, 15)
		stxvw4x(49, 0, %[buf])
#else
		stxvw4x(48, 0, %[buf])
#endif
		addi(%[buf], %[buf], 16)

		bdnz(loop)

: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "ctr", "memory"
	);
}
368
369/* see bearssl_block.h */
370void
371br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
372	void *iv, void *data, size_t len)
373{
374	if (len > 0) {
375		switch (ctx->num_rounds) {
376		case 10:
377			cbcenc_128(ctx->skey.skni, iv, data, len);
378			break;
379		case 12:
380			cbcenc_192(ctx->skey.skni, iv, data, len);
381			break;
382		default:
383			cbcenc_256(ctx->skey.skni, iv, data, len);
384			break;
385		}
386		memcpy(iv, (unsigned char *)data + (len - 16), 16);
387	}
388}
389
/* see bearssl_block.h */
const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
	sizeof(br_aes_pwr8_cbcenc_keys), /* context structure size */
	16,                              /* AES block size, in bytes */
	4,                               /* log2 of the block size */
	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
		&br_aes_pwr8_cbcenc_init,
	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
		&br_aes_pwr8_cbcenc_run
};
400
401/* see bearssl_block.h */
402const br_block_cbcenc_class *
403br_aes_pwr8_cbcenc_get_vtable(void)
404{
405	return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcenc_vtable : NULL;
406}
407
408#else
409
/* see bearssl_block.h */
const br_block_cbcenc_class *
br_aes_pwr8_cbcenc_get_vtable(void)
{
	/* This build has no POWER8 support (BR_POWER8 not set). */
	return NULL;
}
416
417#endif
418