1/*
2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#define BR_POWER_ASM_MACROS   1
26#include "inner.h"
27
28#if BR_POWER8
29
30/* see bearssl_block.h */
31void
32br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
33	const void *key, size_t len)
34{
35	ctx->vtable = &br_aes_pwr8_ctr_vtable;
36	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
37}
38
/*
 * CTR encryption/decryption with AES-128 (10 rounds). sk points to the
 * expanded subkeys (11 blocks of 16 bytes), ivbuf to four consecutive
 * 16-byte counter blocks, and buf to the data, processed in place.
 * num_blocks is the total number of 16-byte blocks; callers pass a
 * multiple of 4 (the loop handles 4 blocks per iteration and the CTR
 * register is loaded with num_blocks >> 2).
 */
static void
ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
	unsigned char *buf, size_t num_blocks)
{
	/*
	 * Byte offsets (0, 16, 32, 48) of the four parallel blocks,
	 * used as index registers for the vector load/store opcodes.
	 */
	long cc0, cc1, cc2, cc3;

#if BR_POWER8_LE
	/*
	 * vperm constant that byteswaps each 32-bit word of a vector
	 * (needed only in little-endian mode).
	 */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif
	/*
	 * Adding this vector to a counter block advances its last
	 * 32-bit word by 4: one loop iteration consumes 4 counters.
	 */
	static const uint32_t ctrinc[] = {
		0, 0, 0, 4
	};

	cc0 = 0;
	cc1 = 16;
	cc2 = 32;
	cc3 = 48;
	asm volatile (

		/*
		 * Load subkeys into v0..v10. The lxvw4x first operand
		 * is a VSX register number; VSX 32..63 alias the
		 * vector registers v0..v31.
		 */
		lxvw4x(32, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(33, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(34, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(35, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(36, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(37, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(38, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(39, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(40, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(41, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(42, %[cc0], %[sk])
		/* Restore cc0 to 0: it is reused below as offset 0. */
		li(%[cc0], 0)

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * v28 = increment for IV counter.
		 */
		lxvw4x(60, 0, %[ctrinc])

		/*
		 * Load IV into v16..v19 (four counter blocks).
		 */
		lxvw4x(48, %[cc0], %[ivbuf])
		lxvw4x(49, %[cc1], %[ivbuf])
		lxvw4x(50, %[cc2], %[ivbuf])
		lxvw4x(51, %[cc3], %[ivbuf])
#if BR_POWER8_LE
		/* Convert counters to big-endian word order. */
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif

		/* CTR = number of 4-block iterations. */
		mtctr(%[num_blocks])
	label(loop)
		/*
		 * Compute next IV into v24..v27
		 */
		vadduwm(24, 16, 28)
		vadduwm(25, 17, 28)
		vadduwm(26, 18, 28)
		vadduwm(27, 19, 28)

		/*
		 * Load next data blocks. We do this early on but we
		 * won't need them until IV encryption is done.
		 */
		lxvw4x(52, %[cc0], %[buf])
		lxvw4x(53, %[cc1], %[buf])
		lxvw4x(54, %[cc2], %[buf])
		lxvw4x(55, %[cc3], %[buf])

		/*
		 * Encrypt the current IV: initial AddRoundKey, then
		 * 9 vcipher rounds and a final vcipherlast, on all
		 * four blocks in parallel.
		 */
		vxor(16, 16, 0)
		vxor(17, 17, 0)
		vxor(18, 18, 0)
		vxor(19, 19, 0)
		vcipher(16, 16, 1)
		vcipher(17, 17, 1)
		vcipher(18, 18, 1)
		vcipher(19, 19, 1)
		vcipher(16, 16, 2)
		vcipher(17, 17, 2)
		vcipher(18, 18, 2)
		vcipher(19, 19, 2)
		vcipher(16, 16, 3)
		vcipher(17, 17, 3)
		vcipher(18, 18, 3)
		vcipher(19, 19, 3)
		vcipher(16, 16, 4)
		vcipher(17, 17, 4)
		vcipher(18, 18, 4)
		vcipher(19, 19, 4)
		vcipher(16, 16, 5)
		vcipher(17, 17, 5)
		vcipher(18, 18, 5)
		vcipher(19, 19, 5)
		vcipher(16, 16, 6)
		vcipher(17, 17, 6)
		vcipher(18, 18, 6)
		vcipher(19, 19, 6)
		vcipher(16, 16, 7)
		vcipher(17, 17, 7)
		vcipher(18, 18, 7)
		vcipher(19, 19, 7)
		vcipher(16, 16, 8)
		vcipher(17, 17, 8)
		vcipher(18, 18, 8)
		vcipher(19, 19, 8)
		vcipher(16, 16, 9)
		vcipher(17, 17, 9)
		vcipher(18, 18, 9)
		vcipher(19, 19, 9)
		vcipherlast(16, 16, 10)
		vcipherlast(17, 17, 10)
		vcipherlast(18, 18, 10)
		vcipherlast(19, 19, 10)

#if BR_POWER8_LE
		/* Swap keystream back to memory byte order. */
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif

		/*
		 * XOR the keystream with the data blocks loaded above,
		 * and store the result back into buf.
		 */
		vxor(16, 20, 16)
		vxor(17, 21, 17)
		vxor(18, 22, 18)
		vxor(19, 23, 19)
		stxvw4x(48, %[cc0], %[buf])
		stxvw4x(49, %[cc1], %[buf])
		stxvw4x(50, %[cc2], %[buf])
		stxvw4x(51, %[cc3], %[buf])

		addi(%[buf], %[buf], 64)

		/*
		 * Update IV: vand with identical operands is a plain
		 * register copy, committing v24..v27 as the new
		 * counters in v16..v19.
		 */
		vand(16, 24, 24)
		vand(17, 25, 25)
		vand(18, 26, 26)
		vand(19, 27, 27)

		bdnz(loop)

: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
  [buf] "+b" (buf)
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
  [ctrinc] "b" (ctrinc)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
  "ctr", "memory"
	);
}
222
/*
 * CTR encryption/decryption with AES-192 (12 rounds). Identical in
 * structure to ctr_128(), but with 13 subkeys (v0..v12) and two extra
 * vcipher rounds. See ctr_128() for the parameter contract: ivbuf is
 * four 16-byte counter blocks, buf is processed in place, and
 * num_blocks is a multiple of 4.
 */
static void
ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
	unsigned char *buf, size_t num_blocks)
{
	/* Byte offsets (0, 16, 32, 48) of the four parallel blocks. */
	long cc0, cc1, cc2, cc3;

#if BR_POWER8_LE
	/* vperm constant: byteswap each 32-bit word (LE mode only). */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif
	/* Counter increment: +4 on the last word per iteration. */
	static const uint32_t ctrinc[] = {
		0, 0, 0, 4
	};

	cc0 = 0;
	cc1 = 16;
	cc2 = 32;
	cc3 = 48;
	asm volatile (

		/*
		 * Load subkeys into v0..v12 (VSX registers 32..44
		 * alias vector registers v0..v12).
		 */
		lxvw4x(32, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(33, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(34, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(35, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(36, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(37, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(38, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(39, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(40, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(41, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(42, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(43, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(44, %[cc0], %[sk])
		/* Restore cc0 to 0: it is reused below as offset 0. */
		li(%[cc0], 0)

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * v28 = increment for IV counter.
		 */
		lxvw4x(60, 0, %[ctrinc])

		/*
		 * Load IV into v16..v19 (four counter blocks).
		 */
		lxvw4x(48, %[cc0], %[ivbuf])
		lxvw4x(49, %[cc1], %[ivbuf])
		lxvw4x(50, %[cc2], %[ivbuf])
		lxvw4x(51, %[cc3], %[ivbuf])
#if BR_POWER8_LE
		/* Convert counters to big-endian word order. */
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif

		/* CTR = number of 4-block iterations. */
		mtctr(%[num_blocks])
	label(loop)
		/*
		 * Compute next IV into v24..v27
		 */
		vadduwm(24, 16, 28)
		vadduwm(25, 17, 28)
		vadduwm(26, 18, 28)
		vadduwm(27, 19, 28)

		/*
		 * Load next data blocks. We do this early on but we
		 * won't need them until IV encryption is done.
		 */
		lxvw4x(52, %[cc0], %[buf])
		lxvw4x(53, %[cc1], %[buf])
		lxvw4x(54, %[cc2], %[buf])
		lxvw4x(55, %[cc3], %[buf])

		/*
		 * Encrypt the current IV: initial AddRoundKey, then
		 * 11 vcipher rounds and a final vcipherlast, on all
		 * four blocks in parallel.
		 */
		vxor(16, 16, 0)
		vxor(17, 17, 0)
		vxor(18, 18, 0)
		vxor(19, 19, 0)
		vcipher(16, 16, 1)
		vcipher(17, 17, 1)
		vcipher(18, 18, 1)
		vcipher(19, 19, 1)
		vcipher(16, 16, 2)
		vcipher(17, 17, 2)
		vcipher(18, 18, 2)
		vcipher(19, 19, 2)
		vcipher(16, 16, 3)
		vcipher(17, 17, 3)
		vcipher(18, 18, 3)
		vcipher(19, 19, 3)
		vcipher(16, 16, 4)
		vcipher(17, 17, 4)
		vcipher(18, 18, 4)
		vcipher(19, 19, 4)
		vcipher(16, 16, 5)
		vcipher(17, 17, 5)
		vcipher(18, 18, 5)
		vcipher(19, 19, 5)
		vcipher(16, 16, 6)
		vcipher(17, 17, 6)
		vcipher(18, 18, 6)
		vcipher(19, 19, 6)
		vcipher(16, 16, 7)
		vcipher(17, 17, 7)
		vcipher(18, 18, 7)
		vcipher(19, 19, 7)
		vcipher(16, 16, 8)
		vcipher(17, 17, 8)
		vcipher(18, 18, 8)
		vcipher(19, 19, 8)
		vcipher(16, 16, 9)
		vcipher(17, 17, 9)
		vcipher(18, 18, 9)
		vcipher(19, 19, 9)
		vcipher(16, 16, 10)
		vcipher(17, 17, 10)
		vcipher(18, 18, 10)
		vcipher(19, 19, 10)
		vcipher(16, 16, 11)
		vcipher(17, 17, 11)
		vcipher(18, 18, 11)
		vcipher(19, 19, 11)
		vcipherlast(16, 16, 12)
		vcipherlast(17, 17, 12)
		vcipherlast(18, 18, 12)
		vcipherlast(19, 19, 12)

#if BR_POWER8_LE
		/* Swap keystream back to memory byte order. */
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif

		/*
		 * XOR the keystream with the data blocks loaded above,
		 * and store the result back into buf.
		 */
		vxor(16, 20, 16)
		vxor(17, 21, 17)
		vxor(18, 22, 18)
		vxor(19, 23, 19)
		stxvw4x(48, %[cc0], %[buf])
		stxvw4x(49, %[cc1], %[buf])
		stxvw4x(50, %[cc2], %[buf])
		stxvw4x(51, %[cc3], %[buf])

		addi(%[buf], %[buf], 64)

		/*
		 * Update IV: vand with identical operands is a plain
		 * register copy, committing v24..v27 as the new
		 * counters in v16..v19.
		 */
		vand(16, 24, 24)
		vand(17, 25, 25)
		vand(18, 26, 26)
		vand(19, 27, 27)

		bdnz(loop)

: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
  [buf] "+b" (buf)
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
  [ctrinc] "b" (ctrinc)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
  "ctr", "memory"
	);
}
418
/*
 * CTR encryption/decryption with AES-256 (14 rounds). Identical in
 * structure to ctr_128(), but with 15 subkeys (v0..v14) and four extra
 * vcipher rounds. See ctr_128() for the parameter contract: ivbuf is
 * four 16-byte counter blocks, buf is processed in place, and
 * num_blocks is a multiple of 4.
 */
static void
ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
	unsigned char *buf, size_t num_blocks)
{
	/* Byte offsets (0, 16, 32, 48) of the four parallel blocks. */
	long cc0, cc1, cc2, cc3;

#if BR_POWER8_LE
	/* vperm constant: byteswap each 32-bit word (LE mode only). */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif
	/* Counter increment: +4 on the last word per iteration. */
	static const uint32_t ctrinc[] = {
		0, 0, 0, 4
	};

	cc0 = 0;
	cc1 = 16;
	cc2 = 32;
	cc3 = 48;
	asm volatile (

		/*
		 * Load subkeys into v0..v14 (VSX registers 32..46
		 * alias vector registers v0..v14).
		 */
		lxvw4x(32, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(33, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(34, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(35, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(36, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(37, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(38, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(39, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(40, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(41, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(42, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(43, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(44, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(45, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(46, %[cc0], %[sk])
		/* Restore cc0 to 0: it is reused below as offset 0. */
		li(%[cc0], 0)

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * v28 = increment for IV counter.
		 */
		lxvw4x(60, 0, %[ctrinc])

		/*
		 * Load IV into v16..v19 (four counter blocks).
		 */
		lxvw4x(48, %[cc0], %[ivbuf])
		lxvw4x(49, %[cc1], %[ivbuf])
		lxvw4x(50, %[cc2], %[ivbuf])
		lxvw4x(51, %[cc3], %[ivbuf])
#if BR_POWER8_LE
		/* Convert counters to big-endian word order. */
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif

		/* CTR = number of 4-block iterations. */
		mtctr(%[num_blocks])
	label(loop)
		/*
		 * Compute next IV into v24..v27
		 */
		vadduwm(24, 16, 28)
		vadduwm(25, 17, 28)
		vadduwm(26, 18, 28)
		vadduwm(27, 19, 28)

		/*
		 * Load next data blocks. We do this early on but we
		 * won't need them until IV encryption is done.
		 */
		lxvw4x(52, %[cc0], %[buf])
		lxvw4x(53, %[cc1], %[buf])
		lxvw4x(54, %[cc2], %[buf])
		lxvw4x(55, %[cc3], %[buf])

		/*
		 * Encrypt the current IV: initial AddRoundKey, then
		 * 13 vcipher rounds and a final vcipherlast, on all
		 * four blocks in parallel.
		 */
		vxor(16, 16, 0)
		vxor(17, 17, 0)
		vxor(18, 18, 0)
		vxor(19, 19, 0)
		vcipher(16, 16, 1)
		vcipher(17, 17, 1)
		vcipher(18, 18, 1)
		vcipher(19, 19, 1)
		vcipher(16, 16, 2)
		vcipher(17, 17, 2)
		vcipher(18, 18, 2)
		vcipher(19, 19, 2)
		vcipher(16, 16, 3)
		vcipher(17, 17, 3)
		vcipher(18, 18, 3)
		vcipher(19, 19, 3)
		vcipher(16, 16, 4)
		vcipher(17, 17, 4)
		vcipher(18, 18, 4)
		vcipher(19, 19, 4)
		vcipher(16, 16, 5)
		vcipher(17, 17, 5)
		vcipher(18, 18, 5)
		vcipher(19, 19, 5)
		vcipher(16, 16, 6)
		vcipher(17, 17, 6)
		vcipher(18, 18, 6)
		vcipher(19, 19, 6)
		vcipher(16, 16, 7)
		vcipher(17, 17, 7)
		vcipher(18, 18, 7)
		vcipher(19, 19, 7)
		vcipher(16, 16, 8)
		vcipher(17, 17, 8)
		vcipher(18, 18, 8)
		vcipher(19, 19, 8)
		vcipher(16, 16, 9)
		vcipher(17, 17, 9)
		vcipher(18, 18, 9)
		vcipher(19, 19, 9)
		vcipher(16, 16, 10)
		vcipher(17, 17, 10)
		vcipher(18, 18, 10)
		vcipher(19, 19, 10)
		vcipher(16, 16, 11)
		vcipher(17, 17, 11)
		vcipher(18, 18, 11)
		vcipher(19, 19, 11)
		vcipher(16, 16, 12)
		vcipher(17, 17, 12)
		vcipher(18, 18, 12)
		vcipher(19, 19, 12)
		vcipher(16, 16, 13)
		vcipher(17, 17, 13)
		vcipher(18, 18, 13)
		vcipher(19, 19, 13)
		vcipherlast(16, 16, 14)
		vcipherlast(17, 17, 14)
		vcipherlast(18, 18, 14)
		vcipherlast(19, 19, 14)

#if BR_POWER8_LE
		/* Swap keystream back to memory byte order. */
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif

		/*
		 * XOR the keystream with the data blocks loaded above,
		 * and store the result back into buf.
		 */
		vxor(16, 20, 16)
		vxor(17, 21, 17)
		vxor(18, 22, 18)
		vxor(19, 23, 19)
		stxvw4x(48, %[cc0], %[buf])
		stxvw4x(49, %[cc1], %[buf])
		stxvw4x(50, %[cc2], %[buf])
		stxvw4x(51, %[cc3], %[buf])

		addi(%[buf], %[buf], 64)

		/*
		 * Update IV: vand with identical operands is a plain
		 * register copy, committing v24..v27 as the new
		 * counters in v16..v19.
		 */
		vand(16, 24, 24)
		vand(17, 25, 25)
		vand(18, 26, 26)
		vand(19, 27, 27)

		bdnz(loop)

: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
  [buf] "+b" (buf)
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
  [ctrinc] "b" (ctrinc)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
  "ctr", "memory"
	);
}
626
627/* see bearssl_block.h */
628uint32_t
629br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
630	const void *iv, uint32_t cc, void *data, size_t len)
631{
632	unsigned char *buf;
633	unsigned char ivbuf[64];
634
635	buf = data;
636	memcpy(ivbuf +  0, iv, 12);
637	memcpy(ivbuf + 16, iv, 12);
638	memcpy(ivbuf + 32, iv, 12);
639	memcpy(ivbuf + 48, iv, 12);
640	if (len >= 64) {
641		br_enc32be(ivbuf + 12, cc + 0);
642		br_enc32be(ivbuf + 28, cc + 1);
643		br_enc32be(ivbuf + 44, cc + 2);
644		br_enc32be(ivbuf + 60, cc + 3);
645		switch (ctx->num_rounds) {
646		case 10:
647			ctr_128(ctx->skey.skni, ivbuf, buf,
648				(len >> 4) & ~(size_t)3);
649			break;
650		case 12:
651			ctr_192(ctx->skey.skni, ivbuf, buf,
652				(len >> 4) & ~(size_t)3);
653			break;
654		default:
655			ctr_256(ctx->skey.skni, ivbuf, buf,
656				(len >> 4) & ~(size_t)3);
657			break;
658		}
659		cc += (len >> 4) & ~(size_t)3;
660		buf += len & ~(size_t)63;
661		len &= 63;
662	}
663	if (len > 0) {
664		unsigned char tmp[64];
665
666		memcpy(tmp, buf, len);
667		memset(tmp + len, 0, (sizeof tmp) - len);
668		br_enc32be(ivbuf + 12, cc + 0);
669		br_enc32be(ivbuf + 28, cc + 1);
670		br_enc32be(ivbuf + 44, cc + 2);
671		br_enc32be(ivbuf + 60, cc + 3);
672		switch (ctx->num_rounds) {
673		case 10:
674			ctr_128(ctx->skey.skni, ivbuf, tmp, 4);
675			break;
676		case 12:
677			ctr_192(ctx->skey.skni, ivbuf, tmp, 4);
678			break;
679		default:
680			ctr_256(ctx->skey.skni, ivbuf, tmp, 4);
681			break;
682		}
683		memcpy(buf, tmp, len);
684		cc += (len + 15) >> 4;
685	}
686	return cc;
687}
688
689/* see bearssl_block.h */
/*
 * Class instance for the POWER8 AES/CTR implementation. Field layout
 * is defined by br_block_ctr_class (bearssl_block.h); the function
 * pointers are cast to the generic vtable signatures.
 */
const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
	sizeof(br_aes_pwr8_ctr_keys), /* context structure size */
	16,                           /* block size, in bytes */
	4,                            /* presumably log2(block size) — confirm in bearssl_block.h */
	(void (*)(const br_block_ctr_class **, const void *, size_t))
		&br_aes_pwr8_ctr_init,
	(uint32_t (*)(const br_block_ctr_class *const *,
		const void *, uint32_t, void *, size_t))
		&br_aes_pwr8_ctr_run
};
700
701/* see bearssl_block.h */
702const br_block_ctr_class *
703br_aes_pwr8_ctr_get_vtable(void)
704{
705	return br_aes_pwr8_supported() ? &br_aes_pwr8_ctr_vtable : NULL;
706}
707
708#else
709
710/* see bearssl_block.h */
const br_block_ctr_class *
br_aes_pwr8_ctr_get_vtable(void)
{
	/* POWER8 crypto opcodes not compiled in (BR_POWER8 unset). */
	return NULL;
}
716
717#endif
718