#include "sparc_arch.h"

#define LOCALS (STACK_BIAS+STACK_FRAME)

.text

.globl	aes_fx_encrypt
.align	32
aes_fx_encrypt:
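! Register usage, as read off the code below (it matches the usual
! OpenSSL AES_encrypt argument order): %o0 = input block (may be
! unaligned), %o1 = output (may be unaligned), %o2 = key schedule with
! the round count at offset 240.  The state lives in %f0/%f2; unaligned
! buffers are handled with fshiftorx merges and partial stores (ASI 0xc0).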
	and		%o0, 7, %o4		! is input aligned?
	andn		%o0, 7, %o0
	ldd		[%o2 +  0], %f6	! round[0]
	ldd		[%o2 +  8], %f8
	mov		%o7, %g1
	ld		[%o2 + 240], %o3

1:	call		.+8
	add		%o7, .Linp_align-1b, %o7

	sll		%o4, 3, %o4
	ldd		[%o0 + 0], %f0		! load input
	brz,pt		%o4, .Lenc_inp_aligned
	ldd		[%o0 + 8], %f2

	ldd		[%o7 + %o4], %f14	! shift left params
	ldd		[%o0 + 16], %f4
	.word	0x81b81d62 !fshiftorx	%f0,%f2,%f14,%f0
	.word	0x85b89d64 !fshiftorx	%f2,%f4,%f14,%f2

.Lenc_inp_aligned:
	ldd		[%o2 + 16], %f10	! round[1]
	ldd		[%o2 + 24], %f12

	.word	0x81b00d86 !fxor	%f0,%f6,%f0		! ^=round[0]
	.word	0x85b08d88 !fxor	%f2,%f8,%f2
	ldd		[%o2 + 32], %f6	! round[2]
	ldd		[%o2 + 40], %f8
	add		%o2, 32, %o2
	sub		%o3, 4, %o3

.Loop_enc:
	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%o2 + 16], %f10
	ldd		[%o2 + 24], %f12
	add		%o2, 32, %o2

	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd		[%o2 +  0], %f6
	ldd		[%o2 +  8], %f8

	brnz,a		%o3, .Loop_enc
	sub		%o3, 2, %o3

	andcc		%o1, 7, %o4		! is output aligned?
	andn		%o1, 7, %o1
	mov		0xff, %o5
	srl		%o5, %o4, %o5
	add		%o7, 64, %o7
	sll		%o4, 3, %o4

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%o7 + %o4], %f14	! shift right params

	fmovd		%f0, %f4
	.word	0x81b09246 !faesenclx	%f2,%f6,%f0
	.word	0x85b11248 !faesenclx	%f4,%f8,%f2

	bnz,pn		%icc, .Lenc_out_unaligned
	mov		%g1, %o7

	std		%f0, [%o1 + 0]
	retl
	std		%f2, [%o1 + 8]

.align	16
.Lenc_out_unaligned:
	add		%o1, 16, %o0
	orn		%g0, %o5, %o4
	.word	0x89b81d60 !fshiftorx	%f0,%f0,%f14,%f4
	.word	0x8db81d62 !fshiftorx	%f0,%f2,%f14,%f6
	.word	0x91b89d62 !fshiftorx	%f2,%f2,%f14,%f8

	stda		%f4, [%o1 + %o5]0xc0	! partial store
	std		%f6, [%o1 + 8]
	stda		%f8, [%o0 + %o4]0xc0	! partial store
	retl
	nop
.type	aes_fx_encrypt,#function
.size	aes_fx_encrypt,.-aes_fx_encrypt

.globl	aes_fx_decrypt
.align	32
aes_fx_decrypt:
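! Same register layout as aes_fx_encrypt above (%o0 = in, %o1 = out,
! %o2 = key schedule); only the faesdecx/faesdeclx rounds differ.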
	and		%o0, 7, %o4		! is input aligned?
	andn		%o0, 7, %o0
	ldd		[%o2 +  0], %f6	! round[0]
	ldd		[%o2 +  8], %f8
	mov		%o7, %g1
	ld		[%o2 + 240], %o3

1:	call		.+8
	add		%o7, .Linp_align-1b, %o7

	sll		%o4, 3, %o4
	ldd		[%o0 + 0], %f0		! load input
	brz,pt		%o4, .Ldec_inp_aligned
	ldd		[%o0 + 8], %f2

	ldd		[%o7 + %o4], %f14	! shift left params
	ldd		[%o0 + 16], %f4
	.word	0x81b81d62 !fshiftorx	%f0,%f2,%f14,%f0
	.word	0x85b89d64 !fshiftorx	%f2,%f4,%f14,%f2

.Ldec_inp_aligned:
	ldd		[%o2 + 16], %f10	! round[1]
	ldd		[%o2 + 24], %f12

	.word	0x81b00d86 !fxor	%f0,%f6,%f0		! ^=round[0]
	.word	0x85b08d88 !fxor	%f2,%f8,%f2
	ldd		[%o2 + 32], %f6	! round[2]
	ldd		[%o2 + 40], %f8
	add		%o2, 32, %o2
	sub		%o3, 4, %o3

.Loop_dec:
	fmovd		%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd		[%o2 + 16], %f10
	ldd		[%o2 + 24], %f12
	add		%o2, 32, %o2

	fmovd		%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	ldd		[%o2 +  0], %f6
	ldd		[%o2 +  8], %f8

	brnz,a		%o3, .Loop_dec
	sub		%o3, 2, %o3

	andcc		%o1, 7, %o4		! is output aligned?
	andn		%o1, 7, %o1
	mov		0xff, %o5
	srl		%o5, %o4, %o5
	add		%o7, 64, %o7
	sll		%o4, 3, %o4

	fmovd		%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd		[%o7 + %o4], %f14	! shift right params

	fmovd		%f0, %f4
	.word	0x81b09266 !faesdeclx	%f2,%f6,%f0
	.word	0x85b11268 !faesdeclx	%f4,%f8,%f2

	bnz,pn		%icc, .Ldec_out_unaligned
	mov		%g1, %o7

	std		%f0, [%o1 + 0]
	retl
	std		%f2, [%o1 + 8]

.align	16
.Ldec_out_unaligned:
	add		%o1, 16, %o0
	orn		%g0, %o5, %o4
	.word	0x89b81d60 !fshiftorx	%f0,%f0,%f14,%f4
	.word	0x8db81d62 !fshiftorx	%f0,%f2,%f14,%f6
	.word	0x91b89d62 !fshiftorx	%f2,%f2,%f14,%f8

	stda		%f4, [%o1 + %o5]0xc0	! partial store
	std		%f6, [%o1 + 8]
	stda		%f8, [%o0 + %o4]0xc0	! partial store
	retl
	nop
.type	aes_fx_decrypt,#function
.size	aes_fx_decrypt,.-aes_fx_decrypt
.globl	aes_fx_set_decrypt_key
.align	32
aes_fx_set_decrypt_key:
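! The decrypt schedule is produced by the shared .Lset_encrypt_key code
! below: %o4 = -1 turns the per-round step into -16 ("sllx %o4, 4, %o4"),
! so the round keys are written from the end of the schedule backwards.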
	b		.Lset_encrypt_key
	mov		-1, %o4
	retl
	nop
.type	aes_fx_set_decrypt_key,#function
.size	aes_fx_set_decrypt_key,.-aes_fx_set_decrypt_key

.globl	aes_fx_set_encrypt_key
.align	32
aes_fx_set_encrypt_key:
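! Arguments, as read off the code below (the usual OpenSSL
! set_encrypt_key order): %o0 = user key (may be unaligned),
! %o1 = bits (128/192/256), %o2 = AES_KEY to fill; the round count is
! stored at offset 240 and 0 is returned in %o0.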
	mov		1, %o4
	nop
.Lset_encrypt_key:
	and		%o0, 7, %o3
	andn		%o0, 7, %o0
	sll		%o3, 3, %o3
	mov		%o7, %g1

1:	call		.+8
	add		%o7, .Linp_align-1b, %o7

	ldd		[%o7 + %o3], %f10	! shift left params
	mov		%g1, %o7

	cmp		%o1, 192
	ldd		[%o0 + 0], %f0
	bl,pt		%icc, .L128
	ldd		[%o0 + 8], %f2

	be,pt		%icc, .L192
	ldd		[%o0 + 16], %f4
	brz,pt		%o3, .L256aligned
	ldd		[%o0 + 24], %f6

	ldd		[%o0 + 32], %f8
	.word	0x81b81562 !fshiftorx	%f0,%f2,%f10,%f0
	.word	0x85b89564 !fshiftorx	%f2,%f4,%f10,%f2
	.word	0x89b91566 !fshiftorx	%f4,%f6,%f10,%f4
	.word	0x8db99568 !fshiftorx	%f6,%f8,%f10,%f6

.L256aligned:
	mov		14, %o1
	and		%o4, 224, %o3
	st		%o1, [%o2 + 240]	! store rounds
	add		%o2, %o3, %o2	! start or end of key schedule
	sllx		%o4, 4, %o4		! 16 or -16
	std		%f0, [%o2 + 0]
	.word	0x81b19290 !faeskeyx	%f6,16,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std		%f6, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std		%f0, [%o2 + 0]
	.word	0x81b19291 !faeskeyx	%f6,17,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std		%f6, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std		%f0, [%o2 + 0]
	.word	0x81b19292 !faeskeyx	%f6,18,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std		%f6, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std		%f0, [%o2 + 0]
	.word	0x81b19293 !faeskeyx	%f6,19,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std		%f6, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std		%f0, [%o2 + 0]
	.word	0x81b19294 !faeskeyx	%f6,20,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std		%f6, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std		%f0, [%o2 + 0]
	.word	0x81b19295 !faeskeyx	%f6,21,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std		%f6, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std		%f0, [%o2 + 0]
	.word	0x81b19296 !faeskeyx	%f6,22,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	std		%f6, [%o2 + 8]
	add		%o2, %o4, %o2
	std		%f0, [%o2 + 0]
	std		%f2, [%o2 + 8]
	retl
	xor		%o0, %o0, %o0		! return 0

.align	16
.L192:
	brz,pt		%o3, .L192aligned
	nop

	ldd		[%o0 + 24], %f6
	.word	0x81b81562 !fshiftorx	%f0,%f2,%f10,%f0
	.word	0x85b89564 !fshiftorx	%f2,%f4,%f10,%f2
	.word	0x89b91566 !fshiftorx	%f4,%f6,%f10,%f4

.L192aligned:
	mov		12, %o1
	and		%o4, 192, %o3
	st		%o1, [%o2 + 240]	! store rounds
	add		%o2, %o3, %o2	! start or end of key schedule
	sllx		%o4, 4, %o4		! 16 or -16
	std		%f0, [%o2 + 0]
	.word	0x81b11290 !faeskeyx	%f4,16,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std		%f0, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x81b11291 !faeskeyx	%f4,17,%f0
	std		%f2, [%o2 + 0]
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std		%f0, [%o2 + 0]
	.word	0x81b11292 !faeskeyx	%f4,18,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std		%f0, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x81b11293 !faeskeyx	%f4,19,%f0
	std		%f2, [%o2 + 0]
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std		%f0, [%o2 + 0]
	.word	0x81b11294 !faeskeyx	%f4,20,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std		%f0, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x81b11295 !faeskeyx	%f4,21,%f0
	std		%f2, [%o2 + 0]
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std		%f0, [%o2 + 0]
	.word	0x81b11296 !faeskeyx	%f4,22,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 0]
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std		%f0, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x81b11297 !faeskeyx	%f4,23,%f0
	std		%f2, [%o2 + 0]
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f4, [%o2 + 8]
	add		%o2, %o4, %o2
	std		%f0, [%o2 + 0]
	std		%f2, [%o2 + 8]
	retl
	xor		%o0, %o0, %o0		! return 0

.align	16
.L128:
	brz,pt		%o3, .L128aligned
	nop

	ldd		[%o0 + 16], %f4
	.word	0x81b81562 !fshiftorx	%f0,%f2,%f10,%f0
	.word	0x85b89564 !fshiftorx	%f2,%f4,%f10,%f2

.L128aligned:
	mov		10, %o1
	and		%o4, 160, %o3
	st		%o1, [%o2 + 240]	! store rounds
	add		%o2, %o3, %o2	! start or end of key schedule
	sllx		%o4, 4, %o4		! 16 or -16
	std		%f0, [%o2 + 0]
	.word	0x81b09290 !faeskeyx	%f2,16,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	.word	0x81b09291 !faeskeyx	%f2,17,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	.word	0x81b09292 !faeskeyx	%f2,18,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	.word	0x81b09293 !faeskeyx	%f2,19,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	.word	0x81b09294 !faeskeyx	%f2,20,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	.word	0x81b09295 !faeskeyx	%f2,21,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	.word	0x81b09296 !faeskeyx	%f2,22,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	.word	0x81b09297 !faeskeyx	%f2,23,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	.word	0x81b09298 !faeskeyx	%f2,24,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	.word	0x81b09299 !faeskeyx	%f2,25,%f0
	std		%f2, [%o2 + 8]
	add		%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std		%f0, [%o2 + 0]
	std		%f2, [%o2 + 8]
	retl
	xor		%o0, %o0, %o0		! return 0
.type	aes_fx_set_encrypt_key,#function
.size	aes_fx_set_encrypt_key,.-aes_fx_set_encrypt_key
.globl	aes_fx_cbc_encrypt
.align	32
aes_fx_cbc_encrypt:
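! Register usage, as read off the code below (the usual OpenSSL CBC
! argument order): %i0 = in, %i1 = out, %i2 = length in bytes (only
! whole 16-byte blocks are processed), %i3 = key schedule, %i4 = ivec,
! %i5 = enc flag (zero branches to .Lcbc_decrypt).  Input is read one
! doubleword ahead with a non-faulting load (ASI 0x82), so reading past
! the end of the input buffer cannot fault.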
	save		%sp, -STACK_FRAME-16, %sp
	srln		%i2, 4, %i2
	and		%i0, 7, %l4
	andn		%i0, 7, %i0
	brz,pn		%i2, .Lcbc_no_data
	sll		%l4, 3, %l4

1:	call		.+8
	add		%o7, .Linp_align-1b, %o7

	ld		[%i3 + 240], %l0
	and		%i1, 7, %l5
	ld		[%i4 + 0], %f0		! load ivec
	andn		%i1, 7, %i1
	ld		[%i4 + 4], %f1
	sll		%l5, 3, %l6
	ld		[%i4 + 8], %f2
	ld		[%i4 + 12], %f3

	sll		%l0, 4, %l0
	add		%l0, %i3, %l2
	ldd		[%i3 + 0], %f20	! round[0]
	ldd		[%i3 + 8], %f22

	add		%i0, 16, %i0
	sub		%i2,  1, %i2
	ldd		[%l2 + 0], %f24	! round[last]
	ldd		[%l2 + 8], %f26

	mov		16, %l3
	movrz		%i2, 0, %l3
	ldd		[%i3 + 16], %f10	! round[1]
	ldd		[%i3 + 24], %f12

	ldd		[%o7 + %l4], %f36	! shift left params
	add		%o7, 64, %o7
	ldd		[%i0 - 16], %f28	! load input
	ldd		[%i0 -  8], %f30
	ldda		[%i0]0x82, %f32	! non-faulting load
	brz		%i5, .Lcbc_decrypt
	add		%i0, %l3, %i0	! inp+=16

	.word	0x81b50d80 !fxor	%f20,%f0,%f0		! ivec^=round[0]
	.word	0x85b58d82 !fxor	%f22,%f2,%f2
	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
	nop

.Loop_cbc_enc:
	.word	0x81b70d80 !fxor	%f28,%f0,%f0		! inp^ivec^round[0]
	.word	0x85b78d82 !fxor	%f30,%f2,%f2
	ldd		[%i3 + 32], %f6	! round[2]
	ldd		[%i3 + 40], %f8
	add		%i3, 32, %l2
	sub		%l0, 16*6, %l1

.Lcbc_enc:
	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10
	ldd		[%l2 + 24], %f12
	add		%l2, 32, %l2

	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd		[%l2 + 0], %f6
	ldd		[%l2 + 8], %f8

	brnz,a		%l1, .Lcbc_enc
	sub		%l1, 16*2, %l1

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10	! round[last-1]
	ldd		[%l2 + 24], %f12

	movrz		%i2, 0, %l3
	fmovd		%f32, %f28
	ldd		[%i0 - 8], %f30	! load next input block
	ldda		[%i0]0x82, %f32	! non-faulting load
	add		%i0, %l3, %i0	! inp+=16

	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2

	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%i3 + 16], %f10	! round[1]
	ldd		[%i3 + 24], %f12

	.word	0xb9b50d9c !fxor	%f20,%f28,%f28	! inp^=round[0]
	.word	0xbdb58d9e !fxor	%f22,%f30,%f30

	fmovd		%f0, %f4
	.word	0x81b09258 !faesenclx	%f2,%f24,%f0
	.word	0x85b1125a !faesenclx	%f4,%f26,%f2

	brnz,pn		%l5, .Lcbc_enc_unaligned_out
	nop

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	add		%i1, 16, %i1

	brnz,a		%i2, .Loop_cbc_enc
	sub		%i2, 1, %i2

	st		%f0, [%i4 + 0]		! output ivec
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]

.Lcbc_no_data:
	ret
	restore

.align	32
.Lcbc_enc_unaligned_out:
	ldd		[%o7 + %l6], %f36	! shift right params
	mov		0xff, %l6
	srl		%l6, %l5, %l6
	sub		%g0, %l4, %l5

	.word	0x8db80b60 !fshiftorx	%f0,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8

	stda		%f6, [%i1 + %l6]0xc0	! partial store
	orn		%g0, %l6, %l6
	std		%f8, [%i1 + 8]
	add		%i1, 16, %i1
	brz		%i2, .Lcbc_enc_unaligned_out_done
	sub		%i2, 1, %i2
	b		.Loop_cbc_enc_unaligned_out
	nop

.align	32
.Loop_cbc_enc_unaligned_out:
	fmovd		%f2, %f34
	.word	0x81b70d80 !fxor	%f28,%f0,%f0		! inp^ivec^round[0]
	.word	0x85b78d82 !fxor	%f30,%f2,%f2
	ldd		[%i3 + 32], %f6	! round[2]
	ldd		[%i3 + 40], %f8

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%i3 + 48], %f10	! round[3]
	ldd		[%i3 + 56], %f12

	ldx		[%i0 - 16], %o0
	ldx		[%i0 -  8], %o1
	brz		%l4, .Lcbc_enc_aligned_inp
	movrz		%i2, 0, %l3

	ldx		[%i0], %o2
	sllx		%o0, %l4, %o0
	srlx		%o1, %l5, %g1
	sllx		%o1, %l4, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l5, %o2
	or		%o2, %o1, %o1

.Lcbc_enc_aligned_inp:
	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd		[%i3 + 64], %f6	! round[4]
	ldd		[%i3 + 72], %f8
	add		%i3, 64, %l2
	sub		%l0, 16*8, %l1

	stx		%o0, [%sp + LOCALS + 0]
	stx		%o1, [%sp + LOCALS + 8]
	add		%i0, %l3, %i0	! inp+=16
	nop

.Lcbc_enc_unaligned:
	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10
	ldd		[%l2 + 24], %f12
	add		%l2, 32, %l2

	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd		[%l2 + 0], %f6
	ldd		[%l2 + 8], %f8

	brnz,a		%l1, .Lcbc_enc_unaligned
	sub		%l1, 16*2, %l1

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10	! round[last-1]
	ldd		[%l2 + 24], %f12

	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2

	ldd		[%sp + LOCALS + 0], %f28
	ldd		[%sp + LOCALS + 8], %f30

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%i3 + 16], %f10	! round[1]
	ldd		[%i3 + 24], %f12

	.word	0xb9b50d9c !fxor	%f20,%f28,%f28	! inp^=round[0]
	.word	0xbdb58d9e !fxor	%f22,%f30,%f30

	fmovd		%f0, %f4
	.word	0x81b09258 !faesenclx	%f2,%f24,%f0
	.word	0x85b1125a !faesenclx	%f4,%f26,%f2

	.word	0x8db8cb60 !fshiftorx	%f34,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
	std		%f6, [%i1 + 0]
	std		%f8, [%i1 + 8]
	add		%i1, 16, %i1

	brnz,a		%i2, .Loop_cbc_enc_unaligned_out
	sub		%i2, 1, %i2

.Lcbc_enc_unaligned_out_done:
	.word	0x91b88b62 !fshiftorx	%f2,%f2,%f36,%f8
	stda		%f8, [%i1 + %l6]0xc0	! partial store

	st		%f0, [%i4 + 0]		! output ivec
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]

	ret
	restore

.align	32
.Lcbc_decrypt:
	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
	fmovd		%f0, %f16
	fmovd		%f2, %f18

.Loop_cbc_dec:
	.word	0x81b70d94 !fxor	%f28,%f20,%f0	! inp^round[0]
	.word	0x85b78d96 !fxor	%f30,%f22,%f2
	ldd		[%i3 + 32], %f6	! round[2]
	ldd		[%i3 + 40], %f8
	add		%i3, 32, %l2
	sub		%l0, 16*6, %l1

.Lcbc_dec:
	fmovd		%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10
	ldd		[%l2 + 24], %f12
	add		%l2, 32, %l2

	fmovd		%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	ldd		[%l2 + 0], %f6
	ldd		[%l2 + 8], %f8

	brnz,a		%l1, .Lcbc_dec
	sub		%l1, 16*2, %l1

	fmovd		%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10	! round[last-1]
	ldd		[%l2 + 24], %f12

	fmovd		%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	.word	0x8db40d98 !fxor	%f16,%f24,%f6	! ivec^round[last]
	.word	0x91b48d9a !fxor	%f18,%f26,%f8
	fmovd		%f28, %f16
	fmovd		%f30, %f18

	movrz		%i2, 0, %l3
	fmovd		%f32, %f28
	ldd		[%i0 - 8], %f30	! load next input block
	ldda		[%i0]0x82, %f32	! non-faulting load
	add		%i0, %l3, %i0	! inp+=16

	fmovd		%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd		[%i3 + 16], %f10	! round[1]
	ldd		[%i3 + 24], %f12

	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30

	fmovd		%f0, %f4
	.word	0x81b09266 !faesdeclx	%f2,%f6,%f0
	.word	0x85b11268 !faesdeclx	%f4,%f8,%f2

	brnz,pn		%l5, .Lcbc_dec_unaligned_out
	nop

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	add		%i1, 16, %i1

	brnz,a		%i2, .Loop_cbc_dec
	sub		%i2, 1, %i2

	st		%f16, [%i4 + 0]		! output ivec
	st		%f17, [%i4 + 4]
	st		%f18, [%i4 + 8]
	st		%f19, [%i4 + 12]

	ret
	restore

.align	32
.Lcbc_dec_unaligned_out:
	ldd		[%o7 + %l6], %f36	! shift right params
	mov		0xff, %l6
	srl		%l6, %l5, %l6
	sub		%g0, %l4, %l5

	.word	0x8db80b60 !fshiftorx	%f0,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8

	stda		%f6, [%i1 + %l6]0xc0	! partial store
	orn		%g0, %l6, %l6
	std		%f8, [%i1 + 8]
	add		%i1, 16, %i1
	brz		%i2, .Lcbc_dec_unaligned_out_done
	sub		%i2, 1, %i2
	b		.Loop_cbc_dec_unaligned_out
	nop

.align	32
.Loop_cbc_dec_unaligned_out:
	fmovd		%f2, %f34
	.word	0x81b70d94 !fxor	%f28,%f20,%f0	! inp^round[0]
	.word	0x85b78d96 !fxor	%f30,%f22,%f2
	ldd		[%i3 + 32], %f6	! round[2]
	ldd		[%i3 + 40], %f8

	fmovd		%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd		[%i3 + 48], %f10	! round[3]
	ldd		[%i3 + 56], %f12

	ldx		[%i0 - 16], %o0
	ldx		[%i0 - 8], %o1
	brz		%l4, .Lcbc_dec_aligned_inp
	movrz		%i2, 0, %l3

	ldx		[%i0], %o2
	sllx		%o0, %l4, %o0
	srlx		%o1, %l5, %g1
	sllx		%o1, %l4, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l5, %o2
	or		%o2, %o1, %o1

.Lcbc_dec_aligned_inp:
	fmovd		%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	ldd		[%i3 + 64], %f6	! round[4]
	ldd		[%i3 + 72], %f8
	add		%i3, 64, %l2
	sub		%l0, 16*8, %l1

	stx		%o0, [%sp + LOCALS + 0]
	stx		%o1, [%sp + LOCALS + 8]
	add		%i0, %l3, %i0	! inp+=16
	nop

.Lcbc_dec_unaligned:
	fmovd		%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10
	ldd		[%l2 + 24], %f12
	add		%l2, 32, %l2

	fmovd		%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	ldd		[%l2 + 0], %f6
	ldd		[%l2 + 8], %f8

	brnz,a		%l1, .Lcbc_dec_unaligned
	sub		%l1, 16*2, %l1

	fmovd		%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10	! round[last-1]
	ldd		[%l2 + 24], %f12

	fmovd		%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2

	.word	0x8db40d98 !fxor	%f16,%f24,%f6	! ivec^round[last]
	.word	0x91b48d9a !fxor	%f18,%f26,%f8
	fmovd		%f28, %f16
	fmovd		%f30, %f18
	ldd		[%sp + LOCALS + 0], %f28
	ldd		[%sp + LOCALS + 8], %f30

	fmovd		%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd		[%i3 + 16], %f10	! round[1]
	ldd		[%i3 + 24], %f12

	fmovd		%f0, %f4
	.word	0x81b09266 !faesdeclx	%f2,%f6,%f0
	.word	0x85b11268 !faesdeclx	%f4,%f8,%f2

	.word	0x8db8cb60 !fshiftorx	%f34,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
	std		%f6, [%i1 + 0]
	std		%f8, [%i1 + 8]
	add		%i1, 16, %i1

	brnz,a		%i2, .Loop_cbc_dec_unaligned_out
	sub		%i2, 1, %i2

.Lcbc_dec_unaligned_out_done:
	.word	0x91b88b62 !fshiftorx	%f2,%f2,%f36,%f8
	stda		%f8, [%i1 + %l6]0xc0	! partial store

	st		%f16, [%i4 + 0]		! output ivec
	st		%f17, [%i4 + 4]
	st		%f18, [%i4 + 8]
	st		%f19, [%i4 + 12]

	ret
	restore
.type	aes_fx_cbc_encrypt,#function
.size	aes_fx_cbc_encrypt,.-aes_fx_cbc_encrypt
.globl	aes_fx_ctr32_encrypt_blocks
.align	32
aes_fx_ctr32_encrypt_blocks:
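! Register usage, as read off the code below (the usual OpenSSL ctr32
! argument order): %i0 = in, %i1 = out, %i2 = number of 16-byte blocks,
! %i3 = key schedule, %i4 = counter block.  As the ctr32 name suggests,
! only the last 32-bit word of the counter is incremented, via fpadd32
! with the { 0, 1 } constant at .Lone.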
	save		%sp, -STACK_FRAME-16, %sp
	srln		%i2, 0, %i2
	and		%i0, 7, %l4
	andn		%i0, 7, %i0
	brz,pn		%i2, .Lctr32_no_data
	sll		%l4, 3, %l4

.Lpic:	call		.+8
	add		%o7, .Linp_align - .Lpic, %o7

	ld		[%i3 + 240], %l0
	and		%i1, 7, %l5
	ld		[%i4 +  0], %f16	! load counter
	andn		%i1, 7, %i1
	ld		[%i4 +  4], %f17
	sll		%l5, 3, %l6
	ld		[%i4 +  8], %f18
	ld		[%i4 + 12], %f19
	ldd		[%o7 + 128], %f14

	sll		%l0, 4, %l0
	add		%l0, %i3, %l2
	ldd		[%i3 + 0], %f20	! round[0]
	ldd		[%i3 + 8], %f22

	add		%i0, 16, %i0
	sub		%i2, 1, %i2
	ldd		[%i3 + 16], %f10	! round[1]
	ldd		[%i3 + 24], %f12

	mov		16, %l3
	movrz		%i2, 0, %l3
	ldd		[%l2 + 0], %f24	! round[last]
	ldd		[%l2 + 8], %f26

	ldd		[%o7 + %l4], %f36	! shift left params
	add		%o7, 64, %o7
	ldd		[%i0 - 16], %f28	! load input
	ldd		[%i0 -  8], %f30
	ldda		[%i0]0x82, %f32	! non-faulting load
	add		%i0, %l3, %i0	! inp+=16

	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30

.Loop_ctr32:
	.word	0x81b40d94 !fxor	%f16,%f20,%f0	! counter^round[0]
	.word	0x85b48d96 !fxor	%f18,%f22,%f2
	ldd		[%i3 + 32], %f6	! round[2]
	ldd		[%i3 + 40], %f8
	add		%i3, 32, %l2
	sub		%l0, 16*6, %l1

.Lctr32_enc:
	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10
	ldd		[%l2 + 24], %f12
	add		%l2, 32, %l2

	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd		[%l2 + 0], %f6
	ldd		[%l2 + 8], %f8

	brnz,a		%l1, .Lctr32_enc
	sub		%l1, 16*2, %l1

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10	! round[last-1]
	ldd		[%l2 + 24], %f12

	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	.word	0x8db70d98 !fxor	%f28,%f24,%f6	! inp^round[last]
	.word	0x91b78d9a !fxor	%f30,%f26,%f8

	movrz		%i2, 0, %l3
	fmovd		%f32, %f28
	ldd		[%i0 - 8], %f30	! load next input block
	ldda		[%i0]0x82, %f32	! non-faulting load
	add		%i0, %l3, %i0	! inp+=16

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%i3 + 16], %f10	! round[1]
	ldd		[%i3 + 24], %f12

	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
	.word	0xa5b48a4e !fpadd32	%f18,%f14,%f18	! increment counter

	fmovd		%f0, %f4
	.word	0x81b09246 !faesenclx	%f2,%f6,%f0
	.word	0x85b11248 !faesenclx	%f4,%f8,%f2

	brnz,pn		%l5, .Lctr32_unaligned_out
	nop

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	add		%i1, 16, %i1

	brnz,a		%i2, .Loop_ctr32
	sub		%i2, 1, %i2

.Lctr32_no_data:
	ret
	restore

.align	32
.Lctr32_unaligned_out:
	ldd		[%o7 + %l6], %f36	! shift right params
	mov		0xff, %l6
	srl		%l6, %l5, %l6
	sub		%g0, %l4, %l5

	.word	0x8db80b60 !fshiftorx	%f0,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8

	stda		%f6, [%i1 + %l6]0xc0	! partial store
	orn		%g0, %l6, %l6
	std		%f8, [%i1 + 8]
	add		%i1, 16, %i1
	brz		%i2, .Lctr32_unaligned_out_done
	sub		%i2, 1, %i2
	b		.Loop_ctr32_unaligned_out
	nop

.align	32
.Loop_ctr32_unaligned_out:
	fmovd		%f2, %f34
	.word	0x81b40d94 !fxor	%f16,%f20,%f0	! counter^round[0]
	.word	0x85b48d96 !fxor	%f18,%f22,%f2
	ldd		[%i3 + 32], %f6	! round[2]
	ldd		[%i3 + 40], %f8

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%i3 + 48], %f10	! round[3]
	ldd		[%i3 + 56], %f12

	ldx		[%i0 - 16], %o0
	ldx		[%i0 -  8], %o1
	brz		%l4, .Lctr32_aligned_inp
	movrz		%i2, 0, %l3

	ldx		[%i0], %o2
	sllx		%o0, %l4, %o0
	srlx		%o1, %l5, %g1
	sllx		%o1, %l4, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l5, %o2
	or		%o2, %o1, %o1

.Lctr32_aligned_inp:
	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd		[%i3 + 64], %f6	! round[4]
	ldd		[%i3 + 72], %f8
	add		%i3, 64, %l2
	sub		%l0, 16*8, %l1

	stx		%o0, [%sp + LOCALS + 0]
	stx		%o1, [%sp + LOCALS + 8]
	add		%i0, %l3, %i0	! inp+=16
	nop

.Lctr32_enc_unaligned:
	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10
	ldd		[%l2 + 24], %f12
	add		%l2, 32, %l2

	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd		[%l2 + 0], %f6
	ldd		[%l2 + 8], %f8

	brnz,a		%l1, .Lctr32_enc_unaligned
	sub		%l1, 16*2, %l1

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%l2 + 16], %f10	! round[last-1]
	ldd		[%l2 + 24], %f12
	.word	0xa5b48a4e !fpadd32	%f18,%f14,%f18	! increment counter

	fmovd		%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	.word	0x8db70d98 !fxor	%f28,%f24,%f6	! inp^round[last]
	.word	0x91b78d9a !fxor	%f30,%f26,%f8
	ldd		[%sp + LOCALS + 0], %f28
	ldd		[%sp + LOCALS + 8], %f30

	fmovd		%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd		[%i3 + 16], %f10	! round[1]
	ldd		[%i3 + 24], %f12

	fmovd		%f0, %f4
	.word	0x81b09246 !faesenclx	%f2,%f6,%f0
	.word	0x85b11248 !faesenclx	%f4,%f8,%f2

	.word	0x8db8cb60 !fshiftorx	%f34,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
	std		%f6, [%i1 + 0]
	std		%f8, [%i1 + 8]
	add		%i1, 16, %i1

	brnz,a		%i2, .Loop_ctr32_unaligned_out
	sub		%i2, 1, %i2

.Lctr32_unaligned_out_done:
	.word	0x91b88b62 !fshiftorx	%f2,%f2,%f36,%f8
	stda		%f8, [%i1 + %l6]0xc0	! partial store

	ret
	restore
.type	aes_fx_ctr32_encrypt_blocks,#function
.size	aes_fx_ctr32_encrypt_blocks,.-aes_fx_ctr32_encrypt_blocks

.align	32
.Linp_align:		! fshiftorx parameters for left shift toward %rs1
	.byte	0, 0, 64,  0,	0, 64,  0, -64
	.byte	0, 0, 56,  8,	0, 56,  8, -56
	.byte	0, 0, 48, 16,	0, 48, 16, -48
	.byte	0, 0, 40, 24,	0, 40, 24, -40
	.byte	0, 0, 32, 32,	0, 32, 32, -32
	.byte	0, 0, 24, 40,	0, 24, 40, -24
	.byte	0, 0, 16, 48,	0, 16, 48, -16
	.byte	0, 0,  8, 56,	0,  8, 56, -8
.Lout_align:		! fshiftorx parameters for right shift toward %rs2
	.byte	0, 0,  0, 64,	0,  0, 64,   0
	.byte	0, 0,  8, 56,	0,  8, 56,  -8
	.byte	0, 0, 16, 48,	0, 16, 48, -16
	.byte	0, 0, 24, 40,	0, 24, 40, -24
	.byte	0, 0, 32, 32,	0, 32, 32, -32
	.byte	0, 0, 40, 24,	0, 40, 24, -40
	.byte	0, 0, 48, 16,	0, 48, 16, -48
	.byte	0, 0, 56,  8,	0, 56,  8, -56
.Lone:
	.word	0, 1
.asciz	"AES for Fujitsu SPARC64 X, CRYPTOGAMS by <appro@openssl.org>"
.align	4