1#ifndef __ASSEMBLER__
2# define __ASSEMBLER__ 1
3#endif
4#include "crypto/sparc_arch.h"
5
6#ifdef	__arch64__
7.register	%g2,#scratch
8.register	%g3,#scratch
9#endif
10
11.text
12
! void aes_t4_encrypt(const u8 *in, u8 *out, const AES_KEY *key)
!   %o0 = in   - 16-byte input block (any byte alignment)
!   %o1 = out  - 16-byte output block (any byte alignment)
!   %o2 = key  - expanded key schedule; round count at [%o2 + 240]
! Single-block AES encryption using SPARC T4 AES opcodes, hand-encoded
! as .word (the trailing "!" comment on each gives the mnemonic).
.globl	aes_t4_encrypt
.align	32
aes_t4_encrypt:
	andcc		%o0, 7, %g1		! is input aligned?
	andn		%o0, 7, %o0		! round input pointer down to 8 bytes

	ldx		[%o2 + 0], %g4		! %g4:%g5 = rk[0] (whitening key)
	ldx		[%o2 + 8], %g5

	ldx		[%o0 + 0], %o4
	bz,pt		%icc, 1f		! fast path if input was 8-byte aligned
	ldx		[%o0 + 8], %o5
	ldx		[%o0 + 16], %o0		! third dword needed to stitch a misaligned block
	sll		%g1, 3, %g1		! byte misalignment -> bit shift count
	sub		%g0, %g1, %o3		! complementary shift (mod 64)
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		%o0, %o3, %o3
	or		%o5, %o4, %o4		! %o4:%o5 = the 128-bit input block
	or		%o3, %g1, %o5
1:
	ld		[%o2 + 240], %o3	! %o3 = number of rounds
	ldd		[%o2 + 16], %f12	! preload next two round keys
	ldd		[%o2 + 24], %f14
	xor		%g4, %o4, %o4		! whitening: block ^= rk[0]
	xor		%g5, %o5, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	srl		%o3, 1, %o3		! loop body performs two rounds per iteration
	ldd		[%o2 + 32], %f16
	sub		%o3, 1, %o3		! final two rounds are done after the loop
	ldd		[%o2 + 40], %f18
	add		%o2, 48, %o2

! Main loop: two AES rounds per pass, round keys preloaded into
! %f12/%f14 and %f16/%f18 one iteration ahead.
.Lenc:
	.word	0x88cb0400 !aes_eround01	%f12,%f0,%f2,%f4
	.word	0x84cb8420 !aes_eround23	%f14,%f0,%f2,%f2
	ldd		[%o2 + 0], %f12
	ldd		[%o2 + 8], %f14
	sub		%o3,1,%o3
	.word	0x80cc0404 !aes_eround01	%f16,%f4,%f2,%f0
	.word	0x84cc8424 !aes_eround23	%f18,%f4,%f2,%f2
	ldd		[%o2 + 16], %f16
	ldd		[%o2 + 24], %f18
	brnz,pt		%o3, .Lenc
	add		%o2, 32, %o2		! delay slot: advance key pointer

	andcc		%o1, 7, %o4		! is output aligned?
	.word	0x88cb0400 !aes_eround01	%f12,%f0,%f2,%f4
	.word	0x84cb8420 !aes_eround23	%f14,%f0,%f2,%f2
	.word	0x80cc0484 !aes_eround01_l	%f16,%f4,%f2,%f0
	.word	0x84cc84a4 !aes_eround23_l	%f18,%f4,%f2,%f2

	bnz,pn		%icc, 2f		! misaligned output -> partial-store path
	nop

	std		%f0, [%o1 + 0]		! aligned output: two plain 8-byte stores
	retl
	std		%f2, [%o1 + 8]		! delay slot

! Misaligned output: align the pointer down, then emit the 16-byte
! result as head/body/tail using VIS faligndata + masked partial stores.
2:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
	mov		0xff, %o5
	srl		%o5, %o4, %o5		! byte-enable mask for the leading partial store

	.word	0x89b00900 !faligndata	%f0,%f0,%f4
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%o1 + %o5]0xc0	! partial store
	std		%f6, [%o1 + 8]
	add		%o1, 16, %o1
	orn		%g0, %o5, %o5		! invert mask for the trailing bytes
	retl
	stda		%f8, [%o1 + %o5]0xc0	! partial store
.type	aes_t4_encrypt,#function
.size	aes_t4_encrypt,.-aes_t4_encrypt
90
! void aes_t4_decrypt(const u8 *in, u8 *out, const AES_KEY *key)
!   %o0 = in   - 16-byte input block (any byte alignment)
!   %o1 = out  - 16-byte output block (any byte alignment)
!   %o2 = key  - expanded key schedule; round count at [%o2 + 240]
! Exact structural mirror of aes_t4_encrypt, using the T4 decrypt
! round opcodes (aes_dround*) instead of the encrypt ones.
.globl	aes_t4_decrypt
.align	32
aes_t4_decrypt:
	andcc		%o0, 7, %g1		! is input aligned?
	andn		%o0, 7, %o0		! round input pointer down to 8 bytes

	ldx		[%o2 + 0], %g4		! %g4:%g5 = rk[0] (whitening key)
	ldx		[%o2 + 8], %g5

	ldx		[%o0 + 0], %o4
	bz,pt		%icc, 1f		! fast path if input was 8-byte aligned
	ldx		[%o0 + 8], %o5
	ldx		[%o0 + 16], %o0		! third dword needed to stitch a misaligned block
	sll		%g1, 3, %g1		! byte misalignment -> bit shift count
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		%o0, %o3, %o3
	or		%o5, %o4, %o4		! %o4:%o5 = the 128-bit input block
	or		%o3, %g1, %o5
1:
	ld		[%o2 + 240], %o3	! %o3 = number of rounds
	ldd		[%o2 + 16], %f12	! preload next two round keys
	ldd		[%o2 + 24], %f14
	xor		%g4, %o4, %o4		! whitening: block ^= rk[0]
	xor		%g5, %o5, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	srl		%o3, 1, %o3		! two rounds per loop iteration
	ldd		[%o2 + 32], %f16
	sub		%o3, 1, %o3		! final two rounds done after the loop
	ldd		[%o2 + 40], %f18
	add		%o2, 48, %o2

! Main loop: two decrypt rounds per pass with round keys preloaded.
.Ldec:
	.word	0x88cb0440 !aes_dround01	%f12,%f0,%f2,%f4
	.word	0x84cb8460 !aes_dround23	%f14,%f0,%f2,%f2
	ldd		[%o2 + 0], %f12
	ldd		[%o2 + 8], %f14
	sub		%o3,1,%o3
	.word	0x80cc0444 !aes_dround01	%f16,%f4,%f2,%f0
	.word	0x84cc8464 !aes_dround23	%f18,%f4,%f2,%f2
	ldd		[%o2 + 16], %f16
	ldd		[%o2 + 24], %f18
	brnz,pt		%o3, .Ldec
	add		%o2, 32, %o2		! delay slot: advance key pointer

	andcc		%o1, 7, %o4		! is output aligned?
	.word	0x88cb0440 !aes_dround01	%f12,%f0,%f2,%f4
	.word	0x84cb8460 !aes_dround23	%f14,%f0,%f2,%f2
	.word	0x80cc04c4 !aes_dround01_l	%f16,%f4,%f2,%f0
	.word	0x84cc84e4 !aes_dround23_l	%f18,%f4,%f2,%f2

	bnz,pn		%icc, 2f		! misaligned output -> partial-store path
	nop

	std		%f0, [%o1 + 0]		! aligned output: two plain 8-byte stores
	retl
	std		%f2, [%o1 + 8]		! delay slot

! Misaligned output: head/body/tail via faligndata + masked partial stores.
2:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
	mov		0xff, %o5
	srl		%o5, %o4, %o5		! byte-enable mask for the leading partial store

	.word	0x89b00900 !faligndata	%f0,%f0,%f4
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%o1 + %o5]0xc0	! partial store
	std		%f6, [%o1 + 8]
	add		%o1, 16, %o1
	orn		%g0, %o5, %o5		! invert mask for the trailing bytes
	retl
	stda		%f8, [%o1 + %o5]0xc0	! partial store
.type	aes_t4_decrypt,#function
.size	aes_t4_decrypt,.-aes_t4_decrypt
! int aes_t4_set_encrypt_key(const u8 *key, int bits, AES_KEY *out)
!   %o0 = key  - user key bytes (any byte alignment)
!   %o1 = bits - key length in bits; <192 -> 128-bit, ==192 -> 192-bit,
!                otherwise 256-bit (dispatch via cmp %o1, 192 below)
!   %o2 = out  - receives the expanded schedule; round count stored
!                at [%o2 + 240] (10/12/14); returns 0 in %o0
! Key expansion via the T4 aes_kexpand0/1/2 opcodes (.word-encoded),
! fully unrolled per key size.
.globl	aes_t4_set_encrypt_key
.align	32
aes_t4_set_encrypt_key:
.Lset_encrypt_key:
	and		%o0, 7, %o3		! %o3 = key pointer misalignment
	.word	0x91b20300 !alignaddr	%o0,%g0,%o0
	cmp		%o1, 192		! dispatch on key size
	ldd		[%o0 + 0], %f0
	bl,pt		%icc,.L128
	ldd		[%o0 + 8], %f2		! delay slot

	be,pt		%icc,.L192
	ldd		[%o0 + 16], %f4		! delay slot
	brz,pt		%o3, .L256aligned	! aligned 256-bit key: skip fixup
	ldd		[%o0 + 24], %f6		! delay slot

	ldd		[%o0 + 32], %f8		! extra dword to realign the 256-bit key
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18908 !faligndata	%f6,%f8,%f6
! 256-bit expansion: key in %f0-%f6, 14 rounds, unrolled.
.L256aligned:
	std		%f0, [%o2 + 0]
	.word	0x80c80106 !aes_kexpand1	%f0,%f6,0,%f0
	std		%f2, [%o2 + 8]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 16]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 24]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 32]
	.word	0x80c80306 !aes_kexpand1	%f0,%f6,1,%f0
	std		%f2, [%o2 + 40]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 48]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 56]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 64]
	.word	0x80c80506 !aes_kexpand1	%f0,%f6,2,%f0
	std		%f2, [%o2 + 72]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 80]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 88]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 96]
	.word	0x80c80706 !aes_kexpand1	%f0,%f6,3,%f0
	std		%f2, [%o2 + 104]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 112]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 120]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 128]
	.word	0x80c80906 !aes_kexpand1	%f0,%f6,4,%f0
	std		%f2, [%o2 + 136]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 144]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 152]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 160]
	.word	0x80c80b06 !aes_kexpand1	%f0,%f6,5,%f0
	std		%f2, [%o2 + 168]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 176]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 184]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 192]
	.word	0x80c80d06 !aes_kexpand1	%f0,%f6,6,%f0
	std		%f2, [%o2 + 200]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 208]
	std		%f6, [%o2 + 216]
	std		%f0, [%o2 + 224]
	std		%f2, [%o2 + 232]

	mov		14, %o3			! 14 rounds for AES-256
	st		%o3, [%o2 + 240]
	retl
	xor		%o0, %o0, %o0		! delay slot: return 0 (success)

.align	16
! 192-bit expansion: key in %f0-%f4, 12 rounds, unrolled.
.L192:
	brz,pt		%o3, .L192aligned	! aligned key: skip fixup
	nop

	ldd		[%o0 + 24], %f6		! extra dword to realign the 192-bit key
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
.L192aligned:
	std		%f0, [%o2 + 0]
	.word	0x80c80104 !aes_kexpand1	%f0,%f4,0,%f0
	std		%f2, [%o2 + 8]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 16]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 24]
	.word	0x80c80304 !aes_kexpand1	%f0,%f4,1,%f0
	std		%f2, [%o2 + 32]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 40]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 48]
	.word	0x80c80504 !aes_kexpand1	%f0,%f4,2,%f0
	std		%f2, [%o2 + 56]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 64]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 72]
	.word	0x80c80704 !aes_kexpand1	%f0,%f4,3,%f0
	std		%f2, [%o2 + 80]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 88]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 96]
	.word	0x80c80904 !aes_kexpand1	%f0,%f4,4,%f0
	std		%f2, [%o2 + 104]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 112]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 120]
	.word	0x80c80b04 !aes_kexpand1	%f0,%f4,5,%f0
	std		%f2, [%o2 + 128]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 136]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 144]
	.word	0x80c80d04 !aes_kexpand1	%f0,%f4,6,%f0
	std		%f2, [%o2 + 152]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 160]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 168]
	.word	0x80c80f04 !aes_kexpand1	%f0,%f4,7,%f0
	std		%f2, [%o2 + 176]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 184]
	std		%f0, [%o2 + 192]
	std		%f2, [%o2 + 200]

	mov		12, %o3			! 12 rounds for AES-192
	st		%o3, [%o2 + 240]
	retl
	xor		%o0, %o0, %o0		! delay slot: return 0 (success)

.align	16
! 128-bit expansion: key in %f0-%f2, 10 rounds, unrolled.
.L128:
	brz,pt		%o3, .L128aligned	! aligned key: skip fixup
	nop

	ldd		[%o0 + 16], %f4		! extra dword to realign the 128-bit key
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
.L128aligned:
	std		%f0, [%o2 + 0]
	.word	0x80c80102 !aes_kexpand1	%f0,%f2,0,%f0
	std		%f2, [%o2 + 8]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 16]
	.word	0x80c80302 !aes_kexpand1	%f0,%f2,1,%f0
	std		%f2, [%o2 + 24]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 32]
	.word	0x80c80502 !aes_kexpand1	%f0,%f2,2,%f0
	std		%f2, [%o2 + 40]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 48]
	.word	0x80c80702 !aes_kexpand1	%f0,%f2,3,%f0
	std		%f2, [%o2 + 56]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 64]
	.word	0x80c80902 !aes_kexpand1	%f0,%f2,4,%f0
	std		%f2, [%o2 + 72]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 80]
	.word	0x80c80b02 !aes_kexpand1	%f0,%f2,5,%f0
	std		%f2, [%o2 + 88]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 96]
	.word	0x80c80d02 !aes_kexpand1	%f0,%f2,6,%f0
	std		%f2, [%o2 + 104]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 112]
	.word	0x80c80f02 !aes_kexpand1	%f0,%f2,7,%f0
	std		%f2, [%o2 + 120]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 128]
	.word	0x80c81102 !aes_kexpand1	%f0,%f2,8,%f0
	std		%f2, [%o2 + 136]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 144]
	.word	0x80c81302 !aes_kexpand1	%f0,%f2,9,%f0
	std		%f2, [%o2 + 152]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 160]
	std		%f2, [%o2 + 168]

	mov		10, %o3			! 10 rounds for AES-128
	st		%o3, [%o2 + 240]
	retl
	xor		%o0, %o0, %o0		! delay slot: return 0 (success)
.type	aes_t4_set_encrypt_key,#function
.size	aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key
375
! int aes_t4_set_decrypt_key(const u8 *key, int bits, AES_KEY *out)
! Builds the encryption schedule via .Lset_encrypt_key, then reverses
! the order of the round keys in place (the T4 dround opcodes consume
! the schedule back-to-front relative to the encrypt one).
! Returns 0 in %o0.
.globl	aes_t4_set_decrypt_key
.align	32
aes_t4_set_decrypt_key:
	mov		%o7, %o5		! save our return address; the call below clobbers %o7
	call		.Lset_encrypt_key
	nop

	mov		%o5, %o7		! restore return address
	sll		%o3, 4, %o0		! %o3 is number of rounds
	add		%o3, 2, %o3
	add		%o2, %o0, %o0	! %o0=%o2+16*rounds
	srl		%o3, 2, %o3		! %o3=(rounds+2)/4

! Swap 32-byte chunks from both ends of the schedule, walking the
! pointers (%o2 up, %o0 down) toward the middle.
.Lkey_flip:
	ldd		[%o2 + 0],  %f0
	ldd		[%o2 + 8],  %f2
	ldd		[%o2 + 16], %f4
	ldd		[%o2 + 24], %f6
	ldd		[%o0 + 0],  %f8
	ldd		[%o0 + 8],  %f10
	ldd		[%o0 - 16], %f12
	ldd		[%o0 - 8],  %f14
	sub		%o3, 1, %o3
	std		%f0, [%o0 + 0]
	std		%f2, [%o0 + 8]
	std		%f4, [%o0 - 16]
	std		%f6, [%o0 - 8]
	std		%f8, [%o2 + 0]
	std		%f10, [%o2 + 8]
	std		%f12, [%o2 + 16]
	std		%f14, [%o2 + 24]
	add		%o2, 32, %o2
	brnz		%o3, .Lkey_flip
	sub		%o0, 32, %o0		! delay slot: move top pointer down

	retl
	xor		%o0, %o0, %o0		! delay slot: return 0 (success)
.type	aes_t4_set_decrypt_key,#function
.size	aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
! _aes128_encrypt_1x: fully unrolled 10-round AES-128 on one block.
!   In/out: block in %f0:%f2.
!   Round keys: preloaded in %f16-%f54 (see _aes128_loadkey); the
!   whitening xor with rk[0] (%g4:%g5) is done by the caller.
!   Clobbers %f4; final round uses the *_l (last-round) opcodes.
! Callers may also enter at +8 with the first round already issued.
.align	32
_aes128_encrypt_1x:
	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x80cd4484 !aes_eround01_l	%f52,%f4,%f2,%f0
	retl
	.word	0x84cdc4a4 !aes_eround23_l	%f54,%f4,%f2,%f2	! delay slot: last half-round
.type	_aes128_encrypt_1x,#function
.size	_aes128_encrypt_1x,.-_aes128_encrypt_1x
440
! _aes128_encrypt_2x: 10-round AES-128 on two independent blocks,
! instruction-interleaved to hide round latency.
!   In/out: block0 in %f0:%f2, block1 in %f4:%f6.
!   Round keys: preloaded in %f16-%f54; whitening xor done by caller.
!   Clobbers %f8, %f10.
! Callers may also enter at +16 with the first round already issued.
.align	32
_aes128_encrypt_2x:
	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
	.word	0x80cd4488 !aes_eround01_l	%f52,%f8,%f2,%f0
	.word	0x84cdc4a8 !aes_eround23_l	%f54,%f8,%f2,%f2
	.word	0x88cd4c8a !aes_eround01_l	%f52,%f10,%f6,%f4
	retl
	.word	0x8ccdccaa !aes_eround23_l	%f54,%f10,%f6,%f6	! delay slot: last half-round
.type	_aes128_encrypt_2x,#function
.size	_aes128_encrypt_2x,.-_aes128_encrypt_2x
486
! _aes128_loadkey: load the full AES-128 schedule from [%i3] into
! registers for the _aes128_encrypt_* cores:
!   %g4:%g5   = rk[0]  (whitening key, applied by callers in integer regs)
!   %f16-%f54 = rk[1..10] (176-byte schedule minus the first 16 bytes)
! Called from within a register window (args in %i registers).
.align	32
_aes128_loadkey:
	ldx		[%i3 + 0], %g4
	ldx		[%i3 + 8], %g5
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	ldd		[%i3 + 32], %f20
	ldd		[%i3 + 40], %f22
	ldd		[%i3 + 48], %f24
	ldd		[%i3 + 56], %f26
	ldd		[%i3 + 64], %f28
	ldd		[%i3 + 72], %f30
	ldd		[%i3 + 80], %f32
	ldd		[%i3 + 88], %f34
	ldd		[%i3 + 96], %f36
	ldd		[%i3 + 104], %f38
	ldd		[%i3 + 112], %f40
	ldd		[%i3 + 120], %f42
	ldd		[%i3 + 128], %f44
	ldd		[%i3 + 136], %f46
	ldd		[%i3 + 144], %f48
	ldd		[%i3 + 152], %f50
	ldd		[%i3 + 160], %f52
	ldd		[%i3 + 168], %f54
	retl
	nop
.type	_aes128_loadkey,#function
.size	_aes128_loadkey,.-_aes128_loadkey
! Encrypt and decrypt key loading are identical for AES-128 here;
! both names alias the same routine.
_aes128_load_enckey=_aes128_loadkey
_aes128_load_deckey=_aes128_loadkey
517
! void aes128_t4_cbc_encrypt(const u8 *in, u8 *out, size_t len,
!                            const AES_KEY *key, u8 *ivec)
!   %i0 = in, %i1 = out, %i2 = len (bytes), %i3 = key, %i4 = ivec
! CBC encryption, one block at a time (CBC chaining serializes blocks).
! Three code paths:
!   .L128_cbc_enc_loop - generic; handles unaligned in/out
!   label 2            - tail of the loop for byte-misaligned output
!                        (VIS faligndata + masked partial stores)
!   .L128cbc_enc_blk   - bulk path using ASI_BLK_INIT stores, taken when
!                        out is 8-byte aligned, len >= 128 and in != out
.globl	aes128_t4_cbc_encrypt
.align	32
aes128_t4_cbc_encrypt:
	save		%sp, -STACK_FRAME, %sp
	cmp		%i2, 0
	be,pn		SIZE_T_CC, .L128_cbc_enc_abort	! len == 0: nothing to do
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f0		! load IV into %f0:%f2 (4 x 32-bit)
	ld		[%i4 + 4], %f1
	ld		[%i4 + 8], %f2
	ld		[%i4 + 12], %f3
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_enckey	! fills %g4:%g5 + %f16-%f54
	and		%i0, 7, %l0		! delay slot: %l0 = input misalignment
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0		! misalignment in bits
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1		! %l1 = complementary shift
	and		%i1, 7, %l2		! %l2 = output misalignment
	cmp		%i2, 127
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
	brnz,pn		%l5, .L128cbc_enc_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3		! delay slot: partial-store byte mask

	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	srlx		%i2, 4, %i2		! byte count -> block count
	prefetch	[%i1], 22

! Generic one-block-per-iteration loop.
.L128_cbc_enc_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! input 8-byte aligned: no shifting needed
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2		! stitch misaligned block from 3 dwords
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_encrypt_1x
	add		%i0, 16, %i0		! delay slot: advance input

	brnz,pn		%l2, 2f			! misaligned output path
	sub		%i2, 1, %i2		! delay slot: one block done

	std		%f0, [%i1 + 0]		! ciphertext stays in %f0:%f2 as next IV
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_cbc_enc_loop
	add		%i1, 16, %i1		! delay slot: advance output
	st		%f0, [%i4 + 0]		! write back final IV
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
.L128_cbc_enc_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! flip mask for trailing bytes
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_cbc_enc_loop+4	! +4 skips the ldx already done above
	orn		%g0, %l3, %l3		! delay slot: restore mask
	st		%f0, [%i4 + 0]		! write back final IV
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: block-initializing stores bypass cache-line fills.
! The last (up to) 64-byte tail is redone through the generic loop so
! ordinary stores finish the buffer.
.align	32
.L128cbc_enc_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5

.L128_cbc_enc_blk_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 5f			! input aligned: skip stitching
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
5:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i0 + 16+63], 20
	call		_aes128_encrypt_1x
	add		%i0, 16, %i0		! delay slot: advance input
	sub		%i2, 1, %i2

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	brnz,pt		%i2, .L128_cbc_enc_blk_loop
	add		%i1, 8, %i1		! delay slot: advance output

	membar		#StoreLoad|#StoreStore	! order block-init stores vs what follows
	brnz,pt		%l5, .L128_cbc_enc_loop	! process tail via generic loop
	mov		%l5, %i2		! delay slot: remaining block count
	st		%f0, [%i4 + 0]		! write back final IV
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore
.type	aes128_t4_cbc_encrypt,#function
.size	aes128_t4_cbc_encrypt,.-aes128_t4_cbc_encrypt
! void aes128_t4_ctr32_encrypt(const u8 *in, u8 *out, size_t blocks,
!                              const AES_KEY *key, const u8 *ivec)
!   %i0 = in, %i1 = out, %i2 = blocks (sllx 4 converts to bytes below),
!   %i3 = key, %i4 = ivec (16-byte counter block)
! CTR mode with a 32-bit counter in the low word (%l7); the upper 96
! bits are fixed for the call. Paths: 1x loop, interleaved 2x loop,
! and a bulk ASI_BLK_INIT path for long aligned distinct buffers.
! NOTE(review): the counter IV is not written back to %i4 on return.
.globl	aes128_t4_ctr32_encrypt
.align	32
aes128_t4_ctr32_encrypt:
	save		%sp, -STACK_FRAME, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_enckey	! fills %g4:%g5 + %f16-%f54
	sllx		%i2, 4, %i2		! delay slot: blocks -> bytes

	ld		[%i4 + 0], %l4	! counter
	ld		[%i4 + 4], %l5
	ld		[%i4 + 8], %l6
	ld		[%i4 + 12], %l7	! %l7 = 32-bit counter word

	sllx		%l4, 32, %o5
	or		%l5, %o5, %o5		! %o5 = upper 64 bits of counter block
	sllx		%l6, 32, %g1
	xor		%o5, %g4, %g4		! ^= rk[0]: fold whitening into the
	xor		%g1, %g5, %g5		! fixed counter bits up front
	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0		! %l0 = input misalignment (bits after sll)
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1		! %l1 = complementary shift
	and		%i1, 7, %l2		! %l2 = output misalignment
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128_ctr32_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3		! delay slot: partial-store byte mask

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_ctr32_loop2x	! even: go straight to 2x loop
	srlx		%i2, 4, %i2		! delay slot: bytes -> blocks
! Single-block iteration (odd block count peeled here).
.L128_ctr32_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! input aligned: no stitching
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2		! stitch misaligned input block
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7		! bump 32-bit counter
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	call		_aes128_encrypt_1x+8	! +8: first round issued above
	add		%i0, 16, %i0		! delay slot: advance input

	.word	0x95b02308 !movxtod	%o0,%f10
	.word	0x99b02309 !movxtod	%o1,%f12
	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
	.word	0x85b30d82 !fxor	%f12,%f2,%f2

	brnz,pn		%l2, 2f			! misaligned output path
	sub		%i2, 1, %i2		! delay slot: one block done

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_ctr32_loop2x	! continue with the even remainder
	add		%i1, 16, %i1		! delay slot: advance output

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! flip mask for trailing bytes
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_ctr32_loop2x+4	! +4 skips the ldx already done
	orn		%g0, %l3, %l3		! delay slot: restore mask

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two blocks per iteration, keystream generation interleaved in
! _aes128_encrypt_2x.
.align	32
.L128_ctr32_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! input aligned: no stitching
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4		! stitch two misaligned blocks from 5 dwords
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7		! counter for block 0
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7		! counter for block 1
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes128_encrypt_2x+16	! +16: first round issued above
	add		%i0, 32, %i0		! delay slot: advance input

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	brnz,pn		%l2, 2f			! misaligned output path
	sub		%i2, 2, %i2		! delay slot: two blocks done

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_ctr32_loop2x
	add		%i1, 32, %i1		! delay slot: advance output

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! flip mask for trailing bytes
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_ctr32_loop2x+4	! +4 skips the ldx already done
	orn		%g0, %l3, %l3		! delay slot: restore mask

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: two blocks per iteration with ASI_BLK_INIT stores; the
! last (up to) 64-byte tail is finished via the ordinary loops.
.align	32
.L128_ctr32_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2		! bias so bgu below exits with tail pending
	add	%l5, 1, %l5

.L128_ctr32_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! input aligned: no stitching
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4		! stitch two misaligned blocks from 5 dwords
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7		! counter for block 0
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7		! counter for block 1
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes128_encrypt_2x+16	! +16: first round issued above
	add		%i0, 32, %i0		! delay slot: advance input
	subcc		%i2, 2, %i2		! two blocks done; sets cc for bgu below

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L128_ctr32_blk_loop2x
	add		%i1, 8, %i1		! delay slot: advance output

	add		%l5, %i2, %i2		! %i2 = remaining tail blocks
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore	! order block-init stores vs normal stores
	bnz,pt		%icc, .L128_ctr32_loop	! odd: peel one block first
	srl		%i2, 0, %i2		! delay slot
	brnz,pn		%i2, .L128_ctr32_loop2x
	nop

	ret
	restore
.type	aes128_t4_ctr32_encrypt,#function
.size	aes128_t4_ctr32_encrypt,.-aes128_t4_ctr32_encrypt
932.globl	aes128_t4_xts_encrypt
933.align	32
934aes128_t4_xts_encrypt:
935	save		%sp, -STACK_FRAME-16, %sp
936	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
937
938	mov		%i5, %o0
939	add		%fp, STACK_BIAS-16, %o1
940	call		aes_t4_encrypt
941	mov		%i4, %o2
942
943	add		%fp, STACK_BIAS-16, %l7
944	ldxa		[%l7]0x88, %g2
945	add		%fp, STACK_BIAS-8, %l7
946	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak
947
948	sethi		%hi(0x76543210), %l7
949	or		%l7, %lo(0x76543210), %l7
950	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask
951
952	prefetch	[%i0], 20
953	prefetch	[%i0 + 63], 20
954	call		_aes128_load_enckey
955	and		%i2, 15,  %i5
956	and		%i2, -16, %i2
957
958	sub		%i0, %i1, %l5	! %i0!=%i1
959	and		%i0, 7, %l0
960	andn		%i0, 7, %i0
961	sll		%l0, 3, %l0
962	mov		64, %l1
963	mov		0xff, %l3
964	sub		%l1, %l0, %l1
965	and		%i1, 7, %l2
966	cmp		%i2, 255
967	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
968	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
969	brnz,pn		%l5, .L128_xts_enblk !	%i0==%i1)
970	srl		%l3, %l2, %l3
971
972	andcc		%i2, 16, %g0		! is number of blocks even?
973	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
974	bz		%icc, .L128_xts_enloop2x
975	srlx		%i2, 4, %i2
976.L128_xts_enloop:
977	ldx		[%i0 + 0], %o0
978	brz,pt		%l0, 4f
979	ldx		[%i0 + 8], %o1
980
981	ldx		[%i0 + 16], %o2
982	sllx		%o0, %l0, %o0
983	srlx		%o1, %l1, %g1
984	sllx		%o1, %l0, %o1
985	or		%g1, %o0, %o0
986	srlx		%o2, %l1, %o2
987	or		%o2, %o1, %o1
9884:
989	.word	0x99b02302 !movxtod	%g2,%f12
990	.word	0x9db02303 !movxtod	%g3,%f14
991	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
992	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
993
994	xor		%g4, %o0, %o0		! ^= rk[0]
995	xor		%g5, %o1, %o1
996	.word	0x81b02308 !movxtod	%o0,%f0
997	.word	0x85b02309 !movxtod	%o1,%f2
998
999	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1000	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1001
1002	prefetch	[%i1 + 63], 22
1003	prefetch	[%i0 + 16+63], 20
1004	call		_aes128_encrypt_1x
1005	add		%i0, 16, %i0
1006
1007	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1008	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1009
1010	srax		%g3, 63, %l7		! next tweak value
1011	addcc		%g2, %g2, %g2
1012	and		%l7, 0x87, %l7
1013	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1014	xor		%l7, %g2, %g2
1015
1016	brnz,pn		%l2, 2f
1017	sub		%i2, 1, %i2
1018
1019	std		%f0, [%i1 + 0]
1020	std		%f2, [%i1 + 8]
1021	brnz,pt		%i2, .L128_xts_enloop2x
1022	add		%i1, 16, %i1
1023
1024	brnz,pn		%i5, .L128_xts_ensteal
1025	nop
1026
1027	ret
1028	restore
1029
1030.align	16
10312:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1032						! and ~3x deterioration
1033						! in inp==out case
1034	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1035	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1036	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1037	stda		%f4, [%i1 + %l3]0xc0	! partial store
1038	std		%f6, [%i1 + 8]
1039	add		%i1, 16, %i1
1040	orn		%g0, %l3, %l3
1041	stda		%f8, [%i1 + %l3]0xc0	! partial store
1042
1043	brnz,pt		%i2, .L128_xts_enloop2x+4
1044	orn		%g0, %l3, %l3
1045
1046	brnz,pn		%i5, .L128_xts_ensteal
1047	nop
1048
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main 2x-unrolled XTS-AES-128 encrypt loop: two 16-byte blocks per
! iteration.  Register roles (mirroring the setup visible in
! aes128_t4_xts_decrypt below): %i0=inp (8-byte-aligned base),
! %i1=out, %i2=blocks left, %g3:%g2=current tweak, %g4:%g5=rk[0],
! %l0/%l1=input re-alignment shift counts in bits, %l2=out&7,
! %l3=partial-store byte mask, %i5=tail bytes for ciphertext stealing.
! The tweak is advanced by doubling %g3:%g2 and conditionally xoring
! 0x87 into the low word (GF(2^128) reduction used by XTS).
.align	32
.L128_xts_enloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	! input not 8-byte aligned: merge five loaded words into four
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	! tweak[0] -> %f12:%f14 (byte-swapped via bshuffle/bmask)
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	! tweak[1] -> %f8:%f10
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[1]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes128_encrypt_2x
	add		%i0, 32, %i0

	! reload tweak[1] and advance %g3:%g2 for the next iteration
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	! post-whitening: ciphertext ^= tweak
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	! unaligned output? take the partial-store path at 2:
	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_xts_enloop2x
	add		%i1, 32, %i1

	! keep last ciphertext block in %f0:%f2 for possible stealing
	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing partial store
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_enloop2x+4
	orn		%g0, %l3, %l3		! restore mask in delay slot

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore
1173
1174!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1175.align	32
1176.L128_xts_enblk:
1177	add	%i1, %i2, %l5
1178	and	%l5, 63, %l5	! tail
1179	sub	%i2, %l5, %i2
1180	add	%l5, 15, %l5	! round up to 16n
1181	srlx	%i2, 4, %i2
1182	srl	%l5, 4, %l5
1183	sub	%i2, 1, %i2
1184	add	%l5, 1, %l5
1185
1186.L128_xts_enblk2x:
1187	ldx		[%i0 + 0], %o0
1188	ldx		[%i0 + 8], %o1
1189	ldx		[%i0 + 16], %o2
1190	brz,pt		%l0, 5f
1191	ldx		[%i0 + 24], %o3
1192
1193	ldx		[%i0 + 32], %o4
1194	sllx		%o0, %l0, %o0
1195	srlx		%o1, %l1, %g1
1196	or		%g1, %o0, %o0
1197	sllx		%o1, %l0, %o1
1198	srlx		%o2, %l1, %g1
1199	or		%g1, %o1, %o1
1200	sllx		%o2, %l0, %o2
1201	srlx		%o3, %l1, %g1
1202	or		%g1, %o2, %o2
1203	sllx		%o3, %l0, %o3
1204	srlx		%o4, %l1, %o4
1205	or		%o4, %o3, %o3
12065:
1207	.word	0x99b02302 !movxtod	%g2,%f12
1208	.word	0x9db02303 !movxtod	%g3,%f14
1209	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
1210	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
1211
1212	srax		%g3, 63, %l7		! next tweak value
1213	addcc		%g2, %g2, %g2
1214	and		%l7, 0x87, %l7
1215	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1216	xor		%l7, %g2, %g2
1217
1218	.word	0x91b02302 !movxtod	%g2,%f8
1219	.word	0x95b02303 !movxtod	%g3,%f10
1220	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1221	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1222
1223	xor		%g4, %o0, %o0		! ^= rk[0]
1224	xor		%g5, %o1, %o1
1225	xor		%g4, %o2, %o2		! ^= rk[0]
1226	xor		%g5, %o3, %o3
1227	.word	0x81b02308 !movxtod	%o0,%f0
1228	.word	0x85b02309 !movxtod	%o1,%f2
1229	.word	0x89b0230a !movxtod	%o2,%f4
1230	.word	0x8db0230b !movxtod	%o3,%f6
1231
1232	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1233	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1234	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
1235	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1236
1237	prefetch	[%i0 + 32+63], 20
1238	call		_aes128_encrypt_2x
1239	add		%i0, 32, %i0
1240
1241	.word	0x91b02302 !movxtod	%g2,%f8
1242	.word	0x95b02303 !movxtod	%g3,%f10
1243
1244	srax		%g3, 63, %l7		! next tweak value
1245	addcc		%g2, %g2, %g2
1246	and		%l7, 0x87, %l7
1247	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1248	xor		%l7, %g2, %g2
1249
1250	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1251	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1252
1253	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1254	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1255	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1256	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1257
1258	subcc		%i2, 2, %i2
1259	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1260	add		%i1, 8, %i1
1261	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1262	add		%i1, 8, %i1
1263	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1264	add		%i1, 8, %i1
1265	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1266	bgu,pt		SIZE_T_CC, .L128_xts_enblk2x
1267	add		%i1, 8, %i1
1268
1269	add		%l5, %i2, %i2
1270	andcc		%i2, 1, %g0		! is number of blocks even?
1271	membar		#StoreLoad|#StoreStore
1272	bnz,pt		%icc, .L128_xts_enloop
1273	srl		%i2, 0, %i2
1274	brnz,pn		%i2, .L128_xts_enloop2x
1275	nop
1276
1277	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
1278	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
1279	brnz,pn		%i5, .L128_xts_ensteal
1280	nop
1281
1282	ret
1283	restore
1284!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1285.align	32
1286.L128_xts_ensteal:
1287	std		%f0, [%fp + STACK_BIAS-16]	! copy of output
1288	std		%f2, [%fp + STACK_BIAS-8]
1289
1290	srl		%l0, 3, %l0
1291	add		%fp, STACK_BIAS-16, %l7
1292	add		%i0, %l0, %i0	! original %i0+%i2&-15
1293	add		%i1, %l2, %i1	! original %i1+%i2&-15
1294	mov		0, %l0
1295	nop					! align
1296
1297.L128_xts_enstealing:
1298	ldub		[%i0 + %l0], %o0
1299	ldub		[%l7  + %l0], %o1
1300	dec		%i5
1301	stb		%o0, [%l7  + %l0]
1302	stb		%o1, [%i1 + %l0]
1303	brnz		%i5, .L128_xts_enstealing
1304	inc		%l0
1305
1306	mov		%l7, %i0
1307	sub		%i1, 16, %i1
1308	mov		0, %l0
1309	sub		%i1, %l2, %i1
1310	ba		.L128_xts_enloop	! one more time
1311	mov		1, %i2				! %i5 is 0
1312	ret
1313	restore
1314.type	aes128_t4_xts_encrypt,#function
1315.size	aes128_t4_xts_encrypt,.-aes128_t4_xts_encrypt
.globl	aes128_t4_xts_decrypt
.align	32
! XTS-AES-128 decrypt.  From the argument use below, the ins appear to
! be: %i0=inp, %i1=out, %i2=len, %i4=secondary (tweak) key,
! %i5=ivec, with the decryption key loaded by _aes128_load_deckey
! (presumably from %i3) -- confirm against the C caller.
aes128_t4_xts_decrypt:
	save		%sp, -STACK_FRAME-16, %sp	! 16 extra bytes hold the tweak
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	! tweak = AES-encrypt(ivec) under the secondary key, into stack scratch
	mov		%i5, %o0
	add		%fp, STACK_BIAS-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	add		%fp, STACK_BIAS-16, %l7
	ldxa		[%l7]0x88, %g2		! ASI 0x88: little-endian load
	add		%fp, STACK_BIAS-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_deckey
	and		%i2, 15,  %i5		! %i5 = tail bytes (ciphertext stealing)
	and		%i2, -16, %i2
	mov		0, %l7
	movrnz		%i5, 16,  %l7		! hold back one block if stealing
	sub		%i2, %l7, %i2

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0		! %l0 = input misalignment in bits
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1		! %l1 = 64 - %l0
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128_xts_deblk !	%i0==%i1)
	srl		%l3, %l2, %l3		! %l3 = partial-store byte mask

	andcc		%i2, 16, %g0		! is number of blocks even?
	brz,pn		%i2, .L128_xts_desteal
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_xts_deloop2x
	srlx		%i2, 4, %i2		! bytes -> blocks
! Single-block XTS-AES-128 decrypt loop; runs once when the block
! count is odd, then falls through to the 2x loop.
.L128_xts_deloop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! re-align unaligned input (%l0/%l1 = shift counts in bits)
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7		! GF(2^128) reduction constant
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	! unaligned output? take the partial-store path at 2:
	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_xts_deloop2x
	add		%i1, 16, %i1

	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing partial store
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_deloop2x+4
	orn		%g0, %l3, %l3		! restore mask in delay slot

	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore
1439
1440!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1441.align	32
1442.L128_xts_deloop2x:
1443	ldx		[%i0 + 0], %o0
1444	ldx		[%i0 + 8], %o1
1445	ldx		[%i0 + 16], %o2
1446	brz,pt		%l0, 4f
1447	ldx		[%i0 + 24], %o3
1448
1449	ldx		[%i0 + 32], %o4
1450	sllx		%o0, %l0, %o0
1451	srlx		%o1, %l1, %g1
1452	or		%g1, %o0, %o0
1453	sllx		%o1, %l0, %o1
1454	srlx		%o2, %l1, %g1
1455	or		%g1, %o1, %o1
1456	sllx		%o2, %l0, %o2
1457	srlx		%o3, %l1, %g1
1458	or		%g1, %o2, %o2
1459	sllx		%o3, %l0, %o3
1460	srlx		%o4, %l1, %o4
1461	or		%o4, %o3, %o3
14624:
1463	.word	0x99b02302 !movxtod	%g2,%f12
1464	.word	0x9db02303 !movxtod	%g3,%f14
1465	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
1466	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
1467
1468	srax		%g3, 63, %l7		! next tweak value
1469	addcc		%g2, %g2, %g2
1470	and		%l7, 0x87, %l7
1471	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1472	xor		%l7, %g2, %g2
1473
1474	.word	0x91b02302 !movxtod	%g2,%f8
1475	.word	0x95b02303 !movxtod	%g3,%f10
1476	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1477	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1478
1479	xor		%g4, %o0, %o0		! ^= rk[0]
1480	xor		%g5, %o1, %o1
1481	xor		%g4, %o2, %o2		! ^= rk[0]
1482	xor		%g5, %o3, %o3
1483	.word	0x81b02308 !movxtod	%o0,%f0
1484	.word	0x85b02309 !movxtod	%o1,%f2
1485	.word	0x89b0230a !movxtod	%o2,%f4
1486	.word	0x8db0230b !movxtod	%o3,%f6
1487
1488	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1489	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1490	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
1491	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1492
1493	prefetch	[%i1 + 63], 22
1494	prefetch	[%i0 + 32+63], 20
1495	call		_aes128_decrypt_2x
1496	add		%i0, 32, %i0
1497
1498	.word	0x91b02302 !movxtod	%g2,%f8
1499	.word	0x95b02303 !movxtod	%g3,%f10
1500
1501	srax		%g3, 63, %l7		! next tweak value
1502	addcc		%g2, %g2, %g2
1503	and		%l7, 0x87, %l7
1504	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1505	xor		%l7, %g2, %g2
1506
1507	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1508	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1509
1510	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1511	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1512	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1513	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1514
1515	brnz,pn		%l2, 2f
1516	sub		%i2, 2, %i2
1517
1518	std		%f0, [%i1 + 0]
1519	std		%f2, [%i1 + 8]
1520	std		%f4, [%i1 + 16]
1521	std		%f6, [%i1 + 24]
1522	brnz,pt		%i2, .L128_xts_deloop2x
1523	add		%i1, 32, %i1
1524
1525	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
1526	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
1527	brnz,pn		%i5, .L128_xts_desteal
1528	nop
1529
1530	ret
1531	restore
1532
1533.align	16
15342:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1535						! and ~3x deterioration
1536						! in inp==out case
1537	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1538	.word	0x95b00902 !faligndata	%f0,%f2,%f10
1539	.word	0x99b08904 !faligndata	%f2,%f4,%f12
1540	.word	0x9db10906 !faligndata	%f4,%f6,%f14
1541	.word	0x81b18906 !faligndata	%f6,%f6,%f0
1542
1543	stda		%f8, [%i1 + %l3]0xc0	! partial store
1544	std		%f10, [%i1 + 8]
1545	std		%f12, [%i1 + 16]
1546	std		%f14, [%i1 + 24]
1547	add		%i1, 32, %i1
1548	orn		%g0, %l3, %l3
1549	stda		%f0, [%i1 + %l3]0xc0	! partial store
1550
1551	brnz,pt		%i2, .L128_xts_deloop2x+4
1552	orn		%g0, %l3, %l3
1553
1554	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
1555	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
1556	brnz,pn		%i5, .L128_xts_desteal
1557	nop
1558
1559	ret
1560	restore
1561
1562!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1563.align	32
1564.L128_xts_deblk:
1565	add	%i1, %i2, %l5
1566	and	%l5, 63, %l5	! tail
1567	sub	%i2, %l5, %i2
1568	add	%l5, 15, %l5	! round up to 16n
1569	srlx	%i2, 4, %i2
1570	srl	%l5, 4, %l5
1571	sub	%i2, 1, %i2
1572	add	%l5, 1, %l5
1573
1574.L128_xts_deblk2x:
1575	ldx		[%i0 + 0], %o0
1576	ldx		[%i0 + 8], %o1
1577	ldx		[%i0 + 16], %o2
1578	brz,pt		%l0, 5f
1579	ldx		[%i0 + 24], %o3
1580
1581	ldx		[%i0 + 32], %o4
1582	sllx		%o0, %l0, %o0
1583	srlx		%o1, %l1, %g1
1584	or		%g1, %o0, %o0
1585	sllx		%o1, %l0, %o1
1586	srlx		%o2, %l1, %g1
1587	or		%g1, %o1, %o1
1588	sllx		%o2, %l0, %o2
1589	srlx		%o3, %l1, %g1
1590	or		%g1, %o2, %o2
1591	sllx		%o3, %l0, %o3
1592	srlx		%o4, %l1, %o4
1593	or		%o4, %o3, %o3
15945:
1595	.word	0x99b02302 !movxtod	%g2,%f12
1596	.word	0x9db02303 !movxtod	%g3,%f14
1597	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
1598	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
1599
1600	srax		%g3, 63, %l7		! next tweak value
1601	addcc		%g2, %g2, %g2
1602	and		%l7, 0x87, %l7
1603	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1604	xor		%l7, %g2, %g2
1605
1606	.word	0x91b02302 !movxtod	%g2,%f8
1607	.word	0x95b02303 !movxtod	%g3,%f10
1608	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1609	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1610
1611	xor		%g4, %o0, %o0		! ^= rk[0]
1612	xor		%g5, %o1, %o1
1613	xor		%g4, %o2, %o2		! ^= rk[0]
1614	xor		%g5, %o3, %o3
1615	.word	0x81b02308 !movxtod	%o0,%f0
1616	.word	0x85b02309 !movxtod	%o1,%f2
1617	.word	0x89b0230a !movxtod	%o2,%f4
1618	.word	0x8db0230b !movxtod	%o3,%f6
1619
1620	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1621	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1622	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
1623	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1624
1625	prefetch	[%i0 + 32+63], 20
1626	call		_aes128_decrypt_2x
1627	add		%i0, 32, %i0
1628
1629	.word	0x91b02302 !movxtod	%g2,%f8
1630	.word	0x95b02303 !movxtod	%g3,%f10
1631
1632	srax		%g3, 63, %l7		! next tweak value
1633	addcc		%g2, %g2, %g2
1634	and		%l7, 0x87, %l7
1635	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1636	xor		%l7, %g2, %g2
1637
1638	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1639	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1640
1641	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1642	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1643	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1644	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1645
1646	subcc		%i2, 2, %i2
1647	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1648	add		%i1, 8, %i1
1649	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1650	add		%i1, 8, %i1
1651	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1652	add		%i1, 8, %i1
1653	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1654	bgu,pt		SIZE_T_CC, .L128_xts_deblk2x
1655	add		%i1, 8, %i1
1656
1657	add		%l5, %i2, %i2
1658	andcc		%i2, 1, %g0		! is number of blocks even?
1659	membar		#StoreLoad|#StoreStore
1660	bnz,pt		%icc, .L128_xts_deloop
1661	srl		%i2, 0, %i2
1662	brnz,pn		%i2, .L128_xts_deloop2x
1663	nop
1664
1665	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
1666	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
1667	brnz,pn		%i5, .L128_xts_desteal
1668	nop
1669
1670	ret
1671	restore
1672!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1673.align	32
1674.L128_xts_desteal:
1675	ldx		[%i0 + 0], %o0
1676	brz,pt		%l0, 8f
1677	ldx		[%i0 + 8], %o1
1678
1679	ldx		[%i0 + 16], %o2
1680	sllx		%o0, %l0, %o0
1681	srlx		%o1, %l1, %g1
1682	sllx		%o1, %l0, %o1
1683	or		%g1, %o0, %o0
1684	srlx		%o2, %l1, %o2
1685	or		%o2, %o1, %o1
16868:
1687	srax		%g3, 63, %l7		! next tweak value
1688	addcc		%g2, %g2, %o2
1689	and		%l7, 0x87, %l7
1690	.word	0x97b0c223 !addxc	%g3,%g3,%o3
1691	xor		%l7, %o2, %o2
1692
1693	.word	0x99b0230a !movxtod	%o2,%f12
1694	.word	0x9db0230b !movxtod	%o3,%f14
1695	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
1696	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
1697
1698	xor		%g4, %o0, %o0		! ^= rk[0]
1699	xor		%g5, %o1, %o1
1700	.word	0x81b02308 !movxtod	%o0,%f0
1701	.word	0x85b02309 !movxtod	%o1,%f2
1702
1703	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1704	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1705
1706	call		_aes128_decrypt_1x
1707	add		%i0, 16, %i0
1708
1709	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1710	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1711
1712	std		%f0, [%fp + STACK_BIAS-16]
1713	std		%f2, [%fp + STACK_BIAS-8]
1714
1715	srl		%l0, 3, %l0
1716	add		%fp, STACK_BIAS-16, %l7
1717	add		%i0, %l0, %i0	! original %i0+%i2&-15
1718	add		%i1, %l2, %i1	! original %i1+%i2&-15
1719	mov		0, %l0
1720	add		%i1, 16, %i1
1721	nop					! align
1722
1723.L128_xts_destealing:
1724	ldub		[%i0 + %l0], %o0
1725	ldub		[%l7  + %l0], %o1
1726	dec		%i5
1727	stb		%o0, [%l7  + %l0]
1728	stb		%o1, [%i1 + %l0]
1729	brnz		%i5, .L128_xts_destealing
1730	inc		%l0
1731
1732	mov		%l7, %i0
1733	sub		%i1, 16, %i1
1734	mov		0, %l0
1735	sub		%i1, %l2, %i1
1736	ba		.L128_xts_deloop	! one more time
1737	mov		1, %i2				! %i5 is 0
1738	ret
1739	restore
1740.type	aes128_t4_xts_decrypt,#function
1741.size	aes128_t4_xts_decrypt,.-aes128_t4_xts_decrypt
.globl	aes128_t4_cbc_decrypt
.align	32
! CBC-AES-128 decrypt: %i0=inp, %i1=out, %i2=len, %i4=ivec;
! decryption round keys are loaded by _aes128_load_deckey
! (presumably from %i3 -- confirm against the caller).  The running
! IV is kept byte-swapped in %f12:%f14.
aes128_t4_cbc_decrypt:
	save		%sp, -STACK_FRAME, %sp
	cmp		%i2, 0
	be,pn		SIZE_T_CC, .L128_cbc_dec_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f12	! load ivec
	ld		[%i4 + 4], %f13
	ld		[%i4 + 8], %f14
	ld		[%i4 + 12], %f15
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_deckey
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0		! %l0 = input misalignment in bits
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1		! %l1 = 64 - %l0
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128cbc_dec_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3		! %l3 = partial-store byte mask

	andcc		%i2, 16, %g0		! is number of blocks even?
	srlx		%i2, 4, %i2		! bytes -> blocks
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_cbc_dec_loop2x
	prefetch	[%i1], 22
! Single-block CBC decrypt loop; runs once when the block count is
! odd, then falls through to the 2x loop.
.L128_cbc_dec_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! re-align unaligned input
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o2		! ^= rk[0]
	xor		%g5, %o1, %o3
	.word	0x81b0230a !movxtod	%o2,%f0
	.word	0x85b0230b !movxtod	%o3,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_decrypt_1x
	add		%i0, 16, %i0

	! plaintext ^= previous ciphertext (IV); ciphertext becomes new IV
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_cbc_dec_loop2x
	add		%i1, 16, %i1
	st		%f12, [%i4 + 0]	! write back running ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
.L128_cbc_dec_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing partial store
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3		! restore mask in delay slot
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
1840
1841!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1842.align	32
1843.L128_cbc_dec_loop2x:
1844	ldx		[%i0 + 0], %o0
1845	ldx		[%i0 + 8], %o1
1846	ldx		[%i0 + 16], %o2
1847	brz,pt		%l0, 4f
1848	ldx		[%i0 + 24], %o3
1849
1850	ldx		[%i0 + 32], %o4
1851	sllx		%o0, %l0, %o0
1852	srlx		%o1, %l1, %g1
1853	or		%g1, %o0, %o0
1854	sllx		%o1, %l0, %o1
1855	srlx		%o2, %l1, %g1
1856	or		%g1, %o1, %o1
1857	sllx		%o2, %l0, %o2
1858	srlx		%o3, %l1, %g1
1859	or		%g1, %o2, %o2
1860	sllx		%o3, %l0, %o3
1861	srlx		%o4, %l1, %o4
1862	or		%o4, %o3, %o3
18634:
1864	xor		%g4, %o0, %o4		! ^= rk[0]
1865	xor		%g5, %o1, %o5
1866	.word	0x81b0230c !movxtod	%o4,%f0
1867	.word	0x85b0230d !movxtod	%o5,%f2
1868	xor		%g4, %o2, %o4
1869	xor		%g5, %o3, %o5
1870	.word	0x89b0230c !movxtod	%o4,%f4
1871	.word	0x8db0230d !movxtod	%o5,%f6
1872
1873	prefetch	[%i1 + 63], 22
1874	prefetch	[%i0 + 32+63], 20
1875	call		_aes128_decrypt_2x
1876	add		%i0, 32, %i0
1877
1878	.word	0x91b02308 !movxtod	%o0,%f8
1879	.word	0x95b02309 !movxtod	%o1,%f10
1880	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1881	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1882	.word	0x99b0230a !movxtod	%o2,%f12
1883	.word	0x9db0230b !movxtod	%o3,%f14
1884	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1885	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1886
1887	brnz,pn		%l2, 2f
1888	sub		%i2, 2, %i2
1889
1890	std		%f0, [%i1 + 0]
1891	std		%f2, [%i1 + 8]
1892	std		%f4, [%i1 + 16]
1893	std		%f6, [%i1 + 24]
1894	brnz,pt		%i2, .L128_cbc_dec_loop2x
1895	add		%i1, 32, %i1
1896	st		%f12, [%i4 + 0]
1897	st		%f13, [%i4 + 4]
1898	st		%f14, [%i4 + 8]
1899	st		%f15, [%i4 + 12]
1900	ret
1901	restore
1902
1903.align	16
19042:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1905						! and ~3x deterioration
1906						! in inp==out case
1907	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1908	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1909	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1910	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1911	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1912	stda		%f8, [%i1 + %l3]0xc0	! partial store
1913	std		%f0, [%i1 + 8]
1914	std		%f2, [%i1 + 16]
1915	std		%f4, [%i1 + 24]
1916	add		%i1, 32, %i1
1917	orn		%g0, %l3, %l3
1918	stda		%f6, [%i1 + %l3]0xc0	! partial store
1919
1920	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
1921	orn		%g0, %l3, %l3
1922	st		%f12, [%i4 + 0]
1923	st		%f13, [%i4 + 4]
1924	st		%f14, [%i4 + 8]
1925	st		%f15, [%i4 + 12]
1926	ret
1927	restore
1928
1929!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1930.align	32
1931.L128cbc_dec_blk:
1932	add	%i1, %i2, %l5
1933	and	%l5, 63, %l5	! tail
1934	sub	%i2, %l5, %i2
1935	add	%l5, 15, %l5	! round up to 16n
1936	srlx	%i2, 4, %i2
1937	srl	%l5, 4, %l5
1938	sub	%i2, 1, %i2
1939	add	%l5, 1, %l5
1940
1941.L128_cbc_dec_blk_loop2x:
1942	ldx		[%i0 + 0], %o0
1943	ldx		[%i0 + 8], %o1
1944	ldx		[%i0 + 16], %o2
1945	brz,pt		%l0, 5f
1946	ldx		[%i0 + 24], %o3
1947
1948	ldx		[%i0 + 32], %o4
1949	sllx		%o0, %l0, %o0
1950	srlx		%o1, %l1, %g1
1951	or		%g1, %o0, %o0
1952	sllx		%o1, %l0, %o1
1953	srlx		%o2, %l1, %g1
1954	or		%g1, %o1, %o1
1955	sllx		%o2, %l0, %o2
1956	srlx		%o3, %l1, %g1
1957	or		%g1, %o2, %o2
1958	sllx		%o3, %l0, %o3
1959	srlx		%o4, %l1, %o4
1960	or		%o4, %o3, %o3
19615:
1962	xor		%g4, %o0, %o4		! ^= rk[0]
1963	xor		%g5, %o1, %o5
1964	.word	0x81b0230c !movxtod	%o4,%f0
1965	.word	0x85b0230d !movxtod	%o5,%f2
1966	xor		%g4, %o2, %o4
1967	xor		%g5, %o3, %o5
1968	.word	0x89b0230c !movxtod	%o4,%f4
1969	.word	0x8db0230d !movxtod	%o5,%f6
1970
1971	prefetch	[%i0 + 32+63], 20
1972	call		_aes128_decrypt_2x
1973	add		%i0, 32, %i0
1974	subcc		%i2, 2, %i2
1975
1976	.word	0x91b02308 !movxtod	%o0,%f8
1977	.word	0x95b02309 !movxtod	%o1,%f10
1978	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1979	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1980	.word	0x99b0230a !movxtod	%o2,%f12
1981	.word	0x9db0230b !movxtod	%o3,%f14
1982	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1983	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1984
1985	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1986	add		%i1, 8, %i1
1987	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1988	add		%i1, 8, %i1
1989	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1990	add		%i1, 8, %i1
1991	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1992	bgu,pt		SIZE_T_CC, .L128_cbc_dec_blk_loop2x
1993	add		%i1, 8, %i1
1994
1995	add		%l5, %i2, %i2
1996	andcc		%i2, 1, %g0		! is number of blocks even?
1997	membar		#StoreLoad|#StoreStore
1998	bnz,pt		%icc, .L128_cbc_dec_loop
1999	srl		%i2, 0, %i2
2000	brnz,pn		%i2, .L128_cbc_dec_loop2x
2001	nop
2002	st		%f12, [%i4 + 0]	! write out ivec
2003	st		%f13, [%i4 + 4]
2004	st		%f14, [%i4 + 8]
2005	st		%f15, [%i4 + 12]
2006	ret
2007	restore
2008.type	aes128_t4_cbc_decrypt,#function
2009.size	aes128_t4_cbc_decrypt,.-aes128_t4_cbc_decrypt
.align	32
! Fully unrolled AES-128 decryption (10 rounds) of one pre-whitened
! 128-bit block held in %f0:%f2; result left in %f0:%f2.  Round keys
! are assumed preloaded in %f16-%f54 (by _aes128_load_deckey).
! Leaf routine: returns via retl with the final round in the delay
! slot; clobbers %f4.
_aes128_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd44c4 !aes_dround01_l	%f52,%f4,%f2,%f0
	retl
	.word	0x84cdc4e4 !aes_dround23_l	%f54,%f4,%f2,%f2
.type	_aes128_decrypt_1x,#function
.size	_aes128_decrypt_1x,.-_aes128_decrypt_1x
2035
.align	32
! Fully unrolled AES-128 decryption of two interleaved pre-whitened
! blocks: block 0 in %f0:%f2, block 1 in %f4:%f6; results left in the
! same registers.  Round keys preloaded in %f16-%f54.  Leaf routine:
! retl with the final round in the delay slot; clobbers %f8/%f10.
_aes128_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd44c8 !aes_dround01_l	%f52,%f8,%f2,%f0
	.word	0x84cdc4e8 !aes_dround23_l	%f54,%f8,%f2,%f2
	.word	0x88cd4cca !aes_dround01_l	%f52,%f10,%f6,%f4
	retl
	.word	0x8ccdccea !aes_dround23_l	%f54,%f10,%f6,%f6
.type	_aes128_decrypt_2x,#function
.size	_aes128_decrypt_2x,.-_aes128_decrypt_2x
.align	32
! Fully unrolled AES-192 encryption (12 rounds) of one pre-whitened
! 128-bit block held in %f0:%f2; result left in %f0:%f2.  Round keys
! are assumed preloaded in %f16-%f62.  Leaf routine: retl with the
! final round in the delay slot; clobbers %f4.
_aes192_encrypt_1x:
	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x80cd4404 !aes_eround01	%f52,%f4,%f2,%f0
	.word	0x84cdc424 !aes_eround23	%f54,%f4,%f2,%f2
	.word	0x88ce4400 !aes_eround01	%f56,%f0,%f2,%f4
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x80cf4484 !aes_eround01_l	%f60,%f4,%f2,%f0
	retl
	.word	0x84cfc4a4 !aes_eround23_l	%f62,%f4,%f2,%f2
.type	_aes192_encrypt_1x,#function
.size	_aes192_encrypt_1x,.-_aes192_encrypt_1x
2110
.align	32
! Fully unrolled AES-192 encryption of two interleaved pre-whitened
! blocks: block 0 in %f0:%f2, block 1 in %f4:%f6; results left in the
! same registers.  Round keys preloaded in %f16-%f62.  Leaf routine:
! retl with the final round in the delay slot; clobbers %f8/%f10.
_aes192_encrypt_2x:
	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
	.word	0x80cd4408 !aes_eround01	%f52,%f8,%f2,%f0
	.word	0x84cdc428 !aes_eround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c0a !aes_eround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc2a !aes_eround23	%f54,%f10,%f6,%f6
	.word	0x90ce4400 !aes_eround01	%f56,%f0,%f2,%f8
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c04 !aes_eround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc24 !aes_eround23	%f58,%f4,%f6,%f6
	.word	0x80cf4488 !aes_eround01_l	%f60,%f8,%f2,%f0
	.word	0x84cfc4a8 !aes_eround23_l	%f62,%f8,%f2,%f2
	.word	0x88cf4c8a !aes_eround01_l	%f60,%f10,%f6,%f4
	retl
	.word	0x8ccfccaa !aes_eround23_l	%f62,%f10,%f6,%f6
.type	_aes192_encrypt_2x,#function
.size	_aes192_encrypt_2x,.-_aes192_encrypt_2x
2164
! _aes256_encrypt_1x: run all 14 AES-256 rounds on one 128-bit block.
!
! In:    %f0:%f2  = plaintext block, already XORed with rk[0] by the caller
!        %f16-%f62 = round keys for rounds 1..12, pre-loaded by
!                    _aes256_load_enckey (key schedule offsets 16..200)
!        %i3      = key schedule pointer (used here only to fetch the
!                   round-13/14 keys at offsets 208..232, and to restore
!                   %f16-%f22 from offsets 16..40 before returning)
! Out:   %f0:%f2  = encrypted block
! Clobbers: %f4 (round scratch); %f16-%f22 are temporarily reused for the
!           last rounds but reloaded with rounds 1/2 keys on exit, so the
!           caller's pre-loaded key bank is intact for the next call.
! Leaf routine: returns via retl, last ldd sits in the delay slot.
.align	32
_aes256_encrypt_1x:
	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	! fetch final-round keys (rounds 13/14) while rounds proceed
	ldd		[%i3 + 208], %f16
	ldd		[%i3 + 216], %f18
	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x80cd4404 !aes_eround01	%f52,%f4,%f2,%f0
	.word	0x84cdc424 !aes_eround23	%f54,%f4,%f2,%f2
	.word	0x88ce4400 !aes_eround01	%f56,%f0,%f2,%f4
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x80cf4404 !aes_eround01	%f60,%f4,%f2,%f0
	.word	0x84cfc424 !aes_eround23	%f62,%f4,%f2,%f2
	! rounds 13/14 use %f16-%f22 (reloaded above with keys 208..232)
	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	! restore %f16-%f22 with rounds 1/2 keys for the next invocation
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	.word	0x80cd0484 !aes_eround01_l	%f20,%f4,%f2,%f0
	.word	0x84cd84a4 !aes_eround23_l	%f22,%f4,%f2,%f2
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22
.type	_aes256_encrypt_1x,#function
.size	_aes256_encrypt_1x,.-_aes256_encrypt_1x
2206
! _aes256_encrypt_2x: run all 14 AES-256 rounds on two independent
! 128-bit blocks, software-interleaved to hide instruction latency.
!
! In:    %f0:%f2  = block 0 (pre-XORed with rk[0] by the caller)
!        %f4:%f6  = block 1 (likewise)
!        %f16-%f62 = round keys for rounds 1..12, pre-loaded by
!                    _aes256_load_enckey
!        %i3      = key schedule pointer (round-13/14 keys fetched from
!                   offsets 208..232; %f16-%f22 restored from 16..40)
! Out:   %f0:%f2, %f4:%f6 = the two encrypted blocks
! Clobbers: %f8, %f10 (round scratch). %f16-%f22 are reloaded on exit so
!           the caller's pre-loaded key bank stays valid.
! Leaf routine: returns via retl, last ldd in the delay slot.
.align	32
_aes256_encrypt_2x:
	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
	! fetch final-round keys (rounds 13/14) while rounds proceed
	ldd		[%i3 + 208], %f16
	ldd		[%i3 + 216], %f18
	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
	.word	0x80cd4408 !aes_eround01	%f52,%f8,%f2,%f0
	.word	0x84cdc428 !aes_eround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c0a !aes_eround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc2a !aes_eround23	%f54,%f10,%f6,%f6
	.word	0x90ce4400 !aes_eround01	%f56,%f0,%f2,%f8
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c04 !aes_eround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc24 !aes_eround23	%f58,%f4,%f6,%f6
	.word	0x80cf4408 !aes_eround01	%f60,%f8,%f2,%f0
	.word	0x84cfc428 !aes_eround23	%f62,%f8,%f2,%f2
	.word	0x88cf4c0a !aes_eround01	%f60,%f10,%f6,%f4
	.word	0x8ccfcc2a !aes_eround23	%f62,%f10,%f6,%f6
	! rounds 13/14 use %f16-%f22 (reloaded above with keys 208..232)
	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
	! restore %f16-%f22 with rounds 1/2 keys for the next invocation
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	.word	0x80cd0488 !aes_eround01_l	%f20,%f8,%f2,%f0
	.word	0x84cd84a8 !aes_eround23_l	%f22,%f8,%f2,%f2
	.word	0x88cd0c8a !aes_eround01_l	%f20,%f10,%f6,%f4
	.word	0x8ccd8caa !aes_eround23_l	%f22,%f10,%f6,%f6
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22
.type	_aes256_encrypt_2x,#function
.size	_aes256_encrypt_2x,.-_aes256_encrypt_2x
2276
! _aes192_loadkey: pre-load an AES key schedule into registers.
!
! In:    %i3 = key schedule pointer
! Out:   %g4:%g5   = rk[0] (first 128-bit round key, as two 64-bit halves)
!        %f16-%f62 = subsequent round keys (schedule offsets 16..200)
! Leaf routine (retl).  The same loader covers both AES-192 and AES-256
! (aliases below): it unconditionally loads 200 bytes past rk[0], which
! spans the full 256-bit schedule; AES-192 callers simply ignore the
! surplus.  Encrypt and decrypt key banks share this loader too.
.align	32
_aes192_loadkey:
	ldx		[%i3 + 0], %g4
	ldx		[%i3 + 8], %g5
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	ldd		[%i3 + 32], %f20
	ldd		[%i3 + 40], %f22
	ldd		[%i3 + 48], %f24
	ldd		[%i3 + 56], %f26
	ldd		[%i3 + 64], %f28
	ldd		[%i3 + 72], %f30
	ldd		[%i3 + 80], %f32
	ldd		[%i3 + 88], %f34
	ldd		[%i3 + 96], %f36
	ldd		[%i3 + 104], %f38
	ldd		[%i3 + 112], %f40
	ldd		[%i3 + 120], %f42
	ldd		[%i3 + 128], %f44
	ldd		[%i3 + 136], %f46
	ldd		[%i3 + 144], %f48
	ldd		[%i3 + 152], %f50
	ldd		[%i3 + 160], %f52
	ldd		[%i3 + 168], %f54
	ldd		[%i3 + 176], %f56
	ldd		[%i3 + 184], %f58
	ldd		[%i3 + 192], %f60
	ldd		[%i3 + 200], %f62
	retl
	nop
.type	_aes192_loadkey,#function
.size	_aes192_loadkey,.-_aes192_loadkey
! All loaders are the same routine; the distinct names keep the
! generated call sites self-describing.
_aes256_loadkey=_aes192_loadkey
_aes192_load_enckey=_aes192_loadkey
_aes192_load_deckey=_aes192_loadkey
_aes256_load_enckey=_aes192_loadkey
_aes256_load_deckey=_aes192_loadkey
! aes256_t4_cbc_encrypt: AES-256-CBC encryption using T4 AES opcodes.
!
! Arguments (SPARC register-window in-regs):
!   %i0 = input pointer   %i1 = output pointer   %i2 = length in bytes
!   %i3 = key schedule (consumed via _aes256_load_enckey)
!   %i4 = 16-byte ivec, read at entry and updated with the last
!         ciphertext block before returning
! Handles arbitrarily aligned input (8-byte realign + funnel shift) and
! unaligned output (faligndata + partial stores).  For large
! (>=128-byte), 8-byte-aligned, non-overlapping output it switches to a
! block-store path (.L256cbc_enc_blk) using ASI_BLK_INIT.
.globl	aes256_t4_cbc_encrypt
.align	32
aes256_t4_cbc_encrypt:
	save		%sp, -STACK_FRAME, %sp
	cmp		%i2, 0
	be,pn		SIZE_T_CC, .L256_cbc_enc_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	! load ivec into %f0-%f3 (the running CBC chaining value)
	ld		[%i4 + 0], %f0
	ld		[%i4 + 4], %f1
	ld		[%i4 + 8], %f2
	ld		[%i4 + 12], %f3
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_enckey
	and		%i0, 7, %l0
	! %l0 = input misalignment in bits, %l1 = 64-%l0 (funnel-shift pair)
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 127
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
	brnz,pn		%l5, .L256cbc_enc_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3		! %l3 = partial-store byte mask

	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	srlx		%i2, 4, %i2		! length -> block count
	prefetch	[%i1], 22

.L256_cbc_enc_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! aligned input: skip funnel shift
	ldx		[%i0 + 8], %o1

	! realign 16 input bytes out of three 8-byte words
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes256_encrypt_1x
	add		%i0, 16, %i0

	brnz,pn		%l2, 2f			! unaligned output path
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L256_cbc_enc_loop
	add		%i1, 16, %i1
	! done: write the final ciphertext block back as the new ivec
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
.L256_cbc_enc_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing bytes
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	! re-enter main loop past its first ldx (already done above)
	brnz,pt		%i2, .L256_cbc_enc_loop+4
	orn		%g0, %l3, %l3		! restore mask in delay slot
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Block-store path: bulk of the data via 64-byte-wise ASI_BLK_INIT
! stores, the sub-64-byte tail finished by the ordinary loop above.
.align	32
.L256cbc_enc_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5

.L256_cbc_enc_blk_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 5f			! aligned input: skip funnel shift
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
5:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i0 + 16+63], 20
	call		_aes256_encrypt_1x
	add		%i0, 16, %i0
	sub		%i2, 1, %i2

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	brnz,pt		%i2, .L256_cbc_enc_blk_loop
	add		%i1, 8, %i1

	! drain block stores before the tail is written normally
	membar		#StoreLoad|#StoreStore
	brnz,pt		%l5, .L256_cbc_enc_loop
	mov		%l5, %i2
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore
.type	aes256_t4_cbc_encrypt,#function
.size	aes256_t4_cbc_encrypt,.-aes256_t4_cbc_encrypt
! aes192_t4_cbc_encrypt: AES-192-CBC encryption using T4 AES opcodes.
! Identical structure to aes256_t4_cbc_encrypt, but calls the
! _aes192_* key-loader and round helpers.
!
! Arguments: %i0 = in, %i1 = out, %i2 = length in bytes,
!            %i3 = key schedule, %i4 = 16-byte ivec (updated on return).
! Handles unaligned input/output; large aligned non-overlapping jobs go
! through the ASI_BLK_INIT block-store path (.L192cbc_enc_blk).
.globl	aes192_t4_cbc_encrypt
.align	32
aes192_t4_cbc_encrypt:
	save		%sp, -STACK_FRAME, %sp
	cmp		%i2, 0
	be,pn		SIZE_T_CC, .L192_cbc_enc_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	! load ivec into %f0-%f3 (the running CBC chaining value)
	ld		[%i4 + 0], %f0
	ld		[%i4 + 4], %f1
	ld		[%i4 + 8], %f2
	ld		[%i4 + 12], %f3
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes192_load_enckey
	and		%i0, 7, %l0
	! %l0 = input misalignment in bits, %l1 = 64-%l0 (funnel-shift pair)
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 127
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
	brnz,pn		%l5, .L192cbc_enc_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3		! %l3 = partial-store byte mask

	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	srlx		%i2, 4, %i2		! length -> block count
	prefetch	[%i1], 22

.L192_cbc_enc_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! aligned input: skip funnel shift
	ldx		[%i0 + 8], %o1

	! realign 16 input bytes out of three 8-byte words
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes192_encrypt_1x
	add		%i0, 16, %i0

	brnz,pn		%l2, 2f			! unaligned output path
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L192_cbc_enc_loop
	add		%i1, 16, %i1
	! done: write the final ciphertext block back as the new ivec
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
.L192_cbc_enc_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing bytes
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	! re-enter main loop past its first ldx (already done above)
	brnz,pt		%i2, .L192_cbc_enc_loop+4
	orn		%g0, %l3, %l3		! restore mask in delay slot
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Block-store path: bulk of the data via 64-byte-wise ASI_BLK_INIT
! stores, the sub-64-byte tail finished by the ordinary loop above.
.align	32
.L192cbc_enc_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5

.L192_cbc_enc_blk_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 5f			! aligned input: skip funnel shift
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
5:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i0 + 16+63], 20
	call		_aes192_encrypt_1x
	add		%i0, 16, %i0
	sub		%i2, 1, %i2

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	brnz,pt		%i2, .L192_cbc_enc_blk_loop
	add		%i1, 8, %i1

	! drain block stores before the tail is written normally
	membar		#StoreLoad|#StoreStore
	brnz,pt		%l5, .L192_cbc_enc_loop
	mov		%l5, %i2
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore
.type	aes192_t4_cbc_encrypt,#function
.size	aes192_t4_cbc_encrypt,.-aes192_t4_cbc_encrypt
! aes256_t4_ctr32_encrypt: AES-256 counter mode with a 32-bit counter.
!
! Arguments: %i0 = in, %i1 = out, %i2 = number of 16-byte blocks
!            (converted to bytes at entry via sllx %i2,4),
!            %i3 = key schedule, %i4 = 16-byte counter block.
! Only the low 32 bits of the counter (%l7) are incremented; the upper
! 96 bits are folded into rk[0] (%g4/%g5) once at entry so the per-block
! work is a single xor/add.  Even runs of blocks use the interleaved 2x
! round helper; an odd leading block goes through .L256_ctr32_loop.
! Large aligned non-overlapping jobs use the ASI_BLK_INIT store path.
.globl	aes256_t4_ctr32_encrypt
.align	32
aes256_t4_ctr32_encrypt:
	save		%sp, -STACK_FRAME, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_enckey
	sllx		%i2, 4, %i2		! block count -> byte count

	! counter block: %l4:%l5 = high 64 bits, %l6 = next 32, %l7 = ctr32
	ld		[%i4 + 0], %l4	! counter
	ld		[%i4 + 4], %l5
	ld		[%i4 + 8], %l6
	ld		[%i4 + 12], %l7

	! pre-fold the static 96 counter bits into rk[0]
	sllx		%l4, 32, %o5
	or		%l5, %o5, %o5
	sllx		%l6, 32, %g1
	xor		%o5, %g4, %g4		! ^= rk[0]
	xor		%g1, %g5, %g5
	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	! %l0 = input misalignment in bits, %l1 = 64-%l0 (funnel-shift pair)
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L256_ctr32_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3		! %l3 = partial-store byte mask

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L256_ctr32_loop2x
	srlx		%i2, 4, %i2
! Single-block path: used once to consume an odd block, then the 2x
! path takes over.
.L256_ctr32_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! aligned input: skip funnel shift
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	! first round done inline (%f14 already holds rk[0]^counter-hi),
	! then enter the helper past its own first round
	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	call		_aes256_encrypt_1x+8
	add		%i0, 16, %i0

	! XOR keystream with input
	.word	0x95b02308 !movxtod	%o0,%f10
	.word	0x99b02309 !movxtod	%o1,%f12
	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
	.word	0x85b30d82 !fxor	%f12,%f2,%f2

	brnz,pn		%l2, 2f			! unaligned output path
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L256_ctr32_loop2x
	add		%i1, 16, %i1

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing bytes
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	! re-enter 2x loop past its first ldx (already done above)
	brnz,pt		%i2, .L256_ctr32_loop2x+4
	orn		%g0, %l3, %l3		! restore mask in delay slot

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration path.
.align	32
.L256_ctr32_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! aligned input: skip funnel shift
	ldx		[%i0 + 24], %o3

	! realign 32 input bytes out of five 8-byte words
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	! two consecutive counter values -> %f2 and %f6
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	! first round of both blocks inline, then enter helper past it
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes256_encrypt_2x+16
	add		%i0, 32, %i0

	! XOR keystream with the 32 input bytes
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	brnz,pn		%l2, 2f			! unaligned output path
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L256_ctr32_loop2x
	add		%i1, 32, %i1

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing bytes
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_ctr32_loop2x+4
	orn		%g0, %l3, %l3		! restore mask in delay slot

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Block-store path: bulk via ASI_BLK_INIT, then the sub-64-byte tail
! (block count kept in %l5) finished by the loops above.
.align	32
.L256_ctr32_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L256_ctr32_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! aligned input: skip funnel shift
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	! two consecutive counter values -> %f2 and %f6
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes256_encrypt_2x+16
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L256_ctr32_blk_loop2x
	add		%i1, 8, %i1

	! hand the tail back to the ordinary loops
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_ctr32_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L256_ctr32_loop2x
	nop

	ret
	restore
.type	aes256_t4_ctr32_encrypt,#function
.size	aes256_t4_ctr32_encrypt,.-aes256_t4_ctr32_encrypt
! aes256_t4_xts_encrypt: AES-256-XTS encryption (IEEE P1619 style).
!
! Arguments: %i0 = in, %i1 = out, %i2 = length in bytes,
!            %i3 = data-unit key schedule (key1, via _aes256_load_enckey),
!            %i4 = sector-number/IV block, %i5 = tweak key schedule
!            (key2) — %i5 is handed to aes_t4_encrypt at entry to
!            encrypt the IV into the initial tweak.
! The running tweak lives in %g3:%g2 (little-endian 64-bit halves, byte-
! swapped into FP regs via bshuffle) and is doubled in GF(2^128) with
! the 0x87 reduction polynomial (srax/addcc/addxc/xor sequence).
! A non-multiple-of-16 length triggers ciphertext stealing for the final
! partial block (.L256_xts_ensteal).
.globl	aes256_t4_xts_encrypt
.align	32
aes256_t4_xts_encrypt:
	save		%sp, -STACK_FRAME-16, %sp	! 16 extra bytes for tweak buffer
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	! tweak = AES-key2(IV): encrypt %i4's block into the stack buffer
	mov		%i5, %o0
	add		%fp, STACK_BIAS-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	add		%fp, STACK_BIAS-16, %l7
	ldxa		[%l7]0x88, %g2
	add		%fp, STACK_BIAS-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_enckey
	and		%i2, 15,  %i5		! %i5 = residue for stealing
	and		%i2, -16, %i2

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	! %l0 = input misalignment in bits, %l1 = 64-%l0 (funnel-shift pair)
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L256_xts_enblk !	%i0==%i1)
	srl		%l3, %l2, %l3		! %l3 = partial-store byte mask

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L256_xts_enloop2x
	srlx		%i2, 4, %i2		! length -> block count
! Single-block path: consumes an odd block (and the stealing re-entry).
.L256_xts_enloop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! aligned input: skip funnel shift
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	! byte-swapped tweak into %f12:%f14
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes256_encrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	! GF(2^128) doubling: carry out of bit 127 folds back as 0x87
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	brnz,pn		%l2, 2f			! unaligned output path
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L256_xts_enloop2x
	add		%i1, 16, %i1

	brnz,pn		%i5, .L256_xts_ensteal	! residue -> ciphertext stealing
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing bytes
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	! re-enter 2x loop past its first ldx (already done above)
	brnz,pt		%i2, .L256_xts_enloop2x+4
	orn		%g0, %l3, %l3		! restore mask in delay slot

	brnz,pn		%i5, .L256_xts_ensteal	! residue -> ciphertext stealing
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration path; tweaks for both blocks are derived
! before the call, the second doubling happens after it.
.align	32
.L256_xts_enloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! aligned input: skip funnel shift
	ldx		[%i0 + 24], %o3

	! realign 32 input bytes out of five 8-byte words
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	! tweak[0] -> %f12:%f14 (byte-swapped)
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	! double once for the second block's tweak
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	! tweak[1] -> %f8:%f10 (byte-swapped)
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes256_encrypt_2x
	add		%i0, 32, %i0

	! recreate tweak[1] (clobbered during rounds) and double again
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f			! unaligned output path
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L256_xts_enloop2x
	add		%i1, 32, %i1

	! keep last ciphertext block in %f0:%f2 for possible stealing
	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_ensteal	! residue -> ciphertext stealing
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing bytes
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_xts_enloop2x+4
	orn		%g0, %l3, %l3		! restore mask in delay slot

	! keep last ciphertext block in %f0:%f2 for possible stealing
	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_ensteal	! residue -> ciphertext stealing
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Block-store path: bulk via ASI_BLK_INIT, the sub-64-byte tail (block
! count in %l5) finished by the loops above.
.align	32
.L256_xts_enblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L256_xts_enblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! aligned input: skip funnel shift
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	! tweak[0] -> %f12:%f14, doubled, tweak[1] -> %f8:%f10
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes256_encrypt_2x
	add		%i0, 32, %i0

	! recreate tweak[1] and double once more for the next pair
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L256_xts_enblk2x
	add		%i1, 8, %i1

	! hand the tail back to the ordinary loops
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_xts_enloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L256_xts_enloop2x
	nop

	! keep last ciphertext block in %f0:%f2 for possible stealing
	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_ensteal	! residue -> ciphertext stealing
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext stealing: swap the last %i5 plaintext bytes with the head
! of the previous ciphertext block (kept in %f0:%f2 / stack copy), then
! loop back once to encrypt the stitched block in place.
.align	32
.L256_xts_ensteal:
	std		%f0, [%fp + STACK_BIAS-16]	! copy of output
	std		%f2, [%fp + STACK_BIAS-8]

	srl		%l0, 3, %l0
	add		%fp, STACK_BIAS-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	nop					! align

.L256_xts_enstealing:
	! byte-wise swap: plaintext byte -> stack block, stolen
	! ciphertext byte -> output tail
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L256_xts_enstealing
	inc		%l0

	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L256_xts_enloop	! one more time
	mov		1, %i2				! %i5 is 0
	ret
	restore
.type	aes256_t4_xts_encrypt,#function
.size	aes256_t4_xts_encrypt,.-aes256_t4_xts_encrypt
.globl	aes256_t4_xts_decrypt
.align	32
!----------------------------------------------------------------------
! AES-256-XTS decryption using SPARC T4 hardware AES (opcodes emitted as
! .word with the decoded mnemonic in the trailing comment).
! In:  %i0 = inp, %i1 = out, %i2 = len in bytes, %i4 = tweak key (key2),
!      %i5 = iv (16 bytes).
! NOTE(review): %i3 is presumably the AES-256 decryption key consumed by
! _aes256_load_deckey (body not visible here) - confirm against the C
! prototype.  Round keys appear in %g4/%g5 (rk[0]) and %f16+ after the
! load call, per the uses below.
! The 128-bit tweak is kept in %g3:%g2 and advanced per block by the
! GF(2^128) doubling: carry of the 128-bit shift folds back as 0x87.
!----------------------------------------------------------------------
aes256_t4_xts_decrypt:
	save		%sp, -STACK_FRAME-16, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	! initial tweak = AES-encrypt(iv, key2), written to the 16-byte
	! scratch slot at [%fp + STACK_BIAS-16]
	mov		%i5, %o0
	add		%fp, STACK_BIAS-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	! pull the tweak back in little-endian order (ASI 0x88 is
	! ASI_PRIMARY_LITTLE) so the doubling below is plain 128-bit math
	add		%fp, STACK_BIAS-16, %l7
	ldxa		[%l7]0x88, %g2
	add		%fp, STACK_BIAS-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	! set the VIS byte-permutation mask used by bshuffle to byte-swap
	! the tweak back to memory order before xoring with data
	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	! %i5 = tail bytes (len & 15); if a tail exists, withhold one full
	! block from the bulk loops for ciphertext stealing
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_deckey
	and		%i2, 15,  %i5
	and		%i2, -16, %i2
	mov		0, %l7
	movrnz		%i5, 16,  %l7
	sub		%i2, %l7, %i2

	! alignment setup: %l0/%l1 = input bit-shift amounts, %l2 = output
	! misalignment, %l3 = mask for partial stores; choose the
	! block-store path only for long, 8-byte-aligned-output,
	! non-overlapping buffers
	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L256_xts_deblk !	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	brz,pn		%i2, .L256_xts_desteal
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L256_xts_deloop2x
	srlx		%i2, 4, %i2
! single-block loop: used for an odd leading block and for the final
! stealing pass; merges misaligned input with shift/or
.L256_xts_deloop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes256_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	! double the tweak in GF(2^128): carry out of bit 127 folds back
	! into the low lane as 0x87
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L256_xts_deloop2x
	add		%i1, 16, %i1

	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_xts_deloop2x+4
	orn		%g0, %l3, %l3

	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! main loop: two blocks per iteration, two tweaks (%f12/%f14 and
! %f8/%f10) computed per pass
.align	32
.L256_xts_deloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L256_xts_deloop2x
	add		%i1, 32, %i1

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_xts_deloop2x+4
	orn		%g0, %l3, %l3

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! block-store path: output is 8-byte aligned and buffers do not
! overlap, so ASI_BLK_INIT (0xe2) stores can be used; %l5 = tail blocks
! handled afterwards by the scalar loops
.align	32
.L256_xts_deblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L256_xts_deblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L256_xts_deblk2x
	add		%i1, 8, %i1

	! drain: membar orders the block-init stores before the scalar
	! loops finish the tail
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_xts_deloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L256_xts_deloop2x
	nop

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ciphertext stealing: decrypt the withheld full block under the *next*
! tweak (computed into %o2/%o3 without advancing %g3:%g2), park it in
! the stack scratch, swap tail bytes with the input, then run the
! single-block loop once more for the final partial block
.align	32
.L256_xts_desteal:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 8f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
8:
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %o2
	and		%l7, 0x87, %l7
	.word	0x97b0c223 !addxc	%g3,%g3,%o3
	xor		%l7, %o2, %o2

	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	call		_aes256_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	std		%f0, [%fp + STACK_BIAS-16]
	std		%f2, [%fp + STACK_BIAS-8]

	srl		%l0, 3, %l0
	add		%fp, STACK_BIAS-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	add		%i1, 16, %i1
	nop					! align

! byte-swap loop: copy %i5 input tail bytes into the scratch block while
! emitting the scratch block's leading bytes as output
.L256_xts_destealing:
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L256_xts_destealing
	inc		%l0

	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L256_xts_deloop	! one more time
	mov		1, %i2				! %i5 is 0
	! not reached: the branch above is unconditional
	ret
	restore
.type	aes256_t4_xts_decrypt,#function
.size	aes256_t4_xts_decrypt,.-aes256_t4_xts_decrypt
.globl	aes192_t4_ctr32_encrypt
.align	32
!----------------------------------------------------------------------
! AES-192 CTR mode with 32-bit counter, SPARC T4 hardware AES.
! In:  %i0 = inp, %i1 = out, %i2 = length, %i4 = 16-byte ivec/counter.
! NOTE(review): %i2 is scaled by 16 below (sllx %i2,4), i.e. the length
! argument is presumably a block count - confirm against the C caller.
! NOTE(review): the enc key is presumably %i3, consumed by
! _aes192_load_enckey (body not visible) - confirm.
! Only the low 32 bits of the counter (%l7) are incremented; the
! "clruw" (srl %l7,0,%l7) keeps it a 32-bit counter.  The upper 96 bits
! are pre-xored with rk[0] (%g4/%g5) outside the loop.
!----------------------------------------------------------------------
aes192_t4_ctr32_encrypt:
	save		%sp, -STACK_FRAME, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes192_load_enckey
	sllx		%i2, 4, %i2

	ld		[%i4 + 0], %l4	! counter
	ld		[%i4 + 4], %l5
	ld		[%i4 + 8], %l6
	ld		[%i4 + 12], %l7

	sllx		%l4, 32, %o5
	or		%l5, %o5, %o5
	sllx		%l6, 32, %g1
	xor		%o5, %g4, %g4		! ^= rk[0]
	xor		%g1, %g5, %g5
	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits

	! alignment setup: %l0/%l1 = input shift amounts, %l2 = output
	! misalignment, %l3 = partial-store mask; take the block-store path
	! for long, 8-byte-aligned-output, non-overlapping buffers
	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L192_ctr32_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L192_ctr32_loop2x
	srlx		%i2, 4, %i2
! single-block loop (odd block count); the first AES round is issued
! here, the remaining rounds by the tail of _aes192_encrypt_1x (+8
! skips its first-round instructions)
.L192_ctr32_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	call		_aes192_encrypt_1x+8
	add		%i0, 16, %i0

	! keystream ^= plaintext
	.word	0x95b02308 !movxtod	%o0,%f10
	.word	0x99b02309 !movxtod	%o1,%f12
	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
	.word	0x85b30d82 !fxor	%f12,%f2,%f2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L192_ctr32_loop2x
	add		%i1, 16, %i1

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_ctr32_loop2x+4
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! main loop: two blocks per iteration; two counter values are expanded
! and the first round for both is issued before calling into
! _aes192_encrypt_2x+16 (which skips its own first-round instructions)
.align	32
.L192_ctr32_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes192_encrypt_2x+16
	add		%i0, 32, %i0

	! keystream ^= plaintext for both blocks
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L192_ctr32_loop2x
	add		%i1, 32, %i1

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_ctr32_loop2x+4
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! block-store path: output is 8-byte aligned and buffers do not
! overlap, so ASI_BLK_INIT (0xe2) stores can be used; %l5 = tail blocks
! handled afterwards by the scalar loops
.align	32
.L192_ctr32_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L192_ctr32_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes192_encrypt_2x+16
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L192_ctr32_blk_loop2x
	add		%i1, 8, %i1

	! drain: membar orders the block-init stores before the scalar
	! loops finish the tail
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L192_ctr32_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L192_ctr32_loop2x
	nop

	ret
	restore
.type	aes192_t4_ctr32_encrypt,#function
.size	aes192_t4_ctr32_encrypt,.-aes192_t4_ctr32_encrypt
.globl	aes192_t4_cbc_decrypt
.align	32
!----------------------------------------------------------------------
! AES-192-CBC decryption, SPARC T4 hardware AES.
! In:  %i0 = inp, %i1 = out, %i2 = len in bytes, %i4 = 16-byte ivec.
! NOTE(review): the dec key is presumably %i3, consumed by
! _aes192_load_deckey (body not visible) - confirm; rk[0] appears in
! %g4/%g5 per the xors below.
! The running IV is kept in %f12-%f15: each decrypted block is xored
! with it, then the just-consumed ciphertext becomes the next IV, and
! the final IV is written back to [%i4] on every exit path.
!----------------------------------------------------------------------
aes192_t4_cbc_decrypt:
	save		%sp, -STACK_FRAME, %sp
	cmp		%i2, 0
	be,pn		SIZE_T_CC, .L192_cbc_dec_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f12	! load ivec
	ld		[%i4 + 4], %f13
	ld		[%i4 + 8], %f14
	ld		[%i4 + 12], %f15
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes192_load_deckey
	and		%i0, 7, %l0
	! alignment setup: %l0/%l1 = input shift amounts, %l2 = output
	! misalignment, %l3 = partial-store mask; take the block-store path
	! for long, 8-byte-aligned-output, non-overlapping buffers
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L192cbc_dec_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	srlx		%i2, 4, %i2
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L192_cbc_dec_loop2x
	prefetch	[%i1], 22
! single-block loop (odd block count); handles unaligned input by
! shifting/merging 64-bit loads
.L192_cbc_dec_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o2		! ^= rk[0]
	xor		%g5, %o1, %o3
	.word	0x81b0230a !movxtod	%o2,%f0
	.word	0x85b0230b !movxtod	%o3,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes192_decrypt_1x
	add		%i0, 16, %i0

	! plaintext = Dec(c) ^ iv; ciphertext (%o0/%o1) becomes next iv
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L192_cbc_dec_loop2x
	add		%i1, 16, %i1
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
.L192_cbc_dec_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! main loop: two blocks per iteration
.align	32
.L192_cbc_dec_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes192_decrypt_2x
	add		%i0, 32, %i0

	! block0 ^= iv, block1 ^= ciphertext0; ciphertext1 becomes next iv
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L192_cbc_dec_loop2x
	add		%i1, 32, %i1
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! block-store path: output is 8-byte aligned and buffers do not
! overlap, so ASI_BLK_INIT (0xe2) stores can be used; %l5 = tail blocks
! handled afterwards by the scalar loops
.align	32
.L192cbc_dec_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L192_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes192_decrypt_2x
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L192_cbc_dec_blk_loop2x
	add		%i1, 8, %i1

	! drain: membar orders the block-init stores before the scalar
	! loops finish the tail
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L192_cbc_dec_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L192_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes192_t4_cbc_decrypt,#function
.size	aes192_t4_cbc_decrypt,.-aes192_t4_cbc_decrypt
4220.globl	aes256_t4_cbc_decrypt
4221.align	32
4222aes256_t4_cbc_decrypt:
4223	save		%sp, -STACK_FRAME, %sp
4224	cmp		%i2, 0
4225	be,pn		SIZE_T_CC, .L256_cbc_dec_abort
4226	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
4227	sub		%i0, %i1, %l5	! %i0!=%i1
4228	ld		[%i4 + 0], %f12	! load ivec
4229	ld		[%i4 + 4], %f13
4230	ld		[%i4 + 8], %f14
4231	ld		[%i4 + 12], %f15
4232	prefetch	[%i0], 20
4233	prefetch	[%i0 + 63], 20
4234	call		_aes256_load_deckey
4235	and		%i0, 7, %l0
4236	andn		%i0, 7, %i0
4237	sll		%l0, 3, %l0
4238	mov		64, %l1
4239	mov		0xff, %l3
4240	sub		%l1, %l0, %l1
4241	and		%i1, 7, %l2
4242	cmp		%i2, 255
4243	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
4244	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
4245	brnz,pn		%l5, .L256cbc_dec_blk	!	%i0==%i1)
4246	srl		%l3, %l2, %l3
4247
4248	andcc		%i2, 16, %g0		! is number of blocks even?
4249	srlx		%i2, 4, %i2
4250	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
4251	bz		%icc, .L256_cbc_dec_loop2x
4252	prefetch	[%i1], 22
4253.L256_cbc_dec_loop:
4254	ldx		[%i0 + 0], %o0
4255	brz,pt		%l0, 4f
4256	ldx		[%i0 + 8], %o1
4257
4258	ldx		[%i0 + 16], %o2
4259	sllx		%o0, %l0, %o0
4260	srlx		%o1, %l1, %g1
4261	sllx		%o1, %l0, %o1
4262	or		%g1, %o0, %o0
4263	srlx		%o2, %l1, %o2
4264	or		%o2, %o1, %o1
42654:
4266	xor		%g4, %o0, %o2		! ^= rk[0]
4267	xor		%g5, %o1, %o3
4268	.word	0x81b0230a !movxtod	%o2,%f0
4269	.word	0x85b0230b !movxtod	%o3,%f2
4270
4271	prefetch	[%i1 + 63], 22
4272	prefetch	[%i0 + 16+63], 20
4273	call		_aes256_decrypt_1x
4274	add		%i0, 16, %i0
4275
4276	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4277	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4278	.word	0x99b02308 !movxtod	%o0,%f12
4279	.word	0x9db02309 !movxtod	%o1,%f14
4280
4281	brnz,pn		%l2, 2f
4282	sub		%i2, 1, %i2
4283
4284	std		%f0, [%i1 + 0]
4285	std		%f2, [%i1 + 8]
4286	brnz,pt		%i2, .L256_cbc_dec_loop2x
4287	add		%i1, 16, %i1
4288	st		%f12, [%i4 + 0]
4289	st		%f13, [%i4 + 4]
4290	st		%f14, [%i4 + 8]
4291	st		%f15, [%i4 + 12]
4292.L256_cbc_dec_abort:
4293	ret
4294	restore
4295
4296.align	16
42972:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
4298						! and ~3x deterioration
4299						! in inp==out case
4300	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
4301	.word	0x8db00902 !faligndata	%f0,%f2,%f6
4302	.word	0x91b08902 !faligndata	%f2,%f2,%f8
4303
4304	stda		%f4, [%i1 + %l3]0xc0	! partial store
4305	std		%f6, [%i1 + 8]
4306	add		%i1, 16, %i1
4307	orn		%g0, %l3, %l3
4308	stda		%f8, [%i1 + %l3]0xc0	! partial store
4309
4310	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
4311	orn		%g0, %l3, %l3
4312	st		%f12, [%i4 + 0]
4313	st		%f13, [%i4 + 4]
4314	st		%f14, [%i4 + 8]
4315	st		%f15, [%i4 + 12]
4316	ret
4317	restore
4318
4319!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
!----------------------------------------------------------------------
! Main 2x-interleaved CBC-decrypt loop for AES-256 (interior of
! aes256_t4_cbc_decrypt; function entry is above this chunk).
! Register roles as inferred from the code below: %i0 = in, %i1 = out,
! %i2 = remaining byte count, %i4 = ivec pointer, %g4:%g5 = rk[0],
! %f12:%f14 = current IV / carried-forward ciphertext, %l0/%l1 =
! input misalignment shift counts, %l2 = nonzero if output is
! misaligned, %l3 = partial-store byte mask.
!----------------------------------------------------------------------
.L256_cbc_dec_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! input 64-bit aligned?
	ldx		[%i0 + 24], %o3

	! Unaligned input: shift-merge five dwords into four aligned ones.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0		! (delay slot) advance input

	! CBC chain: block 1 ^= IV (%f12:%f14), block 2 ^= block-1
	! ciphertext (%o0:%o1); block-2 ciphertext (%o2:%o3) becomes
	! the IV for the next iteration.
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f			! output misaligned?
	sub		%i2, 2, %i2		! (delay slot) two blocks done

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L256_cbc_dec_loop2x
	add		%i1, 32, %i1		! (delay slot) advance output
	! All input consumed: write the final IV back for the caller.
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	! Rotate the 32 output bytes into alignment; GSR.align was
	! presumably set by an alignaddrl at function entry (not
	! visible in this chunk) -- same pattern as aes_t4_encrypt.
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing partial store
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	! %o0 was already reloaded by the ldxa above, so re-enter the
	! loop 4 bytes in, past its first ldx.
	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3		! (delay slot) restore original mask
	st		%f12, [%i4 + 0]		! write out final ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
4406
4407!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
!----------------------------------------------------------------------
! Bulk CBC-decrypt path: same 2x round trip as the loop above, but the
! output is written with ASI_BLK_INIT stores (ASI 0xe2, T4-specific),
! which initialize destination cache lines without reading them first.
! A trailing portion of the data is held back (%l5 blocks) and finished
! through the normal store loops after a membar, so ordinary loads and
! stores are correctly ordered against the init-stores.
!----------------------------------------------------------------------
.L256cbc_dec_blk:
	add	%i1, %i2, %l5		! end-of-output address
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2		! bulk: bytes -> 16-byte blocks
	srl	%l5, 4, %l5		! tail: bytes -> blocks
	sub	%i2, 1, %i2		! shift one block from the bulk count
	add	%l5, 1, %l5		! ... into the tail count

.L256_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! input 64-bit aligned?
	ldx		[%i0 + 24], %o3

	! Unaligned input: shift-merge five dwords into four aligned ones.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0		! (delay slot) advance input
	subcc		%i2, 2, %i2		! two blocks done; cc for bgu below

	! CBC chain, as in the loop above: new ciphertext pair becomes IV.
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L256_cbc_dec_blk_loop2x	! SIZE_T_CC from sparc_arch.h
	add		%i1, 8, %i1		! (delay slot) advance output

	! Bulk done: fold any loop-exit underflow into the tail count,
	! order the init-stores, then finish the tail through the normal
	! 1x loop (odd count; defined earlier, outside this chunk) or
	! the 2x loop above.
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_cbc_dec_loop
	srl		%i2, 0, %i2		! (delay slot) zero-extend count
	brnz,pn		%i2, .L256_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes256_t4_cbc_decrypt,#function
.size	aes256_t4_cbc_decrypt,.-aes256_t4_cbc_decrypt
.align	32
!----------------------------------------------------------------------
! _aes256_decrypt_1x: 14-round AES-256 decryption of one 128-bit block
! held in %f0:%f2.  %i3 addresses the key schedule; callers apply
! rk[0] beforehand (see the "^= rk[0]" XORs in the loops above) and
! keep rk[1]..rk[12] preloaded in %f16-%f62 (rk[n] = two doubles).
! rk[13]/rk[14] do not fit, so they are pulled into %f16-%f22
! mid-stream and those registers are restored from rk[1]/rk[2] at the
! end, leaving the register image ready for the next block.  T4 AES
! opcodes are emitted as raw .word values (mnemonics in trailing
! comments).  Clobbers %f4.  Leaf routine: retl, last load in the
! delay slot.
!----------------------------------------------------------------------
_aes256_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	ldd		[%i3 + 208], %f16	! rk[13] replaces the consumed rk[1]
	ldd		[%i3 + 216], %f18
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	ldd		[%i3 + 224], %f20	! rk[14] replaces the consumed rk[2]
	ldd		[%i3 + 232], %f22
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd4444 !aes_dround01	%f52,%f4,%f2,%f0
	.word	0x84cdc464 !aes_dround23	%f54,%f4,%f2,%f2
	.word	0x88ce4440 !aes_dround01	%f56,%f0,%f2,%f4
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x80cf4444 !aes_dround01	%f60,%f4,%f2,%f0
	.word	0x84cfc464 !aes_dround23	%f62,%f4,%f2,%f2
	! round 13 uses the freshly loaded rk[13] now in %f16/%f18
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	ldd		[%i3 + 16], %f16	! restore rk[1] for the next block
	ldd		[%i3 + 24], %f18
	! final round ("_l" flavour) with rk[14] in %f20/%f22
	.word	0x80cd04c4 !aes_dround01_l	%f20,%f4,%f2,%f0
	.word	0x84cd84e4 !aes_dround23_l	%f22,%f4,%f2,%f2
	ldd		[%i3 + 32], %f20	! restore rk[2] low half
	retl
	ldd		[%i3 + 40], %f22	! (delay slot) restore rk[2] high half
.type	_aes256_decrypt_1x,#function
.size	_aes256_decrypt_1x,.-_aes256_decrypt_1x
4529
.align	32
!----------------------------------------------------------------------
! _aes256_decrypt_2x: as _aes256_decrypt_1x, but decrypts two 128-bit
! blocks (%f0:%f2 and %f4:%f6) with the rounds interleaved to hide
! instruction latency.  Same key-schedule handling: %i3 addresses the
! schedule, rk[0] applied by the caller, rk[1]..rk[12] resident in
! %f16-%f62; rk[13]/rk[14] are loaded into %f16-%f22 mid-stream and
! those registers restored from rk[1]/rk[2] at the end.  Clobbers
! %f8/%f10 (round temporaries).  Leaf routine: retl, last load in the
! delay slot.
!----------------------------------------------------------------------
_aes256_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	ldd		[%i3 + 208], %f16	! rk[13] replaces the consumed rk[1]
	ldd		[%i3 + 216], %f18
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	ldd		[%i3 + 224], %f20	! rk[14] replaces the consumed rk[2]
	ldd		[%i3 + 232], %f22
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd4448 !aes_dround01	%f52,%f8,%f2,%f0
	.word	0x84cdc468 !aes_dround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c4a !aes_dround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc6a !aes_dround23	%f54,%f10,%f6,%f6
	.word	0x90ce4440 !aes_dround01	%f56,%f0,%f2,%f8
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c44 !aes_dround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc64 !aes_dround23	%f58,%f4,%f6,%f6
	.word	0x80cf4448 !aes_dround01	%f60,%f8,%f2,%f0
	.word	0x84cfc468 !aes_dround23	%f62,%f8,%f2,%f2
	.word	0x88cf4c4a !aes_dround01	%f60,%f10,%f6,%f4
	.word	0x8ccfcc6a !aes_dround23	%f62,%f10,%f6,%f6
	! round 13 uses the freshly loaded rk[13] now in %f16/%f18
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	ldd		[%i3 + 16], %f16	! restore rk[1] for the next pair
	ldd		[%i3 + 24], %f18
	! final round ("_l" flavour) with rk[14] in %f20/%f22
	.word	0x80cd04c8 !aes_dround01_l	%f20,%f8,%f2,%f0
	.word	0x84cd84e8 !aes_dround23_l	%f22,%f8,%f2,%f2
	.word	0x88cd0cca !aes_dround01_l	%f20,%f10,%f6,%f4
	.word	0x8ccd8cea !aes_dround23_l	%f22,%f10,%f6,%f6
	ldd		[%i3 + 32], %f20	! restore rk[2] low half
	retl
	ldd		[%i3 + 40], %f22	! (delay slot) restore rk[2] high half
.type	_aes256_decrypt_2x,#function
.size	_aes256_decrypt_2x,.-_aes256_decrypt_2x
4599
.align	32
!----------------------------------------------------------------------
! _aes192_decrypt_1x: 12-round AES-192 decryption of one 128-bit block
! held in %f0:%f2.  The caller applies rk[0]; the entire remaining
! schedule rk[1]..rk[12] fits in %f16-%f62 (rk[n] = two doubles), so
! unlike the AES-256 variant no key loads are needed at all.  The last
! round uses the "_l" (final-round) instruction flavour with rk[12] in
! %f60/%f62.  T4 AES opcodes are emitted as raw .word values
! (mnemonics in trailing comments).  Clobbers %f4.  Leaf routine:
! retl, final round in the delay slot.
!----------------------------------------------------------------------
_aes192_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd4444 !aes_dround01	%f52,%f4,%f2,%f0
	.word	0x84cdc464 !aes_dround23	%f54,%f4,%f2,%f2
	.word	0x88ce4440 !aes_dround01	%f56,%f0,%f2,%f4
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	! final round with rk[12] (%f60/%f62); second half in delay slot
	.word	0x80cf44c4 !aes_dround01_l	%f60,%f4,%f2,%f0
	retl
	.word	0x84cfc4e4 !aes_dround23_l	%f62,%f4,%f2,%f2
.type	_aes192_decrypt_1x,#function
.size	_aes192_decrypt_1x,.-_aes192_decrypt_1x
4629
.align	32
!----------------------------------------------------------------------
! _aes192_decrypt_2x: as _aes192_decrypt_1x, but decrypts two 128-bit
! blocks (%f0:%f2 and %f4:%f6) with the 12 rounds interleaved to hide
! instruction latency.  rk[0] applied by the caller; rk[1]..rk[12]
! resident in %f16-%f62, so the routine performs no memory accesses.
! Clobbers %f8/%f10 (round temporaries).  Leaf routine: retl, final
! round instruction in the delay slot.
!----------------------------------------------------------------------
_aes192_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd4448 !aes_dround01	%f52,%f8,%f2,%f0
	.word	0x84cdc468 !aes_dround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c4a !aes_dround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc6a !aes_dround23	%f54,%f10,%f6,%f6
	.word	0x90ce4440 !aes_dround01	%f56,%f0,%f2,%f8
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c44 !aes_dround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc64 !aes_dround23	%f58,%f4,%f6,%f6
	! final round with rk[12] (%f60/%f62); last instruction in delay slot
	.word	0x80cf44c8 !aes_dround01_l	%f60,%f8,%f2,%f0
	.word	0x84cfc4e8 !aes_dround23_l	%f62,%f8,%f2,%f2
	.word	0x88cf4cca !aes_dround01_l	%f60,%f10,%f6,%f4
	retl
	.word	0x8ccfccea !aes_dround23_l	%f62,%f10,%f6,%f6
.type	_aes192_decrypt_2x,#function
.size	_aes192_decrypt_2x,.-_aes192_decrypt_2x
4683.asciz	"AES for SPARC T4, David S. Miller, Andy Polyakov"
4684.align	4
4685