1#include "sparc_arch.h"
2
3#ifdef	__arch64__
4.register	%g2,#scratch
5.register	%g3,#scratch
6#endif
7
8.text
9
! void aes_t4_encrypt(const u8 *in, u8 *out, const AES_KEY *key)
!   %o0 = in   (arbitrary alignment is handled below)
!   %o1 = out  (arbitrary alignment is handled below)
!   %o2 = expanded key schedule; round count assumed at [%o2 + 240]
!          (AES_KEY-style layout -- confirm against the key-setup caller)
! Encrypts one 16-byte block with the T4 AES opcodes, emitted as raw
! .word constants so pre-T4 assemblers can still build this file.
! Clobbers: %g1, %g4, %g5, %o3-%o5, %f0-%f8, %f12-%f18.
10.globl	aes_t4_encrypt
11.align	32
12aes_t4_encrypt:
13	andcc		%o0, 7, %g1		! is input aligned?
14	andn		%o0, 7, %o0		! round input pointer down to 8
15
16	ldx		[%o2 + 0], %g4		! rk[0] (first 128-bit round key)
17	ldx		[%o2 + 8], %g5
18
19	ldx		[%o0 + 0], %o4
20	bz,pt		%icc, 1f		! aligned input: skip re-alignment
21	ldx		[%o0 + 8], %o5		! (delay slot, executed either way)
22	ldx		[%o0 + 16], %o0
! Unaligned input: merge three 8-byte loads into two aligned 64-bit words
! by shifting left by misalignment*8 bits and OR-ing in the next word.
23	sll		%g1, 3, %g1		! %g1 = misalignment in bits
24	sub		%g0, %g1, %o3		! %o3 = 64 - %g1 (mod 64)
25	sllx		%o4, %g1, %o4
26	sllx		%o5, %g1, %g1
27	srlx		%o5, %o3, %o5
28	srlx		%o0, %o3, %o3
29	or		%o5, %o4, %o4		! %o4 = first 8 plaintext bytes
30	or		%o3, %g1, %o5		! %o5 = second 8 plaintext bytes
311:
32	ld		[%o2 + 240], %o3	! %o3 = number of rounds
33	ldd		[%o2 + 16], %f12	! preload rk[1]
34	ldd		[%o2 + 24], %f14
35	xor		%g4, %o4, %o4		! whitening: block ^= rk[0]
36	xor		%g5, %o5, %o5
37	.word	0x81b0230c !movxtod	%o4,%f0
38	.word	0x85b0230d !movxtod	%o5,%f2
! Loop iterates (rounds/2 - 1) times, two rounds per pass; the final
! two rounds (with the _l "last" forms) are peeled off after the loop.
39	srl		%o3, 1, %o3
40	ldd		[%o2 + 32], %f16	! preload rk[2]
41	sub		%o3, 1, %o3
42	ldd		[%o2 + 40], %f18
43	add		%o2, 48, %o2		! advance key pointer past rk[0..2]
44
45.Lenc:
46	.word	0x88cb0400 !aes_eround01	%f12,%f0,%f2,%f4
47	.word	0x84cb8420 !aes_eround23	%f14,%f0,%f2,%f2
48	ldd		[%o2 + 0], %f12
49	ldd		[%o2 + 8], %f14
50	sub		%o3,1,%o3
51	.word	0x80cc0404 !aes_eround01	%f16,%f4,%f2,%f0
52	.word	0x84cc8424 !aes_eround23	%f18,%f4,%f2,%f2
53	ldd		[%o2 + 16], %f16
54	ldd		[%o2 + 24], %f18
55	brnz,pt		%o3, .Lenc
56	add		%o2, 32, %o2		! (delay slot) next round-key pair
57
58	andcc		%o1, 7, %o4		! is output aligned?
59	.word	0x88cb0400 !aes_eround01	%f12,%f0,%f2,%f4
60	.word	0x84cb8420 !aes_eround23	%f14,%f0,%f2,%f2
61	.word	0x80cc0484 !aes_eround01_l	%f16,%f4,%f2,%f0
62	.word	0x84cc84a4 !aes_eround23_l	%f18,%f4,%f2,%f2
63
64	bnz,pn		%icc, 2f		! unaligned output needs VIS path
65	nop
66
67	std		%f0, [%o1 + 0]		! aligned: two plain 8-byte stores
68	retl
69	std		%f2, [%o1 + 8]		! (delay slot)
70
! Unaligned output: alignaddrl sets %gsr for faligndata, %o5 becomes an
! edge byte-mask so the two partial stores touch only our 16 bytes.
712:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
72	mov		0xff, %o5
73	srl		%o5, %o4, %o5		! leading-edge mask
74
75	.word	0x89b00900 !faligndata	%f0,%f0,%f4
76	.word	0x8db00902 !faligndata	%f0,%f2,%f6
77	.word	0x91b08902 !faligndata	%f2,%f2,%f8
78
79	stda		%f4, [%o1 + %o5]0xc0	! partial store
80	std		%f6, [%o1 + 8]
81	add		%o1, 16, %o1
82	orn		%g0, %o5, %o5		! invert mask for trailing edge
83	retl
84	stda		%f8, [%o1 + %o5]0xc0	! partial store
85.type	aes_t4_encrypt,#function
86.size	aes_t4_encrypt,.-aes_t4_encrypt
87
! void aes_t4_decrypt(const u8 *in, u8 *out, const AES_KEY *key)
!   %o0 = in, %o1 = out, %o2 = expanded (decryption-ordered) key schedule
!   with the round count assumed at [%o2 + 240].
! Mirror image of aes_t4_encrypt using the aes_dround* opcodes; see that
! routine for the alignment-handling commentary -- flow is identical.
! Clobbers: %g1, %g4, %g5, %o3-%o5, %f0-%f8, %f12-%f18.
88.globl	aes_t4_decrypt
89.align	32
90aes_t4_decrypt:
91	andcc		%o0, 7, %g1		! is input aligned?
92	andn		%o0, 7, %o0		! round input pointer down to 8
93
94	ldx		[%o2 + 0], %g4		! rk[0]
95	ldx		[%o2 + 8], %g5
96
97	ldx		[%o0 + 0], %o4
98	bz,pt		%icc, 1f		! aligned input: skip re-alignment
99	ldx		[%o0 + 8], %o5		! (delay slot)
100	ldx		[%o0 + 16], %o0
! Shift/OR three 8-byte loads into two aligned 64-bit ciphertext words.
101	sll		%g1, 3, %g1
102	sub		%g0, %g1, %o3
103	sllx		%o4, %g1, %o4
104	sllx		%o5, %g1, %g1
105	srlx		%o5, %o3, %o5
106	srlx		%o0, %o3, %o3
107	or		%o5, %o4, %o4
108	or		%o3, %g1, %o5
1091:
110	ld		[%o2 + 240], %o3	! %o3 = number of rounds
111	ldd		[%o2 + 16], %f12
112	ldd		[%o2 + 24], %f14
113	xor		%g4, %o4, %o4		! whitening: block ^= rk[0]
114	xor		%g5, %o5, %o5
115	.word	0x81b0230c !movxtod	%o4,%f0
116	.word	0x85b0230d !movxtod	%o5,%f2
! (rounds/2 - 1) double-round iterations; last pair peeled after loop.
117	srl		%o3, 1, %o3
118	ldd		[%o2 + 32], %f16
119	sub		%o3, 1, %o3
120	ldd		[%o2 + 40], %f18
121	add		%o2, 48, %o2
122
123.Ldec:
124	.word	0x88cb0440 !aes_dround01	%f12,%f0,%f2,%f4
125	.word	0x84cb8460 !aes_dround23	%f14,%f0,%f2,%f2
126	ldd		[%o2 + 0], %f12
127	ldd		[%o2 + 8], %f14
128	sub		%o3,1,%o3
129	.word	0x80cc0444 !aes_dround01	%f16,%f4,%f2,%f0
130	.word	0x84cc8464 !aes_dround23	%f18,%f4,%f2,%f2
131	ldd		[%o2 + 16], %f16
132	ldd		[%o2 + 24], %f18
133	brnz,pt		%o3, .Ldec
134	add		%o2, 32, %o2		! (delay slot)
135
136	andcc		%o1, 7, %o4		! is output aligned?
137	.word	0x88cb0440 !aes_dround01	%f12,%f0,%f2,%f4
138	.word	0x84cb8460 !aes_dround23	%f14,%f0,%f2,%f2
139	.word	0x80cc04c4 !aes_dround01_l	%f16,%f4,%f2,%f0
140	.word	0x84cc84e4 !aes_dround23_l	%f18,%f4,%f2,%f2
141
142	bnz,pn		%icc, 2f		! unaligned output needs VIS path
143	nop
144
145	std		%f0, [%o1 + 0]
146	retl
147	std		%f2, [%o1 + 8]		! (delay slot)
148
! Unaligned output: faligndata plus edge-masked partial stores.
1492:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
150	mov		0xff, %o5
151	srl		%o5, %o4, %o5		! leading-edge mask
152
153	.word	0x89b00900 !faligndata	%f0,%f0,%f4
154	.word	0x8db00902 !faligndata	%f0,%f2,%f6
155	.word	0x91b08902 !faligndata	%f2,%f2,%f8
156
157	stda		%f4, [%o1 + %o5]0xc0	! partial store
158	std		%f6, [%o1 + 8]
159	add		%o1, 16, %o1
160	orn		%g0, %o5, %o5		! trailing-edge mask
161	retl
162	stda		%f8, [%o1 + %o5]0xc0	! partial store
163.type	aes_t4_decrypt,#function
164.size	aes_t4_decrypt,.-aes_t4_decrypt
! aes_t4_set_encrypt_key(const u8 *userKey %o0, int bits %o1, AES_KEY *key %o2)
! Expands a 128/192/256-bit user key into the round-key schedule at %o2
! using the T4 aes_kexpand{0,1,2} opcodes, writes the round count
! (10/12/14) at [%o2 + 240], and returns 0 in %o0.
! Dispatch on key size: bits < 192 -> .L128, bits == 192 -> .L192,
! otherwise the fall-through 256-bit path.  Unaligned user keys are
! handled with alignaddr/faligndata (%o3 = low 3 bits of userKey).
165.globl	aes_t4_set_encrypt_key
166.align	32
167aes_t4_set_encrypt_key:
168.Lset_encrypt_key:
169	and		%o0, 7, %o3		! %o3 = key misalignment
170	.word	0x91b20300 !alignaddr	%o0,%g0,%o0
171	cmp		%o1, 192
172	ldd		[%o0 + 0], %f0
173	bl,pt		%icc,.L128
174	ldd		[%o0 + 8], %f2		! (delay slot)
175
176	be,pt		%icc,.L192
177	ldd		[%o0 + 16], %f4		! (delay slot)
178	brz,pt		%o3, .L256aligned
179	ldd		[%o0 + 24], %f6		! (delay slot)
180
! Unaligned 256-bit key: shift the five loaded doublewords into place.
181	ldd		[%o0 + 32], %f8
182	.word	0x81b00902 !faligndata	%f0,%f2,%f0
183	.word	0x85b08904 !faligndata	%f2,%f4,%f2
184	.word	0x89b10906 !faligndata	%f4,%f6,%f4
185	.word	0x8db18908 !faligndata	%f6,%f8,%f6
! AES-256: %f0-%f6 hold the 32-byte key; seven kexpand iterations emit
! round keys 0..14, storing each 8-byte half as it is produced.
186.L256aligned:
187	std		%f0, [%o2 + 0]
188	.word	0x80c80106 !aes_kexpand1	%f0,%f6,0,%f0
189	std		%f2, [%o2 + 8]
190	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
191	std		%f4, [%o2 + 16]
192	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
193	std		%f6, [%o2 + 24]
194	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
195	std		%f0, [%o2 + 32]
196	.word	0x80c80306 !aes_kexpand1	%f0,%f6,1,%f0
197	std		%f2, [%o2 + 40]
198	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
199	std		%f4, [%o2 + 48]
200	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
201	std		%f6, [%o2 + 56]
202	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
203	std		%f0, [%o2 + 64]
204	.word	0x80c80506 !aes_kexpand1	%f0,%f6,2,%f0
205	std		%f2, [%o2 + 72]
206	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
207	std		%f4, [%o2 + 80]
208	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
209	std		%f6, [%o2 + 88]
210	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
211	std		%f0, [%o2 + 96]
212	.word	0x80c80706 !aes_kexpand1	%f0,%f6,3,%f0
213	std		%f2, [%o2 + 104]
214	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
215	std		%f4, [%o2 + 112]
216	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
217	std		%f6, [%o2 + 120]
218	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
219	std		%f0, [%o2 + 128]
220	.word	0x80c80906 !aes_kexpand1	%f0,%f6,4,%f0
221	std		%f2, [%o2 + 136]
222	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
223	std		%f4, [%o2 + 144]
224	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
225	std		%f6, [%o2 + 152]
226	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
227	std		%f0, [%o2 + 160]
228	.word	0x80c80b06 !aes_kexpand1	%f0,%f6,5,%f0
229	std		%f2, [%o2 + 168]
230	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
231	std		%f4, [%o2 + 176]
232	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
233	std		%f6, [%o2 + 184]
234	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
235	std		%f0, [%o2 + 192]
236	.word	0x80c80d06 !aes_kexpand1	%f0,%f6,6,%f0
237	std		%f2, [%o2 + 200]
238	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
239	std		%f4, [%o2 + 208]
240	std		%f6, [%o2 + 216]
241	std		%f0, [%o2 + 224]
242	std		%f2, [%o2 + 232]
243
244	mov		14, %o3			! AES-256: 14 rounds
245	st		%o3, [%o2 + 240]
246	retl
247	xor		%o0, %o0, %o0		! (delay slot) return 0
248
249.align	16
250.L192:
251	brz,pt		%o3, .L192aligned
252	nop
253
! Unaligned 192-bit key: shift four loaded doublewords into place.
254	ldd		[%o0 + 24], %f6
255	.word	0x81b00902 !faligndata	%f0,%f2,%f0
256	.word	0x85b08904 !faligndata	%f2,%f4,%f2
257	.word	0x89b10906 !faligndata	%f4,%f6,%f4
! AES-192: %f0-%f4 hold the 24-byte key; eight kexpand iterations.
258.L192aligned:
259	std		%f0, [%o2 + 0]
260	.word	0x80c80104 !aes_kexpand1	%f0,%f4,0,%f0
261	std		%f2, [%o2 + 8]
262	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
263	std		%f4, [%o2 + 16]
264	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
265	std		%f0, [%o2 + 24]
266	.word	0x80c80304 !aes_kexpand1	%f0,%f4,1,%f0
267	std		%f2, [%o2 + 32]
268	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
269	std		%f4, [%o2 + 40]
270	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
271	std		%f0, [%o2 + 48]
272	.word	0x80c80504 !aes_kexpand1	%f0,%f4,2,%f0
273	std		%f2, [%o2 + 56]
274	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
275	std		%f4, [%o2 + 64]
276	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
277	std		%f0, [%o2 + 72]
278	.word	0x80c80704 !aes_kexpand1	%f0,%f4,3,%f0
279	std		%f2, [%o2 + 80]
280	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
281	std		%f4, [%o2 + 88]
282	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
283	std		%f0, [%o2 + 96]
284	.word	0x80c80904 !aes_kexpand1	%f0,%f4,4,%f0
285	std		%f2, [%o2 + 104]
286	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
287	std		%f4, [%o2 + 112]
288	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
289	std		%f0, [%o2 + 120]
290	.word	0x80c80b04 !aes_kexpand1	%f0,%f4,5,%f0
291	std		%f2, [%o2 + 128]
292	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
293	std		%f4, [%o2 + 136]
294	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
295	std		%f0, [%o2 + 144]
296	.word	0x80c80d04 !aes_kexpand1	%f0,%f4,6,%f0
297	std		%f2, [%o2 + 152]
298	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
299	std		%f4, [%o2 + 160]
300	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
301	std		%f0, [%o2 + 168]
302	.word	0x80c80f04 !aes_kexpand1	%f0,%f4,7,%f0
303	std		%f2, [%o2 + 176]
304	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
305	std		%f4, [%o2 + 184]
306	std		%f0, [%o2 + 192]
307	std		%f2, [%o2 + 200]
308
309	mov		12, %o3			! AES-192: 12 rounds
310	st		%o3, [%o2 + 240]
311	retl
312	xor		%o0, %o0, %o0		! (delay slot) return 0
313
314.align	16
315.L128:
316	brz,pt		%o3, .L128aligned
317	nop
318
! Unaligned 128-bit key: shift three loaded doublewords into place.
319	ldd		[%o0 + 16], %f4
320	.word	0x81b00902 !faligndata	%f0,%f2,%f0
321	.word	0x85b08904 !faligndata	%f2,%f4,%f2
! AES-128: %f0-%f2 hold the 16-byte key; ten kexpand iterations.
322.L128aligned:
323	std		%f0, [%o2 + 0]
324	.word	0x80c80102 !aes_kexpand1	%f0,%f2,0,%f0
325	std		%f2, [%o2 + 8]
326	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
327	std		%f0, [%o2 + 16]
328	.word	0x80c80302 !aes_kexpand1	%f0,%f2,1,%f0
329	std		%f2, [%o2 + 24]
330	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
331	std		%f0, [%o2 + 32]
332	.word	0x80c80502 !aes_kexpand1	%f0,%f2,2,%f0
333	std		%f2, [%o2 + 40]
334	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
335	std		%f0, [%o2 + 48]
336	.word	0x80c80702 !aes_kexpand1	%f0,%f2,3,%f0
337	std		%f2, [%o2 + 56]
338	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
339	std		%f0, [%o2 + 64]
340	.word	0x80c80902 !aes_kexpand1	%f0,%f2,4,%f0
341	std		%f2, [%o2 + 72]
342	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
343	std		%f0, [%o2 + 80]
344	.word	0x80c80b02 !aes_kexpand1	%f0,%f2,5,%f0
345	std		%f2, [%o2 + 88]
346	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
347	std		%f0, [%o2 + 96]
348	.word	0x80c80d02 !aes_kexpand1	%f0,%f2,6,%f0
349	std		%f2, [%o2 + 104]
350	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
351	std		%f0, [%o2 + 112]
352	.word	0x80c80f02 !aes_kexpand1	%f0,%f2,7,%f0
353	std		%f2, [%o2 + 120]
354	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
355	std		%f0, [%o2 + 128]
356	.word	0x80c81102 !aes_kexpand1	%f0,%f2,8,%f0
357	std		%f2, [%o2 + 136]
358	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
359	std		%f0, [%o2 + 144]
360	.word	0x80c81302 !aes_kexpand1	%f0,%f2,9,%f0
361	std		%f2, [%o2 + 152]
362	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
363	std		%f0, [%o2 + 160]
364	std		%f2, [%o2 + 168]
365
366	mov		10, %o3			! AES-128: 10 rounds
367	st		%o3, [%o2 + 240]
368	retl
369	xor		%o0, %o0, %o0		! (delay slot) return 0
370.type	aes_t4_set_encrypt_key,#function
371.size	aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key
372
! aes_t4_set_decrypt_key(const u8 *userKey %o0, int bits %o1, AES_KEY *key %o2)
! Builds the encryption schedule via .Lset_encrypt_key, then reverses
! the order of the round keys in place (16 bytes at a time, swapping
! from both ends) to produce the decryption schedule.  The round count
! written at [%o2 + 240] by the encrypt-key path is left untouched.
! Returns 0 in %o0.
373.globl	aes_t4_set_decrypt_key
374.align	32
375aes_t4_set_decrypt_key:
376	mov		%o7, %o5		! save return address across call
377	call		.Lset_encrypt_key
378	nop
379
380	mov		%o5, %o7		! restore return address
381	sll		%o3, 4, %o0		! %o3 is number of rounds
382	add		%o3, 2, %o3
383	add		%o2, %o0, %o0	! %o0=%o2+16*rounds
384	srl		%o3, 2, %o3		! %o3=(rounds+2)/4
385
! Each iteration swaps the 32-byte pair at the front (%o2) with the
! mirrored pair at the back (%o0); pointers walk toward the middle.
386.Lkey_flip:
387	ldd		[%o2 + 0],  %f0
388	ldd		[%o2 + 8],  %f2
389	ldd		[%o2 + 16], %f4
390	ldd		[%o2 + 24], %f6
391	ldd		[%o0 + 0],  %f8
392	ldd		[%o0 + 8],  %f10
393	ldd		[%o0 - 16], %f12
394	ldd		[%o0 - 8],  %f14
395	sub		%o3, 1, %o3
396	std		%f0, [%o0 + 0]
397	std		%f2, [%o0 + 8]
398	std		%f4, [%o0 - 16]
399	std		%f6, [%o0 - 8]
400	std		%f8, [%o2 + 0]
401	std		%f10, [%o2 + 8]
402	std		%f12, [%o2 + 16]
403	std		%f14, [%o2 + 24]
404	add		%o2, 32, %o2
405	brnz		%o3, .Lkey_flip
406	sub		%o0, 32, %o0		! (delay slot)
407
408	retl
409	xor		%o0, %o0, %o0		! (delay slot) return 0
410.type	aes_t4_set_decrypt_key,#function
411.size	aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
! _aes128_encrypt_1x: encrypt one block already held in %f0:%f2
! (pre-whitened with rk[0] by the caller) through the ten AES-128
! rounds, using round keys preloaded in %f16-%f54 by _aes128_loadkey.
! Result returned in %f0:%f2; %f4 is scratch.  Leaf routine (retl);
! some callers enter at +8 to skip round 1 they performed themselves.
412.align	32
413_aes128_encrypt_1x:
414	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
415	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
416	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
417	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
418	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
419	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
420	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
421	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
422	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
423	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
424	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
425	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
426	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
427	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
428	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
429	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
430	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
431	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
432	.word	0x80cd4484 !aes_eround01_l	%f52,%f4,%f2,%f0
433	retl
434	.word	0x84cdc4a4 !aes_eround23_l	%f54,%f4,%f2,%f2	! (delay slot) final round
435.type	_aes128_encrypt_1x,#function
436.size	_aes128_encrypt_1x,.-_aes128_encrypt_1x
437
! _aes128_encrypt_2x: encrypt two independent blocks in parallel,
! block A in %f0:%f2 and block B in %f4:%f6 (both pre-whitened with
! rk[0] by the caller), interleaving the two round streams to hide
! instruction latency.  Round keys preloaded in %f16-%f54; %f8/%f10
! are scratch.  Results in %f0:%f2 and %f4:%f6.  Leaf routine; some
! callers enter at +16 to skip the first round pair.
438.align	32
439_aes128_encrypt_2x:
440	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
441	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
442	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
443	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
444	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
445	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
446	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
447	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
448	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
449	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
450	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
451	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
452	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
453	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
454	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
455	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
456	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
457	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
458	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
459	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
460	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
461	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
462	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
463	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
464	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
465	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
466	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
467	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
468	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
469	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
470	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
471	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
472	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
473	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
474	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
475	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
476	.word	0x80cd4488 !aes_eround01_l	%f52,%f8,%f2,%f0
477	.word	0x84cdc4a8 !aes_eround23_l	%f54,%f8,%f2,%f2
478	.word	0x88cd4c8a !aes_eround01_l	%f52,%f10,%f6,%f4
479	retl
480	.word	0x8ccdccaa !aes_eround23_l	%f54,%f10,%f6,%f6	! (delay slot) final round of block B
481.type	_aes128_encrypt_2x,#function
482.size	_aes128_encrypt_2x,.-_aes128_encrypt_2x
483
! _aes128_loadkey: cache the whole AES-128 schedule (11 round keys =
! 176 bytes) from the key pointer in %i3 into registers:
!   rk[0] -> %g4:%g5 (integer regs, used for the xor-whitening step),
!   rk[1..10] -> %f16-%f54 (even-numbered double regs).
! Leaf routine; called from within a register window, hence %i3.
484.align	32
485_aes128_loadkey:
486	ldx		[%i3 + 0], %g4
487	ldx		[%i3 + 8], %g5
488	ldd		[%i3 + 16], %f16
489	ldd		[%i3 + 24], %f18
490	ldd		[%i3 + 32], %f20
491	ldd		[%i3 + 40], %f22
492	ldd		[%i3 + 48], %f24
493	ldd		[%i3 + 56], %f26
494	ldd		[%i3 + 64], %f28
495	ldd		[%i3 + 72], %f30
496	ldd		[%i3 + 80], %f32
497	ldd		[%i3 + 88], %f34
498	ldd		[%i3 + 96], %f36
499	ldd		[%i3 + 104], %f38
500	ldd		[%i3 + 112], %f40
501	ldd		[%i3 + 120], %f42
502	ldd		[%i3 + 128], %f44
503	ldd		[%i3 + 136], %f46
504	ldd		[%i3 + 144], %f48
505	ldd		[%i3 + 152], %f50
506	ldd		[%i3 + 160], %f52
507	ldd		[%i3 + 168], %f54
508	retl
509	nop
510.type	_aes128_loadkey,#function
511.size	_aes128_loadkey,.-_aes128_loadkey
! Encrypt and decrypt key loading are identical for this layout.
512_aes128_load_enckey=_aes128_loadkey
513_aes128_load_deckey=_aes128_loadkey
514
! aes128_t4_cbc_encrypt(const u8 *inp %i0, u8 *out %i1, size_t len %i2,
!                       const AES_KEY *key %i3, u8 ivec[16] %i4)
! CBC-encrypt %i2 bytes (multiple of 16 assumed -- standard CBC
! contract, confirm with caller).  Three output strategies:
!   * aligned out, len >= 128, inp != out: fall-through 16-byte loop;
!   * unaligned out: same loop with faligndata + partial stores;
!   * 64-byte-aligned bulk: .L128cbc_enc_blk with ASI_BLK_INIT stores.
! The final ciphertext block is written back to ivec before return.
! STACK_FRAME / SIZE_T_CC / srln are sparc_arch.h macros abstracting
! the v8+ vs v9 ABI differences.
515.globl	aes128_t4_cbc_encrypt
516.align	32
517aes128_t4_cbc_encrypt:
518	save		%sp, -STACK_FRAME, %sp
519	cmp		%i2, 0
520	be,pn		SIZE_T_CC, .L128_cbc_enc_abort
521	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
522	sub		%i0, %i1, %l5	! %i0!=%i1
523	ld		[%i4 + 0], %f0		! load ivec into %f0-%f3
524	ld		[%i4 + 4], %f1
525	ld		[%i4 + 8], %f2
526	ld		[%i4 + 12], %f3
527	prefetch	[%i0], 20
528	prefetch	[%i0 + 63], 20
529	call		_aes128_load_enckey
530	and		%i0, 7, %l0		! (delay slot) input misalignment
531	andn		%i0, 7, %i0
532	sll		%l0, 3, %l0		! %l0 = misalignment in bits
533	mov		64, %l1
534	mov		0xff, %l3
535	sub		%l1, %l0, %l1		! %l1 = 64 - %l0
536	and		%i1, 7, %l2		! %l2 = output misalignment
537	cmp		%i2, 127
538	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
539	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
540	brnz,pn		%l5, .L128cbc_enc_blk	!	%i0==%i1)
541	srl		%l3, %l2, %l3		! (delay slot) partial-store mask
542
543	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
544	srlx		%i2, 4, %i2		! byte count -> block count
545	prefetch	[%i1], 22
546
! One block per iteration; %f0:%f2 carries the CBC chaining value.
547.L128_cbc_enc_loop:
548	ldx		[%i0 + 0], %o0
549	brz,pt		%l0, 4f			! aligned input: skip re-merge
550	ldx		[%i0 + 8], %o1		! (delay slot)
551
552	ldx		[%i0 + 16], %o2
553	sllx		%o0, %l0, %o0
554	srlx		%o1, %l1, %g1
555	sllx		%o1, %l0, %o1
556	or		%g1, %o0, %o0
557	srlx		%o2, %l1, %o2
558	or		%o2, %o1, %o1
5594:
560	xor		%g4, %o0, %o0		! ^= rk[0]
561	xor		%g5, %o1, %o1
562	.word	0x99b02308 !movxtod	%o0,%f12
563	.word	0x9db02309 !movxtod	%o1,%f14
564
565	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
566	.word	0x85b38d82 !fxor	%f14,%f2,%f2
567	prefetch	[%i1 + 63], 22
568	prefetch	[%i0 + 16+63], 20
569	call		_aes128_encrypt_1x
570	add		%i0, 16, %i0		! (delay slot)
571
572	brnz,pn		%l2, 2f			! unaligned-output path
573	sub		%i2, 1, %i2		! (delay slot)
574
575	std		%f0, [%i1 + 0]
576	std		%f2, [%i1 + 8]
577	brnz,pt		%i2, .L128_cbc_enc_loop
578	add		%i1, 16, %i1		! (delay slot)
! Done: store final ciphertext block back into ivec.
579	st		%f0, [%i4 + 0]
580	st		%f1, [%i4 + 4]
581	st		%f2, [%i4 + 8]
582	st		%f3, [%i4 + 12]
583.L128_cbc_enc_abort:
584	ret
585	restore
586
587.align	16
5882:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
589						! and ~3x deterioration
590						! in inp==out case
591	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
592	.word	0x8db00902 !faligndata	%f0,%f2,%f6
593	.word	0x91b08902 !faligndata	%f2,%f2,%f8
594
595	stda		%f4, [%i1 + %l3]0xc0	! partial store
596	std		%f6, [%i1 + 8]
597	add		%i1, 16, %i1
598	orn		%g0, %l3, %l3		! invert edge mask
599	stda		%f8, [%i1 + %l3]0xc0	! partial store
600
! Re-enter the loop 4 bytes in, skipping the ldx already issued above.
601	brnz,pt		%i2, .L128_cbc_enc_loop+4
602	orn		%g0, %l3, %l3		! (delay slot) restore mask
603	st		%f0, [%i4 + 0]		! final block -> ivec
604	st		%f1, [%i4 + 4]
605	st		%f2, [%i4 + 8]
606	st		%f3, [%i4 + 12]
607	ret
608	restore
609
610!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: output is 8-byte aligned and distinct from input.  Uses
! ASI_BLK_INIT stores (no read-for-ownership); any sub-64-byte tail is
! finished by looping back into the scalar .L128_cbc_enc_loop.
611.align	32
612.L128cbc_enc_blk:
613	add	%i1, %i2, %l5
614	and	%l5, 63, %l5	! tail
615	sub	%i2, %l5, %i2
616	add	%l5, 15, %l5	! round up to 16n
617	srlx	%i2, 4, %i2
618	srl	%l5, 4, %l5		! %l5 = tail in blocks
619
620.L128_cbc_enc_blk_loop:
621	ldx		[%i0 + 0], %o0
622	brz,pt		%l0, 5f			! aligned input: skip re-merge
623	ldx		[%i0 + 8], %o1		! (delay slot)
624
625	ldx		[%i0 + 16], %o2
626	sllx		%o0, %l0, %o0
627	srlx		%o1, %l1, %g1
628	sllx		%o1, %l0, %o1
629	or		%g1, %o0, %o0
630	srlx		%o2, %l1, %o2
631	or		%o2, %o1, %o1
6325:
633	xor		%g4, %o0, %o0		! ^= rk[0]
634	xor		%g5, %o1, %o1
635	.word	0x99b02308 !movxtod	%o0,%f12
636	.word	0x9db02309 !movxtod	%o1,%f14
637
638	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
639	.word	0x85b38d82 !fxor	%f14,%f2,%f2
640	prefetch	[%i0 + 16+63], 20
641	call		_aes128_encrypt_1x
642	add		%i0, 16, %i0		! (delay slot)
643	sub		%i2, 1, %i2
644
645	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
646	add		%i1, 8, %i1
647	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
648	brnz,pt		%i2, .L128_cbc_enc_blk_loop
649	add		%i1, 8, %i1		! (delay slot)
650
! Order block-init stores before any tail loads/stores, then handle tail.
651	membar		#StoreLoad|#StoreStore
652	brnz,pt		%l5, .L128_cbc_enc_loop
653	mov		%l5, %i2		! (delay slot) tail block count
654	st		%f0, [%i4 + 0]		! final block -> ivec
655	st		%f1, [%i4 + 4]
656	st		%f2, [%i4 + 8]
657	st		%f3, [%i4 + 12]
658	ret
659	restore
660.type	aes128_t4_cbc_encrypt,#function
661.size	aes128_t4_cbc_encrypt,.-aes128_t4_cbc_encrypt
! aes128_t4_ctr32_encrypt(const u8 *inp %i0, u8 *out %i1,
!                         size_t blocks %i2, const AES_KEY *key %i3,
!                         const u8 ivec[16] %i4)
! CTR mode with a 32-bit counter: only the last word of ivec (%l7) is
! incremented, modulo 2^32 (the "clruw" srl).  %i2 arrives as a block
! count (converted to bytes for the size test, back to blocks after).
! The 96 counter bits that never change are pre-xored with rk[0]
! (%g4/%g5), so each block only needs one fresh xor; the first AES
! round of each block is issued inline and the tail of the schedule is
! run by entering _aes128_encrypt_1x+8 / _aes128_encrypt_2x+16.
! Three paths as in CBC: 1x/2x scalar loops, unaligned-output variants,
! and a bulk ASI_BLK_INIT path (.L128_ctr32_blk).
662.globl	aes128_t4_ctr32_encrypt
663.align	32
664aes128_t4_ctr32_encrypt:
665	save		%sp, -STACK_FRAME, %sp
666	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
667
668	prefetch	[%i0], 20
669	prefetch	[%i0 + 63], 20
670	call		_aes128_load_enckey
671	sllx		%i2, 4, %i2		! (delay slot) blocks -> bytes
672
673	ld		[%i4 + 0], %l4	! counter
674	ld		[%i4 + 4], %l5
675	ld		[%i4 + 8], %l6
676	ld		[%i4 + 12], %l7	! %l7 = 32-bit counter word
677
! Fold the constant 96 counter bits into rk[0] once, up front.
678	sllx		%l4, 32, %o5
679	or		%l5, %o5, %o5
680	sllx		%l6, 32, %g1
681	xor		%o5, %g4, %g4		! ^= rk[0]
682	xor		%g1, %g5, %g5
683	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
684
685	sub		%i0, %i1, %l5	! %i0!=%i1
686	and		%i0, 7, %l0		! input misalignment
687	andn		%i0, 7, %i0
688	sll		%l0, 3, %l0		! ... in bits
689	mov		64, %l1
690	mov		0xff, %l3
691	sub		%l1, %l0, %l1		! %l1 = 64 - %l0
692	and		%i1, 7, %l2		! output misalignment
693	cmp		%i2, 255
694	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
695	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
696	brnz,pn		%l5, .L128_ctr32_blk	!	%i0==%i1)
697	srl		%l3, %l2, %l3		! (delay slot) partial-store mask
698
! Odd block count? do one block in the 1x loop, then pairs in loop2x.
699	andcc		%i2, 16, %g0		! is number of blocks even?
700	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
701	bz		%icc, .L128_ctr32_loop2x
702	srlx		%i2, 4, %i2		! (delay slot) bytes -> blocks
703.L128_ctr32_loop:
704	ldx		[%i0 + 0], %o0
705	brz,pt		%l0, 4f			! aligned input: skip re-merge
706	ldx		[%i0 + 8], %o1		! (delay slot)
707
708	ldx		[%i0 + 16], %o2
709	sllx		%o0, %l0, %o0
710	srlx		%o1, %l1, %g1
711	sllx		%o1, %l0, %o1
712	or		%g1, %o0, %o0
713	srlx		%o2, %l1, %o2
714	or		%o2, %o1, %o1
7154:
716	xor		%g5, %l7, %g1		! ^= rk[0]
717	add		%l7, 1, %l7		! bump 32-bit counter
718	.word	0x85b02301 !movxtod	%g1,%f2
719	srl		%l7, 0, %l7		! clruw
720	prefetch	[%i1 + 63], 22
721	prefetch	[%i0 + 16+63], 20
! First round issued inline; enter 1x helper past its first round.
722	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
723	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
724	call		_aes128_encrypt_1x+8
725	add		%i0, 16, %i0		! (delay slot)
726
727	.word	0x95b02308 !movxtod	%o0,%f10
728	.word	0x99b02309 !movxtod	%o1,%f12
729	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
730	.word	0x85b30d82 !fxor	%f12,%f2,%f2
731
732	brnz,pn		%l2, 2f			! unaligned-output path
733	sub		%i2, 1, %i2		! (delay slot)
734
735	std		%f0, [%i1 + 0]
736	std		%f2, [%i1 + 8]
737	brnz,pt		%i2, .L128_ctr32_loop2x	! remaining count is even
738	add		%i1, 16, %i1		! (delay slot)
739
740	ret
741	restore
742
.align	16
7442:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
745						! and ~3x deterioration
746						! in inp==out case
747	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
748	.word	0x8db00902 !faligndata	%f0,%f2,%f6
749	.word	0x91b08902 !faligndata	%f2,%f2,%f8
750	stda		%f4, [%i1 + %l3]0xc0	! partial store
751	std		%f6, [%i1 + 8]
752	add		%i1, 16, %i1
753	orn		%g0, %l3, %l3		! invert edge mask
754	stda		%f8, [%i1 + %l3]0xc0	! partial store
755
756	brnz,pt		%i2, .L128_ctr32_loop2x+4	! re-enter past first ldx
757	orn		%g0, %l3, %l3		! (delay slot) restore mask
758
759	ret
760	restore
761
762!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two blocks per iteration: counters n and n+1 are keyed into %f2 and
! %f6, then pushed through the interleaved 2x helper.
763.align	32
764.L128_ctr32_loop2x:
765	ldx		[%i0 + 0], %o0
766	ldx		[%i0 + 8], %o1
767	ldx		[%i0 + 16], %o2
768	brz,pt		%l0, 4f			! aligned input: skip re-merge
769	ldx		[%i0 + 24], %o3		! (delay slot)
770
771	ldx		[%i0 + 32], %o4
772	sllx		%o0, %l0, %o0
773	srlx		%o1, %l1, %g1
774	or		%g1, %o0, %o0
775	sllx		%o1, %l0, %o1
776	srlx		%o2, %l1, %g1
777	or		%g1, %o1, %o1
778	sllx		%o2, %l0, %o2
779	srlx		%o3, %l1, %g1
780	or		%g1, %o2, %o2
781	sllx		%o3, %l0, %o3
782	srlx		%o4, %l1, %o4
783	or		%o4, %o3, %o3
7844:
785	xor		%g5, %l7, %g1		! ^= rk[0]
786	add		%l7, 1, %l7
787	.word	0x85b02301 !movxtod	%g1,%f2
788	srl		%l7, 0, %l7		! clruw
789	xor		%g5, %l7, %g1
790	add		%l7, 1, %l7
791	.word	0x8db02301 !movxtod	%g1,%f6
792	srl		%l7, 0, %l7		! clruw
793	prefetch	[%i1 + 63], 22
794	prefetch	[%i0 + 32+63], 20
! First round pair issued inline; enter 2x helper past those rounds.
795	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
796	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
797	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
798	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
799	call		_aes128_encrypt_2x+16
800	add		%i0, 32, %i0		! (delay slot)
801
802	.word	0x91b02308 !movxtod	%o0,%f8
803	.word	0x95b02309 !movxtod	%o1,%f10
804	.word	0x99b0230a !movxtod	%o2,%f12
805	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
806	.word	0x91b0230b !movxtod	%o3,%f8
807	.word	0x85b28d82 !fxor	%f10,%f2,%f2
808	.word	0x89b30d84 !fxor	%f12,%f4,%f4
809	.word	0x8db20d86 !fxor	%f8,%f6,%f6
810
811	brnz,pn		%l2, 2f			! unaligned-output path
812	sub		%i2, 2, %i2		! (delay slot)
813
814	std		%f0, [%i1 + 0]
815	std		%f2, [%i1 + 8]
816	std		%f4, [%i1 + 16]
817	std		%f6, [%i1 + 24]
818	brnz,pt		%i2, .L128_ctr32_loop2x
819	add		%i1, 32, %i1		! (delay slot)
820
821	ret
822	restore
823
824.align	16
8252:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
826						! and ~3x deterioration
827						! in inp==out case
828	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
829	.word	0x81b00902 !faligndata	%f0,%f2,%f0
830	.word	0x85b08904 !faligndata	%f2,%f4,%f2
831	.word	0x89b10906 !faligndata	%f4,%f6,%f4
832	.word	0x8db18906 !faligndata	%f6,%f6,%f6
833
834	stda		%f8, [%i1 + %l3]0xc0	! partial store
835	std		%f0, [%i1 + 8]
836	std		%f2, [%i1 + 16]
837	std		%f4, [%i1 + 24]
838	add		%i1, 32, %i1
839	orn		%g0, %l3, %l3		! invert edge mask
840	stda		%f6, [%i1 + %l3]0xc0	! partial store
841
842	brnz,pt		%i2, .L128_ctr32_loop2x+4	! re-enter past first ldx
843	orn		%g0, %l3, %l3		! (delay slot) restore mask
844
845	ret
846	restore
847
848!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: 2x loop with ASI_BLK_INIT stores; %l5 counts the tail
! blocks that are finished afterwards by the scalar loops.
849.align	32
850.L128_ctr32_blk:
851	add	%i1, %i2, %l5
852	and	%l5, 63, %l5	! tail
853	sub	%i2, %l5, %i2
854	add	%l5, 15, %l5	! round up to 16n
855	srlx	%i2, 4, %i2
856	srl	%l5, 4, %l5
! Bias by one pair so the bgu test below runs the loop correctly.
857	sub	%i2, 1, %i2
858	add	%l5, 1, %l5
859
860.L128_ctr32_blk_loop2x:
861	ldx		[%i0 + 0], %o0
862	ldx		[%i0 + 8], %o1
863	ldx		[%i0 + 16], %o2
864	brz,pt		%l0, 5f			! aligned input: skip re-merge
865	ldx		[%i0 + 24], %o3		! (delay slot)
866
867	ldx		[%i0 + 32], %o4
868	sllx		%o0, %l0, %o0
869	srlx		%o1, %l1, %g1
870	or		%g1, %o0, %o0
871	sllx		%o1, %l0, %o1
872	srlx		%o2, %l1, %g1
873	or		%g1, %o1, %o1
874	sllx		%o2, %l0, %o2
875	srlx		%o3, %l1, %g1
876	or		%g1, %o2, %o2
877	sllx		%o3, %l0, %o3
878	srlx		%o4, %l1, %o4
879	or		%o4, %o3, %o3
8805:
881	xor		%g5, %l7, %g1		! ^= rk[0]
882	add		%l7, 1, %l7
883	.word	0x85b02301 !movxtod	%g1,%f2
884	srl		%l7, 0, %l7		! clruw
885	xor		%g5, %l7, %g1
886	add		%l7, 1, %l7
887	.word	0x8db02301 !movxtod	%g1,%f6
888	srl		%l7, 0, %l7		! clruw
889	prefetch	[%i0 + 32+63], 20
890	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
891	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
892	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
893	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
894	call		_aes128_encrypt_2x+16
895	add		%i0, 32, %i0		! (delay slot)
896	subcc		%i2, 2, %i2		! sets cond codes for bgu below
897
898	.word	0x91b02308 !movxtod	%o0,%f8
899	.word	0x95b02309 !movxtod	%o1,%f10
900	.word	0x99b0230a !movxtod	%o2,%f12
901	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
902	.word	0x91b0230b !movxtod	%o3,%f8
903	.word	0x85b28d82 !fxor	%f10,%f2,%f2
904	.word	0x89b30d84 !fxor	%f12,%f4,%f4
905	.word	0x8db20d86 !fxor	%f8,%f6,%f6
906
907	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
908	add		%i1, 8, %i1
909	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
910	add		%i1, 8, %i1
911	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
912	add		%i1, 8, %i1
913	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
914	bgu,pt		SIZE_T_CC, .L128_ctr32_blk_loop2x
915	add		%i1, 8, %i1		! (delay slot)
916
! Finish the tail through the scalar loops (1x first if count is odd).
917	add		%l5, %i2, %i2
918	andcc		%i2, 1, %g0		! is number of blocks even?
919	membar		#StoreLoad|#StoreStore
920	bnz,pt		%icc, .L128_ctr32_loop
921	srl		%i2, 0, %i2		! (delay slot)
922	brnz,pn		%i2, .L128_ctr32_loop2x
923	nop
924
925	ret
926	restore
927.type	aes128_t4_ctr32_encrypt,#function
928.size	aes128_t4_ctr32_encrypt,.-aes128_t4_ctr32_encrypt
929.globl	aes128_t4_xts_encrypt
930.align	32
931aes128_t4_xts_encrypt:
932	save		%sp, -STACK_FRAME-16, %sp
933	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
934
935	mov		%i5, %o0
936	add		%fp, STACK_BIAS-16, %o1
937	call		aes_t4_encrypt
938	mov		%i4, %o2
939
940	add		%fp, STACK_BIAS-16, %l7
941	ldxa		[%l7]0x88, %g2
942	add		%fp, STACK_BIAS-8, %l7
943	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak
944
945	sethi		%hi(0x76543210), %l7
946	or		%l7, %lo(0x76543210), %l7
947	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask
948
949	prefetch	[%i0], 20
950	prefetch	[%i0 + 63], 20
951	call		_aes128_load_enckey
952	and		%i2, 15,  %i5
953	and		%i2, -16, %i2
954
955	sub		%i0, %i1, %l5	! %i0!=%i1
956	and		%i0, 7, %l0
957	andn		%i0, 7, %i0
958	sll		%l0, 3, %l0
959	mov		64, %l1
960	mov		0xff, %l3
961	sub		%l1, %l0, %l1
962	and		%i1, 7, %l2
963	cmp		%i2, 255
964	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
965	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
966	brnz,pn		%l5, .L128_xts_enblk !	%i0==%i1)
967	srl		%l3, %l2, %l3
968
969	andcc		%i2, 16, %g0		! is number of blocks even?
970	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
971	bz		%icc, .L128_xts_enloop2x
972	srlx		%i2, 4, %i2
973.L128_xts_enloop:
974	ldx		[%i0 + 0], %o0
975	brz,pt		%l0, 4f
976	ldx		[%i0 + 8], %o1
977
978	ldx		[%i0 + 16], %o2
979	sllx		%o0, %l0, %o0
980	srlx		%o1, %l1, %g1
981	sllx		%o1, %l0, %o1
982	or		%g1, %o0, %o0
983	srlx		%o2, %l1, %o2
984	or		%o2, %o1, %o1
9854:
986	.word	0x99b02302 !movxtod	%g2,%f12
987	.word	0x9db02303 !movxtod	%g3,%f14
988	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
989	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
990
991	xor		%g4, %o0, %o0		! ^= rk[0]
992	xor		%g5, %o1, %o1
993	.word	0x81b02308 !movxtod	%o0,%f0
994	.word	0x85b02309 !movxtod	%o1,%f2
995
996	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
997	.word	0x85b38d82 !fxor	%f14,%f2,%f2
998
999	prefetch	[%i1 + 63], 22
1000	prefetch	[%i0 + 16+63], 20
1001	call		_aes128_encrypt_1x
1002	add		%i0, 16, %i0
1003
1004	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1005	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1006
1007	srax		%g3, 63, %l7		! next tweak value
1008	addcc		%g2, %g2, %g2
1009	and		%l7, 0x87, %l7
1010	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1011	xor		%l7, %g2, %g2
1012
1013	brnz,pn		%l2, 2f
1014	sub		%i2, 1, %i2
1015
1016	std		%f0, [%i1 + 0]
1017	std		%f2, [%i1 + 8]
1018	brnz,pt		%i2, .L128_xts_enloop2x
1019	add		%i1, 16, %i1
1020
1021	brnz,pn		%i5, .L128_xts_ensteal
1022	nop
1023
1024	ret
1025	restore
1026
1027.align	16
10282:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1029						! and ~3x deterioration
1030						! in inp==out case
1031	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1032	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1033	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1034	stda		%f4, [%i1 + %l3]0xc0	! partial store
1035	std		%f6, [%i1 + 8]
1036	add		%i1, 16, %i1
1037	orn		%g0, %l3, %l3
1038	stda		%f8, [%i1 + %l3]0xc0	! partial store
1039
1040	brnz,pt		%i2, .L128_xts_enloop2x+4
1041	orn		%g0, %l3, %l3
1042
1043	brnz,pn		%i5, .L128_xts_ensteal
1044	nop
1045
1046	ret
1047	restore
1048
1049!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1050.align	32
1051.L128_xts_enloop2x:
1052	ldx		[%i0 + 0], %o0
1053	ldx		[%i0 + 8], %o1
1054	ldx		[%i0 + 16], %o2
1055	brz,pt		%l0, 4f
1056	ldx		[%i0 + 24], %o3
1057
1058	ldx		[%i0 + 32], %o4
1059	sllx		%o0, %l0, %o0
1060	srlx		%o1, %l1, %g1
1061	or		%g1, %o0, %o0
1062	sllx		%o1, %l0, %o1
1063	srlx		%o2, %l1, %g1
1064	or		%g1, %o1, %o1
1065	sllx		%o2, %l0, %o2
1066	srlx		%o3, %l1, %g1
1067	or		%g1, %o2, %o2
1068	sllx		%o3, %l0, %o3
1069	srlx		%o4, %l1, %o4
1070	or		%o4, %o3, %o3
10714:
1072	.word	0x99b02302 !movxtod	%g2,%f12
1073	.word	0x9db02303 !movxtod	%g3,%f14
1074	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
1075	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
1076
1077	srax		%g3, 63, %l7		! next tweak value
1078	addcc		%g2, %g2, %g2
1079	and		%l7, 0x87, %l7
1080	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1081	xor		%l7, %g2, %g2
1082
1083	.word	0x91b02302 !movxtod	%g2,%f8
1084	.word	0x95b02303 !movxtod	%g3,%f10
1085	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1086	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1087
1088	xor		%g4, %o0, %o0		! ^= rk[0]
1089	xor		%g5, %o1, %o1
1090	xor		%g4, %o2, %o2		! ^= rk[0]
1091	xor		%g5, %o3, %o3
1092	.word	0x81b02308 !movxtod	%o0,%f0
1093	.word	0x85b02309 !movxtod	%o1,%f2
1094	.word	0x89b0230a !movxtod	%o2,%f4
1095	.word	0x8db0230b !movxtod	%o3,%f6
1096
1097	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1098	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1099	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
1100	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1101
1102	prefetch	[%i1 + 63], 22
1103	prefetch	[%i0 + 32+63], 20
1104	call		_aes128_encrypt_2x
1105	add		%i0, 32, %i0
1106
1107	.word	0x91b02302 !movxtod	%g2,%f8
1108	.word	0x95b02303 !movxtod	%g3,%f10
1109
1110	srax		%g3, 63, %l7		! next tweak value
1111	addcc		%g2, %g2, %g2
1112	and		%l7, 0x87, %l7
1113	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1114	xor		%l7, %g2, %g2
1115
1116	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1117	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1118
1119	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1120	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1121	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1122	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1123
1124	brnz,pn		%l2, 2f
1125	sub		%i2, 2, %i2
1126
1127	std		%f0, [%i1 + 0]
1128	std		%f2, [%i1 + 8]
1129	std		%f4, [%i1 + 16]
1130	std		%f6, [%i1 + 24]
1131	brnz,pt		%i2, .L128_xts_enloop2x
1132	add		%i1, 32, %i1
1133
1134	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
1135	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
1136	brnz,pn		%i5, .L128_xts_ensteal
1137	nop
1138
1139	ret
1140	restore
1141
1142.align	16
11432:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1144						! and ~3x deterioration
1145						! in inp==out case
1146	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1147	.word	0x95b00902 !faligndata	%f0,%f2,%f10
1148	.word	0x99b08904 !faligndata	%f2,%f4,%f12
1149	.word	0x9db10906 !faligndata	%f4,%f6,%f14
1150	.word	0x81b18906 !faligndata	%f6,%f6,%f0
1151
1152	stda		%f8, [%i1 + %l3]0xc0	! partial store
1153	std		%f10, [%i1 + 8]
1154	std		%f12, [%i1 + 16]
1155	std		%f14, [%i1 + 24]
1156	add		%i1, 32, %i1
1157	orn		%g0, %l3, %l3
1158	stda		%f0, [%i1 + %l3]0xc0	! partial store
1159
1160	brnz,pt		%i2, .L128_xts_enloop2x+4
1161	orn		%g0, %l3, %l3
1162
1163	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
1164	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
1165	brnz,pn		%i5, .L128_xts_ensteal
1166	nop
1167
1168	ret
1169	restore
1170
1171!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1172.align	32
1173.L128_xts_enblk:
1174	add	%i1, %i2, %l5
1175	and	%l5, 63, %l5	! tail
1176	sub	%i2, %l5, %i2
1177	add	%l5, 15, %l5	! round up to 16n
1178	srlx	%i2, 4, %i2
1179	srl	%l5, 4, %l5
1180	sub	%i2, 1, %i2
1181	add	%l5, 1, %l5
1182
1183.L128_xts_enblk2x:
1184	ldx		[%i0 + 0], %o0
1185	ldx		[%i0 + 8], %o1
1186	ldx		[%i0 + 16], %o2
1187	brz,pt		%l0, 5f
1188	ldx		[%i0 + 24], %o3
1189
1190	ldx		[%i0 + 32], %o4
1191	sllx		%o0, %l0, %o0
1192	srlx		%o1, %l1, %g1
1193	or		%g1, %o0, %o0
1194	sllx		%o1, %l0, %o1
1195	srlx		%o2, %l1, %g1
1196	or		%g1, %o1, %o1
1197	sllx		%o2, %l0, %o2
1198	srlx		%o3, %l1, %g1
1199	or		%g1, %o2, %o2
1200	sllx		%o3, %l0, %o3
1201	srlx		%o4, %l1, %o4
1202	or		%o4, %o3, %o3
12035:
1204	.word	0x99b02302 !movxtod	%g2,%f12
1205	.word	0x9db02303 !movxtod	%g3,%f14
1206	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
1207	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
1208
1209	srax		%g3, 63, %l7		! next tweak value
1210	addcc		%g2, %g2, %g2
1211	and		%l7, 0x87, %l7
1212	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1213	xor		%l7, %g2, %g2
1214
1215	.word	0x91b02302 !movxtod	%g2,%f8
1216	.word	0x95b02303 !movxtod	%g3,%f10
1217	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1218	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1219
1220	xor		%g4, %o0, %o0		! ^= rk[0]
1221	xor		%g5, %o1, %o1
1222	xor		%g4, %o2, %o2		! ^= rk[0]
1223	xor		%g5, %o3, %o3
1224	.word	0x81b02308 !movxtod	%o0,%f0
1225	.word	0x85b02309 !movxtod	%o1,%f2
1226	.word	0x89b0230a !movxtod	%o2,%f4
1227	.word	0x8db0230b !movxtod	%o3,%f6
1228
1229	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1230	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1231	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
1232	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1233
1234	prefetch	[%i0 + 32+63], 20
1235	call		_aes128_encrypt_2x
1236	add		%i0, 32, %i0
1237
1238	.word	0x91b02302 !movxtod	%g2,%f8
1239	.word	0x95b02303 !movxtod	%g3,%f10
1240
1241	srax		%g3, 63, %l7		! next tweak value
1242	addcc		%g2, %g2, %g2
1243	and		%l7, 0x87, %l7
1244	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1245	xor		%l7, %g2, %g2
1246
1247	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
1248	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
1249
1250	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1251	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1252	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1253	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1254
1255	subcc		%i2, 2, %i2
1256	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1257	add		%i1, 8, %i1
1258	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1259	add		%i1, 8, %i1
1260	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1261	add		%i1, 8, %i1
1262	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1263	bgu,pt		SIZE_T_CC, .L128_xts_enblk2x
1264	add		%i1, 8, %i1
1265
1266	add		%l5, %i2, %i2
1267	andcc		%i2, 1, %g0		! is number of blocks even?
1268	membar		#StoreLoad|#StoreStore
1269	bnz,pt		%icc, .L128_xts_enloop
1270	srl		%i2, 0, %i2
1271	brnz,pn		%i2, .L128_xts_enloop2x
1272	nop
1273
1274	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
1275	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
1276	brnz,pn		%i5, .L128_xts_ensteal
1277	nop
1278
1279	ret
1280	restore
1281!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1282.align	32
1283.L128_xts_ensteal:
1284	std		%f0, [%fp + STACK_BIAS-16]	! copy of output
1285	std		%f2, [%fp + STACK_BIAS-8]
1286
1287	srl		%l0, 3, %l0
1288	add		%fp, STACK_BIAS-16, %l7
1289	add		%i0, %l0, %i0	! original %i0+%i2&-15
1290	add		%i1, %l2, %i1	! original %i1+%i2&-15
1291	mov		0, %l0
1292	nop					! align
1293
1294.L128_xts_enstealing:
1295	ldub		[%i0 + %l0], %o0
1296	ldub		[%l7  + %l0], %o1
1297	dec		%i5
1298	stb		%o0, [%l7  + %l0]
1299	stb		%o1, [%i1 + %l0]
1300	brnz		%i5, .L128_xts_enstealing
1301	inc		%l0
1302
1303	mov		%l7, %i0
1304	sub		%i1, 16, %i1
1305	mov		0, %l0
1306	sub		%i1, %l2, %i1
1307	ba		.L128_xts_enloop	! one more time
1308	mov		1, %i2				! %i5 is 0
1309	ret
1310	restore
1311.type	aes128_t4_xts_encrypt,#function
1312.size	aes128_t4_xts_encrypt,.-aes128_t4_xts_encrypt
.globl	aes128_t4_xts_decrypt
.align	32
!----------------------------------------------------------------------
! XTS-AES-128 decryption using SPARC T4 AES opcodes (raw .word forms,
! decoded in trailing comments).
! Presumed C signature (confirm against the C caller):
!   aes128_t4_xts_decrypt(const u8 *inp,        /* %i0 */
!                         u8 *out,              /* %i1 */
!                         size_t len,           /* %i2, in bytes */
!                         const AES_KEY *key1,  /* %i3, data (decrypt) key */
!                         const AES_KEY *key2,  /* %i4, tweak key */
!                         const u8 ivec[16])    /* %i5, tweak seed */
! Register roles after setup:
!   %g2:%g3 - current 128-bit tweak, advanced by GF(2^128) doubling
!             (srax/addcc/addxc + xor with 0x87 feedback)
!   %g4:%g5 - round key rk[0] halves, xored into the input words
!   %l0/%l1 - input misalignment in bits / 64-%l0
!   %l2     - out&7; %l3 - byte mask for partial stores to unaligned out
!   %i5     - tail byte count for ciphertext stealing; %i2 - block count
! NOTE(review): %i3 is not touched here directly; presumably consumed by
! _aes128_load_deckey / aes_t4_encrypt calling convention -- confirm.
!----------------------------------------------------------------------
aes128_t4_xts_decrypt:
	save		%sp, -STACK_FRAME-16, %sp	! extra 16 bytes: tweak scratch
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	! tweak = AES-encrypt(key2, ivec), computed into the stack scratch
	mov		%i5, %o0
	add		%fp, STACK_BIAS-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	add		%fp, STACK_BIAS-16, %l7
	ldxa		[%l7]0x88, %g2
	add		%fp, STACK_BIAS-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_deckey
	and		%i2, 15,  %i5			! tail bytes -> ciphertext stealing
	and		%i2, -16, %i2
	mov		0, %l7
	movrnz		%i5, 16,  %l7			! reserve last full block if tail
	sub		%i2, %l7, %i2

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128_xts_deblk !	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	brz,pn		%i2, .L128_xts_desteal
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_xts_deloop2x
	srlx		%i2, 4, %i2
! Single-block loop: consumes the odd leading block, then drops into the
! two-block loop below.
.L128_xts_deloop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! realign an unaligned input block from three 64-bit loads
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_xts_deloop2x	! remaining count is now even
	add		%i1, 16, %i1

	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

! Unaligned-output tail of the single-block loop.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_deloop2x+4
	orn		%g0, %l3, %l3

	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main two-blocks-per-iteration loop (aligned-output fast path).
.align	32
.L128_xts_deloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	! realign two unaligned input blocks from five 64-bit loads
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_xts_deloop2x
	add		%i1, 32, %i1

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

! Unaligned-output tail of the two-block loop.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_deloop2x+4
	orn		%g0, %l3, %l3

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path for long, distinct, output-aligned buffers: stores with
! ASI_BLK_INIT (0xe2) to avoid pulling destination lines into cache.
! The last cache-line-straddling tail (%l5 blocks) is redone through the
! ordinary loops after a membar.
.align	32
.L128_xts_deblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_xts_deblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L128_xts_deblk2x
	add		%i1, 8, %i1

	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore	! order block-init stores vs. loops
	bnz,pt		%icc, .L128_xts_deloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L128_xts_deloop2x
	nop

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext stealing: decrypt the last full block with the *next*
! tweak (%o2:%o3, without committing %g2:%g3), park it on the stack,
! byte-swap the tail with the partial last block, then rerun
! .L128_xts_deloop once for the final block.
.align	32
.L128_xts_desteal:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 8f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
8:
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %o2
	and		%l7, 0x87, %l7
	.word	0x97b0c223 !addxc	%g3,%g3,%o3
	xor		%l7, %o2, %o2

	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	call		_aes128_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	std		%f0, [%fp + STACK_BIAS-16]
	std		%f2, [%fp + STACK_BIAS-8]

	srl		%l0, 3, %l0
	add		%fp, STACK_BIAS-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	add		%i1, 16, %i1
	nop					! align

! Byte loop: swap %i5 tail bytes between input and the stashed block,
! emitting the stolen plaintext bytes as it goes.
.L128_xts_destealing:
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L128_xts_destealing
	inc		%l0

	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L128_xts_deloop	! one more time
	mov		1, %i2				! %i5 is 0
	ret					! not reached (ba above)
	restore
.type	aes128_t4_xts_decrypt,#function
.size	aes128_t4_xts_decrypt,.-aes128_t4_xts_decrypt
.globl	aes128_t4_cbc_decrypt
.align	32
!----------------------------------------------------------------------
! AES-128-CBC decryption using SPARC T4 AES opcodes.
! Presumed C signature (confirm against the C caller):
!   aes128_t4_cbc_decrypt(const u8 *inp,       /* %i0 */
!                         u8 *out,             /* %i1 */
!                         size_t len,          /* %i2, bytes; 0 is a no-op */
!                         const AES_KEY *key,  /* %i3, used by callees */
!                         u8 ivec[16])         /* %i4, read and written back */
! Register roles after setup:
!   %f12-%f15 - current IV (previous ciphertext block)
!   %g4:%g5   - round key rk[0] halves, xored into input words
!   %l0/%l1   - input misalignment in bits / 64-%l0
!   %l2       - out&7; %l3 - byte mask for partial stores
! The raw ciphertext is kept in %o0..%o3 across the decrypt call so it
! can become the next IV (movxtod into %f12/%f14) without reloading.
!----------------------------------------------------------------------
aes128_t4_cbc_decrypt:
	save		%sp, -STACK_FRAME, %sp
	cmp		%i2, 0
	be,pn		SIZE_T_CC, .L128_cbc_dec_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f12	! load ivec
	ld		[%i4 + 4], %f13
	ld		[%i4 + 8], %f14
	ld		[%i4 + 12], %f15
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_deckey
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128cbc_dec_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	srlx		%i2, 4, %i2
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_cbc_dec_loop2x
	prefetch	[%i1], 22
! Single-block loop: consumes the odd leading block, then drops into the
! two-block loop below.
.L128_cbc_dec_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! realign an unaligned input block from three 64-bit loads
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o2		! ^= rk[0]
	xor		%g5, %o1, %o3
	.word	0x81b0230a !movxtod	%o2,%f0
	.word	0x85b0230b !movxtod	%o3,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b02308 !movxtod	%o0,%f12	! ciphertext -> next IV
	.word	0x9db02309 !movxtod	%o1,%f14

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_cbc_dec_loop2x	! remaining count is now even
	add		%i1, 16, %i1
	st		%f12, [%i4 + 0]		! write back final IV
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
.L128_cbc_dec_abort:
	ret
	restore

! Unaligned-output tail of the single-block loop.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main two-blocks-per-iteration loop (aligned-output fast path).
.align	32
.L128_cbc_dec_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	! realign two unaligned input blocks from five 64-bit loads
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02308 !movxtod	%o0,%f8		! ct[0] = IV for blk 2
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12	! ct[1] = next IV
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_cbc_dec_loop2x
	add		%i1, 32, %i1
	st		%f12, [%i4 + 0]		! write back final IV
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

! Unaligned-output tail of the two-block loop.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path for long, distinct, output-aligned buffers: stores with
! ASI_BLK_INIT (0xe2) to avoid pulling destination lines into cache.
! The last cache-line-straddling tail (%l5 blocks) is redone through
! the ordinary loops after a membar.
.align	32
.L128cbc_dec_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L128_cbc_dec_blk_loop2x
	add		%i1, 8, %i1

	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore	! order block-init stores vs. loops
	bnz,pt		%icc, .L128_cbc_dec_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L128_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes128_t4_cbc_decrypt,#function
.size	aes128_t4_cbc_decrypt,.-aes128_t4_cbc_decrypt
.align	32
!----------------------------------------------------------------------
! Decrypt one 16-byte block held in %f0:%f2.  Caller has already xored
! the data with rk[0] (see the `xor %g4/%g5` before each call site).
! Ten fully unrolled T4 dround pairs; round keys are expected resident
! in %f16-%f54 (presumably placed there by _aes128_load_deckey --
! confirm, that routine is outside this chunk).  Result in %f0:%f2.
! Clobbers %f4.  Leaf: retl with the final dround23_l in the delay slot.
!----------------------------------------------------------------------
_aes128_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd44c4 !aes_dround01_l	%f52,%f4,%f2,%f0
	retl
	.word	0x84cdc4e4 !aes_dround23_l	%f54,%f4,%f2,%f2
.type	_aes128_decrypt_1x,#function
.size	_aes128_decrypt_1x,.-_aes128_decrypt_1x
2032
.align	32
!----------------------------------------------------------------------
! Decrypt two interleaved 16-byte blocks: block A in %f0:%f2, block B
! in %f4:%f6, both already xored with rk[0] by the caller.  Round keys
! expected resident in %f16-%f54 (presumably from _aes128_load_deckey
! -- confirm).  The two streams are interleaved to hide AES-unit
! latency.  Results in %f0:%f2 and %f4:%f6.  Clobbers %f8, %f10.
! Leaf: retl with the final dround23_l in the delay slot.
!----------------------------------------------------------------------
_aes128_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd44c8 !aes_dround01_l	%f52,%f8,%f2,%f0
	.word	0x84cdc4e8 !aes_dround23_l	%f54,%f8,%f2,%f2
	.word	0x88cd4cca !aes_dround01_l	%f52,%f10,%f6,%f4
	retl
	.word	0x8ccdccea !aes_dround23_l	%f54,%f10,%f6,%f6
.type	_aes128_decrypt_2x,#function
.size	_aes128_decrypt_2x,.-_aes128_decrypt_2x
.align	32
!----------------------------------------------------------------------
! Encrypt one 16-byte block held in %f0:%f2 (already xored with rk[0]
! by the caller).  Twelve fully unrolled T4 eround pairs; the AES-192
! key schedule is expected resident in %f16-%f62 (presumably loaded by
! the corresponding _aes192 key loader -- confirm, it is outside this
! chunk).  Result in %f0:%f2.  Clobbers %f4.
! Leaf: retl with the final eround23_l in the delay slot.
!----------------------------------------------------------------------
_aes192_encrypt_1x:
	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x80cd4404 !aes_eround01	%f52,%f4,%f2,%f0
	.word	0x84cdc424 !aes_eround23	%f54,%f4,%f2,%f2
	.word	0x88ce4400 !aes_eround01	%f56,%f0,%f2,%f4
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x80cf4484 !aes_eround01_l	%f60,%f4,%f2,%f0
	retl
	.word	0x84cfc4a4 !aes_eround23_l	%f62,%f4,%f2,%f2
.type	_aes192_encrypt_1x,#function
.size	_aes192_encrypt_1x,.-_aes192_encrypt_1x
2107
.align	32
!----------------------------------------------------------------------
! Encrypt two interleaved 16-byte blocks: block A in %f0:%f2, block B
! in %f4:%f6, both already xored with rk[0] by the caller.  AES-192
! key schedule expected resident in %f16-%f62 (presumably from the
! _aes192 key loader -- confirm).  The two streams are interleaved to
! hide AES-unit latency.  Results in %f0:%f2 and %f4:%f6.
! Clobbers %f8, %f10.  Leaf: retl with final eround23_l in delay slot.
!----------------------------------------------------------------------
_aes192_encrypt_2x:
	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
	.word	0x80cd4408 !aes_eround01	%f52,%f8,%f2,%f0
	.word	0x84cdc428 !aes_eround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c0a !aes_eround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc2a !aes_eround23	%f54,%f10,%f6,%f6
	.word	0x90ce4400 !aes_eround01	%f56,%f0,%f2,%f8
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c04 !aes_eround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc24 !aes_eround23	%f58,%f4,%f6,%f6
	.word	0x80cf4488 !aes_eround01_l	%f60,%f8,%f2,%f0
	.word	0x84cfc4a8 !aes_eround23_l	%f62,%f8,%f2,%f2
	.word	0x88cf4c8a !aes_eround01_l	%f60,%f10,%f6,%f4
	retl
	.word	0x8ccfccaa !aes_eround23_l	%f62,%f10,%f6,%f6
.type	_aes192_encrypt_2x,#function
.size	_aes192_encrypt_2x,.-_aes192_encrypt_2x
2161
! _aes256_encrypt_1x: encrypt one 16-byte AES-256 (14-round) block.
!  In:   %f0:%f2   = input block, already XORed with round key 0 by the caller
!        %f16-%f62 = round keys 1-12 (preloaded by _aes256_loadkey)
!        %i3       = key schedule pointer; round keys 13/14 are fetched
!                    from offsets 208-232 below
!  Out:  %f0:%f2   = encrypted block
!  Clobbers %f4.  %f16-%f22 are temporarily reused for round keys 13/14
!  and restored to round keys 1/2 on exit, so the routine can be called
!  repeatedly in a loop without reloading the schedule.
.align	32
_aes256_encrypt_1x:
	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	! fetch round keys 13/14 into %f16-%f22 while rounds 2+ execute
	ldd		[%i3 + 208], %f16
	ldd		[%i3 + 216], %f18
	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x80cd4404 !aes_eround01	%f52,%f4,%f2,%f0
	.word	0x84cdc424 !aes_eround23	%f54,%f4,%f2,%f2
	.word	0x88ce4400 !aes_eround01	%f56,%f0,%f2,%f4
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x80cf4404 !aes_eround01	%f60,%f4,%f2,%f0
	.word	0x84cfc424 !aes_eround23	%f62,%f4,%f2,%f2
	! rounds 13/14 use the keys loaded into %f16-%f22 above;
	! the "_l" forms perform the final (last-round) transformation
	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	! restore %f16-%f22 to round keys 1/2 for the next invocation;
	! the last load rides the retl delay slot
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	.word	0x80cd0484 !aes_eround01_l	%f20,%f4,%f2,%f0
	.word	0x84cd84a4 !aes_eround23_l	%f22,%f4,%f2,%f2
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22
.type	_aes256_encrypt_1x,#function
.size	_aes256_encrypt_1x,.-_aes256_encrypt_1x
2203
! _aes256_encrypt_2x: encrypt two independent 16-byte AES-256 blocks in
! parallel (interleaved rounds hide instruction latency).
!  In:   %f0:%f2   = block 0, %f4:%f6 = block 1, both already XORed with
!                    round key 0 by the caller
!        %f16-%f62 = round keys 1-12 (preloaded by _aes256_loadkey)
!        %i3       = key schedule pointer; round keys 13/14 fetched from
!                    offsets 208-232 below
!  Out:  %f0:%f2 and %f4:%f6 = encrypted blocks
!  Clobbers %f8/%f10.  %f16-%f22 are reused for round keys 13/14 and
!  restored to round keys 1/2 on exit for repeated calls.
.align	32
_aes256_encrypt_2x:
	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
	! fetch round keys 13/14 into %f16-%f22 while rounds 2+ execute
	ldd		[%i3 + 208], %f16
	ldd		[%i3 + 216], %f18
	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
	.word	0x80cd4408 !aes_eround01	%f52,%f8,%f2,%f0
	.word	0x84cdc428 !aes_eround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c0a !aes_eround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc2a !aes_eround23	%f54,%f10,%f6,%f6
	.word	0x90ce4400 !aes_eround01	%f56,%f0,%f2,%f8
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c04 !aes_eround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc24 !aes_eround23	%f58,%f4,%f6,%f6
	.word	0x80cf4408 !aes_eround01	%f60,%f8,%f2,%f0
	.word	0x84cfc428 !aes_eround23	%f62,%f8,%f2,%f2
	.word	0x88cf4c0a !aes_eround01	%f60,%f10,%f6,%f4
	.word	0x8ccfcc2a !aes_eround23	%f62,%f10,%f6,%f6
	! rounds 13/14 use the keys loaded into %f16-%f22 above;
	! the "_l" forms perform the final (last-round) transformation
	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
	! restore %f16-%f22 to round keys 1/2 for the next invocation;
	! the last load rides the retl delay slot
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	.word	0x80cd0488 !aes_eround01_l	%f20,%f8,%f2,%f0
	.word	0x84cd84a8 !aes_eround23_l	%f22,%f8,%f2,%f2
	.word	0x88cd0c8a !aes_eround01_l	%f20,%f10,%f6,%f4
	.word	0x8ccd8caa !aes_eround23_l	%f22,%f10,%f6,%f6
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22
.type	_aes256_encrypt_2x,#function
.size	_aes256_encrypt_2x,.-_aes256_encrypt_2x
2273
! _aes192_loadkey: preload an expanded AES key schedule into registers.
!  In:   %i3 = pointer to the expanded key schedule
!  Out:  %g4:%g5  = round key 0 (callers XOR it into the data in the
!                   integer pipe before moving to FP registers)
!        %f16-%f62 = the next 24 doublewords of the schedule
!                   (offsets 16-200)
! The same 208 bytes cover a full 192-bit schedule and all but the last
! two round keys of a 256-bit one (the _aes256_encrypt_* routines fetch
! offsets 208-232 themselves), hence the 256-bit aliases below.  The
! routine is direction-agnostic, so enc/dec names alias it as well.
.align	32
_aes192_loadkey:
	ldx		[%i3 + 0], %g4
	ldx		[%i3 + 8], %g5
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	ldd		[%i3 + 32], %f20
	ldd		[%i3 + 40], %f22
	ldd		[%i3 + 48], %f24
	ldd		[%i3 + 56], %f26
	ldd		[%i3 + 64], %f28
	ldd		[%i3 + 72], %f30
	ldd		[%i3 + 80], %f32
	ldd		[%i3 + 88], %f34
	ldd		[%i3 + 96], %f36
	ldd		[%i3 + 104], %f38
	ldd		[%i3 + 112], %f40
	ldd		[%i3 + 120], %f42
	ldd		[%i3 + 128], %f44
	ldd		[%i3 + 136], %f46
	ldd		[%i3 + 144], %f48
	ldd		[%i3 + 152], %f50
	ldd		[%i3 + 160], %f52
	ldd		[%i3 + 168], %f54
	ldd		[%i3 + 176], %f56
	ldd		[%i3 + 184], %f58
	ldd		[%i3 + 192], %f60
	ldd		[%i3 + 200], %f62
	retl
	nop
.type	_aes192_loadkey,#function
.size	_aes192_loadkey,.-_aes192_loadkey
_aes256_loadkey=_aes192_loadkey
_aes192_load_enckey=_aes192_loadkey
_aes192_load_deckey=_aes192_loadkey
_aes256_load_enckey=_aes192_loadkey
_aes256_load_deckey=_aes192_loadkey
! aes256_t4_cbc_encrypt: AES-256 CBC encryption using T4 AES instructions.
!  In:  %i0 = inp, %i1 = out, %i2 = len (bytes, processed in 16-byte
!        blocks), %i3 = key schedule (read by _aes256_load_enckey /
!        _aes256_encrypt_1x), %i4 = 16-byte ivec (updated on return)
!  Register roles after setup:
!    %l0 = input misalignment in bits, %l1 = 64 - %l0 (realignment shifts)
!    %l2 = output misalignment in bytes
!    %l3 = byte mask for ASI 0xc0 partial stores at unaligned output edges
!    %l5 = inp-out distance; forced to 0 to select the bulk block path
.globl	aes256_t4_cbc_encrypt
.align	32
aes256_t4_cbc_encrypt:
	save		%sp, -STACK_FRAME, %sp
	cmp		%i2, 0
	be,pn		SIZE_T_CC, .L256_cbc_enc_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	! load ivec into %f0-%f3 (the running CBC chaining value)
	ld		[%i4 + 0], %f0
	ld		[%i4 + 4], %f1
	ld		[%i4 + 8], %f2
	ld		[%i4 + 12], %f3
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_enckey
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 127
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
	brnz,pn		%l5, .L256cbc_enc_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	srlx		%i2, 4, %i2
	prefetch	[%i1], 22

! main loop: one 16-byte block per iteration; %i2 counts blocks
.L256_cbc_enc_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! input not 8-byte aligned: read a third doubleword and realign
	! the 16 input bytes into %o0:%o1 with shift/or
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes256_encrypt_1x
	add		%i0, 16, %i0

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L256_cbc_enc_loop
	add		%i1, 16, %i1
	! done: write the final ciphertext block back as the new ivec
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
.L256_cbc_enc_abort:
	ret
	restore

! unaligned-output tail of the main loop: stage through faligndata and
! use partial stores (ASI 0xc0) for the ragged leading/trailing bytes
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_cbc_enc_loop+4
	orn		%g0, %l3, %l3
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: taken when out is 8-byte aligned, len>=128 and inp!=out.
! Uses cache-line-initializing stores (ASI 0xe2); the final sub-cache-
! line tail (%l5 blocks) is re-run through the ordinary loop above.
.align	32
.L256cbc_enc_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5

.L256_cbc_enc_blk_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 5f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
5:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i0 + 16+63], 20
	call		_aes256_encrypt_1x
	add		%i0, 16, %i0
	sub		%i2, 1, %i2

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	brnz,pt		%i2, .L256_cbc_enc_blk_loop
	add		%i1, 8, %i1

	! order block-init stores before any further loads/stores, then
	! process the remaining tail blocks (if any) in the normal loop
	membar		#StoreLoad|#StoreStore
	brnz,pt		%l5, .L256_cbc_enc_loop
	mov		%l5, %i2
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore
.type	aes256_t4_cbc_encrypt,#function
.size	aes256_t4_cbc_encrypt,.-aes256_t4_cbc_encrypt
! aes192_t4_cbc_encrypt: AES-192 CBC encryption; identical structure to
! aes256_t4_cbc_encrypt above, but drives the 12-round
! _aes192_encrypt_1x core and _aes192_load_enckey.
!  In:  %i0 = inp, %i1 = out, %i2 = len (bytes), %i3 = key schedule,
!       %i4 = 16-byte ivec (updated on return)
!  %l0/%l1 = input realignment shifts, %l2 = output misalignment,
!  %l3 = partial-store byte mask, %l5 = inp-out distance (0 selects
!  the bulk block-store path).
.globl	aes192_t4_cbc_encrypt
.align	32
aes192_t4_cbc_encrypt:
	save		%sp, -STACK_FRAME, %sp
	cmp		%i2, 0
	be,pn		SIZE_T_CC, .L192_cbc_enc_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	! load ivec into %f0-%f3 (the running CBC chaining value)
	ld		[%i4 + 0], %f0
	ld		[%i4 + 4], %f1
	ld		[%i4 + 8], %f2
	ld		[%i4 + 12], %f3
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes192_load_enckey
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 127
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
	brnz,pn		%l5, .L192cbc_enc_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	srlx		%i2, 4, %i2
	prefetch	[%i1], 22

! main loop: one 16-byte block per iteration; %i2 counts blocks
.L192_cbc_enc_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! input not 8-byte aligned: read a third doubleword and realign
	! the 16 input bytes into %o0:%o1 with shift/or
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes192_encrypt_1x
	add		%i0, 16, %i0

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L192_cbc_enc_loop
	add		%i1, 16, %i1
	! done: write the final ciphertext block back as the new ivec
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
.L192_cbc_enc_abort:
	ret
	restore

! unaligned-output tail of the main loop: stage through faligndata and
! use partial stores (ASI 0xc0) for the ragged leading/trailing bytes
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_cbc_enc_loop+4
	orn		%g0, %l3, %l3
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: taken when out is 8-byte aligned, len>=128 and inp!=out.
! Uses cache-line-initializing stores (ASI 0xe2); the final sub-cache-
! line tail (%l5 blocks) is re-run through the ordinary loop above.
.align	32
.L192cbc_enc_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5

.L192_cbc_enc_blk_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 5f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
5:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i0 + 16+63], 20
	call		_aes192_encrypt_1x
	add		%i0, 16, %i0
	sub		%i2, 1, %i2

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	brnz,pt		%i2, .L192_cbc_enc_blk_loop
	add		%i1, 8, %i1

	! order block-init stores before any further loads/stores, then
	! process the remaining tail blocks (if any) in the normal loop
	membar		#StoreLoad|#StoreStore
	brnz,pt		%l5, .L192_cbc_enc_loop
	mov		%l5, %i2
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore
.type	aes192_t4_cbc_encrypt,#function
.size	aes192_t4_cbc_encrypt,.-aes192_t4_cbc_encrypt
! aes256_t4_ctr32_encrypt: AES-256 CTR mode with a 32-bit counter.
!  In:  %i0 = inp, %i1 = out, %i2 = number of 16-byte blocks
!       (converted to bytes via sllx below), %i3 = key schedule,
!       %i4 = 16-byte counter block
!  Counter handling: the high 64 bits (pre-XORed with rk[0]) sit in
!  %f14 and never change; %g5 holds counter word 2 (shifted) XORed with
!  the low half of rk[0]; only the low 32-bit word %l7 is incremented
!  per block and re-zero-extended (CTR32 semantics — no carry into the
!  upper words).
!  %l0/%l1 = input realignment shifts, %l2 = output misalignment,
!  %l3 = partial-store mask, %l5 = inp-out distance (0 selects the
!  bulk block-store path).
.globl	aes256_t4_ctr32_encrypt
.align	32
aes256_t4_ctr32_encrypt:
	save		%sp, -STACK_FRAME, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_enckey
	sllx		%i2, 4, %i2

	ld		[%i4 + 0], %l4	! counter
	ld		[%i4 + 4], %l5
	ld		[%i4 + 8], %l6
	ld		[%i4 + 12], %l7

	sllx		%l4, 32, %o5
	or		%l5, %o5, %o5
	sllx		%l6, 32, %g1
	xor		%o5, %g4, %g4		! ^= rk[0]
	xor		%g1, %g5, %g5
	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L256_ctr32_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L256_ctr32_loop2x
	srlx		%i2, 4, %i2
! 1x loop: consumes the odd leading block, then falls into the 2x loop
.L256_ctr32_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! input not 8-byte aligned: realign 16 bytes into %o0:%o1
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	! round 1 is done inline on (%f14, counter); entering the core at
	! _aes256_encrypt_1x+8 skips its first two round instructions
	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	call		_aes256_encrypt_1x+8
	add		%i0, 16, %i0

	.word	0x95b02308 !movxtod	%o0,%f10
	.word	0x99b02309 !movxtod	%o1,%f12
	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
	.word	0x85b30d82 !fxor	%f12,%f2,%f2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	! remaining block count is now even: continue in the 2x loop
	brnz,pt		%i2, .L256_ctr32_loop2x
	add		%i1, 16, %i1

	ret
	restore

! unaligned-output tail of the 1x loop (partial stores, ASI 0xc0)
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_ctr32_loop2x+4
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! 2x loop: two blocks per iteration through _aes256_encrypt_2x
.align	32
.L256_ctr32_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	! input not 8-byte aligned: realign 32 bytes into %o0-%o3
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	! build two consecutive counter values (low words %l7, %l7+1)
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	! round 1 done inline; _aes256_encrypt_2x+16 skips its first four
	! round instructions
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes256_encrypt_2x+16
	add		%i0, 32, %i0

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L256_ctr32_loop2x
	add		%i1, 32, %i1

	ret
	restore

! unaligned-output tail of the 2x loop (partial stores, ASI 0xc0)
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_ctr32_loop2x+4
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned out, len>=256 bytes, inp!=out; 2 blocks per
! iteration with cache-line-initializing stores (ASI 0xe2); tail blocks
! are re-run through the store-based loops above
.align	32
.L256_ctr32_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L256_ctr32_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes256_encrypt_2x+16
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L256_ctr32_blk_loop2x
	add		%i1, 8, %i1

	! finish the tail: if the remaining block count is odd, peel one
	! block in the 1x loop first, then continue 2x
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_ctr32_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L256_ctr32_loop2x
	nop

	ret
	restore
.type	aes256_t4_ctr32_encrypt,#function
.size	aes256_t4_ctr32_encrypt,.-aes256_t4_ctr32_encrypt
! aes256_t4_xts_encrypt: AES-256 XTS-mode encryption with ciphertext
! stealing.
!  In:  %i0 = inp, %i1 = out, %i2 = len (bytes, need not be a multiple
!       of 16), %i3 = key1 (data key), %i4 = key2 (tweak key),
!       %i5 = 16-byte tweak source block (e.g. sector number)
!  The initial tweak is computed by encrypting the block at %i5 with
!  key2 via aes_t4_encrypt into a 16-byte stack slot, then loaded
!  little-endian (ASI 0x88) into %g3:%g2.  Each tweak is byte-swapped
!  into FP registers with bshuffle (GSR mask 0x76543210 set by bmask)
!  and advanced by a GF(2^128) multiply-by-x: srax extracts the sign,
!  addcc/addxc shift the 128-bit value left, xor with 0x87 folds the
!  carry back in.
!  After setup %i5 is reused as the tail length (len & 15) that drives
!  ciphertext stealing.  %l0/%l1 = input realignment shifts, %l2 =
!  output misalignment, %l3 = partial-store mask, %l5 = inp-out
!  distance (0 selects the bulk block-store path).
.globl	aes256_t4_xts_encrypt
.align	32
aes256_t4_xts_encrypt:
	save		%sp, -STACK_FRAME-16, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	! tweak = AES-256-Encrypt(key2, *%i5), written to the stack slot
	mov		%i5, %o0
	add		%fp, STACK_BIAS-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	add		%fp, STACK_BIAS-16, %l7
	ldxa		[%l7]0x88, %g2
	add		%fp, STACK_BIAS-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_enckey
	and		%i2, 15,  %i5
	and		%i2, -16, %i2

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L256_xts_enblk !	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L256_xts_enloop2x
	srlx		%i2, 4, %i2
! 1x loop: odd leading block (also the stealing re-entry point)
.L256_xts_enloop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! input not 8-byte aligned: realign 16 bytes into %o0:%o1
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	! current tweak, byte-swapped into %f12:%f14
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes256_encrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	! tweak *= x in GF(2^128): shift left 128-bit, fold carry via 0x87
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L256_xts_enloop2x
	add		%i1, 16, %i1

	brnz,pn		%i5, .L256_xts_ensteal
	nop

	ret
	restore

! unaligned-output tail of the 1x loop (partial stores, ASI 0xc0)
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_xts_enloop2x+4
	orn		%g0, %l3, %l3

	brnz,pn		%i5, .L256_xts_ensteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! 2x loop: two blocks per iteration, two tweaks (%f12:%f14 and %f8:%f10)
.align	32
.L256_xts_enloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	! input not 8-byte aligned: realign 32 bytes into %o0-%o3
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	! tweak for block 0
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	! tweak for block 1
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes256_encrypt_2x
	add		%i0, 32, %i0

	! rebuild block-1 tweak (clobbered by the 2x core) and advance
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L256_xts_enloop2x
	add		%i1, 32, %i1

	! keep the last ciphertext block in %f0:%f2 for possible stealing
	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_ensteal
	nop

	ret
	restore

! unaligned-output tail of the 2x loop (partial stores, ASI 0xc0)
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_xts_enloop2x+4
	orn		%g0, %l3, %l3

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_ensteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned out, len>=256 bytes, inp!=out; cache-line-
! initializing stores (ASI 0xe2); tail re-run through the loops above
.align	32
.L256_xts_enblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L256_xts_enblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes256_encrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L256_xts_enblk2x
	add		%i1, 8, %i1

	! finish the tail: odd remaining block goes through the 1x loop
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_xts_enloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L256_xts_enloop2x
	nop

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_ensteal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ciphertext stealing: the last full ciphertext block (in %f0:%f2) is
! spilled to the stack; its leading %i5 bytes are swapped with the
! remaining %i5 input bytes, then the stack block is encrypted once
! more via the 1x loop and stored over the previous output block.
.align	32
.L256_xts_ensteal:
	std		%f0, [%fp + STACK_BIAS-16]	! copy of output
	std		%f2, [%fp + STACK_BIAS-8]

	srl		%l0, 3, %l0
	add		%fp, STACK_BIAS-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	nop					! align

! byte-swap loop: out gets stolen ciphertext, stack gets fresh input
.L256_xts_enstealing:
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L256_xts_enstealing
	inc		%l0

	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L256_xts_enloop	! one more time
	mov		1, %i2				! %i5 is 0
	ret
	restore
.type	aes256_t4_xts_encrypt,#function
.size	aes256_t4_xts_encrypt,.-aes256_t4_xts_encrypt
! aes256_t4_xts_decrypt -- AES-256-XTS decryption using the SPARC T4 AES
! instructions (emitted as .word constants so pre-T4 assemblers accept them).
! In:  %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!      %i4 = tweak ("key2") schedule, %i5 = 16-byte tweak plaintext
!      (sector IV; encrypted with key2 below).
! NOTE(review): the data (decryption) key schedule is loaded by
! _aes256_load_deckey, presumably from %i3 -- confirm against the C prototype.
! The running 128-bit tweak is kept in %g3:%g2 and advanced by the usual
! GF(2^128) doubling (shift left 1, conditionally xor 0x87 into the low byte).
! Three store strategies are used: plain std for aligned output, partial
! stores (ASI 0xc0) for unaligned output, and ASI_BLK_INIT (0xe2) block
! stores for the large aligned in!=out case.  Ciphertext stealing for a
! ragged tail is handled at .L256_xts_desteal.
.globl	aes256_t4_xts_decrypt
.align	32
aes256_t4_xts_decrypt:
	save		%sp, -STACK_FRAME-16, %sp	! +16 bytes of scratch at [%fp+STACK_BIAS-16]
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

! Encrypt the tweak plaintext with key2 into the stack scratch slot.
	mov		%i5, %o0
	add		%fp, STACK_BIAS-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	add		%fp, STACK_BIAS-16, %l7
	ldxa		[%l7]0x88, %g2		! ASI_PRIMARY_LITTLE load
	add		%fp, STACK_BIAS-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

! Program the GSR mask used by the bshuffle byte-swap idiom below.
	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_deckey
	and		%i2, 15,  %i5		! %i5 = residual bytes (stealing count)
	and		%i2, -16, %i2
! If there is a ragged tail, hold back one full block for stealing.
	mov		0, %l7
	movrnz		%i5, 16,  %l7
	sub		%i2, %l7, %i2

! Compute alignment parameters: %l0/%l1 = input shift amounts, %l2 = output
! misalignment, %l3 = partial-store byte mask, %l5 = 0 selects the block path.
	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L256_xts_deblk !	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	brz,pn		%i2, .L256_xts_desteal	! nothing but a stolen tail
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L256_xts_deloop2x
	srlx		%i2, 4, %i2		! byte length -> block count
! Single-block loop: used for the odd leading block, then falls into 2x.
.L256_xts_deloop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 8], %o1

! Merge three doublewords into one aligned 16-byte block.
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
! Byte-swap the little-endian tweak into %f12:%f14.
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes256_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	brnz,pn		%l2, 2f			! unaligned output?
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L256_xts_deloop2x
	add		%i1, 16, %i1

	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

! Unaligned-output tail of the 1x loop: realign with faligndata and use
! partial stores with mask %l3 (and its complement) at the edges.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_xts_deloop2x+4	! skip the re-alignaddrl
	orn		%g0, %l3, %l3

	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main loop: two blocks per iteration.
.align	32
.L256_xts_deloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

! Merge five doublewords into two aligned 16-byte blocks.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
! tweak[0] -> %f12:%f14 (byte-swapped), then advance to tweak[1] in %f8:%f10.
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[1]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0

! Re-materialize tweak[1] (clobbered by the 2x round function) and advance
! %g3:%g2 to the next iteration's tweak[0].
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[1]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f			! unaligned output?
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L256_xts_deloop2x
	add		%i1, 32, %i1

! Done with full blocks: keep the last block in %f0:%f2 for possible stealing.
	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

! Unaligned-output tail of the 2x loop.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_xts_deloop2x+4	! skip the re-alignaddrl
	orn		%g0, %l3, %l3

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path for large, 8-byte-aligned, non-overlapping buffers: stores go
! through ASI_BLK_INIT.  %l5 = number of tail blocks redone through the
! ordinary loops after a membar.
.align	32
.L256_xts_deblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L256_xts_deblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
! Same tweak handling as .L256_xts_deloop2x above.
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[1]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[1]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L256_xts_deblk2x
	add		%i1, 8, %i1

! Finish the rounded-up tail through the ordinary loops; the membar orders
! the block-init stores before any subsequent loads/stores.
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_xts_deloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L256_xts_deloop2x
	nop

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext stealing: decrypt the last full block with tweak[n+1] (computed
! into %o2:%o3 without disturbing %g3:%g2), then splice its tail with the
! remaining %i5 input bytes on the stack and loop back once more with the
! original tweak.
.align	32
.L256_xts_desteal:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 8f			! input 8-byte aligned?
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
8:
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %o2
	and		%l7, 0x87, %l7
	.word	0x97b0c223 !addxc	%g3,%g3,%o3
	xor		%l7, %o2, %o2

	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	call		_aes256_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	std		%f0, [%fp + STACK_BIAS-16]	! copy to scratch for byte splicing
	std		%f2, [%fp + STACK_BIAS-8]

	srl		%l0, 3, %l0		! back to byte offset
	add		%fp, STACK_BIAS-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	add		%i1, 16, %i1
	nop					! align

! Swap bytes: input tail -> scratch head, scratch bytes -> output tail.
.L256_xts_destealing:
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L256_xts_destealing
	inc		%l0

! Re-run the 1x loop once on the spliced block from the stack.
	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L256_xts_deloop	! one more time
	mov		1, %i2				! %i5 is 0
	ret					! not reached (ba above is unconditional)
	restore
.type	aes256_t4_xts_decrypt,#function
.size	aes256_t4_xts_decrypt,.-aes256_t4_xts_decrypt
! aes192_t4_ctr32_encrypt -- AES-192 in CTR mode with a 32-bit counter,
! using the SPARC T4 AES instructions (emitted as .word constants).
! In:  %i0 = input ptr, %i1 = output ptr, %i2 = number of 16-byte blocks
!      (converted to a byte length below), %i4 = 16-byte big-endian
!      counter block.
! NOTE(review): the key schedule is loaded by _aes192_load_enckey,
! presumably from %i3 -- confirm against the C prototype.
! The counter's upper 96 bits are pre-xored with round key 0 (%g4/%g5, set
! up by the load-key helper); only the low 32-bit word (%l7) is incremented
! per block, with srl-by-0 ("clruw") keeping it a 32-bit counter.
.globl	aes192_t4_ctr32_encrypt
.align	32
aes192_t4_ctr32_encrypt:
	save		%sp, -STACK_FRAME, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes192_load_enckey
	sllx		%i2, 4, %i2		! blocks -> bytes (in delay slot)

	ld		[%i4 + 0], %l4	! counter
	ld		[%i4 + 4], %l5
	ld		[%i4 + 8], %l6
	ld		[%i4 + 12], %l7	! %l7 = low 32-bit counter word

! Fold counter bits 127..32 into rk[0]: %g4 = cnt[127:64]^rk0hi,
! %g5 = (cnt[63:32]<<32)^rk0lo; %f14 caches the high half for the rounds.
	sllx		%l4, 32, %o5
	or		%l5, %o5, %o5
	sllx		%l6, 32, %g1
	xor		%o5, %g4, %g4		! ^= rk[0]
	xor		%g1, %g5, %g5
	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits

! Alignment parameters, same scheme as the other entry points:
! %l0/%l1 input shifts, %l2 output misalignment, %l3 partial-store mask,
! %l5 = 0 selects the ASI_BLK_INIT bulk path.
	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L192_ctr32_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L192_ctr32_loop2x
	srlx		%i2, 4, %i2		! bytes -> block count
! Single-block loop: used for the odd leading block, then falls into 2x.
.L192_ctr32_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 8], %o1

! Merge three doublewords into one aligned 16-byte block.
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
! Build the low half of the counter block (already ^rk[0]) and bump %l7.
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
! First round is done here; entry at +8 skips the helper's first round.
	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	call		_aes192_encrypt_1x+8
	add		%i0, 16, %i0

! XOR keystream with input.
	.word	0x95b02308 !movxtod	%o0,%f10
	.word	0x99b02309 !movxtod	%o1,%f12
	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
	.word	0x85b30d82 !fxor	%f12,%f2,%f2

	brnz,pn		%l2, 2f			! unaligned output?
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L192_ctr32_loop2x
	add		%i1, 16, %i1

	ret
	restore

! Unaligned-output tail of the 1x loop: partial stores at both edges.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_ctr32_loop2x+4	! skip the re-alignaddrl
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main loop: two blocks per iteration.
.align	32
.L192_ctr32_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

! Merge five doublewords into two aligned 16-byte blocks.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
! Two consecutive counter values into %f2 and %f6.
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
! First round for both blocks; entry at +16 skips it inside the helper.
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes192_encrypt_2x+16
	add		%i0, 32, %i0

! XOR keystream with input (two blocks).
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	brnz,pn		%l2, 2f			! unaligned output?
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L192_ctr32_loop2x
	add		%i1, 32, %i1

	ret
	restore

! Unaligned-output tail of the 2x loop.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_ctr32_loop2x+4	! skip the re-alignaddrl
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path for large, 8-byte-aligned, non-overlapping buffers: stores go
! through ASI_BLK_INIT.  %l5 = number of tail blocks redone through the
! ordinary loops after a membar.
.align	32
.L192_ctr32_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L192_ctr32_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes192_encrypt_2x+16
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2		! condition codes tested by bgu below

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L192_ctr32_blk_loop2x
	add		%i1, 8, %i1

! Finish the rounded-up tail through the ordinary loops; the membar orders
! the block-init stores before any subsequent loads/stores.
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L192_ctr32_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L192_ctr32_loop2x
	nop

	ret
	restore
.type	aes192_t4_ctr32_encrypt,#function
.size	aes192_t4_ctr32_encrypt,.-aes192_t4_ctr32_encrypt
! aes192_t4_cbc_decrypt -- AES-192-CBC decryption using the SPARC T4 AES
! instructions (emitted as .word constants).
! In:  %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!      %i4 = 16-byte ivec (read at entry, updated with the last ciphertext
!      block on every exit path).
! NOTE(review): the key schedule is loaded by _aes192_load_deckey,
! presumably from %i3 -- confirm against the C prototype.
! The running IV (previous ciphertext block) is kept in %f12:%f14; %g4/%g5
! hold rk[0], set up by the load-key helper.
.globl	aes192_t4_cbc_decrypt
.align	32
aes192_t4_cbc_decrypt:
	save		%sp, -STACK_FRAME, %sp
	cmp		%i2, 0
	be,pn		SIZE_T_CC, .L192_cbc_dec_abort	! len == 0: nothing to do
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f12	! load ivec
	ld		[%i4 + 4], %f13
	ld		[%i4 + 8], %f14
	ld		[%i4 + 12], %f15
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes192_load_deckey
	and		%i0, 7, %l0		! in delay slot
! Alignment parameters: %l0/%l1 input shifts, %l2 output misalignment,
! %l3 partial-store mask, %l5 = 0 selects the ASI_BLK_INIT bulk path.
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L192cbc_dec_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	srlx		%i2, 4, %i2		! bytes -> block count
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L192_cbc_dec_loop2x
	prefetch	[%i1], 22
! Single-block loop: used for the odd leading block, then falls into 2x.
.L192_cbc_dec_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 8], %o1

! Merge three doublewords into one aligned 16-byte block.
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
! Ciphertext kept in %o0/%o1 to become the next IV after decryption.
	xor		%g4, %o0, %o2		! ^= rk[0]
	xor		%g5, %o1, %o3
	.word	0x81b0230a !movxtod	%o2,%f0
	.word	0x85b0230b !movxtod	%o3,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes192_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b02308 !movxtod	%o0,%f12		! ivec = this ciphertext block
	.word	0x9db02309 !movxtod	%o1,%f14

	brnz,pn		%l2, 2f			! unaligned output?
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L192_cbc_dec_loop2x
	add		%i1, 16, %i1
! Write back the final ivec before returning.
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
.L192_cbc_dec_abort:
	ret
	restore

! Unaligned-output tail of the 1x loop: partial stores at both edges.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_cbc_dec_loop2x+4	! skip the re-alignaddrl
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]		! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main loop: two blocks per iteration.
.align	32
.L192_cbc_dec_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

! Merge five doublewords into two aligned 16-byte blocks.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes192_decrypt_2x
	add		%i0, 32, %i0

! Chain: block0 ^= old ivec, block1 ^= ciphertext0; new ivec = ciphertext1.
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f			! unaligned output?
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L192_cbc_dec_loop2x
	add		%i1, 32, %i1
	st		%f12, [%i4 + 0]		! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

! Unaligned-output tail of the 2x loop.
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_cbc_dec_loop2x+4	! skip the re-alignaddrl
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]		! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path for large, 8-byte-aligned, non-overlapping buffers: stores go
! through ASI_BLK_INIT.  %l5 = number of tail blocks redone through the
! ordinary loops after a membar.
.align	32
.L192cbc_dec_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L192_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes192_decrypt_2x
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2		! condition codes tested by bgu below

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12		! ivec = ciphertext1
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L192_cbc_dec_blk_loop2x
	add		%i1, 8, %i1

! Finish the rounded-up tail through the ordinary loops; the membar orders
! the block-init stores before any subsequent loads/stores.
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L192_cbc_dec_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L192_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes192_t4_cbc_decrypt,#function
.size	aes192_t4_cbc_decrypt,.-aes192_t4_cbc_decrypt
4217.globl	aes256_t4_cbc_decrypt
4218.align	32
4219aes256_t4_cbc_decrypt:
4220	save		%sp, -STACK_FRAME, %sp
4221	cmp		%i2, 0
4222	be,pn		SIZE_T_CC, .L256_cbc_dec_abort
4223	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
4224	sub		%i0, %i1, %l5	! %i0!=%i1
4225	ld		[%i4 + 0], %f12	! load ivec
4226	ld		[%i4 + 4], %f13
4227	ld		[%i4 + 8], %f14
4228	ld		[%i4 + 12], %f15
4229	prefetch	[%i0], 20
4230	prefetch	[%i0 + 63], 20
4231	call		_aes256_load_deckey
4232	and		%i0, 7, %l0
4233	andn		%i0, 7, %i0
4234	sll		%l0, 3, %l0
4235	mov		64, %l1
4236	mov		0xff, %l3
4237	sub		%l1, %l0, %l1
4238	and		%i1, 7, %l2
4239	cmp		%i2, 255
4240	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
4241	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
4242	brnz,pn		%l5, .L256cbc_dec_blk	!	%i0==%i1)
4243	srl		%l3, %l2, %l3
4244
4245	andcc		%i2, 16, %g0		! is number of blocks even?
4246	srlx		%i2, 4, %i2
4247	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
4248	bz		%icc, .L256_cbc_dec_loop2x
4249	prefetch	[%i1], 22
4250.L256_cbc_dec_loop:
4251	ldx		[%i0 + 0], %o0
4252	brz,pt		%l0, 4f
4253	ldx		[%i0 + 8], %o1
4254
4255	ldx		[%i0 + 16], %o2
4256	sllx		%o0, %l0, %o0
4257	srlx		%o1, %l1, %g1
4258	sllx		%o1, %l0, %o1
4259	or		%g1, %o0, %o0
4260	srlx		%o2, %l1, %o2
4261	or		%o2, %o1, %o1
42624:
4263	xor		%g4, %o0, %o2		! ^= rk[0]
4264	xor		%g5, %o1, %o3
4265	.word	0x81b0230a !movxtod	%o2,%f0
4266	.word	0x85b0230b !movxtod	%o3,%f2
4267
4268	prefetch	[%i1 + 63], 22
4269	prefetch	[%i0 + 16+63], 20
4270	call		_aes256_decrypt_1x
4271	add		%i0, 16, %i0
4272
4273	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4274	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4275	.word	0x99b02308 !movxtod	%o0,%f12
4276	.word	0x9db02309 !movxtod	%o1,%f14
4277
4278	brnz,pn		%l2, 2f
4279	sub		%i2, 1, %i2
4280
4281	std		%f0, [%i1 + 0]
4282	std		%f2, [%i1 + 8]
4283	brnz,pt		%i2, .L256_cbc_dec_loop2x
4284	add		%i1, 16, %i1
4285	st		%f12, [%i4 + 0]
4286	st		%f13, [%i4 + 4]
4287	st		%f14, [%i4 + 8]
4288	st		%f15, [%i4 + 12]
4289.L256_cbc_dec_abort:
4290	ret
4291	restore
4292
4293.align	16
42942:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
4295						! and ~3x deterioration
4296						! in inp==out case
4297	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
4298	.word	0x8db00902 !faligndata	%f0,%f2,%f6
4299	.word	0x91b08902 !faligndata	%f2,%f2,%f8
4300
4301	stda		%f4, [%i1 + %l3]0xc0	! partial store
4302	std		%f6, [%i1 + 8]
4303	add		%i1, 16, %i1
4304	orn		%g0, %l3, %l3
4305	stda		%f8, [%i1 + %l3]0xc0	! partial store
4306
4307	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
4308	orn		%g0, %l3, %l3
4309	st		%f12, [%i4 + 0]
4310	st		%f13, [%i4 + 4]
4311	st		%f14, [%i4 + 8]
4312	st		%f15, [%i4 + 12]
4313	ret
4314	restore
4315
4316!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4317.align	32
4318.L256_cbc_dec_loop2x:
4319	ldx		[%i0 + 0], %o0
4320	ldx		[%i0 + 8], %o1
4321	ldx		[%i0 + 16], %o2
4322	brz,pt		%l0, 4f
4323	ldx		[%i0 + 24], %o3
4324
4325	ldx		[%i0 + 32], %o4
4326	sllx		%o0, %l0, %o0
4327	srlx		%o1, %l1, %g1
4328	or		%g1, %o0, %o0
4329	sllx		%o1, %l0, %o1
4330	srlx		%o2, %l1, %g1
4331	or		%g1, %o1, %o1
4332	sllx		%o2, %l0, %o2
4333	srlx		%o3, %l1, %g1
4334	or		%g1, %o2, %o2
4335	sllx		%o3, %l0, %o3
4336	srlx		%o4, %l1, %o4
4337	or		%o4, %o3, %o3
43384:
4339	xor		%g4, %o0, %o4		! ^= rk[0]
4340	xor		%g5, %o1, %o5
4341	.word	0x81b0230c !movxtod	%o4,%f0
4342	.word	0x85b0230d !movxtod	%o5,%f2
4343	xor		%g4, %o2, %o4
4344	xor		%g5, %o3, %o5
4345	.word	0x89b0230c !movxtod	%o4,%f4
4346	.word	0x8db0230d !movxtod	%o5,%f6
4347
4348	prefetch	[%i1 + 63], 22
4349	prefetch	[%i0 + 32+63], 20
4350	call		_aes256_decrypt_2x
4351	add		%i0, 32, %i0
4352
4353	.word	0x91b02308 !movxtod	%o0,%f8
4354	.word	0x95b02309 !movxtod	%o1,%f10
4355	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4356	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4357	.word	0x99b0230a !movxtod	%o2,%f12
4358	.word	0x9db0230b !movxtod	%o3,%f14
4359	.word	0x89b20d84 !fxor	%f8,%f4,%f4
4360	.word	0x8db28d86 !fxor	%f10,%f6,%f6
4361
4362	brnz,pn		%l2, 2f
4363	sub		%i2, 2, %i2
4364
4365	std		%f0, [%i1 + 0]
4366	std		%f2, [%i1 + 8]
4367	std		%f4, [%i1 + 16]
4368	std		%f6, [%i1 + 24]
4369	brnz,pt		%i2, .L256_cbc_dec_loop2x
4370	add		%i1, 32, %i1
4371	st		%f12, [%i4 + 0]
4372	st		%f13, [%i4 + 4]
4373	st		%f14, [%i4 + 8]
4374	st		%f15, [%i4 + 12]
4375	ret
4376	restore
4377
.align	16
! Misaligned-output tail of the 2x loop: shift the 32 result bytes
! through %f8/%f0/%f2/%f4/%f6 with faligndata (alignaddrl was set up by
! the caller's entry code) and emit the first and last fragments as
! byte-masked partial stores (ASI 0xc0) using the mask in %l3.
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing fragment
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	! Re-enter the loop 4 bytes past its start: %o0 was already
	! loaded by the ldxa above, so the first ldx is skipped.
	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3		! restore mask (delay slot)
	st		%f12, [%i4 + 0]		! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
4403
4404!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
! Bulk path using ASI_BLK_INIT stores (dispatched to by entry code above
! this chunk).  Splits the byte count %i2 into a bulk part processed here
! and a tail of %l5 16-byte blocks finished by the ordinary loops after a
! membar.  The tail is sized (output_end & 63, rounded up to 16n) so the
! bulk stores stay within block-store-friendly territory.
.L256cbc_dec_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2	! bulk byte count -> 16-byte blocks
	srl	%l5, 4, %l5	! tail byte count -> 16-byte blocks
	sub	%i2, 1, %i2	! bias the split by one block for the
	add	%l5, 1, %l5	! subcc/bgu loop-exit test below

.L256_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3		! (delay slot)

	! Misaligned input: funnel-shift five doublewords into %o0..%o3.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0		! advance input (delay slot)
	subcc		%i2, 2, %i2		! bulk blocks -= 2

	! CBC chaining, as in the ordinary loop: stage block-1 ciphertext
	! in %f8:%f10, keep the last ciphertext in %f12:%f14.
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		SIZE_T_CC, .L256_cbc_dec_blk_loop2x
	add		%i1, 8, %i1		! advance output (delay slot)

	! Fold the loop-exit remainder into the tail count, order the
	! block-init stores, and finish via the ordinary 1x/2x loops
	! (both defined above this chunk).
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_cbc_dec_loop
	srl		%i2, 0, %i2		! clear upper 32 bits (delay slot)
	brnz,pn		%i2, .L256_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes256_t4_cbc_decrypt,#function
.size	aes256_t4_cbc_decrypt,.-aes256_t4_cbc_decrypt
.align	32
! Decrypt one 128-bit block held in %f0:%f2 with an AES-256 schedule:
! 14 rounds of aes_dround01/aes_dround23, the last pair using the *_l
! (final-round) forms.  Round keys are preloaded in %f16-%f62 by the
! caller; because %f16-%f22 are needed for two different round keys,
! the last two keys are reloaded from [%i3 + 208..232] mid-stream and
! %f16-%f22 are restored from [%i3 + 16..40] on the way out so the next
! call finds them preloaded again.  Clobbers %f4 as a ping-pong
! temporary.  Leaf subroutine: entered via call, returns with retl.
_aes256_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	ldd		[%i3 + 208], %f16	! reload keys for the two
	ldd		[%i3 + 216], %f18	! final rounds
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd4444 !aes_dround01	%f52,%f4,%f2,%f0
	.word	0x84cdc464 !aes_dround23	%f54,%f4,%f2,%f2
	.word	0x88ce4440 !aes_dround01	%f56,%f0,%f2,%f4
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x80cf4444 !aes_dround01	%f60,%f4,%f2,%f0
	.word	0x84cfc464 !aes_dround23	%f62,%f4,%f2,%f2
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	ldd		[%i3 + 16], %f16	! restore %f16-%f22 for the
	ldd		[%i3 + 24], %f18	! next invocation
	.word	0x80cd04c4 !aes_dround01_l	%f20,%f4,%f2,%f0
	.word	0x84cd84e4 !aes_dround23_l	%f22,%f4,%f2,%f2
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22	! (delay slot)
.type	_aes256_decrypt_1x,#function
.size	_aes256_decrypt_1x,.-_aes256_decrypt_1x
4526
.align	32
! Decrypt two 128-bit blocks (%f0:%f2 and %f4:%f6) in parallel with an
! AES-256 schedule: 14 interleaved rounds, last pair in *_l form.  Same
! key-register convention as _aes256_decrypt_1x: keys preloaded in
! %f16-%f62, final-round keys reloaded from [%i3 + 208..232], and
! %f16-%f22 restored from [%i3 + 16..40] before returning.  Clobbers
! %f8/%f10 as ping-pong temporaries.  Leaf subroutine: call/retl.
_aes256_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	ldd		[%i3 + 208], %f16	! reload keys for the two
	ldd		[%i3 + 216], %f18	! final rounds
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd4448 !aes_dround01	%f52,%f8,%f2,%f0
	.word	0x84cdc468 !aes_dround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c4a !aes_dround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc6a !aes_dround23	%f54,%f10,%f6,%f6
	.word	0x90ce4440 !aes_dround01	%f56,%f0,%f2,%f8
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c44 !aes_dround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc64 !aes_dround23	%f58,%f4,%f6,%f6
	.word	0x80cf4448 !aes_dround01	%f60,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f62,%f8,%f2,%f2
	.word	0x88cf4c4a !aes_dround01	%f60,%f10,%f6,%f4
	.word	0x8ccfcc6a !aes_dround23	%f62,%f10,%f6,%f6
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	ldd		[%i3 + 16], %f16	! restore %f16-%f22 for the
	ldd		[%i3 + 24], %f18	! next invocation
	.word	0x80cd04c8 !aes_dround01_l	%f20,%f8,%f2,%f0
	.word	0x84cd84e8 !aes_dround23_l	%f22,%f8,%f2,%f2
	.word	0x88cd0cca !aes_dround01_l	%f20,%f10,%f6,%f4
	.word	0x8ccd8cea !aes_dround23_l	%f22,%f10,%f6,%f6
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22	! (delay slot)
.type	_aes256_decrypt_2x,#function
.size	_aes256_decrypt_2x,.-_aes256_decrypt_2x
4596
.align	32
! Decrypt one 128-bit block (%f0:%f2) with an AES-192 schedule:
! 12 rounds, last pair in *_l (final-round) form.  All round keys fit
! in %f16-%f62, so unlike the 256-bit variant no key reloads (and no
! memory accesses at all) are needed.  Clobbers %f4 as a ping-pong
! temporary.  Leaf subroutine: call/retl.
_aes192_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd4444 !aes_dround01	%f52,%f4,%f2,%f0
	.word	0x84cdc464 !aes_dround23	%f54,%f4,%f2,%f2
	.word	0x88ce4440 !aes_dround01	%f56,%f0,%f2,%f4
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x80cf44c4 !aes_dround01_l	%f60,%f4,%f2,%f0
	retl
	.word	0x84cfc4e4 !aes_dround23_l	%f62,%f4,%f2,%f2	! (delay slot)
.type	_aes192_decrypt_1x,#function
.size	_aes192_decrypt_1x,.-_aes192_decrypt_1x
4626
.align	32
! Decrypt two 128-bit blocks (%f0:%f2 and %f4:%f6) in parallel with an
! AES-192 schedule: 12 interleaved rounds, last pairs in *_l form.  All
! round keys are resident in %f16-%f62; no memory accesses.  Clobbers
! %f8/%f10 as ping-pong temporaries.  Leaf subroutine: call/retl.
_aes192_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd4448 !aes_dround01	%f52,%f8,%f2,%f0
	.word	0x84cdc468 !aes_dround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c4a !aes_dround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc6a !aes_dround23	%f54,%f10,%f6,%f6
	.word	0x90ce4440 !aes_dround01	%f56,%f0,%f2,%f8
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c44 !aes_dround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc64 !aes_dround23	%f58,%f4,%f6,%f6
	.word	0x80cf44c8 !aes_dround01_l	%f60,%f8,%f2,%f0
	.word	0x84cfc4e8 !aes_dround23_l	%f62,%f8,%f2,%f2
	.word	0x88cf4cca !aes_dround01_l	%f60,%f10,%f6,%f4
	retl
	.word	0x8ccfccea !aes_dround23_l	%f62,%f10,%f6,%f6	! (delay slot)
.type	_aes192_decrypt_2x,#function
.size	_aes192_decrypt_2x,.-_aes192_decrypt_2x
4680.asciz	"AES for SPARC T4, David S. Miller, Andy Polyakov"
4681.align	4
4682