dest4-sparcv9.pl revision 306195
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
5# <appro@openssl.org>. The module is licensed under 2-clause BSD
6# license. March 2013. All rights reserved.
7# ====================================================================
8
9######################################################################
10# DES for SPARC T4.
11#
12# As with other hardware-assisted ciphers CBC encrypt results [for
13# aligned data] are virtually identical to critical path lengths:
14#
15#		DES		Triple-DES
16# CBC encrypt	4.14/4.15(*)	11.7/11.7
17# CBC decrypt	1.77/4.11(**)	6.42/7.47
18#
19#			 (*)	numbers after slash are for
20#				misaligned data;
21#			 (**)	this is the result for the largest
22#				block size; unlike all other
23#				cases, results for smaller
24#				blocks are better[?];
25
# Capture the directory part of this script's path ($0) so the shared
# perlasm helper can be looked up both next to the script and under
# ../../perlasm relative to it.
26$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
27push(@INC,"${dir}","${dir}../../perlasm");
# NOTE(review): asm_init() and emit_assembler() are not defined in this
# file; presumably sparcv9_modes.pl provides them -- confirm there.
28require "sparcv9_modes.pl";
29
# Pass the command-line arguments to the helper's initialisation routine.
# NOTE(review): $::abibits and $::size_t_cc, used further down, are
# presumably set up by this call -- confirm in the helper.
30&asm_init(@ARGV);
31
# Appended only when $::abibits is 64: declare %g2 and %g3 as scratch
# registers. The CBC routines later in this file map $iright and $omask
# onto exactly these two registers.
32$code.=<<___ if ($::abibits==64);
33.register       %g2,#scratch
34.register       %g3,#scratch
35___
36
# Everything appended to $code after this point is placed in the text
# section.
37$code.=<<___;
38.text
39___
40
# des_t4_key_expand: generated leaf routine taking the input key pointer in
# %o0 ($inp) and the output pointer in %o1 ($out).
#
# The generated code always loads the doubleword at $inp (that ldd sits in
# the delay slot of the alignment branch). When the low three bits of $inp
# are non-zero it also loads the following doubleword and merges the two
# with alignaddr/faligndata. A chain of des_kexpand instructions then
# derives further values from earlier ones, and sixteen doublewords are
# stored to $out + 0x00 .. $out + 0x78 (128 bytes in total). The final
# store occupies the delay slot of retl.
#
# Nothing inside the heredoc may be changed without changing the emitted
# assembly, so all commentary lives out here.
41{ my ($inp,$out)=("%o0","%o1");
42
43$code.=<<___;
44.align	32
45.globl	des_t4_key_expand
46.type	des_t4_key_expand,#function
47des_t4_key_expand:
48	andcc		$inp, 0x7, %g0
49	alignaddr	$inp, %g0, $inp
50	bz,pt		%icc, 1f
51	ldd		[$inp + 0x00], %f0
52	ldd		[$inp + 0x08], %f2
53	faligndata	%f0, %f2, %f0
541:	des_kexpand	%f0, 0, %f0
55	des_kexpand	%f0, 1, %f2
56	std		%f0, [$out + 0x00]
57	des_kexpand	%f2, 3, %f6
58	std		%f2, [$out + 0x08]
59	des_kexpand	%f2, 2, %f4
60	des_kexpand	%f6, 3, %f10
61	std		%f6, [$out + 0x18]
62	des_kexpand	%f6, 2, %f8
63	std		%f4, [$out + 0x10]
64	des_kexpand	%f10, 3, %f14
65	std		%f10, [$out + 0x28]
66	des_kexpand	%f10, 2, %f12
67	std		%f8, [$out + 0x20]
68	des_kexpand	%f14, 1, %f16
69	std		%f14, [$out + 0x38]
70	des_kexpand	%f16, 3, %f20
71	std		%f12, [$out + 0x30]
72	des_kexpand	%f16, 2, %f18
73	std		%f16, [$out + 0x40]
74	des_kexpand	%f20, 3, %f24
75	std		%f20, [$out + 0x50]
76	des_kexpand	%f20, 2, %f22
77	std		%f18, [$out + 0x48]
78	des_kexpand	%f24, 3, %f28
79	std		%f24, [$out + 0x60]
80	des_kexpand	%f24, 2, %f26
81	std		%f22, [$out + 0x58]
82	des_kexpand	%f28, 1, %f30
83	std		%f28, [$out + 0x70]
84	std		%f26, [$out + 0x68]
85	retl
86	std		%f30, [$out + 0x78]
87.size	des_t4_key_expand,.-des_t4_key_expand
88___
89}
# Single-DES CBC routines: des_t4_cbc_encrypt and des_t4_cbc_decrypt.
#
# Arguments arrive in %o0..%o4 as ($inp, $out, $len, $key, $ivec).
# %g1..%g3 serve as $ileft, $iright and $omask; %g4 and %g5 are further
# temporaries.
#
# Both generated routines follow the same outline:
#  - return straight away through .Lcbc_abort when $len is zero;
#  - shift $len right by 3, so the loop counts whole 8-byte blocks and a
#    trailing partial block is not processed;
#  - clear the low three bits of $inp and keep the misalignment, in bits,
#    in $ileft ($iright is its negation). When $ileft is non-zero each
#    input block is assembled from two ldx loads combined with
#    sllx/srlx/or;
#  - apply alignaddrl to $out and derive $omask as 0xff >> ($out & 7),
#    forced to zero when $out was already aligned. With a non-zero $omask
#    each result is written at local label 2: via faligndata and two
#    partial stores (stda ... 0xc0), $omask being complemented in between;
#  - that path fetches the next input doubleword itself with ldxa and
#    therefore re-enters the loop at "+4", skipping the loop's first ldx;
#  - hold the sixteen key-schedule doublewords from [$key + 0x00..0x78] in
#    %f4..%f34; decrypt loads them in the opposite order;
#  - read the chaining value from [$ivec] on entry and store the updated
#    value back before returning.
#
# Encrypt XORs the chaining value into the input before the DES rounds and
# the result becomes the next chaining value. Decrypt XORs the previous
# chaining value (%f2) into the result after the rounds and then copies the
# just-consumed input block (%g4) into %f2 for the next iteration.
#
# Nothing inside the heredoc may be changed without changing the emitted
# assembly, so all commentary lives out here.
90{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
91  my ($ileft,$iright,$omask) = map("%g$_",(1..3));
92
93$code.=<<___;
94.globl	des_t4_cbc_encrypt
95.align	32
96des_t4_cbc_encrypt:
97	cmp		$len, 0
98	be,pn		$::size_t_cc, .Lcbc_abort
99	srln		$len, 0, $len		! needed on v8+, "nop" on v9
100	ld		[$ivec + 0], %f0	! load ivec
101	ld		[$ivec + 4], %f1
102
103	and		$inp, 7, $ileft
104	andn		$inp, 7, $inp
105	sll		$ileft, 3, $ileft
106	mov		0xff, $omask
107	prefetch	[$inp], 20
108	prefetch	[$inp + 63], 20
109	sub		%g0, $ileft, $iright
110	and		$out, 7, %g4
111	alignaddrl	$out, %g0, $out
112	srl		$omask, %g4, $omask
113	srlx		$len, 3, $len
114	movrz		%g4, 0, $omask
115	prefetch	[$out], 22
116
117	ldd		[$key + 0x00], %f4	! load key schedule
118	ldd		[$key + 0x08], %f6
119	ldd		[$key + 0x10], %f8
120	ldd		[$key + 0x18], %f10
121	ldd		[$key + 0x20], %f12
122	ldd		[$key + 0x28], %f14
123	ldd		[$key + 0x30], %f16
124	ldd		[$key + 0x38], %f18
125	ldd		[$key + 0x40], %f20
126	ldd		[$key + 0x48], %f22
127	ldd		[$key + 0x50], %f24
128	ldd		[$key + 0x58], %f26
129	ldd		[$key + 0x60], %f28
130	ldd		[$key + 0x68], %f30
131	ldd		[$key + 0x70], %f32
132	ldd		[$key + 0x78], %f34
133
134.Ldes_cbc_enc_loop:
135	ldx		[$inp + 0], %g4
136	brz,pt		$ileft, 4f
137	nop
138
139	ldx		[$inp + 8], %g5
140	sllx		%g4, $ileft, %g4
141	srlx		%g5, $iright, %g5
142	or		%g5, %g4, %g4
1434:
144	movxtod		%g4, %f2
145	prefetch	[$inp + 8+63], 20
146	add		$inp, 8, $inp
147	fxor		%f2, %f0, %f0		! ^= ivec
148	prefetch	[$out + 63], 22
149
150	des_ip		%f0, %f0
151	des_round	%f4, %f6, %f0, %f0
152	des_round	%f8, %f10, %f0, %f0
153	des_round	%f12, %f14, %f0, %f0
154	des_round	%f16, %f18, %f0, %f0
155	des_round	%f20, %f22, %f0, %f0
156	des_round	%f24, %f26, %f0, %f0
157	des_round	%f28, %f30, %f0, %f0
158	des_round	%f32, %f34, %f0, %f0
159	des_iip		%f0, %f0
160
161	brnz,pn		$omask, 2f
162	sub		$len, 1, $len
163
164	std		%f0, [$out + 0]
165	brnz,pt		$len, .Ldes_cbc_enc_loop
166	add		$out, 8, $out
167
168	st		%f0, [$ivec + 0]	! write out ivec
169	retl
170	st		%f1, [$ivec + 4]
171.Lcbc_abort:
172	retl
173	nop
174
175.align	16
1762:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
177						! and ~4x deterioration
178						! in inp==out case
179	faligndata	%f0, %f0, %f2		! handle unaligned output
180
181	stda		%f2, [$out + $omask]0xc0	! partial store
182	add		$out, 8, $out
183	orn		%g0, $omask, $omask
184	stda		%f2, [$out + $omask]0xc0	! partial store
185
186	brnz,pt		$len, .Ldes_cbc_enc_loop+4
187	orn		%g0, $omask, $omask
188
189	st		%f0, [$ivec + 0]	! write out ivec
190	retl
191	st		%f1, [$ivec + 4]
192.type	des_t4_cbc_encrypt,#function
193.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
194
195.globl	des_t4_cbc_decrypt
196.align	32
197des_t4_cbc_decrypt:
198	cmp		$len, 0
199	be,pn		$::size_t_cc, .Lcbc_abort
200	srln		$len, 0, $len		! needed on v8+, "nop" on v9
201	ld		[$ivec + 0], %f2	! load ivec
202	ld		[$ivec + 4], %f3
203
204	and		$inp, 7, $ileft
205	andn		$inp, 7, $inp
206	sll		$ileft, 3, $ileft
207	mov		0xff, $omask
208	prefetch	[$inp], 20
209	prefetch	[$inp + 63], 20
210	sub		%g0, $ileft, $iright
211	and		$out, 7, %g4
212	alignaddrl	$out, %g0, $out
213	srl		$omask, %g4, $omask
214	srlx		$len, 3, $len
215	movrz		%g4, 0, $omask
216	prefetch	[$out], 22
217
218	ldd		[$key + 0x78], %f4	! load key schedule
219	ldd		[$key + 0x70], %f6
220	ldd		[$key + 0x68], %f8
221	ldd		[$key + 0x60], %f10
222	ldd		[$key + 0x58], %f12
223	ldd		[$key + 0x50], %f14
224	ldd		[$key + 0x48], %f16
225	ldd		[$key + 0x40], %f18
226	ldd		[$key + 0x38], %f20
227	ldd		[$key + 0x30], %f22
228	ldd		[$key + 0x28], %f24
229	ldd		[$key + 0x20], %f26
230	ldd		[$key + 0x18], %f28
231	ldd		[$key + 0x10], %f30
232	ldd		[$key + 0x08], %f32
233	ldd		[$key + 0x00], %f34
234
235.Ldes_cbc_dec_loop:
236	ldx		[$inp + 0], %g4
237	brz,pt		$ileft, 4f
238	nop
239
240	ldx		[$inp + 8], %g5
241	sllx		%g4, $ileft, %g4
242	srlx		%g5, $iright, %g5
243	or		%g5, %g4, %g4
2444:
245	movxtod		%g4, %f0
246	prefetch	[$inp + 8+63], 20
247	add		$inp, 8, $inp
248	prefetch	[$out + 63], 22
249
250	des_ip		%f0, %f0
251	des_round	%f4, %f6, %f0, %f0
252	des_round	%f8, %f10, %f0, %f0
253	des_round	%f12, %f14, %f0, %f0
254	des_round	%f16, %f18, %f0, %f0
255	des_round	%f20, %f22, %f0, %f0
256	des_round	%f24, %f26, %f0, %f0
257	des_round	%f28, %f30, %f0, %f0
258	des_round	%f32, %f34, %f0, %f0
259	des_iip		%f0, %f0
260
261	fxor		%f2, %f0, %f0		! ^= ivec
262	movxtod		%g4, %f2
263
264	brnz,pn		$omask, 2f
265	sub		$len, 1, $len
266
267	std		%f0, [$out + 0]
268	brnz,pt		$len, .Ldes_cbc_dec_loop
269	add		$out, 8, $out
270
271	st		%f2, [$ivec + 0]	! write out ivec
272	retl
273	st		%f3, [$ivec + 4]
274
275.align	16
2762:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
277						! and ~4x deterioration
278						! in inp==out case
279	faligndata	%f0, %f0, %f0		! handle unaligned output
280
281	stda		%f0, [$out + $omask]0xc0	! partial store
282	add		$out, 8, $out
283	orn		%g0, $omask, $omask
284	stda		%f0, [$out + $omask]0xc0	! partial store
285
286	brnz,pt		$len, .Ldes_cbc_dec_loop+4
287	orn		%g0, $omask, $omask
288
289	st		%f2, [$ivec + 0]	! write out ivec
290	retl
291	st		%f3, [$ivec + 4]
292.type	des_t4_cbc_decrypt,#function
293.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
294___
295
296# One might wonder why there are back-to-back des_iip/des_ip pairs
297# between EDE passes. Indeed, aren't they inverses of each other?
298# They almost are. The outcome of the pair is that the 32-bit words are
299# swapped in the target register. Consider a des_iip/des_ip pair as a way
300# to perform the required swap; it's actually the fastest way in this case.
301
# Triple-DES (EDE3) CBC routines: des_t4_ede3_cbc_encrypt and
# des_t4_ede3_cbc_decrypt. They use the same ($inp, $out, $len, $key,
# $ivec) / ($ileft, $iright, $omask) register variables declared at the top
# of this scope. Zero-length abort, the byte-to-block conversion of $len,
# misaligned input and output handling, and the $ivec load/store are coded
# the same way as in the single-DES routines earlier in this scope.
#
# $key is read at offsets 0x00 .. 0x178, i.e. as three consecutive
# 0x80-byte schedules, and each block goes through three
# des_ip / 8 x des_round / des_iip passes:
#  - encrypt keeps the schedule at 0x00 resident in %f4..%f34 (loaded once,
#    in ascending order). Inside the loop it reads the second schedule
#    backwards (0x100-0x08 down to 0x100-0x80) and the third forwards
#    (0x100+0x00 up to 0x100+0x78) through %f36..%f62;
#  - decrypt keeps the schedule at 0x100 resident, loaded in descending
#    order. Inside the loop it reads the second schedule forwards
#    (0x80+0x00 up to 0x80+0x78) and the first backwards (0x80-0x08 down to
#    0x80-0x80).
# The in-loop ldd instructions are interleaved with the des_round
# instructions of the preceding pass, and %f36..%f42 are reused within an
# iteration, so the instruction order in the heredoc must not be disturbed.
#
# Nothing inside the heredoc may be changed without changing the emitted
# assembly, so all commentary lives out here.
302$code.=<<___;
303.globl	des_t4_ede3_cbc_encrypt
304.align	32
305des_t4_ede3_cbc_encrypt:
306	cmp		$len, 0
307	be,pn		$::size_t_cc, .Lcbc_abort
308	srln		$len, 0, $len		! needed on v8+, "nop" on v9
309	ld		[$ivec + 0], %f0	! load ivec
310	ld		[$ivec + 4], %f1
311
312	and		$inp, 7, $ileft
313	andn		$inp, 7, $inp
314	sll		$ileft, 3, $ileft
315	mov		0xff, $omask
316	prefetch	[$inp], 20
317	prefetch	[$inp + 63], 20
318	sub		%g0, $ileft, $iright
319	and		$out, 7, %g4
320	alignaddrl	$out, %g0, $out
321	srl		$omask, %g4, $omask
322	srlx		$len, 3, $len
323	movrz		%g4, 0, $omask
324	prefetch	[$out], 22
325
326	ldd		[$key + 0x00], %f4	! load key schedule
327	ldd		[$key + 0x08], %f6
328	ldd		[$key + 0x10], %f8
329	ldd		[$key + 0x18], %f10
330	ldd		[$key + 0x20], %f12
331	ldd		[$key + 0x28], %f14
332	ldd		[$key + 0x30], %f16
333	ldd		[$key + 0x38], %f18
334	ldd		[$key + 0x40], %f20
335	ldd		[$key + 0x48], %f22
336	ldd		[$key + 0x50], %f24
337	ldd		[$key + 0x58], %f26
338	ldd		[$key + 0x60], %f28
339	ldd		[$key + 0x68], %f30
340	ldd		[$key + 0x70], %f32
341	ldd		[$key + 0x78], %f34
342
343.Ldes_ede3_cbc_enc_loop:
344	ldx		[$inp + 0], %g4
345	brz,pt		$ileft, 4f
346	nop
347
348	ldx		[$inp + 8], %g5
349	sllx		%g4, $ileft, %g4
350	srlx		%g5, $iright, %g5
351	or		%g5, %g4, %g4
3524:
353	movxtod		%g4, %f2
354	prefetch	[$inp + 8+63], 20
355	add		$inp, 8, $inp
356	fxor		%f2, %f0, %f0		! ^= ivec
357	prefetch	[$out + 63], 22
358
359	des_ip		%f0, %f0
360	des_round	%f4, %f6, %f0, %f0
361	des_round	%f8, %f10, %f0, %f0
362	des_round	%f12, %f14, %f0, %f0
363	des_round	%f16, %f18, %f0, %f0
364	ldd		[$key + 0x100-0x08], %f36
365	ldd		[$key + 0x100-0x10], %f38
366	des_round	%f20, %f22, %f0, %f0
367	ldd		[$key + 0x100-0x18], %f40
368	ldd		[$key + 0x100-0x20], %f42
369	des_round	%f24, %f26, %f0, %f0
370	ldd		[$key + 0x100-0x28], %f44
371	ldd		[$key + 0x100-0x30], %f46
372	des_round	%f28, %f30, %f0, %f0
373	ldd		[$key + 0x100-0x38], %f48
374	ldd		[$key + 0x100-0x40], %f50
375	des_round	%f32, %f34, %f0, %f0
376	ldd		[$key + 0x100-0x48], %f52
377	ldd		[$key + 0x100-0x50], %f54
378	des_iip		%f0, %f0
379
380	ldd		[$key + 0x100-0x58], %f56
381	ldd		[$key + 0x100-0x60], %f58
382	des_ip		%f0, %f0
383	ldd		[$key + 0x100-0x68], %f60
384	ldd		[$key + 0x100-0x70], %f62
385	des_round	%f36, %f38, %f0, %f0
386	ldd		[$key + 0x100-0x78], %f36
387	ldd		[$key + 0x100-0x80], %f38
388	des_round	%f40, %f42, %f0, %f0
389	des_round	%f44, %f46, %f0, %f0
390	des_round	%f48, %f50, %f0, %f0
391	ldd		[$key + 0x100+0x00], %f40
392	ldd		[$key + 0x100+0x08], %f42
393	des_round	%f52, %f54, %f0, %f0
394	ldd		[$key + 0x100+0x10], %f44
395	ldd		[$key + 0x100+0x18], %f46
396	des_round	%f56, %f58, %f0, %f0
397	ldd		[$key + 0x100+0x20], %f48
398	ldd		[$key + 0x100+0x28], %f50
399	des_round	%f60, %f62, %f0, %f0
400	ldd		[$key + 0x100+0x30], %f52
401	ldd		[$key + 0x100+0x38], %f54
402	des_round	%f36, %f38, %f0, %f0
403	ldd		[$key + 0x100+0x40], %f56
404	ldd		[$key + 0x100+0x48], %f58
405	des_iip		%f0, %f0
406
407	ldd		[$key + 0x100+0x50], %f60
408	ldd		[$key + 0x100+0x58], %f62
409	des_ip		%f0, %f0
410	ldd		[$key + 0x100+0x60], %f36
411	ldd		[$key + 0x100+0x68], %f38
412	des_round	%f40, %f42, %f0, %f0
413	ldd		[$key + 0x100+0x70], %f40
414	ldd		[$key + 0x100+0x78], %f42
415	des_round	%f44, %f46, %f0, %f0
416	des_round	%f48, %f50, %f0, %f0
417	des_round	%f52, %f54, %f0, %f0
418	des_round	%f56, %f58, %f0, %f0
419	des_round	%f60, %f62, %f0, %f0
420	des_round	%f36, %f38, %f0, %f0
421	des_round	%f40, %f42, %f0, %f0
422	des_iip		%f0, %f0
423
424	brnz,pn		$omask, 2f
425	sub		$len, 1, $len
426
427	std		%f0, [$out + 0]
428	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
429	add		$out, 8, $out
430
431	st		%f0, [$ivec + 0]	! write out ivec
432	retl
433	st		%f1, [$ivec + 4]
434
435.align	16
4362:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
437						! and ~2x deterioration
438						! in inp==out case
439	faligndata	%f0, %f0, %f2		! handle unaligned output
440
441	stda		%f2, [$out + $omask]0xc0	! partial store
442	add		$out, 8, $out
443	orn		%g0, $omask, $omask
444	stda		%f2, [$out + $omask]0xc0	! partial store
445
446	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
447	orn		%g0, $omask, $omask
448
449	st		%f0, [$ivec + 0]	! write out ivec
450	retl
451	st		%f1, [$ivec + 4]
452.type	des_t4_ede3_cbc_encrypt,#function
453.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
454
455.globl	des_t4_ede3_cbc_decrypt
456.align	32
457des_t4_ede3_cbc_decrypt:
458	cmp		$len, 0
459	be,pn		$::size_t_cc, .Lcbc_abort
460	srln		$len, 0, $len		! needed on v8+, "nop" on v9
461	ld		[$ivec + 0], %f2	! load ivec
462	ld		[$ivec + 4], %f3
463
464	and		$inp, 7, $ileft
465	andn		$inp, 7, $inp
466	sll		$ileft, 3, $ileft
467	mov		0xff, $omask
468	prefetch	[$inp], 20
469	prefetch	[$inp + 63], 20
470	sub		%g0, $ileft, $iright
471	and		$out, 7, %g4
472	alignaddrl	$out, %g0, $out
473	srl		$omask, %g4, $omask
474	srlx		$len, 3, $len
475	movrz		%g4, 0, $omask
476	prefetch	[$out], 22
477
478	ldd		[$key + 0x100+0x78], %f4	! load key schedule
479	ldd		[$key + 0x100+0x70], %f6
480	ldd		[$key + 0x100+0x68], %f8
481	ldd		[$key + 0x100+0x60], %f10
482	ldd		[$key + 0x100+0x58], %f12
483	ldd		[$key + 0x100+0x50], %f14
484	ldd		[$key + 0x100+0x48], %f16
485	ldd		[$key + 0x100+0x40], %f18
486	ldd		[$key + 0x100+0x38], %f20
487	ldd		[$key + 0x100+0x30], %f22
488	ldd		[$key + 0x100+0x28], %f24
489	ldd		[$key + 0x100+0x20], %f26
490	ldd		[$key + 0x100+0x18], %f28
491	ldd		[$key + 0x100+0x10], %f30
492	ldd		[$key + 0x100+0x08], %f32
493	ldd		[$key + 0x100+0x00], %f34
494
495.Ldes_ede3_cbc_dec_loop:
496	ldx		[$inp + 0], %g4
497	brz,pt		$ileft, 4f
498	nop
499
500	ldx		[$inp + 8], %g5
501	sllx		%g4, $ileft, %g4
502	srlx		%g5, $iright, %g5
503	or		%g5, %g4, %g4
5044:
505	movxtod		%g4, %f0
506	prefetch	[$inp + 8+63], 20
507	add		$inp, 8, $inp
508	prefetch	[$out + 63], 22
509
510	des_ip		%f0, %f0
511	des_round	%f4, %f6, %f0, %f0
512	des_round	%f8, %f10, %f0, %f0
513	des_round	%f12, %f14, %f0, %f0
514	des_round	%f16, %f18, %f0, %f0
515	ldd		[$key + 0x80+0x00], %f36
516	ldd		[$key + 0x80+0x08], %f38
517	des_round	%f20, %f22, %f0, %f0
518	ldd		[$key + 0x80+0x10], %f40
519	ldd		[$key + 0x80+0x18], %f42
520	des_round	%f24, %f26, %f0, %f0
521	ldd		[$key + 0x80+0x20], %f44
522	ldd		[$key + 0x80+0x28], %f46
523	des_round	%f28, %f30, %f0, %f0
524	ldd		[$key + 0x80+0x30], %f48
525	ldd		[$key + 0x80+0x38], %f50
526	des_round	%f32, %f34, %f0, %f0
527	ldd		[$key + 0x80+0x40], %f52
528	ldd		[$key + 0x80+0x48], %f54
529	des_iip		%f0, %f0
530
531	ldd		[$key + 0x80+0x50], %f56
532	ldd		[$key + 0x80+0x58], %f58
533	des_ip		%f0, %f0
534	ldd		[$key + 0x80+0x60], %f60
535	ldd		[$key + 0x80+0x68], %f62
536	des_round	%f36, %f38, %f0, %f0
537	ldd		[$key + 0x80+0x70], %f36
538	ldd		[$key + 0x80+0x78], %f38
539	des_round	%f40, %f42, %f0, %f0
540	des_round	%f44, %f46, %f0, %f0
541	des_round	%f48, %f50, %f0, %f0
542	ldd		[$key + 0x80-0x08], %f40
543	ldd		[$key + 0x80-0x10], %f42
544	des_round	%f52, %f54, %f0, %f0
545	ldd		[$key + 0x80-0x18], %f44
546	ldd		[$key + 0x80-0x20], %f46
547	des_round	%f56, %f58, %f0, %f0
548	ldd		[$key + 0x80-0x28], %f48
549	ldd		[$key + 0x80-0x30], %f50
550	des_round	%f60, %f62, %f0, %f0
551	ldd		[$key + 0x80-0x38], %f52
552	ldd		[$key + 0x80-0x40], %f54
553	des_round	%f36, %f38, %f0, %f0
554	ldd		[$key + 0x80-0x48], %f56
555	ldd		[$key + 0x80-0x50], %f58
556	des_iip		%f0, %f0
557
558	ldd		[$key + 0x80-0x58], %f60
559	ldd		[$key + 0x80-0x60], %f62
560	des_ip		%f0, %f0
561	ldd		[$key + 0x80-0x68], %f36
562	ldd		[$key + 0x80-0x70], %f38
563	des_round	%f40, %f42, %f0, %f0
564	ldd		[$key + 0x80-0x78], %f40
565	ldd		[$key + 0x80-0x80], %f42
566	des_round	%f44, %f46, %f0, %f0
567	des_round	%f48, %f50, %f0, %f0
568	des_round	%f52, %f54, %f0, %f0
569	des_round	%f56, %f58, %f0, %f0
570	des_round	%f60, %f62, %f0, %f0
571	des_round	%f36, %f38, %f0, %f0
572	des_round	%f40, %f42, %f0, %f0
573	des_iip		%f0, %f0
574
575	fxor		%f2, %f0, %f0		! ^= ivec
576	movxtod		%g4, %f2
577
578	brnz,pn		$omask, 2f
579	sub		$len, 1, $len
580
581	std		%f0, [$out + 0]
582	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
583	add		$out, 8, $out
584
585	st		%f2, [$ivec + 0]	! write out ivec
586	retl
587	st		%f3, [$ivec + 4]
588
589.align	16
5902:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
591						! and ~3x deterioration
592						! in inp==out case
593	faligndata	%f0, %f0, %f0		! handle unaligned output
594
595	stda		%f0, [$out + $omask]0xc0	! partial store
596	add		$out, 8, $out
597	orn		%g0, $omask, $omask
598	stda		%f0, [$out + $omask]0xc0	! partial store
599
600	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
601	orn		%g0, $omask, $omask
602
603	st		%f2, [$ivec + 0]	! write out ivec
604	retl
605	st		%f3, [$ivec + 4]
606.type	des_t4_ede3_cbc_decrypt,#function
607.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
608___
609}
# Append an identification string to the generated assembly and realign to
# a 4-byte boundary after it.
610$code.=<<___;
611.asciz  "DES for SPARC T4, David S. Miller, Andy Polyakov"
612.align  4
613___
614
# Hand the accumulated $code over to the shared helper for output.
# NOTE(review): emit_assembler() is not defined in this file; presumably it
# comes from sparcv9_modes.pl and writes to STDOUT -- confirm there.
615&emit_assembler();
616
# Buffered write errors (full disk, closed pipe) only surface when the
# handle is closed. Treat a failed close as fatal so that a truncated
# assembly file is never mistaken for a successful run.
617close STDOUT or die "error closing STDOUT: $!";
618