#!/usr/bin/env perl

# ====================================================================
# Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
# <appro@openssl.org>. The module is licensed under 2-clause BSD
# license. March 2013. All rights reserved.
# ====================================================================

######################################################################
# DES for SPARC T4.
#
# As with other hardware-assisted ciphers CBC encrypt results [for
# aligned data] are virtually identical to critical path lengths:
#
#		DES		Triple-DES
# CBC encrypt	4.14/4.15(*)	11.7/11.7
# CBC decrypt	1.77/4.11(**)	6.42/7.47
#
#			 (*)	numbers after slash are for
#				misaligned data;
#			 (**)	this is result for largest
#				block size, unlike all other
#				cases smaller blocks results
#				are better[?];

# Work out the directory this script was invoked from so that the shared
# perlasm helper can be found either next to it or in ../../perlasm.
# When $0 has no directory component the match fails; fall back to "./"
# explicitly instead of using whatever $1 happened to hold.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "./";
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# asm_init is not defined in this file (presumably it comes from
# sparcv9_modes.pl).  $::abibits and $::size_t_cc, which are read further
# down, are never assigned here either - presumably asm_init sets them
# from the command-line arguments; confirm against the helper.
&asm_init(@ARGV);

# For the 64-bit ABI only, declare %g2 and %g3 to the assembler as scratch
# registers.  The CBC routines in this file keep the input right-shift
# count (%g2) and the output byte mask (%g3) in them.
$code.=<<___ if ($::abibits==64);
.register       %g2,#scratch
.register       %g3,#scratch
___

# Everything appended to $code after this point is placed in the text
# section.
$code.=<<___;
.text
___

# des_t4_key_expand(inp = %o0, out = %o1)
#
# Reads one 8-byte DES key from inp and writes the expanded key schedule
# to out as sixteen 8-byte entries (offsets 0x00 through 0x78).
#
# Input alignment: inp is rounded down with alignaddr and the first
# doubleword is always loaded (it sits in the branch delay slot).  Only
# when the low three address bits were non-zero is the following
# doubleword loaded as well and the two merged with faligndata.
#
# The schedule is produced by a chain of des_kexpand instructions, each
# fed by an earlier result.  The stores are interleaved with that chain,
# so they do not appear in ascending offset order; the final store sits
# in the delay slot of retl.
{ my ($inp,$out)=("%o0","%o1");

$code.=<<___;
.align	32
.globl	des_t4_key_expand
.type	des_t4_key_expand,#function
des_t4_key_expand:
	andcc		$inp, 0x7, %g0
	alignaddr	$inp, %g0, $inp
	bz,pt		%icc, 1f
	ldd		[$inp + 0x00], %f0
	ldd		[$inp + 0x08], %f2
	faligndata	%f0, %f2, %f0
1:	des_kexpand	%f0, 0, %f0
	des_kexpand	%f0, 1, %f2
	std		%f0, [$out + 0x00]
	des_kexpand	%f2, 3, %f6
	std		%f2, [$out + 0x08]
	des_kexpand	%f2, 2, %f4
	des_kexpand	%f6, 3, %f10
	std		%f6, [$out + 0x18]
	des_kexpand	%f6, 2, %f8
	std		%f4, [$out + 0x10]
	des_kexpand	%f10, 3, %f14
	std		%f10, [$out + 0x28]
	des_kexpand	%f10, 2, %f12
	std		%f8, [$out + 0x20]
	des_kexpand	%f14, 1, %f16
	std		%f14, [$out + 0x38]
	des_kexpand	%f16, 3, %f20
	std		%f12, [$out + 0x30]
	des_kexpand	%f16, 2, %f18
	std		%f16, [$out + 0x40]
	des_kexpand	%f20, 3, %f24
	std		%f20, [$out + 0x50]
	des_kexpand	%f20, 2, %f22
	std		%f18, [$out + 0x48]
	des_kexpand	%f24, 3, %f28
	std		%f24, [$out + 0x60]
	des_kexpand	%f24, 2, %f26
	std		%f22, [$out + 0x58]
	des_kexpand	%f28, 1, %f30
	std		%f28, [$out + 0x70]
	std		%f26, [$out + 0x68]
	retl
	std		%f30, [$out + 0x78]
.size	des_t4_key_expand,.-des_t4_key_expand
___
}
# CBC-mode entry points: des_t4_cbc_encrypt, des_t4_cbc_decrypt,
# des_t4_ede3_cbc_encrypt and des_t4_ede3_cbc_decrypt.
#
# All four take the same arguments:
#   %o0  inp   input buffer (any alignment)
#   %o1  out   output buffer (any alignment)
#   %o2  len   byte count; shifted right by 3 to obtain the number of
#              8-byte blocks, so any len % 8 remainder is ignored
#   %o3  key   expanded key schedule (see each routine for the layout read)
#   %o4  ivec  8-byte IV, read as two words on entry and written back as
#              two words on exit
#
# and use the same scratch registers:
#   %g1  ileft   (inp & 7) * 8: bit offset of the input within its
#                doubleword, zero when inp is 8-byte aligned
#   %g2  iright  0 - ileft: shift count applied to the following
#                doubleword when two loads have to be merged
#   %g3  omask   0 when out is 8-byte aligned, otherwise 0xff >> (out & 7);
#                a non-zero value selects the partial-store path at label 2
#   %g4, %g5     input doublewords
#
# A len of exactly 0 returns immediately through .Lcbc_abort.  That label
# lives inside des_t4_cbc_encrypt and is shared by the other three
# routines, so all four must be assembled together.
#
# NOTE(review): a len of 1..7 passes the zero check, becomes 0 blocks after
# the shift, and the loop body still runs and decrements it.  Callers
# presumably always pass a multiple of 8 - confirm.
{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  my ($ileft,$iright,$omask) = map("%g$_",(1..3));

# Single DES.
#
# des_t4_cbc_encrypt preloads the sixteen key-schedule entries from
# key+0x00..key+0x78 in ascending order into %f4..%f34.  For each block it
# XORs the input with the running IV in %f0, then applies des_ip, eight
# des_round instructions (each consuming a pair of key registers) and
# des_iip.  The result stays in %f0 as the IV for the next block and is
# what gets written back to ivec at the end.
#
# des_t4_cbc_decrypt preloads the same sixteen entries in descending order
# (key+0x78 first), runs the same instruction sequence on the raw input,
# and XORs the result with the previous ciphertext block held in %f2.  %f2
# is then refreshed from %g4 (the current input block) and is what gets
# written back to ivec at the end.
#
# Misaligned output (label 2 in each routine): the result is rotated with
# faligndata and written with two partial stores, using omask for the
# first doubleword and its complement for the next.  The loop is
# re-entered at "+4", i.e. past its first instruction, because the ldxa at
# label 2 has already fetched the next input doubleword into %g4.
$code.=<<___;
.globl	des_t4_cbc_encrypt
.align	32
des_t4_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.Lcbc_abort:
	retl
	nop

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_cbc_encrypt,#function
.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt

.globl	des_t4_cbc_decrypt
.align	32
des_t4_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x78], %f4	! load key schedule
	ldd		[$key + 0x70], %f6
	ldd		[$key + 0x68], %f8
	ldd		[$key + 0x60], %f10
	ldd		[$key + 0x58], %f12
	ldd		[$key + 0x50], %f14
	ldd		[$key + 0x48], %f16
	ldd		[$key + 0x40], %f18
	ldd		[$key + 0x38], %f20
	ldd		[$key + 0x30], %f22
	ldd		[$key + 0x28], %f24
	ldd		[$key + 0x20], %f26
	ldd		[$key + 0x18], %f28
	ldd		[$key + 0x10], %f30
	ldd		[$key + 0x08], %f32
	ldd		[$key + 0x00], %f34

.Ldes_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_cbc_decrypt,#function
.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

# Triple DES (EDE3).
#
# The key argument is read as three consecutive 16-entry schedules
# occupying key+0x000..key+0x178.
#
# des_t4_ede3_cbc_encrypt:
#   pass 1  key+0x00 .. key+0x78, ascending  (preloaded into %f4..%f34)
#   pass 2  key+0xf8 .. key+0x80, descending (written as 0x100-0x08 ...)
#   pass 3  key+0x100 .. key+0x178, ascending
# des_t4_ede3_cbc_decrypt:
#   pass 1  key+0x178 .. key+0x100, descending (preloaded into %f4..%f34)
#   pass 2  key+0x80 .. key+0xf8, ascending
#   pass 3  key+0x78 .. key+0x00, descending
#
# Only the pass-1 schedule is loaded once before the loop.  The entries
# for passes 2 and 3 are reloaded into %f36..%f62 on every iteration, with
# the loads interleaved between the des_round instructions.  IV handling
# and the misaligned input/output paths are the same as in the single-DES
# routines.
#
# One might wonder why there are back-to-back des_iip/des_ip pairs between
# the EDE passes. Indeed, aren't they the inverse of each other? They
# almost are. The outcome of the pair is the two 32-bit words being
# swapped in the target register. Consider the des_iip/des_ip pair as a
# way to perform the due swap; it's actually the fastest way in this case.

$code.=<<___;
.globl	des_t4_ede3_cbc_encrypt
.align	32
des_t4_ede3_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_ede3_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x100-0x08], %f36
	ldd		[$key + 0x100-0x10], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x100-0x18], %f40
	ldd		[$key + 0x100-0x20], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x100-0x28], %f44
	ldd		[$key + 0x100-0x30], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x100-0x38], %f48
	ldd		[$key + 0x100-0x40], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x100-0x48], %f52
	ldd		[$key + 0x100-0x50], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x100-0x58], %f56
	ldd		[$key + 0x100-0x60], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x100-0x68], %f60
	ldd		[$key + 0x100-0x70], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100-0x78], %f36
	ldd		[$key + 0x100-0x80], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x100+0x00], %f40
	ldd		[$key + 0x100+0x08], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x100+0x10], %f44
	ldd		[$key + 0x100+0x18], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x100+0x20], %f48
	ldd		[$key + 0x100+0x28], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x100+0x30], %f52
	ldd		[$key + 0x100+0x38], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100+0x40], %f56
	ldd		[$key + 0x100+0x48], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x100+0x50], %f60
	ldd		[$key + 0x100+0x58], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x100+0x60], %f36
	ldd		[$key + 0x100+0x68], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x100+0x70], %f40
	ldd		[$key + 0x100+0x78], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~2x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_ede3_cbc_encrypt,#function
.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt

.globl	des_t4_ede3_cbc_decrypt
.align	32
des_t4_ede3_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x100+0x78], %f4	! load key schedule
	ldd		[$key + 0x100+0x70], %f6
	ldd		[$key + 0x100+0x68], %f8
	ldd		[$key + 0x100+0x60], %f10
	ldd		[$key + 0x100+0x58], %f12
	ldd		[$key + 0x100+0x50], %f14
	ldd		[$key + 0x100+0x48], %f16
	ldd		[$key + 0x100+0x40], %f18
	ldd		[$key + 0x100+0x38], %f20
	ldd		[$key + 0x100+0x30], %f22
	ldd		[$key + 0x100+0x28], %f24
	ldd		[$key + 0x100+0x20], %f26
	ldd		[$key + 0x100+0x18], %f28
	ldd		[$key + 0x100+0x10], %f30
	ldd		[$key + 0x100+0x08], %f32
	ldd		[$key + 0x100+0x00], %f34

.Ldes_ede3_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x80+0x00], %f36
	ldd		[$key + 0x80+0x08], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x80+0x10], %f40
	ldd		[$key + 0x80+0x18], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x80+0x20], %f44
	ldd		[$key + 0x80+0x28], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x80+0x30], %f48
	ldd		[$key + 0x80+0x38], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x80+0x40], %f52
	ldd		[$key + 0x80+0x48], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x80+0x50], %f56
	ldd		[$key + 0x80+0x58], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x80+0x60], %f60
	ldd		[$key + 0x80+0x68], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80+0x70], %f36
	ldd		[$key + 0x80+0x78], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x80-0x08], %f40
	ldd		[$key + 0x80-0x10], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x80-0x18], %f44
	ldd		[$key + 0x80-0x20], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x80-0x28], %f48
	ldd		[$key + 0x80-0x30], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x80-0x38], %f52
	ldd		[$key + 0x80-0x40], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80-0x48], %f56
	ldd		[$key + 0x80-0x50], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x80-0x58], %f60
	ldd		[$key + 0x80-0x60], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x80-0x68], %f36
	ldd		[$key + 0x80-0x70], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x80-0x78], %f40
	ldd		[$key + 0x80-0x80], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_ede3_cbc_decrypt,#function
.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
}
# Identification string embedded in the generated object, followed by
# re-alignment to a 4-byte boundary.
$code.=<<___;
.asciz  "DES for SPARC T4, David S. Miller, Andy Polyakov"
.align  4
___

# emit_assembler is not defined in this file (presumably it comes from
# sparcv9_modes.pl and writes out the text accumulated in $code - confirm
# against the helper).
&emit_assembler();

# A failed flush of buffered output is only reported by close, so check
# its result instead of silently leaving a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";