1238384Sjkim#!/usr/bin/env perl
2238384Sjkim
3238384Sjkim# ====================================================================
4238384Sjkim# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5238384Sjkim# project. The module is, however, dual licensed under OpenSSL and
6238384Sjkim# CRYPTOGAMS licenses depending on where you obtain it. For further
7238384Sjkim# details see http://www.openssl.org/~appro/cryptogams/.
8238384Sjkim# ====================================================================
9238384Sjkim
10238384Sjkim# SHA1 block procedure for PA-RISC.
11238384Sjkim
12238384Sjkim# June 2009.
13238384Sjkim#
14238384Sjkim# On PA-7100LC performance is >30% better than gcc 3.2 generated code
15238384Sjkim# for aligned input and >50% better for unaligned. Compared to vendor
16238384Sjkim# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
17238384Sjkim# few percent faster in 32-bit one (this for aligned input, data for
18238384Sjkim# unaligned input is not available).
19238384Sjkim#
20238384Sjkim# Special thanks to polarhome.com for providing HP-UX account.
21238384Sjkim
# Command line: <flavour> [<output file>]
#
#   flavour - ABI selector, matched against /64/ further down to choose
#             between the 64-bit and 32-bit parameter sets.
#   output  - file that receives the generated assembly.  When it is
#             omitted the text goes to the inherited STDOUT instead of
#             being silently discarded.
$flavour = shift;
$output  = shift;

# Everything is emitted with a plain "print" at the end of the script, so
# STDOUT itself is re-pointed at the output file.  Three-argument open keeps
# the file name from being interpreted as a mode, and a failure is fatal
# because a failed reopen would leave STDOUT closed and lose the output.
if (defined $output && length $output) {
    open STDOUT, '>', $output
        or die "can't open $output for writing: $!";
}
25238384Sjkim
# ABI-dependent parameters, selected by whether the flavour string
# contains "64":
#   $LEVEL         value for the .LEVEL assembler directive
#   $SIZE_T        size in bytes of one saved register slot
#   $FRAME_MARKER  size in bytes of the frame marker area
#   $SAVED_RP      offset used when saving the return pointer (%r2)
#   $PUSH/$PUSHMA  store mnemonics (plain / with stack-pointer modify)
#   $POP/$POPMB    load mnemonics  (plain / with stack-pointer modify)
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP,
 $PUSH,  $PUSHMA, $POP,          $POPMB) =
    ($flavour =~ /64/)
	? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
	: ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");
45238384Sjkim
# Stack frame: room for 14 saved registers on top of the frame marker
# [+ argument transfer].
$FRAME = $FRAME_MARKER + 14*$SIZE_T;

# Incoming arguments: arg0, arg1, arg2.
($ctx, $inp, $num) = ("%r26", "%r25", "%r24");

# Two scratch registers and the register holding the round constant.
($t0, $t1, $K) = ("%r28", "%r29", "%r31");

# Message schedule registers X[0..15] are %r1..%r16.  The extra 17th entry
# is $t0 itself; it is only referenced by the input-alignment code.
@X = ((map { "%r$_" } 1 .. 16), $t0);

# Working variables A..E, also kept as the list @V that the round loops
# rotate after every round.
($A, $B, $C, $D, $E) = map { "%r$_" } 19 .. 23;
@V = ($A, $B, $C, $D, $E);
60238384Sjkim
# Append the assembly for one of rounds 0..19 to $code.
#
# Arguments: the round number followed by the names of the registers that
# currently play the roles a, b, c, d, e.
#
# Rounds below 15 consume the message word X[round] as loaded.  From round
# 15 on, the instructions that recompute X[(round+1)%16] from
# X[(round+3)%16], X[(round+9)%16] and X[(round+14)%16] are interleaved
# with the round itself ("forward Xupdate").
sub BODY_00_19 {
    my ($round, $va, $vb, $vc, $vd, $ve) = @_;

    if ($round < 15) {
	my $w = $X[$round];

	$code .= <<___;
	addl	$K,$ve,$ve	; $round
	shd	$va,$va,27,$t1
	addl	$w,$ve,$ve
	and	$vc,$vb,$t0
	addl	$t1,$ve,$ve
	andcm	$vd,$vb,$t1
	shd	$vb,$vb,2,$vb
	or	$t1,$t0,$t0
	addl	$t0,$ve,$ve
___
    }
    else {
	my $next = $round + 1;
	my $w    = $X[$round % 16];		# word used by this round
	my $upd  = $X[$next % 16];		# word being recomputed
	my ($x2, $x8, $x13) = map { $X[($next + $_) % 16] } (2, 8, 13);

	$code .= <<___;
	addl	$K,$ve,$ve	; $round
	shd	$va,$va,27,$t1
	xor	$x2,$upd,$upd
	addl	$w,$ve,$ve
	and	$vc,$vb,$t0
	xor	$x8,$upd,$upd
	addl	$t1,$ve,$ve
	andcm	$vd,$vb,$t1
	shd	$vb,$vb,2,$vb
	or	$t1,$t0,$t0
	xor	$x13,$upd,$upd
	add	$t0,$ve,$ve
	shd	$upd,$upd,31,$upd
___
    }
}
91238384Sjkim
# Append the assembly for one round of the xor-based round function to
# $code.  The driver loops call it for rounds 20..39 and again for 60..79.
#
# Arguments: the round number followed by the names of the registers that
# currently play the roles a, b, c, d, e.
#
# Every round before 79 also recomputes X[(round+1)%16] for a later round.
# Round 79 has no later round to feed, so those slots are filled with loads
# of the five context words at 0,4,8,12,16(ctx) into X[0..4] instead.
# Round numbers above 79 emit nothing.
sub BODY_20_39 {
    my ($round, $va, $vb, $vc, $vd, $ve) = @_;
    my $w = $X[$round % 16];			# word used by this round

    if ($round < 79) {
	my $next = $round + 1;
	my $upd  = $X[$next % 16];		# word being recomputed
	my ($x2, $x8, $x13) = map { $X[($next + $_) % 16] } (2, 8, 13);

	$code .= <<___;
	xor	$x2,$upd,$upd	; $round
	addl	$K,$ve,$ve
	shd	$va,$va,27,$t1
	xor	$x8,$upd,$upd
	addl	$w,$ve,$ve
	xor	$vb,$vc,$t0
	xor	$x13,$upd,$upd
	addl	$t1,$ve,$ve
	shd	$vb,$vb,2,$vb
	xor	$vd,$t0,$t0
	shd	$upd,$upd,31,$upd
	addl	$t0,$ve,$ve
___
    }
    elsif ($round == 79) {
	my ($h0, $h1, $h2, $h3, $h4) = @X[0 .. 4];

	$code .= <<___;
	ldw	0($ctx),$h0	; $round
	addl	$K,$ve,$ve
	shd	$va,$va,27,$t1
	ldw	4($ctx),$h1
	addl	$w,$ve,$ve
	xor	$vb,$vc,$t0
	ldw	8($ctx),$h2
	addl	$t1,$ve,$ve
	shd	$vb,$vb,2,$vb
	xor	$vd,$t0,$t0
	ldw	12($ctx),$h3
	addl	$t0,$ve,$ve
	ldw	16($ctx),$h4
___
    }
}
125238384Sjkim
# Append the assembly for one of rounds 40..59 to $code.
#
# Arguments: the round number followed by the names of the registers that
# currently play the roles a, b, c, d, e.
#
# The round function is accumulated into e in two parts: first
# (d xor c) and b, then d and c.  The recomputation of X[(round+1)%16]
# from X[(round+3)%16], X[(round+9)%16] and X[(round+14)%16] is
# interleaved with the round.
sub BODY_40_59 {
    my ($round, $va, $vb, $vc, $vd, $ve) = @_;

    my $next = $round + 1;
    my $w    = $X[$round % 16];			# word used by this round
    my $upd  = $X[$next % 16];			# word being recomputed
    my ($x2, $x8, $x13) = map { $X[($next + $_) % 16] } (2, 8, 13);

    $code .= <<___;
	shd	$va,$va,27,$t1	; $round
	addl	$K,$ve,$ve
	xor	$x2,$upd,$upd
	xor	$vd,$vc,$t0
	addl	$w,$ve,$ve
	xor	$x8,$upd,$upd
	and	$vb,$t0,$t0
	addl	$t1,$ve,$ve
	shd	$vb,$vb,2,$vb
	xor	$x13,$upd,$upd
	addl	$t0,$ve,$ve
	and	$vd,$vc,$t1
	shd	$upd,$upd,31,$upd
	addl	$t1,$ve,$ve
___
}
146238384Sjkim
# Assemble the text of sha1_block_data_order in $code.  Expressions placed
# between back-ticks are left in the text here and are evaluated by the
# substitution that runs just before $code is printed.
#
# First piece: assembler directives, the prologue that stores %r2 and
# %r3..%r16 into the new frame, the load of the five context words into
# A..E, the shift amount 32-8*(inp&3) written to %cr11, and the top of the
# per-block loop where $t0 receives $inp with its two low bits cleared.
147238384Sjkim$code=<<___;
148238384Sjkim	.LEVEL	$LEVEL
149238384Sjkim	.SPACE	\$TEXT\$
150238384Sjkim	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
151238384Sjkim
152238384Sjkim	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
153238384Sjkimsha1_block_data_order
154238384Sjkim	.PROC
155238384Sjkim	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
156238384Sjkim	.ENTRY
157238384Sjkim	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
158238384Sjkim	$PUSHMA	%r3,$FRAME(%sp)
159238384Sjkim	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
160238384Sjkim	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
161238384Sjkim	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
162238384Sjkim	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
163238384Sjkim	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
164238384Sjkim	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
165238384Sjkim	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
166238384Sjkim	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
167238384Sjkim	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
168238384Sjkim	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
169238384Sjkim	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
170238384Sjkim	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
171238384Sjkim	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)
172238384Sjkim
173238384Sjkim	ldw	0($ctx),$A
174238384Sjkim	ldw	4($ctx),$B
175238384Sjkim	ldw	8($ctx),$C
176238384Sjkim	ldw	12($ctx),$D
177238384Sjkim	ldw	16($ctx),$E
178238384Sjkim
179238384Sjkim	extru	$inp,31,2,$t0		; t0=inp&3;
180238384Sjkim	sh3addl	$t0,%r0,$t0		; t0*=8;
181238384Sjkim	subi	32,$t0,$t0		; t0=32-t0;
182238384Sjkim	mtctl	$t0,%cr11		; %sar=t0;
183238384Sjkim
184238384SjkimL\$oop
185238384Sjkim	ldi	3,$t0
186238384Sjkim	andcm	$inp,$t0,$t0		; 64-bit neutral
187238384Sjkim___
# Emit 15 loads: the words at offsets 0,4,...,56 from the aligned pointer
# in $t0 go to X[0]..X[14].
188238384Sjkim	for ($i=0;$i<15;$i++) {		# load input block
189238384Sjkim	$code.="\tldw	`4*$i`($t0),@X[$i]\n";		}
# Branch to L$aligned when $inp equals the aligned pointer.  The ",*" in
# the condition is removed for 32-bit builds by the post-processing at the
# end of the script.  X[15] is loaded right after the branch
# (NOTE(review): presumably in its delay slot, so it runs on both paths -
# confirm against the PA-RISC manual).  X[16] is the same register as $t0,
# so the load of the 17th word overwrites the aligned pointer; $t0 is
# recomputed from $inp at the top of every loop iteration.
190238384Sjkim$code.=<<___;
191238384Sjkim	cmpb,*=	$inp,$t0,L\$aligned
192238384Sjkim	ldw	60($t0),@X[15]
193238384Sjkim	ldw	64($t0),@X[16]
194238384Sjkim___
# Unaligned input only: one vshd per message word, each combining X[i] with
# its successor X[i+1] and writing the result back to X[i].
# NOTE(review): vshd presumably takes its shift count from the %sar value
# set up before the loop - confirm.
195238384Sjkim	for ($i=0;$i<16;$i++) {		# align input
196238384Sjkim	$code.="\tvshd	@X[$i],@X[$i+1],@X[$i]\n";	}
# $K is built with two instructions per constant: ldil supplies the upper
# part and ldo adds the low part (0x5a827000 + 0x999 here).
197238384Sjkim$code.=<<___;
198238384SjkimL\$aligned
199238384Sjkim	ldil	L'0x5a827000,$K		; K_00_19
200238384Sjkim	ldo	0x999($K),$K
201238384Sjkim___
# Rounds 0..19.  After each round the last element of @V is moved to the
# front, so the register that was "e" becomes "a" for the next round and
# no register-to-register moves are needed.
202238384Sjkimfor ($i=0;$i<20;$i++)   { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
203238384Sjkim$code.=<<___;
204238384Sjkim	ldil	L'0x6ed9e000,$K		; K_20_39
205238384Sjkim	ldo	0xba1($K),$K
206238384Sjkim___
207238384Sjkim
# Rounds 20..39; $i deliberately carries over from the previous loop.
208238384Sjkimfor (;$i<40;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
209238384Sjkim$code.=<<___;
210238384Sjkim	ldil	L'0x8f1bb000,$K		; K_40_59
211238384Sjkim	ldo	0xcdc($K),$K
212238384Sjkim___
213238384Sjkim
# Rounds 40..59.
214238384Sjkimfor (;$i<60;$i++)       { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
215238384Sjkim$code.=<<___;
216238384Sjkim	ldil	L'0xca62c000,$K		; K_60_79
217238384Sjkim	ldo	0x1d6($K),$K
218238384Sjkim___
# Rounds 60..79 reuse the BODY_20_39 generator; its round-79 variant loads
# the five context words into X[0..4] for the additions below.  After 80
# rotations (a multiple of 5) @V is back in its original A..E order.
219238384Sjkimfor (;$i<80;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
220238384Sjkim
# Tail: add the previous context words (held in X[0..4]) to A..E, store the
# result back to the context, decrement $num by one and branch back to
# L$oop, advance $inp by 64, then restore %r2 and %r4..%r16 and return,
# with %r3 restored by the final stack-adjusting load.
# NOTE(review): "ldo 64($inp),$inp" and the final $POPMB follow their
# branches and presumably execute in the delay slots - confirm.
221238384Sjkim$code.=<<___;
222238384Sjkim	addl	@X[0],$A,$A
223238384Sjkim	addl	@X[1],$B,$B
224238384Sjkim	addl	@X[2],$C,$C
225238384Sjkim	addl	@X[3],$D,$D
226238384Sjkim	addl	@X[4],$E,$E
227238384Sjkim	stw	$A,0($ctx)
228238384Sjkim	stw	$B,4($ctx)
229238384Sjkim	stw	$C,8($ctx)
230238384Sjkim	stw	$D,12($ctx)
231238384Sjkim	stw	$E,16($ctx)
232238384Sjkim	addib,*<> -1,$num,L\$oop
233238384Sjkim	ldo	64($inp),$inp
234238384Sjkim
235238384Sjkim	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
236238384Sjkim	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
237238384Sjkim	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
238238384Sjkim	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
239238384Sjkim	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
240238384Sjkim	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
241238384Sjkim	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
242238384Sjkim	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
243238384Sjkim	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
244238384Sjkim	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
245238384Sjkim	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
246238384Sjkim	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
247238384Sjkim	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
248238384Sjkim	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
249238384Sjkim	bv	(%r2)
250238384Sjkim	.EXIT
251238384Sjkim	$POPMB	-$FRAME(%sp),%r3
252238384Sjkim	.PROCEND
253238384Sjkim	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
254238384Sjkim___
255238384Sjkim
# Post-process the collected assembly text and write it out.

# 1. Replace every `expr` with the value of the Perl expression between the
#    back-ticks (the frame offsets in the prologue/epilogue and the load
#    offsets of the input block).  A failing expression is fatal instead of
#    being silently replaced by an empty operand.
$code =~ s{\`([^\`]*)\`}{
    my $expr  = $1;
    my $value = eval $expr;
    die "cannot evaluate `$expr`: $@" if $@;
    $value;
}ge;

# 2. 32-bit build: drop the "*" that follows the comma in the branch
#    completers written as ",*=" and ",*<>".
$code =~ s/,\*/,/g		if ($SIZE_T==4);

# 3. 64-bit build: the return branch is spelled "bve" instead of "bv".
$code =~ s/\bbv\b/bve/g		if ($SIZE_T==8);

# Buffered write errors (e.g. a full disk) may only surface at close, so
# both the print and the close are checked; otherwise a truncated output
# file would look like a successful run to the build.
print $code	or die "error writing assembly output: $!";
close STDOUT	or die "error closing STDOUT: $!";
261