#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# a few percent faster in 32-bit one (this for aligned input, data for
# unaligned input is not available).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Command line: <flavour> [<output>]
#   flavour - any string containing "64" selects the 64-bit (2.0W) ABI
#             parameters; anything else selects the 32-bit (1.0) ones.
#   output  - file that receives the generated assembly. When omitted
#             (or given as "-") the assembly goes to the inherited STDOUT.
#
# NOTE: this script follows the perlasm convention of plain package
# globals ($code, register names, ABI parameters); the heredocs below
# interpolate them directly.

$flavour = shift;
$output  = shift;

# Redirect STDOUT to the requested file. A failed open must be fatal:
# Perl closes STDOUT before attempting the re-open, so carrying on would
# silently throw the generated code away.
if (defined($output) && $output ne '' && $output ne '-') {
    open STDOUT, '>', $output
        or die "can't open $output: $!";
}

# ABI-dependent parameters: architecture level, pointer size, frame
# marker size, offset of the saved return pointer, and the store/load
# mnemonics (plain and with base-register modification) used by the
# prologue and epilogue.
if ($flavour =~ /64/) {
    $LEVEL        = "2.0W";
    $SIZE_T       = 8;
    $FRAME_MARKER = 80;
    $SAVED_RP     = 16;
    $PUSH         = "std";
    $PUSHMA       = "std,ma";
    $POP          = "ldd";
    $POPMB        = "ldd,mb";
} else {
    $LEVEL        = "1.0";
    $SIZE_T       = 4;
    $FRAME_MARKER = 48;
    $SAVED_RP     = 20;
    $PUSH         = "stw";
    $PUSHMA       = "stwm";
    $POP          = "ldw";
    $POPMB        = "ldwm";
}

$FRAME = 14*$SIZE_T + $FRAME_MARKER;    # 14 saved regs + frame marker
                                        #                 [+ argument transfer]
$ctx = "%r26";      # arg0
$inp = "%r25";      # arg1
$num = "%r24";      # arg2

$t0 = "%r28";
$t1 = "%r29";
$K  = "%r31";

# Sixteen message-schedule registers plus a 17th slot, which aliases $t0
# and is only used while realigning unaligned input.
@X = ("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
      "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# Working variables; @V is rotated after every round so that each BODY_*
# call sees the registers in their current a..e roles.
@V = ($A,$B,$C,$D,$E) = ("%r19","%r20","%r21","%r22","%r23");

# BODY_00_19($i, $a, $b, $c, $d, $e)
#
# Appends the instructions for round $i of rounds 0..19 to $code.
# The round function is formed as ($c & $b) | ($d & ~$b).
# For $i < 15 the schedule word is taken from @X[$i] as loaded; for
# $i >= 15 the next schedule word, X[($i+1)%16], is updated in the same
# instruction stream (xor of three earlier words, then shd by 31).
sub BODY_00_19 {
    my ($i,$a,$b,$c,$d,$e) = @_;
    my $j = $i+1;
$code.=<<___ if ($i<15);
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	addl	@X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
$code.=<<___ if ($i>=15);	# with forward Xupdate
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	add	$t0,$e,$e
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
___
}

# BODY_20_39($i, $a, $b, $c, $d, $e)
#
# Appends the instructions for one round whose round function is
# $b ^ $c ^ $d; the driver uses it for rounds 20..39 and again for
# rounds 60..79. Every round except 79 also updates the next schedule
# word. Round 79 has no further schedule word to prepare, so those
# slots instead load the five context words into @X[0..4] ready for
# the final accumulation.
sub BODY_20_39 {
    my ($i,$a,$b,$c,$d,$e) = @_;
    my $j = $i+1;
$code.=<<___ if ($i<79);
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t0,$e,$e
___
$code.=<<___ if ($i==79);	# with context load
	ldw	0($ctx),@X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),@X[1]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),@X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),@X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),@X[4]
___
}

# BODY_40_59($i, $a, $b, $c, $d, $e)
#
# Appends the instructions for round $i of rounds 40..59 to $code.
# The round function is accumulated into $e as two separate terms,
# (($d ^ $c) & $b) and ($d & $c), alongside the update of the next
# schedule word.
sub BODY_40_59 {
    my ($i,$a,$b,$c,$d,$e) = @_;
    my $j = $i+1;
$code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	xor	$d,$c,$t0
	addl	@X[$i%16],$e,$e
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t1,$e,$e
___
}

# Function header, prologue (save %r2 and %r3..%r16), initial load of
# the five state words, and set-up of the shift amount register from
# the low two bits of the input pointer. Expressions between backticks
# are evaluated by the post-processing step at the end of the script.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___

# Load input block: the first 15 words, read from the word-aligned
# address held in $t0.
for my $w (0 .. 14) {
    $code .= "\tldw\t`4*$w`($t0),@X[$w]\n";
}

# If the input pointer was already aligned, skip the realignment. The
# 16th word is loaded by the instruction right after the branch; the
# 17th is only needed (and only loaded) on the unaligned path.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),@X[15]
	ldw	64($t0),@X[16]
___

# Align input: funnel-shift each adjacent pair of the 17 loaded words
# by the amount previously placed in the shift amount register.
for my $w (0 .. 15) {
    $code .= "\tvshd\t@X[$w],@X[$w+1],@X[$w]\n";
}

$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___
for my $round (0 .. 19) {
    BODY_00_19($round, @V);
    unshift(@V, pop(@V));
}

$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___
for my $round (20 .. 39) {
    BODY_20_39($round, @V);
    unshift(@V, pop(@V));
}

$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___
for my $round (40 .. 59) {
    BODY_40_59($round, @V);
    unshift(@V, pop(@V));
}

$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___
for my $round (60 .. 79) {
    BODY_20_39($round, @V);
    unshift(@V, pop(@V));
}

# Add the context words (loaded into @X[0..4] during round 79) to the
# working variables, store the result back, loop over the remaining
# blocks, then restore the saved registers and return.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<>	-1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Post-processing:
#  - evaluate every `...` arithmetic expression left in the text;
#  - 32-bit build: strip the "*" from ",*" condition completers;
#  - 64-bit build: turn "bv" into "bve".
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/,\*/,/gm      if ($SIZE_T==4);
$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8);
print $code;

# Buffered write errors only surface at close, so a failure here means
# the output file may be truncated and must not go unnoticed.
close STDOUT or die "error closing STDOUT: $!";