#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.
#
# Usage: perl <this script> <flavour> [<output file>]
#
# The script builds the assembly text in $code and prints it to the
# named output file, or to standard output when no argument looks like
# a file name.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...

######################################################################
# There are a number of MIPS ABIs in use, O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift;	# supported flavours are o32,n32,64,nubi32,nubi64

# Pick pointer-sized arithmetic and register save/restore mnemonics,
# plus the size in bytes of one saved register slot.
if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
#
# <appro@openssl.org>
#
######################################################################

# Target byte order. The token MIPSEL is fed through the preprocessor
# named by $ENV{CC}; if the token survives unchanged the target is taken
# to be big-endian (presumably little-endian toolchains predefine MIPSEL
# as a macro, so it gets replaced). The probe is attempted only when CC
# is actually set: otherwise $big_endian stays undefined and the
# host-byte-order fallback below takes over, instead of a failed command
# being misread as "little-endian".
$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0 if ($ENV{CC});

# The output file is the last argument that looks like "name.ext".
for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
if (defined($output)) {
	open(STDOUT,'>',$output) or die "can't open $output: $!";
}

# No compiler to ask: assume the target has the byte order of the host
# running this script.
if (!defined($big_endian))
	{   $big_endian=(unpack('L',pack('N',1))==1);   }

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;

@X=map("\$$_",(8..23));	# a4-a7,s0-s11

$ctx=$a0;
$inp=$a1;
$num=$a2;
$A="\$1";
$B="\$2";
$C="\$3";
$D="\$7";
$E="\$24";	@V=($A,$B,$C,$D,$E);
$t0="\$25";
$t1=$num;	# $num is offloaded to stack
$t2="\$30";	# fp
$K="\$31";	# ra

# BODY_00_14($i,$a,$b,$c,$d,$e)
#
# Append round $i (0..14) to $code. $a..$e are the register names that
# currently hold the five working variables. On little-endian targets
# the already loaded word X[$i] is byte-swapped first. The next input
# word X[$i+1] is fetched with an lwl/lwr pair while the round computes
#	e += K + rol(a,5) + (((c^d)&b)^d) + X[i];  b = rol(b,30)
sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___	if (!$big_endian);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	$t1,$t2
	or	@X[$i],$t1
___
$code.=<<___;
	lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_15_19($i,$a,$b,$c,$d,$e)
#
# Append round $i (15..19) to $code. Same round function as
# BODY_00_14, but instead of loading input the next schedule word is
# computed in place in the 16-register ring:
#	X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1),  j = i+1 (mod 16)
# Only round 15 still has a freshly loaded word to byte-swap on
# little-endian targets.
sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___	if (!$big_endian && $i==15);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
___
$code.=<<___;
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Append round $i to $code using the parity function b^c^d; the driver
# calls it for rounds 20..39 and again for rounds 60..79. For $i<79 the
# next schedule word is updated as in BODY_15_19. Round 79 needs no
# further schedule word, so its spare slots are used to load the five
# context words into X[0..4] for the final accumulation.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
___
$code.=<<___ if ($i==79);
	lw	@X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Append round $i (40..59) to $code. The round function is accumulated
# into e in two parts, (c&d) and ((c^d)&b), alongside the same schedule
# update as in BODY_15_19.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;

# Prologue: allocate the frame and save the registers the body
# clobbers ($ra and $fp double as $K and $t2, $s4-$s11 are part of @X).
$code=<<___;
.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	$sp,$FRAMESIZE*$SZREG,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_S	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
# Turn the block count into an end-of-input pointer ($inp + $num*64)
# and park it at 0($sp), freeing the $num register for use as $t1.
# Then load the five state words and enter the per-block loop.
$code.=<<___;
	$PTR_SLL $num,6
	$PTR_ADD $num,$inp
	$REG_S	$num,0($sp)
	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
___
# Unroll all 80 rounds. After each round the working-variable list is
# rotated right by one, so register roles shift instead of data moving.
for ($i=0;$i<15;$i++)	{ BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++)	{ BODY_15_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
for (;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
for (;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
for (;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Advance the input pointer, reload the end pointer saved at 0($sp),
# add the previous state (loaded into X[0..4] during round 79) to the
# working variables, store the result and loop while input remains.
# The epilogue then restores the saved registers and pops the frame in
# the delay slot of the return jump.
$code.=<<___;
	$PTR_ADD $inp,64
	$REG_L	$num,0($sp)

	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bne	$inp,$num,.Loop
	nop

	.set	noreorder
	$REG_L	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_L	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	sha1_block_data_order
.rdata
.asciiz	"SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___
print $code;
# Buffered write errors only surface at close time, so check it.
close STDOUT or die "error closing STDOUT: $!";