#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# sha1_block for Thumb.
#
# January 2007.
#
# The code does not present direct interest to OpenSSL, because of low
# performance. Its purpose is to establish _size_ benchmark. Pretty
# useless one I must say, because 30% or 88 bytes larger ARMv4 code
# [available on demand] is almost _twice_ as fast. It should also be
# noted that in-lining of .Lcommon and .Lrotate improves performance
# by over 40%, while code increases by only 10% or 32 bytes. But once
# again, the goal was to establish _size_ benchmark, not performance.

use strict;
use warnings;

# Usage: perl <this script> [output-file]
#
# The generated assembly goes to output-file. When no file name is given
# the inherited STDOUT is left untouched, so the code is printed there
# instead of being silently discarded by a failed open.
my $output = shift;
if (defined $output && length $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Non-zero: expand the common()/rotate() sequences in every round body
# instead of emitting "bl .Lcommon" / "bl .Lrotate" calls.
my $inline = 0;
# Set to 1 to emit an ARM-mode (.code 32) entry stub that switches to
# Thumb with "bx" before the real function body.
my $cheat_on_binutils = 0;

# Register allocation. These strings are interpolated into the assembly
# templates below. ($a/$b are plain lexicals here; no sort block is used
# anywhere in this script.)
my $t0  = "r0";
my $t1  = "r1";
my $t2  = "r2";
my $a   = "r3";
my $b   = "r4";
my $c   = "r5";
my $d   = "r6";
my $e   = "r7";
my $K   = "r8";		# "upper" registers can be used in add/sub and mov insns
my $ctx = "r9";
my $inp = "r10";
my $len = "r11";
my $Xi  = "r12";

# Accumulates the whole generated assembly text; printed once at the end.
# Declared before the BODY_* subs because they append to it.
my $code = "";

# Returns the instruction sequence that opens every round: step $t0 down
# by one word, load that word into $t1, and build E+K+ROL(A,5)+X[i] in $t2.
sub common {
    return <<___;
	sub	$t0,#4
	ldr	$t1,[$t0]
	add	$e,$K			@ E+=K_xx_xx
	lsl	$t2,$a,#5
	add	$t2,$e
	lsr	$e,$a,#27
	add	$t2,$e			@ E+=ROR(A,27)
	add	$t2,$t1			@ E+=X[i]
___
}

# Returns the instruction sequence that closes every round: shift the
# working variables (E=D, D=C, C=ROR(B,2), B=A) and set A to $t2+$t1,
# where $t1 holds the round function value left by the BODY_* code.
sub rotate {
    return <<___;
	mov	$e,$d			@ E=D
	mov	$d,$c			@ D=C
	lsl	$c,$b,#30
	lsr	$b,$b,#2
	orr	$c,$b			@ C=ROR(B,2)
	mov	$b,$a			@ B=A
	add	$a,$t2,$t1		@ A=E+F_xx_xx(B,C,D)
___
}

# Appends one round of rounds 0..19 to $code: F = ((C^D)&B)^D in $t1.
sub BODY_00_19 {
    $code .= $inline ? common() : "\tbl .Lcommon\n";
    $code .= <<___;
	mov	$t1,$c
	eor	$t1,$d
	and	$t1,$b
	eor	$t1,$d			@ F_00_19(B,C,D)
___
    $code .= $inline ? rotate() : "\tbl .Lrotate\n";
}

# Appends one round of rounds 20..39 (also reused for 60..79) to $code:
# F = B^C^D in $t1.
sub BODY_20_39 {
    $code .= $inline ? common() : "\tbl .Lcommon\n";
    $code .= <<___;
	mov	$t1,$b
	eor	$t1,$c
	eor	$t1,$d			@ F_20_39(B,C,D)
___
    $code .= $inline ? rotate() : "\tbl .Lrotate\n";
}

# Appends one round of rounds 40..59 to $code: F = (B&C)|((B|C)&D) in $t1.
# $e is used as scratch here; its value was already folded into $t2 by
# the common sequence and it is reloaded from D by the rotate sequence.
sub BODY_40_59 {
    $code .= $inline ? common() : "\tbl .Lcommon\n";
    $code .= <<___;
	mov	$t1,$b
	and	$t1,$c
	mov	$e,$b
	orr	$e,$c
	and	$e,$d
	orr	$t1,$e			@ F_40_59(B,C,D)
___
    $code .= $inline ? rotate() : "\tbl .Lrotate\n";
}

# Function header.
$code = <<___;
.text
.code	16

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
sha1_block_data_order:
___

# Optional ARM-mode entry stub (see $cheat_on_binutils above).
if ($cheat_on_binutils) {
    $code .= <<___;
.code	32
	add	r3,pc,#1
	bx	r3			@ switch to Thumb ISA
.code	16
___
}

# Prologue, message schedule (.LXload/.LXupdate) and start of rounds 0..19.
$code .= <<___;
	push	{r4-r7}
	mov	r3,r8
	mov	r4,r9
	mov	r5,r10
	mov	r6,r11
	mov	r7,r12
	push	{r3-r7,lr}
	lsl	r2,#6
	mov	$ctx,r0			@ save context
	mov	$inp,r1			@ save inp
	mov	$len,r2			@ save len
	add	$len,$inp		@ $len to point at inp end

.Lloop:
	mov	$Xi,sp
	mov	$t2,sp
	sub	$t2,#16*4		@ [3]
.LXload:
	ldrb	$a,[$t1,#0]		@ $t1 is r1 and holds inp
	ldrb	$b,[$t1,#1]
	ldrb	$c,[$t1,#2]
	ldrb	$d,[$t1,#3]
	lsl	$a,#24
	lsl	$b,#16
	lsl	$c,#8
	orr	$a,$b
	orr	$a,$c
	orr	$a,$d
	add	$t1,#4
	push	{$a}
	cmp	sp,$t2
	bne	.LXload			@ [+14*16]

	mov	$inp,$t1		@ update $inp
	sub	$t2,#32*4
	sub	$t2,#32*4
	mov	$e,#31			@ [+4]
.LXupdate:
	ldr	$a,[sp,#15*4]
	ldr	$b,[sp,#13*4]
	ldr	$c,[sp,#7*4]
	ldr	$d,[sp,#2*4]
	eor	$a,$b
	eor	$a,$c
	eor	$a,$d
	ror	$a,$e
	push	{$a}
	cmp	sp,$t2
	bne	.LXupdate		@ [+(11+1)*64]

	ldmia	$t0!,{$a,$b,$c,$d,$e}	@ $t0 is r0 and holds ctx
	mov	$t0,$Xi

	ldr	$t2,.LK_00_19
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+7+4]
.L_00_19:
___
BODY_00_19();

# Loop control for rounds 0..19, then set-up for rounds 20..39.
$code .= <<___;
	cmp	$Xi,$t0
	bne	.L_00_19		@ [+(2+9+4+2+8+2)*20]

	ldr	$t2,.LK_20_39
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_20_39_or_60_79:
___
BODY_20_39();

# The 20..39 loop body is shared with rounds 60..79; the "cmp sp,$t0"
# test below is what tells the second pass apart and exits to .Ldone.
$code .= <<___;
	cmp	$Xi,$t0
	bne	.L_20_39_or_60_79	@ [+(2+9+3+2+8+2)*20*2]
	cmp	sp,$t0
	beq	.Ldone			@ [+2]

	ldr	$t2,.LK_40_59
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_40_59:
___
BODY_40_59();

# Loop control for rounds 40..59, hand-over to 60..79, state update,
# outer block loop and epilogue.
$code .= <<___;
	cmp	$Xi,$t0
	bne	.L_40_59		@ [+(2+9+6+2+8+2)*20]

	ldr	$t2,.LK_60_79
	mov	$Xi,sp
	mov	$K,$t2
	b	.L_20_39_or_60_79	@ [+4]
.Ldone:
	mov	$t0,$ctx
	ldr	$t1,[$t0,#0]
	ldr	$t2,[$t0,#4]
	add	$a,$t1
	ldr	$t1,[$t0,#8]
	add	$b,$t2
	ldr	$t2,[$t0,#12]
	add	$c,$t1
	ldr	$t1,[$t0,#16]
	add	$d,$t2
	add	$e,$t1
	stmia	$t0!,{$a,$b,$c,$d,$e}	@ [+20]

	add	sp,#80*4		@ deallocate stack frame
	mov	$t0,$ctx		@ restore ctx
	mov	$t1,$inp		@ restore inp
	cmp	$t1,$len
	beq	.Lexit
	b	.Lloop			@ [+6] total 3212 cycles
.Lexit:
	pop	{r2-r7}
	mov	r8,r2
	mov	r9,r3
	mov	r10,r4
	mov	r11,r5
	mov	r12,r6
	mov	lr,r7
	pop	{r4-r7}
	bx	lr
.align	2
___

# Out-of-line copies of the shared sequences, needed only when the round
# bodies call them with "bl" rather than having them expanded in place.
if (!$inline) {
    $code .= ".Lcommon:\n" . common() . "\tmov pc,lr\n";
    $code .= ".Lrotate:\n" . rotate() . "\tmov pc,lr\n";
}

# Round constants and trailer.
$code .= <<___;
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for Thumb, CRYPTOGAMS by <appro\@openssl.org>"
___

print $code;
# Closing flushes the buffer; a failure here means the output is incomplete.
close STDOUT or die "error closing STDOUT: $!";