#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.

# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gave 10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process a byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As a result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have a pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).

# First command-line argument: the target "flavour".  Any flavour string
# containing "64" selects the 64-bit ABI, otherwise one containing "32"
# selects the 32-bit ABI; anything else is rejected.  The choice fixes the
# pointer size and the load/store mnemonics spliced into the assembly
# templates further down.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	= 8;
	$LRSAVE	= 2*$SIZE_T;	# offset (above the frame) where LR is stored
	$STU	= "stdu";	# store-with-update, used to allocate the frame
	$POP	= "ld";
	$PUSH	= "std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	= 4;
	$LRSAVE	= $SIZE_T;
	$STU	= "stwu";
	$POP	= "lwz";
	$PUSH	= "stw";
} else {
	die "nonsense $flavour";
}

# Locate the ppc-xlate.pl translator: first next to this script, then in
# ../../perlasm relative to it.  When $0 carries no directory component
# the match fails and $dir is the empty string, i.e. the current directory.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator; the next command-line argument (if any) is handed to it
# after the flavour.
#
# The open() call is parenthesised and checked with low-precedence "or":
# with the former `"...".shift || die ...` spelling the "||" applied to
# the (always true) command string, so a failed open() went unnoticed.
open(STDOUT, "| $^X $xlate $flavour " . shift)
	or die "can't call $xlate: $!";

# Size of the stack frame allocated by the exported entry points
# (they execute "$STU $sp,-$FRAME($sp)").
$FRAME=32*$SIZE_T;

# _data_word(LIST)
#
# Appends one "\t.long\tW,W" line to $code for every value in LIST, i.e.
# each 32-bit word is emitted twice in a row.  Processing stops at the
# first undefined element.  Returns nothing useful; the result is the
# side effect on the global $code.
#
# NOTE(review): the doubled layout appears to be what lets the round code
# load from byte offsets 1..3 into an 8-byte entry (see the Tbl1..Tbl3
# setup in Lppc_AES_encrypt) - confirm before changing the format.
#
# No prototype is declared: the sub is always called with a long argument
# list, and the old empty "()" prototype only worked because callers use
# the &-form, which bypasses prototypes.
sub _data_word
{
    my $word;
    while (defined($word = shift)) {
	$code .= sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word;
    }
}

# Register map used by the assembly templates.
$sp="r1";
$toc="r2";
$inp="r3";
$out="r4";
$key="r5";

# Table pointers.  Note that $Tbl0 shares r3 with $inp and $Tbl3 shares
# r2 with $toc.
$Tbl0="r3";
$Tbl1="r6";
$Tbl2="r7";
$Tbl3="r2";

# Cipher state words.
$s0="r8";
$s1="r9";
$s2="r10";
$s3="r11";

# Temporaries.
$t0="r12";
$t1="r13";
$t2="r14";
$t3="r15";

# Accumulators / scratch registers.
$acc00="r16";
$acc01="r17";
$acc02="r18";
$acc03="r19";

$acc04="r20";
$acc05="r21";
$acc06="r22";
$acc07="r23";

$acc08="r24";
$acc09="r25";
$acc10="r26";
$acc11="r27";

$acc12="r28";
$acc13="r29";
$acc14="r30";
$acc15="r31";
102238384Sjkim 103238384Sjkim# stay away from TLS pointer 104238384Sjkimif ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } 105238384Sjkimelse { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } 106238384Sjkim$mask80=$Tbl2; 107238384Sjkim$mask1b=$Tbl3; 108238384Sjkim 109238384Sjkim$code.=<<___; 110238384Sjkim.machine "any" 111238384Sjkim.text 112238384Sjkim 113238384Sjkim.align 7 114238384SjkimLAES_Te: 115238384Sjkim mflr r0 116238384Sjkim bcl 20,31,\$+4 117238384Sjkim mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry 118238384Sjkim addi $Tbl0,$Tbl0,`128-8` 119238384Sjkim mtlr r0 120238384Sjkim blr 121238384Sjkim .long 0 122238384Sjkim .byte 0,12,0x14,0,0,0,0,0 123238384Sjkim .space `64-9*4` 124238384SjkimLAES_Td: 125238384Sjkim mflr r0 126238384Sjkim bcl 20,31,\$+4 127238384Sjkim mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry 128238384Sjkim addi $Tbl0,$Tbl0,`128-64-8+2048+256` 129238384Sjkim mtlr r0 130238384Sjkim blr 131238384Sjkim .long 0 132238384Sjkim .byte 0,12,0x14,0,0,0,0,0 133238384Sjkim .space `128-64-9*4` 134238384Sjkim___ 135238384Sjkim&_data_word( 136238384Sjkim 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 137238384Sjkim 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, 138238384Sjkim 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 139238384Sjkim 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 140238384Sjkim 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 141238384Sjkim 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, 142238384Sjkim 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 143238384Sjkim 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, 144238384Sjkim 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 145238384Sjkim 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 146238384Sjkim 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 147238384Sjkim 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, 148238384Sjkim 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 149238384Sjkim 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, 150238384Sjkim 
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 151238384Sjkim 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 152238384Sjkim 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 153238384Sjkim 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, 154238384Sjkim 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 155238384Sjkim 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 156238384Sjkim 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 157238384Sjkim 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 158238384Sjkim 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 159238384Sjkim 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 160238384Sjkim 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 161238384Sjkim 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 162238384Sjkim 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 163238384Sjkim 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 164238384Sjkim 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 165238384Sjkim 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 166238384Sjkim 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 167238384Sjkim 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 168238384Sjkim 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 169238384Sjkim 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 170238384Sjkim 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 171238384Sjkim 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 172238384Sjkim 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 173238384Sjkim 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 174238384Sjkim 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 175238384Sjkim 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 176238384Sjkim 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 177238384Sjkim 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 178238384Sjkim 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 179238384Sjkim 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 180238384Sjkim 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 181238384Sjkim 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 
0x49a9a9e0, 182238384Sjkim 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 183238384Sjkim 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 184238384Sjkim 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 185238384Sjkim 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 186238384Sjkim 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 187238384Sjkim 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 188238384Sjkim 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 189238384Sjkim 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 190238384Sjkim 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 191238384Sjkim 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 192238384Sjkim 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 193238384Sjkim 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 194238384Sjkim 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 195238384Sjkim 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, 196238384Sjkim 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 197238384Sjkim 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, 198238384Sjkim 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 199238384Sjkim 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); 200238384Sjkim$code.=<<___; 201238384Sjkim.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 202238384Sjkim.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 203238384Sjkim.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 204238384Sjkim.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 205238384Sjkim.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 206238384Sjkim.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 207238384Sjkim.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a 208238384Sjkim.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 209238384Sjkim.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 210238384Sjkim.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 211238384Sjkim.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b 212238384Sjkim.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf 213238384Sjkim.byte 
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 214238384Sjkim.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 215238384Sjkim.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 216238384Sjkim.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 217238384Sjkim.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 218238384Sjkim.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 219238384Sjkim.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 220238384Sjkim.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb 221238384Sjkim.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c 222238384Sjkim.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 223238384Sjkim.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 224238384Sjkim.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 225238384Sjkim.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 226238384Sjkim.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a 227238384Sjkim.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e 228238384Sjkim.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e 229238384Sjkim.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 230238384Sjkim.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 231238384Sjkim.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 232238384Sjkim.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 233238384Sjkim___ 234238384Sjkim&_data_word( 235238384Sjkim 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 236238384Sjkim 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, 237238384Sjkim 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 238238384Sjkim 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 239238384Sjkim 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 240238384Sjkim 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, 241238384Sjkim 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 242238384Sjkim 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, 243238384Sjkim 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 244238384Sjkim 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 
245238384Sjkim 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 246238384Sjkim 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, 247238384Sjkim 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 248238384Sjkim 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, 249238384Sjkim 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 250238384Sjkim 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 251238384Sjkim 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 252238384Sjkim 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, 253238384Sjkim 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 254238384Sjkim 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, 255238384Sjkim 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 256238384Sjkim 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 257238384Sjkim 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 258238384Sjkim 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, 259238384Sjkim 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 260238384Sjkim 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 261238384Sjkim 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 262238384Sjkim 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 263238384Sjkim 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 264238384Sjkim 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, 265238384Sjkim 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 266238384Sjkim 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, 267238384Sjkim 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 268238384Sjkim 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 269238384Sjkim 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 270238384Sjkim 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, 271238384Sjkim 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 272238384Sjkim 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, 273238384Sjkim 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 274238384Sjkim 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 275238384Sjkim 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 276238384Sjkim 0xa6f581cf, 0xa57ade28, 
0xdab78e26, 0x3fadbfa4, 277238384Sjkim 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 278238384Sjkim 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, 279238384Sjkim 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 280238384Sjkim 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 281238384Sjkim 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 282238384Sjkim 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, 283238384Sjkim 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 284238384Sjkim 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, 285238384Sjkim 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 286238384Sjkim 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 287238384Sjkim 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 288238384Sjkim 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, 289238384Sjkim 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 290238384Sjkim 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, 291238384Sjkim 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 292238384Sjkim 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 293238384Sjkim 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 294238384Sjkim 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, 295238384Sjkim 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 296238384Sjkim 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, 297238384Sjkim 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 298238384Sjkim 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); 299238384Sjkim$code.=<<___; 300238384Sjkim.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 301238384Sjkim.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 302238384Sjkim.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 303238384Sjkim.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 304238384Sjkim.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 305238384Sjkim.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 306238384Sjkim.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 307238384Sjkim.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 308238384Sjkim.byte 
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 309238384Sjkim.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 310238384Sjkim.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 311238384Sjkim.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 312238384Sjkim.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 313238384Sjkim.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 314238384Sjkim.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 315238384Sjkim.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 316238384Sjkim.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 317238384Sjkim.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 318238384Sjkim.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 319238384Sjkim.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 320238384Sjkim.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 321238384Sjkim.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 322238384Sjkim.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 323238384Sjkim.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 324238384Sjkim.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 325238384Sjkim.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 326238384Sjkim.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 327238384Sjkim.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 328238384Sjkim.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 329238384Sjkim.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 330238384Sjkim.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 331238384Sjkim.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 332238384Sjkim 333238384Sjkim 334238384Sjkim.globl .AES_encrypt 335238384Sjkim.align 7 336238384Sjkim.AES_encrypt: 337238384Sjkim $STU $sp,-$FRAME($sp) 338238384Sjkim mflr r0 339238384Sjkim 340238384Sjkim $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) 341238384Sjkim $PUSH r13,`$FRAME-$SIZE_T*19`($sp) 342238384Sjkim $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 343238384Sjkim $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 344238384Sjkim $PUSH 
r16,`$FRAME-$SIZE_T*16`($sp) 345238384Sjkim $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 346238384Sjkim $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 347238384Sjkim $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 348238384Sjkim $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 349238384Sjkim $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 350238384Sjkim $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 351238384Sjkim $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 352238384Sjkim $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 353238384Sjkim $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 354238384Sjkim $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 355238384Sjkim $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 356238384Sjkim $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 357238384Sjkim $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 358238384Sjkim $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 359238384Sjkim $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 360238384Sjkim $PUSH r0,`$FRAME+$LRSAVE`($sp) 361238384Sjkim 362238384Sjkim andi. $t0,$inp,3 363238384Sjkim andi. $t1,$out,3 364238384Sjkim or. $t0,$t0,$t1 365238384Sjkim bne Lenc_unaligned 366238384Sjkim 367238384SjkimLenc_unaligned_ok: 368238384Sjkim lwz $s0,0($inp) 369238384Sjkim lwz $s1,4($inp) 370238384Sjkim lwz $s2,8($inp) 371238384Sjkim lwz $s3,12($inp) 372238384Sjkim bl LAES_Te 373238384Sjkim bl Lppc_AES_encrypt_compact 374238384Sjkim stw $s0,0($out) 375238384Sjkim stw $s1,4($out) 376238384Sjkim stw $s2,8($out) 377238384Sjkim stw $s3,12($out) 378238384Sjkim b Lenc_done 379238384Sjkim 380238384SjkimLenc_unaligned: 381238384Sjkim subfic $t0,$inp,4096 382238384Sjkim subfic $t1,$out,4096 383238384Sjkim andi. $t0,$t0,4096-16 384238384Sjkim beq Lenc_xpage 385238384Sjkim andi. 
$t1,$t1,4096-16 386238384Sjkim bne Lenc_unaligned_ok 387238384Sjkim 388238384SjkimLenc_xpage: 389238384Sjkim lbz $acc00,0($inp) 390238384Sjkim lbz $acc01,1($inp) 391238384Sjkim lbz $acc02,2($inp) 392238384Sjkim lbz $s0,3($inp) 393238384Sjkim lbz $acc04,4($inp) 394238384Sjkim lbz $acc05,5($inp) 395238384Sjkim lbz $acc06,6($inp) 396238384Sjkim lbz $s1,7($inp) 397238384Sjkim lbz $acc08,8($inp) 398238384Sjkim lbz $acc09,9($inp) 399238384Sjkim lbz $acc10,10($inp) 400238384Sjkim insrwi $s0,$acc00,8,0 401238384Sjkim lbz $s2,11($inp) 402238384Sjkim insrwi $s1,$acc04,8,0 403238384Sjkim lbz $acc12,12($inp) 404238384Sjkim insrwi $s0,$acc01,8,8 405238384Sjkim lbz $acc13,13($inp) 406238384Sjkim insrwi $s1,$acc05,8,8 407238384Sjkim lbz $acc14,14($inp) 408238384Sjkim insrwi $s0,$acc02,8,16 409238384Sjkim lbz $s3,15($inp) 410238384Sjkim insrwi $s1,$acc06,8,16 411238384Sjkim insrwi $s2,$acc08,8,0 412238384Sjkim insrwi $s3,$acc12,8,0 413238384Sjkim insrwi $s2,$acc09,8,8 414238384Sjkim insrwi $s3,$acc13,8,8 415238384Sjkim insrwi $s2,$acc10,8,16 416238384Sjkim insrwi $s3,$acc14,8,16 417238384Sjkim 418238384Sjkim bl LAES_Te 419238384Sjkim bl Lppc_AES_encrypt_compact 420238384Sjkim 421238384Sjkim extrwi $acc00,$s0,8,0 422238384Sjkim extrwi $acc01,$s0,8,8 423238384Sjkim stb $acc00,0($out) 424238384Sjkim extrwi $acc02,$s0,8,16 425238384Sjkim stb $acc01,1($out) 426238384Sjkim stb $acc02,2($out) 427238384Sjkim extrwi $acc04,$s1,8,0 428238384Sjkim stb $s0,3($out) 429238384Sjkim extrwi $acc05,$s1,8,8 430238384Sjkim stb $acc04,4($out) 431238384Sjkim extrwi $acc06,$s1,8,16 432238384Sjkim stb $acc05,5($out) 433238384Sjkim stb $acc06,6($out) 434238384Sjkim extrwi $acc08,$s2,8,0 435238384Sjkim stb $s1,7($out) 436238384Sjkim extrwi $acc09,$s2,8,8 437238384Sjkim stb $acc08,8($out) 438238384Sjkim extrwi $acc10,$s2,8,16 439238384Sjkim stb $acc09,9($out) 440238384Sjkim stb $acc10,10($out) 441238384Sjkim extrwi $acc12,$s3,8,0 442238384Sjkim stb $s2,11($out) 443238384Sjkim extrwi $acc13,$s3,8,8 
444238384Sjkim stb $acc12,12($out) 445238384Sjkim extrwi $acc14,$s3,8,16 446238384Sjkim stb $acc13,13($out) 447238384Sjkim stb $acc14,14($out) 448238384Sjkim stb $s3,15($out) 449238384Sjkim 450238384SjkimLenc_done: 451238384Sjkim $POP r0,`$FRAME+$LRSAVE`($sp) 452238384Sjkim $POP $toc,`$FRAME-$SIZE_T*20`($sp) 453238384Sjkim $POP r13,`$FRAME-$SIZE_T*19`($sp) 454238384Sjkim $POP r14,`$FRAME-$SIZE_T*18`($sp) 455238384Sjkim $POP r15,`$FRAME-$SIZE_T*17`($sp) 456238384Sjkim $POP r16,`$FRAME-$SIZE_T*16`($sp) 457238384Sjkim $POP r17,`$FRAME-$SIZE_T*15`($sp) 458238384Sjkim $POP r18,`$FRAME-$SIZE_T*14`($sp) 459238384Sjkim $POP r19,`$FRAME-$SIZE_T*13`($sp) 460238384Sjkim $POP r20,`$FRAME-$SIZE_T*12`($sp) 461238384Sjkim $POP r21,`$FRAME-$SIZE_T*11`($sp) 462238384Sjkim $POP r22,`$FRAME-$SIZE_T*10`($sp) 463238384Sjkim $POP r23,`$FRAME-$SIZE_T*9`($sp) 464238384Sjkim $POP r24,`$FRAME-$SIZE_T*8`($sp) 465238384Sjkim $POP r25,`$FRAME-$SIZE_T*7`($sp) 466238384Sjkim $POP r26,`$FRAME-$SIZE_T*6`($sp) 467238384Sjkim $POP r27,`$FRAME-$SIZE_T*5`($sp) 468238384Sjkim $POP r28,`$FRAME-$SIZE_T*4`($sp) 469238384Sjkim $POP r29,`$FRAME-$SIZE_T*3`($sp) 470238384Sjkim $POP r30,`$FRAME-$SIZE_T*2`($sp) 471238384Sjkim $POP r31,`$FRAME-$SIZE_T*1`($sp) 472238384Sjkim mtlr r0 473238384Sjkim addi $sp,$sp,$FRAME 474238384Sjkim blr 475238384Sjkim .long 0 476238384Sjkim .byte 0,12,4,1,0x80,18,3,0 477238384Sjkim .long 0 478238384Sjkim 479238384Sjkim.align 5 480238384SjkimLppc_AES_encrypt: 481238384Sjkim lwz $acc00,240($key) 482238384Sjkim addi $Tbl1,$Tbl0,3 483238384Sjkim lwz $t0,0($key) 484238384Sjkim addi $Tbl2,$Tbl0,2 485238384Sjkim lwz $t1,4($key) 486238384Sjkim addi $Tbl3,$Tbl0,1 487238384Sjkim lwz $t2,8($key) 488238384Sjkim addi $acc00,$acc00,-1 489238384Sjkim lwz $t3,12($key) 490238384Sjkim addi $key,$key,16 491238384Sjkim xor $s0,$s0,$t0 492238384Sjkim xor $s1,$s1,$t1 493238384Sjkim xor $s2,$s2,$t2 494238384Sjkim xor $s3,$s3,$t3 495238384Sjkim mtctr $acc00 496238384Sjkim.align 4 497238384SjkimLenc_loop: 
498238384Sjkim rlwinm $acc00,$s0,`32-24+3`,21,28 499238384Sjkim rlwinm $acc01,$s1,`32-24+3`,21,28 500238384Sjkim rlwinm $acc02,$s2,`32-24+3`,21,28 501238384Sjkim rlwinm $acc03,$s3,`32-24+3`,21,28 502238384Sjkim lwz $t0,0($key) 503238384Sjkim rlwinm $acc04,$s1,`32-16+3`,21,28 504238384Sjkim lwz $t1,4($key) 505238384Sjkim rlwinm $acc05,$s2,`32-16+3`,21,28 506238384Sjkim lwz $t2,8($key) 507238384Sjkim rlwinm $acc06,$s3,`32-16+3`,21,28 508238384Sjkim lwz $t3,12($key) 509238384Sjkim rlwinm $acc07,$s0,`32-16+3`,21,28 510238384Sjkim lwzx $acc00,$Tbl0,$acc00 511238384Sjkim rlwinm $acc08,$s2,`32-8+3`,21,28 512238384Sjkim lwzx $acc01,$Tbl0,$acc01 513238384Sjkim rlwinm $acc09,$s3,`32-8+3`,21,28 514238384Sjkim lwzx $acc02,$Tbl0,$acc02 515238384Sjkim rlwinm $acc10,$s0,`32-8+3`,21,28 516238384Sjkim lwzx $acc03,$Tbl0,$acc03 517238384Sjkim rlwinm $acc11,$s1,`32-8+3`,21,28 518238384Sjkim lwzx $acc04,$Tbl1,$acc04 519238384Sjkim rlwinm $acc12,$s3,`0+3`,21,28 520238384Sjkim lwzx $acc05,$Tbl1,$acc05 521238384Sjkim rlwinm $acc13,$s0,`0+3`,21,28 522238384Sjkim lwzx $acc06,$Tbl1,$acc06 523238384Sjkim rlwinm $acc14,$s1,`0+3`,21,28 524238384Sjkim lwzx $acc07,$Tbl1,$acc07 525238384Sjkim rlwinm $acc15,$s2,`0+3`,21,28 526238384Sjkim lwzx $acc08,$Tbl2,$acc08 527238384Sjkim xor $t0,$t0,$acc00 528238384Sjkim lwzx $acc09,$Tbl2,$acc09 529238384Sjkim xor $t1,$t1,$acc01 530238384Sjkim lwzx $acc10,$Tbl2,$acc10 531238384Sjkim xor $t2,$t2,$acc02 532238384Sjkim lwzx $acc11,$Tbl2,$acc11 533238384Sjkim xor $t3,$t3,$acc03 534238384Sjkim lwzx $acc12,$Tbl3,$acc12 535238384Sjkim xor $t0,$t0,$acc04 536238384Sjkim lwzx $acc13,$Tbl3,$acc13 537238384Sjkim xor $t1,$t1,$acc05 538238384Sjkim lwzx $acc14,$Tbl3,$acc14 539238384Sjkim xor $t2,$t2,$acc06 540238384Sjkim lwzx $acc15,$Tbl3,$acc15 541238384Sjkim xor $t3,$t3,$acc07 542238384Sjkim xor $t0,$t0,$acc08 543238384Sjkim xor $t1,$t1,$acc09 544238384Sjkim xor $t2,$t2,$acc10 545238384Sjkim xor $t3,$t3,$acc11 546238384Sjkim xor $s0,$t0,$acc12 547238384Sjkim xor 
$s1,$t1,$acc13 548238384Sjkim xor $s2,$t2,$acc14 549238384Sjkim xor $s3,$t3,$acc15 550238384Sjkim addi $key,$key,16 551238384Sjkim bdnz- Lenc_loop 552238384Sjkim 553238384Sjkim addi $Tbl2,$Tbl0,2048 554238384Sjkim nop 555238384Sjkim lwz $t0,0($key) 556238384Sjkim rlwinm $acc00,$s0,`32-24`,24,31 557238384Sjkim lwz $t1,4($key) 558238384Sjkim rlwinm $acc01,$s1,`32-24`,24,31 559238384Sjkim lwz $t2,8($key) 560238384Sjkim rlwinm $acc02,$s2,`32-24`,24,31 561238384Sjkim lwz $t3,12($key) 562238384Sjkim rlwinm $acc03,$s3,`32-24`,24,31 563238384Sjkim lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 564238384Sjkim rlwinm $acc04,$s1,`32-16`,24,31 565238384Sjkim lwz $acc09,`2048+32`($Tbl0) 566238384Sjkim rlwinm $acc05,$s2,`32-16`,24,31 567238384Sjkim lwz $acc10,`2048+64`($Tbl0) 568238384Sjkim rlwinm $acc06,$s3,`32-16`,24,31 569238384Sjkim lwz $acc11,`2048+96`($Tbl0) 570238384Sjkim rlwinm $acc07,$s0,`32-16`,24,31 571238384Sjkim lwz $acc12,`2048+128`($Tbl0) 572238384Sjkim rlwinm $acc08,$s2,`32-8`,24,31 573238384Sjkim lwz $acc13,`2048+160`($Tbl0) 574238384Sjkim rlwinm $acc09,$s3,`32-8`,24,31 575238384Sjkim lwz $acc14,`2048+192`($Tbl0) 576238384Sjkim rlwinm $acc10,$s0,`32-8`,24,31 577238384Sjkim lwz $acc15,`2048+224`($Tbl0) 578238384Sjkim rlwinm $acc11,$s1,`32-8`,24,31 579238384Sjkim lbzx $acc00,$Tbl2,$acc00 580238384Sjkim rlwinm $acc12,$s3,`0`,24,31 581238384Sjkim lbzx $acc01,$Tbl2,$acc01 582238384Sjkim rlwinm $acc13,$s0,`0`,24,31 583238384Sjkim lbzx $acc02,$Tbl2,$acc02 584238384Sjkim rlwinm $acc14,$s1,`0`,24,31 585238384Sjkim lbzx $acc03,$Tbl2,$acc03 586238384Sjkim rlwinm $acc15,$s2,`0`,24,31 587238384Sjkim lbzx $acc04,$Tbl2,$acc04 588238384Sjkim rlwinm $s0,$acc00,24,0,7 589238384Sjkim lbzx $acc05,$Tbl2,$acc05 590238384Sjkim rlwinm $s1,$acc01,24,0,7 591238384Sjkim lbzx $acc06,$Tbl2,$acc06 592238384Sjkim rlwinm $s2,$acc02,24,0,7 593238384Sjkim lbzx $acc07,$Tbl2,$acc07 594238384Sjkim rlwinm $s3,$acc03,24,0,7 595238384Sjkim lbzx $acc08,$Tbl2,$acc08 596238384Sjkim rlwimi $s0,$acc04,16,8,15 
597238384Sjkim lbzx $acc09,$Tbl2,$acc09 598238384Sjkim rlwimi $s1,$acc05,16,8,15 599238384Sjkim lbzx $acc10,$Tbl2,$acc10 600238384Sjkim rlwimi $s2,$acc06,16,8,15 601238384Sjkim lbzx $acc11,$Tbl2,$acc11 602238384Sjkim rlwimi $s3,$acc07,16,8,15 603238384Sjkim lbzx $acc12,$Tbl2,$acc12 604238384Sjkim rlwimi $s0,$acc08,8,16,23 605238384Sjkim lbzx $acc13,$Tbl2,$acc13 606238384Sjkim rlwimi $s1,$acc09,8,16,23 607238384Sjkim lbzx $acc14,$Tbl2,$acc14 608238384Sjkim rlwimi $s2,$acc10,8,16,23 609238384Sjkim lbzx $acc15,$Tbl2,$acc15 610238384Sjkim rlwimi $s3,$acc11,8,16,23 611238384Sjkim or $s0,$s0,$acc12 612238384Sjkim or $s1,$s1,$acc13 613238384Sjkim or $s2,$s2,$acc14 614238384Sjkim or $s3,$s3,$acc15 615238384Sjkim xor $s0,$s0,$t0 616238384Sjkim xor $s1,$s1,$t1 617238384Sjkim xor $s2,$s2,$t2 618238384Sjkim xor $s3,$s3,$t3 619238384Sjkim blr 620238384Sjkim .long 0 621238384Sjkim .byte 0,12,0x14,0,0,0,0,0 622238384Sjkim 623238384Sjkim.align 4 624238384SjkimLppc_AES_encrypt_compact: 625238384Sjkim lwz $acc00,240($key) 626238384Sjkim addi $Tbl1,$Tbl0,2048 627238384Sjkim lwz $t0,0($key) 628238384Sjkim lis $mask80,0x8080 629238384Sjkim lwz $t1,4($key) 630238384Sjkim lis $mask1b,0x1b1b 631238384Sjkim lwz $t2,8($key) 632238384Sjkim ori $mask80,$mask80,0x8080 633238384Sjkim lwz $t3,12($key) 634238384Sjkim ori $mask1b,$mask1b,0x1b1b 635238384Sjkim addi $key,$key,16 636238384Sjkim mtctr $acc00 637238384Sjkim.align 4 638238384SjkimLenc_compact_loop: 639238384Sjkim xor $s0,$s0,$t0 640238384Sjkim xor $s1,$s1,$t1 641238384Sjkim rlwinm $acc00,$s0,`32-24`,24,31 642238384Sjkim xor $s2,$s2,$t2 643238384Sjkim rlwinm $acc01,$s1,`32-24`,24,31 644238384Sjkim xor $s3,$s3,$t3 645238384Sjkim rlwinm $acc02,$s2,`32-24`,24,31 646238384Sjkim rlwinm $acc03,$s3,`32-24`,24,31 647238384Sjkim rlwinm $acc04,$s1,`32-16`,24,31 648238384Sjkim rlwinm $acc05,$s2,`32-16`,24,31 649238384Sjkim rlwinm $acc06,$s3,`32-16`,24,31 650238384Sjkim rlwinm $acc07,$s0,`32-16`,24,31 651238384Sjkim lbzx $acc00,$Tbl1,$acc00 
652238384Sjkim rlwinm $acc08,$s2,`32-8`,24,31 653238384Sjkim lbzx $acc01,$Tbl1,$acc01 654238384Sjkim rlwinm $acc09,$s3,`32-8`,24,31 655238384Sjkim lbzx $acc02,$Tbl1,$acc02 656238384Sjkim rlwinm $acc10,$s0,`32-8`,24,31 657238384Sjkim lbzx $acc03,$Tbl1,$acc03 658238384Sjkim rlwinm $acc11,$s1,`32-8`,24,31 659238384Sjkim lbzx $acc04,$Tbl1,$acc04 660238384Sjkim rlwinm $acc12,$s3,`0`,24,31 661238384Sjkim lbzx $acc05,$Tbl1,$acc05 662238384Sjkim rlwinm $acc13,$s0,`0`,24,31 663238384Sjkim lbzx $acc06,$Tbl1,$acc06 664238384Sjkim rlwinm $acc14,$s1,`0`,24,31 665238384Sjkim lbzx $acc07,$Tbl1,$acc07 666238384Sjkim rlwinm $acc15,$s2,`0`,24,31 667238384Sjkim lbzx $acc08,$Tbl1,$acc08 668238384Sjkim rlwinm $s0,$acc00,24,0,7 669238384Sjkim lbzx $acc09,$Tbl1,$acc09 670238384Sjkim rlwinm $s1,$acc01,24,0,7 671238384Sjkim lbzx $acc10,$Tbl1,$acc10 672238384Sjkim rlwinm $s2,$acc02,24,0,7 673238384Sjkim lbzx $acc11,$Tbl1,$acc11 674238384Sjkim rlwinm $s3,$acc03,24,0,7 675238384Sjkim lbzx $acc12,$Tbl1,$acc12 676238384Sjkim rlwimi $s0,$acc04,16,8,15 677238384Sjkim lbzx $acc13,$Tbl1,$acc13 678238384Sjkim rlwimi $s1,$acc05,16,8,15 679238384Sjkim lbzx $acc14,$Tbl1,$acc14 680238384Sjkim rlwimi $s2,$acc06,16,8,15 681238384Sjkim lbzx $acc15,$Tbl1,$acc15 682238384Sjkim rlwimi $s3,$acc07,16,8,15 683238384Sjkim rlwimi $s0,$acc08,8,16,23 684238384Sjkim rlwimi $s1,$acc09,8,16,23 685238384Sjkim rlwimi $s2,$acc10,8,16,23 686238384Sjkim rlwimi $s3,$acc11,8,16,23 687238384Sjkim lwz $t0,0($key) 688238384Sjkim or $s0,$s0,$acc12 689238384Sjkim lwz $t1,4($key) 690238384Sjkim or $s1,$s1,$acc13 691238384Sjkim lwz $t2,8($key) 692238384Sjkim or $s2,$s2,$acc14 693238384Sjkim lwz $t3,12($key) 694238384Sjkim or $s3,$s3,$acc15 695238384Sjkim 696238384Sjkim addi $key,$key,16 697238384Sjkim bdz Lenc_compact_done 698238384Sjkim 699238384Sjkim and $acc00,$s0,$mask80 # r1=r0&0x80808080 700238384Sjkim and $acc01,$s1,$mask80 701238384Sjkim and $acc02,$s2,$mask80 702238384Sjkim and $acc03,$s3,$mask80 703238384Sjkim srwi 
$acc04,$acc00,7 # r1>>7 704238384Sjkim andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 705238384Sjkim srwi $acc05,$acc01,7 706238384Sjkim andc $acc09,$s1,$mask80 707238384Sjkim srwi $acc06,$acc02,7 708238384Sjkim andc $acc10,$s2,$mask80 709238384Sjkim srwi $acc07,$acc03,7 710238384Sjkim andc $acc11,$s3,$mask80 711238384Sjkim sub $acc00,$acc00,$acc04 # r1-(r1>>7) 712238384Sjkim sub $acc01,$acc01,$acc05 713238384Sjkim sub $acc02,$acc02,$acc06 714238384Sjkim sub $acc03,$acc03,$acc07 715238384Sjkim add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 716238384Sjkim add $acc09,$acc09,$acc09 717238384Sjkim add $acc10,$acc10,$acc10 718238384Sjkim add $acc11,$acc11,$acc11 719238384Sjkim and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 720238384Sjkim and $acc01,$acc01,$mask1b 721238384Sjkim and $acc02,$acc02,$mask1b 722238384Sjkim and $acc03,$acc03,$mask1b 723238384Sjkim xor $acc00,$acc00,$acc08 # r2 724238384Sjkim xor $acc01,$acc01,$acc09 725238384Sjkim rotlwi $acc12,$s0,16 # ROTATE(r0,16) 726238384Sjkim xor $acc02,$acc02,$acc10 727238384Sjkim rotlwi $acc13,$s1,16 728238384Sjkim xor $acc03,$acc03,$acc11 729238384Sjkim rotlwi $acc14,$s2,16 730238384Sjkim 731238384Sjkim xor $s0,$s0,$acc00 # r0^r2 732238384Sjkim rotlwi $acc15,$s3,16 733238384Sjkim xor $s1,$s1,$acc01 734238384Sjkim rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) 735238384Sjkim xor $s2,$s2,$acc02 736238384Sjkim rotrwi $s1,$s1,24 737238384Sjkim xor $s3,$s3,$acc03 738238384Sjkim rotrwi $s2,$s2,24 739238384Sjkim xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 740238384Sjkim rotrwi $s3,$s3,24 741238384Sjkim xor $s1,$s1,$acc01 742238384Sjkim xor $s2,$s2,$acc02 743238384Sjkim xor $s3,$s3,$acc03 744238384Sjkim rotlwi $acc08,$acc12,8 # ROTATE(r0,24) 745238384Sjkim xor $s0,$s0,$acc12 # 746238384Sjkim rotlwi $acc09,$acc13,8 747238384Sjkim xor $s1,$s1,$acc13 748238384Sjkim rotlwi $acc10,$acc14,8 749238384Sjkim xor $s2,$s2,$acc14 750238384Sjkim rotlwi $acc11,$acc15,8 751238384Sjkim xor $s3,$s3,$acc15 752238384Sjkim xor $s0,$s0,$acc08 # 753238384Sjkim 
xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11

	b	Lenc_compact_loop
.align	4
Lenc_compact_done:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

# AES_decrypt entry point.  Register roles (inp, out, key, sp, toc) come
# from the Perl variables interpolated below.  Builds a stack frame, saves
# LR, toc and r13-r31, loads one 16-byte block into s0-s3, calls LAES_Td
# followed by Lppc_AES_decrypt_compact, and writes s0-s3 to out.
# NOTE(review): LAES_Td is defined outside this part of the file;
# presumably it points Tbl0 at the decryption tables - confirm there.
.globl	.AES_decrypt
.align	7
.AES_decrypt:
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	# take the word-access path directly when both inp and out
	# have their two low address bits clear
	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Ldec_unaligned

Ldec_unaligned_ok:
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
	lwz	$s3,12($inp)
	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
	b	Ldec_done

Ldec_unaligned:
	# misaligned pointers still use the word path above, except when
	# the distance from inp or out up to the next multiple of 4096
	# is less than 16; then fall into the byte-at-a-time path
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	andi.	$t0,$t0,4096-16
	beq	Ldec_xpage
	andi.	$t1,$t1,4096-16
	bne	Ldec_unaligned_ok

Ldec_xpage:
	# assemble each word from four byte loads, lowest address in the
	# most significant byte
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact

	# split the four result words back into bytes, same byte order
	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Ldec_done:
	# restore everything saved in the prologue and release the frame
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0

# Table-driven decryption of the block held in s0-s3.  The AES_decrypt
# code above does not call this routine; it calls the compact variant.
# The word at offset 240 of the key schedule is read as the round count
# and CTR is set to that value minus one.  Inside the loop each state
# byte is extracted already scaled by 8 and used to index word loads
# from Tbl0 and from Tbl0 plus 3, 2 and 1 bytes (Tbl1, Tbl2, Tbl3).
# The last round uses byte loads from the table at Tbl0+2048 (Td4).
.align	5
Lppc_AES_decrypt:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
.align	4
Ldec_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s3,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s0,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s1,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s2,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz-	Ldec_loop

	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	# prefetch Td4
	rlwinm	$acc04,$s3,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s0,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	lbzx	$acc00,$Tbl2,$acc00
	lwz	$acc11,`2048+96`($Tbl0)
	lbzx	$acc01,$Tbl2,$acc01
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc06,$s1,`32-16`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc07,$s2,`32-16`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl2,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl2,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl2,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl2,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

# Compact decryption of the block held in s0-s3; this is the routine
# AES_decrypt calls.  The only table it reads is the byte table at
# Tbl0+2048; the column mixing is computed arithmetically with the
# 0x80808080 and 0x1b1b1b1b masks built here.  CTR is loaded with the
# word at offset 240 of the key schedule and the loop exits through
# bdz right after the byte substitution of the final iteration.
.align	4
Lppc_AES_decrypt_compact:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,2048
	lwz	$t0,0($key)
	lis	$mask80,0x8080
	lwz	$t1,4($key)
	lis	$mask1b,0x1b1b
	lwz	$t2,8($key)
	ori	$mask80,$mask80,0x8080
	lwz	$t3,12($key)
	ori	$mask1b,$mask1b,0x1b1b
	addi	$key,$key,16
___
# 64-bit build only: copy each 32-bit mask into the upper half of its
# register so the masks can be applied to two packed words at once.
$code.=<<___ if ($SIZE_T==8);
	insrdi	$mask80,$mask80,32,0
	insrdi	$mask1b,$mask1b,32,0
___
# Common loop head: add the round key, then substitute all sixteen state
# bytes through the byte table and reassemble s0-s3.
$code.=<<___;
	mtctr	$acc00
.align	4
Ldec_compact_loop:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	rlwinm	$acc00,$s0,`32-24`,24,31
	xor	$s2,$s2,$t2
	rlwinm	$acc01,$s1,`32-24`,24,31
	xor	$s3,$s3,$t3
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s3,`32-16`,24,31
	rlwinm	$acc05,$s0,`32-16`,24,31
	rlwinm	$acc06,$s1,`32-16`,24,31
	rlwinm	$acc07,$s2,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	rlwinm	$acc08,$s2,`32-8`,24,31
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl1,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl1,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	lwz	$t0,0($key)
	or	$s0,$s0,$acc12
	lwz	$t1,4($key)
	or	$s1,$s1,$acc13
	lwz	$t2,8($key)
	or	$s2,$s2,$acc14
	lwz	$t3,12($key)
	or	$s3,$s3,$acc15

	addi	$key,$key,16
	bdz	Ldec_compact_done
___
# 64-bit build only: pack s1 above s0 and s3 above s2, derive r2, r4 and
# r8 on two words per register, then extract the upper halves into the
# odd-numbered accumulators so the common tail sees one word per register.
$code.=<<___ if ($SIZE_T==8);
	# vectorized permutation improves decrypt performance by 10%
	insrdi	$s0,$s1,32,0
	insrdi	$s2,$s3,32,0

	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc02,$s2,$mask80
	srdi	$acc04,$acc00,7		# r1>>7
	srdi	$acc06,$acc02,7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	andc	$acc10,$s2,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc02,$acc02,$acc06
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc10,$acc10,$acc10
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc02,$acc02,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc02,$acc02,$acc10

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc06,$acc02,$mask80
	srdi	$acc08,$acc04,7		# r1>>7
	srdi	$acc10,$acc06,7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	andc	$acc14,$acc02,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc06,$acc06,$acc10
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc06,$acc06,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc06,$acc06,$acc14

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc10,$acc06,$mask80
	srdi	$acc12,$acc08,7		# r1>>7
	srdi	$acc14,$acc10,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	sub	$acc10,$acc10,$acc14
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc14,$acc06,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc10,$acc10,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc10,$acc10,$acc14

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc02,$acc02,$s2
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc06,$acc06,$s2

	extrdi	$acc01,$acc00,32,0
	extrdi	$acc03,$acc02,32,0
	extrdi	$acc05,$acc04,32,0
	extrdi	$acc07,$acc06,32,0
	extrdi	$acc09,$acc08,32,0
	extrdi	$acc11,$acc10,32,0
___
# 32-bit build only: the same r2, r4 and r8 derivation, one state word
# per register.
$code.=<<___ if ($SIZE_T==4);
	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc01,$s1,$mask80
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	srwi	$acc05,$acc01,7
	andc	$acc09,$s1,$mask80
	srwi	$acc06,$acc02,7
	andc	$acc10,$s2,$mask80
	srwi	$acc07,$acc03,7
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
	sub	$acc02,$acc02,$acc06
	sub	$acc03,$acc03,$acc07
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc09,$acc09,$acc09
	add	$acc10,$acc10,$acc10
	add	$acc11,$acc11,$acc11
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc01,$acc01,$mask1b
	and	$acc02,$acc02,$mask1b
	and	$acc03,$acc03,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc05,$acc01,$mask80
	and	$acc06,$acc02,$mask80
	and	$acc07,$acc03,$mask80
	srwi	$acc08,$acc04,7		# r1>>7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	srwi	$acc09,$acc05,7
	andc	$acc13,$acc01,$mask80
	srwi	$acc10,$acc06,7
	andc	$acc14,$acc02,$mask80
	srwi	$acc11,$acc07,7
	andc	$acc15,$acc03,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc05,$acc05,$acc09
	sub	$acc06,$acc06,$acc10
	sub	$acc07,$acc07,$acc11
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc05,$acc05,$mask1b
	and	$acc06,$acc06,$mask1b
	and	$acc07,$acc07,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc05,$acc05,$acc13
	xor	$acc06,$acc06,$acc14
	xor	$acc07,$acc07,$acc15

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc09,$acc05,$mask80
	srwi	$acc12,$acc08,7		# r1>>7
	and	$acc10,$acc06,$mask80
	srwi	$acc13,$acc09,7
	and	$acc11,$acc07,$mask80
	srwi	$acc14,$acc10,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	srwi	$acc15,$acc11,7
	sub	$acc09,$acc09,$acc13
	sub	$acc10,$acc10,$acc14
	sub	$acc11,$acc11,$acc15
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc13,$acc05,$mask80
	andc	$acc14,$acc06,$mask80
	andc	$acc15,$acc07,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc09,$acc09,$mask1b
	and	$acc10,$acc10,$mask1b
	and	$acc11,$acc11,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc09,$acc09,$acc13
	xor	$acc10,$acc10,$acc14
	xor	$acc11,$acc11,$acc15

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc01,$acc01,$s1
	xor	$acc02,$acc02,$s2
	xor	$acc03,$acc03,$s3
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc05,$acc05,$s1
	xor	$acc06,$acc06,$s2
	xor	$acc07,$acc07,$s3
___
# Common loop tail: fold the rotated r0, r2^r0, r4^r0 and r8 terms into
# s0-s3 and branch back; Ldec_compact_done applies the last round key.
$code.=<<___;
	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
	rotrwi	$s1,$s1,8
	xor	$s0,$s0,$acc00		# ^= r2^r0
	rotrwi	$s2,$s2,8
	xor	$s1,$s1,$acc01
	rotrwi	$s3,$s3,8
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	xor	$acc00,$acc00,$acc08
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11
	xor	$s0,$s0,$acc04		# ^= r4^r0
	rotrwi	$acc00,$acc00,24
	xor	$s1,$s1,$acc05
	rotrwi	$acc01,$acc01,24
	xor	$s2,$s2,$acc06
	rotrwi	$acc02,$acc02,24
	xor	$s3,$s3,$acc07
	rotrwi	$acc03,$acc03,24
	xor	$acc04,$acc04,$acc08
	xor	$acc05,$acc05,$acc09
	xor	$acc06,$acc06,$acc10
	xor	$acc07,$acc07,$acc11
	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
	rotrwi	$acc04,$acc04,16
	xor	$s1,$s1,$acc09
	rotrwi	$acc05,$acc05,16
	xor	$s2,$s2,$acc10
	rotrwi	$acc06,$acc06,16
	xor	$s3,$s3,$acc11
	rotrwi	$acc07,$acc07,16
	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
	rotrwi	$acc08,$acc08,8
	xor	$s1,$s1,$acc01
	rotrwi	$acc09,$acc09,8
	xor	$s2,$s2,$acc02
	rotrwi	$acc10,$acc10,8
	xor	$s3,$s3,$acc03
	rotrwi	$acc11,$acc11,8
	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
	xor	$s1,$s1,$acc05
	xor	$s2,$s2,$acc06
	xor	$s3,$s3,$acc07
	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11

	b	Ldec_compact_loop
.align	4
Ldec_compact_done:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
.align	7
___

# Replace every backtick-quoted expression in the generated text with its
# evaluated value, then send the result through the ppc-xlate.pl pipe that
# was opened on STDOUT near the top of the file.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# STDOUT is a pipe: buffered write errors and a non-zero exit status of the
# translator are only reported by close, so a failure here must be fatal
# instead of leaving truncated assembly behind.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";