aes-ppc.pl revision 1.3
#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.

# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gave 10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process a byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As a result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).

# The first command-line argument selects the target flavour. It must
# contain "64" or "32"; that choice fixes the pointer size, the offset
# used for the saved link register, and the load/store mnemonics used
# when spilling registers to the stack frame.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Locate ppc-xlate.pl either next to this script or in ../../perlasm/.
# The directory prefix comes from $0; when $0 carries no path separator
# the prefix is explicitly empty (rather than whatever $1 last held).
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe everything written to STDOUT through the translator; the second
# command-line argument is appended to the translator's command line.
# The parentheses plus low-precedence "or" matter: with "||" the die()
# attached to the always-true command string, so a failed open() was
# silently ignored.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";

# Size of the stack frame allocated by the public entry points.
$FRAME=32*$SIZE_T;

# Append every argument to $code as a ".long" directive that emits the
# same 32-bit value twice. Consumes its argument list and stops at the
# first undefined value.
# (The former empty "()" prototype contradicted the argument list and
# only worked because callers use the &-form, which bypasses prototypes.)
sub _data_word
{ my $word;
    while(defined($word=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word; }
}

# Register map. $Tbl0 shares r3 with $inp, and $Tbl3 initially shares
# r2 with $toc; see the TLS adjustment below.
$sp="r1";
$toc="r2";
$inp="r3";
$out="r4";
$key="r5";

# Lookup-table base pointers.
$Tbl0="r3";
$Tbl1="r6";
$Tbl2="r7";
$Tbl3="r2";

# The four 32-bit state words.
$s0="r8";
$s1="r9";
$s2="r10";
$s3="r11";

# Temporaries (hold the round-key words in the round loops).
$t0="r12";
$t1="r13";
$t2="r14";
$t3="r15";

# Sixteen scratch accumulators, r16..r31.
$acc00="r16";
$acc01="r17";
$acc02="r18";
$acc03="r19";

$acc04="r20";
$acc05="r21";
$acc06="r22";
$acc07="r23";

$acc08="r24";
$acc09="r25";
$acc10="r26";
$acc11="r27";

$acc12="r28";
$acc13="r29";
$acc14="r30";
$acc15="r31";

# stay away from TLS pointer
# 64-bit: $t1 moves from r13 to r0.
# 32-bit: $Tbl3 moves from r2 to the register $t0 had, and $t0 moves to r0.
# The die() calls guard against the map above being edited inconsistently.
if ($SIZE_T==8)	{ die if ($t1 ne "r13"); $t1="r0"; }
else		{ die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
# The compact routines load the 0x80808080 / 0x1b1b1b1b masks into the
# registers otherwise used as $Tbl2 / $Tbl3.
$mask80=$Tbl2;
$mask1b=$Tbl3;

# LAES_Te / LAES_Td are position-independent helpers: each returns in
# $Tbl0 the address of its table, computed from the link register value
# produced by "bcl 20,31,$+4".
# NOTE(review): LAES_Te is six instructions followed by `64-12*4` = 16
# bytes of padding, while its addi assumes the first table entry sits
# 128 bytes past the label -- confirm that the two stubs together really
# occupy 128 bytes.
$code.=<<___;
.machine	"any"
.text

.align	7
LAES_Te:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8`
	mtlr	r0
	blr
	.space	`64-12*4`
LAES_Td:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvvvvv "distance" between .
and 1st data entry 126 addi $Tbl0,$Tbl0,`128-64-8+2048+256` 127 mtlr r0 128 blr 129 .space `128-64-12*4` 130___ 131&_data_word( 132 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 133 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, 134 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 135 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 136 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 137 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, 138 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 139 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, 140 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 141 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 142 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 143 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, 144 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 145 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, 146 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 147 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 148 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 149 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, 150 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 151 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 152 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 153 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 154 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 155 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 156 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 157 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 158 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 159 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 160 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 161 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 162 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 163 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 164 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 165 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 166 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 167 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 
168 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 169 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 170 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 171 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 172 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 173 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 174 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 175 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 176 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 177 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 178 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 179 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 180 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 181 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 182 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 183 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 184 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 185 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 186 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 187 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 188 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 189 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 190 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 191 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, 192 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 193 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, 194 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 195 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); 196$code.=<<___; 197.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 198.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 199.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 200.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 201.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 202.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 203.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a 204.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 205.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 206.byte 0x52, 0x3b, 
0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 207.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b 208.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf 209.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 210.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 211.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 212.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 213.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 214.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 215.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 216.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb 217.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c 218.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 219.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 220.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 221.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 222.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a 223.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e 224.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e 225.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 226.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 227.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 228.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 229___ 230&_data_word( 231 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 232 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, 233 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 234 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 235 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 236 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, 237 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 238 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, 239 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 240 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 241 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 242 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, 243 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 244 0xab73d323, 0x724b02e2, 
0xe31f8f57, 0x6655ab2a, 245 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 246 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 247 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 248 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, 249 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 250 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, 251 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 252 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 253 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 254 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, 255 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 256 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 257 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 258 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 259 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 260 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, 261 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 262 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, 263 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 264 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 265 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 266 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, 267 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 268 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, 269 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 270 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 271 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 272 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, 273 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 274 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, 275 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 276 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 277 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 278 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, 279 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 280 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, 281 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 282 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 
283 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 284 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, 285 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 286 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, 287 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 288 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 289 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 290 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, 291 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 292 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, 293 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 294 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); 295$code.=<<___; 296.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 297.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 298.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 299.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 300.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 301.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 302.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 303.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 304.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 305.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 306.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 307.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 308.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 309.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 310.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 311.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 312.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 313.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 314.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 315.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 316.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 317.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 318.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 319.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 320.byte 
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 321.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 322.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 323.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 324.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 325.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 326.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 327.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 328 329 330.globl .AES_encrypt 331.align 7 332.AES_encrypt: 333 $STU $sp,-$FRAME($sp) 334 mflr r0 335 336 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) 337 $PUSH r13,`$FRAME-$SIZE_T*19`($sp) 338 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 339 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 340 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 341 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 342 $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 343 $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 344 $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 345 $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 346 $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 347 $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 348 $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 349 $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 350 $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 351 $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 352 $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 353 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 354 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 355 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 356 $PUSH r0,`$FRAME+$LRSAVE`($sp) 357 358 andi. $t0,$inp,3 359 andi. $t1,$out,3 360 or. $t0,$t0,$t1 361 bne Lenc_unaligned 362 363Lenc_unaligned_ok: 364 lwz $s0,0($inp) 365 lwz $s1,4($inp) 366 lwz $s2,8($inp) 367 lwz $s3,12($inp) 368 bl LAES_Te 369 bl Lppc_AES_encrypt_compact 370 stw $s0,0($out) 371 stw $s1,4($out) 372 stw $s2,8($out) 373 stw $s3,12($out) 374 b Lenc_done 375 376Lenc_unaligned: 377 subfic $t0,$inp,4096 378 subfic $t1,$out,4096 379 andi. $t0,$t0,4096-16 380 beq Lenc_xpage 381 andi. 
$t1,$t1,4096-16 382 bne Lenc_unaligned_ok 383 384Lenc_xpage: 385 lbz $acc00,0($inp) 386 lbz $acc01,1($inp) 387 lbz $acc02,2($inp) 388 lbz $s0,3($inp) 389 lbz $acc04,4($inp) 390 lbz $acc05,5($inp) 391 lbz $acc06,6($inp) 392 lbz $s1,7($inp) 393 lbz $acc08,8($inp) 394 lbz $acc09,9($inp) 395 lbz $acc10,10($inp) 396 insrwi $s0,$acc00,8,0 397 lbz $s2,11($inp) 398 insrwi $s1,$acc04,8,0 399 lbz $acc12,12($inp) 400 insrwi $s0,$acc01,8,8 401 lbz $acc13,13($inp) 402 insrwi $s1,$acc05,8,8 403 lbz $acc14,14($inp) 404 insrwi $s0,$acc02,8,16 405 lbz $s3,15($inp) 406 insrwi $s1,$acc06,8,16 407 insrwi $s2,$acc08,8,0 408 insrwi $s3,$acc12,8,0 409 insrwi $s2,$acc09,8,8 410 insrwi $s3,$acc13,8,8 411 insrwi $s2,$acc10,8,16 412 insrwi $s3,$acc14,8,16 413 414 bl LAES_Te 415 bl Lppc_AES_encrypt_compact 416 417 extrwi $acc00,$s0,8,0 418 extrwi $acc01,$s0,8,8 419 stb $acc00,0($out) 420 extrwi $acc02,$s0,8,16 421 stb $acc01,1($out) 422 stb $acc02,2($out) 423 extrwi $acc04,$s1,8,0 424 stb $s0,3($out) 425 extrwi $acc05,$s1,8,8 426 stb $acc04,4($out) 427 extrwi $acc06,$s1,8,16 428 stb $acc05,5($out) 429 stb $acc06,6($out) 430 extrwi $acc08,$s2,8,0 431 stb $s1,7($out) 432 extrwi $acc09,$s2,8,8 433 stb $acc08,8($out) 434 extrwi $acc10,$s2,8,16 435 stb $acc09,9($out) 436 stb $acc10,10($out) 437 extrwi $acc12,$s3,8,0 438 stb $s2,11($out) 439 extrwi $acc13,$s3,8,8 440 stb $acc12,12($out) 441 extrwi $acc14,$s3,8,16 442 stb $acc13,13($out) 443 stb $acc14,14($out) 444 stb $s3,15($out) 445 446Lenc_done: 447 $POP r0,`$FRAME+$LRSAVE`($sp) 448 $POP $toc,`$FRAME-$SIZE_T*20`($sp) 449 $POP r13,`$FRAME-$SIZE_T*19`($sp) 450 $POP r14,`$FRAME-$SIZE_T*18`($sp) 451 $POP r15,`$FRAME-$SIZE_T*17`($sp) 452 $POP r16,`$FRAME-$SIZE_T*16`($sp) 453 $POP r17,`$FRAME-$SIZE_T*15`($sp) 454 $POP r18,`$FRAME-$SIZE_T*14`($sp) 455 $POP r19,`$FRAME-$SIZE_T*13`($sp) 456 $POP r20,`$FRAME-$SIZE_T*12`($sp) 457 $POP r21,`$FRAME-$SIZE_T*11`($sp) 458 $POP r22,`$FRAME-$SIZE_T*10`($sp) 459 $POP r23,`$FRAME-$SIZE_T*9`($sp) 460 $POP 
r24,`$FRAME-$SIZE_T*8`($sp) 461 $POP r25,`$FRAME-$SIZE_T*7`($sp) 462 $POP r26,`$FRAME-$SIZE_T*6`($sp) 463 $POP r27,`$FRAME-$SIZE_T*5`($sp) 464 $POP r28,`$FRAME-$SIZE_T*4`($sp) 465 $POP r29,`$FRAME-$SIZE_T*3`($sp) 466 $POP r30,`$FRAME-$SIZE_T*2`($sp) 467 $POP r31,`$FRAME-$SIZE_T*1`($sp) 468 mtlr r0 469 addi $sp,$sp,$FRAME 470 blr 471 472.align 5 473Lppc_AES_encrypt: 474 lwz $acc00,240($key) 475 addi $Tbl1,$Tbl0,3 476 lwz $t0,0($key) 477 addi $Tbl2,$Tbl0,2 478 lwz $t1,4($key) 479 addi $Tbl3,$Tbl0,1 480 lwz $t2,8($key) 481 addi $acc00,$acc00,-1 482 lwz $t3,12($key) 483 addi $key,$key,16 484 xor $s0,$s0,$t0 485 xor $s1,$s1,$t1 486 xor $s2,$s2,$t2 487 xor $s3,$s3,$t3 488 mtctr $acc00 489.align 4 490Lenc_loop: 491 rlwinm $acc00,$s0,`32-24+3`,21,28 492 rlwinm $acc01,$s1,`32-24+3`,21,28 493 rlwinm $acc02,$s2,`32-24+3`,21,28 494 rlwinm $acc03,$s3,`32-24+3`,21,28 495 lwz $t0,0($key) 496 rlwinm $acc04,$s1,`32-16+3`,21,28 497 lwz $t1,4($key) 498 rlwinm $acc05,$s2,`32-16+3`,21,28 499 lwz $t2,8($key) 500 rlwinm $acc06,$s3,`32-16+3`,21,28 501 lwz $t3,12($key) 502 rlwinm $acc07,$s0,`32-16+3`,21,28 503 lwzx $acc00,$Tbl0,$acc00 504 rlwinm $acc08,$s2,`32-8+3`,21,28 505 lwzx $acc01,$Tbl0,$acc01 506 rlwinm $acc09,$s3,`32-8+3`,21,28 507 lwzx $acc02,$Tbl0,$acc02 508 rlwinm $acc10,$s0,`32-8+3`,21,28 509 lwzx $acc03,$Tbl0,$acc03 510 rlwinm $acc11,$s1,`32-8+3`,21,28 511 lwzx $acc04,$Tbl1,$acc04 512 rlwinm $acc12,$s3,`0+3`,21,28 513 lwzx $acc05,$Tbl1,$acc05 514 rlwinm $acc13,$s0,`0+3`,21,28 515 lwzx $acc06,$Tbl1,$acc06 516 rlwinm $acc14,$s1,`0+3`,21,28 517 lwzx $acc07,$Tbl1,$acc07 518 rlwinm $acc15,$s2,`0+3`,21,28 519 lwzx $acc08,$Tbl2,$acc08 520 xor $t0,$t0,$acc00 521 lwzx $acc09,$Tbl2,$acc09 522 xor $t1,$t1,$acc01 523 lwzx $acc10,$Tbl2,$acc10 524 xor $t2,$t2,$acc02 525 lwzx $acc11,$Tbl2,$acc11 526 xor $t3,$t3,$acc03 527 lwzx $acc12,$Tbl3,$acc12 528 xor $t0,$t0,$acc04 529 lwzx $acc13,$Tbl3,$acc13 530 xor $t1,$t1,$acc05 531 lwzx $acc14,$Tbl3,$acc14 532 xor $t2,$t2,$acc06 533 lwzx 
$acc15,$Tbl3,$acc15 534 xor $t3,$t3,$acc07 535 xor $t0,$t0,$acc08 536 xor $t1,$t1,$acc09 537 xor $t2,$t2,$acc10 538 xor $t3,$t3,$acc11 539 xor $s0,$t0,$acc12 540 xor $s1,$t1,$acc13 541 xor $s2,$t2,$acc14 542 xor $s3,$t3,$acc15 543 addi $key,$key,16 544 bdnz- Lenc_loop 545 546 addi $Tbl2,$Tbl0,2048 547 nop 548 lwz $t0,0($key) 549 rlwinm $acc00,$s0,`32-24`,24,31 550 lwz $t1,4($key) 551 rlwinm $acc01,$s1,`32-24`,24,31 552 lwz $t2,8($key) 553 rlwinm $acc02,$s2,`32-24`,24,31 554 lwz $t3,12($key) 555 rlwinm $acc03,$s3,`32-24`,24,31 556 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 557 rlwinm $acc04,$s1,`32-16`,24,31 558 lwz $acc09,`2048+32`($Tbl0) 559 rlwinm $acc05,$s2,`32-16`,24,31 560 lwz $acc10,`2048+64`($Tbl0) 561 rlwinm $acc06,$s3,`32-16`,24,31 562 lwz $acc11,`2048+96`($Tbl0) 563 rlwinm $acc07,$s0,`32-16`,24,31 564 lwz $acc12,`2048+128`($Tbl0) 565 rlwinm $acc08,$s2,`32-8`,24,31 566 lwz $acc13,`2048+160`($Tbl0) 567 rlwinm $acc09,$s3,`32-8`,24,31 568 lwz $acc14,`2048+192`($Tbl0) 569 rlwinm $acc10,$s0,`32-8`,24,31 570 lwz $acc15,`2048+224`($Tbl0) 571 rlwinm $acc11,$s1,`32-8`,24,31 572 lbzx $acc00,$Tbl2,$acc00 573 rlwinm $acc12,$s3,`0`,24,31 574 lbzx $acc01,$Tbl2,$acc01 575 rlwinm $acc13,$s0,`0`,24,31 576 lbzx $acc02,$Tbl2,$acc02 577 rlwinm $acc14,$s1,`0`,24,31 578 lbzx $acc03,$Tbl2,$acc03 579 rlwinm $acc15,$s2,`0`,24,31 580 lbzx $acc04,$Tbl2,$acc04 581 rlwinm $s0,$acc00,24,0,7 582 lbzx $acc05,$Tbl2,$acc05 583 rlwinm $s1,$acc01,24,0,7 584 lbzx $acc06,$Tbl2,$acc06 585 rlwinm $s2,$acc02,24,0,7 586 lbzx $acc07,$Tbl2,$acc07 587 rlwinm $s3,$acc03,24,0,7 588 lbzx $acc08,$Tbl2,$acc08 589 rlwimi $s0,$acc04,16,8,15 590 lbzx $acc09,$Tbl2,$acc09 591 rlwimi $s1,$acc05,16,8,15 592 lbzx $acc10,$Tbl2,$acc10 593 rlwimi $s2,$acc06,16,8,15 594 lbzx $acc11,$Tbl2,$acc11 595 rlwimi $s3,$acc07,16,8,15 596 lbzx $acc12,$Tbl2,$acc12 597 rlwimi $s0,$acc08,8,16,23 598 lbzx $acc13,$Tbl2,$acc13 599 rlwimi $s1,$acc09,8,16,23 600 lbzx $acc14,$Tbl2,$acc14 601 rlwimi $s2,$acc10,8,16,23 602 lbzx 
$acc15,$Tbl2,$acc15 603 rlwimi $s3,$acc11,8,16,23 604 or $s0,$s0,$acc12 605 or $s1,$s1,$acc13 606 or $s2,$s2,$acc14 607 or $s3,$s3,$acc15 608 xor $s0,$s0,$t0 609 xor $s1,$s1,$t1 610 xor $s2,$s2,$t2 611 xor $s3,$s3,$t3 612 blr 613 614.align 4 615Lppc_AES_encrypt_compact: 616 lwz $acc00,240($key) 617 addi $Tbl1,$Tbl0,2048 618 lwz $t0,0($key) 619 lis $mask80,0x8080 620 lwz $t1,4($key) 621 lis $mask1b,0x1b1b 622 lwz $t2,8($key) 623 ori $mask80,$mask80,0x8080 624 lwz $t3,12($key) 625 ori $mask1b,$mask1b,0x1b1b 626 addi $key,$key,16 627 mtctr $acc00 628.align 4 629Lenc_compact_loop: 630 xor $s0,$s0,$t0 631 xor $s1,$s1,$t1 632 rlwinm $acc00,$s0,`32-24`,24,31 633 xor $s2,$s2,$t2 634 rlwinm $acc01,$s1,`32-24`,24,31 635 xor $s3,$s3,$t3 636 rlwinm $acc02,$s2,`32-24`,24,31 637 rlwinm $acc03,$s3,`32-24`,24,31 638 rlwinm $acc04,$s1,`32-16`,24,31 639 rlwinm $acc05,$s2,`32-16`,24,31 640 rlwinm $acc06,$s3,`32-16`,24,31 641 rlwinm $acc07,$s0,`32-16`,24,31 642 lbzx $acc00,$Tbl1,$acc00 643 rlwinm $acc08,$s2,`32-8`,24,31 644 lbzx $acc01,$Tbl1,$acc01 645 rlwinm $acc09,$s3,`32-8`,24,31 646 lbzx $acc02,$Tbl1,$acc02 647 rlwinm $acc10,$s0,`32-8`,24,31 648 lbzx $acc03,$Tbl1,$acc03 649 rlwinm $acc11,$s1,`32-8`,24,31 650 lbzx $acc04,$Tbl1,$acc04 651 rlwinm $acc12,$s3,`0`,24,31 652 lbzx $acc05,$Tbl1,$acc05 653 rlwinm $acc13,$s0,`0`,24,31 654 lbzx $acc06,$Tbl1,$acc06 655 rlwinm $acc14,$s1,`0`,24,31 656 lbzx $acc07,$Tbl1,$acc07 657 rlwinm $acc15,$s2,`0`,24,31 658 lbzx $acc08,$Tbl1,$acc08 659 rlwinm $s0,$acc00,24,0,7 660 lbzx $acc09,$Tbl1,$acc09 661 rlwinm $s1,$acc01,24,0,7 662 lbzx $acc10,$Tbl1,$acc10 663 rlwinm $s2,$acc02,24,0,7 664 lbzx $acc11,$Tbl1,$acc11 665 rlwinm $s3,$acc03,24,0,7 666 lbzx $acc12,$Tbl1,$acc12 667 rlwimi $s0,$acc04,16,8,15 668 lbzx $acc13,$Tbl1,$acc13 669 rlwimi $s1,$acc05,16,8,15 670 lbzx $acc14,$Tbl1,$acc14 671 rlwimi $s2,$acc06,16,8,15 672 lbzx $acc15,$Tbl1,$acc15 673 rlwimi $s3,$acc07,16,8,15 674 rlwimi $s0,$acc08,8,16,23 675 rlwimi $s1,$acc09,8,16,23 676 rlwimi 
$s2,$acc10,8,16,23 677 rlwimi $s3,$acc11,8,16,23 678 lwz $t0,0($key) 679 or $s0,$s0,$acc12 680 lwz $t1,4($key) 681 or $s1,$s1,$acc13 682 lwz $t2,8($key) 683 or $s2,$s2,$acc14 684 lwz $t3,12($key) 685 or $s3,$s3,$acc15 686 687 addi $key,$key,16 688 bdz Lenc_compact_done 689 690 and $acc00,$s0,$mask80 # r1=r0&0x80808080 691 and $acc01,$s1,$mask80 692 and $acc02,$s2,$mask80 693 and $acc03,$s3,$mask80 694 srwi $acc04,$acc00,7 # r1>>7 695 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 696 srwi $acc05,$acc01,7 697 andc $acc09,$s1,$mask80 698 srwi $acc06,$acc02,7 699 andc $acc10,$s2,$mask80 700 srwi $acc07,$acc03,7 701 andc $acc11,$s3,$mask80 702 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 703 sub $acc01,$acc01,$acc05 704 sub $acc02,$acc02,$acc06 705 sub $acc03,$acc03,$acc07 706 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 707 add $acc09,$acc09,$acc09 708 add $acc10,$acc10,$acc10 709 add $acc11,$acc11,$acc11 710 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 711 and $acc01,$acc01,$mask1b 712 and $acc02,$acc02,$mask1b 713 and $acc03,$acc03,$mask1b 714 xor $acc00,$acc00,$acc08 # r2 715 xor $acc01,$acc01,$acc09 716 rotlwi $acc12,$s0,16 # ROTATE(r0,16) 717 xor $acc02,$acc02,$acc10 718 rotlwi $acc13,$s1,16 719 xor $acc03,$acc03,$acc11 720 rotlwi $acc14,$s2,16 721 722 xor $s0,$s0,$acc00 # r0^r2 723 rotlwi $acc15,$s3,16 724 xor $s1,$s1,$acc01 725 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) 726 xor $s2,$s2,$acc02 727 rotrwi $s1,$s1,24 728 xor $s3,$s3,$acc03 729 rotrwi $s2,$s2,24 730 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 731 rotrwi $s3,$s3,24 732 xor $s1,$s1,$acc01 733 xor $s2,$s2,$acc02 734 xor $s3,$s3,$acc03 735 rotlwi $acc08,$acc12,8 # ROTATE(r0,24) 736 xor $s0,$s0,$acc12 # 737 rotlwi $acc09,$acc13,8 738 xor $s1,$s1,$acc13 739 rotlwi $acc10,$acc14,8 740 xor $s2,$s2,$acc14 741 rotlwi $acc11,$acc15,8 742 xor $s3,$s3,$acc15 743 xor $s0,$s0,$acc08 # 744 xor $s1,$s1,$acc09 745 xor $s2,$s2,$acc10 746 xor $s3,$s3,$acc11 747 748 b Lenc_compact_loop 749.align 4 750Lenc_compact_done: 751 xor 
$s0,$s0,$t0 752 xor $s1,$s1,$t1 753 xor $s2,$s2,$t2 754 xor $s3,$s3,$t3 755 blr 756 757.globl .AES_decrypt 758.align 7 759.AES_decrypt: 760 $STU $sp,-$FRAME($sp) 761 mflr r0 762 763 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) 764 $PUSH r13,`$FRAME-$SIZE_T*19`($sp) 765 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 766 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 767 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 768 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 769 $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 770 $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 771 $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 772 $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 773 $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 774 $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 775 $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 776 $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 777 $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 778 $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 779 $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 780 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 781 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 782 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 783 $PUSH r0,`$FRAME+$LRSAVE`($sp) 784 785 andi. $t0,$inp,3 786 andi. $t1,$out,3 787 or. $t0,$t0,$t1 788 bne Ldec_unaligned 789 790Ldec_unaligned_ok: 791 lwz $s0,0($inp) 792 lwz $s1,4($inp) 793 lwz $s2,8($inp) 794 lwz $s3,12($inp) 795 bl LAES_Td 796 bl Lppc_AES_decrypt_compact 797 stw $s0,0($out) 798 stw $s1,4($out) 799 stw $s2,8($out) 800 stw $s3,12($out) 801 b Ldec_done 802 803Ldec_unaligned: 804 subfic $t0,$inp,4096 805 subfic $t1,$out,4096 806 andi. $t0,$t0,4096-16 807 beq Ldec_xpage 808 andi. 
$t1,$t1,4096-16 809 bne Ldec_unaligned_ok 810 811Ldec_xpage: 812 lbz $acc00,0($inp) 813 lbz $acc01,1($inp) 814 lbz $acc02,2($inp) 815 lbz $s0,3($inp) 816 lbz $acc04,4($inp) 817 lbz $acc05,5($inp) 818 lbz $acc06,6($inp) 819 lbz $s1,7($inp) 820 lbz $acc08,8($inp) 821 lbz $acc09,9($inp) 822 lbz $acc10,10($inp) 823 insrwi $s0,$acc00,8,0 824 lbz $s2,11($inp) 825 insrwi $s1,$acc04,8,0 826 lbz $acc12,12($inp) 827 insrwi $s0,$acc01,8,8 828 lbz $acc13,13($inp) 829 insrwi $s1,$acc05,8,8 830 lbz $acc14,14($inp) 831 insrwi $s0,$acc02,8,16 832 lbz $s3,15($inp) 833 insrwi $s1,$acc06,8,16 834 insrwi $s2,$acc08,8,0 835 insrwi $s3,$acc12,8,0 836 insrwi $s2,$acc09,8,8 837 insrwi $s3,$acc13,8,8 838 insrwi $s2,$acc10,8,16 839 insrwi $s3,$acc14,8,16 840 841 bl LAES_Td 842 bl Lppc_AES_decrypt_compact 843 844 extrwi $acc00,$s0,8,0 845 extrwi $acc01,$s0,8,8 846 stb $acc00,0($out) 847 extrwi $acc02,$s0,8,16 848 stb $acc01,1($out) 849 stb $acc02,2($out) 850 extrwi $acc04,$s1,8,0 851 stb $s0,3($out) 852 extrwi $acc05,$s1,8,8 853 stb $acc04,4($out) 854 extrwi $acc06,$s1,8,16 855 stb $acc05,5($out) 856 stb $acc06,6($out) 857 extrwi $acc08,$s2,8,0 858 stb $s1,7($out) 859 extrwi $acc09,$s2,8,8 860 stb $acc08,8($out) 861 extrwi $acc10,$s2,8,16 862 stb $acc09,9($out) 863 stb $acc10,10($out) 864 extrwi $acc12,$s3,8,0 865 stb $s2,11($out) 866 extrwi $acc13,$s3,8,8 867 stb $acc12,12($out) 868 extrwi $acc14,$s3,8,16 869 stb $acc13,13($out) 870 stb $acc14,14($out) 871 stb $s3,15($out) 872 873Ldec_done: 874 $POP r0,`$FRAME+$LRSAVE`($sp) 875 $POP $toc,`$FRAME-$SIZE_T*20`($sp) 876 $POP r13,`$FRAME-$SIZE_T*19`($sp) 877 $POP r14,`$FRAME-$SIZE_T*18`($sp) 878 $POP r15,`$FRAME-$SIZE_T*17`($sp) 879 $POP r16,`$FRAME-$SIZE_T*16`($sp) 880 $POP r17,`$FRAME-$SIZE_T*15`($sp) 881 $POP r18,`$FRAME-$SIZE_T*14`($sp) 882 $POP r19,`$FRAME-$SIZE_T*13`($sp) 883 $POP r20,`$FRAME-$SIZE_T*12`($sp) 884 $POP r21,`$FRAME-$SIZE_T*11`($sp) 885 $POP r22,`$FRAME-$SIZE_T*10`($sp) 886 $POP r23,`$FRAME-$SIZE_T*9`($sp) 887 $POP 
r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr

	# Lppc_AES_decrypt - decryption core driven by the large lookup table.
	# Reads the state from s0-s3 and leaves the result there.  key is
	# advanced past each 16-byte round key it consumes; the word at
	# 240(key) minus one becomes the CTR loop count (presumably the
	# round count).  Tbl1-Tbl3 are Tbl0+3/+2/+1, i.e. byte-offset views
	# of the same table, whose entries are indexed in 8-byte steps.
	# Clobbers t0-t3, acc00-acc15, Tbl1-Tbl3, key and CTR.
.align	5
Lppc_AES_decrypt:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
	# Per iteration: scale every state byte by 8 (the +3 in the rotate
	# count) to form a table offset, fetch 16 table words and fold them
	# together with the next round key into the new s0-s3.  Loads and
	# ALU operations are deliberately interleaved.
.align	4
Ldec_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s3,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s0,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s1,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s2,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz-	Ldec_loop

	# Last round: plain byte substitution through the 256-byte table at
	# Tbl0+2048 (touched at 32-byte strides first), reassembled into
	# words and xored with the final round key.
	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
	rlwinm	$acc04,$s3,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s0,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	lbzx	$acc00,$Tbl2,$acc00
	lwz	$acc11,`2048+96`($Tbl0)
	lbzx	$acc01,$Tbl2,$acc01
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc06,$s1,`32-16`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc07,$s2,`32-16`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl2,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl2,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl2,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl2,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr

	# Lppc_AES_decrypt_compact - same register usage as the routine
	# above (state in s0-s3, key, Tbl0), but only the 256-byte table at
	# Tbl0+2048 is ever indexed; the remainder of each round is computed
	# arithmetically with the 0x80.. and 0x1b.. byte masks built here.
.align	4
Lppc_AES_decrypt_compact:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,2048
	lwz	$t0,0($key)
	lis	$mask80,0x8080
	lwz	$t1,4($key)
	lis	$mask1b,0x1b1b
	lwz	$t2,8($key)
	ori	$mask80,$mask80,0x8080
	lwz	$t3,12($key)
	ori	$mask1b,$mask1b,0x1b1b
	addi	$key,$key,16
___
# 64-bit flavour only: replicate both masks into the upper word, because the
# 64-bit round body below processes two state words per register.
$code.=<<___	if ($SIZE_T==8);
	insrdi	$mask80,$mask80,32,0
	insrdi	$mask1b,$mask1b,32,0
___
$code.=<<___;
	mtctr	$acc00
	# Each pass: xor in the round key, substitute all 16 state bytes
	# (the byte permutation is folded into which word each byte is taken
	# from), fetch the next round key, and leave through
	# Ldec_compact_done once CTR reaches zero.
.align	4
Ldec_compact_loop:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	rlwinm	$acc00,$s0,`32-24`,24,31
	xor	$s2,$s2,$t2
	rlwinm	$acc01,$s1,`32-24`,24,31
	xor	$s3,$s3,$t3
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s3,`32-16`,24,31
	rlwinm	$acc05,$s0,`32-16`,24,31
	rlwinm	$acc06,$s1,`32-16`,24,31
	rlwinm	$acc07,$s2,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	rlwinm	$acc08,$s2,`32-8`,24,31
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl1,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl1,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	lwz	$t0,0($key)
	or	$s0,$s0,$acc12
	lwz	$t1,4($key)
	or	$s1,$s1,$acc13
	lwz	$t2,8($key)
	or	$s2,$s2,$acc14
	lwz	$t3,12($key)
	or	$s3,$s3,$acc15

	addi	$key,$key,16
	bdz	Ldec_compact_done
___
# 64-bit flavour: s1 is packed above s0 and s3 above s2, so every r2/r4/r8
# step handles two state words per instruction.  r2, r4 and r8 are r0 with
# each byte doubled once, twice and three times: a doubling shifts the low
# seven bits left and xors in 0x1b wherever bit 7 was set.  The closing
# extrdi group unpacks the upper halves into the odd-numbered acc registers.
$code.=<<___ if ($SIZE_T==8);
	# vectorized permutation improves decrypt performance by 10%
	insrdi	$s0,$s1,32,0
	insrdi	$s2,$s3,32,0

	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc02,$s2,$mask80
	srdi	$acc04,$acc00,7		# r1>>7
	srdi	$acc06,$acc02,7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	andc	$acc10,$s2,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc02,$acc02,$acc06
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc10,$acc10,$acc10
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc02,$acc02,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc02,$acc02,$acc10

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc06,$acc02,$mask80
	srdi	$acc08,$acc04,7		# r1>>7
	srdi	$acc10,$acc06,7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	andc	$acc14,$acc02,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc06,$acc06,$acc10
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc06,$acc06,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc06,$acc06,$acc14

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc10,$acc06,$mask80
	srdi	$acc12,$acc08,7		# r1>>7
	srdi	$acc14,$acc10,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	sub	$acc10,$acc10,$acc14
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc14,$acc06,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc10,$acc10,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc10,$acc10,$acc14

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc02,$acc02,$s2
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc06,$acc06,$s2

	extrdi	$acc01,$acc00,32,0
	extrdi	$acc03,$acc02,32,0
	extrdi	$acc05,$acc04,32,0
	extrdi	$acc07,$acc06,32,0
	extrdi	$acc09,$acc08,32,0
	extrdi	$acc11,$acc10,32,0
___
# 32-bit flavour: the same r2/r4/r8 computation, one state word per register.
$code.=<<___ if ($SIZE_T==4);
	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc01,$s1,$mask80
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	srwi	$acc05,$acc01,7
	andc	$acc09,$s1,$mask80
	srwi	$acc06,$acc02,7
	andc	$acc10,$s2,$mask80
	srwi	$acc07,$acc03,7
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
	sub	$acc02,$acc02,$acc06
	sub	$acc03,$acc03,$acc07
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc09,$acc09,$acc09
	add	$acc10,$acc10,$acc10
	add	$acc11,$acc11,$acc11
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc01,$acc01,$mask1b
	and	$acc02,$acc02,$mask1b
	and	$acc03,$acc03,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc05,$acc01,$mask80
	and	$acc06,$acc02,$mask80
	and	$acc07,$acc03,$mask80
	srwi	$acc08,$acc04,7		# r1>>7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	srwi	$acc09,$acc05,7
	andc	$acc13,$acc01,$mask80
	srwi	$acc10,$acc06,7
	andc	$acc14,$acc02,$mask80
	srwi	$acc11,$acc07,7
	andc	$acc15,$acc03,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc05,$acc05,$acc09
	sub	$acc06,$acc06,$acc10
	sub	$acc07,$acc07,$acc11
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc05,$acc05,$mask1b
	and	$acc06,$acc06,$mask1b
	and	$acc07,$acc07,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc05,$acc05,$acc13
	xor	$acc06,$acc06,$acc14
	xor	$acc07,$acc07,$acc15

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc09,$acc05,$mask80
	srwi	$acc12,$acc08,7		# r1>>7
	and	$acc10,$acc06,$mask80
	srwi	$acc13,$acc09,7
	and	$acc11,$acc07,$mask80
	srwi	$acc14,$acc10,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	srwi	$acc15,$acc11,7
	sub	$acc09,$acc09,$acc13
	sub	$acc10,$acc10,$acc14
	sub	$acc11,$acc11,$acc15
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc13,$acc05,$mask80
	andc	$acc14,$acc06,$mask80
	andc	$acc15,$acc07,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc09,$acc09,$mask1b
	and	$acc10,$acc10,$mask1b
	and	$acc11,$acc11,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc09,$acc09,$acc13
	xor	$acc10,$acc10,$acc14
	xor	$acc11,$acc11,$acc15

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc01,$acc01,$s1
	xor	$acc02,$acc02,$s2
	xor	$acc03,$acc03,$s3
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc05,$acc05,$s1
	xor	$acc06,$acc06,$s2
	xor	$acc07,$acc07,$s3
___
# Common tail for both flavours: combine r0, r2, r4 and r8 through 32-bit
# rotations into the next state words, then start the next pass.
$code.=<<___;
	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
	rotrwi	$s1,$s1,8
	xor	$s0,$s0,$acc00		# ^= r2^r0
	rotrwi	$s2,$s2,8
	xor	$s1,$s1,$acc01
	rotrwi	$s3,$s3,8
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	xor	$acc00,$acc00,$acc08
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11
	xor	$s0,$s0,$acc04		# ^= r4^r0
	rotrwi	$acc00,$acc00,24
	xor	$s1,$s1,$acc05
	rotrwi	$acc01,$acc01,24
	xor	$s2,$s2,$acc06
	rotrwi	$acc02,$acc02,24
	xor	$s3,$s3,$acc07
	rotrwi	$acc03,$acc03,24
	xor	$acc04,$acc04,$acc08
	xor	$acc05,$acc05,$acc09
	xor	$acc06,$acc06,$acc10
	xor	$acc07,$acc07,$acc11
	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
	rotrwi	$acc04,$acc04,16
	xor	$s1,$s1,$acc09
	rotrwi	$acc05,$acc05,16
	xor	$s2,$s2,$acc10
	rotrwi	$acc06,$acc06,16
	xor	$s3,$s3,$acc11
	rotrwi	$acc07,$acc07,16
	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
	rotrwi	$acc08,$acc08,8
	xor	$s1,$s1,$acc01
	rotrwi	$acc09,$acc09,8
	xor	$s2,$s2,$acc02
	rotrwi	$acc10,$acc10,8
	xor	$s3,$s3,$acc03
	rotrwi	$acc11,$acc11,8
	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
	xor	$s1,$s1,$acc05
	xor	$s2,$s2,$acc06
	xor	$s3,$s3,$acc07
	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11

	b	Ldec_compact_loop
	# Reached from the bdz above after the last substitution pass: only
	# the final round key, already loaded into t0-t3, remains to be
	# applied.
.align	4
Ldec_compact_done:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
___

# Replace every back-quoted expression embedded in the generated text (for
# example the frame offsets and rotate counts) with the value Perl computes
# for it.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# STDOUT is a pipe into the ppc-xlate translator opened near the top of this
# script.  Buffered write errors and a failing translator only become visible
# when the pipe is closed, so the result must be checked; otherwise a
# truncated or missing assembly file would pass for a successful run.
close STDOUT or die "error closing STDOUT: $! (child status $?)";