1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# Needs more work: key setup, CBC routine...
11#
12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13# 128-bit key, which is ~40% better than 64-bit code generated by gcc
14# 4.0. But these are not the ones currently used! Their "compact"
15# counterparts are, for security reason. ppc_AES_encrypt_compact runs
16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17# at 1/3 of ppc_AES_decrypt.
18
19# February 2010
20#
21# Rescheduling instructions to favour Power6 pipeline gave 10%
22# performance improvement on the platform in question (and marginal
23# improvement even on others). It should be noted that Power6 fails
24# to process byte in 18 cycles, only in 23, because it fails to issue
25# 4 load instructions in two cycles, only in 3. As result non-compact
26# block subroutines are 25% slower than one would expect. Compact
27# functions scale better, because they have pure computational part,
28# which scales perfectly with clock frequency. To be specific
29# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# The first command-line argument names the target flavour; it selects
# the machine word size and the matching store-with-update, load and
# store mnemonics used by the prologue/epilogue code.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Locate the ppc-xlate.pl translator: first next to this script, then
# in ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe everything printed to STDOUT through the translator; the next
# command-line argument (if any) is appended after the flavour.
# The failure check has to use low-precedence "or": with "||" the die
# was attached to the command string, which is always true, so a
# failed open went unreported.
open(STDOUT,"| $^X $xlate $flavour ".shift(@ARGV)) or die "can't call $xlate: $!";

# Stack frame size used by the entry points: 32 machine words.
$FRAME=32*$SIZE_T;
56
# Append every argument to $code as one ".long" line that carries the
# 32-bit value twice, i.e. each table entry occupies 8 bytes.  The
# table-driven round code in this script indexes entries with an
# 8-byte stride and reads words at byte offsets 0..3 of an entry.
# Processing stops at the first undefined argument.
#
# No prototype is declared: the sub consumes its argument list, and an
# empty "()" prototype would wrongly describe it as taking none.  The
# existing &_data_word(...) call sites are unaffected, since that call
# form never consults prototypes.
sub _data_word
{
    while (defined(my $word = shift)) {
	$code .= sprintf("\t.long\t0x%08x,0x%08x\n", $word, $word);
    }
}
61
# Stack pointer, TOC pointer and the three incoming arguments.
($sp,$toc,$inp,$out,$key) = map("r$_", 1..5);

# Table base pointers.  $Tbl0 shares r3 with $inp and $Tbl3 starts out
# sharing r2 with $toc.
($Tbl0,$Tbl1,$Tbl2,$Tbl3) = ("r3","r6","r7","r2");

# Cipher state words.
($s0,$s1,$s2,$s3) = map("r$_", 8..11);

# Round-key words / temporaries.
($t0,$t1,$t2,$t3) = map("r$_", 12..15);

# Sixteen scratch accumulators occupying r16..r31.
($acc00,$acc01,$acc02,$acc03,
 $acc04,$acc05,$acc06,$acc07,
 $acc08,$acc09,$acc10,$acc11,
 $acc12,$acc13,$acc14,$acc15) = map("r$_", 16..31);

# stay away from TLS pointer
# 64-bit builds move $t1 off r13; other builds move $Tbl3 off r2 (onto
# the register $t0 had) and give $t0 r0 instead.  The bare die calls
# guard against the map above being edited without updating this fixup.
if ($SIZE_T==8) {
	die if ($t1 ne "r13");
	$t1="r0";
} else {
	die if ($Tbl3 ne "r2");
	$Tbl3=$t0;
	$t0="r0";
}

# The compact routines reuse two table pointers to hold their masks.
($mask80,$mask1b) = ($Tbl2,$Tbl3);
108
# Position-independent table locators.  Each stub saves LR in r0, uses
# "bcl 20,31,$+4" to obtain its own address, adds a fixed displacement
# to reach the table and returns the result in $Tbl0.
#
# The displacements assume this layout, counted from the 128-byte
# aligned LAES_Te label: LAES_Td at +64 and the first table entry at
# +128 (the data emitted right after this heredoc).  Each stub is six
# 4-byte instructions, so the padding after it must be 64-6*4 and
# 128-64-6*4 bytes respectively; any other amount shifts the tables
# away from the addresses the addi instructions compute.
$code.=<<___;
.machine	"any"
.text

.align	7
LAES_Te:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8`
	mtlr	r0
	blr
	.space	`64-6*4`
LAES_Td:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-64-8+2048+256`
	mtlr	r0
	blr
	.space	`128-64-6*4`
___
131&_data_word(
132	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
133	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
134	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
135	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
136	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
137	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
138	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
139	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
140	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
141	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
142	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
143	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
144	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
145	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
146	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
147	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
148	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
149	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
150	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
151	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
152	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
153	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
154	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
155	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
156	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
157	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
158	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
159	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
160	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
161	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
162	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
163	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
164	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
165	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
166	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
167	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
168	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
169	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
170	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
171	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
172	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
173	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
174	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
175	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
176	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
177	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
178	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
179	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
180	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
181	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
182	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
183	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
184	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
185	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
186	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
187	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
188	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
189	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
190	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
191	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
192	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
193	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
194	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
195	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
196$code.=<<___;
197.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
198.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
199.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
200.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
201.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
202.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
203.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
204.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
205.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
206.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
207.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
208.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
209.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
210.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
211.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
212.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
213.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
214.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
215.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
216.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
217.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
218.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
219.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
220.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
221.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
222.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
223.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
224.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
225.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
226.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
227.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
228.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
229___
230&_data_word(
231	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
232	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
233	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
234	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
235	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
236	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
237	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
238	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
239	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
240	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
241	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
242	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
243	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
244	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
245	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
246	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
247	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
248	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
249	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
250	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
251	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
252	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
253	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
254	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
255	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
256	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
257	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
258	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
259	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
260	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
261	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
262	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
263	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
264	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
265	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
266	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
267	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
268	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
269	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
270	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
271	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
272	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
273	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
274	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
275	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
276	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
277	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
278	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
279	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
280	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
281	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
282	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
283	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
284	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
285	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
286	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
287	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
288	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
289	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
290	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
291	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
292	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
293	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
294	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
295$code.=<<___;
296.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
297.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
298.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
299.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
300.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
301.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
302.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
303.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
304.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
305.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
306.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
307.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
308.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
309.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
310.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
311.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
312.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
313.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
314.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
315.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
316.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
317.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
318.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
319.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
320.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
321.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
322.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
323.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
324.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
325.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
326.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
327.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
328
329
# aes_encrypt_internal: encrypt one 16-byte block using the compact
# (S-box only) core.  Going by the register assignments at the top of
# this script the arguments are inp in r3, out in r4 and the key
# schedule in r5.
.globl	.aes_encrypt_internal
.align	7
.aes_encrypt_internal:
	# Prologue: allocate the frame, then save toc, r13-r31 and LR.
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	# Use word accesses directly when inp and out are both 4-byte aligned.
	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Lenc_unaligned

	# Word path.  The table base register is the same one that holds inp,
	# so the block is loaded before LAES_Te overwrites it.
Lenc_unaligned_ok:
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
	lwz	$s3,12($inp)
	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
	b	Lenc_done

	# Misaligned pointers: word accesses are still used unless inp or out
	# lies fewer than 16 bytes below a 4096-byte boundary (or exactly on
	# one); in that case the block is moved byte by byte instead.
Lenc_unaligned:
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	andi.	$t0,$t0,4096-16
	beq	Lenc_xpage
	andi.	$t1,$t1,4096-16
	bne	Lenc_unaligned_ok

	# Byte path: the last byte of each word is loaded straight into the
	# state register and the other three are inserted above it, giving
	# the same big-endian word a lwz would have produced.
Lenc_xpage:
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact

	# Scatter the four result words back out one byte at a time.
	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

	# Epilogue: restore LR, toc and r13-r31, then release the frame.
Lenc_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
471
# Lppc_AES_encrypt: table-driven encryption core.  Works on the state
# in s0-s3 with the key schedule pointer in key and the table base in
# Tbl0 (presumably as returned by LAES_Te), and leaves the result in
# s0-s3.  The header of this script notes that the compact variant is
# the one actually used.
.align	5
Lppc_AES_encrypt:
	# Tbl1..Tbl3 point 3, 2 and 1 bytes into the table.  Entries are
	# stored twice (8 bytes each), so a word load at those offsets
	# returns a byte-rotated copy of the entry.
	# The round count is read from offset 240 of the key schedule; the
	# counter gets one less because the last round is handled separately.
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
.align	4
	# One full round per iteration: each rlwinm extracts one state byte
	# already multiplied by 8 (the table stride); the sixteen lookups are
	# then folded into the next round key words.
Lenc_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s1,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s2,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s3,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s0,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s3,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s0,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s1,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s2,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz-	Lenc_loop

	# Last round: only byte substitution, taken from the 256-byte table
	# that follows the 2048-byte word table.  The eight lwz below touch
	# that table at 32-byte steps; their results are overwritten before
	# being used (see the prefetch note on the first of them).
	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
	rlwinm	$acc04,$s1,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s2,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	rlwinm	$acc06,$s3,`32-16`,24,31
	lwz	$acc11,`2048+96`($Tbl0)
	rlwinm	$acc07,$s0,`32-16`,24,31
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc10,$s0,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc00,$Tbl2,$acc00
	rlwinm	$acc12,$s3,`0`,24,31
	lbzx	$acc01,$Tbl2,$acc01
	rlwinm	$acc13,$s0,`0`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc14,$s1,`0`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc15,$s2,`0`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc08,$Tbl2,$acc08
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc09,$Tbl2,$acc09
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc10,$Tbl2,$acc10
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc11,$Tbl2,$acc11
	rlwimi	$s3,$acc07,16,8,15
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc08,8,16,23
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc09,8,16,23
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc10,8,16,23
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
613
# Lppc_AES_encrypt_compact: encryption core that looks up only the
# 256-byte S-box located 2048 bytes past the table base and computes
# the column mixing arithmetically.  Input and output are the state
# words s0-s3; key points at the key schedule and Tbl0 at the table
# base returned by LAES_Te.
.align	4
Lppc_AES_encrypt_compact:
	# Read the round count from offset 240 of the key schedule, fetch
	# the first round key and build the 0x80808080 and 0x1b1b1b1b masks
	# (kept in the registers otherwise used as Tbl2 and Tbl3).
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,2048
	lwz	$t0,0($key)
	lis	$mask80,0x8080
	lwz	$t1,4($key)
	lis	$mask1b,0x1b1b
	lwz	$t2,8($key)
	ori	$mask80,$mask80,0x8080
	lwz	$t3,12($key)
	ori	$mask1b,$mask1b,0x1b1b
	addi	$key,$key,16
	mtctr	$acc00
.align	4
	# Per round: add the round key, substitute every state byte through
	# the S-box (the word each byte is taken from implements the row
	# shift) and reassemble the four words while the next round key is
	# being loaded.
Lenc_compact_loop:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	rlwinm	$acc00,$s0,`32-24`,24,31
	xor	$s2,$s2,$t2
	rlwinm	$acc01,$s1,`32-24`,24,31
	xor	$s3,$s3,$t3
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s1,`32-16`,24,31
	rlwinm	$acc05,$s2,`32-16`,24,31
	rlwinm	$acc06,$s3,`32-16`,24,31
	rlwinm	$acc07,$s0,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	rlwinm	$acc08,$s2,`32-8`,24,31
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s3,`0`,24,31
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s0,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s1,`0`,24,31
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s2,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl1,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl1,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	lwz	$t0,0($key)
	or	$s0,$s0,$acc12
	lwz	$t1,4($key)
	or	$s1,$s1,$acc13
	lwz	$t2,8($key)
	or	$s2,$s2,$acc14
	lwz	$t3,12($key)
	or	$s3,$s3,$acc15

	# The final round has no column mixing: leave once the counter
	# reaches zero.
	addi	$key,$key,16
	bdz	Lenc_compact_done

	# Column mixing.  r2 is every byte of r0 doubled in GF(2^8): the low
	# seven bits are shifted left and 0x1b is folded into each byte whose
	# top bit was set.  The rotations below then combine r0 and r2.
	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc01,$s1,$mask80
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	srwi	$acc05,$acc01,7
	andc	$acc09,$s1,$mask80
	srwi	$acc06,$acc02,7
	andc	$acc10,$s2,$mask80
	srwi	$acc07,$acc03,7
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
	sub	$acc02,$acc02,$acc06
	sub	$acc03,$acc03,$acc07
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc09,$acc09,$acc09
	add	$acc10,$acc10,$acc10
	add	$acc11,$acc11,$acc11
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc01,$acc01,$mask1b
	and	$acc02,$acc02,$mask1b
	and	$acc03,$acc03,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc01,$acc01,$acc09
	 rotlwi	$acc12,$s0,16		# ROTATE(r0,16)
	xor	$acc02,$acc02,$acc10
	 rotlwi	$acc13,$s1,16
	xor	$acc03,$acc03,$acc11
	 rotlwi	$acc14,$s2,16

	xor	$s0,$s0,$acc00		# r0^r2
	rotlwi	$acc15,$s3,16
	xor	$s1,$s1,$acc01
	rotrwi	$s0,$s0,24		# ROTATE(r2^r0,24)
	xor	$s2,$s2,$acc02
	rotrwi	$s1,$s1,24
	xor	$s3,$s3,$acc03
	rotrwi	$s2,$s2,24
	xor	$s0,$s0,$acc00		# ROTATE(r2^r0,24)^r2
	rotrwi	$s3,$s3,24
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	rotlwi	$acc08,$acc12,8		# ROTATE(r0,24)
	xor	$s0,$s0,$acc12		#
	rotlwi	$acc09,$acc13,8
	xor	$s1,$s1,$acc13
	rotlwi	$acc10,$acc14,8
	xor	$s2,$s2,$acc14
	rotlwi	$acc11,$acc15,8
	xor	$s3,$s3,$acc15
	xor	$s0,$s0,$acc08		#
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11

	b	Lenc_compact_loop
.align	4
	# Add the last round key and return.
Lenc_compact_done:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
756
# aes_decrypt_internal: decrypt one 16-byte block using the compact
# (S-box only) core.  Going by the register assignments at the top of
# this script the arguments are inp in r3, out in r4 and the key
# schedule in r5.
.globl	.aes_decrypt_internal
.align	7
.aes_decrypt_internal:
	# Prologue: allocate the frame, then save toc, r13-r31 and LR.
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	# Use word accesses directly when inp and out are both 4-byte aligned.
	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Ldec_unaligned

	# Word path.  The table base register is the same one that holds inp,
	# so the block is loaded before LAES_Td overwrites it.
Ldec_unaligned_ok:
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
	lwz	$s3,12($inp)
	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
	b	Ldec_done

	# Misaligned pointers: word accesses are still used unless inp or out
	# lies fewer than 16 bytes below a 4096-byte boundary (or exactly on
	# one); in that case the block is moved byte by byte instead.
Ldec_unaligned:
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	andi.	$t0,$t0,4096-16
	beq	Ldec_xpage
	andi.	$t1,$t1,4096-16
	bne	Ldec_unaligned_ok

	# Byte path: the last byte of each word is loaded straight into the
	# state register and the other three are inserted above it, giving
	# the same big-endian word a lwz would have produced.
Ldec_xpage:
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact

	# Scatter the four result words back out one byte at a time.
	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

	# Epilogue: restore LR, toc and r13-r31, then release the frame.
Ldec_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
898
# Lppc_AES_decrypt: table-driven decryption core.  Works on the state
# in s0-s3 with the key schedule pointer in key and the table base in
# Tbl0 (presumably as returned by LAES_Td), and leaves the result in
# s0-s3.  The header of this script notes that the compact variant is
# the one actually used.
.align	5
Lppc_AES_decrypt:
	# Tbl1..Tbl3 point 3, 2 and 1 bytes into the table.  Entries are
	# stored twice (8 bytes each), so a word load at those offsets
	# returns a byte-rotated copy of the entry.
	# The round count is read from offset 240 of the key schedule; the
	# counter gets one less because the last round is handled separately.
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
.align	4
	# One full round per iteration.  Same structure as the encryption
	# loop, but the second and fourth byte of each output word come from
	# the opposite neighbouring state words (row shift in the other
	# direction).
Ldec_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s3,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s0,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s1,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s2,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz-	Ldec_loop

	# Last round: only byte substitution, taken from the 256-byte table
	# that follows the 2048-byte word table.  The eight lwz below touch
	# that table at 32-byte steps; their results are overwritten before
	# being used (see the prefetch note on the first of them).
	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
	rlwinm	$acc04,$s3,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s0,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	lbzx	$acc00,$Tbl2,$acc00
	lwz	$acc11,`2048+96`($Tbl0)
	lbzx	$acc01,$Tbl2,$acc01
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc06,$s1,`32-16`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc07,$s2,`32-16`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl2,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl2,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl2,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl2,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
1040
1041.align	4
1042Lppc_AES_decrypt_compact:
1043	lwz	$acc00,240($key)
1044	addi	$Tbl1,$Tbl0,2048
1045	lwz	$t0,0($key)
1046	lis	$mask80,0x8080
1047	lwz	$t1,4($key)
1048	lis	$mask1b,0x1b1b
1049	lwz	$t2,8($key)
1050	ori	$mask80,$mask80,0x8080
1051	lwz	$t3,12($key)
1052	ori	$mask1b,$mask1b,0x1b1b
1053	addi	$key,$key,16
1054___
# 64-bit flavours only ($SIZE_T==8).
# The Lppc_AES_decrypt_compact prologue emitted just above builds
# $mask80 = 0x80808080 and $mask1b = 0x1b1b1b1b with lis/ori.  The 64-bit
# round code further down packs two 32-bit state words into one register,
# so each mask has to cover both halves: "insrdi rA,rS,32,0" inserts the
# low 32 bits of rS into the upper 32 bits of rA, i.e. the 32-bit pattern
# is replicated across the whole 64-bit register.
1055$code.=<<___ if ($SIZE_T==8);
1056	insrdi	$mask80,$mask80,32,0
1057	insrdi	$mask1b,$mask1b,32,0
1058___
# Round loop of Lppc_AES_decrypt_compact (flavour independent part).
#
# CTR is loaded from $acc00, which the prologue read from offset 240 of
# the key structure.  NOTE(review): presumably the round count stored by
# the key-setup code -- confirm there.
#
# Each iteration:
#   1. xors the state ($s0-$s3) with the current round key ($t0-$t3);
#   2. splits the state into 16 byte indices with rlwinm.  The source
#      word is rotated per byte position (new s0 takes its bytes from
#      s0,s3,s2,s1; new s1 from s1,s0,s3,s2; ...), which is the inverse
#      ShiftRows permutation;
#   3. substitutes every byte through the 256-byte table at $Tbl1
#      (= $Tbl0+2048, the table the non-compact routine calls Td4);
#   4. re-packs the bytes into $s0-$s3 with rlwinm/rlwimi/or while the
#      next round key is loaded into $t0-$t3 and $key is advanced by 16;
#   5. bdz decrements CTR and leaves for Ldec_compact_done when it hits
#      zero, so the last round skips the column mixing that follows.
#
# Loads and ALU operations are interleaved on purpose (see the
# "February 2010" scheduling note in the file header); do not reorder.
1059$code.=<<___;
1060	mtctr	$acc00
1061.align	4
1062Ldec_compact_loop:
1063	xor	$s0,$s0,$t0
1064	xor	$s1,$s1,$t1
1065	rlwinm	$acc00,$s0,`32-24`,24,31
1066	xor	$s2,$s2,$t2
1067	rlwinm	$acc01,$s1,`32-24`,24,31
1068	xor	$s3,$s3,$t3
1069	rlwinm	$acc02,$s2,`32-24`,24,31
1070	rlwinm	$acc03,$s3,`32-24`,24,31
1071	rlwinm	$acc04,$s3,`32-16`,24,31
1072	rlwinm	$acc05,$s0,`32-16`,24,31
1073	rlwinm	$acc06,$s1,`32-16`,24,31
1074	rlwinm	$acc07,$s2,`32-16`,24,31
1075	lbzx	$acc00,$Tbl1,$acc00
1076	rlwinm	$acc08,$s2,`32-8`,24,31
1077	lbzx	$acc01,$Tbl1,$acc01
1078	rlwinm	$acc09,$s3,`32-8`,24,31
1079	lbzx	$acc02,$Tbl1,$acc02
1080	rlwinm	$acc10,$s0,`32-8`,24,31
1081	lbzx	$acc03,$Tbl1,$acc03
1082	rlwinm	$acc11,$s1,`32-8`,24,31
1083	lbzx	$acc04,$Tbl1,$acc04
1084	rlwinm	$acc12,$s1,`0`,24,31
1085	lbzx	$acc05,$Tbl1,$acc05
1086	rlwinm	$acc13,$s2,`0`,24,31
1087	lbzx	$acc06,$Tbl1,$acc06
1088	rlwinm	$acc14,$s3,`0`,24,31
1089	lbzx	$acc07,$Tbl1,$acc07
1090	rlwinm	$acc15,$s0,`0`,24,31
1091	lbzx	$acc08,$Tbl1,$acc08
1092	rlwinm	$s0,$acc00,24,0,7
1093	lbzx	$acc09,$Tbl1,$acc09
1094	rlwinm	$s1,$acc01,24,0,7
1095	lbzx	$acc10,$Tbl1,$acc10
1096	rlwinm	$s2,$acc02,24,0,7
1097	lbzx	$acc11,$Tbl1,$acc11
1098	rlwinm	$s3,$acc03,24,0,7
1099	lbzx	$acc12,$Tbl1,$acc12
1100	rlwimi	$s0,$acc04,16,8,15
1101	lbzx	$acc13,$Tbl1,$acc13
1102	rlwimi	$s1,$acc05,16,8,15
1103	lbzx	$acc14,$Tbl1,$acc14
1104	rlwimi	$s2,$acc06,16,8,15
1105	lbzx	$acc15,$Tbl1,$acc15
1106	rlwimi	$s3,$acc07,16,8,15
1107	rlwimi	$s0,$acc08,8,16,23
1108	rlwimi	$s1,$acc09,8,16,23
1109	rlwimi	$s2,$acc10,8,16,23
1110	rlwimi	$s3,$acc11,8,16,23
1111	lwz	$t0,0($key)
1112	or	$s0,$s0,$acc12
1113	lwz	$t1,4($key)
1114	or	$s1,$s1,$acc13
1115	lwz	$t2,8($key)
1116	or	$s2,$s2,$acc14
1117	lwz	$t3,12($key)
1118	or	$s3,$s3,$acc15
1119
1120	addi	$key,$key,16
1121	bdz	Ldec_compact_done
1122___
# Column-mixing multiples, 64-bit flavours only ($SIZE_T==8).
#
# s1 is packed into the upper half of s0 and s3 into the upper half of
# s2 (insrdi), so every instruction below works on two state words at
# once.  Writing r0 for the packed state, three identical byte-sliced
# GF(2^8) doublings are chained:
#     rN*2 = ((rN & 0x7f..7f) << 1) ^ (((rN & 0x80..80) - ((rN & 0x80..80) >> 7)) & 0x1b..1b)
# giving r2, r4 and r8.  The subtraction turns every set high bit into a
# 0x7f byte, which the 0x1b mask reduces to the AES reduction constant;
# clearing the high bits before the add keeps carries inside each byte.
#
# r2 and r4 are then xored with r0, and extrdi ...,32,0 copies the upper
# halves into the odd-numbered registers, so the shared tail that follows
# sees the same layout as the 32-bit path produces:
#     $acc00-$acc03 = r2^r0, $acc04-$acc07 = r4^r0, $acc08-$acc11 = r8
# ($acc12 and $acc14 are scratch here).
1123$code.=<<___ if ($SIZE_T==8);
1124	# vectorized permutation improves decrypt performance by 10%
1125	insrdi	$s0,$s1,32,0
1126	insrdi	$s2,$s3,32,0
1127
1128	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1129	and	$acc02,$s2,$mask80
1130	srdi	$acc04,$acc00,7		# r1>>7
1131	srdi	$acc06,$acc02,7
1132	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1133	andc	$acc10,$s2,$mask80
1134	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1135	sub	$acc02,$acc02,$acc06
1136	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1137	add	$acc10,$acc10,$acc10
1138	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1139	and	$acc02,$acc02,$mask1b
1140	xor	$acc00,$acc00,$acc08	# r2
1141	xor	$acc02,$acc02,$acc10
1142
1143	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1144	and	$acc06,$acc02,$mask80
1145	srdi	$acc08,$acc04,7		# r1>>7
1146	srdi	$acc10,$acc06,7
1147	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1148	andc	$acc14,$acc02,$mask80
1149	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1150	sub	$acc06,$acc06,$acc10
1151	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1152	add	$acc14,$acc14,$acc14
1153	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1154	and	$acc06,$acc06,$mask1b
1155	xor	$acc04,$acc04,$acc12	# r4
1156	xor	$acc06,$acc06,$acc14
1157
1158	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1159	and	$acc10,$acc06,$mask80
1160	srdi	$acc12,$acc08,7		# r1>>7
1161	srdi	$acc14,$acc10,7
1162	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1163	sub	$acc10,$acc10,$acc14
1164	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1165	andc	$acc14,$acc06,$mask80
1166	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1167	add	$acc14,$acc14,$acc14
1168	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1169	and	$acc10,$acc10,$mask1b
1170	xor	$acc08,$acc08,$acc12	# r8
1171	xor	$acc10,$acc10,$acc14
1172
1173	xor	$acc00,$acc00,$s0	# r2^r0
1174	xor	$acc02,$acc02,$s2
1175	xor	$acc04,$acc04,$s0	# r4^r0
1176	xor	$acc06,$acc06,$s2
1177
1178	extrdi	$acc01,$acc00,32,0
1179	extrdi	$acc03,$acc02,32,0
1180	extrdi	$acc05,$acc04,32,0
1181	extrdi	$acc07,$acc06,32,0
1182	extrdi	$acc09,$acc08,32,0
1183	extrdi	$acc11,$acc10,32,0
1184___
# Column-mixing multiples, 32-bit flavours only ($SIZE_T==4).
#
# Same computation as the 64-bit variant, but with one register per state
# word, so every step is issued four times (for s0..s3).  Writing r0 for
# a state word, three byte-sliced GF(2^8) doublings are chained:
#     rN*2 = ((rN & 0x7f7f7f7f) << 1) ^ (((rN & 0x80808080) - ((rN & 0x80808080) >> 7)) & 0x1b1b1b1b)
# giving r2, r4 and r8; r2 and r4 are then xored with r0.
#
# Register layout handed to the shared tail that follows:
#     $acc00-$acc03 = r2^r0, $acc04-$acc07 = r4^r0, $acc08-$acc11 = r8
# ($acc12-$acc15 are scratch here).
1185$code.=<<___ if ($SIZE_T==4);
1186	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1187	and	$acc01,$s1,$mask80
1188	and	$acc02,$s2,$mask80
1189	and	$acc03,$s3,$mask80
1190	srwi	$acc04,$acc00,7		# r1>>7
1191	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1192	srwi	$acc05,$acc01,7
1193	andc	$acc09,$s1,$mask80
1194	srwi	$acc06,$acc02,7
1195	andc	$acc10,$s2,$mask80
1196	srwi	$acc07,$acc03,7
1197	andc	$acc11,$s3,$mask80
1198	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1199	sub	$acc01,$acc01,$acc05
1200	sub	$acc02,$acc02,$acc06
1201	sub	$acc03,$acc03,$acc07
1202	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1203	add	$acc09,$acc09,$acc09
1204	add	$acc10,$acc10,$acc10
1205	add	$acc11,$acc11,$acc11
1206	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1207	and	$acc01,$acc01,$mask1b
1208	and	$acc02,$acc02,$mask1b
1209	and	$acc03,$acc03,$mask1b
1210	xor	$acc00,$acc00,$acc08	# r2
1211	xor	$acc01,$acc01,$acc09
1212	xor	$acc02,$acc02,$acc10
1213	xor	$acc03,$acc03,$acc11
1214
1215	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1216	and	$acc05,$acc01,$mask80
1217	and	$acc06,$acc02,$mask80
1218	and	$acc07,$acc03,$mask80
1219	srwi	$acc08,$acc04,7		# r1>>7
1220	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1221	srwi	$acc09,$acc05,7
1222	andc	$acc13,$acc01,$mask80
1223	srwi	$acc10,$acc06,7
1224	andc	$acc14,$acc02,$mask80
1225	srwi	$acc11,$acc07,7
1226	andc	$acc15,$acc03,$mask80
1227	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1228	sub	$acc05,$acc05,$acc09
1229	sub	$acc06,$acc06,$acc10
1230	sub	$acc07,$acc07,$acc11
1231	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1232	add	$acc13,$acc13,$acc13
1233	add	$acc14,$acc14,$acc14
1234	add	$acc15,$acc15,$acc15
1235	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1236	and	$acc05,$acc05,$mask1b
1237	and	$acc06,$acc06,$mask1b
1238	and	$acc07,$acc07,$mask1b
1239	xor	$acc04,$acc04,$acc12	# r4
1240	xor	$acc05,$acc05,$acc13
1241	xor	$acc06,$acc06,$acc14
1242	xor	$acc07,$acc07,$acc15
1243
1244	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1245	and	$acc09,$acc05,$mask80
1246	srwi	$acc12,$acc08,7		# r1>>7
1247	and	$acc10,$acc06,$mask80
1248	srwi	$acc13,$acc09,7
1249	and	$acc11,$acc07,$mask80
1250	srwi	$acc14,$acc10,7
1251	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1252	srwi	$acc15,$acc11,7
1253	sub	$acc09,$acc09,$acc13
1254	sub	$acc10,$acc10,$acc14
1255	sub	$acc11,$acc11,$acc15
1256	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1257	andc	$acc13,$acc05,$mask80
1258	andc	$acc14,$acc06,$mask80
1259	andc	$acc15,$acc07,$mask80
1260	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1261	add	$acc13,$acc13,$acc13
1262	add	$acc14,$acc14,$acc14
1263	add	$acc15,$acc15,$acc15
1264	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1265	and	$acc09,$acc09,$mask1b
1266	and	$acc10,$acc10,$mask1b
1267	and	$acc11,$acc11,$mask1b
1268	xor	$acc08,$acc08,$acc12	# r8
1269	xor	$acc09,$acc09,$acc13
1270	xor	$acc10,$acc10,$acc14
1271	xor	$acc11,$acc11,$acc15
1272
1273	xor	$acc00,$acc00,$s0	# r2^r0
1274	xor	$acc01,$acc01,$s1
1275	xor	$acc02,$acc02,$s2
1276	xor	$acc03,$acc03,$s3
1277	xor	$acc04,$acc04,$s0	# r4^r0
1278	xor	$acc05,$acc05,$s1
1279	xor	$acc06,$acc06,$s2
1280	xor	$acc07,$acc07,$s3
1281___
# Shared tail of the Lppc_AES_decrypt_compact round, plus the exit path.
#
# On entry (from either the 32-bit or the 64-bit section above):
#     $s0-$s3 = r0, $acc00-$acc03 = r2^r0, $acc04-$acc07 = r4^r0,
#     $acc08-$acc11 = r8
# The interleaved rotrwi/xor sequence accumulates, per state word,
#     ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
#       ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
# i.e. the 0e/09/0d/0b column mix of AES decryption (InvMixColumns),
# and then branches back to Ldec_compact_loop unconditionally; the only
# way out of the loop is the bdz emitted in the loop body.
#
# Ldec_compact_done is reached from that bdz in the last round: it xors
# in the final round key already loaded into $t0-$t3 and returns with
# blr, leaving the result in $s0-$s3.
1282$code.=<<___;
1283	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
1284	rotrwi	$s1,$s1,8
1285	xor	$s0,$s0,$acc00		# ^= r2^r0
1286	rotrwi	$s2,$s2,8
1287	xor	$s1,$s1,$acc01
1288	rotrwi	$s3,$s3,8
1289	xor	$s2,$s2,$acc02
1290	xor	$s3,$s3,$acc03
1291	xor	$acc00,$acc00,$acc08
1292	xor	$acc01,$acc01,$acc09
1293	xor	$acc02,$acc02,$acc10
1294	xor	$acc03,$acc03,$acc11
1295	xor	$s0,$s0,$acc04		# ^= r4^r0
1296	rotrwi	$acc00,$acc00,24
1297	xor	$s1,$s1,$acc05
1298	rotrwi	$acc01,$acc01,24
1299	xor	$s2,$s2,$acc06
1300	rotrwi	$acc02,$acc02,24
1301	xor	$s3,$s3,$acc07
1302	rotrwi	$acc03,$acc03,24
1303	xor	$acc04,$acc04,$acc08
1304	xor	$acc05,$acc05,$acc09
1305	xor	$acc06,$acc06,$acc10
1306	xor	$acc07,$acc07,$acc11
1307	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1308	rotrwi	$acc04,$acc04,16
1309	xor	$s1,$s1,$acc09
1310	rotrwi	$acc05,$acc05,16
1311	xor	$s2,$s2,$acc10
1312	rotrwi	$acc06,$acc06,16
1313	xor	$s3,$s3,$acc11
1314	rotrwi	$acc07,$acc07,16
1315	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
1316	rotrwi	$acc08,$acc08,8
1317	xor	$s1,$s1,$acc01
1318	rotrwi	$acc09,$acc09,8
1319	xor	$s2,$s2,$acc02
1320	rotrwi	$acc10,$acc10,8
1321	xor	$s3,$s3,$acc03
1322	rotrwi	$acc11,$acc11,8
1323	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
1324	xor	$s1,$s1,$acc05
1325	xor	$s2,$s2,$acc06
1326	xor	$s3,$s3,$acc07
1327	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
1328	xor	$s1,$s1,$acc09
1329	xor	$s2,$s2,$acc10
1330	xor	$s3,$s3,$acc11
1331
1332	b	Ldec_compact_loop
1333.align	4
1334Ldec_compact_done:
1335	xor	$s0,$s0,$t0
1336	xor	$s1,$s1,$t1
1337	xor	$s2,$s2,$t2
1338	xor	$s3,$s3,$t3
1339	blr
1340___
1341
# Post-process and emit the accumulated assembly text.
#
# Every `...` span in $code is replaced by the result of evaluating its
# contents as a Perl expression (e.g. `32-24` becomes 8, `2048+32`
# becomes 2080), so the templates above can spell constants symbolically.
1342$code =~ s/\`([^\`]*)\`/eval $1/gem;
1343print $code;
# The close is checked: buffered write errors only surface here, and if
# STDOUT is a pipe (presumably to the ppc-xlate.pl translator located at
# the top of the file -- confirm) close also reports a failing child.
# Without the check a truncated or rejected output would still exit 0.
1344close STDOUT or die "error closing STDOUT: $!";
1345