1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# Needs more work: key setup, CBC routine...
11#
12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13# 128-bit key, which is ~40% better than 64-bit code generated by gcc
14# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17# at 1/3 of ppc_AES_decrypt.
18
19# February 2010
20#
21# Rescheduling instructions to favour Power6 pipeline gave 10%
# performance improvement on the platform in question (and marginal
23# improvement even on others). It should be noted that Power6 fails
24# to process byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As a result, non-compact
26# block subroutines are 25% slower than one would expect. Compact
27# functions scale better, because they have pure computational part,
28# which scales perfectly with clock frequency. To be specific
29# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# The first command-line argument is the target "flavour".  Only the
# presence of "64" or "32" in it matters here: it selects the register
# slot size, the offset of the link-register save slot, and the
# width-specific mnemonics the assembly templates use to allocate the
# stack frame ($STU) and to save/restore registers ($PUSH/$POP).
# Any other flavour aborts the script.
$flavour = shift;

if ($flavour =~ /64/) {
	($SIZE_T, $STU, $POP, $PUSH) = (8, "stdu", "ld", "std");
	$LRSAVE	=2*$SIZE_T;
} elsif ($flavour =~ /32/) {
	($SIZE_T, $STU, $POP, $PUSH) = (4, "stwu", "lwz", "stw");
	$LRSAVE	=$SIZE_T;
} else { die "nonsense $flavour"; }
47
# Directory part of this script's path (including the trailing separator),
# or the empty string when the script was invoked without one.  The match
# result is tested so that $dir never depends on an unrelated earlier $1.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for the ppc-xlate.pl translator next to this script first, then in
# the shared perlasm directory two levels up.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator; the next command-line argument is handed to it verbatim.
# Low-precedence "or" is required: with "||" the die was attached to the
# (always true) command string and a failed open went unnoticed.
open(STDOUT, "| $^X $xlate $flavour " . shift(@ARGV))
	or die "can't call $xlate: $!";

# Stack frame size: 32 register-sized slots.  The templates below store
# r2 and r13-r31 in the top 20 of them.
$FRAME=32*$SIZE_T;
56
# Append table entries to the global $code buffer.
#
# Each argument is emitted as one ".long" directive that carries the value
# twice, as two 8-digit hexadecimal words.  Processing stops at the first
# undefined argument, exactly as the original shift-based loop did.
#
# No prototype is declared: the sub takes a list, and the former empty
# prototype "()" only went unnoticed because callers use the "&" form.
sub _data_word
{
    foreach my $word (@_) {
	last unless defined($word);
	$code .= sprintf("\t.long\t0x%08x,0x%08x\n", $word, $word);
    }
}
61
# Symbolic names for the general-purpose registers used by the assembly
# templates below.

($sp, $toc)		= ("r1", "r2");
($inp, $out, $key)	= ("r3", "r4", "r5");	# input block, output block, key schedule

# Table base registers.  $Tbl0 shares r3 with $inp, so the templates read
# the input block before a table address is loaded into $Tbl0.
($Tbl0, $Tbl1, $Tbl2, $Tbl3) = ("r3", "r6", "r7", "r2");

# Four 32-bit words of cipher state.
($s0, $s1, $s2, $s3) = ("r8", "r9", "r10", "r11");

# Round-key words / temporaries.
($t0, $t1, $t2, $t3) = ("r12", "r13", "r14", "r15");

# Sixteen scratch accumulators, r16 through r31.
($acc00, $acc01, $acc02, $acc03,
 $acc04, $acc05, $acc06, $acc07,
 $acc08, $acc09, $acc10, $acc11,
 $acc12, $acc13, $acc14, $acc15) = map { "r$_" } (16 .. 31);

# stay away from TLS pointer
# 64-bit builds move $t1 off r13; 32-bit builds move $Tbl3 off r2 (it takes
# over $t0's register).  In both cases r0 fills the vacated role.  The
# "die if" checks guard against the assignments above being edited without
# updating this fix-up.
if ($SIZE_T==8) {
	die if ($t1 ne "r13");
	$t1="r0";
} else {
	die if ($Tbl3 ne "r2");
	$Tbl3=$t0;
	$t0="r0";
}

# The compact routines do not use $Tbl2/$Tbl3 as table pointers; they
# reuse those registers for the 0x80808080 and 0x1b1b1b1b masks.
$mask80=$Tbl2;
$mask1b=$Tbl3;
108
# LAES_Te / LAES_Td: position-independent table locators.
#
# Each stub saves LR in r0, executes "bcl 20,31,$+4" (a branch-and-link to
# the very next instruction, which leaves that instruction's address in
# LR), copies LR into $Tbl0 and adds the distance from there to the table
# it serves, then restores LR and returns.
#
# Layout the offsets rely on: each stub is 9*4 bytes of instructions and
# tag data, padded by .space to 64 bytes, so the two stubs fill exactly
# 128 bytes and the data emitted right after this heredoc begins at
# LAES_Te+128.  LAES_Td additionally skips 2048+256 bytes: the 256 table
# entries written by _data_word (8 bytes each) plus the 256-byte table
# that follows them.
$code.=<<___;
.machine	"any"
.text

.align	7
LAES_Te:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`64-9*4`
LAES_Td:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-64-8+2048+256`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`128-64-9*4`
___
135&_data_word(
136	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
137	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
138	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
139	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
140	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
141	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
142	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
143	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
144	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
145	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
146	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
147	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
148	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
149	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
150	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
151	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
152	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
153	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
154	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
155	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
156	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
157	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
158	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
159	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
160	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
161	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
162	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
163	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
164	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
165	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
166	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
167	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
168	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
169	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
170	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
171	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
172	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
173	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
174	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
175	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
176	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
177	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
178	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
179	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
180	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
181	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
182	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
183	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
184	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
185	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
186	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
187	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
188	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
189	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
190	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
191	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
192	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
193	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
194	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
195	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
196	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
197	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
198	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
199	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
200$code.=<<___;
201.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
202.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
203.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
204.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
205.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
206.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
207.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
208.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
209.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
210.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
211.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
212.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
213.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
214.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
215.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
216.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
217.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
218.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
219.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
220.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
221.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
222.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
223.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
224.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
225.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
226.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
227.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
228.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
229.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
230.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
231.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
232.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
233___
234&_data_word(
235	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
236	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
237	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
238	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
239	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
240	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
241	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
242	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
243	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
244	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
245	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
246	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
247	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
248	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
249	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
250	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
251	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
252	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
253	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
254	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
255	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
256	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
257	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
258	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
259	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
260	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
261	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
262	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
263	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
264	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
265	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
266	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
267	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
268	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
269	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
270	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
271	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
272	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
273	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
274	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
275	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
276	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
277	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
278	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
279	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
280	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
281	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
282	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
283	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
284	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
285	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
286	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
287	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
288	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
289	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
290	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
291	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
292	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
293	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
294	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
295	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
296	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
297	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
298	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
299$code.=<<___;
300.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
301.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
302.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
303.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
304.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
305.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
306.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
307.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
308.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
309.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
310.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
311.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
312.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
313.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
314.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
315.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
316.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
317.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
318.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
319.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
320.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
321.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
322.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
323.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
324.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
325.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
326.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
327.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
328.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
329.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
330.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
331.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
332
333
# AES_encrypt: process one 16-byte block.
#   r3 (inp) = input block, r4 (out) = output block, r5 (key) = key schedule.
# Builds a stack frame, saves r2, r13-r31 and LR, loads the block into
# s0-s3, obtains the table address from LAES_Te and calls the compact
# encryption core, then stores s0-s3 and restores everything.
# Word loads and stores are used unless a misaligned block may cross a
# 4096-byte boundary, in which case the block is moved byte by byte.
.globl	.AES_encrypt
.align	7
.AES_encrypt:
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	andi.	$t0,$inp,3		# low two address bits of inp
	andi.	$t1,$out,3		# low two address bits of out
	or.	$t0,$t0,$t1
	bne	Lenc_unaligned		# either pointer not 4-byte aligned

Lenc_unaligned_ok:
	lwz	$s0,0($inp)		# read the block before r3 is reused as table pointer
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
	lwz	$s3,12($inp)
	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
	b	Lenc_done

Lenc_unaligned:
	subfic	$t0,$inp,4096		# 4096-inp
	subfic	$t1,$out,4096		# 4096-out
	andi.	$t0,$t0,4096-16		# zero when (4096-inp) mod 4096 is below 16
	beq	Lenc_xpage
	andi.	$t1,$t1,4096-16		# same test for out
	bne	Lenc_unaligned_ok	# no boundary nearby: misaligned word access is used

Lenc_xpage:
	# byte-wise load: the last byte of each word is loaded straight into
	# s0-s3, the other three are inserted above it with insrwi
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact

	# byte-wise store: extrwi pulls out the upper three bytes of each
	# word, stb of s0-s3 itself writes the lowest byte
	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Lenc_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME		# release the frame
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
478
# Lppc_AES_encrypt: table-driven encryption core (the wrapper in this file
# calls the compact variant instead).
# In:  s0-s3 = state words, Tbl0 = table address from LAES_Te,
#      key = key schedule; the word at key+240 (presumably the round
#      count) minus one is the loop trip count.
# Out: s0-s3 = result.  Clobbers t0-t3, acc00-acc15, Tbl1-Tbl3, key, CTR.
# Tbl1/Tbl2/Tbl3 point 3/2/1 bytes into the table; because every entry is
# stored twice in a row, a word load at those offsets returns a
# byte-rotated copy of the entry.  Indices are scaled by 8 (one doubled
# entry) by the +3 in the rotate counts.
.align	5
Lppc_AES_encrypt:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0		# initial round-key addition
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
.align	4
Lenc_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s1,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s2,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s3,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s0,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s3,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s0,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s1,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s2,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz-	Lenc_loop

	# last round: byte lookups in the 256-byte table at Tbl0+2048
	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
	rlwinm	$acc04,$s1,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s2,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	rlwinm	$acc06,$s3,`32-16`,24,31
	lwz	$acc11,`2048+96`($Tbl0)
	rlwinm	$acc07,$s0,`32-16`,24,31
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc10,$s0,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc00,$Tbl2,$acc00
	rlwinm	$acc12,$s3,`0`,24,31
	lbzx	$acc01,$Tbl2,$acc01
	rlwinm	$acc13,$s0,`0`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc14,$s1,`0`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc15,$s2,`0`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc08,$Tbl2,$acc08
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc09,$Tbl2,$acc09
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc10,$Tbl2,$acc10
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc11,$Tbl2,$acc11
	rlwimi	$s3,$acc07,16,8,15
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc08,8,16,23
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc09,8,16,23
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc10,8,16,23
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0		# final round-key addition
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
622
# Lppc_AES_encrypt_compact: encryption core used by AES_encrypt.
# In:  s0-s3 = state words, Tbl0 = table address from LAES_Te,
#      key = key schedule; the word at key+240 is the loop trip count.
# Out: s0-s3 = result.  Clobbers t0-t3, acc00-acc15, Tbl1, the two mask
#      registers, key and CTR.
# Only the 256-byte table at Tbl0+2048 is read.  Every pass adds the round
# key and substitutes the sixteen state bytes through that table; every
# pass but the last (bdz exit) then mixes each word using the 0x80808080
# and 0x1b1b1b1b masks and rotates, as spelled out in the r0/r1/r2 notes.
.align	4
Lppc_AES_encrypt_compact:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,2048
	lwz	$t0,0($key)
	lis	$mask80,0x8080
	lwz	$t1,4($key)
	lis	$mask1b,0x1b1b
	lwz	$t2,8($key)
	ori	$mask80,$mask80,0x8080
	lwz	$t3,12($key)
	ori	$mask1b,$mask1b,0x1b1b
	addi	$key,$key,16
	mtctr	$acc00
.align	4
Lenc_compact_loop:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	rlwinm	$acc00,$s0,`32-24`,24,31
	xor	$s2,$s2,$t2
	rlwinm	$acc01,$s1,`32-24`,24,31
	xor	$s3,$s3,$t3
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s1,`32-16`,24,31
	rlwinm	$acc05,$s2,`32-16`,24,31
	rlwinm	$acc06,$s3,`32-16`,24,31
	rlwinm	$acc07,$s0,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	rlwinm	$acc08,$s2,`32-8`,24,31
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s3,`0`,24,31
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s0,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s1,`0`,24,31
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s2,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl1,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl1,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	lwz	$t0,0($key)
	or	$s0,$s0,$acc12
	lwz	$t1,4($key)
	or	$s1,$s1,$acc13
	lwz	$t2,8($key)
	or	$s2,$s2,$acc14
	lwz	$t3,12($key)
	or	$s3,$s3,$acc15

	addi	$key,$key,16
	bdz	Lenc_compact_done	# last pass skips the mixing step

	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc01,$s1,$mask80
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	srwi	$acc05,$acc01,7
	andc	$acc09,$s1,$mask80
	srwi	$acc06,$acc02,7
	andc	$acc10,$s2,$mask80
	srwi	$acc07,$acc03,7
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
	sub	$acc02,$acc02,$acc06
	sub	$acc03,$acc03,$acc07
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc09,$acc09,$acc09
	add	$acc10,$acc10,$acc10
	add	$acc11,$acc11,$acc11
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc01,$acc01,$mask1b
	and	$acc02,$acc02,$mask1b
	and	$acc03,$acc03,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc01,$acc01,$acc09
	 rotlwi	$acc12,$s0,16		# ROTATE(r0,16)
	xor	$acc02,$acc02,$acc10
	 rotlwi	$acc13,$s1,16
	xor	$acc03,$acc03,$acc11
	 rotlwi	$acc14,$s2,16

	xor	$s0,$s0,$acc00		# r0^r2
	rotlwi	$acc15,$s3,16
	xor	$s1,$s1,$acc01
	rotrwi	$s0,$s0,24		# ROTATE(r2^r0,24)
	xor	$s2,$s2,$acc02
	rotrwi	$s1,$s1,24
	xor	$s3,$s3,$acc03
	rotrwi	$s2,$s2,24
	xor	$s0,$s0,$acc00		# ROTATE(r2^r0,24)^r2
	rotrwi	$s3,$s3,24
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	rotlwi	$acc08,$acc12,8		# ROTATE(r0,24)
	xor	$s0,$s0,$acc12		#
	rotlwi	$acc09,$acc13,8
	xor	$s1,$s1,$acc13
	rotlwi	$acc10,$acc14,8
	xor	$s2,$s2,$acc14
	rotlwi	$acc11,$acc15,8
	xor	$s3,$s3,$acc15
	xor	$s0,$s0,$acc08		#
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11

	b	Lenc_compact_loop
.align	4
Lenc_compact_done:
	xor	$s0,$s0,$t0		# final round-key addition
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
767
# AES_decrypt: process one 16-byte block.
#   r3 (inp) = input block, r4 (out) = output block, r5 (key) = key schedule.
# Same structure as AES_encrypt: build a frame, save r2, r13-r31 and LR,
# load the block into s0-s3, fetch the table address from LAES_Td, call
# the compact decryption core, store s0-s3 and restore everything.
# Word loads and stores are used unless a misaligned block may cross a
# 4096-byte boundary, in which case the block is moved byte by byte.
.globl	.AES_decrypt
.align	7
.AES_decrypt:
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	andi.	$t0,$inp,3		# low two address bits of inp
	andi.	$t1,$out,3		# low two address bits of out
	or.	$t0,$t0,$t1
	bne	Ldec_unaligned		# either pointer not 4-byte aligned

Ldec_unaligned_ok:
	lwz	$s0,0($inp)		# read the block before r3 is reused as table pointer
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
	lwz	$s3,12($inp)
	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
	b	Ldec_done

Ldec_unaligned:
	subfic	$t0,$inp,4096		# 4096-inp
	subfic	$t1,$out,4096		# 4096-out
	andi.	$t0,$t0,4096-16		# zero when (4096-inp) mod 4096 is below 16
	beq	Ldec_xpage
	andi.	$t1,$t1,4096-16		# same test for out
	bne	Ldec_unaligned_ok	# no boundary nearby: misaligned word access is used

Ldec_xpage:
	# byte-wise load: the last byte of each word is loaded straight into
	# s0-s3, the other three are inserted above it with insrwi
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact

	# byte-wise store: extrwi pulls out the upper three bytes of each
	# word, stb of s0-s3 itself writes the lowest byte
	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Ldec_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME		# release the frame
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
912
# Lppc_AES_decrypt: table-driven decryption core (the wrapper in this file
# calls the compact variant instead).
# In:  s0-s3 = state words, Tbl0 = table address from LAES_Td,
#      key = key schedule; the word at key+240 (presumably the round
#      count) minus one is the loop trip count.
# Out: s0-s3 = result.  Clobbers t0-t3, acc00-acc15, Tbl1-Tbl3, key, CTR.
# Same scheme as Lppc_AES_encrypt, but the second and fourth lookups of
# each output word take their index byte from different state words
# (s3,s0,s1,s2 and s1,s2,s3,s0 respectively).
.align	5
Lppc_AES_decrypt:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0		# initial round-key addition
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
.align	4
Ldec_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s3,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s0,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s1,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s2,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz-	Ldec_loop

	# last round: byte lookups in the 256-byte table at Tbl0+2048
	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
	rlwinm	$acc04,$s3,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s0,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	lbzx	$acc00,$Tbl2,$acc00
	lwz	$acc11,`2048+96`($Tbl0)
	lbzx	$acc01,$Tbl2,$acc01
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc06,$s1,`32-16`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc07,$s2,`32-16`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl2,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl2,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl2,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl2,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0		# final round-key addition
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
1056
1057.align	4
1058Lppc_AES_decrypt_compact:
1059	lwz	$acc00,240($key)
1060	addi	$Tbl1,$Tbl0,2048
1061	lwz	$t0,0($key)
1062	lis	$mask80,0x8080
1063	lwz	$t1,4($key)
1064	lis	$mask1b,0x1b1b
1065	lwz	$t2,8($key)
1066	ori	$mask80,$mask80,0x8080
1067	lwz	$t3,12($key)
1068	ori	$mask1b,$mask1b,0x1b1b
1069	addi	$key,$key,16
1070___
# 64-bit builds only: copy the low 32 bits of each mask register into its
# own high 32 bits (insrdi ...,32,0), so that the 0x80808080 and 0x1b1b1b1b
# patterns assembled with lis/ori in the prologue span a full doubleword.
# The SIZE_T==8 arithmetic later in this routine packs two state words into
# one register and applies these masks to both halves at once.
1071$code.=<<___ if ($SIZE_T==8);
1072	insrdi	$mask80,$mask80,32,0
1073	insrdi	$mask1b,$mask1b,32,0
1074___
# Head of the compact decryption round loop.
#
# CTR is loaded from $acc00, which holds the word the prologue read from
# 240($key) (presumably the round count -- confirm against the key schedule
# layout).  Each pass through Ldec_compact_loop then:
#   1. xors the state words $s0-$s3 with the round key held in $t0-$t3;
#   2. isolates every state byte with rlwinm and replaces it with the byte
#      stored at that index in the table addressed by $Tbl1 (= $Tbl0+2048,
#      the area referred to as Td4 elsewhere in this file);
#   3. rebuilds $s0-$s3 from the looked-up bytes.  Word i takes its bytes,
#      most significant first, from words i, i-1, i-2 and i-3 (mod 4), so
#      the row rotation is folded into the lookup pattern;
#   4. loads the next round key into $t0-$t3 and advances $key by 16;
#   5. decrements CTR and, once it reaches zero (bdz), leaves for
#      Ldec_compact_done without running the column mixing that follows.
# The `...` spans are arithmetic that the substitution at the end of the
# script evaluates before the text is printed.
1075$code.=<<___;
1076	mtctr	$acc00
1077.align	4
1078Ldec_compact_loop:
1079	xor	$s0,$s0,$t0
1080	xor	$s1,$s1,$t1
1081	rlwinm	$acc00,$s0,`32-24`,24,31
1082	xor	$s2,$s2,$t2
1083	rlwinm	$acc01,$s1,`32-24`,24,31
1084	xor	$s3,$s3,$t3
1085	rlwinm	$acc02,$s2,`32-24`,24,31
1086	rlwinm	$acc03,$s3,`32-24`,24,31
1087	rlwinm	$acc04,$s3,`32-16`,24,31
1088	rlwinm	$acc05,$s0,`32-16`,24,31
1089	rlwinm	$acc06,$s1,`32-16`,24,31
1090	rlwinm	$acc07,$s2,`32-16`,24,31
1091	lbzx	$acc00,$Tbl1,$acc00
1092	rlwinm	$acc08,$s2,`32-8`,24,31
1093	lbzx	$acc01,$Tbl1,$acc01
1094	rlwinm	$acc09,$s3,`32-8`,24,31
1095	lbzx	$acc02,$Tbl1,$acc02
1096	rlwinm	$acc10,$s0,`32-8`,24,31
1097	lbzx	$acc03,$Tbl1,$acc03
1098	rlwinm	$acc11,$s1,`32-8`,24,31
1099	lbzx	$acc04,$Tbl1,$acc04
1100	rlwinm	$acc12,$s1,`0`,24,31
1101	lbzx	$acc05,$Tbl1,$acc05
1102	rlwinm	$acc13,$s2,`0`,24,31
1103	lbzx	$acc06,$Tbl1,$acc06
1104	rlwinm	$acc14,$s3,`0`,24,31
1105	lbzx	$acc07,$Tbl1,$acc07
1106	rlwinm	$acc15,$s0,`0`,24,31
1107	lbzx	$acc08,$Tbl1,$acc08
1108	rlwinm	$s0,$acc00,24,0,7
1109	lbzx	$acc09,$Tbl1,$acc09
1110	rlwinm	$s1,$acc01,24,0,7
1111	lbzx	$acc10,$Tbl1,$acc10
1112	rlwinm	$s2,$acc02,24,0,7
1113	lbzx	$acc11,$Tbl1,$acc11
1114	rlwinm	$s3,$acc03,24,0,7
1115	lbzx	$acc12,$Tbl1,$acc12
1116	rlwimi	$s0,$acc04,16,8,15
1117	lbzx	$acc13,$Tbl1,$acc13
1118	rlwimi	$s1,$acc05,16,8,15
1119	lbzx	$acc14,$Tbl1,$acc14
1120	rlwimi	$s2,$acc06,16,8,15
1121	lbzx	$acc15,$Tbl1,$acc15
1122	rlwimi	$s3,$acc07,16,8,15
1123	rlwimi	$s0,$acc08,8,16,23
1124	rlwimi	$s1,$acc09,8,16,23
1125	rlwimi	$s2,$acc10,8,16,23
1126	rlwimi	$s3,$acc11,8,16,23
1127	lwz	$t0,0($key)
1128	or	$s0,$s0,$acc12
1129	lwz	$t1,4($key)
1130	or	$s1,$s1,$acc13
1131	lwz	$t2,8($key)
1132	or	$s2,$s2,$acc14
1133	lwz	$t3,12($key)
1134	or	$s3,$s3,$acc15
1135
1136	addi	$key,$key,16
1137	bdz	Ldec_compact_done
1138___
# 64-bit variant of the column-mixing arithmetic (emitted when SIZE_T==8).
#
# insrdi packs $s1 into the upper half of $s0 and $s3 into the upper half of
# $s2, so every instruction below processes two state words at once.  The
# packed value r0 is doubled byte-wise three times with the recipe spelled
# out in the in-line comments,
#     r1 = x & 0x80..80
#     2x = ((x & 0x7f..7f) << 1) ^ ((r1 - (r1 >> 7)) & 0x1b..1b)
# i.e. each byte is shifted left by one and 0x1b is xored in wherever the
# byte's top bit was set.  That yields r2, r4 and r8.  r2 and r4 are then
# xored with r0, and extrdi copies the upper 32 bits of each even-numbered
# accumulator into the following odd-numbered one.
#
# Resulting layout, as consumed by the code shared by both SIZE_T variants
# (32-bit word operations on the low half of each register):
#     $acc00-$acc03 = r2^r0,  $acc04-$acc07 = r4^r0,  $acc08-$acc11 = r8
# for $s0-$s3 respectively.
1139$code.=<<___ if ($SIZE_T==8);
1140	# vectorized permutation improves decrypt performance by 10%
1141	insrdi	$s0,$s1,32,0
1142	insrdi	$s2,$s3,32,0
1143
1144	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1145	and	$acc02,$s2,$mask80
1146	srdi	$acc04,$acc00,7		# r1>>7
1147	srdi	$acc06,$acc02,7
1148	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1149	andc	$acc10,$s2,$mask80
1150	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1151	sub	$acc02,$acc02,$acc06
1152	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1153	add	$acc10,$acc10,$acc10
1154	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1155	and	$acc02,$acc02,$mask1b
1156	xor	$acc00,$acc00,$acc08	# r2
1157	xor	$acc02,$acc02,$acc10
1158
1159	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1160	and	$acc06,$acc02,$mask80
1161	srdi	$acc08,$acc04,7		# r1>>7
1162	srdi	$acc10,$acc06,7
1163	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1164	andc	$acc14,$acc02,$mask80
1165	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1166	sub	$acc06,$acc06,$acc10
1167	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1168	add	$acc14,$acc14,$acc14
1169	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1170	and	$acc06,$acc06,$mask1b
1171	xor	$acc04,$acc04,$acc12	# r4
1172	xor	$acc06,$acc06,$acc14
1173
1174	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1175	and	$acc10,$acc06,$mask80
1176	srdi	$acc12,$acc08,7		# r1>>7
1177	srdi	$acc14,$acc10,7
1178	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1179	sub	$acc10,$acc10,$acc14
1180	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1181	andc	$acc14,$acc06,$mask80
1182	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1183	add	$acc14,$acc14,$acc14
1184	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1185	and	$acc10,$acc10,$mask1b
1186	xor	$acc08,$acc08,$acc12	# r8
1187	xor	$acc10,$acc10,$acc14
1188
1189	xor	$acc00,$acc00,$s0	# r2^r0
1190	xor	$acc02,$acc02,$s2
1191	xor	$acc04,$acc04,$s0	# r4^r0
1192	xor	$acc06,$acc06,$s2
1193
1194	extrdi	$acc01,$acc00,32,0
1195	extrdi	$acc03,$acc02,32,0
1196	extrdi	$acc05,$acc04,32,0
1197	extrdi	$acc07,$acc06,32,0
1198	extrdi	$acc09,$acc08,32,0
1199	extrdi	$acc11,$acc10,32,0
1200___
# 32-bit variant of the column-mixing arithmetic (emitted when SIZE_T==4).
#
# Same computation as the SIZE_T==8 variant, but each of the four state
# words $s0-$s3 is handled in its own register using 32-bit shifts (srwi).
# Every word r0 is doubled byte-wise three times with
#     r1 = x & 0x80808080
#     2x = ((x & 0x7f7f7f7f) << 1) ^ ((r1 - (r1 >> 7)) & 0x1b1b1b1b)
# producing r2, r4 and r8; r2 and r4 are finally xored with r0.
#
# Resulting layout, as consumed by the code shared by both SIZE_T variants:
#     $acc00-$acc03 = r2^r0,  $acc04-$acc07 = r4^r0,  $acc08-$acc11 = r8
# for $s0-$s3 respectively.  $acc12-$acc15 are scratch.
1201$code.=<<___ if ($SIZE_T==4);
1202	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1203	and	$acc01,$s1,$mask80
1204	and	$acc02,$s2,$mask80
1205	and	$acc03,$s3,$mask80
1206	srwi	$acc04,$acc00,7		# r1>>7
1207	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1208	srwi	$acc05,$acc01,7
1209	andc	$acc09,$s1,$mask80
1210	srwi	$acc06,$acc02,7
1211	andc	$acc10,$s2,$mask80
1212	srwi	$acc07,$acc03,7
1213	andc	$acc11,$s3,$mask80
1214	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1215	sub	$acc01,$acc01,$acc05
1216	sub	$acc02,$acc02,$acc06
1217	sub	$acc03,$acc03,$acc07
1218	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1219	add	$acc09,$acc09,$acc09
1220	add	$acc10,$acc10,$acc10
1221	add	$acc11,$acc11,$acc11
1222	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1223	and	$acc01,$acc01,$mask1b
1224	and	$acc02,$acc02,$mask1b
1225	and	$acc03,$acc03,$mask1b
1226	xor	$acc00,$acc00,$acc08	# r2
1227	xor	$acc01,$acc01,$acc09
1228	xor	$acc02,$acc02,$acc10
1229	xor	$acc03,$acc03,$acc11
1230
1231	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1232	and	$acc05,$acc01,$mask80
1233	and	$acc06,$acc02,$mask80
1234	and	$acc07,$acc03,$mask80
1235	srwi	$acc08,$acc04,7		# r1>>7
1236	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1237	srwi	$acc09,$acc05,7
1238	andc	$acc13,$acc01,$mask80
1239	srwi	$acc10,$acc06,7
1240	andc	$acc14,$acc02,$mask80
1241	srwi	$acc11,$acc07,7
1242	andc	$acc15,$acc03,$mask80
1243	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1244	sub	$acc05,$acc05,$acc09
1245	sub	$acc06,$acc06,$acc10
1246	sub	$acc07,$acc07,$acc11
1247	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1248	add	$acc13,$acc13,$acc13
1249	add	$acc14,$acc14,$acc14
1250	add	$acc15,$acc15,$acc15
1251	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1252	and	$acc05,$acc05,$mask1b
1253	and	$acc06,$acc06,$mask1b
1254	and	$acc07,$acc07,$mask1b
1255	xor	$acc04,$acc04,$acc12	# r4
1256	xor	$acc05,$acc05,$acc13
1257	xor	$acc06,$acc06,$acc14
1258	xor	$acc07,$acc07,$acc15
1259
1260	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1261	and	$acc09,$acc05,$mask80
1262	srwi	$acc12,$acc08,7		# r1>>7
1263	and	$acc10,$acc06,$mask80
1264	srwi	$acc13,$acc09,7
1265	and	$acc11,$acc07,$mask80
1266	srwi	$acc14,$acc10,7
1267	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1268	srwi	$acc15,$acc11,7
1269	sub	$acc09,$acc09,$acc13
1270	sub	$acc10,$acc10,$acc14
1271	sub	$acc11,$acc11,$acc15
1272	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1273	andc	$acc13,$acc05,$mask80
1274	andc	$acc14,$acc06,$mask80
1275	andc	$acc15,$acc07,$mask80
1276	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1277	add	$acc13,$acc13,$acc13
1278	add	$acc14,$acc14,$acc14
1279	add	$acc15,$acc15,$acc15
1280	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1281	and	$acc09,$acc09,$mask1b
1282	and	$acc10,$acc10,$mask1b
1283	and	$acc11,$acc11,$mask1b
1284	xor	$acc08,$acc08,$acc12	# r8
1285	xor	$acc09,$acc09,$acc13
1286	xor	$acc10,$acc10,$acc14
1287	xor	$acc11,$acc11,$acc15
1288
1289	xor	$acc00,$acc00,$s0	# r2^r0
1290	xor	$acc01,$acc01,$s1
1291	xor	$acc02,$acc02,$s2
1292	xor	$acc03,$acc03,$s3
1293	xor	$acc04,$acc04,$s0	# r4^r0
1294	xor	$acc05,$acc05,$s1
1295	xor	$acc06,$acc06,$s2
1296	xor	$acc07,$acc07,$s3
1297___
# Shared tail of the round, the loop exit, and the end of the generated text.
#
# Combines r0 (still in $s0-$s3) with the r2^r0, r4^r0 and r8 values that the
# preceding SIZE_T-specific code left in $acc00-$acc11.  Per state word, with
# rotr meaning a 32-bit rotate right (rotrwi):
#     s = rotr(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
#         ^ rotr(r8^r2^r0,24) ^ rotr(r8^r4^r0,16) ^ rotr(r8,8)
# The xors into $s0-$s3 are interleaved with the accumulator updates so that
# each accumulator is consumed in its unrotated form before it is modified.
# Control then returns to Ldec_compact_loop.
#
# Ldec_compact_done is the target of the bdz in the loop head: it xors the
# state with the round key already loaded into $t0-$t3 and returns (blr).
# The .long/.byte words after blr look like a traceback tag -- TODO confirm.
# The identification string and the closing .align are the last text
# appended to $code.
1298$code.=<<___;
1299	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
1300	rotrwi	$s1,$s1,8
1301	xor	$s0,$s0,$acc00		# ^= r2^r0
1302	rotrwi	$s2,$s2,8
1303	xor	$s1,$s1,$acc01
1304	rotrwi	$s3,$s3,8
1305	xor	$s2,$s2,$acc02
1306	xor	$s3,$s3,$acc03
1307	xor	$acc00,$acc00,$acc08
1308	xor	$acc01,$acc01,$acc09
1309	xor	$acc02,$acc02,$acc10
1310	xor	$acc03,$acc03,$acc11
1311	xor	$s0,$s0,$acc04		# ^= r4^r0
1312	rotrwi	$acc00,$acc00,24
1313	xor	$s1,$s1,$acc05
1314	rotrwi	$acc01,$acc01,24
1315	xor	$s2,$s2,$acc06
1316	rotrwi	$acc02,$acc02,24
1317	xor	$s3,$s3,$acc07
1318	rotrwi	$acc03,$acc03,24
1319	xor	$acc04,$acc04,$acc08
1320	xor	$acc05,$acc05,$acc09
1321	xor	$acc06,$acc06,$acc10
1322	xor	$acc07,$acc07,$acc11
1323	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1324	rotrwi	$acc04,$acc04,16
1325	xor	$s1,$s1,$acc09
1326	rotrwi	$acc05,$acc05,16
1327	xor	$s2,$s2,$acc10
1328	rotrwi	$acc06,$acc06,16
1329	xor	$s3,$s3,$acc11
1330	rotrwi	$acc07,$acc07,16
1331	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
1332	rotrwi	$acc08,$acc08,8
1333	xor	$s1,$s1,$acc01
1334	rotrwi	$acc09,$acc09,8
1335	xor	$s2,$s2,$acc02
1336	rotrwi	$acc10,$acc10,8
1337	xor	$s3,$s3,$acc03
1338	rotrwi	$acc11,$acc11,8
1339	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
1340	xor	$s1,$s1,$acc05
1341	xor	$s2,$s2,$acc06
1342	xor	$s3,$s3,$acc07
1343	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
1344	xor	$s1,$s1,$acc09
1345	xor	$s2,$s2,$acc10
1346	xor	$s3,$s3,$acc11
1347
1348	b	Ldec_compact_loop
1349.align	4
1350Ldec_compact_done:
1351	xor	$s0,$s0,$t0
1352	xor	$s1,$s1,$t1
1353	xor	$s2,$s2,$t2
1354	xor	$s3,$s3,$t3
1355	blr
1356	.long	0
1357	.byte	0,12,0x14,0,0,0,0,0
1358
1359.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1360.align	7
1361___
1362
# Post-process and emit the generated assembly.
#
# Every `expr` span left in $code (the templates write shift amounts as
# e.g. `32-24`) is evaluated as a Perl expression and replaced by its value.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# A failed flush, or a failing downstream filter, only becomes visible when
# the handle is closed, so a false return is fatal here; otherwise the
# generator would exit 0 after producing truncated or missing output.  For a
# piped handle close() leaves $! at 0 when the only problem was the child's
# exit status, hence the fallback to $?.
# NOTE(review): STDOUT is presumably piped through the ppc-xlate.pl
# translator located near the top of the script -- confirm.
close STDOUT
	or die "error closing STDOUT: " . ($! ? $! : "child exit status $?");
1366