aes-mips.pl revision 302408
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for MIPS
11
12# October 2010
13#
14# Code uses a 1K[+256B] S-box and on a single-issue core [such as R5000]
15# spends ~68 cycles per byte processed with a 128-bit key. This is ~16%
16# faster than gcc-generated code, which is not very impressive. But
17# recall that the compressed S-box requires extra processing, namely
18# additional rotations. Rotations are implemented with lwl/lwr pairs,
19# which are normally used for loading unaligned data. Another cool
20# thing about this module is its endian neutrality, which means that
21# it processes data without ever changing byte order...
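# Only one 1KB table per direction is actually stored; the companion
# Te1..Te3 [resp. Td1..Td3] tables are recovered on the fly as byte
# rotations of the same word, e.g. Te1[x] == rotr32(Te0[x],8). On pre-R2
# cores the rotation is folded into the lwl/lwr pair that fetches the
# entry at a "misaligned" offset. A minimal pure-Perl illustration of the
# rotation (not used by the generator itself):
sub rotr32 { my ($x,$n) = @_; return (($x >> $n) | ($x << (32 - $n))) & 0xffffffff; }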
22
23# September 2012
24#
25# Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (further
26# ~25% fewer instructions) code paths. Note that there is no run-time
27# switch; the code path is chosen at pre-processing time by passing
28# -mips32r2 and/or -msmartmips.
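# E.g. (illustrative): the generated .S file is simply assembled with
# -mips32r2 and/or -msmartmips in the compiler flags, and the matching
# #if defined(__mips_smartmips) / _MIPS_ARCH_MIPS32R2 branch below is
# selected by the C pre-processor; no run-time capability detection is done.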
29
30######################################################################
31# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
32# widely used. Then there is a new contender: NUBI. It appears that if
33# one picks the latter, it's possible to arrange code in an ABI-neutral
34# manner. Therefore let's stick to the NUBI register layout:
35#
36($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
37($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
38($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
39($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
40#
41# The return value is placed in $a0. The following coding rules facilitate
42# interoperability:
43#
44# - never ever touch $tp, "thread pointer", former $gp;
45# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
46#   old code];
47# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
48#
49# For reference here is register layout for N32/64 MIPS ABIs:
50#
51# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
52# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
53# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
54# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
55# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
56#
57$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
58
59if ($flavour =~ /64|n32/i) {
60	$PTR_ADD="dadd";	# incidentally works even on n32
61	$PTR_SUB="dsub";	# incidentally works even on n32
62	$PTR_INS="dins";
63	$REG_S="sd";
64	$REG_L="ld";
65	$PTR_SLL="dsll";	# incidentally works even on n32
66	$SZREG=8;
67} else {
68	$PTR_ADD="add";
69	$PTR_SUB="sub";
70	$PTR_INS="ins";
71	$REG_S="sw";
72	$REG_L="lw";
73	$PTR_SLL="sll";
74	$SZREG=4;
75}
76$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
77#
78# <appro@openssl.org>
79#
80######################################################################
81
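# probe the target compiler (when CC is set): if it pre-defines MIPSEL,
# i.e. targets little-endian MIPS, the token below is macro-expanded and
# the match fails, leaving $big_endian=0; otherwise the token survives
# pre-processing and $big_endian is set to 1.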
82$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
83
84for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
85open STDOUT,">$output";
86
87if (!defined($big_endian))
88{    $big_endian=(unpack('L',pack('N',1))==1);   }
89
90while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
91open STDOUT,">$output";
92
93my ($MSB,$LSB)=(0,3);	# automatically converted to little-endian
94
95$code.=<<___;
96.text
97#ifdef OPENSSL_FIPSCANISTER
98# include <openssl/fipssyms.h>
99#endif
100
101#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
102#define _MIPS_ARCH_MIPS32R2
103#endif
104
105#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
106.option	pic2
107#endif
108.set	noat
109___
110
111{{{
112my $FRAMESIZE=16*$SZREG;
113my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
114
115my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
116my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
117my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
118my ($key0,$cnt)=($gp,$fp);
119
120# instruction ordering is "stolen" from MIPSpro assembler output
121# generated with -mips3 -O3 arguments...
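# What the unrolled loop below computes, per output word and per round
# (illustrative sketch, big-endian byte positions as in the comments;
# Te0 is the 1KB table at AES_Te, rotr a 32-bit rotate right):
#
#	t  = rotr(Te0[(s1>>16)&0xff],  8)	# "Te1"
#	   ^ rotr(Te0[(s2>> 8)&0xff], 16)	# "Te2"
#	   ^ rotr(Te0[ s3     &0xff ], 24)	# "Te3"
#	   ^      Te0[ s0>>24        ]
#	s0' = rk[i] ^ t
#
# with the other three words obtained by rotating the roles of s0..s3, and
# the rotations either issued as rotr (MIPS32R2/SmartMIPS) or folded into
# the lwl/lwr offsets on older cores.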
122$code.=<<___;
123.align	5
124.ent	_mips_AES_encrypt
125_mips_AES_encrypt:
126	.frame	$sp,0,$ra
127	.set	reorder
128	lw	$t0,0($key)
129	lw	$t1,4($key)
130	lw	$t2,8($key)
131	lw	$t3,12($key)
132	lw	$cnt,240($key)
133	$PTR_ADD $key0,$key,16
134
135	xor	$s0,$t0
136	xor	$s1,$t1
137	xor	$s2,$t2
138	xor	$s3,$t3
139
140	sub	$cnt,1
141#if defined(__mips_smartmips)
142	ext	$i0,$s1,16,8
143.Loop_enc:
144	ext	$i1,$s2,16,8
145	ext	$i2,$s3,16,8
146	ext	$i3,$s0,16,8
147	lwxs	$t0,$i0($Tbl)		# Te1[s1>>16]
148	ext	$i0,$s2,8,8
149	lwxs	$t1,$i1($Tbl)		# Te1[s2>>16]
150	ext	$i1,$s3,8,8
151	lwxs	$t2,$i2($Tbl)		# Te1[s3>>16]
152	ext	$i2,$s0,8,8
153	lwxs	$t3,$i3($Tbl)		# Te1[s0>>16]
154	ext	$i3,$s1,8,8
155
156	lwxs	$t4,$i0($Tbl)		# Te2[s2>>8]
157	ext	$i0,$s3,0,8
158	lwxs	$t5,$i1($Tbl)		# Te2[s3>>8]
159	ext	$i1,$s0,0,8
160	lwxs	$t6,$i2($Tbl)		# Te2[s0>>8]
161	ext	$i2,$s1,0,8
162	lwxs	$t7,$i3($Tbl)		# Te2[s1>>8]
163	ext	$i3,$s2,0,8
164
165	lwxs	$t8,$i0($Tbl)		# Te3[s3]
166	ext	$i0,$s0,24,8
167	lwxs	$t9,$i1($Tbl)		# Te3[s0]
168	ext	$i1,$s1,24,8
169	lwxs	$t10,$i2($Tbl)		# Te3[s1]
170	ext	$i2,$s2,24,8
171	lwxs	$t11,$i3($Tbl)		# Te3[s2]
172	ext	$i3,$s3,24,8
173
174	rotr	$t0,$t0,8
175	rotr	$t1,$t1,8
176	rotr	$t2,$t2,8
177	rotr	$t3,$t3,8
178
179	rotr	$t4,$t4,16
180	rotr	$t5,$t5,16
181	rotr	$t6,$t6,16
182	rotr	$t7,$t7,16
183
184	xor	$t0,$t4
185	lwxs	$t4,$i0($Tbl)		# Te0[s0>>24]
186	xor	$t1,$t5
187	lwxs	$t5,$i1($Tbl)		# Te0[s1>>24]
188	xor	$t2,$t6
189	lwxs	$t6,$i2($Tbl)		# Te0[s2>>24]
190	xor	$t3,$t7
191	lwxs	$t7,$i3($Tbl)		# Te0[s3>>24]
192
193	rotr	$t8,$t8,24
194	lw	$s0,0($key0)
195	rotr	$t9,$t9,24
196	lw	$s1,4($key0)
197	rotr	$t10,$t10,24
198	lw	$s2,8($key0)
199	rotr	$t11,$t11,24
200	lw	$s3,12($key0)
201
202	xor	$t0,$t8
203	xor	$t1,$t9
204	xor	$t2,$t10
205	xor	$t3,$t11
206
207	xor	$t0,$t4
208	xor	$t1,$t5
209	xor	$t2,$t6
210	xor	$t3,$t7
211
212	sub	$cnt,1
213	$PTR_ADD $key0,16
214	xor	$s0,$t0
215	xor	$s1,$t1
216	xor	$s2,$t2
217	xor	$s3,$t3
218	.set	noreorder
219	bnez	$cnt,.Loop_enc
220	ext	$i0,$s1,16,8
221
222	_xtr	$i0,$s1,16-2
223#else
224	_xtr	$i0,$s1,16-2
225.Loop_enc:
226	_xtr	$i1,$s2,16-2
227	_xtr	$i2,$s3,16-2
228	_xtr	$i3,$s0,16-2
229	and	$i0,0x3fc
230	and	$i1,0x3fc
231	and	$i2,0x3fc
232	and	$i3,0x3fc
233	$PTR_ADD $i0,$Tbl
234	$PTR_ADD $i1,$Tbl
235	$PTR_ADD $i2,$Tbl
236	$PTR_ADD $i3,$Tbl
237#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
238	lw	$t0,0($i0)		# Te1[s1>>16]
239	_xtr	$i0,$s2,8-2
240	lw	$t1,0($i1)		# Te1[s2>>16]
241	_xtr	$i1,$s3,8-2
242	lw	$t2,0($i2)		# Te1[s3>>16]
243	_xtr	$i2,$s0,8-2
244	lw	$t3,0($i3)		# Te1[s0>>16]
245	_xtr	$i3,$s1,8-2
246#else
247	lwl	$t0,3($i0)		# Te1[s1>>16]
248	lwl	$t1,3($i1)		# Te1[s2>>16]
249	lwl	$t2,3($i2)		# Te1[s3>>16]
250	lwl	$t3,3($i3)		# Te1[s0>>16]
251	lwr	$t0,2($i0)		# Te1[s1>>16]
252	_xtr	$i0,$s2,8-2
253	lwr	$t1,2($i1)		# Te1[s2>>16]
254	_xtr	$i1,$s3,8-2
255	lwr	$t2,2($i2)		# Te1[s3>>16]
256	_xtr	$i2,$s0,8-2
257	lwr	$t3,2($i3)		# Te1[s0>>16]
258	_xtr	$i3,$s1,8-2
259#endif
260	and	$i0,0x3fc
261	and	$i1,0x3fc
262	and	$i2,0x3fc
263	and	$i3,0x3fc
264	$PTR_ADD $i0,$Tbl
265	$PTR_ADD $i1,$Tbl
266	$PTR_ADD $i2,$Tbl
267	$PTR_ADD $i3,$Tbl
268#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
269	rotr	$t0,$t0,8
270	rotr	$t1,$t1,8
271	rotr	$t2,$t2,8
272	rotr	$t3,$t3,8
273# if defined(_MIPSEL)
274	lw	$t4,0($i0)		# Te2[s2>>8]
275	_xtr	$i0,$s3,0-2
276	lw	$t5,0($i1)		# Te2[s3>>8]
277	_xtr	$i1,$s0,0-2
278	lw	$t6,0($i2)		# Te2[s0>>8]
279	_xtr	$i2,$s1,0-2
280	lw	$t7,0($i3)		# Te2[s1>>8]
281	_xtr	$i3,$s2,0-2
282
283	and	$i0,0x3fc
284	and	$i1,0x3fc
285	and	$i2,0x3fc
286	and	$i3,0x3fc
287	$PTR_ADD $i0,$Tbl
288	$PTR_ADD $i1,$Tbl
289	$PTR_ADD $i2,$Tbl
290	$PTR_ADD $i3,$Tbl
291	lw	$t8,0($i0)		# Te3[s3]
292	$PTR_INS $i0,$s0,2,8
293	lw	$t9,0($i1)		# Te3[s0]
294	$PTR_INS $i1,$s1,2,8
295	lw	$t10,0($i2)		# Te3[s1]
296	$PTR_INS $i2,$s2,2,8
297	lw	$t11,0($i3)		# Te3[s2]
298	$PTR_INS $i3,$s3,2,8
299# else
300	lw	$t4,0($i0)		# Te2[s2>>8]
301	$PTR_INS $i0,$s3,2,8
302	lw	$t5,0($i1)		# Te2[s3>>8]
303	$PTR_INS $i1,$s0,2,8
304	lw	$t6,0($i2)		# Te2[s0>>8]
305	$PTR_INS $i2,$s1,2,8
306	lw	$t7,0($i3)		# Te2[s1>>8]
307	$PTR_INS $i3,$s2,2,8
308
309	lw	$t8,0($i0)		# Te3[s3]
310	_xtr	$i0,$s0,24-2
311	lw	$t9,0($i1)		# Te3[s0]
312	_xtr	$i1,$s1,24-2
313	lw	$t10,0($i2)		# Te3[s1]
314	_xtr	$i2,$s2,24-2
315	lw	$t11,0($i3)		# Te3[s2]
316	_xtr	$i3,$s3,24-2
317
318	and	$i0,0x3fc
319	and	$i1,0x3fc
320	and	$i2,0x3fc
321	and	$i3,0x3fc
322	$PTR_ADD $i0,$Tbl
323	$PTR_ADD $i1,$Tbl
324	$PTR_ADD $i2,$Tbl
325	$PTR_ADD $i3,$Tbl
326# endif
327	rotr	$t4,$t4,16
328	rotr	$t5,$t5,16
329	rotr	$t6,$t6,16
330	rotr	$t7,$t7,16
331
332	rotr	$t8,$t8,24
333	rotr	$t9,$t9,24
334	rotr	$t10,$t10,24
335	rotr	$t11,$t11,24
336#else
337	lwl	$t4,2($i0)		# Te2[s2>>8]
338	lwl	$t5,2($i1)		# Te2[s3>>8]
339	lwl	$t6,2($i2)		# Te2[s0>>8]
340	lwl	$t7,2($i3)		# Te2[s1>>8]
341	lwr	$t4,1($i0)		# Te2[s2>>8]
342	_xtr	$i0,$s3,0-2
343	lwr	$t5,1($i1)		# Te2[s3>>8]
344	_xtr	$i1,$s0,0-2
345	lwr	$t6,1($i2)		# Te2[s0>>8]
346	_xtr	$i2,$s1,0-2
347	lwr	$t7,1($i3)		# Te2[s1>>8]
348	_xtr	$i3,$s2,0-2
349
350	and	$i0,0x3fc
351	and	$i1,0x3fc
352	and	$i2,0x3fc
353	and	$i3,0x3fc
354	$PTR_ADD $i0,$Tbl
355	$PTR_ADD $i1,$Tbl
356	$PTR_ADD $i2,$Tbl
357	$PTR_ADD $i3,$Tbl
358	lwl	$t8,1($i0)		# Te3[s3]
359	lwl	$t9,1($i1)		# Te3[s0]
360	lwl	$t10,1($i2)		# Te3[s1]
361	lwl	$t11,1($i3)		# Te3[s2]
362	lwr	$t8,0($i0)		# Te3[s3]
363	_xtr	$i0,$s0,24-2
364	lwr	$t9,0($i1)		# Te3[s0]
365	_xtr	$i1,$s1,24-2
366	lwr	$t10,0($i2)		# Te3[s1]
367	_xtr	$i2,$s2,24-2
368	lwr	$t11,0($i3)		# Te3[s2]
369	_xtr	$i3,$s3,24-2
370
371	and	$i0,0x3fc
372	and	$i1,0x3fc
373	and	$i2,0x3fc
374	and	$i3,0x3fc
375	$PTR_ADD $i0,$Tbl
376	$PTR_ADD $i1,$Tbl
377	$PTR_ADD $i2,$Tbl
378	$PTR_ADD $i3,$Tbl
379#endif
380	xor	$t0,$t4
381	lw	$t4,0($i0)		# Te0[s0>>24]
382	xor	$t1,$t5
383	lw	$t5,0($i1)		# Te0[s1>>24]
384	xor	$t2,$t6
385	lw	$t6,0($i2)		# Te0[s2>>24]
386	xor	$t3,$t7
387	lw	$t7,0($i3)		# Te0[s3>>24]
388
389	xor	$t0,$t8
390	lw	$s0,0($key0)
391	xor	$t1,$t9
392	lw	$s1,4($key0)
393	xor	$t2,$t10
394	lw	$s2,8($key0)
395	xor	$t3,$t11
396	lw	$s3,12($key0)
397
398	xor	$t0,$t4
399	xor	$t1,$t5
400	xor	$t2,$t6
401	xor	$t3,$t7
402
403	sub	$cnt,1
404	$PTR_ADD $key0,16
405	xor	$s0,$t0
406	xor	$s1,$t1
407	xor	$s2,$t2
408	xor	$s3,$t3
409	.set	noreorder
410	bnez	$cnt,.Loop_enc
411	_xtr	$i0,$s1,16-2
412#endif
413
414	.set	reorder
415	_xtr	$i1,$s2,16-2
416	_xtr	$i2,$s3,16-2
417	_xtr	$i3,$s0,16-2
418	and	$i0,0x3fc
419	and	$i1,0x3fc
420	and	$i2,0x3fc
421	and	$i3,0x3fc
422	$PTR_ADD $i0,$Tbl
423	$PTR_ADD $i1,$Tbl
424	$PTR_ADD $i2,$Tbl
425	$PTR_ADD $i3,$Tbl
426	lbu	$t0,2($i0)		# Te4[s1>>16]
427	_xtr	$i0,$s2,8-2
428	lbu	$t1,2($i1)		# Te4[s2>>16]
429	_xtr	$i1,$s3,8-2
430	lbu	$t2,2($i2)		# Te4[s3>>16]
431	_xtr	$i2,$s0,8-2
432	lbu	$t3,2($i3)		# Te4[s0>>16]
433	_xtr	$i3,$s1,8-2
434
435	and	$i0,0x3fc
436	and	$i1,0x3fc
437	and	$i2,0x3fc
438	and	$i3,0x3fc
439	$PTR_ADD $i0,$Tbl
440	$PTR_ADD $i1,$Tbl
441	$PTR_ADD $i2,$Tbl
442	$PTR_ADD $i3,$Tbl
443#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
444# if defined(_MIPSEL)
445	lbu	$t4,2($i0)		# Te4[s2>>8]
446	$PTR_INS $i0,$s0,2,8
447	lbu	$t5,2($i1)		# Te4[s3>>8]
448	$PTR_INS $i1,$s1,2,8
449	lbu	$t6,2($i2)		# Te4[s0>>8]
450	$PTR_INS $i2,$s2,2,8
451	lbu	$t7,2($i3)		# Te4[s1>>8]
452	$PTR_INS $i3,$s3,2,8
453
454	lbu	$t8,2($i0)		# Te4[s0>>24]
455	_xtr	$i0,$s3,0-2
456	lbu	$t9,2($i1)		# Te4[s1>>24]
457	_xtr	$i1,$s0,0-2
458	lbu	$t10,2($i2)		# Te4[s2>>24]
459	_xtr	$i2,$s1,0-2
460	lbu	$t11,2($i3)		# Te4[s3>>24]
461	_xtr	$i3,$s2,0-2
462
463	and	$i0,0x3fc
464	and	$i1,0x3fc
465	and	$i2,0x3fc
466	and	$i3,0x3fc
467	$PTR_ADD $i0,$Tbl
468	$PTR_ADD $i1,$Tbl
469	$PTR_ADD $i2,$Tbl
470	$PTR_ADD $i3,$Tbl
471# else
472	lbu	$t4,2($i0)		# Te4[s2>>8]
473	_xtr	$i0,$s0,24-2
474	lbu	$t5,2($i1)		# Te4[s3>>8]
475	_xtr	$i1,$s1,24-2
476	lbu	$t6,2($i2)		# Te4[s0>>8]
477	_xtr	$i2,$s2,24-2
478	lbu	$t7,2($i3)		# Te4[s1>>8]
479	_xtr	$i3,$s3,24-2
480
481	and	$i0,0x3fc
482	and	$i1,0x3fc
483	and	$i2,0x3fc
484	and	$i3,0x3fc
485	$PTR_ADD $i0,$Tbl
486	$PTR_ADD $i1,$Tbl
487	$PTR_ADD $i2,$Tbl
488	$PTR_ADD $i3,$Tbl
489	lbu	$t8,2($i0)		# Te4[s0>>24]
490	$PTR_INS $i0,$s3,2,8
491	lbu	$t9,2($i1)		# Te4[s1>>24]
492	$PTR_INS $i1,$s0,2,8
493	lbu	$t10,2($i2)		# Te4[s2>>24]
494	$PTR_INS $i2,$s1,2,8
495	lbu	$t11,2($i3)		# Te4[s3>>24]
496	$PTR_INS $i3,$s2,2,8
497# endif
498	_ins	$t0,16
499	_ins	$t1,16
500	_ins	$t2,16
501	_ins	$t3,16
502
503	_ins2	$t0,$t4,8
504	lbu	$t4,2($i0)		# Te4[s3]
505	_ins2	$t1,$t5,8
506	lbu	$t5,2($i1)		# Te4[s0]
507	_ins2	$t2,$t6,8
508	lbu	$t6,2($i2)		# Te4[s1]
509	_ins2	$t3,$t7,8
510	lbu	$t7,2($i3)		# Te4[s2]
511
512	_ins2	$t0,$t8,24
513	lw	$s0,0($key0)
514	_ins2	$t1,$t9,24
515	lw	$s1,4($key0)
516	_ins2	$t2,$t10,24
517	lw	$s2,8($key0)
518	_ins2	$t3,$t11,24
519	lw	$s3,12($key0)
520
521	_ins2	$t0,$t4,0
522	_ins2	$t1,$t5,0
523	_ins2	$t2,$t6,0
524	_ins2	$t3,$t7,0
525#else
526	lbu	$t4,2($i0)		# Te4[s2>>8]
527	_xtr	$i0,$s0,24-2
528	lbu	$t5,2($i1)		# Te4[s3>>8]
529	_xtr	$i1,$s1,24-2
530	lbu	$t6,2($i2)		# Te4[s0>>8]
531	_xtr	$i2,$s2,24-2
532	lbu	$t7,2($i3)		# Te4[s1>>8]
533	_xtr	$i3,$s3,24-2
534
535	and	$i0,0x3fc
536	and	$i1,0x3fc
537	and	$i2,0x3fc
538	and	$i3,0x3fc
539	$PTR_ADD $i0,$Tbl
540	$PTR_ADD $i1,$Tbl
541	$PTR_ADD $i2,$Tbl
542	$PTR_ADD $i3,$Tbl
543	lbu	$t8,2($i0)		# Te4[s0>>24]
544	_xtr	$i0,$s3,0-2
545	lbu	$t9,2($i1)		# Te4[s1>>24]
546	_xtr	$i1,$s0,0-2
547	lbu	$t10,2($i2)		# Te4[s2>>24]
548	_xtr	$i2,$s1,0-2
549	lbu	$t11,2($i3)		# Te4[s3>>24]
550	_xtr	$i3,$s2,0-2
551
552	and	$i0,0x3fc
553	and	$i1,0x3fc
554	and	$i2,0x3fc
555	and	$i3,0x3fc
556	$PTR_ADD $i0,$Tbl
557	$PTR_ADD $i1,$Tbl
558	$PTR_ADD $i2,$Tbl
559	$PTR_ADD $i3,$Tbl
560
561	_ins	$t0,16
562	_ins	$t1,16
563	_ins	$t2,16
564	_ins	$t3,16
565
566	_ins	$t4,8
567	_ins	$t5,8
568	_ins	$t6,8
569	_ins	$t7,8
570
571	xor	$t0,$t4
572	lbu	$t4,2($i0)		# Te4[s3]
573	xor	$t1,$t5
574	lbu	$t5,2($i1)		# Te4[s0]
575	xor	$t2,$t6
576	lbu	$t6,2($i2)		# Te4[s1]
577	xor	$t3,$t7
578	lbu	$t7,2($i3)		# Te4[s2]
579
580	_ins	$t8,24
581	lw	$s0,0($key0)
582	_ins	$t9,24
583	lw	$s1,4($key0)
584	_ins	$t10,24
585	lw	$s2,8($key0)
586	_ins	$t11,24
587	lw	$s3,12($key0)
588
589	xor	$t0,$t8
590	xor	$t1,$t9
591	xor	$t2,$t10
592	xor	$t3,$t11
593
594	_ins	$t4,0
595	_ins	$t5,0
596	_ins	$t6,0
597	_ins	$t7,0
598
599	xor	$t0,$t4
600	xor	$t1,$t5
601	xor	$t2,$t6
602	xor	$t3,$t7
603#endif
604	xor	$s0,$t0
605	xor	$s1,$t1
606	xor	$s2,$t2
607	xor	$s3,$t3
608
609	jr	$ra
610.end	_mips_AES_encrypt
611
612.align	5
613.globl	AES_encrypt
614.ent	AES_encrypt
615AES_encrypt:
616	.frame	$sp,$FRAMESIZE,$ra
617	.mask	$SAVED_REGS_MASK,-$SZREG
618	.set	noreorder
619___
620$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
621	.cpload	$pf
622___
623$code.=<<___;
624	$PTR_SUB $sp,$FRAMESIZE
625	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
626	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
627	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
628	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
629	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
630	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
631	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
632	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
633	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
634	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
635___
636$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
637	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
638	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
639	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
640	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
641	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
642___
643$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
644	.cplocal	$Tbl
645	.cpsetup	$pf,$zero,AES_encrypt
646___
647$code.=<<___;
648	.set	reorder
649	la	$Tbl,AES_Te		# PIC-ified 'load address'
650
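	# the lwl/lwr pairs below read the (possibly unaligned) 16-byte input
	# block; on little-endian builds the 0/3 byte offsets are mirrored by
	# the post-processing loop at the bottom of this file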
651	lwl	$s0,0+$MSB($inp)
652	lwl	$s1,4+$MSB($inp)
653	lwl	$s2,8+$MSB($inp)
654	lwl	$s3,12+$MSB($inp)
655	lwr	$s0,0+$LSB($inp)
656	lwr	$s1,4+$LSB($inp)
657	lwr	$s2,8+$LSB($inp)
658	lwr	$s3,12+$LSB($inp)
659
660	bal	_mips_AES_encrypt
661
662	swr	$s0,0+$LSB($out)
663	swr	$s1,4+$LSB($out)
664	swr	$s2,8+$LSB($out)
665	swr	$s3,12+$LSB($out)
666	swl	$s0,0+$MSB($out)
667	swl	$s1,4+$MSB($out)
668	swl	$s2,8+$MSB($out)
669	swl	$s3,12+$MSB($out)
670
671	.set	noreorder
672	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
673	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
674	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
675	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
676	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
677	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
678	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
679	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
680	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
681	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
682___
683$code.=<<___ if ($flavour =~ /nubi/i);
684	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
685	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
686	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
687	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
688	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
689___
690$code.=<<___;
691	jr	$ra
692	$PTR_ADD $sp,$FRAMESIZE
693.end	AES_encrypt
694___
695
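# _mips_AES_decrypt mirrors _mips_AES_encrypt above: same register usage and
# the same compressed-table trick, only indexing AES_Td instead of AES_Te and
# finishing with the plain 256-byte Td4 stored 1024 bytes past AES_Td.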
696$code.=<<___;
697.align	5
698.ent	_mips_AES_decrypt
699_mips_AES_decrypt:
700	.frame	$sp,0,$ra
701	.set	reorder
702	lw	$t0,0($key)
703	lw	$t1,4($key)
704	lw	$t2,8($key)
705	lw	$t3,12($key)
706	lw	$cnt,240($key)
707	$PTR_ADD $key0,$key,16
708
709	xor	$s0,$t0
710	xor	$s1,$t1
711	xor	$s2,$t2
712	xor	$s3,$t3
713
714	sub	$cnt,1
715#if defined(__mips_smartmips)
716	ext	$i0,$s3,16,8
717.Loop_dec:
718	ext	$i1,$s0,16,8
719	ext	$i2,$s1,16,8
720	ext	$i3,$s2,16,8
721	lwxs	$t0,$i0($Tbl)		# Td1[s3>>16]
722	ext	$i0,$s2,8,8
723	lwxs	$t1,$i1($Tbl)		# Td1[s0>>16]
724	ext	$i1,$s3,8,8
725	lwxs	$t2,$i2($Tbl)		# Td1[s1>>16]
726	ext	$i2,$s0,8,8
727	lwxs	$t3,$i3($Tbl)		# Td1[s2>>16]
728	ext	$i3,$s1,8,8
729
730	lwxs	$t4,$i0($Tbl)		# Td2[s2>>8]
731	ext	$i0,$s1,0,8
732	lwxs	$t5,$i1($Tbl)		# Td2[s3>>8]
733	ext	$i1,$s2,0,8
734	lwxs	$t6,$i2($Tbl)		# Td2[s0>>8]
735	ext	$i2,$s3,0,8
736	lwxs	$t7,$i3($Tbl)		# Td2[s1>>8]
737	ext	$i3,$s0,0,8
738
739	lwxs	$t8,$i0($Tbl)		# Td3[s1]
740	ext	$i0,$s0,24,8
741	lwxs	$t9,$i1($Tbl)		# Td3[s2]
742	ext	$i1,$s1,24,8
743	lwxs	$t10,$i2($Tbl)		# Td3[s3]
744	ext	$i2,$s2,24,8
745	lwxs	$t11,$i3($Tbl)		# Td3[s0]
746	ext	$i3,$s3,24,8
747
748	rotr	$t0,$t0,8
749	rotr	$t1,$t1,8
750	rotr	$t2,$t2,8
751	rotr	$t3,$t3,8
752
753	rotr	$t4,$t4,16
754	rotr	$t5,$t5,16
755	rotr	$t6,$t6,16
756	rotr	$t7,$t7,16
757
758	xor	$t0,$t4
759	lwxs	$t4,$i0($Tbl)		# Td0[s0>>24]
760	xor	$t1,$t5
761	lwxs	$t5,$i1($Tbl)		# Td0[s1>>24]
762	xor	$t2,$t6
763	lwxs	$t6,$i2($Tbl)		# Td0[s2>>24]
764	xor	$t3,$t7
765	lwxs	$t7,$i3($Tbl)		# Td0[s3>>24]
766
767	rotr	$t8,$t8,24
768	lw	$s0,0($key0)
769	rotr	$t9,$t9,24
770	lw	$s1,4($key0)
771	rotr	$t10,$t10,24
772	lw	$s2,8($key0)
773	rotr	$t11,$t11,24
774	lw	$s3,12($key0)
775
776	xor	$t0,$t8
777	xor	$t1,$t9
778	xor	$t2,$t10
779	xor	$t3,$t11
780
781	xor	$t0,$t4
782	xor	$t1,$t5
783	xor	$t2,$t6
784	xor	$t3,$t7
785
786	sub	$cnt,1
787	$PTR_ADD $key0,16
788	xor	$s0,$t0
789	xor	$s1,$t1
790	xor	$s2,$t2
791	xor	$s3,$t3
792	.set	noreorder
793	bnez	$cnt,.Loop_dec
794	ext	$i0,$s3,16,8
795
796	_xtr	$i0,$s3,16-2
797#else
798	_xtr	$i0,$s3,16-2
799.Loop_dec:
800	_xtr	$i1,$s0,16-2
801	_xtr	$i2,$s1,16-2
802	_xtr	$i3,$s2,16-2
803	and	$i0,0x3fc
804	and	$i1,0x3fc
805	and	$i2,0x3fc
806	and	$i3,0x3fc
807	$PTR_ADD $i0,$Tbl
808	$PTR_ADD $i1,$Tbl
809	$PTR_ADD $i2,$Tbl
810	$PTR_ADD $i3,$Tbl
811#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
812	lw	$t0,0($i0)		# Td1[s3>>16]
813	_xtr	$i0,$s2,8-2
814	lw	$t1,0($i1)		# Td1[s0>>16]
815	_xtr	$i1,$s3,8-2
816	lw	$t2,0($i2)		# Td1[s1>>16]
817	_xtr	$i2,$s0,8-2
818	lw	$t3,0($i3)		# Td1[s2>>16]
819	_xtr	$i3,$s1,8-2
820#else
821	lwl	$t0,3($i0)		# Td1[s3>>16]
822	lwl	$t1,3($i1)		# Td1[s0>>16]
823	lwl	$t2,3($i2)		# Td1[s1>>16]
824	lwl	$t3,3($i3)		# Td1[s2>>16]
825	lwr	$t0,2($i0)		# Td1[s3>>16]
826	_xtr	$i0,$s2,8-2
827	lwr	$t1,2($i1)		# Td1[s0>>16]
828	_xtr	$i1,$s3,8-2
829	lwr	$t2,2($i2)		# Td1[s1>>16]
830	_xtr	$i2,$s0,8-2
831	lwr	$t3,2($i3)		# Td1[s2>>16]
832	_xtr	$i3,$s1,8-2
833#endif
834
835	and	$i0,0x3fc
836	and	$i1,0x3fc
837	and	$i2,0x3fc
838	and	$i3,0x3fc
839	$PTR_ADD $i0,$Tbl
840	$PTR_ADD $i1,$Tbl
841	$PTR_ADD $i2,$Tbl
842	$PTR_ADD $i3,$Tbl
843#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
844	rotr	$t0,$t0,8
845	rotr	$t1,$t1,8
846	rotr	$t2,$t2,8
847	rotr	$t3,$t3,8
848# if defined(_MIPSEL)
849	lw	$t4,0($i0)		# Td2[s2>>8]
850	_xtr	$i0,$s1,0-2
851	lw	$t5,0($i1)		# Td2[s3>>8]
852	_xtr	$i1,$s2,0-2
853	lw	$t6,0($i2)		# Td2[s0>>8]
854	_xtr	$i2,$s3,0-2
855	lw	$t7,0($i3)		# Td2[s1>>8]
856	_xtr	$i3,$s0,0-2
857
858	and	$i0,0x3fc
859	and	$i1,0x3fc
860	and	$i2,0x3fc
861	and	$i3,0x3fc
862	$PTR_ADD $i0,$Tbl
863	$PTR_ADD $i1,$Tbl
864	$PTR_ADD $i2,$Tbl
865	$PTR_ADD $i3,$Tbl
866	lw	$t8,0($i0)		# Td3[s1]
867	$PTR_INS $i0,$s0,2,8
868	lw	$t9,0($i1)		# Td3[s2]
869	$PTR_INS $i1,$s1,2,8
870	lw	$t10,0($i2)		# Td3[s3]
871	$PTR_INS $i2,$s2,2,8
872	lw	$t11,0($i3)		# Td3[s0]
873	$PTR_INS $i3,$s3,2,8
874#else
875	lw	$t4,0($i0)		# Td2[s2>>8]
876	$PTR_INS $i0,$s1,2,8
877	lw	$t5,0($i1)		# Td2[s3>>8]
878	$PTR_INS $i1,$s2,2,8
879	lw	$t6,0($i2)		# Td2[s0>>8]
880	$PTR_INS $i2,$s3,2,8
881	lw	$t7,0($i3)		# Td2[s1>>8]
882	$PTR_INS $i3,$s0,2,8
883
884	lw	$t8,0($i0)		# Td3[s1]
885	_xtr	$i0,$s0,24-2
886	lw	$t9,0($i1)		# Td3[s2]
887	_xtr	$i1,$s1,24-2
888	lw	$t10,0($i2)		# Td3[s3]
889	_xtr	$i2,$s2,24-2
890	lw	$t11,0($i3)		# Td3[s0]
891	_xtr	$i3,$s3,24-2
892
893	and	$i0,0x3fc
894	and	$i1,0x3fc
895	and	$i2,0x3fc
896	and	$i3,0x3fc
897	$PTR_ADD $i0,$Tbl
898	$PTR_ADD $i1,$Tbl
899	$PTR_ADD $i2,$Tbl
900	$PTR_ADD $i3,$Tbl
901#endif
902	rotr	$t4,$t4,16
903	rotr	$t5,$t5,16
904	rotr	$t6,$t6,16
905	rotr	$t7,$t7,16
906
907	rotr	$t8,$t8,24
908	rotr	$t9,$t9,24
909	rotr	$t10,$t10,24
910	rotr	$t11,$t11,24
911#else
912	lwl	$t4,2($i0)		# Td2[s2>>8]
913	lwl	$t5,2($i1)		# Td2[s3>>8]
914	lwl	$t6,2($i2)		# Td2[s0>>8]
915	lwl	$t7,2($i3)		# Td2[s1>>8]
916	lwr	$t4,1($i0)		# Td2[s2>>8]
917	_xtr	$i0,$s1,0-2
918	lwr	$t5,1($i1)		# Td2[s3>>8]
919	_xtr	$i1,$s2,0-2
920	lwr	$t6,1($i2)		# Td2[s0>>8]
921	_xtr	$i2,$s3,0-2
922	lwr	$t7,1($i3)		# Td2[s1>>8]
923	_xtr	$i3,$s0,0-2
924
925	and	$i0,0x3fc
926	and	$i1,0x3fc
927	and	$i2,0x3fc
928	and	$i3,0x3fc
929	$PTR_ADD $i0,$Tbl
930	$PTR_ADD $i1,$Tbl
931	$PTR_ADD $i2,$Tbl
932	$PTR_ADD $i3,$Tbl
933	lwl	$t8,1($i0)		# Td3[s1]
934	lwl	$t9,1($i1)		# Td3[s2]
935	lwl	$t10,1($i2)		# Td3[s3]
936	lwl	$t11,1($i3)		# Td3[s0]
937	lwr	$t8,0($i0)		# Td3[s1]
938	_xtr	$i0,$s0,24-2
939	lwr	$t9,0($i1)		# Td3[s2]
940	_xtr	$i1,$s1,24-2
941	lwr	$t10,0($i2)		# Td3[s3]
942	_xtr	$i2,$s2,24-2
943	lwr	$t11,0($i3)		# Td3[s0]
944	_xtr	$i3,$s3,24-2
945
946	and	$i0,0x3fc
947	and	$i1,0x3fc
948	and	$i2,0x3fc
949	and	$i3,0x3fc
950	$PTR_ADD $i0,$Tbl
951	$PTR_ADD $i1,$Tbl
952	$PTR_ADD $i2,$Tbl
953	$PTR_ADD $i3,$Tbl
954#endif
955
956	xor	$t0,$t4
957	lw	$t4,0($i0)		# Td0[s0>>24]
958	xor	$t1,$t5
959	lw	$t5,0($i1)		# Td0[s1>>24]
960	xor	$t2,$t6
961	lw	$t6,0($i2)		# Td0[s2>>24]
962	xor	$t3,$t7
963	lw	$t7,0($i3)		# Td0[s3>>24]
964
965	xor	$t0,$t8
966	lw	$s0,0($key0)
967	xor	$t1,$t9
968	lw	$s1,4($key0)
969	xor	$t2,$t10
970	lw	$s2,8($key0)
971	xor	$t3,$t11
972	lw	$s3,12($key0)
973
974	xor	$t0,$t4
975	xor	$t1,$t5
976	xor	$t2,$t6
977	xor	$t3,$t7
978
979	sub	$cnt,1
980	$PTR_ADD $key0,16
981	xor	$s0,$t0
982	xor	$s1,$t1
983	xor	$s2,$t2
984	xor	$s3,$t3
985	.set	noreorder
986	bnez	$cnt,.Loop_dec
987	_xtr	$i0,$s3,16-2
988#endif
989
990	.set	reorder
991	lw	$t4,1024($Tbl)		# prefetch Td4
992	_xtr	$i0,$s3,16
993	lw	$t5,1024+32($Tbl)
994	_xtr	$i1,$s0,16
995	lw	$t6,1024+64($Tbl)
996	_xtr	$i2,$s1,16
997	lw	$t7,1024+96($Tbl)
998	_xtr	$i3,$s2,16
999	lw	$t8,1024+128($Tbl)
1000	and	$i0,0xff
1001	lw	$t9,1024+160($Tbl)
1002	and	$i1,0xff
1003	lw	$t10,1024+192($Tbl)
1004	and	$i2,0xff
1005	lw	$t11,1024+224($Tbl)
1006	and	$i3,0xff
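	# the eight lw above, spaced 32 bytes apart, walk the 256-byte Td4 so
	# its cache lines are warm for the final-round byte lookups (assuming
	# cache lines of at least 32 bytes)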
1007
1008	$PTR_ADD $i0,$Tbl
1009	$PTR_ADD $i1,$Tbl
1010	$PTR_ADD $i2,$Tbl
1011	$PTR_ADD $i3,$Tbl
1012	lbu	$t0,1024($i0)		# Td4[s3>>16]
1013	_xtr	$i0,$s2,8
1014	lbu	$t1,1024($i1)		# Td4[s0>>16]
1015	_xtr	$i1,$s3,8
1016	lbu	$t2,1024($i2)		# Td4[s1>>16]
1017	_xtr	$i2,$s0,8
1018	lbu	$t3,1024($i3)		# Td4[s2>>16]
1019	_xtr	$i3,$s1,8
1020
1021	and	$i0,0xff
1022	and	$i1,0xff
1023	and	$i2,0xff
1024	and	$i3,0xff
1025	$PTR_ADD $i0,$Tbl
1026	$PTR_ADD $i1,$Tbl
1027	$PTR_ADD $i2,$Tbl
1028	$PTR_ADD $i3,$Tbl
1029#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1030# if defined(_MIPSEL)
1031	lbu	$t4,1024($i0)		# Td4[s2>>8]
1032	$PTR_INS $i0,$s0,0,8
1033	lbu	$t5,1024($i1)		# Td4[s3>>8]
1034	$PTR_INS $i1,$s1,0,8
1035	lbu	$t6,1024($i2)		# Td4[s0>>8]
1036	$PTR_INS $i2,$s2,0,8
1037	lbu	$t7,1024($i3)		# Td4[s1>>8]
1038	$PTR_INS $i3,$s3,0,8
1039
1040	lbu	$t8,1024($i0)		# Td4[s0>>24]
1041	_xtr	$i0,$s1,0
1042	lbu	$t9,1024($i1)		# Td4[s1>>24]
1043	_xtr	$i1,$s2,0
1044	lbu	$t10,1024($i2)		# Td4[s2>>24]
1045	_xtr	$i2,$s3,0
1046	lbu	$t11,1024($i3)		# Td4[s3>>24]
1047	_xtr	$i3,$s0,0
1048
1049	$PTR_ADD $i0,$Tbl
1050	$PTR_ADD $i1,$Tbl
1051	$PTR_ADD $i2,$Tbl
1052	$PTR_ADD $i3,$Tbl
1053# else
1054	lbu	$t4,1024($i0)		# Td4[s2>>8]
1055	_xtr	$i0,$s0,24
1056	lbu	$t5,1024($i1)		# Td4[s3>>8]
1057	_xtr	$i1,$s1,24
1058	lbu	$t6,1024($i2)		# Td4[s0>>8]
1059	_xtr	$i2,$s2,24
1060	lbu	$t7,1024($i3)		# Td4[s1>>8]
1061	_xtr	$i3,$s3,24
1062
1063	$PTR_ADD $i0,$Tbl
1064	$PTR_ADD $i1,$Tbl
1065	$PTR_ADD $i2,$Tbl
1066	$PTR_ADD $i3,$Tbl
1067	lbu	$t8,1024($i0)		# Td4[s0>>24]
1068	$PTR_INS $i0,$s1,0,8
1069	lbu	$t9,1024($i1)		# Td4[s1>>24]
1070	$PTR_INS $i1,$s2,0,8
1071	lbu	$t10,1024($i2)		# Td4[s2>>24]
1072	$PTR_INS $i2,$s3,0,8
1073	lbu	$t11,1024($i3)		# Td4[s3>>24]
1074	$PTR_INS $i3,$s0,0,8
1075# endif
1076	_ins	$t0,16
1077	_ins	$t1,16
1078	_ins	$t2,16
1079	_ins	$t3,16
1080
1081	_ins2	$t0,$t4,8
1082	lbu	$t4,1024($i0)		# Td4[s1]
1083	_ins2	$t1,$t5,8
1084	lbu	$t5,1024($i1)		# Td4[s2]
1085	_ins2	$t2,$t6,8
1086	lbu	$t6,1024($i2)		# Td4[s3]
1087	_ins2	$t3,$t7,8
1088	lbu	$t7,1024($i3)		# Td4[s0]
1089
1090	_ins2	$t0,$t8,24
1091	lw	$s0,0($key0)
1092	_ins2	$t1,$t9,24
1093	lw	$s1,4($key0)
1094	_ins2	$t2,$t10,24
1095	lw	$s2,8($key0)
1096	_ins2	$t3,$t11,24
1097	lw	$s3,12($key0)
1098
1099	_ins2	$t0,$t4,0
1100	_ins2	$t1,$t5,0
1101	_ins2	$t2,$t6,0
1102	_ins2	$t3,$t7,0
1103#else
1104	lbu	$t4,1024($i0)		# Td4[s2>>8]
1105	_xtr	$i0,$s0,24
1106	lbu	$t5,1024($i1)		# Td4[s3>>8]
1107	_xtr	$i1,$s1,24
1108	lbu	$t6,1024($i2)		# Td4[s0>>8]
1109	_xtr	$i2,$s2,24
1110	lbu	$t7,1024($i3)		# Td4[s1>>8]
1111	_xtr	$i3,$s3,24
1112
1113	$PTR_ADD $i0,$Tbl
1114	$PTR_ADD $i1,$Tbl
1115	$PTR_ADD $i2,$Tbl
1116	$PTR_ADD $i3,$Tbl
1117	lbu	$t8,1024($i0)		# Td4[s0>>24]
1118	_xtr	$i0,$s1,0
1119	lbu	$t9,1024($i1)		# Td4[s1>>24]
1120	_xtr	$i1,$s2,0
1121	lbu	$t10,1024($i2)		# Td4[s2>>24]
1122	_xtr	$i2,$s3,0
1123	lbu	$t11,1024($i3)		# Td4[s3>>24]
1124	_xtr	$i3,$s0,0
1125
1126	$PTR_ADD $i0,$Tbl
1127	$PTR_ADD $i1,$Tbl
1128	$PTR_ADD $i2,$Tbl
1129	$PTR_ADD $i3,$Tbl
1130
1131	_ins	$t0,16
1132	_ins	$t1,16
1133	_ins	$t2,16
1134	_ins	$t3,16
1135
1136	_ins	$t4,8
1137	_ins	$t5,8
1138	_ins	$t6,8
1139	_ins	$t7,8
1140
1141	xor	$t0,$t4
1142	lbu	$t4,1024($i0)		# Td4[s1]
1143	xor	$t1,$t5
1144	lbu	$t5,1024($i1)		# Td4[s2]
1145	xor	$t2,$t6
1146	lbu	$t6,1024($i2)		# Td4[s3]
1147	xor	$t3,$t7
1148	lbu	$t7,1024($i3)		# Td4[s0]
1149
1150	_ins	$t8,24
1151	lw	$s0,0($key0)
1152	_ins	$t9,24
1153	lw	$s1,4($key0)
1154	_ins	$t10,24
1155	lw	$s2,8($key0)
1156	_ins	$t11,24
1157	lw	$s3,12($key0)
1158
1159	xor	$t0,$t8
1160	xor	$t1,$t9
1161	xor	$t2,$t10
1162	xor	$t3,$t11
1163
1164	_ins	$t4,0
1165	_ins	$t5,0
1166	_ins	$t6,0
1167	_ins	$t7,0
1168
1169	xor	$t0,$t4
1170	xor	$t1,$t5
1171	xor	$t2,$t6
1172	xor	$t3,$t7
1173#endif
1174
1175	xor	$s0,$t0
1176	xor	$s1,$t1
1177	xor	$s2,$t2
1178	xor	$s3,$t3
1179
1180	jr	$ra
1181.end	_mips_AES_decrypt
1182
1183.align	5
1184.globl	AES_decrypt
1185.ent	AES_decrypt
1186AES_decrypt:
1187	.frame	$sp,$FRAMESIZE,$ra
1188	.mask	$SAVED_REGS_MASK,-$SZREG
1189	.set	noreorder
1190___
1191$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
1192	.cpload	$pf
1193___
1194$code.=<<___;
1195	$PTR_SUB $sp,$FRAMESIZE
1196	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
1197	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
1198	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
1199	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
1200	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
1201	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
1202	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
1203	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
1204	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
1205	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
1206___
1207$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
1208	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
1209	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
1210	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
1211	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
1212	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
1213___
1214$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
1215	.cplocal	$Tbl
1216	.cpsetup	$pf,$zero,AES_decrypt
1217___
1218$code.=<<___;
1219	.set	reorder
1220	la	$Tbl,AES_Td		# PIC-ified 'load address'
1221
1222	lwl	$s0,0+$MSB($inp)
1223	lwl	$s1,4+$MSB($inp)
1224	lwl	$s2,8+$MSB($inp)
1225	lwl	$s3,12+$MSB($inp)
1226	lwr	$s0,0+$LSB($inp)
1227	lwr	$s1,4+$LSB($inp)
1228	lwr	$s2,8+$LSB($inp)
1229	lwr	$s3,12+$LSB($inp)
1230
1231	bal	_mips_AES_decrypt
1232
1233	swr	$s0,0+$LSB($out)
1234	swr	$s1,4+$LSB($out)
1235	swr	$s2,8+$LSB($out)
1236	swr	$s3,12+$LSB($out)
1237	swl	$s0,0+$MSB($out)
1238	swl	$s1,4+$MSB($out)
1239	swl	$s2,8+$MSB($out)
1240	swl	$s3,12+$MSB($out)
1241
1242	.set	noreorder
1243	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
1244	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
1245	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
1246	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
1247	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
1248	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
1249	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
1250	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
1251	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
1252	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
1253___
1254$code.=<<___ if ($flavour =~ /nubi/i);
1255	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
1256	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
1257	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
1258	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
1259	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
1260___
1261$code.=<<___;
1262	jr	$ra
1263	$PTR_ADD $sp,$FRAMESIZE
1264.end	AES_decrypt
1265___
1266}}}
1267
1268{{{
1269my $FRAMESIZE=8*$SZREG;
1270my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
1271
1272my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
1273my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1274my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
1275my ($rcon,$cnt)=($gp,$fp);
1276
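# The three loops below (.L128bits/.L192bits/.L256bits) implement the
# standard AES key-expansion recurrence; as an illustrative sketch for the
# 128-bit case (rk[] are 32-bit words, Te4 the plain S-box, rcon[] the
# round constants kept right after AES_Te4):
#
#	rk[4i+4] = rk[4i]   ^ SubWord(RotWord(rk[4i+3])) ^ rcon[i]
#	rk[4i+5] = rk[4i+1] ^ rk[4i+4]
#	rk[4i+6] = rk[4i+2] ^ rk[4i+5]
#	rk[4i+7] = rk[4i+3] ^ rk[4i+6]
#
# The SubWord/RotWord step is what the four lbu lookups plus the _bias
# shifts perform on the last word of the previous round key.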
1277$code.=<<___;
1278.align	5
1279.ent	_mips_AES_set_encrypt_key
1280_mips_AES_set_encrypt_key:
1281	.frame	$sp,0,$ra
1282	.set	noreorder
1283	beqz	$inp,.Lekey_done
1284	li	$t0,-1
1285	beqz	$key,.Lekey_done
1286	$PTR_ADD $rcon,$Tbl,256
1287
1288	.set	reorder
1289	lwl	$rk0,0+$MSB($inp)	# load 128 bits
1290	lwl	$rk1,4+$MSB($inp)
1291	lwl	$rk2,8+$MSB($inp)
1292	lwl	$rk3,12+$MSB($inp)
1293	li	$at,128
1294	lwr	$rk0,0+$LSB($inp)
1295	lwr	$rk1,4+$LSB($inp)
1296	lwr	$rk2,8+$LSB($inp)
1297	lwr	$rk3,12+$LSB($inp)
1298	.set	noreorder
1299	beq	$bits,$at,.L128bits
1300	li	$cnt,10
1301
1302	.set	reorder
1303	lwl	$rk4,16+$MSB($inp)	# load 192 bits
1304	lwl	$rk5,20+$MSB($inp)
1305	li	$at,192
1306	lwr	$rk4,16+$LSB($inp)
1307	lwr	$rk5,20+$LSB($inp)
1308	.set	noreorder
1309	beq	$bits,$at,.L192bits
1310	li	$cnt,8
1311
1312	.set	reorder
1313	lwl	$rk6,24+$MSB($inp)	# load 256 bits
1314	lwl	$rk7,28+$MSB($inp)
1315	li	$at,256
1316	lwr	$rk6,24+$LSB($inp)
1317	lwr	$rk7,28+$LSB($inp)
1318	.set	noreorder
1319	beq	$bits,$at,.L256bits
1320	li	$cnt,7
1321
1322	b	.Lekey_done
1323	li	$t0,-2
1324
1325.align	4
1326.L128bits:
1327	.set	reorder
1328	srl	$i0,$rk3,16
1329	srl	$i1,$rk3,8
1330	and	$i0,0xff
1331	and	$i1,0xff
1332	and	$i2,$rk3,0xff
1333	srl	$i3,$rk3,24
1334	$PTR_ADD $i0,$Tbl
1335	$PTR_ADD $i1,$Tbl
1336	$PTR_ADD $i2,$Tbl
1337	$PTR_ADD $i3,$Tbl
1338	lbu	$i0,0($i0)
1339	lbu	$i1,0($i1)
1340	lbu	$i2,0($i2)
1341	lbu	$i3,0($i3)
1342
1343	sw	$rk0,0($key)
1344	sw	$rk1,4($key)
1345	sw	$rk2,8($key)
1346	sw	$rk3,12($key)
1347	sub	$cnt,1
1348	$PTR_ADD $key,16
1349
1350	_bias	$i0,24
1351	_bias	$i1,16
1352	_bias	$i2,8
1353	_bias	$i3,0
1354
1355	xor	$rk0,$i0
1356	lw	$i0,0($rcon)
1357	xor	$rk0,$i1
1358	xor	$rk0,$i2
1359	xor	$rk0,$i3
1360	xor	$rk0,$i0
1361
1362	xor	$rk1,$rk0
1363	xor	$rk2,$rk1
1364	xor	$rk3,$rk2
1365
1366	.set	noreorder
1367	bnez	$cnt,.L128bits
1368	$PTR_ADD $rcon,4
1369
1370	sw	$rk0,0($key)
1371	sw	$rk1,4($key)
1372	sw	$rk2,8($key)
1373	li	$cnt,10
1374	sw	$rk3,12($key)
1375	li	$t0,0
1376	sw	$cnt,80($key)
1377	b	.Lekey_done
1378	$PTR_SUB $key,10*16
1379
1380.align	4
1381.L192bits:
1382	.set	reorder
1383	srl	$i0,$rk5,16
1384	srl	$i1,$rk5,8
1385	and	$i0,0xff
1386	and	$i1,0xff
1387	and	$i2,$rk5,0xff
1388	srl	$i3,$rk5,24
1389	$PTR_ADD $i0,$Tbl
1390	$PTR_ADD $i1,$Tbl
1391	$PTR_ADD $i2,$Tbl
1392	$PTR_ADD $i3,$Tbl
1393	lbu	$i0,0($i0)
1394	lbu	$i1,0($i1)
1395	lbu	$i2,0($i2)
1396	lbu	$i3,0($i3)
1397
1398	sw	$rk0,0($key)
1399	sw	$rk1,4($key)
1400	sw	$rk2,8($key)
1401	sw	$rk3,12($key)
1402	sw	$rk4,16($key)
1403	sw	$rk5,20($key)
1404	sub	$cnt,1
1405	$PTR_ADD $key,24
1406
1407	_bias	$i0,24
1408	_bias	$i1,16
1409	_bias	$i2,8
1410	_bias	$i3,0
1411
1412	xor	$rk0,$i0
1413	lw	$i0,0($rcon)
1414	xor	$rk0,$i1
1415	xor	$rk0,$i2
1416	xor	$rk0,$i3
1417	xor	$rk0,$i0
1418
1419	xor	$rk1,$rk0
1420	xor	$rk2,$rk1
1421	xor	$rk3,$rk2
1422	xor	$rk4,$rk3
1423	xor	$rk5,$rk4
1424
1425	.set	noreorder
1426	bnez	$cnt,.L192bits
1427	$PTR_ADD $rcon,4
1428
1429	sw	$rk0,0($key)
1430	sw	$rk1,4($key)
1431	sw	$rk2,8($key)
1432	li	$cnt,12
1433	sw	$rk3,12($key)
1434	li	$t0,0
1435	sw	$cnt,48($key)
1436	b	.Lekey_done
1437	$PTR_SUB $key,12*16
1438
1439.align	4
1440.L256bits:
1441	.set	reorder
1442	srl	$i0,$rk7,16
1443	srl	$i1,$rk7,8
1444	and	$i0,0xff
1445	and	$i1,0xff
1446	and	$i2,$rk7,0xff
1447	srl	$i3,$rk7,24
1448	$PTR_ADD $i0,$Tbl
1449	$PTR_ADD $i1,$Tbl
1450	$PTR_ADD $i2,$Tbl
1451	$PTR_ADD $i3,$Tbl
1452	lbu	$i0,0($i0)
1453	lbu	$i1,0($i1)
1454	lbu	$i2,0($i2)
1455	lbu	$i3,0($i3)
1456
1457	sw	$rk0,0($key)
1458	sw	$rk1,4($key)
1459	sw	$rk2,8($key)
1460	sw	$rk3,12($key)
1461	sw	$rk4,16($key)
1462	sw	$rk5,20($key)
1463	sw	$rk6,24($key)
1464	sw	$rk7,28($key)
1465	sub	$cnt,1
1466
1467	_bias	$i0,24
1468	_bias	$i1,16
1469	_bias	$i2,8
1470	_bias	$i3,0
1471
1472	xor	$rk0,$i0
1473	lw	$i0,0($rcon)
1474	xor	$rk0,$i1
1475	xor	$rk0,$i2
1476	xor	$rk0,$i3
1477	xor	$rk0,$i0
1478
1479	xor	$rk1,$rk0
1480	xor	$rk2,$rk1
1481	xor	$rk3,$rk2
1482	beqz	$cnt,.L256bits_done
1483
1484	srl	$i0,$rk3,24
1485	srl	$i1,$rk3,16
1486	srl	$i2,$rk3,8
1487	and	$i3,$rk3,0xff
1488	and	$i1,0xff
1489	and	$i2,0xff
1490	$PTR_ADD $i0,$Tbl
1491	$PTR_ADD $i1,$Tbl
1492	$PTR_ADD $i2,$Tbl
1493	$PTR_ADD $i3,$Tbl
1494	lbu	$i0,0($i0)
1495	lbu	$i1,0($i1)
1496	lbu	$i2,0($i2)
1497	lbu	$i3,0($i3)
1498	sll	$i0,24
1499	sll	$i1,16
1500	sll	$i2,8
1501
1502	xor	$rk4,$i0
1503	xor	$rk4,$i1
1504	xor	$rk4,$i2
1505	xor	$rk4,$i3
1506
1507	xor	$rk5,$rk4
1508	xor	$rk6,$rk5
1509	xor	$rk7,$rk6
1510
1511	$PTR_ADD $key,32
1512	.set	noreorder
1513	b	.L256bits
1514	$PTR_ADD $rcon,4
1515
1516.L256bits_done:
1517	sw	$rk0,32($key)
1518	sw	$rk1,36($key)
1519	sw	$rk2,40($key)
1520	li	$cnt,14
1521	sw	$rk3,44($key)
1522	li	$t0,0
1523	sw	$cnt,48($key)
1524	$PTR_SUB $key,12*16
1525
1526.Lekey_done:
1527	jr	$ra
1528	nop
1529.end	_mips_AES_set_encrypt_key
1530
1531.globl	private_AES_set_encrypt_key
1532.ent	private_AES_set_encrypt_key
1533private_AES_set_encrypt_key:
1534	.frame	$sp,$FRAMESIZE,$ra
1535	.mask	$SAVED_REGS_MASK,-$SZREG
1536	.set	noreorder
1537___
1538$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
1539	.cpload	$pf
1540___
1541$code.=<<___;
1542	$PTR_SUB $sp,$FRAMESIZE
1543	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
1544	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
1545___
1546$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
1547	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
1548	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
1549	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
1550	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
1551	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
1552___
1553$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
1554	.cplocal	$Tbl
1555	.cpsetup	$pf,$zero,private_AES_set_encrypt_key
1556___
1557$code.=<<___;
1558	.set	reorder
1559	la	$Tbl,AES_Te4		# PIC-ified 'load address'
1560
1561	bal	_mips_AES_set_encrypt_key
1562
1563	.set	noreorder
1564	move	$a0,$t0
1565	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
1566	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
1567___
1568$code.=<<___ if ($flavour =~ /nubi/i);
1569	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
1570	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
1571	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
1572	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
1573	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
1574___
1575$code.=<<___;
1576	jr	$ra
1577	$PTR_ADD $sp,$FRAMESIZE
1578.end	private_AES_set_encrypt_key
1579___
1580
1581my ($head,$tail)=($inp,$bits);
1582my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1583my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
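# The .Lmix loop below converts the inner encryption round keys in place for
# the "equivalent inverse cipher": it applies InvMixColumns to each word using
# the packed xtime trick driven by the 0x80808080/0x7f7f7f7f/0x1b1b1b1b masks.
# A minimal pure-Perl sketch (illustrative only, not used by the generator) of
# doubling all four bytes of a word in GF(2^8):
sub xtime32 {
	my ($w) = @_;
	my $hi = $w & 0x80808080;			# bytes whose top bit is set
	return ((($w & 0x7f7f7f7f) << 1) ^ (($hi >> 7) * 0x1b)) & 0xffffffff;
}
# tp2/tp4/tp8 below are xtime32 applied once/twice/three times to tp1, and
# tp9, tpb, tpd, tpe are the GF(2^8) multiples by 9, 11, 13 and 14 that
# InvMixColumns combines.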
1584$code.=<<___;
1585.align	5
1586.globl	private_AES_set_decrypt_key
1587.ent	private_AES_set_decrypt_key
1588private_AES_set_decrypt_key:
1589	.frame	$sp,$FRAMESIZE,$ra
1590	.mask	$SAVED_REGS_MASK,-$SZREG
1591	.set	noreorder
1592___
1593$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
1594	.cpload	$pf
1595___
1596$code.=<<___;
1597	$PTR_SUB $sp,$FRAMESIZE
1598	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
1599	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
1600___
1601$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
1602	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
1603	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
1604	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
1605	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
1606	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
1607___
1608$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
1609	.cplocal	$Tbl
1610	.cpsetup	$pf,$zero,private_AES_set_decrypt_key
1611___
1612$code.=<<___;
1613	.set	reorder
1614	la	$Tbl,AES_Te4		# PIC-ified 'load address'
1615
1616	bal	_mips_AES_set_encrypt_key
1617
1618	bltz	$t0,.Ldkey_done
1619
1620	sll	$at,$cnt,4
1621	$PTR_ADD $head,$key,0
1622	$PTR_ADD $tail,$key,$at
1623.align	4
1624.Lswap:
1625	lw	$rk0,0($head)
1626	lw	$rk1,4($head)
1627	lw	$rk2,8($head)
1628	lw	$rk3,12($head)
1629	lw	$rk4,0($tail)
1630	lw	$rk5,4($tail)
1631	lw	$rk6,8($tail)
1632	lw	$rk7,12($tail)
1633	sw	$rk0,0($tail)
1634	sw	$rk1,4($tail)
1635	sw	$rk2,8($tail)
1636	sw	$rk3,12($tail)
1637	$PTR_ADD $head,16
1638	$PTR_SUB $tail,16
1639	sw	$rk4,-16($head)
1640	sw	$rk5,-12($head)
1641	sw	$rk6,-8($head)
1642	sw	$rk7,-4($head)
1643	bne	$head,$tail,.Lswap
1644
1645	lw	$tp1,16($key)		# modulo-scheduled
1646	lui	$x80808080,0x8080
1647	sub	$cnt,1
1648	or	$x80808080,0x8080
1649	sll	$cnt,2
1650	$PTR_ADD $key,16
1651	lui	$x1b1b1b1b,0x1b1b
1652	nor	$x7f7f7f7f,$zero,$x80808080
1653	or	$x1b1b1b1b,0x1b1b
1654.align	4
1655.Lmix:
1656	and	$m,$tp1,$x80808080
1657	and	$tp2,$tp1,$x7f7f7f7f
1658	srl	$tp4,$m,7
1659	addu	$tp2,$tp2		# tp2<<1
1660	subu	$m,$tp4
1661	and	$m,$x1b1b1b1b
1662	xor	$tp2,$m
1663
1664	and	$m,$tp2,$x80808080
1665	and	$tp4,$tp2,$x7f7f7f7f
1666	srl	$tp8,$m,7
1667	addu	$tp4,$tp4		# tp4<<1
1668	subu	$m,$tp8
1669	and	$m,$x1b1b1b1b
1670	xor	$tp4,$m
1671
1672	and	$m,$tp4,$x80808080
1673	and	$tp8,$tp4,$x7f7f7f7f
1674	srl	$tp9,$m,7
1675	addu	$tp8,$tp8		# tp8<<1
1676	subu	$m,$tp9
1677	and	$m,$x1b1b1b1b
1678	xor	$tp8,$m
1679
1680	xor	$tp9,$tp8,$tp1
1681	xor	$tpe,$tp8,$tp4
1682	xor	$tpb,$tp9,$tp2
1683	xor	$tpd,$tp9,$tp4
1684
1685#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1686	rotr	$tp1,$tpd,16
1687	 xor	$tpe,$tp2
1688	rotr	$tp2,$tp9,8
1689	xor	$tpe,$tp1
1690	rotr	$tp4,$tpb,24
1691	xor	$tpe,$tp2
1692	lw	$tp1,4($key)		# modulo-scheduled
1693	xor	$tpe,$tp4
1694#else
1695	_ror	$tp1,$tpd,16
1696	 xor	$tpe,$tp2
1697	_ror	$tp2,$tpd,-16
1698	xor	$tpe,$tp1
1699	_ror	$tp1,$tp9,8
1700	xor	$tpe,$tp2
1701	_ror	$tp2,$tp9,-24
1702	xor	$tpe,$tp1
1703	_ror	$tp1,$tpb,24
1704	xor	$tpe,$tp2
1705	_ror	$tp2,$tpb,-8
1706	xor	$tpe,$tp1
1707	lw	$tp1,4($key)		# modulo-scheduled
1708	xor	$tpe,$tp2
1709#endif
1710	sub	$cnt,1
1711	sw	$tpe,0($key)
1712	$PTR_ADD $key,4
1713	bnez	$cnt,.Lmix
1714
1715	li	$t0,0
1716.Ldkey_done:
1717	.set	noreorder
1718	move	$a0,$t0
1719	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
1720	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
1721___
1722$code.=<<___ if ($flavour =~ /nubi/i);
1723	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
1724	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
1725	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
1726	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
1727	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
1728___
1729$code.=<<___;
1730	jr	$ra
1731	$PTR_ADD $sp,$FRAMESIZE
1732.end	private_AES_set_decrypt_key
1733___
1734}}}
1735
1736######################################################################
1737# Tables are kept in an endian-neutral manner (emitted as raw .byte sequences)
1738$code.=<<___;
1739.rdata
1740.align	10
1741AES_Te:
1742.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
1743.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
1744.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
1745.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
1746.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
1747.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
1748.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
1749.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
1750.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
1751.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
1752.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
1753.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
1754.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
1755.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
1756.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
1757.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
1758.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
1759.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
1760.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
1761.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
1762.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
1763.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
1764.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
1765.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
1766.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
1767.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
1768.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
1769.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
1770.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
1771.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
1772.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
1773.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
1774.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
1775.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
1776.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
1777.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
1778.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
1779.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
1780.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
1781.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
1782.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
1783.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
1784.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
1785.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
1786.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
1787.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
1788.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
1789.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
1790.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
1791.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
1792.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
1793.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
1794.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
1795.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
1796.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
1797.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
1798.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
1799.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
1800.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
1801.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
1802.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
1803.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
1804.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
1805.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
1806.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
1807.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
1808.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
1809.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
1810.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
1811.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
1812.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
1813.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
1814.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
1815.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
1816.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
1817.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
1818.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
1819.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
1820.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
1821.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
1822.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
1823.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
1824.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
1825.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
1826.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
1827.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
1828.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
1829.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
1830.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
1831.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
1832.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
1833.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
1834.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
1835.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
1836.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
1837.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
1838.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
1839.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
1840.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
1841.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
1842.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
1843.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
1844.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
1845.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
1846.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
1847.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
1848.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
1849.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
1850.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
1851.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
1852.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
1853.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
1854.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
1855.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
1856.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
1857.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
1858.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
1859.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
1860.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
1861.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
1862.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
1863.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
1864.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
1865.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
1866.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
1867.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
1868.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
1869.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
1870
1871AES_Td:
1872.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
1873.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
1874.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
1875.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
1876.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
1877.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
1878.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
1879.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
1880.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
1881.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
1882.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
1883.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
1884.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
1885.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
1886.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
1887.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
1888.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
1889.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
1890.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
1891.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
1892.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
1893.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
1894.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
1895.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
1896.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
1897.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
1898.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
1899.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
1900.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
1901.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
1902.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
1903.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
1904.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
1905.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
1906.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
1907.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
1908.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
1909.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
1910.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
1911.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
1912.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
1913.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
1914.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
1915.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
1916.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
1917.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
1918.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
1919.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
1920.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
1921.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
1922.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
1923.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
1924.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
1925.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
1926.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
1927.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
1928.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
1929.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
1930.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
1931.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
1932.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
1933.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
1934.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
1935.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
1936.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
1937.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
1938.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
1939.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
1940.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
1941.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
1942.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
1943.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
1944.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
1945.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
1946.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
1947.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
1948.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
1949.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
1950.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
1951.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
1952.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
1953.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
1954.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
1955.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
1956.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
1957.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
1958.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
1959.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
1960.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
1961.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
1962.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
1963.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
1964.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
1965.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
1966.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
1967.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
1968.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
1969.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
1970.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
1971.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
1972.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
1973.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
1974.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
1975.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
1976.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
1977.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
1978.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
1979.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
1980.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
1981.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
1982.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
1983.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
1984.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
1985.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
1986.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
1987.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
1988.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
1989.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
1990.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
1991.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
1992.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
1993.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
1994.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
1995.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
1996.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
1997.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
1998.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
1999.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
2000
2001.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38	# Td4
2002.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2003.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2004.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2005.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2006.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2007.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2008.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2009.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2010.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2011.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2012.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2013.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2014.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2015.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2016.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2017.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2018.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2019.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2020.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2021.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2022.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2023.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2024.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2025.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2026.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2027.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2028.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2029.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2030.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2031.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2032.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2033
2034AES_Te4:
2035.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
2036.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2037.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2038.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2039.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2040.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2041.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2042.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2043.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2044.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2045.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2046.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2047.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2048.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2049.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2050.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2051.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2052.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2053.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2054.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2055.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2056.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2057.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2058.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2059.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2060.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2061.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2062.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2063.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2064.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2065.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2066.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2067
2068.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
2069.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
2070.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
2071.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
2072.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
2073___
2074
2075foreach (split("\n",$code)) {
2076	s/\`([^\`]*)\`/eval $1/ge;
2077
2078	# made-up instructions _xtr, _ins, _ins2, _ror and _bias cope
2079	# with byte-order dependencies...
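	# e.g. (illustrative): "_xtr $x,$y,16-2" is rewritten to an srl by 14
	# on a big-endian target but by 6 (i.e. 24-16-2) on a little-endian
	# one, and a two-operand form such as "_ins $x,8" is first expanded
	# to "_ins $x,$x,8" by the substitution below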
2080	if (/^\s+_/) {
2081	    s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
2082
2083	    s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
2084		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
2085					:		eval("24-$3"))/e or
2086	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2087		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
2088					:		eval("24-$3"))/e or
2089	    s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2090		sprintf("ins\t$1,$2,%d,8",$big_endian ?	eval($3)
2091					:		eval("24-$3"))/e or
2092	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
2093		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
2094					:		eval("$3*-1"))/e or
2095	    s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2096		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
2097					:		eval("($3-16)&31"))/e;
2098
2099	    s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
2100		sprintf("sll\t$1,$2,$3")/e				or
2101	    s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
2102		sprintf("and\t$1,$2,0xff")/e				or
2103	    s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
2104	}
2105
2106	# convert lwl/lwr and swr/swl to little-endian order
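	# (e.g. the "lwl rd,0(base); lwr rd,3(base)" pairs that fetch the input
	# block above become "lwl rd,3(base); lwr rd,0(base)" here, which is
	# the mirrored addressing a little-endian core expects)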
2107	if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
2108	    s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
2109		sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e	or
2110	    s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
2111		sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
2112	}
2113
2114	if (!$big_endian) {
2115	    s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
2116	    s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
2117	}
2118
2119	print $_,"\n";
2120}
2121
2122close STDOUT;
2123