1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for MIPS
11
12# October 2010
13#
14# Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
15# spends ~68 cycles per byte processed with 128-bit key. This is ~16%
16# faster than gcc-generated code, which is not very impressive. But
17# recall that compressed S-box requires extra processing, namely
18# additional rotations. Rotations are implemented with lwl/lwr pairs,
# which are normally used for loading unaligned data. Another cool
20# thing about this module is its endian neutrality, which means that
21# it processes data without ever changing byte order...
22
23######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
26# one picks the latter, it's possible to arrange code in ABI neutral
27# manner. Therefore let's stick to NUBI register layout:
28#
# NUBI register map: each symbolic name holds the assembler spelling "$N"
# of the hardware register number it is bound to.
($zero,$at,$t0,$t1,$t2)=map(sprintf('$%d',$_),0..2,24,25);
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map(sprintf('$%d',$_),4..11);
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map(sprintf('$%d',$_),12..23);
($gp,$tp,$sp,$fp,$ra)=map(sprintf('$%d',$_),3,28..31);
33#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
36#
37# - never ever touch $tp, "thread pointer", former $gp;
38# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
39#   old code];
40# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
41#
42# For reference here is register layout for N32/64 MIPS ABIs:
43#
44# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
45# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
46# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
47# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
48# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
49#
$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64

# Mnemonic used for the PIC-ified 'load address' of the lookup tables.
$LA = ($flavour =~ /64/i) ? "dla" : "la";

# Pointer arithmetic, register save/restore mnemonics and register size in
# bytes. The 64-bit forms are selected for n32 as well, where they
# incidentally work too.
($PTR_ADD,$PTR_SUB,$REG_S,$REG_L,$PTR_SLL,$SZREG) =
	($flavour =~ /64|n32/i)
	? ("dadd","dsub","sd","ld","dsll",8)
	: ("add","sub","sw","lw","sll",4);

# Register handed to the .cpload/.cpsetup PIC directives.
$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
74#
75# <appro@openssl.org>
76#
77######################################################################
78
# Work out the byte order of the target. If a compiler is named in $CC, feed
# the token MIPSEL through its preprocessor: when the token comes back
# unchanged the compiler has no such predefined macro and the target is taken
# to be big-endian; when it was substituted the target is little-endian.
# $big_endian is deliberately left undefined when $CC is unset or the probe
# printed nothing (e.g. the command could not be run), so that the fallback
# below actually gets a chance to run.
if ($ENV{CC}) {
	my $probe=`echo MIPSEL | $ENV{CC} -E -P -`;
	$big_endian=($probe=~/MIPSEL/)?1:0 if (defined($probe) && $probe=~/\S/);
}

# Fallback: assume the target has the byte order of the machine running
# this script.
if (!defined($big_endian))
{    $big_endian=(unpack('L',pack('N',1))==1);   }

# Consume arguments up to and including the first one that looks like a
# plain "name.ext" file name and send the generated code there. Without
# such an argument STDOUT is left untouched.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
if (defined($output) && length($output)) {
	open(STDOUT,">",$output) or die "can't open $output: $!";
}

my ($MSB,$LSB)=(0,3);	# automatically converted to little-endian
91
# Assembler preamble: switch to the text section, emit ".option pic2" unless
# building for VxWorks without __pic__, and issue ".set noat" (the code below
# uses $at as an ordinary scratch register). The #if/#endif lines are emitted
# verbatim, so the output is presumably run through a C preprocessor before
# assembly -- TODO confirm against the build rules.
$code.=<<___;
.text
#if !defined(__vxworks) || defined(__pic__)
.option	pic2
#endif
.set	noat
___
99
100{{{
# Stack frame of the public wrappers: sixteen register-sized slots.
my $FRAMESIZE=$SZREG*16;
# Value for the .mask directive; the NUBI variant names more registers.
my $SAVED_REGS_MASK = 0xc0ff0000;
$SAVED_REGS_MASK = 0xc0fff008 if ($flavour =~ /nubi/i);

# Arguments and table pointer, followed by the four state words.
my ($inp,$out,$key,$Tbl)=($a0,$a1,$a2,$a3);
my ($s0,$s1,$s2,$s3)=($a4,$a5,$a6,$a7);
# Table index registers. This must stay ahead of the next declaration,
# which shadows the global $t0-$t2 read here.
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# Twelve temporaries, bound to registers 12 through 23.
my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map(sprintf('$%d',$_),12..23);
# Running round-key pointer and round counter.
my ($key0,$cnt)=($gp,$fp);
108
# _mips_AES_encrypt: file-local routine (no .globl) encrypting one block.
#   in:  state words in $s0-$s3, table base in $Tbl, key schedule in $key;
#        the round count is read from offset 240 of the key schedule.
#   out: result in $s0-$s3; returns with "jr $ra" and uses no stack frame.
# The first round key is XORed in up front; .Loop_enc then performs one
# table-lookup round per pass (sixteen word-sized table reads, the lwl/lwr
# pairs fetching at byte offsets 3/2, 2/1 and 1/0 of an entry) until $cnt
# reaches zero. The last round is done separately with byte loads (lbu from
# offset 2 of the same entries) whose results are merged using _ins.
# _xtr and _ins are not real instructions; they are presumably rewritten by
# a post-processing pass later in this file -- TODO confirm.
# The same heredoc also opens the public wrapper aes_encrypt_internal (label
# and .frame/.mask directives only).
#
# instruction ordering is "stolen" from output from MIPSpro assembler
# invoked with -mips3 -O3 arguments...
$code.=<<___;
.align	5
.ent	_mips_AES_encrypt
_mips_AES_encrypt:
	.frame	$sp,0,$ra
	.set	reorder
	lw	$t0,0($key)
	lw	$t1,4($key)
	lw	$t2,8($key)
	lw	$t3,12($key)
	lw	$cnt,240($key)
	$PTR_ADD $key0,$key,16

	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3

	sub	$cnt,1
	_xtr	$i0,$s1,16-2
.Loop_enc:
	_xtr	$i1,$s2,16-2
	_xtr	$i2,$s3,16-2
	_xtr	$i3,$s0,16-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t0,3($i0)		# Te1[s1>>16]
	lwl	$t1,3($i1)		# Te1[s2>>16]
	lwl	$t2,3($i2)		# Te1[s3>>16]
	lwl	$t3,3($i3)		# Te1[s0>>16]
	lwr	$t0,2($i0)		# Te1[s1>>16]
	lwr	$t1,2($i1)		# Te1[s2>>16]
	lwr	$t2,2($i2)		# Te1[s3>>16]
	lwr	$t3,2($i3)		# Te1[s0>>16]

	_xtr	$i0,$s2,8-2
	_xtr	$i1,$s3,8-2
	_xtr	$i2,$s0,8-2
	_xtr	$i3,$s1,8-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t4,2($i0)		# Te2[s2>>8]
	lwl	$t5,2($i1)		# Te2[s3>>8]
	lwl	$t6,2($i2)		# Te2[s0>>8]
	lwl	$t7,2($i3)		# Te2[s1>>8]
	lwr	$t4,1($i0)		# Te2[s2>>8]
	lwr	$t5,1($i1)		# Te2[s3>>8]
	lwr	$t6,1($i2)		# Te2[s0>>8]
	lwr	$t7,1($i3)		# Te2[s1>>8]

	_xtr	$i0,$s3,0-2
	_xtr	$i1,$s0,0-2
	_xtr	$i2,$s1,0-2
	_xtr	$i3,$s2,0-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t8,1($i0)		# Te3[s3]
	lwl	$t9,1($i1)		# Te3[s0]
	lwl	$t10,1($i2)		# Te3[s1]
	lwl	$t11,1($i3)		# Te3[s2]
	lwr	$t8,0($i0)		# Te3[s3]
	lwr	$t9,0($i1)		# Te3[s0]
	lwr	$t10,0($i2)		# Te3[s1]
	lwr	$t11,0($i3)		# Te3[s2]

	_xtr	$i0,$s0,24-2
	_xtr	$i1,$s1,24-2
	_xtr	$i2,$s2,24-2
	_xtr	$i3,$s3,24-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7
	lw	$t4,0($i0)		# Te0[s0>>24]
	lw	$t5,0($i1)		# Te0[s1>>24]
	lw	$t6,0($i2)		# Te0[s2>>24]
	lw	$t7,0($i3)		# Te0[s3>>24]

	lw	$s0,0($key0)
	lw	$s1,4($key0)
	lw	$s2,8($key0)
	lw	$s3,12($key0)

	xor	$t0,$t8
	xor	$t1,$t9
	xor	$t2,$t10
	xor	$t3,$t11

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	sub	$cnt,1
	$PTR_ADD $key0,16
	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3
	.set	noreorder
	bnez	$cnt,.Loop_enc
	_xtr	$i0,$s1,16-2

	.set	reorder
	_xtr	$i1,$s2,16-2
	_xtr	$i2,$s3,16-2
	_xtr	$i3,$s0,16-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t0,2($i0)		# Te4[s1>>16]
	lbu	$t1,2($i1)		# Te4[s2>>16]
	lbu	$t2,2($i2)		# Te4[s3>>16]
	lbu	$t3,2($i3)		# Te4[s0>>16]

	_xtr	$i0,$s2,8-2
	_xtr	$i1,$s3,8-2
	_xtr	$i2,$s0,8-2
	_xtr	$i3,$s1,8-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t4,2($i0)		# Te4[s2>>8]
	lbu	$t5,2($i1)		# Te4[s3>>8]
	lbu	$t6,2($i2)		# Te4[s0>>8]
	lbu	$t7,2($i3)		# Te4[s1>>8]

	_xtr	$i0,$s0,24-2
	_xtr	$i1,$s1,24-2
	_xtr	$i2,$s2,24-2
	_xtr	$i3,$s3,24-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t8,2($i0)		# Te4[s0>>24]
	lbu	$t9,2($i1)		# Te4[s1>>24]
	lbu	$t10,2($i2)		# Te4[s2>>24]
	lbu	$t11,2($i3)		# Te4[s3>>24]

	_xtr	$i0,$s3,0-2
	_xtr	$i1,$s0,0-2
	_xtr	$i2,$s1,0-2
	_xtr	$i3,$s2,0-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc

	_ins	$t0,16
	_ins	$t1,16
	_ins	$t2,16
	_ins	$t3,16

	_ins	$t4,8
	_ins	$t5,8
	_ins	$t6,8
	_ins	$t7,8

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t4,2($i0)		# Te4[s3]
	lbu	$t5,2($i1)		# Te4[s0]
	lbu	$t6,2($i2)		# Te4[s1]
	lbu	$t7,2($i3)		# Te4[s2]

	_ins	$t8,24
	_ins	$t9,24
	_ins	$t10,24
	_ins	$t11,24

	lw	$s0,0($key0)
	lw	$s1,4($key0)
	lw	$s2,8($key0)
	lw	$s3,12($key0)

	xor	$t0,$t8
	xor	$t1,$t9
	xor	$t2,$t10
	xor	$t3,$t11

	_ins	$t4,0
	_ins	$t5,0
	_ins	$t6,0
	_ins	$t7,0

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3

	jr	$ra
.end	_mips_AES_encrypt

.align	5
.globl	aes_encrypt_internal
.ent	aes_encrypt_internal
aes_encrypt_internal:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
# Remainder of aes_encrypt_internal (arguments: $inp, $out, $key): prologue,
# body and epilogue, assembled from flavour-dependent pieces.
#
# o32 only: .cpload goes directly after the entry label.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# All flavours: allocate the frame and save $ra, $fp and $s11..$s4 in its
# top ten slots.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
___
# NUBI only: registers 15..12 and $gp are saved as well (slots 11-15); the
# other flavours skip these five stores.
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
___
# Every flavour except o32: PIC setup via .cplocal/.cpsetup.
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,aes_encrypt_internal
___
# Body: point $Tbl at AES_Te, read the 16 input bytes with lwl/lwr pairs,
# call _mips_AES_encrypt, write the result with swr/swl pairs and reload the
# registers saved by the common part of the prologue.
$code.=<<___;
	.set	reorder
	$LA	$Tbl,AES_Te		# PIC-ified 'load address'

	lwl	$s0,0+$MSB($inp)
	lwl	$s1,4+$MSB($inp)
	lwl	$s2,8+$MSB($inp)
	lwl	$s3,12+$MSB($inp)
	lwr	$s0,0+$LSB($inp)
	lwr	$s1,4+$LSB($inp)
	lwr	$s2,8+$LSB($inp)
	lwr	$s3,12+$LSB($inp)

	bal	_mips_AES_encrypt

	swr	$s0,0+$LSB($out)
	swr	$s1,4+$LSB($out)
	swr	$s2,8+$LSB($out)
	swr	$s3,12+$LSB($out)
	swl	$s0,0+$MSB($out)
	swl	$s1,4+$MSB($out)
	swl	$s2,8+$MSB($out)
	swl	$s3,12+$MSB($out)

	.set	noreorder
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
___
# NUBI only: reload the five extra registers from the slots used by the
# NUBI part of the prologue.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
___
# Return; with .set noreorder in effect the frame release that follows the
# jump sits in its delay slot.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	aes_encrypt_internal
___
440
# _mips_AES_decrypt: file-local routine (no .globl) decrypting one block.
#   in:  state words in $s0-$s3, table base in $Tbl, key schedule in $key;
#        the round count is read from offset 240 of the key schedule.
#   out: result in $s0-$s3; returns with "jr $ra" and uses no stack frame.
# Same overall shape as the encrypt routine: first round key XORed in up
# front, .Loop_dec doing one table-lookup round per pass until $cnt reaches
# zero, then a separate last round. The last round first touches the 256
# bytes at offset 1024 of the table with eight word loads 32 bytes apart
# (the "prefetch Td4" below) and then looks bytes up there with lbu.
# _xtr and _ins are not real instructions; they are presumably rewritten by
# a post-processing pass later in this file -- TODO confirm.
# The same heredoc also opens the public wrapper aes_decrypt_internal (label
# and .frame/.mask directives only).
$code.=<<___;
.align	5
.ent	_mips_AES_decrypt
_mips_AES_decrypt:
	.frame	$sp,0,$ra
	.set	reorder
	lw	$t0,0($key)
	lw	$t1,4($key)
	lw	$t2,8($key)
	lw	$t3,12($key)
	lw	$cnt,240($key)
	$PTR_ADD $key0,$key,16

	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3

	sub	$cnt,1
	_xtr	$i0,$s3,16-2
.Loop_dec:
	_xtr	$i1,$s0,16-2
	_xtr	$i2,$s1,16-2
	_xtr	$i3,$s2,16-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t0,3($i0)		# Td1[s3>>16]
	lwl	$t1,3($i1)		# Td1[s0>>16]
	lwl	$t2,3($i2)		# Td1[s1>>16]
	lwl	$t3,3($i3)		# Td1[s2>>16]
	lwr	$t0,2($i0)		# Td1[s3>>16]
	lwr	$t1,2($i1)		# Td1[s0>>16]
	lwr	$t2,2($i2)		# Td1[s1>>16]
	lwr	$t3,2($i3)		# Td1[s2>>16]

	_xtr	$i0,$s2,8-2
	_xtr	$i1,$s3,8-2
	_xtr	$i2,$s0,8-2
	_xtr	$i3,$s1,8-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t4,2($i0)		# Td2[s2>>8]
	lwl	$t5,2($i1)		# Td2[s3>>8]
	lwl	$t6,2($i2)		# Td2[s0>>8]
	lwl	$t7,2($i3)		# Td2[s1>>8]
	lwr	$t4,1($i0)		# Td2[s2>>8]
	lwr	$t5,1($i1)		# Td2[s3>>8]
	lwr	$t6,1($i2)		# Td2[s0>>8]
	lwr	$t7,1($i3)		# Td2[s1>>8]

	_xtr	$i0,$s1,0-2
	_xtr	$i1,$s2,0-2
	_xtr	$i2,$s3,0-2
	_xtr	$i3,$s0,0-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t8,1($i0)		# Td3[s1]
	lwl	$t9,1($i1)		# Td3[s2]
	lwl	$t10,1($i2)		# Td3[s3]
	lwl	$t11,1($i3)		# Td3[s0]
	lwr	$t8,0($i0)		# Td3[s1]
	lwr	$t9,0($i1)		# Td3[s2]
	lwr	$t10,0($i2)		# Td3[s3]
	lwr	$t11,0($i3)		# Td3[s0]

	_xtr	$i0,$s0,24-2
	_xtr	$i1,$s1,24-2
	_xtr	$i2,$s2,24-2
	_xtr	$i3,$s3,24-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7


	lw	$t4,0($i0)		# Td0[s0>>24]
	lw	$t5,0($i1)		# Td0[s1>>24]
	lw	$t6,0($i2)		# Td0[s2>>24]
	lw	$t7,0($i3)		# Td0[s3>>24]

	lw	$s0,0($key0)
	lw	$s1,4($key0)
	lw	$s2,8($key0)
	lw	$s3,12($key0)

	xor	$t0,$t8
	xor	$t1,$t9
	xor	$t2,$t10
	xor	$t3,$t11

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	sub	$cnt,1
	$PTR_ADD $key0,16
	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3
	.set	noreorder
	bnez	$cnt,.Loop_dec
	_xtr	$i0,$s3,16-2

	.set	reorder
	lw	$t4,1024($Tbl)		# prefetch Td4
	lw	$t5,1024+32($Tbl)
	lw	$t6,1024+64($Tbl)
	lw	$t7,1024+96($Tbl)
	lw	$t8,1024+128($Tbl)
	lw	$t9,1024+160($Tbl)
	lw	$t10,1024+192($Tbl)
	lw	$t11,1024+224($Tbl)

	_xtr	$i0,$s3,16
	_xtr	$i1,$s0,16
	_xtr	$i2,$s1,16
	_xtr	$i3,$s2,16
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,0xff
	and	$i3,0xff
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t0,1024($i0)		# Td4[s3>>16]
	lbu	$t1,1024($i1)		# Td4[s0>>16]
	lbu	$t2,1024($i2)		# Td4[s1>>16]
	lbu	$t3,1024($i3)		# Td4[s2>>16]

	_xtr	$i0,$s2,8
	_xtr	$i1,$s3,8
	_xtr	$i2,$s0,8
	_xtr	$i3,$s1,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,0xff
	and	$i3,0xff
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t4,1024($i0)		# Td4[s2>>8]
	lbu	$t5,1024($i1)		# Td4[s3>>8]
	lbu	$t6,1024($i2)		# Td4[s0>>8]
	lbu	$t7,1024($i3)		# Td4[s1>>8]

	_xtr	$i0,$s0,24
	_xtr	$i1,$s1,24
	_xtr	$i2,$s2,24
	_xtr	$i3,$s3,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t8,1024($i0)		# Td4[s0>>24]
	lbu	$t9,1024($i1)		# Td4[s1>>24]
	lbu	$t10,1024($i2)		# Td4[s2>>24]
	lbu	$t11,1024($i3)		# Td4[s3>>24]

	_xtr	$i0,$s1,0
	_xtr	$i1,$s2,0
	_xtr	$i2,$s3,0
	_xtr	$i3,$s0,0

	_ins	$t0,16
	_ins	$t1,16
	_ins	$t2,16
	_ins	$t3,16

	_ins	$t4,8
	_ins	$t5,8
	_ins	$t6,8
	_ins	$t7,8

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t4,1024($i0)		# Td4[s1]
	lbu	$t5,1024($i1)		# Td4[s2]
	lbu	$t6,1024($i2)		# Td4[s3]
	lbu	$t7,1024($i3)		# Td4[s0]

	_ins	$t8,24
	_ins	$t9,24
	_ins	$t10,24
	_ins	$t11,24

	lw	$s0,0($key0)
	lw	$s1,4($key0)
	lw	$s2,8($key0)
	lw	$s3,12($key0)

	_ins	$t4,0
	_ins	$t5,0
	_ins	$t6,0
	_ins	$t7,0


	xor	$t0,$t8
	xor	$t1,$t9
	xor	$t2,$t10
	xor	$t3,$t11

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3

	jr	$ra
.end	_mips_AES_decrypt

.align	5
.globl	aes_decrypt_internal
.ent	aes_decrypt_internal
aes_decrypt_internal:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
# Remainder of aes_decrypt_internal (arguments: $inp, $out, $key): prologue,
# body and epilogue, assembled from flavour-dependent pieces.
#
# o32 only: .cpload goes directly after the entry label.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# All flavours: allocate the frame and save $ra, $fp and $s11..$s4 in its
# top ten slots.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
___
# NUBI only: registers 15..12 and $gp are saved as well (slots 11-15); the
# other flavours skip these five stores.
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
___
# Every flavour except o32: PIC setup via .cplocal/.cpsetup.
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,aes_decrypt_internal
___
# Body: point $Tbl at AES_Td, read the 16 input bytes with lwl/lwr pairs,
# call _mips_AES_decrypt, write the result with swr/swl pairs and reload the
# registers saved by the common part of the prologue.
$code.=<<___;
	.set	reorder
	$LA	$Tbl,AES_Td		# PIC-ified 'load address'

	lwl	$s0,0+$MSB($inp)
	lwl	$s1,4+$MSB($inp)
	lwl	$s2,8+$MSB($inp)
	lwl	$s3,12+$MSB($inp)
	lwr	$s0,0+$LSB($inp)
	lwr	$s1,4+$LSB($inp)
	lwr	$s2,8+$LSB($inp)
	lwr	$s3,12+$LSB($inp)

	bal	_mips_AES_decrypt

	swr	$s0,0+$LSB($out)
	swr	$s1,4+$LSB($out)
	swr	$s2,8+$LSB($out)
	swr	$s3,12+$LSB($out)
	swl	$s0,0+$MSB($out)
	swl	$s1,4+$MSB($out)
	swl	$s2,8+$MSB($out)
	swl	$s3,12+$MSB($out)

	.set	noreorder
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
___
# NUBI only: reload the five extra registers from the slots used by the
# NUBI part of the prologue.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
___
# Return; with .set noreorder in effect the frame release that follows the
# jump sits in its delay slot.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	aes_decrypt_internal
___
776}}}
777
778{{{
# Stack frame of the key-setup wrappers: eight register-sized slots.
my $FRAMESIZE=$SZREG*8;
# Value for the .mask directive; the NUBI variant names more registers.
my $SAVED_REGS_MASK = 0xc0000000;
$SAVED_REGS_MASK = 0xc000f008 if ($flavour =~ /nubi/i);

# Arguments and table pointer.
my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
# Up to eight words of key material under expansion.
my ($rk0,$rk1,$rk2,$rk3)=($a4,$a5,$a6,$a7);
my ($rk4,$rk5,$rk6,$rk7)=($s0,$s1,$s2,$s3);
# Table index registers.
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# Round-constant pointer and loop counter.
my ($rcon,$cnt)=($gp,$fp);
786
# _mips_AES_set_encrypt_key: file-local key expansion routine.
#   in:  user key in $inp, key length in bits in $bits, output schedule in
#        $key, table base in $Tbl (byte lookups are made at offset 1024, the
#        round constants are read from offset 1024+256 onwards).
#   out: status in $t0: -1 when $inp or $key is zero, -2 when $bits is not
#        128, 192 or 256, and 0 on success.
# On success the round count (10, 12 or 14) is stored at what works out to
# byte offset 240 of the schedule, $key is rewound to the start of the
# schedule and $cnt is left holding the round count.
# Note that $i1 and $t0 name the same register here, so the status is only
# meaningful at the exits, where it is (re)loaded explicitly.
# _bias is not a real instruction; it is presumably rewritten by a
# post-processing pass later in this file -- TODO confirm.
# The same heredoc also opens the public wrapper aes_set_encrypt_key_internal
# (label and .frame/.mask directives only).
$code.=<<___;
.align	5
.ent	_mips_AES_set_encrypt_key
_mips_AES_set_encrypt_key:
	.frame	$sp,0,$ra
	.set	noreorder
	beqz	$inp,.Lekey_done
	li	$t0,-1
	beqz	$key,.Lekey_done
	$PTR_ADD $rcon,$Tbl,1024+256

	.set	reorder
	lwl	$rk0,0+$MSB($inp)	# load 128 bits
	lwl	$rk1,4+$MSB($inp)
	lwl	$rk2,8+$MSB($inp)
	lwl	$rk3,12+$MSB($inp)
	li	$at,128
	lwr	$rk0,0+$LSB($inp)
	lwr	$rk1,4+$LSB($inp)
	lwr	$rk2,8+$LSB($inp)
	lwr	$rk3,12+$LSB($inp)
	.set	noreorder
	beq	$bits,$at,.L128bits
	li	$cnt,10

	.set	reorder
	lwl	$rk4,16+$MSB($inp)	# load 192 bits
	lwl	$rk5,20+$MSB($inp)
	li	$at,192
	lwr	$rk4,16+$LSB($inp)
	lwr	$rk5,20+$LSB($inp)
	.set	noreorder
	beq	$bits,$at,.L192bits
	li	$cnt,8

	.set	reorder
	lwl	$rk6,24+$MSB($inp)	# load 256 bits
	lwl	$rk7,28+$MSB($inp)
	li	$at,256
	lwr	$rk6,24+$LSB($inp)
	lwr	$rk7,28+$LSB($inp)
	.set	noreorder
	beq	$bits,$at,.L256bits
	li	$cnt,7

	b	.Lekey_done
	li	$t0,-2

.align	4
.L128bits:
	.set	reorder
	srl	$i0,$rk3,16
	srl	$i1,$rk3,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk3,0xff
	srl	$i3,$rk3,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sub	$cnt,1
	$PTR_ADD $key,16

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2

	.set	noreorder
	bnez	$cnt,.L128bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,10
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,80($key)
	b	.Lekey_done
	$PTR_SUB $key,10*16

.align	4
.L192bits:
	.set	reorder
	srl	$i0,$rk5,16
	srl	$i1,$rk5,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk5,0xff
	srl	$i3,$rk5,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	sub	$cnt,1
	$PTR_ADD $key,24

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	xor	$rk4,$rk3
	xor	$rk5,$rk4

	.set	noreorder
	bnez	$cnt,.L192bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,12
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,48($key)
	b	.Lekey_done
	$PTR_SUB $key,12*16

.align	4
.L256bits:
	.set	reorder
	srl	$i0,$rk7,16
	srl	$i1,$rk7,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk7,0xff
	srl	$i3,$rk7,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	sw	$rk6,24($key)
	sw	$rk7,28($key)
	sub	$cnt,1

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	beqz	$cnt,.L256bits_done

	srl	$i0,$rk3,24
	srl	$i1,$rk3,16
	srl	$i2,$rk3,8
	and	$i3,$rk3,0xff
	and	$i1,0xff
	and	$i2,0xff
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)
	sll	$i0,24
	sll	$i1,16
	sll	$i2,8

	xor	$rk4,$i0
	xor	$rk4,$i1
	xor	$rk4,$i2
	xor	$rk4,$i3

	xor	$rk5,$rk4
	xor	$rk6,$rk5
	xor	$rk7,$rk6

	$PTR_ADD $key,32
	.set	noreorder
	b	.L256bits
	$PTR_ADD $rcon,4

.L256bits_done:
	sw	$rk0,32($key)
	sw	$rk1,36($key)
	sw	$rk2,40($key)
	li	$cnt,14
	sw	$rk3,44($key)
	li	$t0,0
	sw	$cnt,48($key)
	$PTR_SUB $key,12*16

.Lekey_done:
	jr	$ra
	nop
.end	_mips_AES_set_encrypt_key

.globl	aes_set_encrypt_key_internal
.ent	aes_set_encrypt_key_internal
aes_set_encrypt_key_internal:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
# Remainder of aes_set_encrypt_key_internal up to the common register
# reloads: prologue, table setup and the call into the worker routine.
#
# o32 only: .cpload goes directly after the entry label.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# All flavours: allocate the frame and save $ra and $fp in its top two slots.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI only: $s3..$s0 and $gp are saved as well, in slots 3-7; the other
# flavours skip these five stores.
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Every flavour except o32: PIC setup via .cplocal/.cpsetup.
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,aes_set_encrypt_key_internal
___
# Point $Tbl at AES_Te, run the key expansion, copy its status from $t0 to
# $a0 and start reloading the saved registers.
$code.=<<___;
	.set	reorder
	$LA	$Tbl,AES_Te		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	.set	noreorder
	move	$a0,$t0
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI only: reload $s3..$s0 and $gp. The offsets have to mirror the stores
# made by the prologue of this function (FRAMESIZE-3*SZREG down to
# FRAMESIZE-7*SZREG). The frame is only 8 registers deep here, so the -11..-15
# offsets used by the larger encrypt/decrypt frames would address memory
# below $sp and load garbage into callee-saved registers.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Return; .set noreorder is in effect, so the frame release that follows the
# jump sits in its delay slot.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	aes_set_encrypt_key_internal
___
1090
# Register aliases for the decrypt key setup. They reuse registers already
# named in this scope: $head/$tail share $inp/$bits, the $tp* values share
# the $rk* registers, and $m plus the three mask constants share $at and the
# $t0-$t2 scratch registers.
my ($head,$tail)=($inp,$bits);
my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
# Entry label and frame directives of aes_set_decrypt_key_internal.
$code.=<<___;
.align	5
.globl	aes_set_decrypt_key_internal
.ent	aes_set_decrypt_key_internal
aes_set_decrypt_key_internal:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
# o32 only: .cpload goes directly after the entry label.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# All flavours: allocate the frame and save $ra and $fp in its top two slots.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI only: $s3..$s0 and $gp are saved as well, in slots 3-7; the other
# flavours skip these five stores.
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Every flavour except o32: PIC setup via .cplocal/.cpsetup.
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,aes_set_decrypt_key_internal
___
# Body of aes_set_decrypt_key_internal:
#  1. build the encryption schedule with _mips_AES_set_encrypt_key and go
#     straight to the exit if it reported a negative status;
#  2. .Lswap: exchange 16-byte round keys pairwise from both ends of the
#     schedule ($tail starts at $key + 16*rounds) until the two pointers
#     meet, i.e. reverse the order of the round keys in place;
#  3. .Lmix: rewrite (rounds-1)*4 consecutive words starting at offset 16.
#     Each word is doubled three times with the 0x80808080 / 0x7f7f7f7f /
#     0x1b1b1b1b masks (tp2, tp4, tp8) and the results are combined with
#     rotations. Judging by the tp9/tpb/tpd/tpe names this is the inverse
#     MixColumns step -- NOTE(review): confirm.
#     The load of the next word is issued before the current one is stored
#     ("modulo-scheduled").
# The status left in $t0 is copied to $a0 before the register reloads.
# _ror is not a real instruction; it is presumably rewritten by a
# post-processing pass later in this file -- TODO confirm.
$code.=<<___;
	.set	reorder
	$LA	$Tbl,AES_Te		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	bltz	$t0,.Ldkey_done

	sll	$at,$cnt,4
	$PTR_ADD $head,$key,0
	$PTR_ADD $tail,$key,$at
.align	4
.Lswap:
	lw	$rk0,0($head)
	lw	$rk1,4($head)
	lw	$rk2,8($head)
	lw	$rk3,12($head)
	lw	$rk4,0($tail)
	lw	$rk5,4($tail)
	lw	$rk6,8($tail)
	lw	$rk7,12($tail)
	sw	$rk0,0($tail)
	sw	$rk1,4($tail)
	sw	$rk2,8($tail)
	sw	$rk3,12($tail)
	$PTR_ADD $head,16
	$PTR_SUB $tail,16
	sw	$rk4,-16($head)
	sw	$rk5,-12($head)
	sw	$rk6,-8($head)
	sw	$rk7,-4($head)
	bne	$head,$tail,.Lswap

	lw	$tp1,16($key)		# modulo-scheduled
	lui	$x80808080,0x8080
	sub	$cnt,1
	or	$x80808080,0x8080
	sll	$cnt,2
	$PTR_ADD $key,16
	lui	$x1b1b1b1b,0x1b1b
	nor	$x7f7f7f7f,$zero,$x80808080
	or	$x1b1b1b1b,0x1b1b
.align	4
.Lmix:
	and	$m,$tp1,$x80808080
	and	$tp2,$tp1,$x7f7f7f7f
	srl	$tp4,$m,7
	addu	$tp2,$tp2		# tp2<<1
	subu	$m,$tp4
	and	$m,$x1b1b1b1b
	xor	$tp2,$m

	and	$m,$tp2,$x80808080
	and	$tp4,$tp2,$x7f7f7f7f
	srl	$tp8,$m,7
	addu	$tp4,$tp4		# tp4<<1
	subu	$m,$tp8
	and	$m,$x1b1b1b1b
	xor	$tp4,$m

	and	$m,$tp4,$x80808080
	and	$tp8,$tp4,$x7f7f7f7f
	srl	$tp9,$m,7
	addu	$tp8,$tp8		# tp8<<1
	subu	$m,$tp9
	and	$m,$x1b1b1b1b
	xor	$tp8,$m

	xor	$tp9,$tp8,$tp1
	xor	$tpe,$tp8,$tp4
	xor	$tpb,$tp9,$tp2
	xor	$tpd,$tp9,$tp4

	_ror	$tp1,$tpd,16
	 xor	$tpe,$tp2
	_ror	$tp2,$tpd,-16
	xor	$tpe,$tp1
	_ror	$tp1,$tp9,8
	xor	$tpe,$tp2
	_ror	$tp2,$tp9,-24
	xor	$tpe,$tp1
	_ror	$tp1,$tpb,24
	xor	$tpe,$tp2
	_ror	$tp2,$tpb,-8
	xor	$tpe,$tp1
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp2
	sub	$cnt,1
	sw	$tpe,0($key)
	$PTR_ADD $key,4
	bnez	$cnt,.Lmix

	li	$t0,0
.Ldkey_done:
	.set	noreorder
	move	$a0,$t0
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI only: reload $s3..$s0 and $gp. The offsets have to mirror the stores
# made by the prologue of this function (FRAMESIZE-3*SZREG down to
# FRAMESIZE-7*SZREG). The frame is only 8 registers deep here, so the -11..-15
# offsets used by the larger encrypt/decrypt frames would address memory
# below $sp and load garbage into callee-saved registers.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Return; .set noreorder is in effect, so the frame release that follows the
# jump sits in its delay slot.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	aes_set_decrypt_key_internal
___
1233}}}
1234
1235######################################################################
1236# Tables are kept in endian-neutral manner
1237$code.=<<___;
1238.rdata
1239.align	6
1240AES_Te:
1241.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
1242.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
1243.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
1244.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
1245.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
1246.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
1247.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
1248.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
1249.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
1250.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
1251.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
1252.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
1253.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
1254.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
1255.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
1256.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
1257.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
1258.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
1259.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
1260.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
1261.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
1262.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
1263.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
1264.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
1265.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
1266.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
1267.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
1268.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
1269.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
1270.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
1271.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
1272.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
1273.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
1274.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
1275.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
1276.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
1277.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
1278.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
1279.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
1280.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
1281.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
1282.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
1283.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
1284.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
1285.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
1286.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
1287.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
1288.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
1289.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
1290.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
1291.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
1292.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
1293.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
1294.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
1295.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
1296.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
1297.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
1298.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
1299.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
1300.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
1301.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
1302.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
1303.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
1304.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
1305.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
1306.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
1307.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
1308.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
1309.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
1310.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
1311.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
1312.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
1313.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
1314.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
1315.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
1316.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
1317.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
1318.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
1319.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
1320.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
1321.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
1322.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
1323.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
1324.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
1325.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
1326.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
1327.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
1328.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
1329.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
1330.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
1331.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
1332.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
1333.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
1334.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
1335.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
1336.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
1337.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
1338.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
1339.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
1340.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
1341.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
1342.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
1343.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
1344.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
1345.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
1346.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
1347.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
1348.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
1349.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
1350.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
1351.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
1352.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
1353.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
1354.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
1355.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
1356.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
1357.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
1358.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
1359.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
1360.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
1361.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
1362.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
1363.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
1364.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
1365.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
1366.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
1367.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
1368.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
1369
1370.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
1371.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1372.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1373.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1374.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1375.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1376.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1377.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1378.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1379.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1380.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1381.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1382.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1383.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1384.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1385.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1386.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1387.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1388.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1389.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1390.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1391.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1392.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1393.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1394.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1395.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1396.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1397.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1398.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1399.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1400.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1401.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1402
1403.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
1404.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
1405.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
1406.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
1407.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
1408
1409.align	6
1410AES_Td:
1411.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
1412.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
1413.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
1414.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
1415.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
1416.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
1417.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
1418.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
1419.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
1420.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
1421.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
1422.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
1423.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
1424.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
1425.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
1426.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
1427.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
1428.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
1429.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
1430.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
1431.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
1432.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
1433.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
1434.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
1435.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
1436.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
1437.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
1438.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
1439.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
1440.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
1441.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
1442.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
1443.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
1444.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
1445.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
1446.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
1447.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
1448.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
1449.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
1450.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
1451.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
1452.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
1453.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
1454.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
1455.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
1456.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
1457.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
1458.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
1459.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
1460.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
1461.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
1462.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
1463.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
1464.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
1465.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
1466.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
1467.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
1468.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
1469.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
1470.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
1471.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
1472.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
1473.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
1474.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
1475.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
1476.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
1477.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
1478.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
1479.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
1480.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
1481.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
1482.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
1483.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
1484.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
1485.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
1486.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
1487.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
1488.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
1489.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
1490.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
1491.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
1492.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
1493.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
1494.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
1495.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
1496.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
1497.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
1498.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
1499.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
1500.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
1501.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
1502.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
1503.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
1504.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
1505.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
1506.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
1507.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
1508.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
1509.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
1510.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
1511.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
1512.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
1513.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
1514.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
1515.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
1516.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
1517.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
1518.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
1519.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
1520.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
1521.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
1522.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
1523.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
1524.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
1525.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
1526.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
1527.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
1528.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
1529.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
1530.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
1531.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
1532.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
1533.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
1534.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
1535.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
1536.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
1537.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
1538.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
1539
1540.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38	# Td4
1541.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1542.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1543.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1544.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1545.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1546.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1547.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1548.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1549.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1550.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1551.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1552.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1553.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1554.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1555.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1556.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1557.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1558.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1559.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1560.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1561.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1562.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1563.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1564.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1565.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1566.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1567.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1568.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1569.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1570.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1571.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1572___
1573
# Post-process the generated assembly held in $code, one line at a time,
# and write the result to STDOUT:
#
#   1. evaluate every `...` (backtick-quoted) Perl expression and splice
#      its value into the line;
#   2. lower the made-up pseudo-instructions (_xtr, _ins, _ror, _bias)
#      to real shift instructions whose shift amount depends on
#      $big_endian;
#   3. when targeting little-endian, rewrite the byte offset of
#      lwl/lwr/swl/swr.
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/ge;

	# Made-up instructions (_xtr, _ins, _ror and _bias) hide the byte
	# order dependencies; they are recognized by a leading underscore.
	if (/^\s+_/) {
	    # Expand the two-operand form "_op $r,imm" to the three-operand
	    # form "_op $r,$r,imm". A trailing "#..." comment matched by
	    # the last group is dropped from the line.
	    s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;

	    # Lowering, shift amount N taken from the third operand (at
	    # most one of these rules fires per line):
	    #   _xtr  -> srl by N on big-endian, by 24-N on little-endian
	    #            (N may be written as "N-2"; it is eval'ed);
	    #   _ins  -> sll by N on big-endian, by 24-N on little-endian;
	    #   _ror  -> srl by N on big-endian, by -N on little-endian
	    #            (a negative amount is fixed up below);
	    #   _bias -> sll by N on big-endian, by (N-16)&31 on
	    #            little-endian.
	    s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("$3*-1"))/e or
	    s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("($3-16)&31"))/e;

	    # Fix-ups on the lowered instruction (first match wins):
	    #   srl by a negative amount -N  -> sll by N;
	    #   srl by 0                     -> and with 0xff;
	    #   sll by 0                     -> commented out with '#'.
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
		sprintf("sll\t$1,$2,$3")/e				or
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
		sprintf("and\t$1,$2,0xff")/e				or
	    s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
	}

	# Convert lwl/lwr and swr/swl to little-endian order.
	# Because ".*" is greedy, only the last digit d in front of
	# "($reg)" is captured and replaced. '&' binds looser than '+'
	# and '-', so the whole sum is masked: the digit becomes (d-1)&3
	# for lwl/swl and (d+1)&3 for lwr/swr.
	# NOTE(review): this presumably relies on offsets being written
	# so that the final digit is the byte index within the word
	# (e.g. "4+3"); confirm against the callers in $code.
	if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
	    s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
		sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e	or
	    s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
		sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
	}

	print $_,"\n";
}

# Output is buffered, so a failed write (full disk, broken pipe, ...) may
# only be reported when the handle is closed. Fail loudly instead of
# exiting successfully with truncated assembly.
close STDOUT or die "error closing STDOUT: $!";
1614