• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt/router/openssl-1.0.0q/crypto/aes/asm/
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for ARMv4
11
12# January 2007.
13#
14# Code uses a single 1K S-box and is >2 times faster than code generated
15# by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
16# allows a logical or arithmetic operation to be merged with a shift or
17# rotate in one instruction, emitting the combined result every cycle. The
18# module is endian-neutral. The performance is ~42 cycles/byte for a 128-bit
19# key [on a single-issue Xscale PXA250 core].
20
21# May 2007.
22#
23# AES_set_[en|de]crypt_key is added.
24
25# July 2010.
26#
27# Rescheduling for dual-issue pipeline resulted in 12% improvement on
28# Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
29
# Pick the output file name from the command line: arguments are consumed
# until one looks like "name.ext" (a word character, further word characters
# or dashes, a dot, an extension).  If the arguments run out first, $output
# is undefined and the generated code goes to the inherited STDOUT.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect only when a name was actually obtained, and fail loudly rather
# than silently emitting the assembly somewhere other than the requested
# file.  The three-argument form keeps the name from being parsed as a mode.
if (defined($output) && length($output)) {
    open(STDOUT,">",$output) or die "can't open $output: $!";
}
32
# Symbolic names for the ARM registers used by the assembly templates below.
# s0-s3 carry the four cipher state words, t1-t3 are temporaries and i1-i3
# hold table indices; tbl, key and rounds are the table base, the round-key
# pointer and the round counter.
($s0,$s1,$s2,$s3)   = map { "r$_" } (0..3);
($t1,$t2,$t3)       = map { "r$_" } (4..6);
($i1,$i2,$i3)       = map { "r$_" } (7..9);

($tbl,$key,$rounds) = map { "r$_" } (10..12);
47
# Everything from here up to the closing ___ marker is collected into $code
# as ARM assembly text.  The heredoc interpolates, so the register variables
# assigned above are expanded to their rN names inside it.
48$code=<<___;
49.text
50.code	32
51
@ AES_Te: 256 table words (1024 bytes), one per byte value.  The round loop
@ in _armv4_AES_encrypt indexes it with "ldr rX,[tbl,idx,lsl#2]" and rotates
@ the loaded word (ror#8/#16/#24) for the lookups its comments label Te1-Te3,
@ so only this one word table is stored.
52.type	AES_Te,%object
53.align	5
54AES_Te:
55.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
56.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
57.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
58.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
59.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
60.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
61.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
62.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
63.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
64.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
65.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
66.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
67.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
68.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
69.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
70.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
71.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
72.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
73.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
74.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
75.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
76.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
77.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
78.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
79.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
80.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
81.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
82.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
83.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
84.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
85.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
86.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
87.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
88.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
89.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
90.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
91.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
92.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
93.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
94.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
95.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
96.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
97.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
98.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
99.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
100.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
101.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
102.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
103.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
104.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
105.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
106.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
107.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
108.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
109.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
110.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
111.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
112.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
113.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
114.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
115.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
116.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
117.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
118.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
119@ Te4[256]
@ 256 single-byte entries starting 1024 bytes past AES_Te.
@ AES_set_encrypt_key addresses this table (AES_Te+1024) with ldrb while
@ expanding the key.  The final round of _armv4_AES_encrypt instead reads one
@ byte out of each AES_Te word and does not touch this table.
120.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
121.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
122.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
123.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
124.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
125.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
126.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
127.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
128.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
129.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
130.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
131.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
132.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
133.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
134.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
135.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
136.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
137.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
138.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
139.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
140.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
141.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
142.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
143.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
144.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
145.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
146.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
147.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
148.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
149.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
150.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
151.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
152@ rcon[]
@ Word table read sequentially by the key-expansion loops, whose pointer
@ starts at Te4+256 and is post-incremented by 4 per use.  Ten non-zero
@ entries are followed by six zero words.
153.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
154.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
155.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
@ The recorded size of AES_Te spans the word table, Te4 and rcon.
156.size	AES_Te,.-AES_Te
157
158@ void AES_encrypt(const unsigned char *in, unsigned char *out,
159@ 		 const AES_KEY *key) {
@
@ Encrypts a single 16-byte block.  Register arguments as used below:
@   r0 = in, r1 = out, r2 = key schedule.
@ Input is gathered and output scattered byte by byte (ldrb/strb), with the
@ byte at the lowest address ending up in bits 31:24 of each state word.
@ r4-r12 and lr are pushed on entry and popped again before returning.
160.global AES_encrypt
161.type   AES_encrypt,%function
162.align	5
163AES_encrypt:
164	sub	r3,pc,#8		@ AES_encrypt
@ r1 (out) is pushed together with the saved registers because the core
@ routine uses r0-r3 for the state; it is popped into r12 after the call.
165	stmdb   sp!,{r1,r4-r12,lr}
166	mov	$rounds,r0		@ inp
167	mov	$key,r2
@ The table address is formed relative to this function's own address.
168	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
169
170	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
171	ldrb	$t1,[$rounds,#2]	@ manner...
172	ldrb	$t2,[$rounds,#1]
173	ldrb	$t3,[$rounds,#0]
174	orr	$s0,$s0,$t1,lsl#8
175	ldrb	$s1,[$rounds,#7]
176	orr	$s0,$s0,$t2,lsl#16
177	ldrb	$t1,[$rounds,#6]
178	orr	$s0,$s0,$t3,lsl#24
179	ldrb	$t2,[$rounds,#5]
180	ldrb	$t3,[$rounds,#4]
181	orr	$s1,$s1,$t1,lsl#8
182	ldrb	$s2,[$rounds,#11]
183	orr	$s1,$s1,$t2,lsl#16
184	ldrb	$t1,[$rounds,#10]
185	orr	$s1,$s1,$t3,lsl#24
186	ldrb	$t2,[$rounds,#9]
187	ldrb	$t3,[$rounds,#8]
188	orr	$s2,$s2,$t1,lsl#8
189	ldrb	$s3,[$rounds,#15]
190	orr	$s2,$s2,$t2,lsl#16
191	ldrb	$t1,[$rounds,#14]
192	orr	$s2,$s2,$t3,lsl#24
193	ldrb	$t2,[$rounds,#13]
194	ldrb	$t3,[$rounds,#12]
195	orr	$s3,$s3,$t1,lsl#8
196	orr	$s3,$s3,$t2,lsl#16
197	orr	$s3,$s3,$t3,lsl#24
198
199	bl	_armv4_AES_encrypt
200
@ r0-r3 now hold the result; fetch the saved out pointer from the stack.
201	ldr	$rounds,[sp],#4		@ pop out
202	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
203	mov	$t2,$s0,lsr#16		@ manner...
204	mov	$t3,$s0,lsr#8
205	strb	$t1,[$rounds,#0]
206	strb	$t2,[$rounds,#1]
207	mov	$t1,$s1,lsr#24
208	strb	$t3,[$rounds,#2]
209	mov	$t2,$s1,lsr#16
210	strb	$s0,[$rounds,#3]
211	mov	$t3,$s1,lsr#8
212	strb	$t1,[$rounds,#4]
213	strb	$t2,[$rounds,#5]
214	mov	$t1,$s2,lsr#24
215	strb	$t3,[$rounds,#6]
216	mov	$t2,$s2,lsr#16
217	strb	$s1,[$rounds,#7]
218	mov	$t3,$s2,lsr#8
219	strb	$t1,[$rounds,#8]
220	strb	$t2,[$rounds,#9]
221	mov	$t1,$s3,lsr#24
222	strb	$t3,[$rounds,#10]
223	mov	$t2,$s3,lsr#16
224	strb	$s2,[$rounds,#11]
225	mov	$t3,$s3,lsr#8
226	strb	$t1,[$rounds,#12]
227	strb	$t2,[$rounds,#13]
228	strb	$t3,[$rounds,#14]
229	strb	$s3,[$rounds,#15]
230
231	ldmia   sp!,{r4-r12,lr}
@ Return with a plain move to pc when bit 0 of lr is clear; otherwise fall
@ through to the interworking branch.
232	tst	lr,#1
233	moveq	pc,lr			@ be binary compatible with V4, yet
234	bx	lr			@ interoperable with Thumb ISA:-)
235.size	AES_encrypt,.-AES_encrypt
236
@ _armv4_AES_encrypt: cipher core, called with bl from AES_encrypt.
@ In:   r0-r3 = state words, r10 = AES_Te, r11 = start of the key schedule
@ Out:  r0-r3 = result
@ r4-r9 hold temporaries and table indices, r12 counts rounds and lr holds
@ the 0xff byte mask, which is why the return address is parked on the stack.
@ r10 is returned unchanged; r11 is left advanced into the schedule.
237.type   _armv4_AES_encrypt,%function
238.align	2
239_armv4_AES_encrypt:
240	str	lr,[sp,#-4]!		@ push lr
@ Mix in the first round key and fetch the round count, stored 240 bytes
@ from the start of the schedule (r11 has already moved on by 16 here).
241	ldmia	$key!,{$t1-$i1}
242	eor	$s0,$s0,$t1
243	ldr	$rounds,[$key,#240-16]
244	eor	$s1,$s1,$t2
245	eor	$s2,$s2,$t3
246	eor	$s3,$s3,$i1
@ The loop below runs rounds-1 times; the last round is handled after it.
247	sub	$rounds,$rounds,#1
248	mov	lr,#255
249
@ Byte indices of the first state word are prepared ahead of the loop; each
@ pass ends by preparing them again for the next one.
250	and	$i1,lr,$s0
251	and	$i2,lr,$s0,lsr#8
252	and	$i3,lr,$s0,lsr#16
253	mov	$s0,$s0,lsr#24
254.Lenc_loop:
255	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
256	and	$i1,lr,$s1,lsr#16	@ i0
257	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
258	and	$i2,lr,$s1
259	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
260	and	$i3,lr,$s1,lsr#8
261	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
262	mov	$s1,$s1,lsr#24
263
264	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
265	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
266	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
267	eor	$s0,$s0,$i1,ror#8
268	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
269	and	$i1,lr,$s2,lsr#8	@ i0
270	eor	$t2,$t2,$i2,ror#8
271	and	$i2,lr,$s2,lsr#16	@ i1
272	eor	$t3,$t3,$i3,ror#8
273	and	$i3,lr,$s2
274	eor	$s1,$s1,$t1,ror#24
275	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
276	mov	$s2,$s2,lsr#24
277
278	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
279	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
280	eor	$s0,$s0,$i1,ror#16
281	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
282	and	$i1,lr,$s3		@ i0
283	eor	$s1,$s1,$i2,ror#8
284	and	$i2,lr,$s3,lsr#8	@ i1
285	eor	$t3,$t3,$i3,ror#16
286	and	$i3,lr,$s3,lsr#16	@ i2
287	eor	$s2,$s2,$t2,ror#16
288	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
289	mov	$s3,$s3,lsr#24
290
291	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
292	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
293	eor	$s0,$s0,$i1,ror#24
294	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
295	eor	$s1,$s1,$i2,ror#16
@ Next round key: the first word is loaded with post-increment, the other
@ three at negative offsets from the already advanced pointer.
296	ldr	$i1,[$key],#16
297	eor	$s2,$s2,$i3,ror#8
298	ldr	$t1,[$key,#-12]
299	eor	$s3,$s3,$t3,ror#8
300
301	ldr	$t2,[$key,#-8]
302	eor	$s0,$s0,$i1
303	ldr	$t3,[$key,#-4]
304	and	$i1,lr,$s0
305	eor	$s1,$s1,$t1
306	and	$i2,lr,$s0,lsr#8
307	eor	$s2,$s2,$t2
308	and	$i3,lr,$s0,lsr#16
309	eor	$s3,$s3,$t3
310	mov	$s0,$s0,lsr#24
311
312	subs	$rounds,$rounds,#1
313	bne	.Lenc_loop
314
@ Last round: with the base moved up by 2, every "ldrb [tbl,idx,lsl#2]"
@ fetches one byte out of the indexed table word; the bytes are then packed
@ back into words with shifts instead of rotations.
315	add	$tbl,$tbl,#2
316
317	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
318	and	$i1,lr,$s1,lsr#16	@ i0
319	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
320	and	$i2,lr,$s1
321	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
322	and	$i3,lr,$s1,lsr#8
323	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
324	mov	$s1,$s1,lsr#24
325
326	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
327	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
328	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
329	eor	$s0,$i1,$s0,lsl#8
330	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
331	and	$i1,lr,$s2,lsr#8	@ i0
332	eor	$t2,$i2,$t2,lsl#8
333	and	$i2,lr,$s2,lsr#16	@ i1
334	eor	$t3,$i3,$t3,lsl#8
335	and	$i3,lr,$s2
336	eor	$s1,$t1,$s1,lsl#24
337	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
338	mov	$s2,$s2,lsr#24
339
340	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
341	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
342	eor	$s0,$i1,$s0,lsl#8
343	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
344	and	$i1,lr,$s3		@ i0
345	eor	$s1,$s1,$i2,lsl#16
346	and	$i2,lr,$s3,lsr#8	@ i1
347	eor	$t3,$i3,$t3,lsl#8
348	and	$i3,lr,$s3,lsr#16	@ i2
349	eor	$s2,$t2,$s2,lsl#24
350	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
351	mov	$s3,$s3,lsr#24
352
353	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
354	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
355	eor	$s0,$i1,$s0,lsl#8
356	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
@ Final round key is read in place; r11 is not advanced any further.
357	ldr	$i1,[$key,#0]
358	eor	$s1,$s1,$i2,lsl#8
359	ldr	$t1,[$key,#4]
360	eor	$s2,$s2,$i3,lsl#16
361	ldr	$t2,[$key,#8]
362	eor	$s3,$t3,$s3,lsl#24
363	ldr	$t3,[$key,#12]
364
365	eor	$s0,$s0,$i1
366	eor	$s1,$s1,$t1
367	eor	$s2,$s2,$t2
368	eor	$s3,$s3,$t3
369
@ Undo the +2 so that r10 points at AES_Te again.
370	sub	$tbl,$tbl,#2
371	ldr	pc,[sp],#4		@ pop and return
372.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
373
@ AES_set_encrypt_key: expands a user key into the encryption key schedule.
@ In:   r0 = key bytes, r1 = key size in bits, r2 = schedule to fill
@ Out:  r0 = 0 on success; -1 if r0 or r2 is zero, or if r1 is not 128,
@       192 or 256.  On success r2 is back at the schedule base.
@ The round count (10, 12 or 14) is stored at byte offset 240 of the schedule.
@ Nothing is pushed on the early-exit paths; the success paths save and
@ restore r4-r12 and lr.
374.global AES_set_encrypt_key
375.type   AES_set_encrypt_key,%function
376.align	5
377AES_set_encrypt_key:
378	sub	r3,pc,#8		@ AES_set_encrypt_key
379	teq	r0,#0
380	moveq	r0,#-1
381	beq	.Labrt
382	teq	r2,#0
383	moveq	r0,#-1
384	beq	.Labrt
385
386	teq	r1,#128
387	beq	.Lok
388	teq	r1,#192
389	beq	.Lok
390	teq	r1,#256
391	movne	r0,#-1
392	bne	.Labrt
393
394.Lok:	stmdb   sp!,{r4-r12,lr}
395	sub	$tbl,r3,#AES_set_encrypt_key-AES_Te-1024	@ Te4
396
397	mov	$rounds,r0		@ inp
398	mov	lr,r1			@ bits
399	mov	$key,r2			@ key
400
@ The first 16 key bytes become schedule words 0-3 unchanged, for every
@ key size.
401	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
402	ldrb	$t1,[$rounds,#2]	@ manner...
403	ldrb	$t2,[$rounds,#1]
404	ldrb	$t3,[$rounds,#0]
405	orr	$s0,$s0,$t1,lsl#8
406	ldrb	$s1,[$rounds,#7]
407	orr	$s0,$s0,$t2,lsl#16
408	ldrb	$t1,[$rounds,#6]
409	orr	$s0,$s0,$t3,lsl#24
410	ldrb	$t2,[$rounds,#5]
411	ldrb	$t3,[$rounds,#4]
412	orr	$s1,$s1,$t1,lsl#8
413	ldrb	$s2,[$rounds,#11]
414	orr	$s1,$s1,$t2,lsl#16
415	ldrb	$t1,[$rounds,#10]
416	orr	$s1,$s1,$t3,lsl#24
417	ldrb	$t2,[$rounds,#9]
418	ldrb	$t3,[$rounds,#8]
419	orr	$s2,$s2,$t1,lsl#8
420	ldrb	$s3,[$rounds,#15]
421	orr	$s2,$s2,$t2,lsl#16
422	ldrb	$t1,[$rounds,#14]
423	orr	$s2,$s2,$t3,lsl#24
424	ldrb	$t2,[$rounds,#13]
425	ldrb	$t3,[$rounds,#12]
426	orr	$s3,$s3,$t1,lsl#8
427	str	$s0,[$key],#16
428	orr	$s3,$s3,$t2,lsl#16
429	str	$s1,[$key,#-12]
430	orr	$s3,$s3,$t3,lsl#24
431	str	$s2,[$key,#-8]
432	str	$s3,[$key,#-4]
433
434	teq	lr,#128
435	bne	.Lnot128
@ 128-bit key: r12 stops being the input pointer and becomes the loop
@ counter; 10 is also the round count written at schedule offset 240.
436	mov	$rounds,#10
437	str	$rounds,[$key,#240-16]
438	add	$t3,$tbl,#256			@ rcon
439	mov	lr,#255
440
@ Each pass appends four schedule words.  The four bytes of the previous
@ last word are looked up in Te4 and reassembled one byte position to the
@ left (top byte wrapping to the bottom), then xored with the next rcon word.
441.L128_loop:
442	and	$t2,lr,$s3,lsr#24
443	and	$i1,lr,$s3,lsr#16
444	ldrb	$t2,[$tbl,$t2]
445	and	$i2,lr,$s3,lsr#8
446	ldrb	$i1,[$tbl,$i1]
447	and	$i3,lr,$s3
448	ldrb	$i2,[$tbl,$i2]
449	orr	$t2,$t2,$i1,lsl#24
450	ldrb	$i3,[$tbl,$i3]
451	orr	$t2,$t2,$i2,lsl#16
452	ldr	$t1,[$t3],#4			@ rcon[i++]
453	orr	$t2,$t2,$i3,lsl#8
454	eor	$t2,$t2,$t1
455	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
456	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
457	str	$s0,[$key],#16
458	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
459	str	$s1,[$key,#-12]
460	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
461	str	$s2,[$key,#-8]
462	subs	$rounds,$rounds,#1
463	str	$s3,[$key,#-4]
464	bne	.L128_loop
@ r11 has advanced 16 + 10*16 = 176 bytes; hand the schedule base back in r2.
465	sub	r2,$key,#176
466	b	.Ldone
467
@ 192- and 256-bit keys: key bytes 16-23 become schedule words 4 and 5.
468.Lnot128:
469	ldrb	$i2,[$rounds,#19]
470	ldrb	$t1,[$rounds,#18]
471	ldrb	$t2,[$rounds,#17]
472	ldrb	$t3,[$rounds,#16]
473	orr	$i2,$i2,$t1,lsl#8
474	ldrb	$i3,[$rounds,#23]
475	orr	$i2,$i2,$t2,lsl#16
476	ldrb	$t1,[$rounds,#22]
477	orr	$i2,$i2,$t3,lsl#24
478	ldrb	$t2,[$rounds,#21]
479	ldrb	$t3,[$rounds,#20]
480	orr	$i3,$i3,$t1,lsl#8
481	orr	$i3,$i3,$t2,lsl#16
482	str	$i2,[$key],#8
483	orr	$i3,$i3,$t3,lsl#24
484	str	$i3,[$key,#-4]
485
486	teq	lr,#192
487	bne	.Lnot192
@ 192-bit key: round count 12 is stored, then r12 is reloaded with the
@ number of loop passes (8).
488	mov	$rounds,#12
489	str	$rounds,[$key,#240-24]
490	add	$t3,$tbl,#256			@ rcon
491	mov	lr,#255
492	mov	$rounds,#8
493
@ Each pass starts from the most recent schedule word, kept in r9, and
@ advances r11 by 24 bytes (six words).
494.L192_loop:
495	and	$t2,lr,$i3,lsr#24
496	and	$i1,lr,$i3,lsr#16
497	ldrb	$t2,[$tbl,$t2]
498	and	$i2,lr,$i3,lsr#8
499	ldrb	$i1,[$tbl,$i1]
500	and	$i3,lr,$i3
501	ldrb	$i2,[$tbl,$i2]
502	orr	$t2,$t2,$i1,lsl#24
503	ldrb	$i3,[$tbl,$i3]
504	orr	$t2,$t2,$i2,lsl#16
505	ldr	$t1,[$t3],#4			@ rcon[i++]
506	orr	$t2,$t2,$i3,lsl#8
507	eor	$i3,$t2,$t1
508	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
509	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
510	str	$s0,[$key],#24
511	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
512	str	$s1,[$key,#-20]
513	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
514	str	$s2,[$key,#-16]
515	subs	$rounds,$rounds,#1
516	str	$s3,[$key,#-12]
@ The last pass stops after four words: r11 is then 24 + 8*24 = 216 bytes
@ past the schedule base.
517	subeq	r2,$key,#216
518	beq	.Ldone
519
520	ldr	$i1,[$key,#-32]
521	ldr	$i2,[$key,#-28]
522	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
523	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
524	str	$i1,[$key,#-8]
525	str	$i3,[$key,#-4]
526	b	.L192_loop
527
@ 256-bit key: key bytes 24-31 become schedule words 6 and 7.
528.Lnot192:
529	ldrb	$i2,[$rounds,#27]
530	ldrb	$t1,[$rounds,#26]
531	ldrb	$t2,[$rounds,#25]
532	ldrb	$t3,[$rounds,#24]
533	orr	$i2,$i2,$t1,lsl#8
534	ldrb	$i3,[$rounds,#31]
535	orr	$i2,$i2,$t2,lsl#16
536	ldrb	$t1,[$rounds,#30]
537	orr	$i2,$i2,$t3,lsl#24
538	ldrb	$t2,[$rounds,#29]
539	ldrb	$t3,[$rounds,#28]
540	orr	$i3,$i3,$t1,lsl#8
541	orr	$i3,$i3,$t2,lsl#16
542	str	$i2,[$key],#8
543	orr	$i3,$i3,$t3,lsl#24
544	str	$i3,[$key,#-4]
545
@ Round count 14 is stored, then r12 is reloaded with the pass count (7).
546	mov	$rounds,#14
547	str	$rounds,[$key,#240-32]
548	add	$t3,$tbl,#256			@ rcon
549	mov	lr,#255
550	mov	$rounds,#7
551
552.L256_loop:
553	and	$t2,lr,$i3,lsr#24
554	and	$i1,lr,$i3,lsr#16
555	ldrb	$t2,[$tbl,$t2]
556	and	$i2,lr,$i3,lsr#8
557	ldrb	$i1,[$tbl,$i1]
558	and	$i3,lr,$i3
559	ldrb	$i2,[$tbl,$i2]
560	orr	$t2,$t2,$i1,lsl#24
561	ldrb	$i3,[$tbl,$i3]
562	orr	$t2,$t2,$i2,lsl#16
563	ldr	$t1,[$t3],#4			@ rcon[i++]
564	orr	$t2,$t2,$i3,lsl#8
565	eor	$i3,$t2,$t1
566	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
567	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
568	str	$s0,[$key],#32
569	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
570	str	$s1,[$key,#-28]
571	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
572	str	$s2,[$key,#-24]
573	subs	$rounds,$rounds,#1
574	str	$s3,[$key,#-20]
@ The last pass stops after four words: r11 is then 32 + 7*32 = 256 bytes
@ past the schedule base.
575	subeq	r2,$key,#256
576	beq	.Ldone
577
@ Second half of a pass: the bytes of the word just produced go through
@ Te4 and are put back in the same byte positions (no rotation, no rcon).
578	and	$t2,lr,$s3
579	and	$i1,lr,$s3,lsr#8
580	ldrb	$t2,[$tbl,$t2]
581	and	$i2,lr,$s3,lsr#16
582	ldrb	$i1,[$tbl,$i1]
583	and	$i3,lr,$s3,lsr#24
584	ldrb	$i2,[$tbl,$i2]
585	orr	$t2,$t2,$i1,lsl#8
586	ldrb	$i3,[$tbl,$i3]
587	orr	$t2,$t2,$i2,lsl#16
588	ldr	$t1,[$key,#-48]
589	orr	$t2,$t2,$i3,lsl#24
590
591	ldr	$i1,[$key,#-44]
592	ldr	$i2,[$key,#-40]
593	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
594	ldr	$i3,[$key,#-36]
595	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
596	str	$t1,[$key,#-16]
597	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
598	str	$i1,[$key,#-12]
599	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
600	str	$i2,[$key,#-8]
601	str	$i3,[$key,#-4]
602	b	.L256_loop
603
604.Ldone:	mov	r0,#0
605	ldmia   sp!,{r4-r12,lr}
@ Shared exit: also the target of the argument checks above and of
@ AES_set_decrypt_key when key expansion reported an error.
606.Labrt:	tst	lr,#1
607	moveq	pc,lr			@ be binary compatible with V4, yet
608	bx	lr			@ interoperable with Thumb ISA:-)
609.size	AES_set_encrypt_key,.-AES_set_encrypt_key
610
@ AES_set_decrypt_key: takes the same register arguments as
@ AES_set_encrypt_key and calls it first.  On failure its return value is
@ passed straight back.  Otherwise the schedule is converted in two steps:
@   .Linv  swaps the 16-byte round keys end for end, in place;
@   .Lmix  rewrites every word of the round keys between the first and the
@          last one.
@ Returns 0 after a successful conversion.
611.global AES_set_decrypt_key
612.type   AES_set_decrypt_key,%function
613.align	5
614AES_set_decrypt_key:
615	str	lr,[sp,#-4]!            @ push lr
616	bl	AES_set_encrypt_key
617	teq	r0,#0
618	ldrne	lr,[sp],#4              @ pop lr
619	bne	.Labrt
620
@ lr is already on the stack, so only r4-r12 are pushed here; the closing
@ ldmia pops all ten registers in one go.
621	stmdb   sp!,{r4-r12}
622
623	ldr	$rounds,[r2,#240]	@ AES_set_encrypt_key preserves r2,
624	mov	$key,r2			@ which is AES_KEY *key
625	mov	$i1,r2
626	add	$i2,r2,$rounds,lsl#4
627
@ r7 walks up from the first round key and r8 down from the last one,
@ exchanging 16 bytes per pass until the two pointers meet.
628.Linv:	ldr	$s0,[$i1]
629	ldr	$s1,[$i1,#4]
630	ldr	$s2,[$i1,#8]
631	ldr	$s3,[$i1,#12]
632	ldr	$t1,[$i2]
633	ldr	$t2,[$i2,#4]
634	ldr	$t3,[$i2,#8]
635	ldr	$i3,[$i2,#12]
636	str	$s0,[$i2],#-16
637	str	$s1,[$i2,#16+4]
638	str	$s2,[$i2,#16+8]
639	str	$s3,[$i2,#16+12]
640	str	$t1,[$i1],#16
641	str	$t2,[$i1,#-12]
642	str	$t3,[$i1,#-8]
643	str	$i3,[$i1,#-4]
644	teq	$i1,$i2
645	bne	.Linv
646___
# The index registers are free once the .Linv loop has finished; the .Lmix
# loop reuses them under names that describe the byte masks they will hold.
647$mask80=$i1;
648$mask1b=$i2;
649$mask7f=$i3;
650$code.=<<___;
651	ldr	$s0,[$key,#16]!		@ prefetch tp1
@ Build 0x80808080 and 0x1b1b1b1b eight bits at a time; the third mask is
@ the complement of the first, i.e. 0x7f7f7f7f.
652	mov	$mask80,#0x80
653	mov	$mask1b,#0x1b
654	orr	$mask80,$mask80,#0x8000
655	orr	$mask1b,$mask1b,#0x1b00
656	orr	$mask80,$mask80,$mask80,lsl#16
657	orr	$mask1b,$mask1b,$mask1b,lsl#16
658	sub	$rounds,$rounds,#1
659	mvn	$mask7f,$mask80
660	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
661
@ One schedule word per pass, starting 16 bytes into the schedule.  tp2, tp4
@ and tp8 are the word with each of its four bytes doubled once, twice and
@ three times: the low seven bits are shifted left and 0x1b is xored in
@ wherever the byte had its top bit set.
662.Lmix:	and	$t1,$s0,$mask80
663	and	$s1,$s0,$mask7f
664	sub	$t1,$t1,$t1,lsr#7
665	and	$t1,$t1,$mask1b
666	eor	$s1,$t1,$s1,lsl#1	@ tp2
667
668	and	$t1,$s1,$mask80
669	and	$s2,$s1,$mask7f
670	sub	$t1,$t1,$t1,lsr#7
671	and	$t1,$t1,$mask1b
672	eor	$s2,$t1,$s2,lsl#1	@ tp4
673
674	and	$t1,$s2,$mask80
675	and	$s3,$s2,$mask7f
676	sub	$t1,$t1,$t1,lsr#7
677	and	$t1,$t1,$mask1b
678	eor	$s3,$t1,$s3,lsl#1	@ tp8
679
680	eor	$t1,$s1,$s2
681	eor	$t2,$s0,$s3		@ tp9
682	eor	$t1,$t1,$s3		@ tpe
683	eor	$t1,$t1,$s1,ror#24
684	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
685	eor	$t1,$t1,$s2,ror#16
686	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
687	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
688
@ The next input word is fetched before the result overwrites the current one.
689	ldr	$s0,[$key,#4]		@ prefetch tp1
690	str	$t1,[$key],#4
691	subs	$rounds,$rounds,#1
692	bne	.Lmix
693
694	mov	r0,#0
695	ldmia   sp!,{r4-r12,lr}
696	tst	lr,#1
697	moveq	pc,lr			@ be binary compatible with V4, yet
698	bx	lr			@ interoperable with Thumb ISA:-)
699.size	AES_set_decrypt_key,.-AES_set_decrypt_key
700
@ AES_Td: 256 table words (1024 bytes), one per byte value.  The round loop
@ in _armv4_AES_decrypt indexes it with "ldr rX,[tbl,idx,lsl#2]" and rotates
@ the loaded word (ror#8/#16/#24) for the lookups its comments label Td1-Td3,
@ so only this one word table is stored.
701.type	AES_Td,%object
702.align	5
703AES_Td:
704.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
705.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
706.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
707.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
708.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
709.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
710.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
711.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
712.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
713.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
714.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
715.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
716.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
717.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
718.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
719.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
720.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
721.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
722.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
723.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
724.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
725.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
726.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
727.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
728.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
729.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
730.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
731.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
732.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
733.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
734.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
735.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
736.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
737.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
738.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
739.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
740.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
741.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
742.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
743.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
744.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
745.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
746.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
747.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
748.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
749.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
750.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
751.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
752.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
753.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
754.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
755.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
756.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
757.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
758.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
759.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
760.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
761.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
762.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
763.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
764.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
765.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
766.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
767.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
768@ Td4[256]
@ 256 single-byte entries starting 1024 bytes past AES_Td.  The final round
@ of _armv4_AES_decrypt moves its table base here, issues eight word loads
@ 32 bytes apart (its comment calls this a prefetch) and then indexes the
@ table with ldrb.
769.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
770.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
771.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
772.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
773.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
774.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
775.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
776.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
777.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
778.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
779.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
780.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
781.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
782.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
783.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
784.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
785.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
786.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
787.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
788.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
789.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
790.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
791.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
792.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
793.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
794.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
795.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
796.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
797.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
798.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
799.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
800.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
@ The recorded size of AES_Td spans the word table and Td4.
801.size	AES_Td,.-AES_Td
802
803@ void AES_decrypt(const unsigned char *in, unsigned char *out,
804@ 		 const AES_KEY *key) {
@
@ Decrypts a single 16-byte block.  Register arguments as used below:
@   r0 = in, r1 = out, r2 = key schedule.
@ Input is gathered and output scattered byte by byte (ldrb/strb), with the
@ byte at the lowest address ending up in bits 31:24 of each state word.
@ r4-r12 and lr are pushed on entry and popped again before returning.
805.global AES_decrypt
806.type   AES_decrypt,%function
807.align	5
808AES_decrypt:
809	sub	r3,pc,#8		@ AES_decrypt
@ r1 (out) is pushed together with the saved registers because the core
@ routine uses r0-r3 for the state; it is popped into r12 after the call.
810	stmdb   sp!,{r1,r4-r12,lr}
811	mov	$rounds,r0		@ inp
812	mov	$key,r2
@ The table address is formed relative to this function's own address.
813	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
814
815	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
816	ldrb	$t1,[$rounds,#2]	@ manner...
817	ldrb	$t2,[$rounds,#1]
818	ldrb	$t3,[$rounds,#0]
819	orr	$s0,$s0,$t1,lsl#8
820	ldrb	$s1,[$rounds,#7]
821	orr	$s0,$s0,$t2,lsl#16
822	ldrb	$t1,[$rounds,#6]
823	orr	$s0,$s0,$t3,lsl#24
824	ldrb	$t2,[$rounds,#5]
825	ldrb	$t3,[$rounds,#4]
826	orr	$s1,$s1,$t1,lsl#8
827	ldrb	$s2,[$rounds,#11]
828	orr	$s1,$s1,$t2,lsl#16
829	ldrb	$t1,[$rounds,#10]
830	orr	$s1,$s1,$t3,lsl#24
831	ldrb	$t2,[$rounds,#9]
832	ldrb	$t3,[$rounds,#8]
833	orr	$s2,$s2,$t1,lsl#8
834	ldrb	$s3,[$rounds,#15]
835	orr	$s2,$s2,$t2,lsl#16
836	ldrb	$t1,[$rounds,#14]
837	orr	$s2,$s2,$t3,lsl#24
838	ldrb	$t2,[$rounds,#13]
839	ldrb	$t3,[$rounds,#12]
840	orr	$s3,$s3,$t1,lsl#8
841	orr	$s3,$s3,$t2,lsl#16
842	orr	$s3,$s3,$t3,lsl#24
843
844	bl	_armv4_AES_decrypt
845
@ r0-r3 now hold the result; fetch the saved out pointer from the stack.
846	ldr	$rounds,[sp],#4		@ pop out
847	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
848	mov	$t2,$s0,lsr#16		@ manner...
849	mov	$t3,$s0,lsr#8
850	strb	$t1,[$rounds,#0]
851	strb	$t2,[$rounds,#1]
852	mov	$t1,$s1,lsr#24
853	strb	$t3,[$rounds,#2]
854	mov	$t2,$s1,lsr#16
855	strb	$s0,[$rounds,#3]
856	mov	$t3,$s1,lsr#8
857	strb	$t1,[$rounds,#4]
858	strb	$t2,[$rounds,#5]
859	mov	$t1,$s2,lsr#24
860	strb	$t3,[$rounds,#6]
861	mov	$t2,$s2,lsr#16
862	strb	$s1,[$rounds,#7]
863	mov	$t3,$s2,lsr#8
864	strb	$t1,[$rounds,#8]
865	strb	$t2,[$rounds,#9]
866	mov	$t1,$s3,lsr#24
867	strb	$t3,[$rounds,#10]
868	mov	$t2,$s3,lsr#16
869	strb	$s2,[$rounds,#11]
870	mov	$t3,$s3,lsr#8
871	strb	$t1,[$rounds,#12]
872	strb	$t2,[$rounds,#13]
873	strb	$t3,[$rounds,#14]
874	strb	$s3,[$rounds,#15]
875
876	ldmia   sp!,{r4-r12,lr}
@ Return with a plain move to pc when bit 0 of lr is clear; otherwise fall
@ through to the interworking branch.
877	tst	lr,#1
878	moveq	pc,lr			@ be binary compatible with V4, yet
879	bx	lr			@ interoperable with Thumb ISA:-)
880.size	AES_decrypt,.-AES_decrypt
881
@ _armv4_AES_decrypt: cipher core, called with bl from AES_decrypt.
@ In:   r0-r3 = state words, r10 = AES_Td, r11 = start of the key schedule
@ Out:  r0-r3 = result
@ r4-r9 hold temporaries and table indices, r12 counts rounds and lr holds
@ the 0xff byte mask, which is why the return address is parked on the stack.
@ r10 is returned unchanged; r11 is left advanced into the schedule.
882.type   _armv4_AES_decrypt,%function
883.align	2
884_armv4_AES_decrypt:
885	str	lr,[sp,#-4]!		@ push lr
@ Mix in the first round key and fetch the round count, stored 240 bytes
@ from the start of the schedule (r11 has already moved on by 16 here).
886	ldmia	$key!,{$t1-$i1}
887	eor	$s0,$s0,$t1
888	ldr	$rounds,[$key,#240-16]
889	eor	$s1,$s1,$t2
890	eor	$s2,$s2,$t3
891	eor	$s3,$s3,$i1
@ The loop below runs rounds-1 times; the last round is handled after it.
892	sub	$rounds,$rounds,#1
893	mov	lr,#255
894
@ Byte indices of the first state word are prepared ahead of the loop; each
@ pass ends by preparing them again for the next one.
895	and	$i1,lr,$s0,lsr#16
896	and	$i2,lr,$s0,lsr#8
897	and	$i3,lr,$s0
898	mov	$s0,$s0,lsr#24
899.Ldec_loop:
900	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
901	and	$i1,lr,$s1		@ i0
902	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
903	and	$i2,lr,$s1,lsr#16
904	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
905	and	$i3,lr,$s1,lsr#8
906	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
907	mov	$s1,$s1,lsr#24
908
909	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
910	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
911	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
912	eor	$s0,$s0,$i1,ror#24
913	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
914	and	$i1,lr,$s2,lsr#8	@ i0
915	eor	$t2,$i2,$t2,ror#8
916	and	$i2,lr,$s2		@ i1
917	eor	$t3,$i3,$t3,ror#8
918	and	$i3,lr,$s2,lsr#16
919	eor	$s1,$s1,$t1,ror#8
920	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
921	mov	$s2,$s2,lsr#24
922
923	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
924	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
925	eor	$s0,$s0,$i1,ror#16
926	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
927	and	$i1,lr,$s3,lsr#16	@ i0
928	eor	$s1,$s1,$i2,ror#24
929	and	$i2,lr,$s3,lsr#8	@ i1
930	eor	$t3,$i3,$t3,ror#8
931	and	$i3,lr,$s3		@ i2
932	eor	$s2,$s2,$t2,ror#8
933	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
934	mov	$s3,$s3,lsr#24
935
936	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
937	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
938	eor	$s0,$s0,$i1,ror#8
939	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
940	eor	$s1,$s1,$i2,ror#16
941	eor	$s2,$s2,$i3,ror#24
@ Next round key: the first word is loaded with post-increment, the other
@ three at negative offsets from the already advanced pointer.
942	ldr	$i1,[$key],#16
943	eor	$s3,$s3,$t3,ror#8
944
945	ldr	$t1,[$key,#-12]
946	ldr	$t2,[$key,#-8]
947	eor	$s0,$s0,$i1
948	ldr	$t3,[$key,#-4]
949	and	$i1,lr,$s0,lsr#16
950	eor	$s1,$s1,$t1
951	and	$i2,lr,$s0,lsr#8
952	eor	$s2,$s2,$t2
953	and	$i3,lr,$s0
954	eor	$s3,$s3,$t3
955	mov	$s0,$s0,lsr#24
956
957	subs	$rounds,$rounds,#1
958	bne	.Ldec_loop
959
@ Last round: the base is moved 1024 bytes up to the byte table that follows
@ the word table.
960	add	$tbl,$tbl,#1024
961
@ Eight loads spaced 32 bytes apart across the 256-byte table.  The loaded
@ values are never used: r4-r6 are overwritten by the byte lookups below.
962	ldr	$t2,[$tbl,#0]		@ prefetch Td4
963	ldr	$t3,[$tbl,#32]
964	ldr	$t1,[$tbl,#64]
965	ldr	$t2,[$tbl,#96]
966	ldr	$t3,[$tbl,#128]
967	ldr	$t1,[$tbl,#160]
968	ldr	$t2,[$tbl,#192]
969	ldr	$t3,[$tbl,#224]
970
971	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
972	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
973	and	$i1,lr,$s1		@ i0
974	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
975	and	$i2,lr,$s1,lsr#16
976	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
977	and	$i3,lr,$s1,lsr#8
978
979	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
980	ldrb	$s1,[$tbl,$s1,lsr#24]	@ Td4[s1>>24]
981	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
982	eor	$s0,$i1,$s0,lsl#24
983	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
984	eor	$s1,$t1,$s1,lsl#8
985	and	$i1,lr,$s2,lsr#8	@ i0
986	eor	$t2,$t2,$i2,lsl#8
987	and	$i2,lr,$s2		@ i1
988	eor	$t3,$t3,$i3,lsl#8
989	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
990	and	$i3,lr,$s2,lsr#16
991
992	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
993	ldrb	$s2,[$tbl,$s2,lsr#24]	@ Td4[s2>>24]
994	eor	$s0,$s0,$i1,lsl#8
995	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
996	eor	$s1,$i2,$s1,lsl#16
997	and	$i1,lr,$s3,lsr#16	@ i0
998	eor	$s2,$t2,$s2,lsl#16
999	and	$i2,lr,$s3,lsr#8	@ i1
1000	eor	$t3,$t3,$i3,lsl#16
1001	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
1002	and	$i3,lr,$s3		@ i2
1003
1004	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
1005	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
1006	ldrb	$s3,[$tbl,$s3,lsr#24]	@ Td4[s3>>24]
1007	eor	$s0,$s0,$i1,lsl#16
@ Final round key is read in place; r11 is not advanced any further.
1008	ldr	$i1,[$key,#0]
1009	eor	$s1,$s1,$i2,lsl#8
1010	ldr	$t1,[$key,#4]
1011	eor	$s2,$i3,$s2,lsl#8
1012	ldr	$t2,[$key,#8]
1013	eor	$s3,$t3,$s3,lsl#24
1014	ldr	$t3,[$key,#12]
1015
1016	eor	$s0,$s0,$i1
1017	eor	$s1,$s1,$t1
1018	eor	$s2,$s2,$t2
1019	eor	$s3,$s3,$t3
1020
@ Undo the +1024 so that r10 points at AES_Td again.
1021	sub	$tbl,$tbl,#1024
1022	ldr	pc,[sp],#4		@ pop and return
1023.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
@ Identification string embedded in the object; the alignment directive
@ after it pads the section back to a 4-byte boundary.
1024.asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1025.align	2
1026___
1027
# Post-process the generated text: every "bx lr" is replaced by the raw
# encoding of that instruction, so the file still assembles when the
# assembler is restricted to plain ARMv4.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4

# Write the result and make sure it really reached its destination.  The
# close flushes buffered output; without these checks a short write (disk
# full, closed pipe) would leave a truncated assembly file behind while the
# script still exited with status 0.
print($code) or die "error writing output: $!";
close STDOUT or die "error closing STDOUT: $!";	# enforce flush
1031