1238384Sjkim#!/usr/bin/env perl
2238384Sjkim
3238384Sjkim# ====================================================================
4238384Sjkim# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5238384Sjkim# project. The module is, however, dual licensed under OpenSSL and
6238384Sjkim# CRYPTOGAMS licenses depending on where you obtain it. For further
7238384Sjkim# details see http://www.openssl.org/~appro/cryptogams/.
8238384Sjkim# ====================================================================
9238384Sjkim
10238384Sjkim# AES for ARMv4
11238384Sjkim
12238384Sjkim# January 2007.
13238384Sjkim#
14238384Sjkim# Code uses a single 1K S-box and is >2 times faster than code generated
15238384Sjkim# by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
16238384Sjkim# allows merging a logical or arithmetic operation with a shift or rotate
17238384Sjkim# in one instruction and emitting the combined result every cycle. The
18238384Sjkim# module is endian-neutral. The performance is ~42 cycles/byte for a
19238384Sjkim# 128-bit key [on a single-issue XScale PXA250 core].
20238384Sjkim
21238384Sjkim# May 2007.
22238384Sjkim#
23238384Sjkim# AES_set_[en|de]crypt_key is added.
24238384Sjkim
25238384Sjkim# July 2010.
26238384Sjkim#
27238384Sjkim# Rescheduling for dual-issue pipeline resulted in 12% improvement on
28238384Sjkim# Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
29238384Sjkim
30238384Sjkim# February 2011.
31238384Sjkim#
32238384Sjkim# Profiler-assisted and platform-specific optimization resulted in 16%
33238384Sjkim# improvement on Cortex A8 core and ~21.5 cycles per byte.
34238384Sjkim
# Pick the output file name from the command line: arguments are consumed
# until one looks like "name.ext" (word characters and dashes, one dot, an
# extension).  The scan also stops at the first false argument, in which
# case $output is left false.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Send everything printed from here on to that file.  When no output name
# was given STDOUT is left untouched, so the generated code goes wherever
# the caller pointed it.  A failed open is fatal: previously the error was
# ignored and the script carried on without writing the requested file.
# The three-argument form keeps the name from being parsed as a mode.
if (defined($output) && length($output)) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
37238384Sjkim
# Symbolic names for the ARM registers; the assembly templates below
# interpolate these, so each alias expands to the plain register name.
($s0,$s1,$s2,$s3)=map("r$_",(0..3));	# four 32-bit words of the AES block
($t1,$t2,$t3)=map("r$_",(4..6));	# temporaries
($i1,$i2,$i3)=map("r$_",(7..9));	# scratch, mostly masked byte indices

($tbl,$key,$rounds)=map("r$_",(10..12));	# table base, key pointer, counter
52238384Sjkim
# Start of the assembly template collected in $code.  The heredoc
# interpolates, so the register aliases assigned above ($s0 .. $rounds) are
# expanded into the text; "@" starts a comment in the emitted assembly.
#
# AES_Te is the only lookup data used on the encryption side:
#   +0     256 32-bit words (1KB).  The round code applies "ror" to the
#          loaded words to obtain the rotated variants, and the final
#          round reads a single byte out of each word as the S-box value.
#   +1024  Te4: 256 bytes, the S-box as used by the key setup code.
#   +1280  rcon: 10 round constants, zero-padded to 16 words.
53238384Sjkim$code=<<___;
54238384Sjkim#include "arm_arch.h"
55238384Sjkim.text
56238384Sjkim.code	32
57238384Sjkim
58238384Sjkim.type	AES_Te,%object
59238384Sjkim.align	5
60238384SjkimAES_Te:
61238384Sjkim.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
62238384Sjkim.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
63238384Sjkim.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
64238384Sjkim.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
65238384Sjkim.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
66238384Sjkim.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
67238384Sjkim.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
68238384Sjkim.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
69238384Sjkim.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
70238384Sjkim.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
71238384Sjkim.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
72238384Sjkim.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
73238384Sjkim.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
74238384Sjkim.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
75238384Sjkim.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
76238384Sjkim.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
77238384Sjkim.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
78238384Sjkim.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
79238384Sjkim.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
80238384Sjkim.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
81238384Sjkim.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
82238384Sjkim.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
83238384Sjkim.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
84238384Sjkim.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
85238384Sjkim.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
86238384Sjkim.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
87238384Sjkim.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
88238384Sjkim.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
89238384Sjkim.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
90238384Sjkim.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
91238384Sjkim.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
92238384Sjkim.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
93238384Sjkim.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
94238384Sjkim.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
95238384Sjkim.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
96238384Sjkim.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
97238384Sjkim.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
98238384Sjkim.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
99238384Sjkim.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
100238384Sjkim.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
101238384Sjkim.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
102238384Sjkim.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
103238384Sjkim.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
104238384Sjkim.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
105238384Sjkim.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
106238384Sjkim.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
107238384Sjkim.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
108238384Sjkim.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
109238384Sjkim.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
110238384Sjkim.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
111238384Sjkim.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
112238384Sjkim.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
113238384Sjkim.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
114238384Sjkim.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
115238384Sjkim.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
116238384Sjkim.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
117238384Sjkim.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
118238384Sjkim.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
119238384Sjkim.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
120238384Sjkim.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
121238384Sjkim.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
122238384Sjkim.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
123238384Sjkim.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
124238384Sjkim.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
125238384Sjkim@ Te4[256]
126238384Sjkim.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
127238384Sjkim.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
128238384Sjkim.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
129238384Sjkim.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
130238384Sjkim.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
131238384Sjkim.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
132238384Sjkim.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
133238384Sjkim.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
134238384Sjkim.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
135238384Sjkim.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
136238384Sjkim.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
137238384Sjkim.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
138238384Sjkim.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
139238384Sjkim.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
140238384Sjkim.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
141238384Sjkim.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
142238384Sjkim.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
143238384Sjkim.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
144238384Sjkim.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
145238384Sjkim.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
146238384Sjkim.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
147238384Sjkim.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
148238384Sjkim.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
149238384Sjkim.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
150238384Sjkim.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
151238384Sjkim.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
152238384Sjkim.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
153238384Sjkim.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
154238384Sjkim.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
155238384Sjkim.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
156238384Sjkim.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
157238384Sjkim.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
158238384Sjkim@ rcon[]
159238384Sjkim.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
160238384Sjkim.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
161238384Sjkim.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
162238384Sjkim.size	AES_Te,.-AES_Te
163238384Sjkim
164238384Sjkim@ void AES_encrypt(const unsigned char *in, unsigned char *out,
165238384Sjkim@ 		 const AES_KEY *key) {
@
@ Public entry point: encrypts the 16 bytes at in (r0) with the key
@ schedule at key (r2) and writes the 16-byte result to out (r1).
@ The block is loaded as four big-endian words into r0-r3, passed through
@ _armv4_AES_encrypt, and stored back in the same byte order.
166238384Sjkim.global AES_encrypt
167238384Sjkim.type   AES_encrypt,%function
168238384Sjkim.align	5
169238384SjkimAES_encrypt:
@ In ARM state pc reads as the current instruction + 8, so r3 becomes the
@ address of AES_encrypt; the table is then located relative to it.
170238384Sjkim	sub	r3,pc,#8		@ AES_encrypt
@ r1 (out) is saved together with the callee-saved registers.  As the
@ lowest-numbered register it lands at the lowest address, so it can be
@ popped on its own after the core call.
171238384Sjkim	stmdb   sp!,{r1,r4-r12,lr}
@ r12 serves as the input pointer here (and as the output pointer on the
@ way out); it only becomes the round counter inside the core routine.
172238384Sjkim	mov	$rounds,r0		@ inp
173238384Sjkim	mov	$key,r2
174238384Sjkim	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
@ Before ARMv7: build each word from four single-byte loads.
175238384Sjkim#if __ARM_ARCH__<7
176238384Sjkim	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
177238384Sjkim	ldrb	$t1,[$rounds,#2]	@ manner...
178238384Sjkim	ldrb	$t2,[$rounds,#1]
179238384Sjkim	ldrb	$t3,[$rounds,#0]
180238384Sjkim	orr	$s0,$s0,$t1,lsl#8
181238384Sjkim	ldrb	$s1,[$rounds,#7]
182238384Sjkim	orr	$s0,$s0,$t2,lsl#16
183238384Sjkim	ldrb	$t1,[$rounds,#6]
184238384Sjkim	orr	$s0,$s0,$t3,lsl#24
185238384Sjkim	ldrb	$t2,[$rounds,#5]
186238384Sjkim	ldrb	$t3,[$rounds,#4]
187238384Sjkim	orr	$s1,$s1,$t1,lsl#8
188238384Sjkim	ldrb	$s2,[$rounds,#11]
189238384Sjkim	orr	$s1,$s1,$t2,lsl#16
190238384Sjkim	ldrb	$t1,[$rounds,#10]
191238384Sjkim	orr	$s1,$s1,$t3,lsl#24
192238384Sjkim	ldrb	$t2,[$rounds,#9]
193238384Sjkim	ldrb	$t3,[$rounds,#8]
194238384Sjkim	orr	$s2,$s2,$t1,lsl#8
195238384Sjkim	ldrb	$s3,[$rounds,#15]
196238384Sjkim	orr	$s2,$s2,$t2,lsl#16
197238384Sjkim	ldrb	$t1,[$rounds,#14]
198238384Sjkim	orr	$s2,$s2,$t3,lsl#24
199238384Sjkim	ldrb	$t2,[$rounds,#13]
200238384Sjkim	ldrb	$t3,[$rounds,#12]
201238384Sjkim	orr	$s3,$s3,$t1,lsl#8
202238384Sjkim	orr	$s3,$s3,$t2,lsl#16
203238384Sjkim	orr	$s3,$s3,$t3,lsl#24
@ ARMv7 and later: whole-word loads, byte-swapped on little-endian builds
@ so the words hold the same values as in the byte-wise path.
204238384Sjkim#else
205238384Sjkim	ldr	$s0,[$rounds,#0]
206238384Sjkim	ldr	$s1,[$rounds,#4]
207238384Sjkim	ldr	$s2,[$rounds,#8]
208238384Sjkim	ldr	$s3,[$rounds,#12]
209238384Sjkim#ifdef __ARMEL__
210238384Sjkim	rev	$s0,$s0
211238384Sjkim	rev	$s1,$s1
212238384Sjkim	rev	$s2,$s2
213238384Sjkim	rev	$s3,$s3
214238384Sjkim#endif
215238384Sjkim#endif
216238384Sjkim	bl	_armv4_AES_encrypt
217238384Sjkim
@ Recover the out pointer that was pushed first; the rest of the frame is
@ popped by the return sequence below.
218238384Sjkim	ldr	$rounds,[sp],#4		@ pop out
219238384Sjkim#if __ARM_ARCH__>=7
220238384Sjkim#ifdef __ARMEL__
221238384Sjkim	rev	$s0,$s0
222238384Sjkim	rev	$s1,$s1
223238384Sjkim	rev	$s2,$s2
224238384Sjkim	rev	$s3,$s3
225238384Sjkim#endif
226238384Sjkim	str	$s0,[$rounds,#0]
227238384Sjkim	str	$s1,[$rounds,#4]
228238384Sjkim	str	$s2,[$rounds,#8]
229238384Sjkim	str	$s3,[$rounds,#12]
230238384Sjkim#else
231238384Sjkim	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
232238384Sjkim	mov	$t2,$s0,lsr#16		@ manner...
233238384Sjkim	mov	$t3,$s0,lsr#8
234238384Sjkim	strb	$t1,[$rounds,#0]
235238384Sjkim	strb	$t2,[$rounds,#1]
236238384Sjkim	mov	$t1,$s1,lsr#24
237238384Sjkim	strb	$t3,[$rounds,#2]
238238384Sjkim	mov	$t2,$s1,lsr#16
239238384Sjkim	strb	$s0,[$rounds,#3]
240238384Sjkim	mov	$t3,$s1,lsr#8
241238384Sjkim	strb	$t1,[$rounds,#4]
242238384Sjkim	strb	$t2,[$rounds,#5]
243238384Sjkim	mov	$t1,$s2,lsr#24
244238384Sjkim	strb	$t3,[$rounds,#6]
245238384Sjkim	mov	$t2,$s2,lsr#16
246238384Sjkim	strb	$s1,[$rounds,#7]
247238384Sjkim	mov	$t3,$s2,lsr#8
248238384Sjkim	strb	$t1,[$rounds,#8]
249238384Sjkim	strb	$t2,[$rounds,#9]
250238384Sjkim	mov	$t1,$s3,lsr#24
251238384Sjkim	strb	$t3,[$rounds,#10]
252238384Sjkim	mov	$t2,$s3,lsr#16
253238384Sjkim	strb	$s2,[$rounds,#11]
254238384Sjkim	mov	$t3,$s3,lsr#8
255238384Sjkim	strb	$t1,[$rounds,#12]
256238384Sjkim	strb	$t2,[$rounds,#13]
257238384Sjkim	strb	$t3,[$rounds,#14]
258238384Sjkim	strb	$s3,[$rounds,#15]
259238384Sjkim#endif
@ Return: from ARMv5 on the saved lr is popped straight into pc; the
@ older path pops it into lr and picks the return instruction by bit 0.
260238384Sjkim#if __ARM_ARCH__>=5
261238384Sjkim	ldmia	sp!,{r4-r12,pc}
262238384Sjkim#else
263238384Sjkim	ldmia   sp!,{r4-r12,lr}
264238384Sjkim	tst	lr,#1
265238384Sjkim	moveq	pc,lr			@ be binary compatible with V4, yet
266238384Sjkim	bx	lr			@ interoperable with Thumb ISA:-)
267238384Sjkim#endif
268238384Sjkim.size	AES_encrypt,.-AES_encrypt
269238384Sjkim
@ Internal encryption core, called from AES_encrypt.
@ In:  r0-r3 = the four state words, r10 = AES_Te, r11 = key schedule.
@ Out: r0-r3 = encrypted state.
@ r4-r9 and r12 are used as scratch, r11 is advanced through the
@ schedule, r10 is modified for the last round and restored before return.
@ Only one word table is used: the existing Te1/Te2/Te3 labels refer to the
@ same entries combined through a ror operand on the following eor.
270238384Sjkim.type   _armv4_AES_encrypt,%function
271238384Sjkim.align	2
272238384Sjkim_armv4_AES_encrypt:
@ lr is reused as the 0xff byte mask below, so the return address is
@ kept on the stack.
273238384Sjkim	str	lr,[sp,#-4]!		@ push lr
@ Initial AddRoundKey with the first four schedule words.  The round
@ count is read from byte offset 240 of the schedule (the pointer has
@ already moved on by 16).
274238384Sjkim	ldmia	$key!,{$t1-$i1}
275238384Sjkim	eor	$s0,$s0,$t1
276238384Sjkim	ldr	$rounds,[$key,#240-16]
277238384Sjkim	eor	$s1,$s1,$t2
278238384Sjkim	eor	$s2,$s2,$t3
279238384Sjkim	eor	$s3,$s3,$i1
@ The loop runs rounds-1 times; the last round is handled after it.
280238384Sjkim	sub	$rounds,$rounds,#1
281238384Sjkim	mov	lr,#255
282238384Sjkim
@ Split s0 into its four byte indices ahead of the first iteration; each
@ iteration ends by doing the same for the next one.
283238384Sjkim	and	$i1,lr,$s0
284238384Sjkim	and	$i2,lr,$s0,lsr#8
285238384Sjkim	and	$i3,lr,$s0,lsr#16
286238384Sjkim	mov	$s0,$s0,lsr#24
287238384Sjkim.Lenc_loop:
288238384Sjkim	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
289238384Sjkim	and	$i1,lr,$s1,lsr#16	@ i0
290238384Sjkim	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
291238384Sjkim	and	$i2,lr,$s1
292238384Sjkim	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
293238384Sjkim	and	$i3,lr,$s1,lsr#8
294238384Sjkim	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
295238384Sjkim	mov	$s1,$s1,lsr#24
296238384Sjkim
297238384Sjkim	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
298238384Sjkim	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
299238384Sjkim	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
300238384Sjkim	eor	$s0,$s0,$i1,ror#8
301238384Sjkim	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
302238384Sjkim	and	$i1,lr,$s2,lsr#8	@ i0
303238384Sjkim	eor	$t2,$t2,$i2,ror#8
304238384Sjkim	and	$i2,lr,$s2,lsr#16	@ i1
305238384Sjkim	eor	$t3,$t3,$i3,ror#8
306238384Sjkim	and	$i3,lr,$s2
307238384Sjkim	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
308238384Sjkim	eor	$s1,$s1,$t1,ror#24
309238384Sjkim	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
310238384Sjkim	mov	$s2,$s2,lsr#24
311238384Sjkim
312238384Sjkim	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
313238384Sjkim	eor	$s0,$s0,$i1,ror#16
314238384Sjkim	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
315238384Sjkim	and	$i1,lr,$s3		@ i0
316238384Sjkim	eor	$s1,$s1,$i2,ror#8
317238384Sjkim	and	$i2,lr,$s3,lsr#8	@ i1
318238384Sjkim	eor	$t3,$t3,$i3,ror#16
319238384Sjkim	and	$i3,lr,$s3,lsr#16	@ i2
320238384Sjkim	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
321238384Sjkim	eor	$s2,$s2,$t2,ror#16
322238384Sjkim	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
323238384Sjkim	mov	$s3,$s3,lsr#24
324238384Sjkim
@ The loads of the next four round-key words are interleaved with the
@ last table lookups; the key pointer advances by 16 per round.
325238384Sjkim	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
326238384Sjkim	eor	$s0,$s0,$i1,ror#24
327238384Sjkim	ldr	$i1,[$key],#16
328238384Sjkim	eor	$s1,$s1,$i2,ror#16
329238384Sjkim	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
330238384Sjkim	eor	$s2,$s2,$i3,ror#8
331238384Sjkim	ldr	$t1,[$key,#-12]
332238384Sjkim	eor	$s3,$s3,$t3,ror#8
333238384Sjkim
@ AddRoundKey, overlapped with splitting the new s0 into byte indices
@ for the next iteration (or for the final round).
334238384Sjkim	ldr	$t2,[$key,#-8]
335238384Sjkim	eor	$s0,$s0,$i1
336238384Sjkim	ldr	$t3,[$key,#-4]
337238384Sjkim	and	$i1,lr,$s0
338238384Sjkim	eor	$s1,$s1,$t1
339238384Sjkim	and	$i2,lr,$s0,lsr#8
340238384Sjkim	eor	$s2,$s2,$t2
341238384Sjkim	and	$i3,lr,$s0,lsr#16
342238384Sjkim	eor	$s3,$s3,$t3
343238384Sjkim	mov	$s0,$s0,lsr#24
344238384Sjkim
345238384Sjkim	subs	$rounds,$rounds,#1
346238384Sjkim	bne	.Lenc_loop
347238384Sjkim
@ Last round: with the base moved up by 2, a byte load at index*4 picks
@ byte 2 of each table word, which is the plain S-box value (labelled Te4
@ below).  The bytes are merged with shifts instead of rotated words.
348238384Sjkim	add	$tbl,$tbl,#2
349238384Sjkim
350238384Sjkim	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
351238384Sjkim	and	$i1,lr,$s1,lsr#16	@ i0
352238384Sjkim	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
353238384Sjkim	and	$i2,lr,$s1
354238384Sjkim	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
355238384Sjkim	and	$i3,lr,$s1,lsr#8
356238384Sjkim	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
357238384Sjkim	mov	$s1,$s1,lsr#24
358238384Sjkim
359238384Sjkim	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
360238384Sjkim	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
361238384Sjkim	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
362238384Sjkim	eor	$s0,$i1,$s0,lsl#8
363238384Sjkim	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
364238384Sjkim	and	$i1,lr,$s2,lsr#8	@ i0
365238384Sjkim	eor	$t2,$i2,$t2,lsl#8
366238384Sjkim	and	$i2,lr,$s2,lsr#16	@ i1
367238384Sjkim	eor	$t3,$i3,$t3,lsl#8
368238384Sjkim	and	$i3,lr,$s2
369238384Sjkim	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
370238384Sjkim	eor	$s1,$t1,$s1,lsl#24
371238384Sjkim	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
372238384Sjkim	mov	$s2,$s2,lsr#24
373238384Sjkim
374238384Sjkim	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
375238384Sjkim	eor	$s0,$i1,$s0,lsl#8
376238384Sjkim	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
377238384Sjkim	and	$i1,lr,$s3		@ i0
378238384Sjkim	eor	$s1,$s1,$i2,lsl#16
379238384Sjkim	and	$i2,lr,$s3,lsr#8	@ i1
380238384Sjkim	eor	$t3,$i3,$t3,lsl#8
381238384Sjkim	and	$i3,lr,$s3,lsr#16	@ i2
382238384Sjkim	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
383238384Sjkim	eor	$s2,$t2,$s2,lsl#24
384238384Sjkim	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
385238384Sjkim	mov	$s3,$s3,lsr#24
386238384Sjkim
387238384Sjkim	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
388238384Sjkim	eor	$s0,$i1,$s0,lsl#8
389238384Sjkim	ldr	$i1,[$key,#0]
390238384Sjkim	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
391238384Sjkim	eor	$s1,$s1,$i2,lsl#8
392238384Sjkim	ldr	$t1,[$key,#4]
393238384Sjkim	eor	$s2,$s2,$i3,lsl#16
394238384Sjkim	ldr	$t2,[$key,#8]
395238384Sjkim	eor	$s3,$t3,$s3,lsl#24
396238384Sjkim	ldr	$t3,[$key,#12]
397238384Sjkim
@ Final AddRoundKey with the last four schedule words.
398238384Sjkim	eor	$s0,$s0,$i1
399238384Sjkim	eor	$s1,$s1,$t1
400238384Sjkim	eor	$s2,$s2,$t2
401238384Sjkim	eor	$s3,$s3,$t3
402238384Sjkim
@ Undo the +2 so the caller gets the table base back, then return by
@ popping the saved lr into pc.
403238384Sjkim	sub	$tbl,$tbl,#2
404238384Sjkim	ldr	pc,[sp],#4		@ pop and return
405238384Sjkim.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
406238384Sjkim
@ Key expansion for encryption.
@ In:  r0 = user key bytes, r1 = key length in bits, r2 = AES_KEY to fill.
@ Out: r0 = 0 on success, or -1 if r0 or r2 is null or r1 is not one of
@      128, 192 or 256.  On success r2 points at the start of the
@      schedule again, which private_AES_set_decrypt_key relies on.
@ Key bytes are assembled into big-endian words, and the round count
@ (10, 12 or 14) is stored at byte offset 240 of the schedule.
@ The second label is the local entry used by private_AES_set_decrypt_key.
407238384Sjkim.global private_AES_set_encrypt_key
408238384Sjkim.type   private_AES_set_encrypt_key,%function
409238384Sjkim.align	5
410238384Sjkimprivate_AES_set_encrypt_key:
411238384Sjkim_armv4_AES_set_encrypt_key:
412238384Sjkim	sub	r3,pc,#8		@ AES_set_encrypt_key
@ Argument checks.  Nothing has been pushed yet, so .Labrt can return
@ straight through lr.
413238384Sjkim	teq	r0,#0
414238384Sjkim	moveq	r0,#-1
415238384Sjkim	beq	.Labrt
416238384Sjkim	teq	r2,#0
417238384Sjkim	moveq	r0,#-1
418238384Sjkim	beq	.Labrt
419238384Sjkim
420238384Sjkim	teq	r1,#128
421238384Sjkim	beq	.Lok
422238384Sjkim	teq	r1,#192
423238384Sjkim	beq	.Lok
424238384Sjkim	teq	r1,#256
425238384Sjkim	movne	r0,#-1
426238384Sjkim	bne	.Labrt
427238384Sjkim
428238384Sjkim.Lok:	stmdb   sp!,{r4-r12,lr}
@ r10 = AES_Te + 1024, the byte-wide S-box table; rcon follows it at +256.
429238384Sjkim	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
430238384Sjkim
431238384Sjkim	mov	$rounds,r0		@ inp
432238384Sjkim	mov	lr,r1			@ bits
433238384Sjkim	mov	$key,r2			@ key
434238384Sjkim
@ Load the first 16 key bytes and store them as rk[0..3]; the key
@ pointer ends up 16 bytes into the schedule.
435238384Sjkim#if __ARM_ARCH__<7
436238384Sjkim	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
437238384Sjkim	ldrb	$t1,[$rounds,#2]	@ manner...
438238384Sjkim	ldrb	$t2,[$rounds,#1]
439238384Sjkim	ldrb	$t3,[$rounds,#0]
440238384Sjkim	orr	$s0,$s0,$t1,lsl#8
441238384Sjkim	ldrb	$s1,[$rounds,#7]
442238384Sjkim	orr	$s0,$s0,$t2,lsl#16
443238384Sjkim	ldrb	$t1,[$rounds,#6]
444238384Sjkim	orr	$s0,$s0,$t3,lsl#24
445238384Sjkim	ldrb	$t2,[$rounds,#5]
446238384Sjkim	ldrb	$t3,[$rounds,#4]
447238384Sjkim	orr	$s1,$s1,$t1,lsl#8
448238384Sjkim	ldrb	$s2,[$rounds,#11]
449238384Sjkim	orr	$s1,$s1,$t2,lsl#16
450238384Sjkim	ldrb	$t1,[$rounds,#10]
451238384Sjkim	orr	$s1,$s1,$t3,lsl#24
452238384Sjkim	ldrb	$t2,[$rounds,#9]
453238384Sjkim	ldrb	$t3,[$rounds,#8]
454238384Sjkim	orr	$s2,$s2,$t1,lsl#8
455238384Sjkim	ldrb	$s3,[$rounds,#15]
456238384Sjkim	orr	$s2,$s2,$t2,lsl#16
457238384Sjkim	ldrb	$t1,[$rounds,#14]
458238384Sjkim	orr	$s2,$s2,$t3,lsl#24
459238384Sjkim	ldrb	$t2,[$rounds,#13]
460238384Sjkim	ldrb	$t3,[$rounds,#12]
461238384Sjkim	orr	$s3,$s3,$t1,lsl#8
462238384Sjkim	str	$s0,[$key],#16
463238384Sjkim	orr	$s3,$s3,$t2,lsl#16
464238384Sjkim	str	$s1,[$key,#-12]
465238384Sjkim	orr	$s3,$s3,$t3,lsl#24
466238384Sjkim	str	$s2,[$key,#-8]
467238384Sjkim	str	$s3,[$key,#-4]
468238384Sjkim#else
469238384Sjkim	ldr	$s0,[$rounds,#0]
470238384Sjkim	ldr	$s1,[$rounds,#4]
471238384Sjkim	ldr	$s2,[$rounds,#8]
472238384Sjkim	ldr	$s3,[$rounds,#12]
473238384Sjkim#ifdef __ARMEL__
474238384Sjkim	rev	$s0,$s0
475238384Sjkim	rev	$s1,$s1
476238384Sjkim	rev	$s2,$s2
477238384Sjkim	rev	$s3,$s3
478238384Sjkim#endif
479238384Sjkim	str	$s0,[$key],#16
480238384Sjkim	str	$s1,[$key,#-12]
481238384Sjkim	str	$s2,[$key,#-8]
482238384Sjkim	str	$s3,[$key,#-4]
483238384Sjkim#endif
484238384Sjkim
@ 128-bit key: r12 holds 10, which is both the stored round count and the
@ loop counter; each iteration produces the next four schedule words.
485238384Sjkim	teq	lr,#128
486238384Sjkim	bne	.Lnot128
487238384Sjkim	mov	$rounds,#10
488238384Sjkim	str	$rounds,[$key,#240-16]
489238384Sjkim	add	$t3,$tbl,#256			@ rcon
490238384Sjkim	mov	lr,#255
491238384Sjkim
@ Each byte of the previous word goes through the S-box and the results
@ are recombined rotated left by one byte (the byte from bits 16-23 ends
@ up on top), then the round constant is mixed in.
492238384Sjkim.L128_loop:
493238384Sjkim	and	$t2,lr,$s3,lsr#24
494238384Sjkim	and	$i1,lr,$s3,lsr#16
495238384Sjkim	ldrb	$t2,[$tbl,$t2]
496238384Sjkim	and	$i2,lr,$s3,lsr#8
497238384Sjkim	ldrb	$i1,[$tbl,$i1]
498238384Sjkim	and	$i3,lr,$s3
499238384Sjkim	ldrb	$i2,[$tbl,$i2]
500238384Sjkim	orr	$t2,$t2,$i1,lsl#24
501238384Sjkim	ldrb	$i3,[$tbl,$i3]
502238384Sjkim	orr	$t2,$t2,$i2,lsl#16
503238384Sjkim	ldr	$t1,[$t3],#4			@ rcon[i++]
504238384Sjkim	orr	$t2,$t2,$i3,lsl#8
505238384Sjkim	eor	$t2,$t2,$t1
506238384Sjkim	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
507238384Sjkim	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
508238384Sjkim	str	$s0,[$key],#16
509238384Sjkim	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
510238384Sjkim	str	$s1,[$key,#-12]
511238384Sjkim	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
512238384Sjkim	str	$s2,[$key,#-8]
513238384Sjkim	subs	$rounds,$rounds,#1
514238384Sjkim	str	$s3,[$key,#-4]
515238384Sjkim	bne	.L128_loop
@ The key pointer has advanced 16 + 10*16 = 176 bytes; rewind into r2.
516238384Sjkim	sub	r2,$key,#176
517238384Sjkim	b	.Ldone
518238384Sjkim
@ 192- and 256-bit keys: load key bytes 16-23 and store them as rk[4..5];
@ the key pointer is then 24 bytes into the schedule.
519238384Sjkim.Lnot128:
520238384Sjkim#if __ARM_ARCH__<7
521238384Sjkim	ldrb	$i2,[$rounds,#19]
522238384Sjkim	ldrb	$t1,[$rounds,#18]
523238384Sjkim	ldrb	$t2,[$rounds,#17]
524238384Sjkim	ldrb	$t3,[$rounds,#16]
525238384Sjkim	orr	$i2,$i2,$t1,lsl#8
526238384Sjkim	ldrb	$i3,[$rounds,#23]
527238384Sjkim	orr	$i2,$i2,$t2,lsl#16
528238384Sjkim	ldrb	$t1,[$rounds,#22]
529238384Sjkim	orr	$i2,$i2,$t3,lsl#24
530238384Sjkim	ldrb	$t2,[$rounds,#21]
531238384Sjkim	ldrb	$t3,[$rounds,#20]
532238384Sjkim	orr	$i3,$i3,$t1,lsl#8
533238384Sjkim	orr	$i3,$i3,$t2,lsl#16
534238384Sjkim	str	$i2,[$key],#8
535238384Sjkim	orr	$i3,$i3,$t3,lsl#24
536238384Sjkim	str	$i3,[$key,#-4]
537238384Sjkim#else
538238384Sjkim	ldr	$i2,[$rounds,#16]
539238384Sjkim	ldr	$i3,[$rounds,#20]
540238384Sjkim#ifdef __ARMEL__
541238384Sjkim	rev	$i2,$i2
542238384Sjkim	rev	$i3,$i3
543238384Sjkim#endif
544238384Sjkim	str	$i2,[$key],#8
545238384Sjkim	str	$i3,[$key,#-4]
546238384Sjkim#endif
547238384Sjkim
@ 192-bit key: round count 12 is stored, then r12 is reloaded with 8 as
@ the loop counter.  Each pass yields six words, except the last, which
@ exits after four.
548238384Sjkim	teq	lr,#192
549238384Sjkim	bne	.Lnot192
550238384Sjkim	mov	$rounds,#12
551238384Sjkim	str	$rounds,[$key,#240-24]
552238384Sjkim	add	$t3,$tbl,#256			@ rcon
553238384Sjkim	mov	lr,#255
554238384Sjkim	mov	$rounds,#8
555238384Sjkim
@ r9 carries the most recent schedule word (rk[5] on entry, then the
@ word stored last at the bottom of the loop).
556238384Sjkim.L192_loop:
557238384Sjkim	and	$t2,lr,$i3,lsr#24
558238384Sjkim	and	$i1,lr,$i3,lsr#16
559238384Sjkim	ldrb	$t2,[$tbl,$t2]
560238384Sjkim	and	$i2,lr,$i3,lsr#8
561238384Sjkim	ldrb	$i1,[$tbl,$i1]
562238384Sjkim	and	$i3,lr,$i3
563238384Sjkim	ldrb	$i2,[$tbl,$i2]
564238384Sjkim	orr	$t2,$t2,$i1,lsl#24
565238384Sjkim	ldrb	$i3,[$tbl,$i3]
566238384Sjkim	orr	$t2,$t2,$i2,lsl#16
567238384Sjkim	ldr	$t1,[$t3],#4			@ rcon[i++]
568238384Sjkim	orr	$t2,$t2,$i3,lsl#8
569238384Sjkim	eor	$i3,$t2,$t1
570238384Sjkim	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
571238384Sjkim	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
572238384Sjkim	str	$s0,[$key],#24
573238384Sjkim	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
574238384Sjkim	str	$s1,[$key,#-20]
575238384Sjkim	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
576238384Sjkim	str	$s2,[$key,#-16]
577238384Sjkim	subs	$rounds,$rounds,#1
578238384Sjkim	str	$s3,[$key,#-12]
@ On the last pass the key pointer has advanced 24 + 8*24 = 216 bytes.
579238384Sjkim	subeq	r2,$key,#216
580238384Sjkim	beq	.Ldone
581238384Sjkim
582238384Sjkim	ldr	$i1,[$key,#-32]
583238384Sjkim	ldr	$i2,[$key,#-28]
584238384Sjkim	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
585238384Sjkim	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
586238384Sjkim	str	$i1,[$key,#-8]
587238384Sjkim	str	$i3,[$key,#-4]
588238384Sjkim	b	.L192_loop
589238384Sjkim
@ 256-bit key: load key bytes 24-31 and store them as rk[6..7]; the key
@ pointer is then 32 bytes into the schedule.
590238384Sjkim.Lnot192:
591238384Sjkim#if __ARM_ARCH__<7
592238384Sjkim	ldrb	$i2,[$rounds,#27]
593238384Sjkim	ldrb	$t1,[$rounds,#26]
594238384Sjkim	ldrb	$t2,[$rounds,#25]
595238384Sjkim	ldrb	$t3,[$rounds,#24]
596238384Sjkim	orr	$i2,$i2,$t1,lsl#8
597238384Sjkim	ldrb	$i3,[$rounds,#31]
598238384Sjkim	orr	$i2,$i2,$t2,lsl#16
599238384Sjkim	ldrb	$t1,[$rounds,#30]
600238384Sjkim	orr	$i2,$i2,$t3,lsl#24
601238384Sjkim	ldrb	$t2,[$rounds,#29]
602238384Sjkim	ldrb	$t3,[$rounds,#28]
603238384Sjkim	orr	$i3,$i3,$t1,lsl#8
604238384Sjkim	orr	$i3,$i3,$t2,lsl#16
605238384Sjkim	str	$i2,[$key],#8
606238384Sjkim	orr	$i3,$i3,$t3,lsl#24
607238384Sjkim	str	$i3,[$key,#-4]
608238384Sjkim#else
609238384Sjkim	ldr	$i2,[$rounds,#24]
610238384Sjkim	ldr	$i3,[$rounds,#28]
611238384Sjkim#ifdef __ARMEL__
612238384Sjkim	rev	$i2,$i2
613238384Sjkim	rev	$i3,$i3
614238384Sjkim#endif
615238384Sjkim	str	$i2,[$key],#8
616238384Sjkim	str	$i3,[$key,#-4]
617238384Sjkim#endif
618238384Sjkim
@ Round count 14 is stored, then r12 is reloaded with 7 as the loop
@ counter.  Each pass yields eight words, except the last, which exits
@ after four.
619238384Sjkim	mov	$rounds,#14
620238384Sjkim	str	$rounds,[$key,#240-32]
621238384Sjkim	add	$t3,$tbl,#256			@ rcon
622238384Sjkim	mov	lr,#255
623238384Sjkim	mov	$rounds,#7
624238384Sjkim
625238384Sjkim.L256_loop:
626238384Sjkim	and	$t2,lr,$i3,lsr#24
627238384Sjkim	and	$i1,lr,$i3,lsr#16
628238384Sjkim	ldrb	$t2,[$tbl,$t2]
629238384Sjkim	and	$i2,lr,$i3,lsr#8
630238384Sjkim	ldrb	$i1,[$tbl,$i1]
631238384Sjkim	and	$i3,lr,$i3
632238384Sjkim	ldrb	$i2,[$tbl,$i2]
633238384Sjkim	orr	$t2,$t2,$i1,lsl#24
634238384Sjkim	ldrb	$i3,[$tbl,$i3]
635238384Sjkim	orr	$t2,$t2,$i2,lsl#16
636238384Sjkim	ldr	$t1,[$t3],#4			@ rcon[i++]
637238384Sjkim	orr	$t2,$t2,$i3,lsl#8
638238384Sjkim	eor	$i3,$t2,$t1
639238384Sjkim	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
640238384Sjkim	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
641238384Sjkim	str	$s0,[$key],#32
642238384Sjkim	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
643238384Sjkim	str	$s1,[$key,#-28]
644238384Sjkim	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
645238384Sjkim	str	$s2,[$key,#-24]
646238384Sjkim	subs	$rounds,$rounds,#1
647238384Sjkim	str	$s3,[$key,#-20]
@ On the last pass the key pointer has advanced 32 + 7*32 = 256 bytes.
648238384Sjkim	subeq	r2,$key,#256
649238384Sjkim	beq	.Ldone
650238384Sjkim
@ Second half of the pass: the bytes of the word just produced go through
@ the S-box and are put back in their own positions (no rotation and no
@ round constant here).
651238384Sjkim	and	$t2,lr,$s3
652238384Sjkim	and	$i1,lr,$s3,lsr#8
653238384Sjkim	ldrb	$t2,[$tbl,$t2]
654238384Sjkim	and	$i2,lr,$s3,lsr#16
655238384Sjkim	ldrb	$i1,[$tbl,$i1]
656238384Sjkim	and	$i3,lr,$s3,lsr#24
657238384Sjkim	ldrb	$i2,[$tbl,$i2]
658238384Sjkim	orr	$t2,$t2,$i1,lsl#8
659238384Sjkim	ldrb	$i3,[$tbl,$i3]
660238384Sjkim	orr	$t2,$t2,$i2,lsl#16
661238384Sjkim	ldr	$t1,[$key,#-48]
662238384Sjkim	orr	$t2,$t2,$i3,lsl#24
663238384Sjkim
664238384Sjkim	ldr	$i1,[$key,#-44]
665238384Sjkim	ldr	$i2,[$key,#-40]
666238384Sjkim	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
667238384Sjkim	ldr	$i3,[$key,#-36]
668238384Sjkim	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
669238384Sjkim	str	$t1,[$key,#-16]
670238384Sjkim	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
671238384Sjkim	str	$i1,[$key,#-12]
672238384Sjkim	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
673238384Sjkim	str	$i2,[$key,#-8]
674238384Sjkim	str	$i3,[$key,#-4]
675238384Sjkim	b	.L256_loop
676238384Sjkim
@ Success exit restores the saved registers and falls into the shared
@ return sequence; .Labrt is also the target of the early error exits
@ and of the failure path in private_AES_set_decrypt_key.
677238384Sjkim.Ldone:	mov	r0,#0
678238384Sjkim	ldmia   sp!,{r4-r12,lr}
679238384Sjkim.Labrt:	tst	lr,#1
680238384Sjkim	moveq	pc,lr			@ be binary compatible with V4, yet
681238384Sjkim	bx	lr			@ interoperable with Thumb ISA:-)
682238384Sjkim.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
683238384Sjkim
@ Key expansion for decryption.  Takes the same arguments in r0-r2 and
@ returns the same status in r0 as private_AES_set_encrypt_key.
@ Steps: build the encryption schedule, reverse the order of the round
@ keys, then transform every round key except the first and the last
@ with the inverse MixColumns step (multipliers 0x0e, 0x0b, 0x0d, 0x09).
684238384Sjkim.global private_AES_set_decrypt_key
685238384Sjkim.type   private_AES_set_decrypt_key,%function
686238384Sjkim.align	5
687238384Sjkimprivate_AES_set_decrypt_key:
688238384Sjkim	str	lr,[sp,#-4]!            @ push lr
689238384Sjkim	bl	_armv4_AES_set_encrypt_key
@ On failure r0 already holds the error code: restore lr and leave
@ through the shared return sequence.
690238384Sjkim	teq	r0,#0
691238384Sjkim	ldrne	lr,[sp],#4              @ pop lr
692238384Sjkim	bne	.Labrt
693238384Sjkim
@ lr is already on the stack, so together with this push the frame has
@ the layout that the final pop of r4-r12 plus pc or lr expects.
694238384Sjkim	stmdb   sp!,{r4-r12}
695238384Sjkim
696238384Sjkim	ldr	$rounds,[r2,#240]	@ AES_set_encrypt_key preserves r2,
697238384Sjkim	mov	$key,r2			@ which is AES_KEY *key
@ r7 walks up from the first round key and r8 walks down from the last
@ one (r2 + rounds*16).
698238384Sjkim	mov	$i1,r2
699238384Sjkim	add	$i2,r2,$rounds,lsl#4
700238384Sjkim
@ Swap 16-byte round keys end for end until the two pointers meet; the
@ middle round key stays where it is.
701238384Sjkim.Linv:	ldr	$s0,[$i1]
702238384Sjkim	ldr	$s1,[$i1,#4]
703238384Sjkim	ldr	$s2,[$i1,#8]
704238384Sjkim	ldr	$s3,[$i1,#12]
705238384Sjkim	ldr	$t1,[$i2]
706238384Sjkim	ldr	$t2,[$i2,#4]
707238384Sjkim	ldr	$t3,[$i2,#8]
708238384Sjkim	ldr	$i3,[$i2,#12]
709238384Sjkim	str	$s0,[$i2],#-16
710238384Sjkim	str	$s1,[$i2,#16+4]
711238384Sjkim	str	$s2,[$i2,#16+8]
712238384Sjkim	str	$s3,[$i2,#16+12]
713238384Sjkim	str	$t1,[$i1],#16
714238384Sjkim	str	$t2,[$i1,#-12]
715238384Sjkim	str	$t3,[$i1,#-8]
716238384Sjkim	str	$i3,[$i1,#-4]
717238384Sjkim	teq	$i1,$i2
718238384Sjkim	bne	.Linv
719238384Sjkim___
# Perl level: the r7-r9 aliases are reused under names that describe the
# constants the rest of this routine keeps in those registers.
720238384Sjkim$mask80=$i1;
721238384Sjkim$mask1b=$i2;
722238384Sjkim$mask7f=$i3;
723238384Sjkim$code.=<<___;
@ Start at the second round key and build the byte-replicated constants
@ 0x80808080, 0x1b1b1b1b and 0x7f7f7f7f (the complement of the first).
724238384Sjkim	ldr	$s0,[$key,#16]!		@ prefetch tp1
725238384Sjkim	mov	$mask80,#0x80
726238384Sjkim	mov	$mask1b,#0x1b
727238384Sjkim	orr	$mask80,$mask80,#0x8000
728238384Sjkim	orr	$mask1b,$mask1b,#0x1b00
729238384Sjkim	orr	$mask80,$mask80,$mask80,lsl#16
730238384Sjkim	orr	$mask1b,$mask1b,$mask1b,lsl#16
731238384Sjkim	sub	$rounds,$rounds,#1
732238384Sjkim	mvn	$mask7f,$mask80
@ r12 becomes the number of words to transform.
733238384Sjkim	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
734238384Sjkim
@ One word per iteration.  Doubling of the four packed bytes: the low
@ seven bits of each byte are shifted left, and every byte whose top bit
@ was set gets 0x1b mixed in (0x80 - 0x01 = 0x7f, masked down to 0x1b).
735238384Sjkim.Lmix:	and	$t1,$s0,$mask80
736238384Sjkim	and	$s1,$s0,$mask7f
737238384Sjkim	sub	$t1,$t1,$t1,lsr#7
738238384Sjkim	and	$t1,$t1,$mask1b
739238384Sjkim	eor	$s1,$t1,$s1,lsl#1	@ tp2
740238384Sjkim
741238384Sjkim	and	$t1,$s1,$mask80
742238384Sjkim	and	$s2,$s1,$mask7f
743238384Sjkim	sub	$t1,$t1,$t1,lsr#7
744238384Sjkim	and	$t1,$t1,$mask1b
745238384Sjkim	eor	$s2,$t1,$s2,lsl#1	@ tp4
746238384Sjkim
747238384Sjkim	and	$t1,$s2,$mask80
748238384Sjkim	and	$s3,$s2,$mask7f
749238384Sjkim	sub	$t1,$t1,$t1,lsr#7
750238384Sjkim	and	$t1,$t1,$mask1b
751238384Sjkim	eor	$s3,$t1,$s3,lsl#1	@ tp8
752238384Sjkim
@ Combine the multiples: tpe, plus tpb, tpd and tp9 rotated left by 8,
@ 16 and 24 bits; tpb and tpd are folded in as their tp2/tp4 and tp9
@ parts instead of being formed separately.
753238384Sjkim	eor	$t1,$s1,$s2
754238384Sjkim	eor	$t2,$s0,$s3		@ tp9
755238384Sjkim	eor	$t1,$t1,$s3		@ tpe
756238384Sjkim	eor	$t1,$t1,$s1,ror#24
757238384Sjkim	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
758238384Sjkim	eor	$t1,$t1,$s2,ror#16
759238384Sjkim	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
760238384Sjkim	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
761238384Sjkim
@ The next input word is fetched before the result is stored over the
@ current one.
762238384Sjkim	ldr	$s0,[$key,#4]		@ prefetch tp1
763238384Sjkim	str	$t1,[$key],#4
764238384Sjkim	subs	$rounds,$rounds,#1
765238384Sjkim	bne	.Lmix
766238384Sjkim
767238384Sjkim	mov	r0,#0
768238384Sjkim#if __ARM_ARCH__>=5
769238384Sjkim	ldmia	sp!,{r4-r12,pc}
770238384Sjkim#else
771238384Sjkim	ldmia   sp!,{r4-r12,lr}
772238384Sjkim	tst	lr,#1
773238384Sjkim	moveq	pc,lr			@ be binary compatible with V4, yet
774238384Sjkim	bx	lr			@ interoperable with Thumb ISA:-)
775238384Sjkim#endif
776238384Sjkim.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
777238384Sjkim
@ AES_Td holds the lookup data used by the decryption code, in two parts:
@   - 256 32-bit words (1024 bytes). The round loop in _armv4_AES_decrypt
@     loads entries with a byte index scaled by 4 and applies ror#8/16/24
@     to the loaded words, so this one table serves as Td0..Td3.
@   - 256 bytes labelled Td4, starting 1024 bytes past AES_Td and read
@     with byte loads in the final round.
@ AES_decrypt finds the table pc-relatively (AES_decrypt-AES_Td), so the
@ offset between the two symbols is resolved by the assembler.
778238384Sjkim.type	AES_Td,%object
779238384Sjkim.align	5
780238384SjkimAES_Td:
@ Word table: 64 rows of 4 words, entry N is at byte offset 4*N.
781238384Sjkim.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
782238384Sjkim.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
783238384Sjkim.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
784238384Sjkim.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
785238384Sjkim.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
786238384Sjkim.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
787238384Sjkim.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
788238384Sjkim.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
789238384Sjkim.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
790238384Sjkim.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
791238384Sjkim.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
792238384Sjkim.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
793238384Sjkim.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
794238384Sjkim.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
795238384Sjkim.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
796238384Sjkim.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
797238384Sjkim.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
798238384Sjkim.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
799238384Sjkim.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
800238384Sjkim.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
801238384Sjkim.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
802238384Sjkim.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
803238384Sjkim.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
804238384Sjkim.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
805238384Sjkim.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
806238384Sjkim.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
807238384Sjkim.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
808238384Sjkim.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
809238384Sjkim.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
810238384Sjkim.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
811238384Sjkim.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
812238384Sjkim.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
813238384Sjkim.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
814238384Sjkim.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
815238384Sjkim.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
816238384Sjkim.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
817238384Sjkim.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
818238384Sjkim.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
819238384Sjkim.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
820238384Sjkim.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
821238384Sjkim.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
822238384Sjkim.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
823238384Sjkim.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
824238384Sjkim.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
825238384Sjkim.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
826238384Sjkim.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
827238384Sjkim.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
828238384Sjkim.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
829238384Sjkim.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
830238384Sjkim.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
831238384Sjkim.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
832238384Sjkim.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
833238384Sjkim.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
834238384Sjkim.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
835238384Sjkim.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
836238384Sjkim.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
837238384Sjkim.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
838238384Sjkim.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
839238384Sjkim.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
840238384Sjkim.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
841238384Sjkim.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
842238384Sjkim.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
843238384Sjkim.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
844238384Sjkim.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
845238384Sjkim@ Td4[256]
@ Byte table: 32 rows of 8 bytes, entry N is at byte offset 1024+N from
@ AES_Td. The values are the AES inverse S-box.
846238384Sjkim.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
847238384Sjkim.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
848238384Sjkim.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
849238384Sjkim.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
850238384Sjkim.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
851238384Sjkim.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
852238384Sjkim.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
853238384Sjkim.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
854238384Sjkim.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
855238384Sjkim.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
856238384Sjkim.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
857238384Sjkim.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
858238384Sjkim.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
859238384Sjkim.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
860238384Sjkim.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
861238384Sjkim.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
862238384Sjkim.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
863238384Sjkim.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
864238384Sjkim.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
865238384Sjkim.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
866238384Sjkim.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
867238384Sjkim.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
868238384Sjkim.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
869238384Sjkim.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
870238384Sjkim.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
871238384Sjkim.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
872238384Sjkim.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
873238384Sjkim.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
874238384Sjkim.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
875238384Sjkim.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
876238384Sjkim.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
877238384Sjkim.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
878238384Sjkim.size	AES_Td,.-AES_Td
879238384Sjkim
880238384Sjkim@ void AES_decrypt(const unsigned char *in, unsigned char *out,
881238384Sjkim@ 		 const AES_KEY *key) {
@
@ Public entry point: decrypts one 16-byte block.
@   r0 = in, r1 = out, r2 = key (argument order of the prototype above)
@ The 16 input bytes are loaded as four big-endian words into s0-s3,
@ passed to _armv4_AES_decrypt, and the resulting s0-s3 are written
@ back big-endian to out. r4-r12 are saved on entry and restored on
@ return. The names s0-s3, t1-t3, rounds, key and tbl are the Perl-level
@ register aliases of the generator script.
882238384Sjkim.global AES_decrypt
883238384Sjkim.type   AES_decrypt,%function
884238384Sjkim.align	5
885238384SjkimAES_decrypt:
@ pc reads 8 bytes ahead of the executing instruction, so r3 receives
@ the address of AES_decrypt itself.
886238384Sjkim	sub	r3,pc,#8		@ AES_decrypt
@ r1 (out) is pushed together with the saved registers; it sits at the
@ lowest address and is popped on its own after the core returns.
887238384Sjkim	stmdb   sp!,{r1,r4-r12,lr}
@ The rounds register doubles as the input pointer until the core is called.
888238384Sjkim	mov	$rounds,r0		@ inp
889238384Sjkim	mov	$key,r2
@ tbl = address of AES_Td, derived from r3 and the symbol distance.
890238384Sjkim	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
@ Before ARMv7: build each word from four byte loads, first input byte
@ in bits 31..24. Loads and ORRs of neighbouring words are interleaved.
891238384Sjkim#if __ARM_ARCH__<7
892238384Sjkim	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
893238384Sjkim	ldrb	$t1,[$rounds,#2]	@ manner...
894238384Sjkim	ldrb	$t2,[$rounds,#1]
895238384Sjkim	ldrb	$t3,[$rounds,#0]
896238384Sjkim	orr	$s0,$s0,$t1,lsl#8
897238384Sjkim	ldrb	$s1,[$rounds,#7]
898238384Sjkim	orr	$s0,$s0,$t2,lsl#16
899238384Sjkim	ldrb	$t1,[$rounds,#6]
900238384Sjkim	orr	$s0,$s0,$t3,lsl#24
901238384Sjkim	ldrb	$t2,[$rounds,#5]
902238384Sjkim	ldrb	$t3,[$rounds,#4]
903238384Sjkim	orr	$s1,$s1,$t1,lsl#8
904238384Sjkim	ldrb	$s2,[$rounds,#11]
905238384Sjkim	orr	$s1,$s1,$t2,lsl#16
906238384Sjkim	ldrb	$t1,[$rounds,#10]
907238384Sjkim	orr	$s1,$s1,$t3,lsl#24
908238384Sjkim	ldrb	$t2,[$rounds,#9]
909238384Sjkim	ldrb	$t3,[$rounds,#8]
910238384Sjkim	orr	$s2,$s2,$t1,lsl#8
911238384Sjkim	ldrb	$s3,[$rounds,#15]
912238384Sjkim	orr	$s2,$s2,$t2,lsl#16
913238384Sjkim	ldrb	$t1,[$rounds,#14]
914238384Sjkim	orr	$s2,$s2,$t3,lsl#24
915238384Sjkim	ldrb	$t2,[$rounds,#13]
916238384Sjkim	ldrb	$t3,[$rounds,#12]
917238384Sjkim	orr	$s3,$s3,$t1,lsl#8
918238384Sjkim	orr	$s3,$s3,$t2,lsl#16
919238384Sjkim	orr	$s3,$s3,$t3,lsl#24
@ ARMv7 and later: plain word loads, byte-reversed on little-endian
@ builds so that the words match the layout produced by the byte path.
920238384Sjkim#else
921238384Sjkim	ldr	$s0,[$rounds,#0]
922238384Sjkim	ldr	$s1,[$rounds,#4]
923238384Sjkim	ldr	$s2,[$rounds,#8]
924238384Sjkim	ldr	$s3,[$rounds,#12]
925238384Sjkim#ifdef __ARMEL__
926238384Sjkim	rev	$s0,$s0
927238384Sjkim	rev	$s1,$s1
928238384Sjkim	rev	$s2,$s2
929238384Sjkim	rev	$s3,$s3
930238384Sjkim#endif
931238384Sjkim#endif
@ Run all rounds; the decrypted state comes back in s0-s3.
932238384Sjkim	bl	_armv4_AES_decrypt
933238384Sjkim
@ Pop the r1 value saved on entry: the rounds register now holds out.
934238384Sjkim	ldr	$rounds,[sp],#4		@ pop out
@ ARMv7 and later: undo the byte reversal on little-endian builds and
@ store whole words.
935238384Sjkim#if __ARM_ARCH__>=7
936238384Sjkim#ifdef __ARMEL__
937238384Sjkim	rev	$s0,$s0
938238384Sjkim	rev	$s1,$s1
939238384Sjkim	rev	$s2,$s2
940238384Sjkim	rev	$s3,$s3
941238384Sjkim#endif
942238384Sjkim	str	$s0,[$rounds,#0]
943238384Sjkim	str	$s1,[$rounds,#4]
944238384Sjkim	str	$s2,[$rounds,#8]
945238384Sjkim	str	$s3,[$rounds,#12]
@ Before ARMv7: store byte by byte, bits 31..24 of each word first.
@ strb writes only the low byte of its source register.
946238384Sjkim#else
947238384Sjkim	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
948238384Sjkim	mov	$t2,$s0,lsr#16		@ manner...
949238384Sjkim	mov	$t3,$s0,lsr#8
950238384Sjkim	strb	$t1,[$rounds,#0]
951238384Sjkim	strb	$t2,[$rounds,#1]
952238384Sjkim	mov	$t1,$s1,lsr#24
953238384Sjkim	strb	$t3,[$rounds,#2]
954238384Sjkim	mov	$t2,$s1,lsr#16
955238384Sjkim	strb	$s0,[$rounds,#3]
956238384Sjkim	mov	$t3,$s1,lsr#8
957238384Sjkim	strb	$t1,[$rounds,#4]
958238384Sjkim	strb	$t2,[$rounds,#5]
959238384Sjkim	mov	$t1,$s2,lsr#24
960238384Sjkim	strb	$t3,[$rounds,#6]
961238384Sjkim	mov	$t2,$s2,lsr#16
962238384Sjkim	strb	$s1,[$rounds,#7]
963238384Sjkim	mov	$t3,$s2,lsr#8
964238384Sjkim	strb	$t1,[$rounds,#8]
965238384Sjkim	strb	$t2,[$rounds,#9]
966238384Sjkim	mov	$t1,$s3,lsr#24
967238384Sjkim	strb	$t3,[$rounds,#10]
968238384Sjkim	mov	$t2,$s3,lsr#16
969238384Sjkim	strb	$s2,[$rounds,#11]
970238384Sjkim	mov	$t3,$s3,lsr#8
971238384Sjkim	strb	$t1,[$rounds,#12]
972238384Sjkim	strb	$t2,[$rounds,#13]
973238384Sjkim	strb	$t3,[$rounds,#14]
974238384Sjkim	strb	$s3,[$rounds,#15]
975238384Sjkim#endif
@ Return. ARMv5 and later restore r4-r12 and load the return address
@ straight into pc. Older builds restore it into lr and choose the
@ return instruction from bit 0 of lr.
976238384Sjkim#if __ARM_ARCH__>=5
977238384Sjkim	ldmia	sp!,{r4-r12,pc}
978238384Sjkim#else
979238384Sjkim	ldmia   sp!,{r4-r12,lr}
980238384Sjkim	tst	lr,#1
981238384Sjkim	moveq	pc,lr			@ be binary compatible with V4, yet
982238384Sjkim	bx	lr			@ interoperable with Thumb ISA:-)
983238384Sjkim#endif
984238384Sjkim.size	AES_decrypt,.-AES_decrypt
985238384Sjkim
@ _armv4_AES_decrypt: internal decryption core, called from AES_decrypt.
@ In:   s0-s3 = state words, key = pointer to the round keys,
@       tbl = address of AES_Td
@ Out:  s0-s3 = state after the last round key has been applied
@ Uses: t1-t3 and i1-i3 as scratch, rounds as the loop counter and lr as
@       the 0xff byte mask (the return address is kept on the stack).
@       key is advanced as round keys are consumed; tbl is restored
@       before returning.
@ The names above are the Perl-level register aliases of the generator
@ script. Instructions are interleaved by hand, so keep the order intact.
986238384Sjkim.type   _armv4_AES_decrypt,%function
987238384Sjkim.align	2
988238384Sjkim_armv4_AES_decrypt:
989238384Sjkim	str	lr,[sp,#-4]!		@ push lr
@ Load the first four round-key words (key advances by 16 bytes) and
@ XOR them into the state.
990238384Sjkim	ldmia	$key!,{$t1-$i1}
991238384Sjkim	eor	$s0,$s0,$t1
@ The round count is read from byte offset 240 of the key structure;
@ the 16 is subtracted because key has already been advanced.
992238384Sjkim	ldr	$rounds,[$key,#240-16]
993238384Sjkim	eor	$s1,$s1,$t2
994238384Sjkim	eor	$s2,$s2,$t3
995238384Sjkim	eor	$s3,$s3,$i1
@ The loop runs one time fewer than the round count; the last round
@ is done separately after the loop.
996238384Sjkim	sub	$rounds,$rounds,#1
@ lr = 0xff, used as the byte mask for all index extraction below.
997238384Sjkim	mov	lr,#255
998238384Sjkim
@ Split s0 into its four bytes ahead of the first iteration.
999238384Sjkim	and	$i1,lr,$s0,lsr#16
1000238384Sjkim	and	$i2,lr,$s0,lsr#8
1001238384Sjkim	and	$i3,lr,$s0
1002238384Sjkim	mov	$s0,$s0,lsr#24
@ One iteration per round. Every state byte indexes the word table
@ (index scaled by 4), the loaded words are rotated into position and
@ XORed together, and the next four round-key words are mixed in.
@ The bytes of the new s0 are extracted at the bottom of the loop for
@ the following iteration (or for the final round).
1003238384Sjkim.Ldec_loop:
1004238384Sjkim	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
1005238384Sjkim	and	$i1,lr,$s1		@ i0
1006238384Sjkim	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
1007238384Sjkim	and	$i2,lr,$s1,lsr#16
1008238384Sjkim	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
1009238384Sjkim	and	$i3,lr,$s1,lsr#8
1010238384Sjkim	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
1011238384Sjkim	mov	$s1,$s1,lsr#24
1012238384Sjkim
1013238384Sjkim	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
1014238384Sjkim	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
1015238384Sjkim	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
1016238384Sjkim	eor	$s0,$s0,$i1,ror#24
1017238384Sjkim	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
1018238384Sjkim	and	$i1,lr,$s2,lsr#8	@ i0
1019238384Sjkim	eor	$t2,$i2,$t2,ror#8
1020238384Sjkim	and	$i2,lr,$s2		@ i1
1021238384Sjkim	eor	$t3,$i3,$t3,ror#8
1022238384Sjkim	and	$i3,lr,$s2,lsr#16
1023238384Sjkim	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
1024238384Sjkim	eor	$s1,$s1,$t1,ror#8
1025238384Sjkim	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
1026238384Sjkim	mov	$s2,$s2,lsr#24
1027238384Sjkim
1028238384Sjkim	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
1029238384Sjkim	eor	$s0,$s0,$i1,ror#16
1030238384Sjkim	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
1031238384Sjkim	and	$i1,lr,$s3,lsr#16	@ i0
1032238384Sjkim	eor	$s1,$s1,$i2,ror#24
1033238384Sjkim	and	$i2,lr,$s3,lsr#8	@ i1
1034238384Sjkim	eor	$t3,$i3,$t3,ror#8
1035238384Sjkim	and	$i3,lr,$s3		@ i2
1036238384Sjkim	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
1037238384Sjkim	eor	$s2,$s2,$t2,ror#8
1038238384Sjkim	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
1039238384Sjkim	mov	$s3,$s3,lsr#24
1040238384Sjkim
1041238384Sjkim	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
1042238384Sjkim	eor	$s0,$s0,$i1,ror#8
@ Round key: the first word is loaded with post-increment of key by 16,
@ the other three are then addressed at negative offsets from key.
1043238384Sjkim	ldr	$i1,[$key],#16
1044238384Sjkim	eor	$s1,$s1,$i2,ror#16
1045238384Sjkim	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
1046238384Sjkim	eor	$s2,$s2,$i3,ror#24
1047238384Sjkim
1048238384Sjkim	ldr	$t1,[$key,#-12]
1049238384Sjkim	eor	$s0,$s0,$i1
1050238384Sjkim	ldr	$t2,[$key,#-8]
1051238384Sjkim	eor	$s3,$s3,$t3,ror#8
1052238384Sjkim	ldr	$t3,[$key,#-4]
1053238384Sjkim	and	$i1,lr,$s0,lsr#16
1054238384Sjkim	eor	$s1,$s1,$t1
1055238384Sjkim	and	$i2,lr,$s0,lsr#8
1056238384Sjkim	eor	$s2,$s2,$t2
1057238384Sjkim	and	$i3,lr,$s0
1058238384Sjkim	eor	$s3,$s3,$t3
1059238384Sjkim	mov	$s0,$s0,lsr#24
1060238384Sjkim
1061238384Sjkim	subs	$rounds,$rounds,#1
1062238384Sjkim	bne	.Ldec_loop
1063238384Sjkim
@ Final round. Point tbl at the Td4 byte table, 1024 bytes past AES_Td.
1064238384Sjkim	add	$tbl,$tbl,#1024
1065238384Sjkim
@ The eight loads below touch Td4 at 32-byte intervals across its 256
@ bytes. The loaded values are not used: t1-t3 are overwritten by the
@ byte lookups that follow.
1066238384Sjkim	ldr	$t2,[$tbl,#0]		@ prefetch Td4
1067238384Sjkim	ldr	$t3,[$tbl,#32]
1068238384Sjkim	ldr	$t1,[$tbl,#64]
1069238384Sjkim	ldr	$t2,[$tbl,#96]
1070238384Sjkim	ldr	$t3,[$tbl,#128]
1071238384Sjkim	ldr	$t1,[$tbl,#160]
1072238384Sjkim	ldr	$t2,[$tbl,#192]
1073238384Sjkim	ldr	$t3,[$tbl,#224]
1074238384Sjkim
@ Byte lookups in Td4 (unscaled index). Each result byte is shifted
@ into its position while the output words are assembled with EOR.
1075238384Sjkim	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
1076238384Sjkim	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
1077238384Sjkim	and	$i1,lr,$s1		@ i0
1078238384Sjkim	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
1079238384Sjkim	and	$i2,lr,$s1,lsr#16
1080238384Sjkim	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
1081238384Sjkim	and	$i3,lr,$s1,lsr#8
1082238384Sjkim
1083238384Sjkim	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
1084238384Sjkim	ldrb	$s1,[$tbl,$s1,lsr#24]	@ Td4[s1>>24]
1085238384Sjkim	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
1086238384Sjkim	eor	$s0,$i1,$s0,lsl#24
1087238384Sjkim	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
1088238384Sjkim	eor	$s1,$t1,$s1,lsl#8
1089238384Sjkim	and	$i1,lr,$s2,lsr#8	@ i0
1090238384Sjkim	eor	$t2,$t2,$i2,lsl#8
1091238384Sjkim	and	$i2,lr,$s2		@ i1
1092238384Sjkim	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
1093238384Sjkim	eor	$t3,$t3,$i3,lsl#8
1094238384Sjkim	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
1095238384Sjkim	and	$i3,lr,$s2,lsr#16
1096238384Sjkim
1097238384Sjkim	ldrb	$s2,[$tbl,$s2,lsr#24]	@ Td4[s2>>24]
1098238384Sjkim	eor	$s0,$s0,$i1,lsl#8
1099238384Sjkim	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
1100238384Sjkim	eor	$s1,$i2,$s1,lsl#16
1101238384Sjkim	and	$i1,lr,$s3,lsr#16	@ i0
1102238384Sjkim	eor	$s2,$t2,$s2,lsl#16
1103238384Sjkim	and	$i2,lr,$s3,lsr#8	@ i1
1104238384Sjkim	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
1105238384Sjkim	eor	$t3,$t3,$i3,lsl#16
1106238384Sjkim	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
1107238384Sjkim	and	$i3,lr,$s3		@ i2
1108238384Sjkim
1109238384Sjkim	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
1110238384Sjkim	ldrb	$s3,[$tbl,$s3,lsr#24]	@ Td4[s3>>24]
1111238384Sjkim	eor	$s0,$s0,$i1,lsl#16
@ Last round key: four words at key+0..12; key is not advanced here.
1112238384Sjkim	ldr	$i1,[$key,#0]
1113238384Sjkim	eor	$s1,$s1,$i2,lsl#8
1114238384Sjkim	ldr	$t1,[$key,#4]
1115238384Sjkim	eor	$s2,$i3,$s2,lsl#8
1116238384Sjkim	ldr	$t2,[$key,#8]
1117238384Sjkim	eor	$s3,$t3,$s3,lsl#24
1118238384Sjkim	ldr	$t3,[$key,#12]
1119238384Sjkim
1120238384Sjkim	eor	$s0,$s0,$i1
1121238384Sjkim	eor	$s1,$s1,$t1
1122238384Sjkim	eor	$s2,$s2,$t2
1123238384Sjkim	eor	$s3,$s3,$t3
1124238384Sjkim
@ Undo the +1024 so that tbl points at AES_Td again for the caller.
1125238384Sjkim	sub	$tbl,$tbl,#1024
@ The return address pushed on entry is popped directly into pc.
1126238384Sjkim	ldr	pc,[sp],#4		@ pop and return
1127238384Sjkim.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
1128238384Sjkim.asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1129238384Sjkim.align	2
1130238384Sjkim___
1131238384Sjkim
# Post-process the accumulated assembly text and write it to STDOUT.
#
# Every `bx lr` instruction is replaced by its raw encoding (0xe12fff1e)
# so that the output still assembles when the assembler targets plain
# ARMv4, which has no BX mnemonic.
1132238384Sjkim$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
1133238384Sjkimprint $code;
# Buffered write errors (e.g. a full disk) are only reported when the
# handle is closed, so a failed close must abort instead of silently
# leaving a truncated assembly file behind.
1134238384Sjkimclose STDOUT or die "error closing STDOUT: $!";	# enforce flush
1135