vpaes-x86.S revision 299389
1	# $FreeBSD: head/secure/lib/libcrypto/i386/vpaes-x86.S 299389 2016-05-10 20:31:09Z jkim $
2#ifdef PIC
3.file	"vpaes-x86.S"
4.text
# .L_vpaes_consts: constant tables shared by all routines in this file.
# Layout: 49 rows of 16 bytes (four .long values per row), 64-byte aligned,
# followed by a NUL-terminated ASCII identification string.
# Every caller forms its base pointer as .L_vpaes_consts+0x30, so the first
# three rows are addressed as -48, -32 and -16 from that base and the fourth
# row sits at offset 0.
5.align	64
6.L_vpaes_consts:
7.long	218628480,235210255,168496130,67568393
8.long	252381056,17041926,33884169,51187212
# Row at base-16: 0x0F in every byte (used by the code as the nibble mask).
9.long	252645135,252645135,252645135,252645135
10.long	1512730624,3266504856,1377990664,3401244816
11.long	830229760,1275146365,2969422977,3447763452
12.long	3411033600,2979783055,338359620,2782886510
13.long	4209124096,907596821,221174255,1006095553
14.long	191964160,3799684038,3164090317,1589111125
15.long	182528256,1777043520,2877432650,3265356744
16.long	1874708224,3503451415,3305285752,363511674
17.long	1606117888,3487855781,1093350906,2384367825
18.long	197121,67569157,134941193,202313229
19.long	67569157,134941193,202313229,197121
20.long	134941193,202313229,197121,67569157
21.long	202313229,197121,67569157,134941193
22.long	33619971,100992007,168364043,235736079
23.long	235736079,33619971,100992007,168364043
24.long	168364043,235736079,33619971,100992007
25.long	100992007,168364043,235736079,33619971
26.long	50462976,117835012,185207048,252579084
27.long	252314880,51251460,117574920,184942860
28.long	184682752,252054788,50987272,118359308
29.long	118099200,185467140,251790600,50727180
30.long	2946363062,528716217,1300004225,1881839624
# Row at base+336: 0x5B in every byte.
31.long	1532713819,1532713819,1532713819,1532713819
32.long	3602276352,4288629033,3737020424,4153884961
33.long	1354558464,32357713,2958822624,3775749553
34.long	1201988352,132424512,1572796698,503232858
35.long	2213177600,1597421020,4103937655,675398315
36.long	2749646592,4273543773,1511898873,121693092
37.long	3040248576,1103263732,2871565598,1608280554
38.long	2236667136,2588920351,482954393,64377734
39.long	3069987328,291237287,2117370568,3650299247
40.long	533321216,3573750986,2572112006,1401264716
41.long	1339849704,2721158661,548607111,3445553514
42.long	2128193280,3054596040,2183486460,1257083700
43.long	655635200,1165381986,3923443150,2344132524
44.long	190078720,256924420,290342170,357187870
45.long	1610966272,2263057382,4103205268,309794674
46.long	2592527872,2233205587,1335446729,3402964816
47.long	3973531904,3225098121,3002836325,1918774430
48.long	3870401024,2102906079,2284471353,4117666579
49.long	617007872,1021508343,366931923,691083277
50.long	2528395776,3491914898,2968704004,1613121270
51.long	3445188352,3247741094,844474987,4093578302
52.long	651481088,1190302358,1689581232,574775300
53.long	4289380608,206939853,2555985458,2489840491
54.long	2130264064,327674451,3566485037,3349835193
55.long	2470714624,316102159,3636825756,3393945945
# ASCII text: Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)
56.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
57.byte	111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
58.byte	83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
59.byte	114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
60.byte	118,101,114,115,105,116,121,41,0
61.align	64
# _vpaes_preheat: convert %ebp into an absolute pointer to the constants and
# preload the two registers that the cipher cores keep live.
# In:   %ebp = (.L_vpaes_consts+0x30) minus the address of the label placed
#       directly after the call to this routine (every caller sets this up)
# Out:  %ebp = address of .L_vpaes_consts+0x30
#       %xmm7 = first table row, %xmm6 = third table row (0x0F byte mask)
# Clobbers: flags.
62.type	_vpaes_preheat,@function
63.align	16
64_vpaes_preheat:
65	addl	(%esp),%ebp	# (%esp) is the return address, i.e. the caller pic_point label
66	movdqa	-48(%ebp),%xmm7
67	movdqa	-16(%ebp),%xmm6
68	ret
69.size	_vpaes_preheat,.-_vpaes_preheat
# _vpaes_encrypt_core: encrypt the 16-byte block in %xmm0 with the schedule at %edx.
# In:   %xmm0 = input block
#       %edx  = key schedule: 16-byte round keys starting at (%edx), loop
#               count stored at 240(%edx)
#       %ebp  = constants base (.L_vpaes_consts+0x30)
#       %xmm6 = nibble mask and %xmm7 = table row, as loaded by _vpaes_preheat
# Out:  %xmm0 = output block
# Clobbers: %eax, %ebx, %ecx, %edx, %xmm1-%xmm5, flags. No stack accesses.
# The .byte lines are hand-encoded SSSE3 pshufb instructions; each carries its
# mnemonic as a trailing comment.
# Ordering matters: the jnz in .L000enc_entry tests the zero flag left by
# subl in the loop body (or by addl on the first pass), relying on the SSE
# instructions in between not writing flags.
70.type	_vpaes_encrypt_core,@function
71.align	16
72_vpaes_encrypt_core:
73	movl	$16,%ecx	# %ecx = table index, stepped by 16 and masked with 48 each round
74	movl	240(%edx),%eax	# %eax = loop count from the key schedule
75	movdqa	%xmm6,%xmm1
76	movdqa	(%ebp),%xmm2
77	pandn	%xmm0,%xmm1	# %xmm1 = input bits outside the nibble mask
78	pand	%xmm6,%xmm0	# %xmm0 = low nibble of every input byte
79	movdqu	(%edx),%xmm5	# first round key (unaligned load)
80.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
81	movdqa	16(%ebp),%xmm0
82	pxor	%xmm5,%xmm2
83	psrld	$4,%xmm1	# move the high nibbles down into index position
84	addl	$16,%edx	# advance to the next round key; also leaves ZF clear for the first jnz
85.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
86	leal	192(%ebp),%ebx	# %ebx = base+192, indexed by %ecx below
87	pxor	%xmm2,%xmm0
88	jmp	.L000enc_entry
89.align	16
# Loop body: combine the table lookups prepared by .L000enc_entry with the
# round key in %xmm5, then apply the %ecx-selected shuffles.
90.L001enc_loop:
91	movdqa	32(%ebp),%xmm4
92	movdqa	48(%ebp),%xmm0
93.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
94.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
95	pxor	%xmm5,%xmm4	# mix in the round key
96	movdqa	64(%ebp),%xmm5
97	pxor	%xmm4,%xmm0
98	movdqa	-64(%ebx,%ecx,1),%xmm1
99.byte	102,15,56,0,234	# pshufb %xmm2,%xmm5
100	movdqa	80(%ebp),%xmm2
101	movdqa	(%ebx,%ecx,1),%xmm4
102.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
103	movdqa	%xmm0,%xmm3
104	pxor	%xmm5,%xmm2
105.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
106	addl	$16,%edx	# next round key
107	pxor	%xmm2,%xmm0
108.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
109	addl	$16,%ecx
110	pxor	%xmm0,%xmm3
111.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
112	andl	$48,%ecx	# keep the index within 0, 16, 32, 48
113	subl	$1,%eax	# sets ZF for the jnz at the end of .L000enc_entry
114	pxor	%xmm3,%xmm0
# Shared entry: split %xmm0 into nibbles and perform the %xmm7 / -32(%ebp)
# lookups, leaving results in %xmm2 and %xmm3 and the next round key in %xmm5.
115.L000enc_entry:
116	movdqa	%xmm6,%xmm1
117	movdqa	-32(%ebp),%xmm5
118	pandn	%xmm0,%xmm1
119	psrld	$4,%xmm1
120	pand	%xmm6,%xmm0
121.byte	102,15,56,0,232	# pshufb %xmm0,%xmm5
122	movdqa	%xmm7,%xmm3
123	pxor	%xmm1,%xmm0
124.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
125	movdqa	%xmm7,%xmm4
126	pxor	%xmm5,%xmm3
127.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
128	movdqa	%xmm7,%xmm2
129	pxor	%xmm5,%xmm4
130.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
131	movdqa	%xmm7,%xmm3
132	pxor	%xmm0,%xmm2
133.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
134	movdqu	(%edx),%xmm5	# round key for the step that follows
135	pxor	%xmm1,%xmm3
136	jnz	.L001enc_loop	# flags come from subl (or the initial addl), not from the SSE code
# Final step: lookups at base+96 / base+112, last round key, and the output
# shuffle selected by %ecx at 64(%ebx,%ecx,1).
137	movdqa	96(%ebp),%xmm4
138	movdqa	112(%ebp),%xmm0
139.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
140	pxor	%xmm5,%xmm4
141.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
142	movdqa	64(%ebx,%ecx,1),%xmm1
143	pxor	%xmm4,%xmm0
144.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
145	ret
146.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
# _vpaes_decrypt_core: decrypt the 16-byte block in %xmm0 with the schedule at %edx.
# In:   %xmm0 = input block
#       %edx  = key schedule: 16-byte round keys starting at (%edx), loop
#               count stored at 240(%edx)
#       %ebp  = constants base (.L_vpaes_consts+0x30)
#       %xmm6 = nibble mask and %xmm7 = table row, as loaded by _vpaes_preheat
# Out:  %xmm0 = output block
# Clobbers: %eax, %ebx, %ecx, %edx, %xmm1-%xmm5, flags. No stack accesses.
# The .byte lines are hand-encoded SSSE3 pshufb / palignr instructions; each
# carries its mnemonic as a trailing comment.
# Ordering matters: the jnz in .L002dec_entry tests the zero flag left by
# subl in the loop body (or by addl on the first pass), relying on the SSE
# instructions and leal in between not writing flags.
147.type	_vpaes_decrypt_core,@function
148.align	16
149_vpaes_decrypt_core:
150	leal	608(%ebp),%ebx	# %ebx = base+608; all loop tables are addressed from here
151	movl	240(%edx),%eax	# %eax = loop count from the key schedule
152	movdqa	%xmm6,%xmm1
153	movdqa	-64(%ebx),%xmm2
154	pandn	%xmm0,%xmm1
155	movl	%eax,%ecx
156	psrld	$4,%xmm1	# high nibbles moved down into index position
157	movdqu	(%edx),%xmm5	# first round key (unaligned load)
158	shll	$4,%ecx
159	pand	%xmm6,%xmm0	# low nibbles
160.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
161	movdqa	-48(%ebx),%xmm0
162	xorl	$48,%ecx
163.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
164	andl	$48,%ecx	# %ecx = ((count << 4) ^ 48) & 48
165	pxor	%xmm5,%xmm2
166	movdqa	176(%ebp),%xmm5	# shuffle control, rotated by palignr once per loop pass
167	pxor	%xmm2,%xmm0
168	addl	$16,%edx	# next round key; also leaves ZF clear for the first jnz
169	leal	-352(%ebx,%ecx,1),%ecx	# %ecx = address base+256+index of the final output shuffle
170	jmp	.L002dec_entry
171.align	16
# Loop body: four table-pair lookups (at -32/-16, 0/16, 32/48, 64/80 from
# %ebx) accumulated into %xmm0, with a pshufb by %xmm5 between the pairs.
# On entry %xmm0 already holds the round key loaded by .L002dec_entry.
172.L003dec_loop:
173	movdqa	-32(%ebx),%xmm4
174	movdqa	-16(%ebx),%xmm1
175.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
176.byte	102,15,56,0,203	# pshufb %xmm3,%xmm1
177	pxor	%xmm4,%xmm0
178	movdqa	(%ebx),%xmm4
179	pxor	%xmm1,%xmm0
180	movdqa	16(%ebx),%xmm1
181.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
182.byte	102,15,56,0,197	# pshufb %xmm5,%xmm0
183.byte	102,15,56,0,203	# pshufb %xmm3,%xmm1
184	pxor	%xmm4,%xmm0
185	movdqa	32(%ebx),%xmm4
186	pxor	%xmm1,%xmm0
187	movdqa	48(%ebx),%xmm1
188.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
189.byte	102,15,56,0,197	# pshufb %xmm5,%xmm0
190.byte	102,15,56,0,203	# pshufb %xmm3,%xmm1
191	pxor	%xmm4,%xmm0
192	movdqa	64(%ebx),%xmm4
193	pxor	%xmm1,%xmm0
194	movdqa	80(%ebx),%xmm1
195.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
196.byte	102,15,56,0,197	# pshufb %xmm5,%xmm0
197.byte	102,15,56,0,203	# pshufb %xmm3,%xmm1
198	pxor	%xmm4,%xmm0
199	addl	$16,%edx	# next round key
200.byte	102,15,58,15,237,12	# palignr $12,%xmm5,%xmm5 (rotate the shuffle control)
201	pxor	%xmm1,%xmm0
202	subl	$1,%eax	# sets ZF for the jnz at the end of .L002dec_entry
# Shared entry: split %xmm0 into nibbles and perform the %xmm7 / -32(%ebp)
# lookups, leaving results in %xmm2 and %xmm3 and the next round key in %xmm0.
203.L002dec_entry:
204	movdqa	%xmm6,%xmm1
205	movdqa	-32(%ebp),%xmm2
206	pandn	%xmm0,%xmm1
207	pand	%xmm6,%xmm0
208	psrld	$4,%xmm1
209.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
210	movdqa	%xmm7,%xmm3
211	pxor	%xmm1,%xmm0
212.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
213	movdqa	%xmm7,%xmm4
214	pxor	%xmm2,%xmm3
215.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
216	pxor	%xmm2,%xmm4
217	movdqa	%xmm7,%xmm2
218.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
219	movdqa	%xmm7,%xmm3
220	pxor	%xmm0,%xmm2
221.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
222	movdqu	(%edx),%xmm0	# round key for the step that follows
223	pxor	%xmm1,%xmm3
224	jnz	.L003dec_loop	# flags come from subl (or the initial addl), not from the SSE code
# Final step: lookups at 96/112 from %ebx, last round key, and the output
# shuffle whose address was computed into %ecx at the top.
225	movdqa	96(%ebx),%xmm4
226.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
227	pxor	%xmm0,%xmm4
228	movdqa	112(%ebx),%xmm0
229	movdqa	(%ecx),%xmm2
230.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
231	pxor	%xmm4,%xmm0
232.byte	102,15,56,0,194	# pshufb %xmm2,%xmm0
233	ret
234.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core
# _vpaes_schedule_core: expand a user key into a round-key schedule.
# In:   %esi = user key bytes
#       %eax = key size in bits (compared with 192: above takes the 256-bit
#              path, equal the 192-bit path, anything lower the 128-bit path)
#       %edx = address where the first round key is stored; the helpers move
#              it by +16 (encrypt) or -16 (decrypt) per stored key
#       %ecx = byte index selecting a 16-byte row at 256(%ebp,%ecx,1)
#       %edi = zero for an encryption schedule, non-zero for decryption
#       %ebp = (.L_vpaes_consts+0x30) minus the address of the label placed
#              directly after the call to this routine
# Out:  round keys stored through %edx; %xmm0-%xmm7 are zeroed before return.
# Clobbers: %eax, %ebx, %ecx, %edx, %ebp, flags, and %esi on the decrypt path
#           (via _vpaes_schedule_mangle).
# Stack: uses 32 bytes of caller scratch at 4(%esp) and 20(%esp) with aligned
#        moves, so the caller must have %esp 16-byte aligned at the call and
#        must own the 32 bytes starting at its own (%esp).
#        _vpaes_schedule_round reads the first slot as 8(%esp), which only
#        lines up when it is called directly from this routine.
235.type	_vpaes_schedule_core,@function
236.align	16
237_vpaes_schedule_core:
238	addl	(%esp),%ebp	# add the return address: %ebp becomes the absolute constants base
239	movdqu	(%esi),%xmm0	# first 16 key bytes
240	movdqa	320(%ebp),%xmm2	# row at base+320; kept in the stack slot below and rotated by _vpaes_schedule_round
241	movdqa	%xmm0,%xmm3	# keep an untransformed copy for the decrypt store
242	leal	(%ebp),%ebx	# transform tables at base+0 / base+16
243	movdqa	%xmm2,4(%esp)
244	call	_vpaes_schedule_transform
245	movdqa	%xmm0,%xmm7	# %xmm7 = transformed key, the running schedule state
246	testl	%edi,%edi
247	jnz	.L004schedule_am_decrypting
248	movdqu	%xmm0,(%edx)	# encrypt: store the transformed key as the first round key
249	jmp	.L005schedule_go
# Decrypt: store the original key bytes shuffled by the %ecx-selected row,
# then flip the index with xor 48.
250.L004schedule_am_decrypting:
251	movdqa	256(%ebp,%ecx,1),%xmm1
252.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
253	movdqu	%xmm3,(%edx)
254	xorl	$48,%ecx
255.L005schedule_go:
256	cmpl	$192,%eax
257	ja	.L006schedule_256
258	je	.L007schedule_192
# 128-bit path: ten rounds, each followed by a mangle-and-store except the
# last, which goes to .L010schedule_mangle_last instead.
259.L008schedule_128:
260	movl	$10,%eax
261.L009loop_schedule_128:
262	call	_vpaes_schedule_round
263	decl	%eax
264	jz	.L010schedule_mangle_last
265	call	_vpaes_schedule_mangle
266	jmp	.L009loop_schedule_128
267.align	16
# 192-bit path: transform key bytes 8..23, keep them in %xmm6 with the low
# half cleared, then run four passes of the round/smear/mangle sequence below.
268.L007schedule_192:
269	movdqu	8(%esi),%xmm0
270	call	_vpaes_schedule_transform
271	movdqa	%xmm0,%xmm6
272	pxor	%xmm4,%xmm4
273	movhlps	%xmm4,%xmm6	# low 8 bytes of %xmm6 = 0
274	movl	$4,%eax
275.L011loop_schedule_192:
276	call	_vpaes_schedule_round
277.byte	102,15,58,15,198,8	# palignr $8,%xmm6,%xmm0
278	call	_vpaes_schedule_mangle
279	call	_vpaes_schedule_192_smear
280	call	_vpaes_schedule_mangle
281	call	_vpaes_schedule_round
282	decl	%eax
283	jz	.L010schedule_mangle_last
284	call	_vpaes_schedule_mangle
285	call	_vpaes_schedule_192_smear
286	jmp	.L011loop_schedule_192
287.align	16
# 256-bit path: transform key bytes 16..31, then seven passes alternating a
# full round with a low round that runs on the saved %xmm6 state.
288.L006schedule_256:
289	movdqu	16(%esi),%xmm0
290	call	_vpaes_schedule_transform
291	movl	$7,%eax
292.L012loop_schedule_256:
293	call	_vpaes_schedule_mangle
294	movdqa	%xmm0,%xmm6
295	call	_vpaes_schedule_round
296	decl	%eax
297	jz	.L010schedule_mangle_last
298	call	_vpaes_schedule_mangle
299	pshufd	$255,%xmm0,%xmm0	# broadcast the top dword
300	movdqa	%xmm7,20(%esp)	# park %xmm7 in the second scratch slot
301	movdqa	%xmm6,%xmm7
302	call	.L_vpaes_schedule_low_round
303	movdqa	20(%esp),%xmm7
304	jmp	.L012loop_schedule_256
305.align	16
# Last round key: pick the output transform tables (base+384 for decrypt,
# base+352 for encrypt after an extra shuffle), xor with the 0x5B row,
# transform, store, and wipe the xmm registers.
306.L010schedule_mangle_last:
307	leal	384(%ebp),%ebx
308	testl	%edi,%edi
309	jnz	.L013schedule_mangle_last_dec
310	movdqa	256(%ebp,%ecx,1),%xmm1
311.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
312	leal	352(%ebp),%ebx
313	addl	$32,%edx	# with the -16 below: net +16 for encrypt, -16 for decrypt
314.L013schedule_mangle_last_dec:
315	addl	$-16,%edx
316	pxor	336(%ebp),%xmm0
317	call	_vpaes_schedule_transform
318	movdqu	%xmm0,(%edx)
319	pxor	%xmm0,%xmm0	# clear every xmm register before returning
320	pxor	%xmm1,%xmm1
321	pxor	%xmm2,%xmm2
322	pxor	%xmm3,%xmm3
323	pxor	%xmm4,%xmm4
324	pxor	%xmm5,%xmm5
325	pxor	%xmm6,%xmm6
326	pxor	%xmm7,%xmm7
327	ret
328.size	_vpaes_schedule_core,.-_vpaes_schedule_core
# _vpaes_schedule_192_smear: mixing step used only by the 192-bit schedule path.
# In:   %xmm6, %xmm7 = schedule state
# Out:  %xmm0 = %xmm6 xor shuffled(%xmm6) xor shuffled(%xmm7)
#       %xmm6 = the same value with its low 8 bytes cleared
# Clobbers: %xmm1 (left as zero). %xmm7 is only read.
329.type	_vpaes_schedule_192_smear,@function
330.align	16
331_vpaes_schedule_192_smear:
332	pshufd	$128,%xmm6,%xmm1	# dwords of %xmm1 = %xmm6 dwords (0, 0, 0, 2), lowest first
333	pshufd	$254,%xmm7,%xmm0	# dwords of %xmm0 = %xmm7 dwords (2, 3, 3, 3), lowest first
334	pxor	%xmm1,%xmm6
335	pxor	%xmm1,%xmm1
336	pxor	%xmm0,%xmm6
337	movdqa	%xmm6,%xmm0
338	movhlps	%xmm1,%xmm6	# low 8 bytes of %xmm6 = 0 (high half of the zeroed %xmm1)
339	ret
340.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
# _vpaes_schedule_round: one round of the key schedule.
# In:   %xmm0, %xmm7 = schedule state
#       %ebp = constants base (.L_vpaes_consts+0x30)
#       slot 8(%esp) = 16-byte rotating constant; this is the slot that
#       _vpaes_schedule_core fills at its own 4(%esp), so this entry point
#       must be called directly from that routine
# Out:  %xmm0 = %xmm7 = new state; the stack slot is rotated by one byte
# Clobbers: %xmm1-%xmm5. Integer registers and flags are not written.
# Second entry point .L_vpaes_schedule_low_round skips the prologue (no stack
# access, no rotation of %xmm0) and runs only the smear and table lookups.
341.type	_vpaes_schedule_round,@function
342.align	16
343_vpaes_schedule_round:
344	movdqa	8(%esp),%xmm2
345	pxor	%xmm1,%xmm1
346.byte	102,15,58,15,202,15	# palignr $15,%xmm2,%xmm1 (byte 0 of %xmm1 = top byte of %xmm2, rest zero)
347.byte	102,15,58,15,210,15	# palignr $15,%xmm2,%xmm2 (rotate the constant by one byte)
348	pxor	%xmm1,%xmm7	# fold the extracted byte into the state
349	pshufd	$255,%xmm0,%xmm0	# broadcast the top dword
350.byte	102,15,58,15,192,1	# palignr $1,%xmm0,%xmm0 (rotate each broadcast dword by one byte)
351	movdqa	%xmm2,8(%esp)	# save the rotated constant for the next round
352.L_vpaes_schedule_low_round:
# Smear %xmm7: xor it with itself shifted left by 4 and then by 8 bytes, then
# xor with the 0x5B row.
353	movdqa	%xmm7,%xmm1
354	pslldq	$4,%xmm7
355	pxor	%xmm1,%xmm7
356	movdqa	%xmm7,%xmm1
357	pslldq	$8,%xmm7
358	pxor	%xmm1,%xmm7
359	pxor	336(%ebp),%xmm7
# Nibble-split %xmm0 and run it through the same lookup sequence the cipher
# cores use (tables at base-48, base-32, base+32, base+48).
360	movdqa	-16(%ebp),%xmm4	# 0x0F nibble mask
361	movdqa	-48(%ebp),%xmm5
362	movdqa	%xmm4,%xmm1
363	pandn	%xmm0,%xmm1
364	psrld	$4,%xmm1
365	pand	%xmm4,%xmm0
366	movdqa	-32(%ebp),%xmm2
367.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
368	pxor	%xmm1,%xmm0
369	movdqa	%xmm5,%xmm3
370.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
371	pxor	%xmm2,%xmm3
372	movdqa	%xmm5,%xmm4
373.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
374	pxor	%xmm2,%xmm4
375	movdqa	%xmm5,%xmm2
376.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
377	pxor	%xmm0,%xmm2
378	movdqa	%xmm5,%xmm3
379.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
380	pxor	%xmm1,%xmm3
381	movdqa	32(%ebp),%xmm4
382.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
383	movdqa	48(%ebp),%xmm0
384.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
385	pxor	%xmm4,%xmm0
386	pxor	%xmm7,%xmm0	# combine with the smeared state
387	movdqa	%xmm0,%xmm7
388	ret
389.size	_vpaes_schedule_round,.-_vpaes_schedule_round
# _vpaes_schedule_transform: per-byte table transform of %xmm0.
# In:   %xmm0 = value to transform
#       %ebx  = address of two consecutive 16-byte tables (low-nibble table
#               at 0, high-nibble table at 16)
#       %ebp  = constants base (the nibble mask is read from -16(%ebp))
# Out:  %xmm0 = table0[low nibbles] xor table1[high nibbles], byte by byte
# Clobbers: %xmm1, %xmm2. Integer registers and flags are not written.
390.type	_vpaes_schedule_transform,@function
391.align	16
392_vpaes_schedule_transform:
393	movdqa	-16(%ebp),%xmm2
394	movdqa	%xmm2,%xmm1
395	pandn	%xmm0,%xmm1
396	psrld	$4,%xmm1	# %xmm1 = high nibbles moved into index position
397	pand	%xmm2,%xmm0	# %xmm0 = low nibbles
398	movdqa	(%ebx),%xmm2
399.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
400	movdqa	16(%ebx),%xmm0
401.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
402	pxor	%xmm2,%xmm0
403	ret
404.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
# _vpaes_schedule_mangle: convert the current schedule state into its stored
# form and write it to the key schedule.
# In:   %xmm0 = schedule state (only read; left unchanged)
#       %edx  = address of the previously stored round key
#       %ecx  = byte index selecting the shuffle row at 256(%ebp,%ecx,1)
#       %edi  = zero for encryption, non-zero for decryption
#       %ebp  = constants base (.L_vpaes_consts+0x30)
# Out:  %edx moved by +16 (encrypt) or -16 (decrypt) and the new key stored
#       at (%edx); %ecx = (%ecx - 16) & 48
# Clobbers: %xmm1-%xmm5, flags, and %esi on the decrypt path.
405.type	_vpaes_schedule_mangle,@function
406.align	16
407_vpaes_schedule_mangle:
408	movdqa	%xmm0,%xmm4
409	movdqa	128(%ebp),%xmm5	# shuffle control applied repeatedly below
410	testl	%edi,%edi
411	jnz	.L014schedule_mangle_dec
# Encrypt: xor with the 0x5B row, then xor together three successive
# shuffles of the value by %xmm5.
412	addl	$16,%edx
413	pxor	336(%ebp),%xmm4
414.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
415	movdqa	%xmm4,%xmm3
416.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
417	pxor	%xmm4,%xmm3
418.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
419	pxor	%xmm4,%xmm3
420	jmp	.L015schedule_mangle_both
421.align	16
# Decrypt: nibble-split the value and accumulate four table-pair lookups
# from base+416 onward, shuffling the accumulator by %xmm5 between pairs.
422.L014schedule_mangle_dec:
423	movdqa	-16(%ebp),%xmm2	# 0x0F nibble mask
424	leal	416(%ebp),%esi	# %esi = table base (overwrites the caller value)
425	movdqa	%xmm2,%xmm1
426	pandn	%xmm4,%xmm1
427	psrld	$4,%xmm1	# %xmm1 = high nibbles
428	pand	%xmm2,%xmm4	# %xmm4 = low nibbles
429	movdqa	(%esi),%xmm2
430.byte	102,15,56,0,212	# pshufb %xmm4,%xmm2
431	movdqa	16(%esi),%xmm3
432.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
433	pxor	%xmm2,%xmm3
434.byte	102,15,56,0,221	# pshufb %xmm5,%xmm3
435	movdqa	32(%esi),%xmm2
436.byte	102,15,56,0,212	# pshufb %xmm4,%xmm2
437	pxor	%xmm3,%xmm2
438	movdqa	48(%esi),%xmm3
439.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
440	pxor	%xmm2,%xmm3
441.byte	102,15,56,0,221	# pshufb %xmm5,%xmm3
442	movdqa	64(%esi),%xmm2
443.byte	102,15,56,0,212	# pshufb %xmm4,%xmm2
444	pxor	%xmm3,%xmm2
445	movdqa	80(%esi),%xmm3
446.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
447	pxor	%xmm2,%xmm3
448.byte	102,15,56,0,221	# pshufb %xmm5,%xmm3
449	movdqa	96(%esi),%xmm2
450.byte	102,15,56,0,212	# pshufb %xmm4,%xmm2
451	pxor	%xmm3,%xmm2
452	movdqa	112(%esi),%xmm3
453.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
454	pxor	%xmm2,%xmm3
455	addl	$-16,%edx
# Common tail: apply the %ecx-selected shuffle, step the index, store.
456.L015schedule_mangle_both:
457	movdqa	256(%ebp,%ecx,1),%xmm1
458.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
459	addl	$-16,%ecx
460	andl	$48,%ecx
461	movdqu	%xmm3,(%edx)
462	ret
463.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
# vpaes_set_encrypt_key: build an encryption key schedule.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, user key bytes
#   second (24(%esp)) -> %eax, key size in bits
#   third  (28(%esp)) -> %edx, key schedule to fill
# Stores (bits >> 5) + 5 at offset 240 of the schedule, then calls
# _vpaes_schedule_core with %edi = 0 and %ecx = 48.
# Returns 0 in %eax. %ebp, %ebx, %esi and %edi are saved and restored.
# The routine switches to a 16-byte aligned scratch area below the current
# stack and keeps the original %esp at 48(%esp) until it returns.
464.globl	vpaes_set_encrypt_key
465.type	vpaes_set_encrypt_key,@function
466.align	16
467vpaes_set_encrypt_key:
468.L_vpaes_set_encrypt_key_begin:
469	pushl	%ebp
470	pushl	%ebx
471	pushl	%esi
472	pushl	%edi
473	movl	20(%esp),%esi
474	leal	-56(%esp),%ebx	# reserve 56 bytes ...
475	movl	24(%esp),%eax
476	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
477	movl	28(%esp),%edx
478	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
479	movl	%ebx,48(%esp)	# remember the original %esp above the scratch slots
480	movl	%eax,%ebx
481	shrl	$5,%ebx
482	addl	$5,%ebx
483	movl	%ebx,240(%edx)	# loop count read later by the cipher cores
484	movl	$48,%ecx
485	movl	$0,%edi	# zero selects the encryption schedule
486	leal	.L_vpaes_consts+0x30-.L016pic_point,%ebp	# offset from the return address to the constants base
487	call	_vpaes_schedule_core
488.L016pic_point:
489	movl	48(%esp),%esp	# back to the original stack
490	xorl	%eax,%eax
491	popl	%edi
492	popl	%esi
493	popl	%ebx
494	popl	%ebp
495	ret
496.size	vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
# vpaes_set_decrypt_key: build a decryption key schedule.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, user key bytes
#   second (24(%esp)) -> %eax, key size in bits
#   third  (28(%esp)) -> %edx, key schedule to fill
# Stores (bits >> 5) + 5 at offset 240 of the schedule, points %edx at
# schedule + 16 + 16 * that count so the keys are written from the far end
# backwards, then calls _vpaes_schedule_core with %edi = 1 and
# %ecx = ((bits >> 1) & 32) ^ 32.
# Returns 0 in %eax. %ebp, %ebx, %esi and %edi are saved and restored.
# The routine switches to a 16-byte aligned scratch area below the current
# stack and keeps the original %esp at 48(%esp) until it returns.
497.globl	vpaes_set_decrypt_key
498.type	vpaes_set_decrypt_key,@function
499.align	16
500vpaes_set_decrypt_key:
501.L_vpaes_set_decrypt_key_begin:
502	pushl	%ebp
503	pushl	%ebx
504	pushl	%esi
505	pushl	%edi
506	movl	20(%esp),%esi
507	leal	-56(%esp),%ebx	# reserve 56 bytes ...
508	movl	24(%esp),%eax
509	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
510	movl	28(%esp),%edx
511	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
512	movl	%ebx,48(%esp)	# remember the original %esp above the scratch slots
513	movl	%eax,%ebx
514	shrl	$5,%ebx
515	addl	$5,%ebx
516	movl	%ebx,240(%edx)	# loop count read later by the cipher cores
517	shll	$4,%ebx	# count * 16 bytes
518	leal	16(%edx,%ebx,1),%edx	# start storing at the far end of the schedule
519	movl	$1,%edi	# non-zero selects the decryption schedule
520	movl	%eax,%ecx
521	shrl	$1,%ecx
522	andl	$32,%ecx
523	xorl	$32,%ecx	# initial shuffle index: 0 or 32 depending on the key size
524	leal	.L_vpaes_consts+0x30-.L017pic_point,%ebp	# offset from the return address to the constants base
525	call	_vpaes_schedule_core
526.L017pic_point:
527	movl	48(%esp),%esp	# back to the original stack
528	xorl	%eax,%eax
529	popl	%edi
530	popl	%esi
531	popl	%ebx
532	popl	%ebp
533	ret
534.size	vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
# vpaes_encrypt: encrypt one 16-byte block.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, input block (unaligned load)
#   second (24(%esp)) -> %edi, output block (unaligned store)
#   third  (28(%esp)) -> %edx, key schedule
# No value is placed in %eax for the caller. %ebp, %ebx, %esi and %edi are
# saved and restored. _vpaes_preheat is called before the stack switch, so
# the argument offsets are still relative to the original %esp.
535.globl	vpaes_encrypt
536.type	vpaes_encrypt,@function
537.align	16
538vpaes_encrypt:
539.L_vpaes_encrypt_begin:
540	pushl	%ebp
541	pushl	%ebx
542	pushl	%esi
543	pushl	%edi
544	leal	.L_vpaes_consts+0x30-.L018pic_point,%ebp	# offset from the return address to the constants base
545	call	_vpaes_preheat
546.L018pic_point:
547	movl	20(%esp),%esi
548	leal	-56(%esp),%ebx	# reserve 56 bytes ...
549	movl	24(%esp),%edi
550	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
551	movl	28(%esp),%edx
552	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
553	movl	%ebx,48(%esp)	# remember the original %esp
554	movdqu	(%esi),%xmm0
555	call	_vpaes_encrypt_core
556	movdqu	%xmm0,(%edi)
557	movl	48(%esp),%esp	# back to the original stack
558	popl	%edi
559	popl	%esi
560	popl	%ebx
561	popl	%ebp
562	ret
563.size	vpaes_encrypt,.-.L_vpaes_encrypt_begin
# vpaes_decrypt: decrypt one 16-byte block.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, input block (unaligned load)
#   second (24(%esp)) -> %edi, output block (unaligned store)
#   third  (28(%esp)) -> %edx, key schedule
# No value is placed in %eax for the caller. %ebp, %ebx, %esi and %edi are
# saved and restored. _vpaes_preheat is called before the stack switch, so
# the argument offsets are still relative to the original %esp.
564.globl	vpaes_decrypt
565.type	vpaes_decrypt,@function
566.align	16
567vpaes_decrypt:
568.L_vpaes_decrypt_begin:
569	pushl	%ebp
570	pushl	%ebx
571	pushl	%esi
572	pushl	%edi
573	leal	.L_vpaes_consts+0x30-.L019pic_point,%ebp	# offset from the return address to the constants base
574	call	_vpaes_preheat
575.L019pic_point:
576	movl	20(%esp),%esi
577	leal	-56(%esp),%ebx	# reserve 56 bytes ...
578	movl	24(%esp),%edi
579	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
580	movl	28(%esp),%edx
581	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
582	movl	%ebx,48(%esp)	# remember the original %esp
583	movdqu	(%esi),%xmm0
584	call	_vpaes_decrypt_core
585	movdqu	%xmm0,(%edi)
586	movl	48(%esp),%esp	# back to the original stack
587	popl	%edi
588	popl	%esi
589	popl	%ebx
590	popl	%ebp
591	ret
592.size	vpaes_decrypt,.-.L_vpaes_decrypt_begin
# vpaes_cbc_encrypt: CBC-mode encryption or decryption of whole 16-byte blocks.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, input buffer
#   second (24(%esp)) -> %edi, output buffer
#   third  (28(%esp)) -> %eax, length in bytes
#   fourth (32(%esp)) -> %edx, key schedule
#   fifth  (36(%esp)) -> %ebp, 16-byte IV (read at start, overwritten at end)
#   sixth  (40(%esp)) -> %ecx, direction: zero runs the decrypt loop,
#                        anything else the encrypt loop
# A length below 16 returns at once without reading or writing the buffers
# or the IV. Otherwise length / 16 (rounded down) blocks are processed and
# any trailing partial block is ignored.
# Local frame after the stack switch (%esp is 16-byte aligned):
#   slot  0(%esp) = output minus input, so (%ebx,%esi,1) addresses the output
#   slot  4(%esp) = key schedule (reloaded each pass; the cores advance %edx)
#   slot  8(%esp) = IV address
#   slot 16(%esp) = previous ciphertext block (decrypt loop only)
#   slot 32(%esp) = current ciphertext block (decrypt loop only)
#   slot 48(%esp) = original %esp
# %xmm1 carries the chaining block between passes. %ebp, %ebx, %esi and %edi
# are saved and restored.
593.globl	vpaes_cbc_encrypt
594.type	vpaes_cbc_encrypt,@function
595.align	16
596vpaes_cbc_encrypt:
597.L_vpaes_cbc_encrypt_begin:
598	pushl	%ebp
599	pushl	%ebx
600	pushl	%esi
601	pushl	%edi
602	movl	20(%esp),%esi
603	movl	24(%esp),%edi
604	movl	28(%esp),%eax
605	movl	32(%esp),%edx
606	subl	$16,%eax	# %eax = length - 16; borrow means less than one block
607	jc	.L020cbc_abort
608	leal	-56(%esp),%ebx	# reserve 56 bytes ...
609	movl	36(%esp),%ebp
610	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
611	movl	40(%esp),%ecx
612	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
613	movdqu	(%ebp),%xmm1	# %xmm1 = IV
614	subl	%esi,%edi	# %edi = output - input
615	movl	%ebx,48(%esp)
616	movl	%edi,(%esp)
617	movl	%edx,4(%esp)
618	movl	%ebp,8(%esp)
619	movl	%eax,%edi	# %edi = bytes remaining after the current block
620	leal	.L_vpaes_consts+0x30-.L021pic_point,%ebp	# offset from the return address to the constants base
621	call	_vpaes_preheat
622.L021pic_point:
623	cmpl	$0,%ecx	# direction is tested once, before the cores clobber %ecx
624	je	.L022cbc_dec_loop
625	jmp	.L023cbc_enc_loop
626.align	16
# Encrypt: xor the plaintext with the chaining block, encrypt, and use the
# result as the next chaining block.
627.L023cbc_enc_loop:
628	movdqu	(%esi),%xmm0
629	pxor	%xmm1,%xmm0
630	call	_vpaes_encrypt_core
631	movl	(%esp),%ebx	# output - input
632	movl	4(%esp),%edx	# rewind the key schedule pointer
633	movdqa	%xmm0,%xmm1
634	movdqu	%xmm0,(%ebx,%esi,1)
635	leal	16(%esi),%esi
636	subl	$16,%edi
637	jnc	.L023cbc_enc_loop	# continue while at least one more whole block remains
638	jmp	.L024cbc_done
639.align	16
# Decrypt: save the chaining block and the ciphertext on the stack (the core
# clobbers %xmm1), decrypt, xor with the saved chaining block, and make the
# saved ciphertext the next chaining block.
640.L022cbc_dec_loop:
641	movdqu	(%esi),%xmm0
642	movdqa	%xmm1,16(%esp)
643	movdqa	%xmm0,32(%esp)
644	call	_vpaes_decrypt_core
645	movl	(%esp),%ebx	# output - input
646	movl	4(%esp),%edx	# rewind the key schedule pointer
647	pxor	16(%esp),%xmm0
648	movdqa	32(%esp),%xmm1
649	movdqu	%xmm0,(%ebx,%esi,1)
650	leal	16(%esi),%esi
651	subl	$16,%edi
652	jnc	.L022cbc_dec_loop	# continue while at least one more whole block remains
653.L024cbc_done:
654	movl	8(%esp),%ebx
655	movl	48(%esp),%esp	# back to the original stack
656	movdqu	%xmm1,(%ebx)	# write the final chaining block back to the IV
657.L020cbc_abort:
658	popl	%edi
659	popl	%esi
660	popl	%ebx
661	popl	%ebp
662	ret
663.size	vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
664#else
665.file	"vpaes-x86.S"
666.text
# .L_vpaes_consts: constant tables shared by all routines in this file.
# Layout: 49 rows of 16 bytes (four .long values per row), 64-byte aligned,
# followed by a NUL-terminated ASCII identification string.
# Every caller forms its base pointer as .L_vpaes_consts+0x30, so the first
# three rows are addressed as -48, -32 and -16 from that base and the fourth
# row sits at offset 0.
667.align	64
668.L_vpaes_consts:
669.long	218628480,235210255,168496130,67568393
670.long	252381056,17041926,33884169,51187212
# Row at base-16: 0x0F in every byte (used by the code as the nibble mask).
671.long	252645135,252645135,252645135,252645135
672.long	1512730624,3266504856,1377990664,3401244816
673.long	830229760,1275146365,2969422977,3447763452
674.long	3411033600,2979783055,338359620,2782886510
675.long	4209124096,907596821,221174255,1006095553
676.long	191964160,3799684038,3164090317,1589111125
677.long	182528256,1777043520,2877432650,3265356744
678.long	1874708224,3503451415,3305285752,363511674
679.long	1606117888,3487855781,1093350906,2384367825
680.long	197121,67569157,134941193,202313229
681.long	67569157,134941193,202313229,197121
682.long	134941193,202313229,197121,67569157
683.long	202313229,197121,67569157,134941193
684.long	33619971,100992007,168364043,235736079
685.long	235736079,33619971,100992007,168364043
686.long	168364043,235736079,33619971,100992007
687.long	100992007,168364043,235736079,33619971
688.long	50462976,117835012,185207048,252579084
689.long	252314880,51251460,117574920,184942860
690.long	184682752,252054788,50987272,118359308
691.long	118099200,185467140,251790600,50727180
692.long	2946363062,528716217,1300004225,1881839624
# Row at base+336: 0x5B in every byte.
693.long	1532713819,1532713819,1532713819,1532713819
694.long	3602276352,4288629033,3737020424,4153884961
695.long	1354558464,32357713,2958822624,3775749553
696.long	1201988352,132424512,1572796698,503232858
697.long	2213177600,1597421020,4103937655,675398315
698.long	2749646592,4273543773,1511898873,121693092
699.long	3040248576,1103263732,2871565598,1608280554
700.long	2236667136,2588920351,482954393,64377734
701.long	3069987328,291237287,2117370568,3650299247
702.long	533321216,3573750986,2572112006,1401264716
703.long	1339849704,2721158661,548607111,3445553514
704.long	2128193280,3054596040,2183486460,1257083700
705.long	655635200,1165381986,3923443150,2344132524
706.long	190078720,256924420,290342170,357187870
707.long	1610966272,2263057382,4103205268,309794674
708.long	2592527872,2233205587,1335446729,3402964816
709.long	3973531904,3225098121,3002836325,1918774430
710.long	3870401024,2102906079,2284471353,4117666579
711.long	617007872,1021508343,366931923,691083277
712.long	2528395776,3491914898,2968704004,1613121270
713.long	3445188352,3247741094,844474987,4093578302
714.long	651481088,1190302358,1689581232,574775300
715.long	4289380608,206939853,2555985458,2489840491
716.long	2130264064,327674451,3566485037,3349835193
717.long	2470714624,316102159,3636825756,3393945945
# ASCII text: Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)
718.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
719.byte	111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
720.byte	83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
721.byte	114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
722.byte	118,101,114,115,105,116,121,41,0
723.align	64
# _vpaes_preheat: convert %ebp into an absolute pointer to the constants and
# preload the two registers that the cipher cores keep live.
# In:   %ebp = (.L_vpaes_consts+0x30) minus the address of the label placed
#       directly after the call to this routine (every caller sets this up)
# Out:  %ebp = address of .L_vpaes_consts+0x30
#       %xmm7 = first table row, %xmm6 = third table row (0x0F byte mask)
# Clobbers: flags.
724.type	_vpaes_preheat,@function
725.align	16
726_vpaes_preheat:
727	addl	(%esp),%ebp	# (%esp) is the return address, i.e. the caller pic_point label
728	movdqa	-48(%ebp),%xmm7
729	movdqa	-16(%ebp),%xmm6
730	ret
731.size	_vpaes_preheat,.-_vpaes_preheat
# _vpaes_encrypt_core: encrypt the 16-byte block in %xmm0 with the schedule at %edx.
# In:   %xmm0 = input block
#       %edx  = key schedule: 16-byte round keys starting at (%edx), loop
#               count stored at 240(%edx)
#       %ebp  = constants base (.L_vpaes_consts+0x30)
#       %xmm6 = nibble mask and %xmm7 = table row, as loaded by _vpaes_preheat
# Out:  %xmm0 = output block
# Clobbers: %eax, %ebx, %ecx, %edx, %xmm1-%xmm5, flags. No stack accesses.
# The .byte lines are hand-encoded SSSE3 pshufb instructions; each carries its
# mnemonic as a trailing comment.
# Ordering matters: the jnz in .L000enc_entry tests the zero flag left by
# subl in the loop body (or by addl on the first pass), relying on the SSE
# instructions in between not writing flags.
732.type	_vpaes_encrypt_core,@function
733.align	16
734_vpaes_encrypt_core:
735	movl	$16,%ecx	# %ecx = table index, stepped by 16 and masked with 48 each round
736	movl	240(%edx),%eax	# %eax = loop count from the key schedule
737	movdqa	%xmm6,%xmm1
738	movdqa	(%ebp),%xmm2
739	pandn	%xmm0,%xmm1	# %xmm1 = input bits outside the nibble mask
740	pand	%xmm6,%xmm0	# %xmm0 = low nibble of every input byte
741	movdqu	(%edx),%xmm5	# first round key (unaligned load)
742.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
743	movdqa	16(%ebp),%xmm0
744	pxor	%xmm5,%xmm2
745	psrld	$4,%xmm1	# move the high nibbles down into index position
746	addl	$16,%edx	# advance to the next round key; also leaves ZF clear for the first jnz
747.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
748	leal	192(%ebp),%ebx	# %ebx = base+192, indexed by %ecx below
749	pxor	%xmm2,%xmm0
750	jmp	.L000enc_entry
751.align	16
# Loop body: combine the table lookups prepared by .L000enc_entry with the
# round key in %xmm5, then apply the %ecx-selected shuffles.
752.L001enc_loop:
753	movdqa	32(%ebp),%xmm4
754	movdqa	48(%ebp),%xmm0
755.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
756.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
757	pxor	%xmm5,%xmm4	# mix in the round key
758	movdqa	64(%ebp),%xmm5
759	pxor	%xmm4,%xmm0
760	movdqa	-64(%ebx,%ecx,1),%xmm1
761.byte	102,15,56,0,234	# pshufb %xmm2,%xmm5
762	movdqa	80(%ebp),%xmm2
763	movdqa	(%ebx,%ecx,1),%xmm4
764.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
765	movdqa	%xmm0,%xmm3
766	pxor	%xmm5,%xmm2
767.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
768	addl	$16,%edx	# next round key
769	pxor	%xmm2,%xmm0
770.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
771	addl	$16,%ecx
772	pxor	%xmm0,%xmm3
773.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
774	andl	$48,%ecx	# keep the index within 0, 16, 32, 48
775	subl	$1,%eax	# sets ZF for the jnz at the end of .L000enc_entry
776	pxor	%xmm3,%xmm0
# Shared entry: split %xmm0 into nibbles and perform the %xmm7 / -32(%ebp)
# lookups, leaving results in %xmm2 and %xmm3 and the next round key in %xmm5.
777.L000enc_entry:
778	movdqa	%xmm6,%xmm1
779	movdqa	-32(%ebp),%xmm5
780	pandn	%xmm0,%xmm1
781	psrld	$4,%xmm1
782	pand	%xmm6,%xmm0
783.byte	102,15,56,0,232	# pshufb %xmm0,%xmm5
784	movdqa	%xmm7,%xmm3
785	pxor	%xmm1,%xmm0
786.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
787	movdqa	%xmm7,%xmm4
788	pxor	%xmm5,%xmm3
789.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
790	movdqa	%xmm7,%xmm2
791	pxor	%xmm5,%xmm4
792.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
793	movdqa	%xmm7,%xmm3
794	pxor	%xmm0,%xmm2
795.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
796	movdqu	(%edx),%xmm5	# round key for the step that follows
797	pxor	%xmm1,%xmm3
798	jnz	.L001enc_loop	# flags come from subl (or the initial addl), not from the SSE code
# Final step: lookups at base+96 / base+112, last round key, and the output
# shuffle selected by %ecx at 64(%ebx,%ecx,1).
799	movdqa	96(%ebp),%xmm4
800	movdqa	112(%ebp),%xmm0
801.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
802	pxor	%xmm5,%xmm4
803.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
804	movdqa	64(%ebx,%ecx,1),%xmm1
805	pxor	%xmm4,%xmm0
806.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
807	ret
808.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
# _vpaes_decrypt_core: decrypt the 16-byte block in %xmm0 with the schedule at %edx.
# In:   %xmm0 = input block
#       %edx  = key schedule: 16-byte round keys starting at (%edx), loop
#               count stored at 240(%edx)
#       %ebp  = constants base (.L_vpaes_consts+0x30)
#       %xmm6 = nibble mask and %xmm7 = table row, as loaded by _vpaes_preheat
# Out:  %xmm0 = output block
# Clobbers: %eax, %ebx, %ecx, %edx, %xmm1-%xmm5, flags. No stack accesses.
# The .byte lines are hand-encoded SSSE3 pshufb / palignr instructions; each
# carries its mnemonic as a trailing comment.
# Ordering matters: the jnz in .L002dec_entry tests the zero flag left by
# subl in the loop body (or by addl on the first pass), relying on the SSE
# instructions and leal in between not writing flags.
809.type	_vpaes_decrypt_core,@function
810.align	16
811_vpaes_decrypt_core:
812	leal	608(%ebp),%ebx	# %ebx = base+608; all loop tables are addressed from here
813	movl	240(%edx),%eax	# %eax = loop count from the key schedule
814	movdqa	%xmm6,%xmm1
815	movdqa	-64(%ebx),%xmm2
816	pandn	%xmm0,%xmm1
817	movl	%eax,%ecx
818	psrld	$4,%xmm1	# high nibbles moved down into index position
819	movdqu	(%edx),%xmm5	# first round key (unaligned load)
820	shll	$4,%ecx
821	pand	%xmm6,%xmm0	# low nibbles
822.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
823	movdqa	-48(%ebx),%xmm0
824	xorl	$48,%ecx
825.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
826	andl	$48,%ecx	# %ecx = ((count << 4) ^ 48) & 48
827	pxor	%xmm5,%xmm2
828	movdqa	176(%ebp),%xmm5	# shuffle control, rotated by palignr once per loop pass
829	pxor	%xmm2,%xmm0
830	addl	$16,%edx	# next round key; also leaves ZF clear for the first jnz
831	leal	-352(%ebx,%ecx,1),%ecx	# %ecx = address base+256+index of the final output shuffle
832	jmp	.L002dec_entry
833.align	16
# Loop body: four table-pair lookups (at -32/-16, 0/16, 32/48, 64/80 from
# %ebx) accumulated into %xmm0, with a pshufb by %xmm5 between the pairs.
# On entry %xmm0 already holds the round key loaded by .L002dec_entry.
834.L003dec_loop:
835	movdqa	-32(%ebx),%xmm4
836	movdqa	-16(%ebx),%xmm1
837.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
838.byte	102,15,56,0,203	# pshufb %xmm3,%xmm1
839	pxor	%xmm4,%xmm0
840	movdqa	(%ebx),%xmm4
841	pxor	%xmm1,%xmm0
842	movdqa	16(%ebx),%xmm1
843.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
844.byte	102,15,56,0,197	# pshufb %xmm5,%xmm0
845.byte	102,15,56,0,203	# pshufb %xmm3,%xmm1
846	pxor	%xmm4,%xmm0
847	movdqa	32(%ebx),%xmm4
848	pxor	%xmm1,%xmm0
849	movdqa	48(%ebx),%xmm1
850.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
851.byte	102,15,56,0,197	# pshufb %xmm5,%xmm0
852.byte	102,15,56,0,203	# pshufb %xmm3,%xmm1
853	pxor	%xmm4,%xmm0
854	movdqa	64(%ebx),%xmm4
855	pxor	%xmm1,%xmm0
856	movdqa	80(%ebx),%xmm1
857.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
858.byte	102,15,56,0,197	# pshufb %xmm5,%xmm0
859.byte	102,15,56,0,203	# pshufb %xmm3,%xmm1
860	pxor	%xmm4,%xmm0
861	addl	$16,%edx	# next round key
862.byte	102,15,58,15,237,12	# palignr $12,%xmm5,%xmm5 (rotate the shuffle control)
863	pxor	%xmm1,%xmm0
864	subl	$1,%eax	# sets ZF for the jnz at the end of .L002dec_entry
# Shared entry: split %xmm0 into nibbles and perform the %xmm7 / -32(%ebp)
# lookups, leaving results in %xmm2 and %xmm3 and the next round key in %xmm0.
865.L002dec_entry:
866	movdqa	%xmm6,%xmm1
867	movdqa	-32(%ebp),%xmm2
868	pandn	%xmm0,%xmm1
869	pand	%xmm6,%xmm0
870	psrld	$4,%xmm1
871.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
872	movdqa	%xmm7,%xmm3
873	pxor	%xmm1,%xmm0
874.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
875	movdqa	%xmm7,%xmm4
876	pxor	%xmm2,%xmm3
877.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
878	pxor	%xmm2,%xmm4
879	movdqa	%xmm7,%xmm2
880.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
881	movdqa	%xmm7,%xmm3
882	pxor	%xmm0,%xmm2
883.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
884	movdqu	(%edx),%xmm0	# round key for the step that follows
885	pxor	%xmm1,%xmm3
886	jnz	.L003dec_loop	# flags come from subl (or the initial addl), not from the SSE code
# Final step: lookups at 96/112 from %ebx, last round key, and the output
# shuffle whose address was computed into %ecx at the top.
887	movdqa	96(%ebx),%xmm4
888.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
889	pxor	%xmm0,%xmm4
890	movdqa	112(%ebx),%xmm0
891	movdqa	(%ecx),%xmm2
892.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
893	pxor	%xmm4,%xmm0
894.byte	102,15,56,0,194	# pshufb %xmm2,%xmm0
895	ret
896.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core
# _vpaes_schedule_core: expand a user key into a round-key schedule.
# In:   %esi = user key bytes
#       %eax = key size in bits (compared with 192: above takes the 256-bit
#              path, equal the 192-bit path, anything lower the 128-bit path)
#       %edx = address where the first round key is stored; the helpers move
#              it by +16 (encrypt) or -16 (decrypt) per stored key
#       %ecx = byte index selecting a 16-byte row at 256(%ebp,%ecx,1)
#       %edi = zero for an encryption schedule, non-zero for decryption
#       %ebp = (.L_vpaes_consts+0x30) minus the address of the label placed
#              directly after the call to this routine
# Out:  round keys stored through %edx; %xmm0-%xmm7 are zeroed before return.
# Clobbers: %eax, %ebx, %ecx, %edx, %ebp, flags, and %esi on the decrypt path
#           (via _vpaes_schedule_mangle).
# Stack: uses 32 bytes of caller scratch at 4(%esp) and 20(%esp) with aligned
#        moves, so the caller must have %esp 16-byte aligned at the call and
#        must own the 32 bytes starting at its own (%esp).
#        _vpaes_schedule_round reads the first slot as 8(%esp), which only
#        lines up when it is called directly from this routine.
897.type	_vpaes_schedule_core,@function
898.align	16
899_vpaes_schedule_core:
900	addl	(%esp),%ebp	# add the return address: %ebp becomes the absolute constants base
901	movdqu	(%esi),%xmm0	# first 16 key bytes
902	movdqa	320(%ebp),%xmm2	# row at base+320; kept in the stack slot below and rotated by _vpaes_schedule_round
903	movdqa	%xmm0,%xmm3	# keep an untransformed copy for the decrypt store
904	leal	(%ebp),%ebx	# transform tables at base+0 / base+16
905	movdqa	%xmm2,4(%esp)
906	call	_vpaes_schedule_transform
907	movdqa	%xmm0,%xmm7	# %xmm7 = transformed key, the running schedule state
908	testl	%edi,%edi
909	jnz	.L004schedule_am_decrypting
910	movdqu	%xmm0,(%edx)	# encrypt: store the transformed key as the first round key
911	jmp	.L005schedule_go
# Decrypt: store the original key bytes shuffled by the %ecx-selected row,
# then flip the index with xor 48.
912.L004schedule_am_decrypting:
913	movdqa	256(%ebp,%ecx,1),%xmm1
914.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
915	movdqu	%xmm3,(%edx)
916	xorl	$48,%ecx
917.L005schedule_go:
918	cmpl	$192,%eax
919	ja	.L006schedule_256
920	je	.L007schedule_192
# 128-bit path: ten rounds, each followed by a mangle-and-store except the
# last, which goes to .L010schedule_mangle_last instead.
921.L008schedule_128:
922	movl	$10,%eax
923.L009loop_schedule_128:
924	call	_vpaes_schedule_round
925	decl	%eax
926	jz	.L010schedule_mangle_last
927	call	_vpaes_schedule_mangle
928	jmp	.L009loop_schedule_128
929.align	16
# 192-bit path: transform key bytes 8..23, keep them in %xmm6 with the low
# half cleared, then run four passes of the round/smear/mangle sequence below.
930.L007schedule_192:
931	movdqu	8(%esi),%xmm0
932	call	_vpaes_schedule_transform
933	movdqa	%xmm0,%xmm6
934	pxor	%xmm4,%xmm4
935	movhlps	%xmm4,%xmm6	# low 8 bytes of %xmm6 = 0
936	movl	$4,%eax
937.L011loop_schedule_192:
938	call	_vpaes_schedule_round
939.byte	102,15,58,15,198,8	# palignr $8,%xmm6,%xmm0
940	call	_vpaes_schedule_mangle
941	call	_vpaes_schedule_192_smear
942	call	_vpaes_schedule_mangle
943	call	_vpaes_schedule_round
944	decl	%eax
945	jz	.L010schedule_mangle_last
946	call	_vpaes_schedule_mangle
947	call	_vpaes_schedule_192_smear
948	jmp	.L011loop_schedule_192
949.align	16
# 256-bit path: transform key bytes 16..31, then seven passes alternating a
# full round with a low round that runs on the saved %xmm6 state.
950.L006schedule_256:
951	movdqu	16(%esi),%xmm0
952	call	_vpaes_schedule_transform
953	movl	$7,%eax
954.L012loop_schedule_256:
955	call	_vpaes_schedule_mangle
956	movdqa	%xmm0,%xmm6
957	call	_vpaes_schedule_round
958	decl	%eax
959	jz	.L010schedule_mangle_last
960	call	_vpaes_schedule_mangle
961	pshufd	$255,%xmm0,%xmm0	# broadcast the top dword
962	movdqa	%xmm7,20(%esp)	# park %xmm7 in the second scratch slot
963	movdqa	%xmm6,%xmm7
964	call	.L_vpaes_schedule_low_round
965	movdqa	20(%esp),%xmm7
966	jmp	.L012loop_schedule_256
967.align	16
# Last round key: pick the output transform tables (base+384 for decrypt,
# base+352 for encrypt after an extra shuffle), xor with the 0x5B row,
# transform, store, and wipe the xmm registers.
968.L010schedule_mangle_last:
969	leal	384(%ebp),%ebx
970	testl	%edi,%edi
971	jnz	.L013schedule_mangle_last_dec
972	movdqa	256(%ebp,%ecx,1),%xmm1
973.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
974	leal	352(%ebp),%ebx
975	addl	$32,%edx	# with the -16 below: net +16 for encrypt, -16 for decrypt
976.L013schedule_mangle_last_dec:
977	addl	$-16,%edx
978	pxor	336(%ebp),%xmm0
979	call	_vpaes_schedule_transform
980	movdqu	%xmm0,(%edx)
981	pxor	%xmm0,%xmm0	# clear every xmm register before returning
982	pxor	%xmm1,%xmm1
983	pxor	%xmm2,%xmm2
984	pxor	%xmm3,%xmm3
985	pxor	%xmm4,%xmm4
986	pxor	%xmm5,%xmm5
987	pxor	%xmm6,%xmm6
988	pxor	%xmm7,%xmm7
989	ret
990.size	_vpaes_schedule_core,.-_vpaes_schedule_core
# _vpaes_schedule_192_smear: mixing step used only by the 192-bit schedule path.
# In:   %xmm6, %xmm7 = schedule state
# Out:  %xmm0 = %xmm6 xor shuffled(%xmm6) xor shuffled(%xmm7)
#       %xmm6 = the same value with its low 8 bytes cleared
# Clobbers: %xmm1 (left as zero). %xmm7 is only read.
991.type	_vpaes_schedule_192_smear,@function
992.align	16
993_vpaes_schedule_192_smear:
994	pshufd	$128,%xmm6,%xmm1	# dwords of %xmm1 = %xmm6 dwords (0, 0, 0, 2), lowest first
995	pshufd	$254,%xmm7,%xmm0	# dwords of %xmm0 = %xmm7 dwords (2, 3, 3, 3), lowest first
996	pxor	%xmm1,%xmm6
997	pxor	%xmm1,%xmm1
998	pxor	%xmm0,%xmm6
999	movdqa	%xmm6,%xmm0
1000	movhlps	%xmm1,%xmm6	# low 8 bytes of %xmm6 = 0 (high half of the zeroed %xmm1)
1001	ret
1002.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
# _vpaes_schedule_round: one round of the key schedule.
# In:   %xmm0, %xmm7 = schedule state
#       %ebp = constants base (.L_vpaes_consts+0x30)
#       slot 8(%esp) = 16-byte rotating constant; this is the slot that
#       _vpaes_schedule_core fills at its own 4(%esp), so this entry point
#       must be called directly from that routine
# Out:  %xmm0 = %xmm7 = new state; the stack slot is rotated by one byte
# Clobbers: %xmm1-%xmm5. Integer registers and flags are not written.
# Second entry point .L_vpaes_schedule_low_round skips the prologue (no stack
# access, no rotation of %xmm0) and runs only the smear and table lookups.
1003.type	_vpaes_schedule_round,@function
1004.align	16
1005_vpaes_schedule_round:
1006	movdqa	8(%esp),%xmm2
1007	pxor	%xmm1,%xmm1
1008.byte	102,15,58,15,202,15	# palignr $15,%xmm2,%xmm1 (byte 0 of %xmm1 = top byte of %xmm2, rest zero)
1009.byte	102,15,58,15,210,15	# palignr $15,%xmm2,%xmm2 (rotate the constant by one byte)
1010	pxor	%xmm1,%xmm7	# fold the extracted byte into the state
1011	pshufd	$255,%xmm0,%xmm0	# broadcast the top dword
1012.byte	102,15,58,15,192,1	# palignr $1,%xmm0,%xmm0 (rotate each broadcast dword by one byte)
1013	movdqa	%xmm2,8(%esp)	# save the rotated constant for the next round
1014.L_vpaes_schedule_low_round:
# Smear %xmm7: xor it with itself shifted left by 4 and then by 8 bytes, then
# xor with the 0x5B row.
1015	movdqa	%xmm7,%xmm1
1016	pslldq	$4,%xmm7
1017	pxor	%xmm1,%xmm7
1018	movdqa	%xmm7,%xmm1
1019	pslldq	$8,%xmm7
1020	pxor	%xmm1,%xmm7
1021	pxor	336(%ebp),%xmm7
# Nibble-split %xmm0 and run it through the same lookup sequence the cipher
# cores use (tables at base-48, base-32, base+32, base+48).
1022	movdqa	-16(%ebp),%xmm4	# 0x0F nibble mask
1023	movdqa	-48(%ebp),%xmm5
1024	movdqa	%xmm4,%xmm1
1025	pandn	%xmm0,%xmm1
1026	psrld	$4,%xmm1
1027	pand	%xmm4,%xmm0
1028	movdqa	-32(%ebp),%xmm2
1029.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
1030	pxor	%xmm1,%xmm0
1031	movdqa	%xmm5,%xmm3
1032.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
1033	pxor	%xmm2,%xmm3
1034	movdqa	%xmm5,%xmm4
1035.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
1036	pxor	%xmm2,%xmm4
1037	movdqa	%xmm5,%xmm2
1038.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
1039	pxor	%xmm0,%xmm2
1040	movdqa	%xmm5,%xmm3
1041.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
1042	pxor	%xmm1,%xmm3
1043	movdqa	32(%ebp),%xmm4
1044.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
1045	movdqa	48(%ebp),%xmm0
1046.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
1047	pxor	%xmm4,%xmm0
1048	pxor	%xmm7,%xmm0	# combine with the smeared state
1049	movdqa	%xmm0,%xmm7
1050	ret
1051.size	_vpaes_schedule_round,.-_vpaes_schedule_round
# _vpaes_schedule_transform: per-byte table transform of %xmm0.
# In:   %xmm0 = value to transform
#       %ebx  = address of two consecutive 16-byte tables (low-nibble table
#               at 0, high-nibble table at 16)
#       %ebp  = constants base (the nibble mask is read from -16(%ebp))
# Out:  %xmm0 = table0[low nibbles] xor table1[high nibbles], byte by byte
# Clobbers: %xmm1, %xmm2. Integer registers and flags are not written.
1052.type	_vpaes_schedule_transform,@function
1053.align	16
1054_vpaes_schedule_transform:
1055	movdqa	-16(%ebp),%xmm2
1056	movdqa	%xmm2,%xmm1
1057	pandn	%xmm0,%xmm1
1058	psrld	$4,%xmm1	# %xmm1 = high nibbles moved into index position
1059	pand	%xmm2,%xmm0	# %xmm0 = low nibbles
1060	movdqa	(%ebx),%xmm2
1061.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
1062	movdqa	16(%ebx),%xmm0
1063.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
1064	pxor	%xmm2,%xmm0
1065	ret
1066.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
# _vpaes_schedule_mangle: convert the current schedule state into its stored
# form and write it to the key schedule.
# In:   %xmm0 = schedule state (only read; left unchanged)
#       %edx  = address of the previously stored round key
#       %ecx  = byte index selecting the shuffle row at 256(%ebp,%ecx,1)
#       %edi  = zero for encryption, non-zero for decryption
#       %ebp  = constants base (.L_vpaes_consts+0x30)
# Out:  %edx moved by +16 (encrypt) or -16 (decrypt) and the new key stored
#       at (%edx); %ecx = (%ecx - 16) & 48
# Clobbers: %xmm1-%xmm5, flags, and %esi on the decrypt path.
1067.type	_vpaes_schedule_mangle,@function
1068.align	16
1069_vpaes_schedule_mangle:
1070	movdqa	%xmm0,%xmm4
1071	movdqa	128(%ebp),%xmm5	# shuffle control applied repeatedly below
1072	testl	%edi,%edi
1073	jnz	.L014schedule_mangle_dec
# Encrypt: xor with the 0x5B row, then xor together three successive
# shuffles of the value by %xmm5.
1074	addl	$16,%edx
1075	pxor	336(%ebp),%xmm4
1076.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
1077	movdqa	%xmm4,%xmm3
1078.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
1079	pxor	%xmm4,%xmm3
1080.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
1081	pxor	%xmm4,%xmm3
1082	jmp	.L015schedule_mangle_both
1083.align	16
# Decrypt: nibble-split the value and accumulate four table-pair lookups
# from base+416 onward, shuffling the accumulator by %xmm5 between pairs.
1084.L014schedule_mangle_dec:
1085	movdqa	-16(%ebp),%xmm2	# 0x0F nibble mask
1086	leal	416(%ebp),%esi	# %esi = table base (overwrites the caller value)
1087	movdqa	%xmm2,%xmm1
1088	pandn	%xmm4,%xmm1
1089	psrld	$4,%xmm1	# %xmm1 = high nibbles
1090	pand	%xmm2,%xmm4	# %xmm4 = low nibbles
1091	movdqa	(%esi),%xmm2
1092.byte	102,15,56,0,212	# pshufb %xmm4,%xmm2
1093	movdqa	16(%esi),%xmm3
1094.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
1095	pxor	%xmm2,%xmm3
1096.byte	102,15,56,0,221	# pshufb %xmm5,%xmm3
1097	movdqa	32(%esi),%xmm2
1098.byte	102,15,56,0,212	# pshufb %xmm4,%xmm2
1099	pxor	%xmm3,%xmm2
1100	movdqa	48(%esi),%xmm3
1101.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
1102	pxor	%xmm2,%xmm3
1103.byte	102,15,56,0,221	# pshufb %xmm5,%xmm3
1104	movdqa	64(%esi),%xmm2
1105.byte	102,15,56,0,212	# pshufb %xmm4,%xmm2
1106	pxor	%xmm3,%xmm2
1107	movdqa	80(%esi),%xmm3
1108.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
1109	pxor	%xmm2,%xmm3
1110.byte	102,15,56,0,221	# pshufb %xmm5,%xmm3
1111	movdqa	96(%esi),%xmm2
1112.byte	102,15,56,0,212	# pshufb %xmm4,%xmm2
1113	pxor	%xmm3,%xmm2
1114	movdqa	112(%esi),%xmm3
1115.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
1116	pxor	%xmm2,%xmm3
1117	addl	$-16,%edx
# Common tail: apply the %ecx-selected shuffle, step the index, store.
1118.L015schedule_mangle_both:
1119	movdqa	256(%ebp,%ecx,1),%xmm1
1120.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
1121	addl	$-16,%ecx
1122	andl	$48,%ecx
1123	movdqu	%xmm3,(%edx)
1124	ret
1125.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
# vpaes_set_encrypt_key: build an encryption key schedule.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, user key bytes
#   second (24(%esp)) -> %eax, key size in bits
#   third  (28(%esp)) -> %edx, key schedule to fill
# Stores (bits >> 5) + 5 at offset 240 of the schedule, then calls
# _vpaes_schedule_core with %edi = 0 and %ecx = 48.
# Returns 0 in %eax. %ebp, %ebx, %esi and %edi are saved and restored.
# The routine switches to a 16-byte aligned scratch area below the current
# stack and keeps the original %esp at 48(%esp) until it returns.
1126.globl	vpaes_set_encrypt_key
1127.type	vpaes_set_encrypt_key,@function
1128.align	16
1129vpaes_set_encrypt_key:
1130.L_vpaes_set_encrypt_key_begin:
1131	pushl	%ebp
1132	pushl	%ebx
1133	pushl	%esi
1134	pushl	%edi
1135	movl	20(%esp),%esi
1136	leal	-56(%esp),%ebx	# reserve 56 bytes ...
1137	movl	24(%esp),%eax
1138	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
1139	movl	28(%esp),%edx
1140	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
1141	movl	%ebx,48(%esp)	# remember the original %esp above the scratch slots
1142	movl	%eax,%ebx
1143	shrl	$5,%ebx
1144	addl	$5,%ebx
1145	movl	%ebx,240(%edx)	# loop count read later by the cipher cores
1146	movl	$48,%ecx
1147	movl	$0,%edi	# zero selects the encryption schedule
1148	leal	.L_vpaes_consts+0x30-.L016pic_point,%ebp	# offset from the return address to the constants base
1149	call	_vpaes_schedule_core
1150.L016pic_point:
1151	movl	48(%esp),%esp	# back to the original stack
1152	xorl	%eax,%eax
1153	popl	%edi
1154	popl	%esi
1155	popl	%ebx
1156	popl	%ebp
1157	ret
1158.size	vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
# vpaes_set_decrypt_key: build a decryption key schedule.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, user key bytes
#   second (24(%esp)) -> %eax, key size in bits
#   third  (28(%esp)) -> %edx, key schedule to fill
# Stores (bits >> 5) + 5 at offset 240 of the schedule, points %edx at
# schedule + 16 + 16 * that count so the keys are written from the far end
# backwards, then calls _vpaes_schedule_core with %edi = 1 and
# %ecx = ((bits >> 1) & 32) ^ 32.
# Returns 0 in %eax. %ebp, %ebx, %esi and %edi are saved and restored.
# The routine switches to a 16-byte aligned scratch area below the current
# stack and keeps the original %esp at 48(%esp) until it returns.
1159.globl	vpaes_set_decrypt_key
1160.type	vpaes_set_decrypt_key,@function
1161.align	16
1162vpaes_set_decrypt_key:
1163.L_vpaes_set_decrypt_key_begin:
1164	pushl	%ebp
1165	pushl	%ebx
1166	pushl	%esi
1167	pushl	%edi
1168	movl	20(%esp),%esi
1169	leal	-56(%esp),%ebx	# reserve 56 bytes ...
1170	movl	24(%esp),%eax
1171	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
1172	movl	28(%esp),%edx
1173	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
1174	movl	%ebx,48(%esp)	# remember the original %esp above the scratch slots
1175	movl	%eax,%ebx
1176	shrl	$5,%ebx
1177	addl	$5,%ebx
1178	movl	%ebx,240(%edx)	# loop count read later by the cipher cores
1179	shll	$4,%ebx	# count * 16 bytes
1180	leal	16(%edx,%ebx,1),%edx	# start storing at the far end of the schedule
1181	movl	$1,%edi	# non-zero selects the decryption schedule
1182	movl	%eax,%ecx
1183	shrl	$1,%ecx
1184	andl	$32,%ecx
1185	xorl	$32,%ecx	# initial shuffle index: 0 or 32 depending on the key size
1186	leal	.L_vpaes_consts+0x30-.L017pic_point,%ebp	# offset from the return address to the constants base
1187	call	_vpaes_schedule_core
1188.L017pic_point:
1189	movl	48(%esp),%esp	# back to the original stack
1190	xorl	%eax,%eax
1191	popl	%edi
1192	popl	%esi
1193	popl	%ebx
1194	popl	%ebp
1195	ret
1196.size	vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
# vpaes_encrypt: encrypt one 16-byte block.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, input block (unaligned load)
#   second (24(%esp)) -> %edi, output block (unaligned store)
#   third  (28(%esp)) -> %edx, key schedule
# No value is placed in %eax for the caller. %ebp, %ebx, %esi and %edi are
# saved and restored. _vpaes_preheat is called before the stack switch, so
# the argument offsets are still relative to the original %esp.
1197.globl	vpaes_encrypt
1198.type	vpaes_encrypt,@function
1199.align	16
1200vpaes_encrypt:
1201.L_vpaes_encrypt_begin:
1202	pushl	%ebp
1203	pushl	%ebx
1204	pushl	%esi
1205	pushl	%edi
1206	leal	.L_vpaes_consts+0x30-.L018pic_point,%ebp	# offset from the return address to the constants base
1207	call	_vpaes_preheat
1208.L018pic_point:
1209	movl	20(%esp),%esi
1210	leal	-56(%esp),%ebx	# reserve 56 bytes ...
1211	movl	24(%esp),%edi
1212	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
1213	movl	28(%esp),%edx
1214	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
1215	movl	%ebx,48(%esp)	# remember the original %esp
1216	movdqu	(%esi),%xmm0
1217	call	_vpaes_encrypt_core
1218	movdqu	%xmm0,(%edi)
1219	movl	48(%esp),%esp	# back to the original stack
1220	popl	%edi
1221	popl	%esi
1222	popl	%ebx
1223	popl	%ebp
1224	ret
1225.size	vpaes_encrypt,.-.L_vpaes_encrypt_begin
# vpaes_decrypt: decrypt one 16-byte block.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, input block (unaligned load)
#   second (24(%esp)) -> %edi, output block (unaligned store)
#   third  (28(%esp)) -> %edx, key schedule
# No value is placed in %eax for the caller. %ebp, %ebx, %esi and %edi are
# saved and restored. _vpaes_preheat is called before the stack switch, so
# the argument offsets are still relative to the original %esp.
1226.globl	vpaes_decrypt
1227.type	vpaes_decrypt,@function
1228.align	16
1229vpaes_decrypt:
1230.L_vpaes_decrypt_begin:
1231	pushl	%ebp
1232	pushl	%ebx
1233	pushl	%esi
1234	pushl	%edi
1235	leal	.L_vpaes_consts+0x30-.L019pic_point,%ebp	# offset from the return address to the constants base
1236	call	_vpaes_preheat
1237.L019pic_point:
1238	movl	20(%esp),%esi
1239	leal	-56(%esp),%ebx	# reserve 56 bytes ...
1240	movl	24(%esp),%edi
1241	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
1242	movl	28(%esp),%edx
1243	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
1244	movl	%ebx,48(%esp)	# remember the original %esp
1245	movdqu	(%esi),%xmm0
1246	call	_vpaes_decrypt_core
1247	movdqu	%xmm0,(%edi)
1248	movl	48(%esp),%esp	# back to the original stack
1249	popl	%edi
1250	popl	%esi
1251	popl	%ebx
1252	popl	%ebp
1253	ret
1254.size	vpaes_decrypt,.-.L_vpaes_decrypt_begin
# vpaes_cbc_encrypt: CBC-mode encryption or decryption of whole 16-byte blocks.
# Arguments are read from the stack after the four register pushes:
#   first  (20(%esp)) -> %esi, input buffer
#   second (24(%esp)) -> %edi, output buffer
#   third  (28(%esp)) -> %eax, length in bytes
#   fourth (32(%esp)) -> %edx, key schedule
#   fifth  (36(%esp)) -> %ebp, 16-byte IV (read at start, overwritten at end)
#   sixth  (40(%esp)) -> %ecx, direction: zero runs the decrypt loop,
#                        anything else the encrypt loop
# A length below 16 returns at once without reading or writing the buffers
# or the IV. Otherwise length / 16 (rounded down) blocks are processed and
# any trailing partial block is ignored.
# Local frame after the stack switch (%esp is 16-byte aligned):
#   slot  0(%esp) = output minus input, so (%ebx,%esi,1) addresses the output
#   slot  4(%esp) = key schedule (reloaded each pass; the cores advance %edx)
#   slot  8(%esp) = IV address
#   slot 16(%esp) = previous ciphertext block (decrypt loop only)
#   slot 32(%esp) = current ciphertext block (decrypt loop only)
#   slot 48(%esp) = original %esp
# %xmm1 carries the chaining block between passes. %ebp, %ebx, %esi and %edi
# are saved and restored.
1255.globl	vpaes_cbc_encrypt
1256.type	vpaes_cbc_encrypt,@function
1257.align	16
1258vpaes_cbc_encrypt:
1259.L_vpaes_cbc_encrypt_begin:
1260	pushl	%ebp
1261	pushl	%ebx
1262	pushl	%esi
1263	pushl	%edi
1264	movl	20(%esp),%esi
1265	movl	24(%esp),%edi
1266	movl	28(%esp),%eax
1267	movl	32(%esp),%edx
1268	subl	$16,%eax	# %eax = length - 16; borrow means less than one block
1269	jc	.L020cbc_abort
1270	leal	-56(%esp),%ebx	# reserve 56 bytes ...
1271	movl	36(%esp),%ebp
1272	andl	$-16,%ebx	# ... and round down to a 16-byte boundary
1273	movl	40(%esp),%ecx
1274	xchgl	%esp,%ebx	# switch stacks; %ebx = original %esp
1275	movdqu	(%ebp),%xmm1	# %xmm1 = IV
1276	subl	%esi,%edi	# %edi = output - input
1277	movl	%ebx,48(%esp)
1278	movl	%edi,(%esp)
1279	movl	%edx,4(%esp)
1280	movl	%ebp,8(%esp)
1281	movl	%eax,%edi	# %edi = bytes remaining after the current block
1282	leal	.L_vpaes_consts+0x30-.L021pic_point,%ebp	# offset from the return address to the constants base
1283	call	_vpaes_preheat
1284.L021pic_point:
1285	cmpl	$0,%ecx	# direction is tested once, before the cores clobber %ecx
1286	je	.L022cbc_dec_loop
1287	jmp	.L023cbc_enc_loop
1288.align	16
# Encrypt: xor the plaintext with the chaining block, encrypt, and use the
# result as the next chaining block.
1289.L023cbc_enc_loop:
1290	movdqu	(%esi),%xmm0
1291	pxor	%xmm1,%xmm0
1292	call	_vpaes_encrypt_core
1293	movl	(%esp),%ebx	# output - input
1294	movl	4(%esp),%edx	# rewind the key schedule pointer
1295	movdqa	%xmm0,%xmm1
1296	movdqu	%xmm0,(%ebx,%esi,1)
1297	leal	16(%esi),%esi
1298	subl	$16,%edi
1299	jnc	.L023cbc_enc_loop	# continue while at least one more whole block remains
1300	jmp	.L024cbc_done
1301.align	16
# Decrypt: save the chaining block and the ciphertext on the stack (the core
# clobbers %xmm1), decrypt, xor with the saved chaining block, and make the
# saved ciphertext the next chaining block.
1302.L022cbc_dec_loop:
1303	movdqu	(%esi),%xmm0
1304	movdqa	%xmm1,16(%esp)
1305	movdqa	%xmm0,32(%esp)
1306	call	_vpaes_decrypt_core
1307	movl	(%esp),%ebx	# output - input
1308	movl	4(%esp),%edx	# rewind the key schedule pointer
1309	pxor	16(%esp),%xmm0
1310	movdqa	32(%esp),%xmm1
1311	movdqu	%xmm0,(%ebx,%esi,1)
1312	leal	16(%esi),%esi
1313	subl	$16,%edi
1314	jnc	.L022cbc_dec_loop	# continue while at least one more whole block remains
1315.L024cbc_done:
1316	movl	8(%esp),%ebx
1317	movl	48(%esp),%esp	# back to the original stack
1318	movdqu	%xmm1,(%ebx)	# write the final chaining block back to the IV
1319.L020cbc_abort:
1320	popl	%edi
1321	popl	%esi
1322	popl	%ebx
1323	popl	%ebp
1324	ret
1325.size	vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
1326#endif
1327