/* e_padlock-x86_64.S revision 1.1 */
#include <machine/asm.h>
.text
/*
 * padlock_capability: probe CPUID for the Centaur extended feature flags.
 *
 * In:    no register arguments are read.
 * Out:   %eax = 0 when the CPUID vendor string is not "CentaurHauls" or
 *        when the highest Centaur extended leaf is below 0xC0000001;
 *        otherwise %edx of CPUID leaf 0xC0000001 with bit 4 forced to 1.
 * Clobb: %rcx, %rdx, %r8, flags.  %rbx is overwritten by cpuid but is
 *        saved in %r8 and restored before returning.
 */
.globl	padlock_capability
.type	padlock_capability,@function
.align	16
padlock_capability:
	movq	%rbx,%r8		/* cpuid overwrites %ebx; park callee-saved %rbx */
	xorl	%eax,%eax		/* CPUID leaf 0: vendor string */
	cpuid
	xorl	%eax,%eax		/* default return value: 0 */
	cmpl	$0x746e6543,%ebx	/* "Cent" */
	jne	.Lnoluck
	cmpl	$0x48727561,%edx	/* "aurH" */
	jne	.Lnoluck
	cmpl	$0x736c7561,%ecx	/* "auls" */
	jne	.Lnoluck
	movl	$0xC0000000,%eax	/* query the highest Centaur extended leaf */
	cpuid
	movl	%eax,%edx
	xorl	%eax,%eax		/* still return 0 if leaf 0xC0000001 is absent */
	cmpl	$0xC0000001,%edx
	jb	.Lnoluck
	movl	$0xC0000001,%eax	/* Centaur extended feature flags */
	cpuid
	movl	%edx,%eax		/* result = flags reported in %edx ... */
	andl	$0xffffffef,%eax
	orl	$0x10,%eax		/* ... with bit 4 always set */
.Lnoluck:
	movq	%r8,%rbx
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_capability,.-padlock_capability

/*
 * padlock_key_bswap: byte-swap an array of 32-bit words in place.
 *
 * In:    %rdi = base address.  The 32-bit word count is read from
 *        240(%rdi); the words swapped start at 0(%rdi).
 * Out:   nothing meaningful (%eax holds the last swapped word).
 * Clobb: %rax, %rdx, %rdi, flags.
 *
 * The loop tests its counter only after the first iteration, so the
 * count at 240(%rdi) must be non-zero; a zero count would wrap around.
 */
.globl	padlock_key_bswap
.type	padlock_key_bswap,@function
.align	16
padlock_key_bswap:
	movl	240(%rdi),%edx		/* %edx = number of 32-bit words to swap */
.Lbswap_loop:
	movl	(%rdi),%eax
	bswapl	%eax
	movl	%eax,(%rdi)
	leaq	4(%rdi),%rdi		/* advance to the next word */
	subl	$1,%edx
	jnz	.Lbswap_loop
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_key_bswap,.-padlock_key_bswap

/*
 * padlock_verify_context: exported wrapper around _padlock_verify_ctx.
 *
 * In:    %rdi = context pointer.
 * Out:   none defined; %rax is left holding the address of
 *        .Lpadlock_saved_context.
 * Clobb: %rax, %rdx, %r8, flags.
 *
 * Sets up what the helper expects: the context pointer in %rdx, the
 * address of the saved-context slot in %rax, and a copy of the flags
 * register on the stack directly above the helper's return address.
 */
.globl	padlock_verify_context
.type	padlock_verify_context,@function
.align	16
padlock_verify_context:
	movq	%rdi,%rdx
	pushf				/* flags image, read by the helper at 8(%rsp) */
	leaq	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	leaq	8(%rsp),%rsp		/* discard the pushed flags without popping them */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_verify_context,.-padlock_verify_context

/*
 * _padlock_verify_ctx: file-local helper (not exported) with a private
 * register-based calling convention.
 *
 * In:    %rax = address of the saved-context quadword,
 *        %rdx = context pointer about to be used,
 *        8(%rsp) = flags image pushed by the caller just before the call.
 * Out:   the quadword at (%rax) is set to %rdx.
 * Clobb: %r8, flags.
 *
 * If bit 30 of the caller's flags image is set and the context differs
 * from the one recorded at (%rax), a pushf/popf pair is executed; this is
 * the same sequence padlock_reload_key consists of.
 * NOTE(review): bit 30 is presumably the PadLock-specific EFLAGS bit that
 * says a key is already loaded -- confirm against the VIA documentation.
 */
.type	_padlock_verify_ctx,@function
.align	16
_padlock_verify_ctx:
	movq	8(%rsp),%r8		/* caller's saved flags */
	btq	$30,%r8
	jnc	.Lverified		/* bit 30 clear: nothing to do */
	cmpq	(%rax),%rdx
	je	.Lverified		/* same context as last time */
	pushf
	popf
.Lverified:
	movq	%rdx,(%rax)		/* remember this context */
	.byte	0xf3,0xc3		/* rep ret */
.size	_padlock_verify_ctx,.-_padlock_verify_ctx

/*
 * padlock_reload_key: execute a pushf/popf pair and return.
 *
 * Takes no arguments and returns nothing; the stack pointer is unchanged
 * on return.
 * NOTE(review): going by the function name, rewriting the flags register
 * is what makes the PadLock unit reload its key on the next xcrypt --
 * confirm against the VIA documentation.
 */
.globl	padlock_reload_key
.type	padlock_reload_key,@function
.align	16
padlock_reload_key:
	pushf
	popf
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_reload_key,.-padlock_reload_key

/*
 * padlock_aes_block: process one block with the PadLock `rep xcryptecb`
 * instruction (emitted as raw bytes).
 *
 * In:    %rdi, %rsi = passed through untouched to the instruction
 *        (presumably output and input pointers, as in the bulk routines
 *        of this file),
 *        %rdx = context pointer.
 * Setup: %rcx = 1 (block count), %rbx = ctx+32, %rdx = ctx+16.
 * Clobb: %rcx, %rdx, %r8, plus whatever the instruction updates.
 *        Callee-saved %rbx is parked in %r8 and restored.
 */
.globl	padlock_aes_block
.type	padlock_aes_block,@function
.align	16
padlock_aes_block:
	movq	%rbx,%r8		/* save callee-saved %rbx */
	movq	$1,%rcx			/* exactly one block */
	leaq	32(%rdx),%rbx		/* %rbx = ctx+32 */
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16 */
.byte	0xf3,0x0f,0xa7,0xc8		/* rep xcryptecb */
	movq	%r8,%rbx
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_aes_block,.-padlock_aes_block

/*
 * padlock_xstore: issue the PadLock `xstore` instruction (raw bytes).
 *
 * In:    %rdi = passed through untouched (presumably the destination
 *        buffer for the instruction -- confirm against the VIA docs),
 *        %esi = value copied into %edx before the instruction.
 * Out:   %eax as left by the instruction; this routine does not set it.
 * Clobb: %rdx, plus whatever the instruction updates.
 */
.globl	padlock_xstore
.type	padlock_xstore,@function
.align	16
padlock_xstore:
	movl	%esi,%edx
.byte	0x0f,0xa7,0xc0			/* xstore */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_xstore,.-padlock_xstore

/*
 * padlock_sha1_oneshot: run the PadLock `rep xsha1` instruction (raw
 * bytes) with %rax = 0.
 *
 * In:    %rdi = pointer to 20 bytes of hash state (read, then written back),
 *        %rsi = passed through untouched (presumably the input pointer),
 *        %rdx = third argument, moved to %rcx for the instruction.
 * Out:   the 20 bytes at the original %rdi are updated; %eax is not a
 *        meaningful return value (it holds the last state word).
 * Clobb: %rax, %rcx, %rdx, %rdi, %xmm0, flags.
 *
 * The state is staged in a stack buffer because the caller's pointer is
 * only accessed with unaligned moves.  With the System V entry condition
 * rsp % 16 == 8, subtracting 128+8 leaves %rsp 16-byte aligned, which the
 * movaps accesses require.
 */
.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,@function
.align	16
padlock_sha1_oneshot:
	movq	%rdx,%rcx		/* third argument goes to the instruction in %rcx */
	movq	%rdi,%rdx		/* keep the caller's state pointer */
	movups	(%rdi),%xmm0		/* state bytes 0..15 */
	subq	$128+8,%rsp
	movl	16(%rdi),%eax		/* state bytes 16..19 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movl	%eax,16(%rsp)
	xorq	%rax,%rax		/* %rax = 0 */
.byte	0xf3,0x0f,0xa6,0xc8		/* rep xsha1 */
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* write the updated state back */
	movl	%eax,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot

/*
 * padlock_sha1_blocks: run the PadLock `rep xsha1` instruction (raw
 * bytes) with %rax = -1.
 *
 * In:    %rdi = pointer to 20 bytes of hash state (read, then written back),
 *        %rsi = passed through untouched (presumably the input pointer),
 *        %rdx = third argument, moved to %rcx for the instruction.
 * Out:   the 20 bytes at the original %rdi are updated; %eax is not a
 *        meaningful return value (it holds the last state word).
 * Clobb: %rax, %rcx, %rdx, %rdi, %xmm0, flags.
 *
 * The state is staged in a stack buffer; with the System V entry
 * condition rsp % 16 == 8, subtracting 128+8 leaves %rsp 16-byte aligned
 * as the movaps accesses require.
 */
.globl	padlock_sha1_blocks
.type	padlock_sha1_blocks,@function
.align	16
padlock_sha1_blocks:
	movq	%rdx,%rcx		/* third argument goes to the instruction in %rcx */
	movq	%rdi,%rdx		/* keep the caller's state pointer */
	movups	(%rdi),%xmm0		/* state bytes 0..15 */
	subq	$128+8,%rsp
	movl	16(%rdi),%eax		/* state bytes 16..19 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movl	%eax,16(%rsp)
	movq	$-1,%rax		/* %rax = -1 (the oneshot variant passes 0) */
.byte	0xf3,0x0f,0xa6,0xc8		/* rep xsha1 */
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* write the updated state back */
	movl	%eax,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha1_blocks,.-padlock_sha1_blocks

/*
 * padlock_sha256_oneshot: run the PadLock `rep xsha256` instruction (raw
 * bytes) with %rax = 0.
 *
 * In:    %rdi = pointer to 32 bytes of hash state (read, then written back),
 *        %rsi = passed through untouched (presumably the input pointer),
 *        %rdx = third argument, moved to %rcx for the instruction.
 * Out:   the 32 bytes at the original %rdi are updated; %rax is whatever
 *        the instruction leaves there.
 * Clobb: %rax, %rcx, %rdx, %rdi, %xmm0, %xmm1, flags.
 *
 * The state is staged in a stack buffer; with the System V entry
 * condition rsp % 16 == 8, subtracting 128+8 leaves %rsp 16-byte aligned
 * as the movaps accesses require.
 */
.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,@function
.align	16
padlock_sha256_oneshot:
	movq	%rdx,%rcx		/* third argument goes to the instruction in %rcx */
	movq	%rdi,%rdx		/* keep the caller's state pointer */
	movups	(%rdi),%xmm0		/* state bytes 0..15 */
	subq	$128+8,%rsp
	movups	16(%rdi),%xmm1		/* state bytes 16..31 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movaps	%xmm1,16(%rsp)
	xorq	%rax,%rax		/* %rax = 0 */
.byte	0xf3,0x0f,0xa6,0xd0		/* rep xsha256 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* write the updated state back */
	movups	%xmm1,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot

/*
 * padlock_sha256_blocks: run the PadLock `rep xsha256` instruction (raw
 * bytes) with %rax = -1.
 *
 * In:    %rdi = pointer to 32 bytes of hash state (read, then written back),
 *        %rsi = passed through untouched (presumably the input pointer),
 *        %rdx = third argument, moved to %rcx for the instruction.
 * Out:   the 32 bytes at the original %rdi are updated; %rax is whatever
 *        the instruction leaves there.
 * Clobb: %rax, %rcx, %rdx, %rdi, %xmm0, %xmm1, flags.
 *
 * The state is staged in a stack buffer; with the System V entry
 * condition rsp % 16 == 8, subtracting 128+8 leaves %rsp 16-byte aligned
 * as the movaps accesses require.
 */
.globl	padlock_sha256_blocks
.type	padlock_sha256_blocks,@function
.align	16
padlock_sha256_blocks:
	movq	%rdx,%rcx		/* third argument goes to the instruction in %rcx */
	movq	%rdi,%rdx		/* keep the caller's state pointer */
	movups	(%rdi),%xmm0		/* state bytes 0..15 */
	subq	$128+8,%rsp
	movups	16(%rdi),%xmm1		/* state bytes 16..31 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movaps	%xmm1,16(%rsp)
	movq	$-1,%rax		/* %rax = -1 (the oneshot variant passes 0) */
.byte	0xf3,0x0f,0xa6,0xd0		/* rep xsha256 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* write the updated state back */
	movups	%xmm1,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha256_blocks,.-padlock_sha256_blocks

/*
 * padlock_sha512_blocks: run the raw opcode f3 0f a6 e0 (PadLock
 * `rep xsha512`) over the caller's data.
 *
 * In:    %rdi = pointer to 64 bytes of hash state (read, then written back),
 *        %rsi = passed through untouched (presumably the input pointer),
 *        %rdx = third argument, moved to %rcx for the instruction.
 * Out:   the 64 bytes at the original %rdi are updated.
 * Clobb: %rcx, %rdx, %rdi, %xmm0-%xmm3, flags.
 *
 * Unlike the SHA-1/SHA-256 routines in this file, %rax is not initialised
 * before the instruction.  The state is staged in a stack buffer; with
 * the System V entry condition rsp % 16 == 8, subtracting 128+8 leaves
 * %rsp 16-byte aligned as the movaps accesses require.
 */
.globl	padlock_sha512_blocks
.type	padlock_sha512_blocks,@function
.align	16
padlock_sha512_blocks:
	movq	%rdx,%rcx		/* third argument goes to the instruction in %rcx */
	movq	%rdi,%rdx		/* keep the caller's state pointer */
	movups	(%rdi),%xmm0		/* load the 64-byte state */
	subq	$128+8,%rsp
	movups	16(%rdi),%xmm1
	movups	32(%rdi),%xmm2
	movups	48(%rdi),%xmm3
	movaps	%xmm0,(%rsp)		/* stage it in the aligned stack buffer */
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movaps	%xmm1,16(%rsp)
	movaps	%xmm2,32(%rsp)
	movaps	%xmm3,48(%rsp)
.byte	0xf3,0x0f,0xa6,0xe0		/* rep xsha512 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	movaps	32(%rsp),%xmm2
	movaps	48(%rsp),%xmm3
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* write the updated state back */
	movups	%xmm1,16(%rdx)
	movups	%xmm2,32(%rdx)
	movups	%xmm3,48(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha512_blocks,.-padlock_sha512_blocks
/*
 * padlock_ecb_encrypt: run the PadLock `rep xcryptecb` instruction (raw
 * bytes) over a buffer, bouncing misaligned data through a temporary
 * stack buffer.
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context pointer, %rcx = length in bytes.
 * Out:   %eax = 0 if the context pointer is not 16-byte aligned or the
 *        length is not a multiple of 16 (nothing is processed);
 *        %eax = 1 otherwise.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (the direction
 *        flag is left cleared).  %rbx and %rbp are saved and restored.
 *
 * Context as addressed here: ctx+0 is handed to the instruction in %rax,
 * ctx+16 in %rdx, ctx+32 in %rbx.  Bit 5 of the dword at ctx+16 selects
 * the "aligned" path regardless of the actual pointer alignment.
 *
 * Unaligned path: data is processed in chunks of at most 512 bytes, with
 * %rbx = chunk size, %rbp = stack pointer to restore, and %r8/%r9/%r10/
 * %r11 holding out/in/remaining/chunk across the instruction.  The stack
 * buffer is zeroed before it is released.
 *
 * Both paths check how far the end of the data is from the next 4 KiB
 * boundary.  When that is under 128 bytes, the tail of the input is
 * copied to the stack and processed from there.
 * NOTE(review): presumably this keeps the hardware from prefetching
 * across a page boundary past the caller's buffer -- confirm against the
 * VIA errata.
 */
.globl	padlock_ecb_encrypt
.type	padlock_ecb_encrypt,@function
.align	16
padlock_ecb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort paths */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lecb_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lecb_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image read by _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16 */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 at ctx+16 set: take the aligned path */
	jnz	.Lecb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = out is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = in is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lecb_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = out aligned ? 0 : -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore on exit */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* buffer size; 0 when out is aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len mod 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* allocate the buffer (lea keeps ZF) */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 when len is a multiple of 512 */
	cmpq	%rbx,%rcx
	ja	.Lecb_loop		/* more than one chunk to go */
	movq	%rsi,%rax		/* single chunk: where will it be read from? */
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no stack buffer: input is first copied to out */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* bytes from end of data to the next 4 KiB boundary */
	cmpq	$128,%rax
	movq	$-128,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 128 ? -128 : chunk */
	andq	%rax,%rbx		/* near the boundary: round chunk down to 128 */
	jz	.Lecb_unaligned_tail
	jmp	.Lecb_loop
.align	16
.Lecb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, remaining) */
	movq	%rdi,%r8		/* save out */
	movq	%rsi,%r9		/* save in */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned out: write to the stack buffer */
	testq	$0x0f,%rsi
	jz	.Lecb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy misaligned input to the destination */
	subq	%rbx,%rdi		/* rewind the destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lecb_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,200		/* rep xcryptecb */
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lecb_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from the stack to out */
	subq	%rbx,%rdi
.Lecb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and in, shrink remaining */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov leaves ZF from the subq intact */
	jz	.Lecb_break
	cmpq	%rbx,%rcx
	jae	.Lecb_loop
.Lecb_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no stack buffer yet: allocate one for the tail */
	movq	%rdi,%r8
	movq	%rcx,%rbx
	subq	%rax,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the tail of the input to the stack */
	movq	%rsp,%rsi		/* the tail is now read from the stack copy */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lecb_loop
.align	16
.Lecb_break:
	cmpq	%rbp,%rsp
	je	.Lecb_done		/* no stack buffer was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lecb_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer before releasing it */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lecb_bzero

.Lecb_done:
	leaq	(%rbp),%rsp
	jmp	.Lecb_exit

.align	16
.Lecb_aligned:
	leaq	(%rsi,%rcx,1),%rbp	/* %rbp is scratch here: end of input */
	negq	%rbp
	andq	$0xfff,%rbp		/* bytes from end of input to the next 4 KiB boundary */
	xorl	%eax,%eax
	cmpq	$128,%rbp
	movq	$128-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = distance < 128 ? 127 : 0 */
	andq	%rcx,%rbp		/* %rbp = tail bytes to bounce through the stack */
	subq	%rbp,%rcx		/* %rcx = bytes processed directly */
	jz	.Lecb_aligned_tail
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,200		/* rep xcryptecb */
	testq	%rbp,%rbp
	jz	.Lecb_exit

.Lecb_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx		/* chunk = tail size */
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* from here on %rbp = stack pointer to restore */
	subq	%rcx,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the tail of the input to the stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lecb_loop		/* process the tail via the common loop */
.Lecb_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* discard the flags pushed by pushf */
.Lecb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ecb_encrypt,.-padlock_ecb_encrypt
/*
 * padlock_cbc_encrypt: run the PadLock `rep xcryptcbc` instruction (raw
 * bytes) over a buffer, bouncing misaligned data through a temporary
 * stack buffer.
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context pointer, %rcx = length in bytes.
 * Out:   %eax = 0 if the context pointer is not 16-byte aligned or the
 *        length is not a multiple of 16 (nothing is processed);
 *        %eax = 1 otherwise.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (the direction
 *        flag is left cleared).  %rbx and %rbp are saved and restored.
 *
 * Context as addressed here: ctx+0 is handed to the instruction in %rax,
 * ctx+16 in %rdx, ctx+32 in %rbx.  Bit 5 of the dword at ctx+16 selects
 * the "aligned" path regardless of the actual pointer alignment.  After
 * every instruction, 16 bytes are copied from wherever %rax points then
 * into ctx+0 (presumably the chaining value for the next chunk or call).
 *
 * Unaligned path: data is processed in chunks of at most 512 bytes, with
 * %rbx = chunk size, %rbp = stack pointer to restore, and %r8/%r9/%r10/
 * %r11 holding out/in/remaining/chunk across the instruction.  The stack
 * buffer is zeroed before it is released.
 *
 * Both paths check how far the end of the data is from the next 4 KiB
 * boundary.  When that is under 64 bytes, the tail of the input is copied
 * to the stack and processed from there.
 * NOTE(review): presumably this keeps the hardware from prefetching
 * across a page boundary past the caller's buffer -- confirm against the
 * VIA errata.
 */
.globl	padlock_cbc_encrypt
.type	padlock_cbc_encrypt,@function
.align	16
padlock_cbc_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort paths */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lcbc_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lcbc_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image read by _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16 */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 at ctx+16 set: take the aligned path */
	jnz	.Lcbc_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = out is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = in is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lcbc_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = out aligned ? 0 : -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore on exit */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* buffer size; 0 when out is aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len mod 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* allocate the buffer (lea keeps ZF) */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 when len is a multiple of 512 */
	cmpq	%rbx,%rcx
	ja	.Lcbc_loop		/* more than one chunk to go */
	movq	%rsi,%rax		/* single chunk: where will it be read from? */
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no stack buffer: input is first copied to out */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* bytes from end of data to the next 4 KiB boundary */
	cmpq	$64,%rax
	movq	$-64,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 64 ? -64 : chunk */
	andq	%rax,%rbx		/* near the boundary: round chunk down to 64 */
	jz	.Lcbc_unaligned_tail
	jmp	.Lcbc_loop
.align	16
.Lcbc_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, remaining) */
	movq	%rdi,%r8		/* save out */
	movq	%rsi,%r9		/* save in */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned out: write to the stack buffer */
	testq	$0x0f,%rsi
	jz	.Lcbc_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy misaligned input to the destination */
	subq	%rbx,%rdi		/* rewind the destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lcbc_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,208		/* rep xcryptcbc */
	movdqa	(%rax),%xmm0		/* 16 bytes from (%rax) as left by the instruction ... */
	movdqa	%xmm0,-16(%rdx)		/* ... are stored at ctx+0 */
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lcbc_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from the stack to out */
	subq	%rbx,%rdi
.Lcbc_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and in, shrink remaining */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov leaves ZF from the subq intact */
	jz	.Lcbc_break
	cmpq	%rbx,%rcx
	jae	.Lcbc_loop
.Lcbc_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no stack buffer yet: allocate one for the tail */
	movq	%rdi,%r8
	movq	%rcx,%rbx
	subq	%rax,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the tail of the input to the stack */
	movq	%rsp,%rsi		/* the tail is now read from the stack copy */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lcbc_loop
.align	16
.Lcbc_break:
	cmpq	%rbp,%rsp
	je	.Lcbc_done		/* no stack buffer was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lcbc_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer before releasing it */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lcbc_bzero

.Lcbc_done:
	leaq	(%rbp),%rsp
	jmp	.Lcbc_exit

.align	16
.Lcbc_aligned:
	leaq	(%rsi,%rcx,1),%rbp	/* %rbp is scratch here: end of input */
	negq	%rbp
	andq	$0xfff,%rbp		/* bytes from end of input to the next 4 KiB boundary */
	xorl	%eax,%eax
	cmpq	$64,%rbp
	movq	$64-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = distance < 64 ? 63 : 0 */
	andq	%rcx,%rbp		/* %rbp = tail bytes to bounce through the stack */
	subq	%rbp,%rcx		/* %rcx = bytes processed directly */
	jz	.Lcbc_aligned_tail
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,208		/* rep xcryptcbc */
	movdqa	(%rax),%xmm0		/* 16 bytes from (%rax) as left by the instruction ... */
	movdqa	%xmm0,-16(%rdx)		/* ... are stored at ctx+0 */
	testq	%rbp,%rbp
	jz	.Lcbc_exit

.Lcbc_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx		/* chunk = tail size */
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* from here on %rbp = stack pointer to restore */
	subq	%rcx,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the tail of the input to the stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lcbc_loop		/* process the tail via the common loop */
.Lcbc_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* discard the flags pushed by pushf */
.Lcbc_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_cbc_encrypt,.-padlock_cbc_encrypt
/*
 * padlock_cfb_encrypt: run the PadLock `rep xcryptcfb` instruction (raw
 * bytes) over a buffer, bouncing misaligned data through a temporary
 * stack buffer.
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context pointer, %rcx = length in bytes.
 * Out:   %eax = 0 if the context pointer is not 16-byte aligned or the
 *        length is not a multiple of 16 (nothing is processed);
 *        %eax = 1 otherwise.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (the direction
 *        flag is left cleared).  %rbx and %rbp are saved and restored.
 *
 * Context as addressed here: ctx+0 is handed to the instruction in %rax,
 * ctx+16 in %rdx, ctx+32 in %rbx.  Bit 5 of the dword at ctx+16 selects
 * the "aligned" path regardless of the actual pointer alignment.  After
 * every instruction, 16 bytes are copied from wherever %rax points then
 * into ctx+0 (presumably the chaining value for the next chunk or call).
 *
 * Unaligned path: data is processed in chunks of at most 512 bytes, with
 * %rbx = chunk size, %rbp = stack pointer to restore, and %r8/%r9/%r10/
 * %r11 holding out/in/remaining/chunk across the instruction.  The stack
 * buffer is zeroed before it is released.  The aligned path handles the
 * whole length with a single instruction.
 */
.globl	padlock_cfb_encrypt
.type	padlock_cfb_encrypt,@function
.align	16
padlock_cfb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort paths */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lcfb_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lcfb_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image read by _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16 */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 at ctx+16 set: take the aligned path */
	jnz	.Lcfb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = out is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = in is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lcfb_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = out aligned ? 0 : -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore on exit */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* buffer size; 0 when out is aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len mod 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* allocate the buffer (lea keeps ZF) */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 when len is a multiple of 512 */
	jmp	.Lcfb_loop
.align	16
.Lcfb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, remaining) */
	movq	%rdi,%r8		/* save out */
	movq	%rsi,%r9		/* save in */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned out: write to the stack buffer */
	testq	$0x0f,%rsi
	jz	.Lcfb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy misaligned input to the destination */
	subq	%rbx,%rdi		/* rewind the destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lcfb_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,224		/* rep xcryptcfb */
	movdqa	(%rax),%xmm0		/* 16 bytes from (%rax) as left by the instruction ... */
	movdqa	%xmm0,-16(%rdx)		/* ... are stored at ctx+0 */
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lcfb_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from the stack to out */
	subq	%rbx,%rdi
.Lcfb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and in, shrink remaining */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov leaves ZF from the subq intact */
	jnz	.Lcfb_loop
	cmpq	%rbp,%rsp
	je	.Lcfb_done		/* no stack buffer was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lcfb_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer before releasing it */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lcfb_bzero

.Lcfb_done:
	leaq	(%rbp),%rsp
	jmp	.Lcfb_exit

.align	16
.Lcfb_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,224		/* rep xcryptcfb over the whole buffer */
	movdqa	(%rax),%xmm0		/* 16 bytes from (%rax) as left by the instruction ... */
	movdqa	%xmm0,-16(%rdx)		/* ... are stored at ctx+0 */
.Lcfb_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* discard the flags pushed by pushf */
.Lcfb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_cfb_encrypt,.-padlock_cfb_encrypt
/*
 * padlock_ofb_encrypt: run the PadLock `rep xcryptofb` instruction (raw
 * bytes) over a buffer, bouncing misaligned data through a temporary
 * stack buffer.
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context pointer, %rcx = length in bytes.
 * Out:   %eax = 0 if the context pointer is not 16-byte aligned or the
 *        length is not a multiple of 16 (nothing is processed);
 *        %eax = 1 otherwise.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (the direction
 *        flag is left cleared).  %rbx and %rbp are saved and restored.
 *
 * Context as addressed here: ctx+0 is handed to the instruction in %rax,
 * ctx+16 in %rdx, ctx+32 in %rbx.  Bit 5 of the dword at ctx+16 selects
 * the "aligned" path regardless of the actual pointer alignment.  After
 * every instruction, 16 bytes are copied from wherever %rax points then
 * into ctx+0 (presumably the chaining value for the next chunk or call).
 *
 * Unaligned path: data is processed in chunks of at most 512 bytes, with
 * %rbx = chunk size, %rbp = stack pointer to restore, and %r8/%r9/%r10/
 * %r11 holding out/in/remaining/chunk across the instruction.  The stack
 * buffer is zeroed before it is released.  The aligned path handles the
 * whole length with a single instruction.
 */
.globl	padlock_ofb_encrypt
.type	padlock_ofb_encrypt,@function
.align	16
padlock_ofb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort paths */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lofb_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lofb_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image read by _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16 */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 at ctx+16 set: take the aligned path */
	jnz	.Lofb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = out is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = in is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lofb_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = out aligned ? 0 : -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore on exit */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* buffer size; 0 when out is aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len mod 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* allocate the buffer (lea keeps ZF) */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 when len is a multiple of 512 */
	jmp	.Lofb_loop
.align	16
.Lofb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, remaining) */
	movq	%rdi,%r8		/* save out */
	movq	%rsi,%r9		/* save in */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned out: write to the stack buffer */
	testq	$0x0f,%rsi
	jz	.Lofb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy misaligned input to the destination */
	subq	%rbx,%rdi		/* rewind the destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lofb_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,232		/* rep xcryptofb */
	movdqa	(%rax),%xmm0		/* 16 bytes from (%rax) as left by the instruction ... */
	movdqa	%xmm0,-16(%rdx)		/* ... are stored at ctx+0 */
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lofb_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from the stack to out */
	subq	%rbx,%rdi
.Lofb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and in, shrink remaining */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov leaves ZF from the subq intact */
	jnz	.Lofb_loop
	cmpq	%rbp,%rsp
	je	.Lofb_done		/* no stack buffer was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lofb_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer before releasing it */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lofb_bzero

.Lofb_done:
	leaq	(%rbp),%rsp
	jmp	.Lofb_exit

.align	16
.Lofb_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,232		/* rep xcryptofb over the whole buffer */
	movdqa	(%rax),%xmm0		/* 16 bytes from (%rax) as left by the instruction ... */
	movdqa	%xmm0,-16(%rdx)		/* ... are stored at ctx+0 */
.Lofb_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* discard the flags pushed by pushf */
.Lofb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ofb_encrypt,.-padlock_ofb_encrypt
/*
 * padlock_ctr32_encrypt: run the PadLock `rep xcryptctr` instruction (raw
 * bytes) over a buffer, bouncing misaligned data through a temporary
 * stack buffer and maintaining a 32-bit big-endian counter in software.
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context pointer, %rcx = length in bytes.
 * Out:   %eax = 0 if the context pointer is not 16-byte aligned or the
 *        length is not a multiple of 16 (nothing is processed);
 *        %eax = 1 otherwise.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (the direction
 *        flag is left cleared).  %rbx and %rbp are saved and restored.
 *
 * Context as addressed here: ctx+0 is handed to the instruction in %rax,
 * ctx+16 in %rdx, ctx+32 in %rbx.  Bit 5 of the dword at ctx+16 selects
 * the "aligned" path regardless of the actual pointer alignment.  The
 * counter is the big-endian 32-bit word at ctx+12.
 *
 * Counter handling: chunk sizes are chosen so that no single instruction
 * runs across the point where the low 16 bits of the counter wrap, and
 * the carry into the upper 16 bits is added here afterwards.
 * NOTE(review): presumably the instruction itself only increments the low
 * 16 bits -- confirm against the VIA documentation.
 *
 * Unaligned path: chunks of at most 512 bytes, ending where the low five
 * bits of the block counter wrap; %rbx = chunk size, %rbp = stack pointer
 * to restore, %r8/%r9/%r10/%r11 = out/in/remaining/chunk across the
 * instruction.  The stack buffer is zeroed before it is released.
 *
 * Both paths check how far the end of the data is from the next 4 KiB
 * boundary.  When that is under 32 bytes, the tail of the input is copied
 * to the stack and processed from there.
 * NOTE(review): presumably this keeps the hardware from prefetching
 * across a page boundary past the caller's buffer -- confirm against the
 * VIA errata.
 */
.globl	padlock_ctr32_encrypt
.type	padlock_ctr32_encrypt,@function
.align	16
padlock_ctr32_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort paths */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lctr32_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lctr32_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image read by _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16 */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 at ctx+16 set: take the aligned path */
	jnz	.Lctr32_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = out is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = in is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lctr32_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = out aligned ? 0 : -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore on exit */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* buffer size; 0 when out is aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx
	leaq	(%rax,%rbp,1),%rsp	/* allocate the buffer */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* this %rbx is recomputed from the counter just below */
.Lctr32_reenter:
	movl	-4(%rdx),%eax		/* counter word at ctx+12 */
	bswapl	%eax			/* big-endian -> host order */
	negl	%eax
	andl	$31,%eax		/* blocks until the low 5 counter bits wrap */
	movq	$512,%rbx
	shll	$4,%eax			/* blocks -> bytes */
	cmovzq	%rbx,%rax		/* already at a wrap point: a full 512 bytes */
	cmpq	%rax,%rcx
	cmovaq	%rax,%rbx		/* chunk = min(len, bytes until wrap) */
	cmovbeq	%rcx,%rbx
	cmpq	%rbx,%rcx
	ja	.Lctr32_loop		/* more than one chunk to go */
	movq	%rsi,%rax		/* single chunk: where will it be read from? */
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no stack buffer: input is first copied to out */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* bytes from end of data to the next 4 KiB boundary */
	cmpq	$32,%rax
	movq	$-32,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 32 ? -32 : chunk */
	andq	%rax,%rbx		/* near the boundary: round chunk down to 32 */
	jz	.Lctr32_unaligned_tail
	jmp	.Lctr32_loop
.align	16
.Lctr32_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, remaining) */
	movq	%rdi,%r8		/* save out */
	movq	%rsi,%r9		/* save in */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned out: write to the stack buffer */
	testq	$0x0f,%rsi
	jz	.Lctr32_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy misaligned input to the destination */
	subq	%rbx,%rdi		/* rewind the destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lctr32_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */
	movl	-4(%rdx),%eax		/* counter word as stored (big-endian) */
	testl	$0xffff0000,%eax	/* these bits hold the counter's low-order 16 bits */
	jnz	.Lctr32_no_carry
	bswapl	%eax			/* low 16 bits are zero: carry into the upper 16 */
	addl	$0x10000,%eax
	bswapl	%eax
	movl	%eax,-4(%rdx)
.Lctr32_no_carry:
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lctr32_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from the stack to out */
	subq	%rbx,%rdi
.Lctr32_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and in, shrink remaining */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov leaves ZF from the subq intact */
	jz	.Lctr32_break
	cmpq	%rbx,%rcx
	jae	.Lctr32_loop
	movq	%rcx,%rbx		/* final partial chunk: redo the page-boundary check */
	movq	%rsi,%rax
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no stack buffer: input is first copied to out */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* bytes from end of data to the next 4 KiB boundary */
	cmpq	$32,%rax
	movq	$-32,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 32 ? -32 : chunk */
	andq	%rax,%rbx
	jnz	.Lctr32_loop
.Lctr32_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no stack buffer yet: allocate one for the tail */
	movq	%rdi,%r8
	movq	%rcx,%rbx
	subq	%rax,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the tail of the input to the stack */
	movq	%rsp,%rsi		/* the tail is now read from the stack copy */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lctr32_loop
.align	16
.Lctr32_break:
	cmpq	%rbp,%rsp
	je	.Lctr32_done		/* no stack buffer was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lctr32_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer before releasing it */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lctr32_bzero

.Lctr32_done:
	leaq	(%rbp),%rsp
	jmp	.Lctr32_exit

.align	16
.Lctr32_aligned:
	movl	-4(%rdx),%eax		/* counter word at ctx+12 */
	bswapl	%eax			/* big-endian -> host order */
	negl	%eax
	andl	$0xffff,%eax		/* blocks until the low 16 counter bits wrap */
	movq	$1048576,%rbx
	shll	$4,%eax			/* blocks -> bytes */
	cmovzq	%rbx,%rax		/* already at a wrap point: a full 1 MiB */
	cmpq	%rax,%rcx
	cmovaq	%rax,%rbx		/* chunk = min(len, bytes until wrap) */
	cmovbeq	%rcx,%rbx
	jbe	.Lctr32_aligned_skip	/* len does not go past the wrap point */

.Lctr32_aligned_loop:
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */

	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */

	movl	-4(%rdx),%eax		/* chunk ended at a wrap point: add the carry */
	bswapl	%eax
	addl	$0x10000,%eax
	bswapl	%eax
	movl	%eax,-4(%rdx)

	movq	%r10,%rcx
	subq	%r11,%rcx		/* remaining -= chunk */
	movq	$1048576,%rbx		/* mov leaves ZF from the subq intact */
	jz	.Lctr32_exit
	cmpq	%rbx,%rcx
	jae	.Lctr32_aligned_loop

.Lctr32_aligned_skip:
	leaq	(%rsi,%rcx,1),%rbp	/* %rbp is scratch here: end of input */
	negq	%rbp
	andq	$0xfff,%rbp		/* bytes from end of input to the next 4 KiB boundary */
	xorl	%eax,%eax
	cmpq	$32,%rbp
	movq	$32-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = distance < 32 ? 31 : 0 */
	andq	%rcx,%rbp		/* %rbp = tail bytes to bounce through the stack */
	subq	%rbp,%rcx		/* %rcx = bytes processed directly */
	jz	.Lctr32_aligned_tail
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */
	testq	%rbp,%rbp
	jz	.Lctr32_exit

.Lctr32_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx		/* chunk = tail size */
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* from here on %rbp = stack pointer to restore */
	subq	%rcx,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the tail of the input to the stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lctr32_loop		/* process the tail via the common loop */
.Lctr32_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* discard the flags pushed by pushf */
.Lctr32_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ctr32_encrypt,.-padlock_ctr32_encrypt
/*
 * NUL-terminated ident string kept in .text:
 * "VIA Padlock x86_64 module, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte	86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
/*
 * .Lpadlock_saved_context: one zero-initialised quadword.  Its address is
 * passed in %rax to _padlock_verify_ctx, which compares it with the
 * context pointer in use and then stores that pointer here.
 */
.data
.align	8
.Lpadlock_saved_context:
.quad	0
