1#include <machine/asm.h>
2.text
3.globl	padlock_capability
4.type	padlock_capability,@function
5.align	16
6padlock_capability:
7	movq	%rbx,%r8
8	xorl	%eax,%eax
9	cpuid
10	xorl	%eax,%eax
11	cmpl	$0x746e6543,%ebx
12	jne	.Lzhaoxin
13	cmpl	$0x48727561,%edx
14	jne	.Lnoluck
15	cmpl	$0x736c7561,%ecx
16	jne	.Lnoluck
17	jmp	.LzhaoxinEnd
18.Lzhaoxin:
19	cmpl	$0x68532020,%ebx
20	jne	.Lnoluck
21	cmpl	$0x68676e61,%edx
22	jne	.Lnoluck
23	cmpl	$0x20206961,%ecx
24	jne	.Lnoluck
25.LzhaoxinEnd:
26	movl	$0xC0000000,%eax
27	cpuid
28	movl	%eax,%edx
29	xorl	%eax,%eax
30	cmpl	$0xC0000001,%edx
31	jb	.Lnoluck
32	movl	$0xC0000001,%eax
33	cpuid
34	movl	%edx,%eax
35	andl	$0xffffffef,%eax
36	orl	$0x10,%eax
37.Lnoluck:
38	movq	%r8,%rbx
39	.byte	0xf3,0xc3
40.size	padlock_capability,.-padlock_capability
41
42.globl	padlock_key_bswap
43.type	padlock_key_bswap,@function
44.align	16
45padlock_key_bswap:
46	movl	240(%rdi),%edx
47.Lbswap_loop:
48	movl	(%rdi),%eax
49	bswapl	%eax
50	movl	%eax,(%rdi)
51	leaq	4(%rdi),%rdi
52	subl	$1,%edx
53	jnz	.Lbswap_loop
54	.byte	0xf3,0xc3
55.size	padlock_key_bswap,.-padlock_key_bswap
56
57.globl	padlock_verify_context
58.type	padlock_verify_context,@function
59.align	16
60padlock_verify_context:
61	movq	%rdi,%rdx
62	pushf
63	leaq	.Lpadlock_saved_context(%rip),%rax
64	call	_padlock_verify_ctx
65	leaq	8(%rsp),%rsp
66	.byte	0xf3,0xc3
67.size	padlock_verify_context,.-padlock_verify_context
68
69.type	_padlock_verify_ctx,@function
70.align	16
71_padlock_verify_ctx:
72	movq	8(%rsp),%r8
73	btq	$30,%r8
74	jnc	.Lverified
75	cmpq	(%rax),%rdx
76	je	.Lverified
77	pushf
78	popf
79.Lverified:
80	movq	%rdx,(%rax)
81	.byte	0xf3,0xc3
82.size	_padlock_verify_ctx,.-_padlock_verify_ctx
83
84.globl	padlock_reload_key
85.type	padlock_reload_key,@function
86.align	16
87padlock_reload_key:
88	pushf
89	popf
90	.byte	0xf3,0xc3
91.size	padlock_reload_key,.-padlock_reload_key
92
93.globl	padlock_aes_block
94.type	padlock_aes_block,@function
95.align	16
96padlock_aes_block:
97	movq	%rbx,%r8
98	movq	$1,%rcx
99	leaq	32(%rdx),%rbx
100	leaq	16(%rdx),%rdx
101.byte	0xf3,0x0f,0xa7,0xc8
102	movq	%r8,%rbx
103	.byte	0xf3,0xc3
104.size	padlock_aes_block,.-padlock_aes_block
105
106.globl	padlock_xstore
107.type	padlock_xstore,@function
108.align	16
109padlock_xstore:
110	movl	%esi,%edx
111.byte	0x0f,0xa7,0xc0
112	.byte	0xf3,0xc3
113.size	padlock_xstore,.-padlock_xstore
114
115.globl	padlock_sha1_oneshot
116.type	padlock_sha1_oneshot,@function
117.align	16
118padlock_sha1_oneshot:
119	movq	%rdx,%rcx
120	movq	%rdi,%rdx
121	movups	(%rdi),%xmm0
122	subq	$128+8,%rsp
123	movl	16(%rdi),%eax
124	movaps	%xmm0,(%rsp)
125	movq	%rsp,%rdi
126	movl	%eax,16(%rsp)
127	xorq	%rax,%rax
128.byte	0xf3,0x0f,0xa6,0xc8
129	movaps	(%rsp),%xmm0
130	movl	16(%rsp),%eax
131	addq	$128+8,%rsp
132	movups	%xmm0,(%rdx)
133	movl	%eax,16(%rdx)
134	.byte	0xf3,0xc3
135.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot
136
137.globl	padlock_sha1_blocks
138.type	padlock_sha1_blocks,@function
139.align	16
140padlock_sha1_blocks:
141	movq	%rdx,%rcx
142	movq	%rdi,%rdx
143	movups	(%rdi),%xmm0
144	subq	$128+8,%rsp
145	movl	16(%rdi),%eax
146	movaps	%xmm0,(%rsp)
147	movq	%rsp,%rdi
148	movl	%eax,16(%rsp)
149	movq	$-1,%rax
150.byte	0xf3,0x0f,0xa6,0xc8
151	movaps	(%rsp),%xmm0
152	movl	16(%rsp),%eax
153	addq	$128+8,%rsp
154	movups	%xmm0,(%rdx)
155	movl	%eax,16(%rdx)
156	.byte	0xf3,0xc3
157.size	padlock_sha1_blocks,.-padlock_sha1_blocks
158
159.globl	padlock_sha256_oneshot
160.type	padlock_sha256_oneshot,@function
161.align	16
162padlock_sha256_oneshot:
163	movq	%rdx,%rcx
164	movq	%rdi,%rdx
165	movups	(%rdi),%xmm0
166	subq	$128+8,%rsp
167	movups	16(%rdi),%xmm1
168	movaps	%xmm0,(%rsp)
169	movq	%rsp,%rdi
170	movaps	%xmm1,16(%rsp)
171	xorq	%rax,%rax
172.byte	0xf3,0x0f,0xa6,0xd0
173	movaps	(%rsp),%xmm0
174	movaps	16(%rsp),%xmm1
175	addq	$128+8,%rsp
176	movups	%xmm0,(%rdx)
177	movups	%xmm1,16(%rdx)
178	.byte	0xf3,0xc3
179.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot
180
181.globl	padlock_sha256_blocks
182.type	padlock_sha256_blocks,@function
183.align	16
184padlock_sha256_blocks:
185	movq	%rdx,%rcx
186	movq	%rdi,%rdx
187	movups	(%rdi),%xmm0
188	subq	$128+8,%rsp
189	movups	16(%rdi),%xmm1
190	movaps	%xmm0,(%rsp)
191	movq	%rsp,%rdi
192	movaps	%xmm1,16(%rsp)
193	movq	$-1,%rax
194.byte	0xf3,0x0f,0xa6,0xd0
195	movaps	(%rsp),%xmm0
196	movaps	16(%rsp),%xmm1
197	addq	$128+8,%rsp
198	movups	%xmm0,(%rdx)
199	movups	%xmm1,16(%rdx)
200	.byte	0xf3,0xc3
201.size	padlock_sha256_blocks,.-padlock_sha256_blocks
202
203.globl	padlock_sha512_blocks
204.type	padlock_sha512_blocks,@function
205.align	16
206padlock_sha512_blocks:
207	movq	%rdx,%rcx
208	movq	%rdi,%rdx
209	movups	(%rdi),%xmm0
210	subq	$128+8,%rsp
211	movups	16(%rdi),%xmm1
212	movups	32(%rdi),%xmm2
213	movups	48(%rdi),%xmm3
214	movaps	%xmm0,(%rsp)
215	movq	%rsp,%rdi
216	movaps	%xmm1,16(%rsp)
217	movaps	%xmm2,32(%rsp)
218	movaps	%xmm3,48(%rsp)
219.byte	0xf3,0x0f,0xa6,0xe0
220	movaps	(%rsp),%xmm0
221	movaps	16(%rsp),%xmm1
222	movaps	32(%rsp),%xmm2
223	movaps	48(%rsp),%xmm3
224	addq	$128+8,%rsp
225	movups	%xmm0,(%rdx)
226	movups	%xmm1,16(%rdx)
227	movups	%xmm2,32(%rdx)
228	movups	%xmm3,48(%rdx)
229	.byte	0xf3,0xc3
230.size	padlock_sha512_blocks,.-padlock_sha512_blocks
231.globl	padlock_ecb_encrypt
232.type	padlock_ecb_encrypt,@function
233.align	16
234padlock_ecb_encrypt:
235	pushq	%rbp
236	pushq	%rbx
237
238	xorl	%eax,%eax
239	testq	$15,%rdx
240	jnz	.Lecb_abort
241	testq	$15,%rcx
242	jnz	.Lecb_abort
243	leaq	.Lpadlock_saved_context(%rip),%rax
244	pushf
245	cld
246	call	_padlock_verify_ctx
247	leaq	16(%rdx),%rdx
248	xorl	%eax,%eax
249	xorl	%ebx,%ebx
250	testl	$32,(%rdx)
251	jnz	.Lecb_aligned
252	testq	$0x0f,%rdi
253	setz	%al
254	testq	$0x0f,%rsi
255	setz	%bl
256	testl	%ebx,%eax
257	jnz	.Lecb_aligned
258	negq	%rax
259	movq	$512,%rbx
260	notq	%rax
261	leaq	(%rsp),%rbp
262	cmpq	%rbx,%rcx
263	cmovcq	%rcx,%rbx
264	andq	%rbx,%rax
265	movq	%rcx,%rbx
266	negq	%rax
267	andq	$512-1,%rbx
268	leaq	(%rax,%rbp,1),%rsp
269	movq	$512,%rax
270	cmovzq	%rax,%rbx
271	cmpq	%rbx,%rcx
272	ja	.Lecb_loop
273	movq	%rsi,%rax
274	cmpq	%rsp,%rbp
275	cmoveq	%rdi,%rax
276	addq	%rcx,%rax
277	negq	%rax
278	andq	$0xfff,%rax
279	cmpq	$128,%rax
280	movq	$-128,%rax
281	cmovaeq	%rbx,%rax
282	andq	%rax,%rbx
283	jz	.Lecb_unaligned_tail
284	jmp	.Lecb_loop
285.align	16
286.Lecb_loop:
287	cmpq	%rcx,%rbx
288	cmovaq	%rcx,%rbx
289	movq	%rdi,%r8
290	movq	%rsi,%r9
291	movq	%rcx,%r10
292	movq	%rbx,%rcx
293	movq	%rbx,%r11
294	testq	$0x0f,%rdi
295	cmovnzq	%rsp,%rdi
296	testq	$0x0f,%rsi
297	jz	.Lecb_inp_aligned
298	shrq	$3,%rcx
299.byte	0xf3,0x48,0xa5
300	subq	%rbx,%rdi
301	movq	%rbx,%rcx
302	movq	%rdi,%rsi
303.Lecb_inp_aligned:
304	leaq	-16(%rdx),%rax
305	leaq	16(%rdx),%rbx
306	shrq	$4,%rcx
307.byte	0xf3,0x0f,0xa7,200
308	movq	%r8,%rdi
309	movq	%r11,%rbx
310	testq	$0x0f,%rdi
311	jz	.Lecb_out_aligned
312	movq	%rbx,%rcx
313	leaq	(%rsp),%rsi
314	shrq	$3,%rcx
315.byte	0xf3,0x48,0xa5
316	subq	%rbx,%rdi
317.Lecb_out_aligned:
318	movq	%r9,%rsi
319	movq	%r10,%rcx
320	addq	%rbx,%rdi
321	addq	%rbx,%rsi
322	subq	%rbx,%rcx
323	movq	$512,%rbx
324	jz	.Lecb_break
325	cmpq	%rbx,%rcx
326	jae	.Lecb_loop
327.Lecb_unaligned_tail:
328	xorl	%eax,%eax
329	cmpq	%rsp,%rbp
330	cmoveq	%rcx,%rax
331	movq	%rdi,%r8
332	movq	%rcx,%rbx
333	subq	%rax,%rsp
334	shrq	$3,%rcx
335	leaq	(%rsp),%rdi
336.byte	0xf3,0x48,0xa5
337	movq	%rsp,%rsi
338	movq	%r8,%rdi
339	movq	%rbx,%rcx
340	jmp	.Lecb_loop
341.align	16
342.Lecb_break:
343	cmpq	%rbp,%rsp
344	je	.Lecb_done
345
346	pxor	%xmm0,%xmm0
347	leaq	(%rsp),%rax
348.Lecb_bzero:
349	movaps	%xmm0,(%rax)
350	leaq	16(%rax),%rax
351	cmpq	%rax,%rbp
352	ja	.Lecb_bzero
353
354.Lecb_done:
355	leaq	(%rbp),%rsp
356	jmp	.Lecb_exit
357
358.align	16
359.Lecb_aligned:
360	leaq	(%rsi,%rcx,1),%rbp
361	negq	%rbp
362	andq	$0xfff,%rbp
363	xorl	%eax,%eax
364	cmpq	$128,%rbp
365	movq	$128-1,%rbp
366	cmovaeq	%rax,%rbp
367	andq	%rcx,%rbp
368	subq	%rbp,%rcx
369	jz	.Lecb_aligned_tail
370	leaq	-16(%rdx),%rax
371	leaq	16(%rdx),%rbx
372	shrq	$4,%rcx
373.byte	0xf3,0x0f,0xa7,200
374	testq	%rbp,%rbp
375	jz	.Lecb_exit
376
377.Lecb_aligned_tail:
378	movq	%rdi,%r8
379	movq	%rbp,%rbx
380	movq	%rbp,%rcx
381	leaq	(%rsp),%rbp
382	subq	%rcx,%rsp
383	shrq	$3,%rcx
384	leaq	(%rsp),%rdi
385.byte	0xf3,0x48,0xa5
386	leaq	(%r8),%rdi
387	leaq	(%rsp),%rsi
388	movq	%rbx,%rcx
389	jmp	.Lecb_loop
390.Lecb_exit:
391	movl	$1,%eax
392	leaq	8(%rsp),%rsp
393.Lecb_abort:
394	popq	%rbx
395	popq	%rbp
396	.byte	0xf3,0xc3
397.size	padlock_ecb_encrypt,.-padlock_ecb_encrypt
398.globl	padlock_cbc_encrypt
399.type	padlock_cbc_encrypt,@function
400.align	16
401padlock_cbc_encrypt:
402	pushq	%rbp
403	pushq	%rbx
404
405	xorl	%eax,%eax
406	testq	$15,%rdx
407	jnz	.Lcbc_abort
408	testq	$15,%rcx
409	jnz	.Lcbc_abort
410	leaq	.Lpadlock_saved_context(%rip),%rax
411	pushf
412	cld
413	call	_padlock_verify_ctx
414	leaq	16(%rdx),%rdx
415	xorl	%eax,%eax
416	xorl	%ebx,%ebx
417	testl	$32,(%rdx)
418	jnz	.Lcbc_aligned
419	testq	$0x0f,%rdi
420	setz	%al
421	testq	$0x0f,%rsi
422	setz	%bl
423	testl	%ebx,%eax
424	jnz	.Lcbc_aligned
425	negq	%rax
426	movq	$512,%rbx
427	notq	%rax
428	leaq	(%rsp),%rbp
429	cmpq	%rbx,%rcx
430	cmovcq	%rcx,%rbx
431	andq	%rbx,%rax
432	movq	%rcx,%rbx
433	negq	%rax
434	andq	$512-1,%rbx
435	leaq	(%rax,%rbp,1),%rsp
436	movq	$512,%rax
437	cmovzq	%rax,%rbx
438	cmpq	%rbx,%rcx
439	ja	.Lcbc_loop
440	movq	%rsi,%rax
441	cmpq	%rsp,%rbp
442	cmoveq	%rdi,%rax
443	addq	%rcx,%rax
444	negq	%rax
445	andq	$0xfff,%rax
446	cmpq	$64,%rax
447	movq	$-64,%rax
448	cmovaeq	%rbx,%rax
449	andq	%rax,%rbx
450	jz	.Lcbc_unaligned_tail
451	jmp	.Lcbc_loop
452.align	16
453.Lcbc_loop:
454	cmpq	%rcx,%rbx
455	cmovaq	%rcx,%rbx
456	movq	%rdi,%r8
457	movq	%rsi,%r9
458	movq	%rcx,%r10
459	movq	%rbx,%rcx
460	movq	%rbx,%r11
461	testq	$0x0f,%rdi
462	cmovnzq	%rsp,%rdi
463	testq	$0x0f,%rsi
464	jz	.Lcbc_inp_aligned
465	shrq	$3,%rcx
466.byte	0xf3,0x48,0xa5
467	subq	%rbx,%rdi
468	movq	%rbx,%rcx
469	movq	%rdi,%rsi
470.Lcbc_inp_aligned:
471	leaq	-16(%rdx),%rax
472	leaq	16(%rdx),%rbx
473	shrq	$4,%rcx
474.byte	0xf3,0x0f,0xa7,208
475	movdqa	(%rax),%xmm0
476	movdqa	%xmm0,-16(%rdx)
477	movq	%r8,%rdi
478	movq	%r11,%rbx
479	testq	$0x0f,%rdi
480	jz	.Lcbc_out_aligned
481	movq	%rbx,%rcx
482	leaq	(%rsp),%rsi
483	shrq	$3,%rcx
484.byte	0xf3,0x48,0xa5
485	subq	%rbx,%rdi
486.Lcbc_out_aligned:
487	movq	%r9,%rsi
488	movq	%r10,%rcx
489	addq	%rbx,%rdi
490	addq	%rbx,%rsi
491	subq	%rbx,%rcx
492	movq	$512,%rbx
493	jz	.Lcbc_break
494	cmpq	%rbx,%rcx
495	jae	.Lcbc_loop
496.Lcbc_unaligned_tail:
497	xorl	%eax,%eax
498	cmpq	%rsp,%rbp
499	cmoveq	%rcx,%rax
500	movq	%rdi,%r8
501	movq	%rcx,%rbx
502	subq	%rax,%rsp
503	shrq	$3,%rcx
504	leaq	(%rsp),%rdi
505.byte	0xf3,0x48,0xa5
506	movq	%rsp,%rsi
507	movq	%r8,%rdi
508	movq	%rbx,%rcx
509	jmp	.Lcbc_loop
510.align	16
511.Lcbc_break:
512	cmpq	%rbp,%rsp
513	je	.Lcbc_done
514
515	pxor	%xmm0,%xmm0
516	leaq	(%rsp),%rax
517.Lcbc_bzero:
518	movaps	%xmm0,(%rax)
519	leaq	16(%rax),%rax
520	cmpq	%rax,%rbp
521	ja	.Lcbc_bzero
522
523.Lcbc_done:
524	leaq	(%rbp),%rsp
525	jmp	.Lcbc_exit
526
527.align	16
528.Lcbc_aligned:
529	leaq	(%rsi,%rcx,1),%rbp
530	negq	%rbp
531	andq	$0xfff,%rbp
532	xorl	%eax,%eax
533	cmpq	$64,%rbp
534	movq	$64-1,%rbp
535	cmovaeq	%rax,%rbp
536	andq	%rcx,%rbp
537	subq	%rbp,%rcx
538	jz	.Lcbc_aligned_tail
539	leaq	-16(%rdx),%rax
540	leaq	16(%rdx),%rbx
541	shrq	$4,%rcx
542.byte	0xf3,0x0f,0xa7,208
543	movdqa	(%rax),%xmm0
544	movdqa	%xmm0,-16(%rdx)
545	testq	%rbp,%rbp
546	jz	.Lcbc_exit
547
548.Lcbc_aligned_tail:
549	movq	%rdi,%r8
550	movq	%rbp,%rbx
551	movq	%rbp,%rcx
552	leaq	(%rsp),%rbp
553	subq	%rcx,%rsp
554	shrq	$3,%rcx
555	leaq	(%rsp),%rdi
556.byte	0xf3,0x48,0xa5
557	leaq	(%r8),%rdi
558	leaq	(%rsp),%rsi
559	movq	%rbx,%rcx
560	jmp	.Lcbc_loop
561.Lcbc_exit:
562	movl	$1,%eax
563	leaq	8(%rsp),%rsp
564.Lcbc_abort:
565	popq	%rbx
566	popq	%rbp
567	.byte	0xf3,0xc3
568.size	padlock_cbc_encrypt,.-padlock_cbc_encrypt
569.globl	padlock_cfb_encrypt
570.type	padlock_cfb_encrypt,@function
571.align	16
572padlock_cfb_encrypt:
573	pushq	%rbp
574	pushq	%rbx
575
576	xorl	%eax,%eax
577	testq	$15,%rdx
578	jnz	.Lcfb_abort
579	testq	$15,%rcx
580	jnz	.Lcfb_abort
581	leaq	.Lpadlock_saved_context(%rip),%rax
582	pushf
583	cld
584	call	_padlock_verify_ctx
585	leaq	16(%rdx),%rdx
586	xorl	%eax,%eax
587	xorl	%ebx,%ebx
588	testl	$32,(%rdx)
589	jnz	.Lcfb_aligned
590	testq	$0x0f,%rdi
591	setz	%al
592	testq	$0x0f,%rsi
593	setz	%bl
594	testl	%ebx,%eax
595	jnz	.Lcfb_aligned
596	negq	%rax
597	movq	$512,%rbx
598	notq	%rax
599	leaq	(%rsp),%rbp
600	cmpq	%rbx,%rcx
601	cmovcq	%rcx,%rbx
602	andq	%rbx,%rax
603	movq	%rcx,%rbx
604	negq	%rax
605	andq	$512-1,%rbx
606	leaq	(%rax,%rbp,1),%rsp
607	movq	$512,%rax
608	cmovzq	%rax,%rbx
609	jmp	.Lcfb_loop
610.align	16
611.Lcfb_loop:
612	cmpq	%rcx,%rbx
613	cmovaq	%rcx,%rbx
614	movq	%rdi,%r8
615	movq	%rsi,%r9
616	movq	%rcx,%r10
617	movq	%rbx,%rcx
618	movq	%rbx,%r11
619	testq	$0x0f,%rdi
620	cmovnzq	%rsp,%rdi
621	testq	$0x0f,%rsi
622	jz	.Lcfb_inp_aligned
623	shrq	$3,%rcx
624.byte	0xf3,0x48,0xa5
625	subq	%rbx,%rdi
626	movq	%rbx,%rcx
627	movq	%rdi,%rsi
628.Lcfb_inp_aligned:
629	leaq	-16(%rdx),%rax
630	leaq	16(%rdx),%rbx
631	shrq	$4,%rcx
632.byte	0xf3,0x0f,0xa7,224
633	movdqa	(%rax),%xmm0
634	movdqa	%xmm0,-16(%rdx)
635	movq	%r8,%rdi
636	movq	%r11,%rbx
637	testq	$0x0f,%rdi
638	jz	.Lcfb_out_aligned
639	movq	%rbx,%rcx
640	leaq	(%rsp),%rsi
641	shrq	$3,%rcx
642.byte	0xf3,0x48,0xa5
643	subq	%rbx,%rdi
644.Lcfb_out_aligned:
645	movq	%r9,%rsi
646	movq	%r10,%rcx
647	addq	%rbx,%rdi
648	addq	%rbx,%rsi
649	subq	%rbx,%rcx
650	movq	$512,%rbx
651	jnz	.Lcfb_loop
652	cmpq	%rbp,%rsp
653	je	.Lcfb_done
654
655	pxor	%xmm0,%xmm0
656	leaq	(%rsp),%rax
657.Lcfb_bzero:
658	movaps	%xmm0,(%rax)
659	leaq	16(%rax),%rax
660	cmpq	%rax,%rbp
661	ja	.Lcfb_bzero
662
663.Lcfb_done:
664	leaq	(%rbp),%rsp
665	jmp	.Lcfb_exit
666
667.align	16
668.Lcfb_aligned:
669	leaq	-16(%rdx),%rax
670	leaq	16(%rdx),%rbx
671	shrq	$4,%rcx
672.byte	0xf3,0x0f,0xa7,224
673	movdqa	(%rax),%xmm0
674	movdqa	%xmm0,-16(%rdx)
675.Lcfb_exit:
676	movl	$1,%eax
677	leaq	8(%rsp),%rsp
678.Lcfb_abort:
679	popq	%rbx
680	popq	%rbp
681	.byte	0xf3,0xc3
682.size	padlock_cfb_encrypt,.-padlock_cfb_encrypt
683.globl	padlock_ofb_encrypt
684.type	padlock_ofb_encrypt,@function
685.align	16
686padlock_ofb_encrypt:
687	pushq	%rbp
688	pushq	%rbx
689
690	xorl	%eax,%eax
691	testq	$15,%rdx
692	jnz	.Lofb_abort
693	testq	$15,%rcx
694	jnz	.Lofb_abort
695	leaq	.Lpadlock_saved_context(%rip),%rax
696	pushf
697	cld
698	call	_padlock_verify_ctx
699	leaq	16(%rdx),%rdx
700	xorl	%eax,%eax
701	xorl	%ebx,%ebx
702	testl	$32,(%rdx)
703	jnz	.Lofb_aligned
704	testq	$0x0f,%rdi
705	setz	%al
706	testq	$0x0f,%rsi
707	setz	%bl
708	testl	%ebx,%eax
709	jnz	.Lofb_aligned
710	negq	%rax
711	movq	$512,%rbx
712	notq	%rax
713	leaq	(%rsp),%rbp
714	cmpq	%rbx,%rcx
715	cmovcq	%rcx,%rbx
716	andq	%rbx,%rax
717	movq	%rcx,%rbx
718	negq	%rax
719	andq	$512-1,%rbx
720	leaq	(%rax,%rbp,1),%rsp
721	movq	$512,%rax
722	cmovzq	%rax,%rbx
723	jmp	.Lofb_loop
724.align	16
725.Lofb_loop:
726	cmpq	%rcx,%rbx
727	cmovaq	%rcx,%rbx
728	movq	%rdi,%r8
729	movq	%rsi,%r9
730	movq	%rcx,%r10
731	movq	%rbx,%rcx
732	movq	%rbx,%r11
733	testq	$0x0f,%rdi
734	cmovnzq	%rsp,%rdi
735	testq	$0x0f,%rsi
736	jz	.Lofb_inp_aligned
737	shrq	$3,%rcx
738.byte	0xf3,0x48,0xa5
739	subq	%rbx,%rdi
740	movq	%rbx,%rcx
741	movq	%rdi,%rsi
742.Lofb_inp_aligned:
743	leaq	-16(%rdx),%rax
744	leaq	16(%rdx),%rbx
745	shrq	$4,%rcx
746.byte	0xf3,0x0f,0xa7,232
747	movdqa	(%rax),%xmm0
748	movdqa	%xmm0,-16(%rdx)
749	movq	%r8,%rdi
750	movq	%r11,%rbx
751	testq	$0x0f,%rdi
752	jz	.Lofb_out_aligned
753	movq	%rbx,%rcx
754	leaq	(%rsp),%rsi
755	shrq	$3,%rcx
756.byte	0xf3,0x48,0xa5
757	subq	%rbx,%rdi
758.Lofb_out_aligned:
759	movq	%r9,%rsi
760	movq	%r10,%rcx
761	addq	%rbx,%rdi
762	addq	%rbx,%rsi
763	subq	%rbx,%rcx
764	movq	$512,%rbx
765	jnz	.Lofb_loop
766	cmpq	%rbp,%rsp
767	je	.Lofb_done
768
769	pxor	%xmm0,%xmm0
770	leaq	(%rsp),%rax
771.Lofb_bzero:
772	movaps	%xmm0,(%rax)
773	leaq	16(%rax),%rax
774	cmpq	%rax,%rbp
775	ja	.Lofb_bzero
776
777.Lofb_done:
778	leaq	(%rbp),%rsp
779	jmp	.Lofb_exit
780
781.align	16
782.Lofb_aligned:
783	leaq	-16(%rdx),%rax
784	leaq	16(%rdx),%rbx
785	shrq	$4,%rcx
786.byte	0xf3,0x0f,0xa7,232
787	movdqa	(%rax),%xmm0
788	movdqa	%xmm0,-16(%rdx)
789.Lofb_exit:
790	movl	$1,%eax
791	leaq	8(%rsp),%rsp
792.Lofb_abort:
793	popq	%rbx
794	popq	%rbp
795	.byte	0xf3,0xc3
796.size	padlock_ofb_encrypt,.-padlock_ofb_encrypt
797.globl	padlock_ctr32_encrypt
798.type	padlock_ctr32_encrypt,@function
799.align	16
800padlock_ctr32_encrypt:
801	pushq	%rbp
802	pushq	%rbx
803
804	xorl	%eax,%eax
805	testq	$15,%rdx
806	jnz	.Lctr32_abort
807	testq	$15,%rcx
808	jnz	.Lctr32_abort
809	leaq	.Lpadlock_saved_context(%rip),%rax
810	pushf
811	cld
812	call	_padlock_verify_ctx
813	leaq	16(%rdx),%rdx
814	xorl	%eax,%eax
815	xorl	%ebx,%ebx
816	testl	$32,(%rdx)
817	jnz	.Lctr32_aligned
818	testq	$0x0f,%rdi
819	setz	%al
820	testq	$0x0f,%rsi
821	setz	%bl
822	testl	%ebx,%eax
823	jnz	.Lctr32_aligned
824	negq	%rax
825	movq	$512,%rbx
826	notq	%rax
827	leaq	(%rsp),%rbp
828	cmpq	%rbx,%rcx
829	cmovcq	%rcx,%rbx
830	andq	%rbx,%rax
831	movq	%rcx,%rbx
832	negq	%rax
833	andq	$512-1,%rbx
834	leaq	(%rax,%rbp,1),%rsp
835	movq	$512,%rax
836	cmovzq	%rax,%rbx
837.Lctr32_reenter:
838	movl	-4(%rdx),%eax
839	bswapl	%eax
840	negl	%eax
841	andl	$31,%eax
842	movq	$512,%rbx
843	shll	$4,%eax
844	cmovzq	%rbx,%rax
845	cmpq	%rax,%rcx
846	cmovaq	%rax,%rbx
847	cmovbeq	%rcx,%rbx
848	cmpq	%rbx,%rcx
849	ja	.Lctr32_loop
850	movq	%rsi,%rax
851	cmpq	%rsp,%rbp
852	cmoveq	%rdi,%rax
853	addq	%rcx,%rax
854	negq	%rax
855	andq	$0xfff,%rax
856	cmpq	$32,%rax
857	movq	$-32,%rax
858	cmovaeq	%rbx,%rax
859	andq	%rax,%rbx
860	jz	.Lctr32_unaligned_tail
861	jmp	.Lctr32_loop
862.align	16
863.Lctr32_loop:
864	cmpq	%rcx,%rbx
865	cmovaq	%rcx,%rbx
866	movq	%rdi,%r8
867	movq	%rsi,%r9
868	movq	%rcx,%r10
869	movq	%rbx,%rcx
870	movq	%rbx,%r11
871	testq	$0x0f,%rdi
872	cmovnzq	%rsp,%rdi
873	testq	$0x0f,%rsi
874	jz	.Lctr32_inp_aligned
875	shrq	$3,%rcx
876.byte	0xf3,0x48,0xa5
877	subq	%rbx,%rdi
878	movq	%rbx,%rcx
879	movq	%rdi,%rsi
880.Lctr32_inp_aligned:
881	leaq	-16(%rdx),%rax
882	leaq	16(%rdx),%rbx
883	shrq	$4,%rcx
884.byte	0xf3,0x0f,0xa7,216
885	movl	-4(%rdx),%eax
886	testl	$0xffff0000,%eax
887	jnz	.Lctr32_no_carry
888	bswapl	%eax
889	addl	$0x10000,%eax
890	bswapl	%eax
891	movl	%eax,-4(%rdx)
892.Lctr32_no_carry:
893	movq	%r8,%rdi
894	movq	%r11,%rbx
895	testq	$0x0f,%rdi
896	jz	.Lctr32_out_aligned
897	movq	%rbx,%rcx
898	leaq	(%rsp),%rsi
899	shrq	$3,%rcx
900.byte	0xf3,0x48,0xa5
901	subq	%rbx,%rdi
902.Lctr32_out_aligned:
903	movq	%r9,%rsi
904	movq	%r10,%rcx
905	addq	%rbx,%rdi
906	addq	%rbx,%rsi
907	subq	%rbx,%rcx
908	movq	$512,%rbx
909	jz	.Lctr32_break
910	cmpq	%rbx,%rcx
911	jae	.Lctr32_loop
912	movq	%rcx,%rbx
913	movq	%rsi,%rax
914	cmpq	%rsp,%rbp
915	cmoveq	%rdi,%rax
916	addq	%rcx,%rax
917	negq	%rax
918	andq	$0xfff,%rax
919	cmpq	$32,%rax
920	movq	$-32,%rax
921	cmovaeq	%rbx,%rax
922	andq	%rax,%rbx
923	jnz	.Lctr32_loop
924.Lctr32_unaligned_tail:
925	xorl	%eax,%eax
926	cmpq	%rsp,%rbp
927	cmoveq	%rcx,%rax
928	movq	%rdi,%r8
929	movq	%rcx,%rbx
930	subq	%rax,%rsp
931	shrq	$3,%rcx
932	leaq	(%rsp),%rdi
933.byte	0xf3,0x48,0xa5
934	movq	%rsp,%rsi
935	movq	%r8,%rdi
936	movq	%rbx,%rcx
937	jmp	.Lctr32_loop
938.align	16
939.Lctr32_break:
940	cmpq	%rbp,%rsp
941	je	.Lctr32_done
942
943	pxor	%xmm0,%xmm0
944	leaq	(%rsp),%rax
945.Lctr32_bzero:
946	movaps	%xmm0,(%rax)
947	leaq	16(%rax),%rax
948	cmpq	%rax,%rbp
949	ja	.Lctr32_bzero
950
951.Lctr32_done:
952	leaq	(%rbp),%rsp
953	jmp	.Lctr32_exit
954
955.align	16
956.Lctr32_aligned:
957	movl	-4(%rdx),%eax
958	bswapl	%eax
959	negl	%eax
960	andl	$0xffff,%eax
961	movq	$1048576,%rbx
962	shll	$4,%eax
963	cmovzq	%rbx,%rax
964	cmpq	%rax,%rcx
965	cmovaq	%rax,%rbx
966	cmovbeq	%rcx,%rbx
967	jbe	.Lctr32_aligned_skip
968
969.Lctr32_aligned_loop:
970	movq	%rcx,%r10
971	movq	%rbx,%rcx
972	movq	%rbx,%r11
973
974	leaq	-16(%rdx),%rax
975	leaq	16(%rdx),%rbx
976	shrq	$4,%rcx
977.byte	0xf3,0x0f,0xa7,216
978
979	movl	-4(%rdx),%eax
980	bswapl	%eax
981	addl	$0x10000,%eax
982	bswapl	%eax
983	movl	%eax,-4(%rdx)
984
985	movq	%r10,%rcx
986	subq	%r11,%rcx
987	movq	$1048576,%rbx
988	jz	.Lctr32_exit
989	cmpq	%rbx,%rcx
990	jae	.Lctr32_aligned_loop
991
992.Lctr32_aligned_skip:
993	leaq	(%rsi,%rcx,1),%rbp
994	negq	%rbp
995	andq	$0xfff,%rbp
996	xorl	%eax,%eax
997	cmpq	$32,%rbp
998	movq	$32-1,%rbp
999	cmovaeq	%rax,%rbp
1000	andq	%rcx,%rbp
1001	subq	%rbp,%rcx
1002	jz	.Lctr32_aligned_tail
1003	leaq	-16(%rdx),%rax
1004	leaq	16(%rdx),%rbx
1005	shrq	$4,%rcx
1006.byte	0xf3,0x0f,0xa7,216
1007	testq	%rbp,%rbp
1008	jz	.Lctr32_exit
1009
1010.Lctr32_aligned_tail:
1011	movq	%rdi,%r8
1012	movq	%rbp,%rbx
1013	movq	%rbp,%rcx
1014	leaq	(%rsp),%rbp
1015	subq	%rcx,%rsp
1016	shrq	$3,%rcx
1017	leaq	(%rsp),%rdi
1018.byte	0xf3,0x48,0xa5
1019	leaq	(%r8),%rdi
1020	leaq	(%rsp),%rsi
1021	movq	%rbx,%rcx
1022	jmp	.Lctr32_loop
1023.Lctr32_exit:
1024	movl	$1,%eax
1025	leaq	8(%rsp),%rsp
1026.Lctr32_abort:
1027	popq	%rbx
1028	popq	%rbp
1029	.byte	0xf3,0xc3
1030.size	padlock_ctr32_encrypt,.-padlock_ctr32_encrypt
1031.byte	86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1032.align	16
1033.data
1034.align	8
1035.Lpadlock_saved_context:
1036.quad	0
1037