# $FreeBSD$
#
# RC4 stream cipher routines for 32-bit x86.
# Syntax: GNU as, AT&T operand order (source first, destination last).
# Arguments are read from the stack in every function in this file.
# The credit string embedded after RC4_options reads
# "RC4 for x86, CRYPTOGAMS by <appro@openssl.org>".
.file	"rc4-586.s"
.text
#-----------------------------------------------------------------------
# RC4: XOR a buffer with the RC4 keystream and advance the key state.
#
# Presumed C prototype (TODO confirm against the OpenSSL rc4 header):
#   void RC4(RC4_KEY *key, size_t len, const unsigned char *in,
#            unsigned char *out);
#
# ABI:    32-bit x86, all arguments on the stack.  %ebp, %ebx, %esi and
#         %edi are saved on entry and restored on exit.
# In:     arg1 = key state: x at +0, y at +4, S-box starting at +8
#         arg2 = number of bytes to process (zero returns at once)
#         arg3 = input buffer
#         arg4 = output buffer
# Out:    out[i] = in[i] XOR keystream[i]; x and y are written back to
#         the key state and the S-box is left permuted.
# Clobb:  %eax, %ecx, %edx, flags; %mm0-%mm2 on the MMX path (emms is
#         executed before that path is left).
#
# Register roles in the main loops:
#   %edi = S-box base            %esi = input cursor
#   %eax = x, kept one ahead     %ebx = y
#   %ecx = S[x]                  %edx = scratch
#   %ebp = out - in, so the output byte for (%esi) is at (%ebp,%esi)
#
# S-box layout: dword entries, unless the dword at S+256 equals -1, in
# which case the entries are single bytes (.L001RC4_CHAR path).
#
# Scratch reuse: the stack slots of arg2, arg3 and arg4 and the y slot
# of the key state are overwritten while the function runs.
#-----------------------------------------------------------------------
.globl	RC4
.type	RC4,@function
.align	16
RC4:
.L_RC4_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		# edi = arg1: key state
	movl	24(%esp),%edx		# edx = arg2: byte count
	movl	28(%esp),%esi		# esi = arg3: input
	movl	32(%esp),%ebp		# ebp = arg4: output
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	cmpl	$0,%edx
	je	.L000abort		# len == 0: leave the state untouched
	movb	(%edi),%al		# al = x
	movb	4(%edi),%bl		# bl = y
	addl	$8,%edi			# edi -> S-box
	leal	(%esi,%edx,1),%ecx	# ecx = in + len (end of input)
	subl	%esi,%ebp		# ebp = out - in
	movl	%ecx,24(%esp)		# arg2 slot now holds the end pointer
	incb	%al			# x = (x + 1) mod 256
	cmpl	$-1,256(%edi)		# -1 marks a byte-wide S-box
	je	.L001RC4_CHAR
	movl	(%edi,%eax,4),%ecx	# ecx = S[x]
	andl	$-4,%edx		# edx = len rounded down to a multiple of 4
	jz	.L002loop1		# fewer than 4 bytes: byte loop only
	testl	$-8,%edx
	movl	%ebp,32(%esp)		# spill out-in (mov keeps the test flags)
	jz	.L003go4loop4		# fewer than 8 bytes: 4-byte loop
	leal	OPENSSL_ia32cap_P,%ebp
	btl	$26,(%ebp)		# capability bit 26 enables the MMX loop
	jnc	.L003go4loop4
	movl	32(%esp),%ebp		# reload out-in
	andl	$-8,%edx
	leal	-8(%esi,%edx,1),%edx	# address of the last whole 8-byte block
	movl	%edx,-4(%edi)		# parked in the y slot until .L006done
	# First RC4 step before entering the loop: swap S[x] and S[y] and
	# fetch keystream byte 0 into mm2.
	addb	%cl,%bl			# y += S[x]
	movl	(%edi,%ebx,4),%edx	# edx = S[y]
	movl	%ecx,(%edi,%ebx,4)	# S[y] = old S[x]
	movl	%edx,(%edi,%eax,4)	# S[x] = old S[y]
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax		# x = (x + 1) & 255
	movzbl	%dl,%edx		# edx = (S[x] + S[y]) & 255
	movq	(%esi),%mm0		# mm0 = 8 input bytes
	movl	(%edi,%eax,4),%ecx	# ecx = S[x] for the next step
	movd	(%edi,%edx,4),%mm2	# mm2 = keystream byte 0
	jmp	.L004loop_mmx_enter
.align	16
.L005loop_mmx:
	# Step for byte 0 of the next block, interleaved with finishing the
	# previous block: keystream byte 7 (still in mm1) is shifted to the
	# top byte, merged, and the finished 8 bytes are stored.
	addb	%cl,%bl
	psllq	$56,%mm1
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movq	(%esi),%mm0		# mm0 = input for the new block
	movq	%mm2,-8(%ebp,%esi,1)	# store previous block; esi is already +8
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm2	# mm2 = keystream byte 0
.L004loop_mmx_enter:
	# Step for byte 1; the input block is folded into mm2 here.
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm0,%mm2		# mm2 = input ^ keystream byte 0
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	# mm1 = keystream byte 1
	# Step for byte 2; byte 1 is shifted into place and merged.
	addb	%cl,%bl
	psllq	$8,%mm1
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	# mm1 = keystream byte 2
	# Step for byte 3; byte 2 is merged.
	addb	%cl,%bl
	psllq	$16,%mm1
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	# mm1 = keystream byte 3
	# Step for byte 4; byte 3 is merged.
	addb	%cl,%bl
	psllq	$24,%mm1
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	# mm1 = keystream byte 4
	# Step for byte 5; byte 4 is merged.
	addb	%cl,%bl
	psllq	$32,%mm1
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	# mm1 = keystream byte 5
	# Step for byte 6; byte 5 is merged.
	addb	%cl,%bl
	psllq	$40,%mm1
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	# mm1 = keystream byte 6
	# Step for byte 7; byte 6 is merged.  Byte 7 stays in mm1 and is
	# merged at the loop top or right after the loop.
	addb	%cl,%bl
	psllq	$48,%mm1
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	# mm1 = keystream byte 7
	# Rebuild ebx from its low byte through a zeroed register
	# (presumably to avoid a partial-register dependency; confirm).
	movl	%ebx,%edx
	xorl	%ebx,%ebx
	movb	%dl,%bl
	cmpl	-4(%edi),%esi		# compare with the last whole block address
	leal	8(%esi),%esi		# advance input; lea leaves the flags intact
	jb	.L005loop_mmx
	psllq	$56,%mm1		# finish the final block
	pxor	%mm1,%mm2
	movq	%mm2,-8(%ebp,%esi,1)
	emms				# leave MMX state clean
	cmpl	24(%esp),%esi		# reached the end pointer?
	je	.L006done
	jmp	.L002loop1		# 1..7 trailing bytes go through the byte loop
.align	16
.L003go4loop4:
	leal	-4(%esi,%edx,1),%edx	# address of the last whole 4-byte word
	movl	%edx,28(%esp)		# arg3 slot now holds that address
.L007loop4:
	# Four RC4 steps per iteration.  ebp collects the four keystream
	# bytes; each ror by 8 moves earlier bytes down so that after the
	# fourth rotate byte 0 sits in bits 0-7.
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	addl	%ecx,%edx
	incb	%al
	andl	$255,%edx
	movl	(%edi,%eax,4),%ecx
	movl	(%edi,%edx,4),%ebp	# ebp = keystream byte 0
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	addl	%ecx,%edx
	incb	%al
	andl	$255,%edx
	rorl	$8,%ebp
	movl	(%edi,%eax,4),%ecx
	orl	(%edi,%edx,4),%ebp	# merge keystream byte 1
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	addl	%ecx,%edx
	incb	%al
	andl	$255,%edx
	rorl	$8,%ebp
	movl	(%edi,%eax,4),%ecx
	orl	(%edi,%edx,4),%ebp	# merge keystream byte 2
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	addl	%ecx,%edx
	incb	%al
	andl	$255,%edx
	rorl	$8,%ebp
	movl	32(%esp),%ecx		# ecx = out - in (S[x] is reloaded below)
	orl	(%edi,%edx,4),%ebp	# merge keystream byte 3
	rorl	$8,%ebp
	xorl	(%esi),%ebp		# ebp = 4 input bytes ^ keystream
	cmpl	28(%esp),%esi		# flags survive the mov and lea below
	movl	%ebp,(%ecx,%esi,1)	# store 4 output bytes
	leal	4(%esi),%esi
	movl	(%edi,%eax,4),%ecx	# ecx = S[x] again
	jb	.L007loop4
	cmpl	24(%esp),%esi		# reached the end pointer?
	je	.L006done
	movl	32(%esp),%ebp		# ebp = out - in for the byte loop
.align	16
.L002loop1:
	# One byte per iteration, dword S-box.
	addb	%cl,%bl			# y += S[x]
	movl	(%edi,%ebx,4),%edx	# edx = S[y]
	movl	%ecx,(%edi,%ebx,4)	# S[y] = old S[x]
	movl	%edx,(%edi,%eax,4)	# S[x] = old S[y]
	addl	%ecx,%edx
	incb	%al
	andl	$255,%edx		# edx = (S[x] + S[y]) & 255
	movl	(%edi,%edx,4),%edx	# edx = keystream byte
	xorb	(%esi),%dl
	leal	1(%esi),%esi
	movl	(%edi,%eax,4),%ecx	# ecx = S[x] for the next step
	cmpl	24(%esp),%esi
	movb	%dl,-1(%ebp,%esi,1)	# store output byte (mov keeps the flags)
	jb	.L002loop1
	jmp	.L006done
.align	16
.L001RC4_CHAR:
	# Byte-wide S-box variant of the one-byte loop.
	movzbl	(%edi,%eax,1),%ecx	# ecx = S[x]
.L008cloop1:
	addb	%cl,%bl			# y += S[x]
	movzbl	(%edi,%ebx,1),%edx	# edx = S[y]
	movb	%cl,(%edi,%ebx,1)	# S[y] = old S[x]
	movb	%dl,(%edi,%eax,1)	# S[x] = old S[y]
	addb	%cl,%dl			# dl = (S[x] + S[y]) mod 256
	movzbl	(%edi,%edx,1),%edx	# edx = keystream byte
	addb	$1,%al
	xorb	(%esi),%dl
	leal	1(%esi),%esi
	movzbl	(%edi,%eax,1),%ecx	# ecx = S[x] for the next step
	cmpl	24(%esp),%esi
	movb	%dl,-1(%ebp,%esi,1)	# store output byte (mov keeps the flags)
	jb	.L008cloop1
.L006done:
	decb	%al			# undo the look-ahead increment of x
	movl	%ebx,-4(%edi)		# key y slot = y
	movb	%al,-8(%edi)		# key x slot (low byte) = x
.L000abort:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	RC4,.-.L_RC4_begin
#-----------------------------------------------------------------------
# private_RC4_set_key: run the RC4 key schedule into a key state.
#
# Presumed C prototype (TODO confirm against the OpenSSL rc4 header):
#   void private_RC4_set_key(RC4_KEY *key, int len,
#                            const unsigned char *data);
#
# ABI:    32-bit x86, all arguments on the stack.  %ebp, %ebx, %esi and
#         %edi are saved on entry and restored on exit.
# In:     arg1 = key state: x at +0, y at +4, S-box starting at +8
#         arg2 = key length in bytes
#         arg3 = key bytes
# Out:    S-box initialised to 0..255 and permuted by the key; x and y
#         are cleared.  When capability bit 20 is set the S-box uses
#         byte entries and the dword at S+256 is set to -1; otherwise
#         the S-box uses dword entries.
# Clobb:  %eax (zero on return), %ecx, %edx, flags.
#
# The y slot of the key state holds -len as scratch during the
# schedule and is cleared at the end.
#
# NOTE(review): len == 0 is not guarded.  The key index then starts at
# zero, never wraps, and 256 bytes are read starting at data.  Confirm
# that callers never pass a zero length.
#-----------------------------------------------------------------------
.globl	private_RC4_set_key
.type	private_RC4_set_key,@function
.align	16
private_RC4_set_key:
.L_private_RC4_set_key_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		# edi = arg1: key state
	movl	24(%esp),%ebp		# ebp = arg2: key length
	movl	28(%esp),%esi		# esi = arg3: key bytes
	leal	OPENSSL_ia32cap_P,%edx
	leal	8(%edi),%edi		# edi -> S-box
	leal	(%esi,%ebp,1),%esi	# esi = data + len
	negl	%ebp			# ebp = -len, so (%esi,%ebp) is data[0]
	xorl	%eax,%eax
	movl	%ebp,-4(%edi)		# keep -len in the y slot for reloads
	btl	$20,(%edx)		# capability bit 20 selects the byte S-box
	jc	.L009c1stloop
.align	16
.L010w1stloop:
	# Dword S-box: S[i] = i for i = 0..255 (carry out of al ends it).
	movl	%eax,(%edi,%eax,4)
	addb	$1,%al
	jnc	.L010w1stloop
	xorl	%ecx,%ecx		# ecx = i
	xorl	%edx,%edx		# edx = j
.align	16
.L011w2ndloop:
	# j = (j + S[i] + key[k]) mod 256, then swap S[i] and S[j].
	movl	(%edi,%ecx,4),%eax	# eax = S[i]
	addb	(%esi,%ebp,1),%dl	# j += key[k]
	addb	%al,%dl			# j += S[i]
	addl	$1,%ebp			# k++ ; sets ZF when the key is exhausted
	movl	(%edi,%edx,4),%ebx	# ebx = S[j] (mov keeps the flags)
	jnz	.L012wnowrap
	movl	-4(%edi),%ebp		# wrap: restart at data[0]
.L012wnowrap:
	movl	%eax,(%edi,%edx,4)	# S[j] = old S[i]
	movl	%ebx,(%edi,%ecx,4)	# S[i] = old S[j]
	addb	$1,%cl
	jnc	.L011w2ndloop		# until i wraps past 255
	jmp	.L013exit
.align	16
.L009c1stloop:
	# Byte S-box: S[i] = i for i = 0..255.
	movb	%al,(%edi,%eax,1)
	addb	$1,%al
	jnc	.L009c1stloop
	xorl	%ecx,%ecx		# ecx = i
	xorl	%edx,%edx		# edx = j
	xorl	%ebx,%ebx
.align	16
.L014c2ndloop:
	# Same mixing loop as above with byte-wide entries.
	movb	(%edi,%ecx,1),%al	# al = S[i]
	addb	(%esi,%ebp,1),%dl	# j += key[k]
	addb	%al,%dl			# j += S[i]
	addl	$1,%ebp			# k++ ; sets ZF when the key is exhausted
	movb	(%edi,%edx,1),%bl	# bl = S[j] (mov keeps the flags)
	jnz	.L015cnowrap
	movl	-4(%edi),%ebp		# wrap: restart at data[0]
.L015cnowrap:
	movb	%al,(%edi,%edx,1)	# S[j] = old S[i]
	movb	%bl,(%edi,%ecx,1)	# S[i] = old S[j]
	addb	$1,%cl
	jnc	.L014c2ndloop		# until i wraps past 255
	movl	$-1,256(%edi)		# mark the S-box as byte-wide for RC4
.L013exit:
	xorl	%eax,%eax
	movl	%eax,-8(%edi)		# x = 0
	movl	%eax,-4(%edi)		# y = 0 (also discards the -len scratch)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	private_RC4_set_key,.-.L_private_RC4_set_key_begin
#-----------------------------------------------------------------------
# RC4_options: return a pointer to a NUL-terminated string naming the
# code path selected by the capability word.
#
# Presumed C prototype (TODO confirm against the OpenSSL rc4 header):
#   const char *RC4_options(void);
#
# ABI:    32-bit x86, no arguments, no stack frame.
# Out:    %eax = address of one of the strings at .L017opts:
#           bit 20 set                 -> "rc4(1x,char)"
#           bit 20 clear, bit 26 set   -> "rc4(8x,mmx)"
#           neither bit set            -> "rc4(4x,int)"
# Clobb:  %edx, flags.
#
# The string table address is formed relative to the return address
# obtained with call/pop, so it needs no absolute relocation.
#-----------------------------------------------------------------------
.globl	RC4_options
.type	RC4_options,@function
.align	16
RC4_options:
.L_RC4_options_begin:
	call	.L016pic_point
.L016pic_point:
	popl	%eax			# eax = address of .L016pic_point
	leal	.L017opts-.L016pic_point(%eax),%eax	# eax = &"rc4(4x,int)"
	leal	OPENSSL_ia32cap_P,%edx
	movl	(%edx),%edx		# edx = first capability dword
	btl	$20,%edx
	jc	.L0181xchar
	btl	$26,%edx
	jnc	.L019ret
	addl	$25,%eax		# skip 12 + 13 bytes -> "rc4(8x,mmx)"
	ret
.L0181xchar:
	addl	$12,%eax		# skip 12 bytes -> "rc4(1x,char)"
.L019ret:
	ret
.align	64
.L017opts:
	# "rc4(4x,int)" plus NUL, 12 bytes
.byte	114,99,52,40,52,120,44,105,110,116,41,0
	# "rc4(1x,char)" plus NUL, 13 bytes
.byte	114,99,52,40,49,120,44,99,104,97,114,41,0
	# "rc4(8x,mmx)" plus NUL, 12 bytes
.byte	114,99,52,40,56,120,44,109,109,120,41,0
	# "RC4 for x86, CRYPTOGAMS by <appro@openssl.org>" plus NUL
.byte	82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte	80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte	111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
.size	RC4_options,.-.L_RC4_options_begin
# Capability word shared as a common symbol: size 8 bytes, alignment 4.
# Code in this file only tests bits 20 and 26 of its first dword.
.comm	OPENSSL_ia32cap_P,8,4