1/*
2 * include/asm-alpha/xor.h
3 *
4 * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * You should have received a copy of the GNU General Public License
12 * (for example /usr/src/linux/COPYING); if not, write to the Free
13 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14 */
15
16extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
17extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
18		        unsigned long *);
19extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
20		        unsigned long *, unsigned long *);
21extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
22		        unsigned long *, unsigned long *, unsigned long *);
23
24extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
25				 unsigned long *);
26extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
27				 unsigned long *, unsigned long *);
28extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
29				 unsigned long *, unsigned long *,
30				 unsigned long *);
31extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
32				 unsigned long *, unsigned long *,
33				 unsigned long *, unsigned long *);
34
35asm("
36	.text
37	.align 3
38	.ent xor_alpha_2
39xor_alpha_2:
40	.prologue 0
41	srl $16, 6, $16
42	.align 4
432:
44	ldq $0,0($17)
45	ldq $1,0($18)
46	ldq $2,8($17)
47	ldq $3,8($18)
48
49	ldq $4,16($17)
50	ldq $5,16($18)
51	ldq $6,24($17)
52	ldq $7,24($18)
53
54	ldq $19,32($17)
55	ldq $20,32($18)
56	ldq $21,40($17)
57	ldq $22,40($18)
58
59	ldq $23,48($17)
60	ldq $24,48($18)
61	ldq $25,56($17)
62	xor $0,$1,$0		# 7 cycles from $1 load
63
64	ldq $27,56($18)
65	xor $2,$3,$2
66	stq $0,0($17)
67	xor $4,$5,$4
68
69	stq $2,8($17)
70	xor $6,$7,$6
71	stq $4,16($17)
72	xor $19,$20,$19
73
74	stq $6,24($17)
75	xor $21,$22,$21
76	stq $19,32($17)
77	xor $23,$24,$23
78
79	stq $21,40($17)
80	xor $25,$27,$25
81	stq $23,48($17)
82	subq $16,1,$16
83
84	stq $25,56($17)
85	addq $17,64,$17
86	addq $18,64,$18
87	bgt $16,2b
88
89	ret
90	.end xor_alpha_2
91
92	.align 3
93	.ent xor_alpha_3
94xor_alpha_3:
95	.prologue 0
96	srl $16, 6, $16
97	.align 4
983:
99	ldq $0,0($17)
100	ldq $1,0($18)
101	ldq $2,0($19)
102	ldq $3,8($17)
103
104	ldq $4,8($18)
105	ldq $6,16($17)
106	ldq $7,16($18)
107	ldq $21,24($17)
108
109	ldq $22,24($18)
110	ldq $24,32($17)
111	ldq $25,32($18)
112	ldq $5,8($19)
113
114	ldq $20,16($19)
115	ldq $23,24($19)
116	ldq $27,32($19)
117	nop
118
119	xor $0,$1,$1		# 8 cycles from $0 load
120	xor $3,$4,$4		# 6 cycles from $4 load
121	xor $6,$7,$7		# 6 cycles from $7 load
122	xor $21,$22,$22		# 5 cycles from $22 load
123
124	xor $1,$2,$2		# 9 cycles from $2 load
125	xor $24,$25,$25		# 5 cycles from $25 load
126	stq $2,0($17)
127	xor $4,$5,$5		# 6 cycles from $5 load
128
129	stq $5,8($17)
130	xor $7,$20,$20		# 7 cycles from $20 load
131	stq $20,16($17)
132	xor $22,$23,$23		# 7 cycles from $23 load
133
134	stq $23,24($17)
135	xor $25,$27,$27		# 7 cycles from $27 load
136	stq $27,32($17)
137	nop
138
139	ldq $0,40($17)
140	ldq $1,40($18)
141	ldq $3,48($17)
142	ldq $4,48($18)
143
144	ldq $6,56($17)
145	ldq $7,56($18)
146	ldq $2,40($19)
147	ldq $5,48($19)
148
149	ldq $20,56($19)
150	xor $0,$1,$1		# 4 cycles from $1 load
151	xor $3,$4,$4		# 5 cycles from $4 load
152	xor $6,$7,$7		# 5 cycles from $7 load
153
154	xor $1,$2,$2		# 4 cycles from $2 load
155	xor $4,$5,$5		# 5 cycles from $5 load
156	stq $2,40($17)
157	xor $7,$20,$20		# 4 cycles from $20 load
158
159	stq $5,48($17)
160	subq $16,1,$16
161	stq $20,56($17)
162	addq $19,64,$19
163
164	addq $18,64,$18
165	addq $17,64,$17
166	bgt $16,3b
167	ret
168	.end xor_alpha_3
169
170	.align 3
171	.ent xor_alpha_4
172xor_alpha_4:
173	.prologue 0
174	srl $16, 6, $16
175	.align 4
1764:
177	ldq $0,0($17)
178	ldq $1,0($18)
179	ldq $2,0($19)
180	ldq $3,0($20)
181
182	ldq $4,8($17)
183	ldq $5,8($18)
184	ldq $6,8($19)
185	ldq $7,8($20)
186
187	ldq $21,16($17)
188	ldq $22,16($18)
189	ldq $23,16($19)
190	ldq $24,16($20)
191
192	ldq $25,24($17)
193	xor $0,$1,$1		# 6 cycles from $1 load
194	ldq $27,24($18)
195	xor $2,$3,$3		# 6 cycles from $3 load
196
197	ldq $0,24($19)
198	xor $1,$3,$3
199	ldq $1,24($20)
200	xor $4,$5,$5		# 7 cycles from $5 load
201
202	stq $3,0($17)
203	xor $6,$7,$7
204	xor $21,$22,$22		# 7 cycles from $22 load
205	xor $5,$7,$7
206
207	stq $7,8($17)
208	xor $23,$24,$24		# 7 cycles from $24 load
209	ldq $2,32($17)
210	xor $22,$24,$24
211
212	ldq $3,32($18)
213	ldq $4,32($19)
214	ldq $5,32($20)
215	xor $25,$27,$27		# 8 cycles from $27 load
216
217	ldq $6,40($17)
218	ldq $7,40($18)
219	ldq $21,40($19)
220	ldq $22,40($20)
221
222	stq $24,16($17)
223	xor $0,$1,$1		# 9 cycles from $1 load
224	xor $2,$3,$3		# 5 cycles from $3 load
225	xor $27,$1,$1
226
227	stq $1,24($17)
228	xor $4,$5,$5		# 5 cycles from $5 load
229	ldq $23,48($17)
230	ldq $24,48($18)
231
232	ldq $25,48($19)
233	xor $3,$5,$5
234	ldq $27,48($20)
235	ldq $0,56($17)
236
237	ldq $1,56($18)
238	ldq $2,56($19)
239	xor $6,$7,$7		# 8 cycles from $6 load
240	ldq $3,56($20)
241
242	stq $5,32($17)
243	xor $21,$22,$22		# 8 cycles from $22 load
244	xor $7,$22,$22
245	xor $23,$24,$24		# 5 cycles from $24 load
246
247	stq $22,40($17)
248	xor $25,$27,$27		# 5 cycles from $27 load
249	xor $24,$27,$27
250	xor $0,$1,$1		# 5 cycles from $1 load
251
252	stq $27,48($17)
253	xor $2,$3,$3		# 4 cycles from $3 load
254	xor $1,$3,$3
255	subq $16,1,$16
256
257	stq $3,56($17)
258	addq $20,64,$20
259	addq $19,64,$19
260	addq $18,64,$18
261
262	addq $17,64,$17
263	bgt $16,4b
264	ret
265	.end xor_alpha_4
266
267	.align 3
268	.ent xor_alpha_5
269xor_alpha_5:
270	.prologue 0
271	srl $16, 6, $16
272	.align 4
2735:
274	ldq $0,0($17)
275	ldq $1,0($18)
276	ldq $2,0($19)
277	ldq $3,0($20)
278
279	ldq $4,0($21)
280	ldq $5,8($17)
281	ldq $6,8($18)
282	ldq $7,8($19)
283
284	ldq $22,8($20)
285	ldq $23,8($21)
286	ldq $24,16($17)
287	ldq $25,16($18)
288
289	ldq $27,16($19)
290	xor $0,$1,$1		# 6 cycles from $1 load
291	ldq $28,16($20)
292	xor $2,$3,$3		# 6 cycles from $3 load
293
294	ldq $0,16($21)
295	xor $1,$3,$3
296	ldq $1,24($17)
297	xor $3,$4,$4		# 7 cycles from $4 load
298
299	stq $4,0($17)
300	xor $5,$6,$6		# 7 cycles from $6 load
301	xor $7,$22,$22		# 7 cycles from $22 load
302	xor $6,$23,$23		# 7 cycles from $23 load
303
304	ldq $2,24($18)
305	xor $22,$23,$23
306	ldq $3,24($19)
307	xor $24,$25,$25		# 8 cycles from $25 load
308
309	stq $23,8($17)
310	xor $25,$27,$27		# 8 cycles from $27 load
311	ldq $4,24($20)
312	xor $28,$0,$0		# 7 cycles from $0 load
313
314	ldq $5,24($21)
315	xor $27,$0,$0
316	ldq $6,32($17)
317	ldq $7,32($18)
318
319	stq $0,16($17)
320	xor $1,$2,$2		# 6 cycles from $2 load
321	ldq $22,32($19)
322	xor $3,$4,$4		# 4 cycles from $4 load
323
324	ldq $23,32($20)
325	xor $2,$4,$4
326	ldq $24,32($21)
327	ldq $25,40($17)
328
329	ldq $27,40($18)
330	ldq $28,40($19)
331	ldq $0,40($20)
332	xor $4,$5,$5		# 7 cycles from $5 load
333
334	stq $5,24($17)
335	xor $6,$7,$7		# 7 cycles from $7 load
336	ldq $1,40($21)
337	ldq $2,48($17)
338
339	ldq $3,48($18)
340	xor $7,$22,$22		# 7 cycles from $22 load
341	ldq $4,48($19)
342	xor $23,$24,$24		# 6 cycles from $24 load
343
344	ldq $5,48($20)
345	xor $22,$24,$24
346	ldq $6,48($21)
347	xor $25,$27,$27		# 7 cycles from $27 load
348
349	stq $24,32($17)
350	xor $27,$28,$28		# 8 cycles from $28 load
351	ldq $7,56($17)
352	xor $0,$1,$1		# 6 cycles from $1 load
353
354	ldq $22,56($18)
355	ldq $23,56($19)
356	ldq $24,56($20)
357	ldq $25,56($21)
358
359	xor $28,$1,$1
360	xor $2,$3,$3		# 9 cycles from $3 load
361	xor $3,$4,$4		# 9 cycles from $4 load
362	xor $5,$6,$6		# 8 cycles from $6 load
363
364	stq $1,40($17)
365	xor $4,$6,$6
366	xor $7,$22,$22		# 7 cycles from $22 load
367	xor $23,$24,$24		# 6 cycles from $24 load
368
369	stq $6,48($17)
370	xor $22,$24,$24
371	subq $16,1,$16
372	xor $24,$25,$25		# 8 cycles from $25 load
373
374	stq $25,56($17)
375	addq $21,64,$21
376	addq $20,64,$20
377	addq $19,64,$19
378
379	addq $18,64,$18
380	addq $17,64,$17
381	bgt $16,5b
382	ret
383	.end xor_alpha_5
384
385	.align 3
386	.ent xor_alpha_prefetch_2
387xor_alpha_prefetch_2:
388	.prologue 0
389	srl $16, 6, $16
390
391	ldq $31, 0($17)
392	ldq $31, 0($18)
393
394	ldq $31, 64($17)
395	ldq $31, 64($18)
396
397	ldq $31, 128($17)
398	ldq $31, 128($18)
399
400	ldq $31, 192($17)
401	ldq $31, 192($18)
402	.align 4
4032:
404	ldq $0,0($17)
405	ldq $1,0($18)
406	ldq $2,8($17)
407	ldq $3,8($18)
408
409	ldq $4,16($17)
410	ldq $5,16($18)
411	ldq $6,24($17)
412	ldq $7,24($18)
413
414	ldq $19,32($17)
415	ldq $20,32($18)
416	ldq $21,40($17)
417	ldq $22,40($18)
418
419	ldq $23,48($17)
420	ldq $24,48($18)
421	ldq $25,56($17)
422	ldq $27,56($18)
423
424	ldq $31,256($17)
425	xor $0,$1,$0		# 8 cycles from $1 load
426	ldq $31,256($18)
427	xor $2,$3,$2
428
429	stq $0,0($17)
430	xor $4,$5,$4
431	stq $2,8($17)
432	xor $6,$7,$6
433
434	stq $4,16($17)
435	xor $19,$20,$19
436	stq $6,24($17)
437	xor $21,$22,$21
438
439	stq $19,32($17)
440	xor $23,$24,$23
441	stq $21,40($17)
442	xor $25,$27,$25
443
444	stq $23,48($17)
445	subq $16,1,$16
446	stq $25,56($17)
447	addq $17,64,$17
448
449	addq $18,64,$18
450	bgt $16,2b
451	ret
452	.end xor_alpha_prefetch_2
453
454	.align 3
455	.ent xor_alpha_prefetch_3
456xor_alpha_prefetch_3:
457	.prologue 0
458	srl $16, 6, $16
459
460	ldq $31, 0($17)
461	ldq $31, 0($18)
462	ldq $31, 0($19)
463
464	ldq $31, 64($17)
465	ldq $31, 64($18)
466	ldq $31, 64($19)
467
468	ldq $31, 128($17)
469	ldq $31, 128($18)
470	ldq $31, 128($19)
471
472	ldq $31, 192($17)
473	ldq $31, 192($18)
474	ldq $31, 192($19)
475	.align 4
4763:
477	ldq $0,0($17)
478	ldq $1,0($18)
479	ldq $2,0($19)
480	ldq $3,8($17)
481
482	ldq $4,8($18)
483	ldq $6,16($17)
484	ldq $7,16($18)
485	ldq $21,24($17)
486
487	ldq $22,24($18)
488	ldq $24,32($17)
489	ldq $25,32($18)
490	ldq $5,8($19)
491
492	ldq $20,16($19)
493	ldq $23,24($19)
494	ldq $27,32($19)
495	nop
496
497	xor $0,$1,$1		# 8 cycles from $0 load
498	xor $3,$4,$4		# 7 cycles from $4 load
499	xor $6,$7,$7		# 6 cycles from $7 load
500	xor $21,$22,$22		# 5 cycles from $22 load
501
502	xor $1,$2,$2		# 9 cycles from $2 load
503	xor $24,$25,$25		# 5 cycles from $25 load
504	stq $2,0($17)
505	xor $4,$5,$5		# 6 cycles from $5 load
506
507	stq $5,8($17)
508	xor $7,$20,$20		# 7 cycles from $20 load
509	stq $20,16($17)
510	xor $22,$23,$23		# 7 cycles from $23 load
511
512	stq $23,24($17)
513	xor $25,$27,$27		# 7 cycles from $27 load
514	stq $27,32($17)
515	nop
516
517	ldq $0,40($17)
518	ldq $1,40($18)
519	ldq $3,48($17)
520	ldq $4,48($18)
521
522	ldq $6,56($17)
523	ldq $7,56($18)
524	ldq $2,40($19)
525	ldq $5,48($19)
526
527	ldq $20,56($19)
528	ldq $31,256($17)
529	ldq $31,256($18)
530	ldq $31,256($19)
531
532	xor $0,$1,$1		# 6 cycles from $1 load
533	xor $3,$4,$4		# 5 cycles from $4 load
534	xor $6,$7,$7		# 5 cycles from $7 load
535	xor $1,$2,$2		# 4 cycles from $2 load
536
537	xor $4,$5,$5		# 5 cycles from $5 load
538	xor $7,$20,$20		# 4 cycles from $20 load
539	stq $2,40($17)
540	subq $16,1,$16
541
542	stq $5,48($17)
543	addq $19,64,$19
544	stq $20,56($17)
545	addq $18,64,$18
546
547	addq $17,64,$17
548	bgt $16,3b
549	ret
550	.end xor_alpha_prefetch_3
551
552	.align 3
553	.ent xor_alpha_prefetch_4
554xor_alpha_prefetch_4:
555	.prologue 0
556	srl $16, 6, $16
557
558	ldq $31, 0($17)
559	ldq $31, 0($18)
560	ldq $31, 0($19)
561	ldq $31, 0($20)
562
563	ldq $31, 64($17)
564	ldq $31, 64($18)
565	ldq $31, 64($19)
566	ldq $31, 64($20)
567
568	ldq $31, 128($17)
569	ldq $31, 128($18)
570	ldq $31, 128($19)
571	ldq $31, 128($20)
572
573	ldq $31, 192($17)
574	ldq $31, 192($18)
575	ldq $31, 192($19)
576	ldq $31, 192($20)
577	.align 4
5784:
579	ldq $0,0($17)
580	ldq $1,0($18)
581	ldq $2,0($19)
582	ldq $3,0($20)
583
584	ldq $4,8($17)
585	ldq $5,8($18)
586	ldq $6,8($19)
587	ldq $7,8($20)
588
589	ldq $21,16($17)
590	ldq $22,16($18)
591	ldq $23,16($19)
592	ldq $24,16($20)
593
594	ldq $25,24($17)
595	xor $0,$1,$1		# 6 cycles from $1 load
596	ldq $27,24($18)
597	xor $2,$3,$3		# 6 cycles from $3 load
598
599	ldq $0,24($19)
600	xor $1,$3,$3
601	ldq $1,24($20)
602	xor $4,$5,$5		# 7 cycles from $5 load
603
604	stq $3,0($17)
605	xor $6,$7,$7
606	xor $21,$22,$22		# 7 cycles from $22 load
607	xor $5,$7,$7
608
609	stq $7,8($17)
610	xor $23,$24,$24		# 7 cycles from $24 load
611	ldq $2,32($17)
612	xor $22,$24,$24
613
614	ldq $3,32($18)
615	ldq $4,32($19)
616	ldq $5,32($20)
617	xor $25,$27,$27		# 8 cycles from $27 load
618
619	ldq $6,40($17)
620	ldq $7,40($18)
621	ldq $21,40($19)
622	ldq $22,40($20)
623
624	stq $24,16($17)
625	xor $0,$1,$1		# 9 cycles from $1 load
626	xor $2,$3,$3		# 5 cycles from $3 load
627	xor $27,$1,$1
628
629	stq $1,24($17)
630	xor $4,$5,$5		# 5 cycles from $5 load
631	ldq $23,48($17)
632	xor $3,$5,$5
633
634	ldq $24,48($18)
635	ldq $25,48($19)
636	ldq $27,48($20)
637	ldq $0,56($17)
638
639	ldq $1,56($18)
640	ldq $2,56($19)
641	ldq $3,56($20)
642	xor $6,$7,$7		# 8 cycles from $6 load
643
644	ldq $31,256($17)
645	xor $21,$22,$22		# 8 cycles from $22 load
646	ldq $31,256($18)
647	xor $7,$22,$22
648
649	ldq $31,256($19)
650	xor $23,$24,$24		# 6 cycles from $24 load
651	ldq $31,256($20)
652	xor $25,$27,$27		# 6 cycles from $27 load
653
654	stq $5,32($17)
655	xor $24,$27,$27
656	xor $0,$1,$1		# 7 cycles from $1 load
657	xor $2,$3,$3		# 6 cycles from $3 load
658
659	stq $22,40($17)
660	xor $1,$3,$3
661	stq $27,48($17)
662	subq $16,1,$16
663
664	stq $3,56($17)
665	addq $20,64,$20
666	addq $19,64,$19
667	addq $18,64,$18
668
669	addq $17,64,$17
670	bgt $16,4b
671	ret
672	.end xor_alpha_prefetch_4
673
674	.align 3
675	.ent xor_alpha_prefetch_5
676xor_alpha_prefetch_5:
677	.prologue 0
678	srl $16, 6, $16
679
680	ldq $31, 0($17)
681	ldq $31, 0($18)
682	ldq $31, 0($19)
683	ldq $31, 0($20)
684	ldq $31, 0($21)
685
686	ldq $31, 64($17)
687	ldq $31, 64($18)
688	ldq $31, 64($19)
689	ldq $31, 64($20)
690	ldq $31, 64($21)
691
692	ldq $31, 128($17)
693	ldq $31, 128($18)
694	ldq $31, 128($19)
695	ldq $31, 128($20)
696	ldq $31, 128($21)
697
698	ldq $31, 192($17)
699	ldq $31, 192($18)
700	ldq $31, 192($19)
701	ldq $31, 192($20)
702	ldq $31, 192($21)
703	.align 4
7045:
705	ldq $0,0($17)
706	ldq $1,0($18)
707	ldq $2,0($19)
708	ldq $3,0($20)
709
710	ldq $4,0($21)
711	ldq $5,8($17)
712	ldq $6,8($18)
713	ldq $7,8($19)
714
715	ldq $22,8($20)
716	ldq $23,8($21)
717	ldq $24,16($17)
718	ldq $25,16($18)
719
720	ldq $27,16($19)
721	xor $0,$1,$1		# 6 cycles from $1 load
722	ldq $28,16($20)
723	xor $2,$3,$3		# 6 cycles from $3 load
724
725	ldq $0,16($21)
726	xor $1,$3,$3
727	ldq $1,24($17)
728	xor $3,$4,$4		# 7 cycles from $4 load
729
730	stq $4,0($17)
731	xor $5,$6,$6		# 7 cycles from $6 load
732	xor $7,$22,$22		# 7 cycles from $22 load
733	xor $6,$23,$23		# 7 cycles from $23 load
734
735	ldq $2,24($18)
736	xor $22,$23,$23
737	ldq $3,24($19)
738	xor $24,$25,$25		# 8 cycles from $25 load
739
740	stq $23,8($17)
741	xor $25,$27,$27		# 8 cycles from $27 load
742	ldq $4,24($20)
743	xor $28,$0,$0		# 7 cycles from $0 load
744
745	ldq $5,24($21)
746	xor $27,$0,$0
747	ldq $6,32($17)
748	ldq $7,32($18)
749
750	stq $0,16($17)
751	xor $1,$2,$2		# 6 cycles from $2 load
752	ldq $22,32($19)
753	xor $3,$4,$4		# 4 cycles from $4 load
754
755	ldq $23,32($20)
756	xor $2,$4,$4
757	ldq $24,32($21)
758	ldq $25,40($17)
759
760	ldq $27,40($18)
761	ldq $28,40($19)
762	ldq $0,40($20)
763	xor $4,$5,$5		# 7 cycles from $5 load
764
765	stq $5,24($17)
766	xor $6,$7,$7		# 7 cycles from $7 load
767	ldq $1,40($21)
768	ldq $2,48($17)
769
770	ldq $3,48($18)
771	xor $7,$22,$22		# 7 cycles from $22 load
772	ldq $4,48($19)
773	xor $23,$24,$24		# 6 cycles from $24 load
774
775	ldq $5,48($20)
776	xor $22,$24,$24
777	ldq $6,48($21)
778	xor $25,$27,$27		# 7 cycles from $27 load
779
780	stq $24,32($17)
781	xor $27,$28,$28		# 8 cycles from $28 load
782	ldq $7,56($17)
783	xor $0,$1,$1		# 6 cycles from $1 load
784
785	ldq $22,56($18)
786	ldq $23,56($19)
787	ldq $24,56($20)
788	ldq $25,56($21)
789
790	ldq $31,256($17)
791	xor $28,$1,$1
792	ldq $31,256($18)
793	xor $2,$3,$3		# 9 cycles from $3 load
794
795	ldq $31,256($19)
796	xor $3,$4,$4		# 9 cycles from $4 load
797	ldq $31,256($20)
798	xor $5,$6,$6		# 8 cycles from $6 load
799
800	stq $1,40($17)
801	xor $4,$6,$6
802	xor $7,$22,$22		# 7 cycles from $22 load
803	xor $23,$24,$24		# 6 cycles from $24 load
804
805	stq $6,48($17)
806	xor $22,$24,$24
807	ldq $31,256($21)
808	xor $24,$25,$25		# 8 cycles from $25 load
809
810	stq $25,56($17)
811	subq $16,1,$16
812	addq $21,64,$21
813	addq $20,64,$20
814
815	addq $19,64,$19
816	addq $18,64,$18
817	addq $17,64,$17
818	bgt $16,5b
819
820	ret
821	.end xor_alpha_prefetch_5
822");
823
824static struct xor_block_template xor_block_alpha = {
825	name: "alpha",
826	do_2: xor_alpha_2,
827	do_3: xor_alpha_3,
828	do_4: xor_alpha_4,
829	do_5: xor_alpha_5,
830};
831
832static struct xor_block_template xor_block_alpha_prefetch = {
833	name: "alpha prefetch",
834	do_2: xor_alpha_prefetch_2,
835	do_3: xor_alpha_prefetch_3,
836	do_4: xor_alpha_prefetch_4,
837	do_5: xor_alpha_prefetch_5,
838};
839
840/* For grins, also test the generic routines.  */
841#include <asm-generic/xor.h>
842
843#undef XOR_TRY_TEMPLATES
844#define XOR_TRY_TEMPLATES				\
845	do {						\
846		xor_speed(&xor_block_8regs);		\
847		xor_speed(&xor_block_32regs);		\
848		xor_speed(&xor_block_alpha);		\
849		xor_speed(&xor_block_alpha_prefetch);	\
850	} while (0)
851
852/* Force the use of alpha_prefetch if EV6, as it is significantly
853   faster in the cold cache case.  */
854#define XOR_SELECT_TEMPLATE(FASTEST) \
855	(implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST)
856