/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
9#include <asm/processor.h>
10#include <asm/ppc_asm.h>
11
	.align	7
/*
 * unsigned long __copy_tofrom_user(void *to, const void *from, unsigned long n)
 *
 * In:  r3 = destination, r4 = source, r5 = byte count
 * Out: r3 = number of bytes NOT copied (0 on complete success)
 *
 * The original r3/r4/r5 are stashed at -24/-16/-8(r1) so the exception
 * handlers further down can work out how many bytes remain.  Every load
 * or store that may touch user memory carries a numeric label (20:, 70:,
 * ...) paired with a fixup entry in the __ex_table section below.
 */
_GLOBAL(__copy_tofrom_user)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16	/* cr1: count < 16 -> short copy */
	cmpdi	cr6,r5,4096	/* cr6: count == one 4K page? */
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095	/* cr0 EQ <=> src and dst both page aligned */
	std	r3,-24(r1)	/* save original dest for the fixup code */
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* EQ = page aligned && count == 4096 */
	std	r4,-16(r1)	/* save original src */
	std	r5,-8(r1)	/* save original count */
	dcbt	0,r4		/* touch the first source cache line */
	beq	.Lcopy_page_4K	/* aligned whole page: take the fast path */
	andi.	r6,r6,7		/* r6 = bytes until dest is 8-byte aligned */
	PPC_MTOCRF	0x01,r5	/* low 4 bits of count -> cr7, for the tail */
	blt	cr1,.Lshort_copy
	bne	.Ldst_unaligned
.Ldst_aligned:
	andi.	r0,r4,7		/* r0 = source misalignment within 8 bytes */
	addi	r3,r3,-16	/* pre-bias dest for the stdu-by-16 loops */
	bne	.Lsrc_unaligned
	srdi	r7,r5,4		/* r7 = number of 16-byte chunks */
/* src and dest both 8-byte aligned: copy two doublewords per iteration */
20:	ld	r9,0(r4)
	addi	r4,r4,-8	/* pre-bias src for the ldu-by-16 loop */
	mtctr	r7
	andi.	r5,r5,7		/* r5 = tail bytes (<8); cr0 used by beq+ 3f */
	bf	cr7*4+0,22f	/* count bit 3 clear: even # of doublewords */
	addi	r3,r3,8		/* odd doubleword: shift loop entry by 8 */
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,72f		/* < 16 bytes total: just store the one dword */
21:	ld	r9,8(r4)
70:	std	r8,8(r3)
22:	ldu	r8,16(r4)
71:	stdu	r9,16(r3)
	bdnz	21b
72:	std	r8,8(r3)
	beq+	3f		/* no tail bytes -> done */
	addi	r3,r3,16
23:	ld	r9,8(r4)	/* fetch doubleword containing the tail bytes */
.Ldo_tail:
	/*
	 * Store the 1..7 tail bytes held in r9, most-significant byte
	 * first (big-endian order), by rotating them down into place.
	 * cr7 bits 1/2/3 are count bits 2/1/0 (4-, 2-, 1-byte pieces).
	 */
	bf	cr7*4+1,1f	/* 4-byte piece? */
	rotldi	r9,r9,32
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f	/* 2-byte piece? */
	rotldi	r9,r9,16
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f	/* 1-byte piece? */
	rotldi	r9,r9,8
75:	stb	r9,0(r3)
3:	li	r3,0		/* everything copied */
	blr

.Lsrc_unaligned:
	/*
	 * Dest is 8-byte aligned but source is not.  Read aligned
	 * doublewords and assemble each output doubleword from two
	 * consecutive inputs with shift pairs:
	 *   r10 = 8 * (src misalignment)  (left-shift amount)
	 *   r11 = 64 - r10                (right-shift amount)
	 */
	srdi	r6,r5,3		/* r6 = count / 8 (whole doublewords) */
	addi	r5,r5,-16
	subf	r4,r0,r4	/* round src down to an 8-byte boundary */
	srdi	r7,r5,4		/* r7 = main loop iteration count */
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0	/* r5 = byte count for .Ldo_tail afterwards */
	bt	cr7*4+0,28f	/* odd number of doublewords -> other preamble */

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6	/* combine high/low pieces into one dword */
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

/* main loop: assemble and store two doublewords per iteration */
1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f		/* tail bytes left over? (cr0 from andi. r5) */
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8	/* need one more source doubleword for tail? */
	addi	r3,r3,32
	sld	r9,r9,r10	/* bytes already fetched, shifted into place */
	ble	cr1,.Ldo_tail
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9	/* merge in bytes from the extra doubleword */
	b	.Ldo_tail

.Ldst_unaligned:
	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5	/* count remaining once dest is aligned */
	li	r7,0		/* r7 = bytes copied so far (fixups use it) */
	cmpldi	r1,r5,16	/* sets cr1 ('r1' expands to 1, same as cr1) */
	bf	cr7*4+3,1f	/* need a single byte? */
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f	/* need a halfword? */
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f	/* need a word? */
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5	/* low 4 bits of remaining count -> cr7 */
	add	r4,r6,r4	/* step src/dest past the alignment bytes */
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	/* count < 16: cr7 holds the low 4 bits of the count */
	bf	cr7*4+0,1f	/* 8-byte piece (as two word copies)? */
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f	/* 4-byte piece? */
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f	/* 2-byte piece? */
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f	/* 1-byte piece? */
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0		/* everything copied */
	blr
181
/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 *
 * Fixup label 1NN handles a fault at load label NN above.  The groups
 * below adjust r3 so that it points at the first destination byte that
 * has NOT been modified; the addi's accumulate as control falls through
 * from one group into the next (e.g. 130/131 get +8+8+8 = +24 in total).
 */

136:	/* loads in .Ldst_unaligned: r7 = bytes already copied there */
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
122:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
121:
132:
	addi	r3,r3,8
123:
134:
135:
138:
139:
140:
141:
142:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original byte count */
	subf	r6,r6,r3	/* r6 = # bytes successfully copied so far */
	add	r4,r4,r6	/* r4 = source address of the faulting region */
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5		/* retry the rest one byte at a time */
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5		/* r5 = bytes that could not be copied */
	li	r0,0
	mr	r4,r3		/* r4 walks the uncopied destination area */
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7		/* zero bytewise until 8-byte aligned */
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8	/* then zero whole doublewords... */
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr			/* ...and finally any leftover bytes */
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr
268
/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 *
 * As above, fixup label 1NN handles a fault at store label NN, and the
 * addi adjustments to r3 accumulate as control falls through the groups
 * (e.g. 180 gets +8+8+4+4 = +24 before reaching 1:).
 */
182:	/* stores in .Ldst_unaligned: r7 = bytes already copied there */
183:
	add	r3,r3,r7
	b	1f
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
170:
172:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
1:
	/* here r3 points at the first byte that was not stored */
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r5,-8(r1)	/* original byte count */
	add	r6,r6,r5	/* r6 = one past the end of the dest buffer */
	subf	r3,r3,r6	/* #bytes not copied */
190:	/* faults in the recovery loops above: r3 is already correct */
191:
192:
	blr			/* #bytes not copied in r3 */
308
	.section __ex_table,"a"
	.align	3
	/*
	 * Fixup table for __copy_tofrom_user: each entry pairs a faulting
	 * user-memory access (label NNb above) with its handler (1NNb).
	 */
	.llong	20b,120b
	.llong	21b,121b
	.llong	70b,170b
	.llong	22b,122b
	.llong	71b,171b
	.llong	72b,172b
	.llong	23b,123b
	.llong	73b,173b
	.llong	74b,174b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b
358
	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 *
 * Reached only when src and dest are page aligned and count == 4096.
 * The loads/stores are interleaved across six streams spaced 128 bytes
 * apart (offsets 0/128/256/384/512/640) to keep several cache lines in
 * flight at once.  Registers r20-r31 are used as copy buffers and are
 * saved below the stack pointer; any fault funnels through the second
 * __ex_table below to handler 100:, which restores them and falls back
 * to the byte-accurate path.  Note the register operands here are
 * written as bare numbers (1) = r1, (3) = r3, (4) = r4.
 */
.Lcopy_page_4K:
	std	r31,-32(1)	/* save callee-saved regs used as buffers */
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1	/* r5 counts remaining 32-byte units */
	addi	r3,r3,-8	/* pre-bias dest for stdu addressing */
	li	r0,5		/* inner loop iteration count */
0:	addi	r5,r5,-24
	mtctr	r0
	/* prime the six streams (one doubleword each, 128 bytes apart) */
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24		/* cr0 stays live across the loop for bge 0b */
1:
	/* inner loop: 24 bytes per stream per iteration, double buffered */
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
	/* drain the doublewords still buffered in registers */
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640	/* skip src/dest past the other streams */
75:	addi	r3,r3,648
	bge	0b		/* uses cr0 from the cmpwi r5,24 above */
	/* final stretch: plain 32-bytes-per-iteration copy, r5 iterations */
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)	/* restore the callee-saved registers */
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0		/* whole page copied */
	blr
479
/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 *
 * All page-copy faults come here (see the second __ex_table below);
 * the slow path then reports exactly how many bytes were not copied.
 */
100:	ld	r20,-120(1)	/* restore the callee-saved registers */
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)	/* reload the original dest and src args */
	ld	r4,-16(r1)
	li	r5,4096		/* full page count again */
	b	.Ldst_aligned	/* retry via the byte-accurate path */
500
	.section __ex_table,"a"
	.align	3
	/*
	 * Fixup table for .Lcopy_page_4K: every load/store in the page
	 * copy (labels 20b-91b) funnels to the single handler at 100b.
	 */
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b
575