1/*	$NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $	*/
2
3/*-
4 * Copyright (c) 2002 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Allen Briggs for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38
39#include "assym.s"
40
41#include <machine/asm.h>
42#include <sys/errno.h>
43
/*
 * Literal words holding the addresses of the `_arm_memcpy` and
 * `_min_memcpy_size` symbols.  copyin/copyout load one of these words
 * PC-relative and then dereference it to read the current value of the
 * corresponding variable.
 */
.L_arm_memcpy:		.word	_C_LABEL(_arm_memcpy)
.L_min_memcpy_size:	.word	_C_LABEL(_min_memcpy_size)

__FBSDID("$FreeBSD$");
50#ifdef _ARM_ARCH_5E
51#include <arm/arm/bcopyinout_xscale.S>
52#else
53
54	.text
55	.align	0
56
57#ifdef _ARM_ARCH_6
58#define GET_PCB(tmp) \
59	mrc p15, 0, tmp, c13, c0, 4; \
60	add	tmp, tmp, #(PC_CURPCB)
61#else
62.Lcurpcb:
63	.word	_C_LABEL(__pcpu) + PC_CURPCB
64
65#define GET_PCB(tmp) \
66	ldr	tmp, .Lcurpcb
67#endif
68
69
70#define SAVE_REGS	stmfd	sp!, {r4-r11}
71#define RESTORE_REGS	ldmfd	sp!, {r4-r11}
72
73#if defined(_ARM_ARCH_5E)
74#define HELLOCPP #
75#define PREFETCH(rx,o)	pld	[ rx , HELLOCPP (o) ]
76#else
77#define PREFETCH(rx,o)
78#endif
79
/*
 * copyin: copies bytes from user space to kernel space.
 *
 * On entry:
 *	r0 = user space address (source)
 *	r1 = kernel space address (destination)
 *	r2 = length in bytes
 *
 * On exit:
 *	r0 = 0 on success, or EFAULT when the copy is abandoned through
 *	     .Lcopyfault.
 *
 * Register use on the main copy path:
 *	r0, r1	running source / destination pointers (post-incremented)
 *	r2	bytes still to copy
 *	r3	scratch
 *	r4	pointer obtained through GET_PCB; base for PCB_ONFAULT
 *	r5	PCB_ONFAULT value found on entry, written back on every exit
 *	r6-r11	data in flight
 *
 * We save/restore r4-r11 (SAVE_REGS/RESTORE_REGS), so r4-r11 are scratch
 * in between.
 *
 * All loads from the user buffer use ldrt/ldrbt, i.e. they are performed
 * with user-mode access permissions.  While the copy runs, PCB_ONFAULT
 * holds the address of .Lcopyfault; presumably the abort handler resumes
 * there when one of those loads faults.
 */
ENTRY(copyin)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/*
	 * Optional copy hook: when the _arm_memcpy pointer is non-NULL and
	 * the length is at least _min_memcpy_size, call
	 *	(*_arm_memcpy)(kernel dst, user src, len, SRC_IS_USER)
	 * A zero return means the hook did the copy; any other value falls
	 * through to the ordinary path with the original r0-r2 restored.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	/*
	 * r5 is pushed only as padding: an even number of words keeps the
	 * stack adjustment a multiple of 8 bytes across the call, as the
	 * AAPCS requires at call boundaries.
	 */
	stmfd	sp!, {r0-r2, r4, r5, lr}
	mov     r3, r0			/* swap r0/r1: hook takes (dst, src) */
	mov     r0, r1
	mov     r1, r3
	mov     r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* return to the cmp below */
	ldr	pc, [r4]
	cmp     r0, #0
	ldmfd   sp!, {r0-r2, r4, r5, lr}	/* does not disturb the flags */
	moveq	r0, #0
	RETeq

.Lnormal:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]

	/* Remember the current onfault handler in r5 and install ours. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align destination to word boundary.
	 *
	 * Computed jump: pc reads as the address of the `ldr` plus 8, which
	 * is the first .word below, so r6 (dst & 3) indexes the table
	 * directly.  The `b` only fills the slot between the `ldr` and the
	 * table and is never executed.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win.  If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 *
	 * r1 is word aligned here, so r6 (dst & 0x1f) is already a byte
	 * offset of 0, 4, ..., 28 into the table that follows.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Licaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

	/* Copy two words at a time while at least 8 bytes remain. */
.Licleanup8:
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Liout

	/*
	 * Byte-wise tail.  r2 is non-zero on every entry.  The table is
	 * indexed by (r2 & 3): 1, 2 or 3 copies that many bytes, 0 copies
	 * four.  The `subs` at .Lic1 sets the flags tested at .Licend, so
	 * the sequence repeats until r2 reaches zero.
	 */
.Licleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Licend:
	bne	.Licleanup

	/* Success: return 0 after putting the saved onfault value back. */
.Liout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET

	/*
	 * Fault exit (address installed in PCB_ONFAULT above; copyout in
	 * this file installs the same address).  Relies on r4 and r5 still
	 * holding the values set up at .Lnormal.
	 */
.Lcopyfault:
	ldr	r0, =EFAULT
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyin)
316
/*
 * copyout: copies bytes from kernel space to user space.
 *
 * On entry:
 *	r0 = kernel space address (source)
 *	r1 = user space address (destination)
 *	r2 = length in bytes
 *
 * On exit:
 *	r0 = 0 on success, or EFAULT when the copy is abandoned through
 *	     .Lcopyfault (the handler label defined in copyin).
 *
 * Register use on the main copy path:
 *	r0, r1	running source / destination pointers (post-incremented)
 *	r2	bytes still to copy
 *	r3	scratch
 *	r4	pointer obtained through GET_PCB; base for PCB_ONFAULT
 *	r5	PCB_ONFAULT value found on entry, written back on exit
 *	r6-r11	data in flight
 *
 * We save/restore r4-r11 (SAVE_REGS/RESTORE_REGS), so r4-r11 are scratch
 * in between.
 *
 * All stores to the user buffer use strt/strbt, i.e. they are performed
 * with user-mode access permissions; loads from the kernel buffer are
 * ordinary ldr/ldrb/ldmia.
 */

ENTRY(copyout)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/*
	 * Optional copy hook: when the _arm_memcpy pointer is non-NULL and
	 * the length is at least _min_memcpy_size, call it with r0 and r1
	 * exchanged, the length in r2 and the DST_IS_USER flag in r3.
	 * A zero return means the hook did the copy; any other value falls
	 * through to the ordinary path with the original r0-r2 restored.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	/*
	 * r5 is pushed only as padding: an even number of words keeps the
	 * stack adjustment a multiple of 8 bytes across the call, as the
	 * AAPCS requires at call boundaries.
	 */
	stmfd	sp!, {r0-r2, r4, r5, lr}
	mov     r3, r0
	mov     r0, r1
	mov     r1, r3
	mov     r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* return to the cmp below */
	ldr	pc, [r4]
	cmp     r0, #0
	ldmfd   sp!, {r0-r2, r4, r5, lr}	/* does not disturb the flags */
	moveq	r0, #0
	RETeq

.Lnormale:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]

	/* Remember the current onfault handler in r5 and install ours. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * Align destination to word boundary.
	 *
	 * Computed jump: pc reads as the address of the `ldr` plus 8, which
	 * is the first .word below, so r6 (dst & 3) indexes the table
	 * directly.  The `b` only fills the slot between the `ldr` and the
	 * table and is never executed.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Lcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lcleanup8

	/*
	 * Align destination to cacheline boundary (the source is only known
	 * to be word aligned at this point).
	 *
	 * r1 is word aligned here, so r6 (dst & 0x1f) is already a byte
	 * offset of 0, 4, ..., 28 into the table that follows.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Lcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x08
	blt	.Lprecleanup

	/* Copy two words at a time while at least 8 bytes remain. */
.Lcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lout

	/*
	 * Byte-wise tail.  r2 is non-zero on every entry.  The table is
	 * indexed by (r2 & 3): 1, 2 or 3 copies that many bytes, 0 copies
	 * four.  The `subs` at .Lc1 sets the flags tested at .Lcend, so the
	 * sequence repeats until r2 reaches zero.
	 */
.Lcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1
.Lcend:
	bne	.Lcleanup

	/* Success: return 0 after putting the saved onfault value back. */
.Lout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyout)
543#endif
544
/*
 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 *
 * Probe-reads one byte: loads it from src (r0) and stores it to dest (r1).
 * Returns 0 when the load completes.  While the probe runs, PCB_ONFAULT
 * points at the exit label below; r0 is not written on that path, so after
 * a fault the return value is whatever the fault handling left in r0
 * (documented as EFAULT).
 *
 * Scratch: r2 (pointer obtained via GET_PCB), r3 (temporary),
 *          ip (PCB_ONFAULT value found on entry).
 */
ENTRY(badaddr_read_1)
	GET_PCB(r2)
	ldr	r2, [r2]

	adr	r3, .Lbadaddr_read_1_exit	/* resume point after a fault */
	ldr	ip, [r2, #PCB_ONFAULT]		/* keep the previous handler */
	str	r3, [r2, #PCB_ONFAULT]
	/*
	 * NOTE(review): the nops on both sides of the load presumably give
	 * a late-reported abort time to be taken before the store runs;
	 * confirm before changing their number or position.
	 */
	nop
	nop
	nop
	ldrb	r3, [r0]			/* the access being probed */
	nop
	nop
	nop
	strb	r3, [r1]
	mov	r0, #0		/* No fault */
.Lbadaddr_read_1_exit:
	str	ip, [r2, #PCB_ONFAULT]		/* put the previous handler back */
	RET
END(badaddr_read_1)
570
/*
 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 *
 * Probe-reads one 16-bit value: loads it from src (r0) and stores it to
 * dest (r1).  Returns 0 when the load completes.  While the probe runs,
 * PCB_ONFAULT points at the exit label below; r0 is not written on that
 * path, so after a fault the return value is whatever the fault handling
 * left in r0 (documented as EFAULT).
 *
 * Scratch: r2 (pointer obtained via GET_PCB), r3 (temporary),
 *          ip (PCB_ONFAULT value found on entry).
 */
ENTRY(badaddr_read_2)
	GET_PCB(r2)
	ldr	r2, [r2]

	adr	r3, .Lbadaddr_read_2_exit	/* resume point after a fault */
	ldr	ip, [r2, #PCB_ONFAULT]		/* keep the previous handler */
	str	r3, [r2, #PCB_ONFAULT]
	/*
	 * NOTE(review): the nops on both sides of the load presumably give
	 * a late-reported abort time to be taken before the store runs;
	 * confirm before changing their number or position.
	 */
	nop
	nop
	nop
	ldrh	r3, [r0]			/* the access being probed */
	nop
	nop
	nop
	strh	r3, [r1]
	mov	r0, #0		/* No fault */
.Lbadaddr_read_2_exit:
	str	ip, [r2, #PCB_ONFAULT]		/* put the previous handler back */
	RET
END(badaddr_read_2)
596
/*
 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 *
 * Probe-reads one 32-bit value: loads it from src (r0) and stores it to
 * dest (r1).  Returns 0 when the load completes.  While the probe runs,
 * PCB_ONFAULT points at the exit label below; r0 is not written on that
 * path, so after a fault the return value is whatever the fault handling
 * left in r0 (documented as EFAULT).
 *
 * Scratch: r2 (pointer obtained via GET_PCB), r3 (temporary),
 *          ip (PCB_ONFAULT value found on entry).
 */
ENTRY(badaddr_read_4)
	GET_PCB(r2)
	ldr	r2, [r2]

	adr	r3, .Lbadaddr_read_4_exit	/* resume point after a fault */
	ldr	ip, [r2, #PCB_ONFAULT]		/* keep the previous handler */
	str	r3, [r2, #PCB_ONFAULT]
	/*
	 * NOTE(review): the nops on both sides of the load presumably give
	 * a late-reported abort time to be taken before the store runs;
	 * confirm before changing their number or position.
	 */
	nop
	nop
	nop
	ldr	r3, [r0]			/* the access being probed */
	nop
	nop
	nop
	str	r3, [r1]
	mov	r0, #0		/* No fault */
.Lbadaddr_read_4_exit:
	str	ip, [r2, #PCB_ONFAULT]		/* put the previous handler back */
	RET
END(badaddr_read_4)
622
623