1/*	$NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $	*/
2
3/*-
4 * Copyright (c) 2002 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Allen Briggs for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38
39#include "assym.s"
40
41#include <machine/acle-compat.h>
42#include <machine/asm.h>
43#include <sys/errno.h>
44
/*
 * PC-relative literals.  Each word holds the ADDRESS of a kernel variable,
 * so users need two loads: one to fetch the address from here, one to read
 * the variable itself.
 *
 *   .L_arm_memcpy       -> &_arm_memcpy       (copy hook pointer, may be 0)
 *   .L_min_memcpy_size  -> &_min_memcpy_size  (smallest length given to it)
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)

.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)
49
50__FBSDID("$FreeBSD: releng/10.3/sys/arm/arm/bcopyinout.S 294681 2016-01-24 19:58:58Z ian $");
51#ifdef _ARM_ARCH_5E
52#include <arm/arm/bcopyinout_xscale.S>
53#else
54
55	.text
56	.align	2
57
/*
 * GET_PCB(tmp): leave in `tmp` the address of the word that holds the
 * current PCB pointer.  Callers follow it with `ldr tmp, [tmp]` to obtain
 * the PCB pointer itself.
 *
 * Before ARMv6 the word is found at __pcpu + PC_CURPCB; its address is kept
 * in the .Lcurpcb literal below and fetched PC-relative.  From ARMv6 on the
 * address is computed as (CP15 c13, c0, 4) + TD_PCB.
 * NOTE(review): CP15 c13/c0/4 presumably holds the current thread pointer;
 * confirm against the per-CPU setup code.
 */
#if __ARM_ARCH < 6
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB

#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#else
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)
#endif
69
70
/*
 * SAVE_REGS / RESTORE_REGS push and pop r4-r11 (eight words) so the copy
 * loops can use them freely.  They must always be used as a matched pair.
 */
#define SAVE_REGS	stmfd	sp!, {r4-r11}
#define RESTORE_REGS	ldmfd	sp!, {r4-r11}

/*
 * PREFETCH(rx, o) marks the places where a cache preload of [rx, #o] would
 * go.  This part of the file is only assembled when _ARM_ARCH_5E is NOT
 * defined (it sits in the #else of the enclosing #ifdef _ARM_ARCH_5E), so a
 * pld-based variant could never be selected here; the macro therefore
 * always expands to nothing.
 */
#define PREFETCH(rx,o)
80
81/*
82 * r0 = user space address
83 * r1 = kernel space address
84 * r2 = length
85 *
86 * Copies bytes from user space to kernel space
87 *
 * The inline copy path saves r4-r11 on the stack and restores them before
 * returning, so it is free to use them as scratch registers.
90 */
ENTRY(copyin)
	/*
	 * In:   r0 = source (user address), r1 = destination (kernel
	 *       address), r2 = byte count.
	 * Out:  r0 = 0 on success, EFAULT if the fault handler installed
	 *       below (.Lcopyfault) is entered.
	 *
	 * Register use on the inline path (after .Lnormal):
	 *   r0      source cursor, read with ldrt/ldrbt (user-permission loads)
	 *   r1      destination cursor
	 *   r2      bytes still to copy
	 *   r3      scratch
	 *   r4      PCB pointer
	 *   r5      pcb_onfault value found on entry, restored on exit
	 *   r6-r11  data in flight
	 */

	/* Nothing to copy: report success straight away. */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/*
	 * Optional hook: when _arm_memcpy is non-NULL and the request is at
	 * least _min_memcpy_size bytes, let the hook try the copy first.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal

	/*
	 * Keep the original arguments so the inline path can retry if the
	 * hook fails.  r3 is pushed purely as padding: six words keep sp
	 * 8-byte aligned across the call, as the AAPCS requires at a public
	 * interface (assuming sp was aligned on entry).
	 */
	stmfd	sp!, {r0-r4, lr}
	mov	r3, r0			/* swap r0 and r1 for the hook */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2			/* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* pc reads +8: return to the cmp */
	ldr	pc, [r4]
	cmp	r0, #0			/* zero means the hook succeeded */
	ldmfd	sp!, {r0-r4, lr}	/* leaves the flags untouched */
	moveq	r0, #0
	RETeq
	/* Hook reported failure: fall through and copy inline. */

.Lnormal:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]		/* r4 = PCB pointer */

	/* Install the fault handler, remembering the previous one in r5. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * Fewer than 8 bytes: go straight to the byte loop.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align the destination to a word boundary.
	 *
	 * Computed jump: pc reads as this instruction + 8, which is the
	 * first .word of the table; the `b` merely occupies the slot in
	 * between.  Index = r1 & 3, so a misalignment of 1 needs 3 bytes
	 * (.Lial3), 2 needs 2 (.Lial2) and 3 needs 1 (.Lial1).
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:
	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes are left, finish with the byte loop.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If the source is not word aligned, finish with the byte loop.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win.  If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 *
	 * r1 is word aligned here, so (r1 & 0x1f) is already a multiple of
	 * four and is used directly as the byte offset into the table.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:
	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:
	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:
	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Licaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/*
	 * Copy 32 bytes.  Words 1-2 go through r10/r11 and are stored
	 * first so those registers can be reused for words 7-8.
	 */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy the final full 32-byte group (same pattern as above). */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

	/* Word-aligned tail: move 8 bytes per pass while >= 8 remain. */
.Licleanup8:
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Liout

	/*
	 * Byte loop.  r2 is non-zero on entry.  Index = r2 & 3 picks how
	 * many bytes this pass copies: 0 -> 4 (.Lic4), otherwise exactly
	 * that many.  The final `subs` sets Z once r2 reaches zero; the
	 * `b .Licend` only fills the slot between the ldr and the table.
	 */
.Licleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:
	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:
	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1	/* strb leaves the flags from subs intact */
.Licend:
	bne	.Licleanup

	/* Success: return 0 after putting back the previous fault handler. */
.Liout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET

	/*
	 * Fault handler installed in pcb_onfault by both copyin and copyout.
	 * Expects r4 = PCB pointer, r5 = previous pcb_onfault and r4-r11
	 * saved with SAVE_REGS.  Returns EFAULT.
	 */
.Lcopyfault:
	ldr	r0, =EFAULT
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyin)
317
318/*
319 * r0 = kernel space address
320 * r1 = user space address
321 * r2 = length
322 *
323 * Copies bytes from kernel space to user space
324 *
 * The inline copy path saves r4-r11 on the stack and restores them before
 * returning, so it is free to use them as scratch registers.
327 */
328
ENTRY(copyout)
	/*
	 * In:   r0 = source (kernel address), r1 = destination (user
	 *       address), r2 = byte count.
	 * Out:  r0 = 0 on success, EFAULT if the installed fault handler
	 *       (.Lcopyfault) is entered.
	 *
	 * Register use on the inline path (after .Lnormale):
	 *   r0      source cursor
	 *   r1      destination cursor, written with strt/strbt
	 *           (user-permission stores)
	 *   r2      bytes still to copy
	 *   r3      scratch
	 *   r4      PCB pointer
	 *   r5      pcb_onfault value found on entry, restored on exit
	 *   r6-r11  data in flight
	 */

	/* Nothing to copy: report success straight away. */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/*
	 * Optional hook: when _arm_memcpy is non-NULL and the request is at
	 * least _min_memcpy_size bytes, let the hook try the copy first.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale

	/*
	 * Keep the original arguments so the inline path can retry if the
	 * hook fails.  r3 is pushed purely as padding: six words keep sp
	 * 8-byte aligned across the call, as the AAPCS requires at a public
	 * interface (assuming sp was aligned on entry).
	 */
	stmfd	sp!, {r0-r4, lr}
	mov	r3, r0			/* swap r0 and r1 for the hook */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1			/* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* pc reads +8: return to the cmp */
	ldr	pc, [r4]
	cmp	r0, #0			/* zero means the hook succeeded */
	ldmfd	sp!, {r0-r4, lr}	/* leaves the flags untouched */
	moveq	r0, #0
	RETeq
	/* Hook reported failure: fall through and copy inline. */

.Lnormale:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]		/* r4 = PCB pointer */

	/* Install the fault handler, remembering the previous one in r5. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * Fewer than 8 bytes: go straight to the byte loop.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * Align the destination to a word boundary.
	 *
	 * Computed jump: pc reads as this instruction + 8, which is the
	 * first .word of the table; the `b` merely occupies the slot in
	 * between.  Index = r1 & 3, so a misalignment of 1 needs 3 bytes
	 * (.Lal3), 2 needs 2 (.Lal2) and 3 needs 1 (.Lal1).
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:
	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/*
	 * If few bytes are left, finish with the byte loop.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * If the source is not word aligned, finish with the byte loop.
	 */
	ands	r3, r0, #0x03
	bne	.Lcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lcleanup8

	/*
	 * Align destination to cacheline boundary (only r1 is examined;
	 * the source is merely known to be word aligned).
	 *
	 * r1 is word aligned here, so (r1 & 0x1f) is already a multiple of
	 * four and is used directly as the byte offset into the table.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:
	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:
	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:
	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Lcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/*
	 * Copy 32 bytes.  Six words are loaded at once; r6/r7 are stored
	 * first so they can be reloaded with words 7-8 before the rest of
	 * the group is written out.
	 */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Copy the final full 32-byte group (same pattern as above). */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x08
	blt	.Lprecleanup

	/* Word-aligned tail: move 8 bytes per pass while >= 8 remain. */
.Lcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lout

	/*
	 * Byte loop.  r2 is non-zero on entry.  Index = r2 & 3 picks how
	 * many bytes this pass copies: 0 -> 4 (.Lc4), otherwise exactly
	 * that many.  The final `subs` sets Z once r2 reaches zero; the
	 * `b .Lcend` only fills the slot between the ldr and the table.
	 */
.Lcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:
	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:
	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1	/* strbt leaves the flags from subs intact */
.Lcend:
	bne	.Lcleanup

	/*
	 * Success: return 0 after putting back the previous fault handler.
	 * .Lout is a file-scope label; check for other users in this file
	 * before renaming or moving it.
	 */
.Lout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyout)
544#endif
545
546/*
547 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
548 *
549 * Copies a single 8-bit value from src to dest, returning 0 on success,
550 * else EFAULT if a page fault occurred.
551 */
ENTRY(badaddr_read_1)
	/*
	 * In:   r0 = src, r1 = dest.
	 * Uses: r2 = PCB pointer, r3 = scratch, ip = previous pcb_onfault.
	 * Out:  r0 = 0 when both accesses complete.  On a fault, control
	 *       resumes at .Lbadaddr_read_1_done without r0 being written
	 *       here.
	 * NOTE(review): the fault handler presumably leaves EFAULT in r0,
	 * as the header comment promises; confirm against the abort handler.
	 */
	GET_PCB(r2)
	ldr	r2, [r2]			/* r2 = PCB pointer */

	/* Arm the fault handler, keeping the old one in ip. */
	adr	r3, .Lbadaddr_read_1_done
	ldr	ip, [r2, #PCB_ONFAULT]
	str	r3, [r2, #PCB_ONFAULT]

	/*
	 * The probed load and the store of its result, each fenced by nops.
	 * NOTE(review): the nops presumably keep a late-reported abort
	 * attributable to this window; confirm before removing them.
	 */
	nop
	nop
	nop
	ldrb	r3, [r0]			/* byte read from src */
	nop
	nop
	nop
	strb	r3, [r1]			/* byte written to dest */
	mov	r0, #0				/* No fault */

.Lbadaddr_read_1_done:
	str	ip, [r2, #PCB_ONFAULT]		/* put back the old handler */
	RET
END(badaddr_read_1)
571
572/*
573 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
574 *
575 * Copies a single 16-bit value from src to dest, returning 0 on success,
576 * else EFAULT if a page fault occurred.
577 */
ENTRY(badaddr_read_2)
	/*
	 * In:   r0 = src, r1 = dest.
	 * Uses: r2 = PCB pointer, r3 = scratch, ip = previous pcb_onfault.
	 * Out:  r0 = 0 when both accesses complete.  On a fault, control
	 *       resumes at .Lbadaddr_read_2_done without r0 being written
	 *       here.
	 * NOTE(review): the fault handler presumably leaves EFAULT in r0,
	 * as the header comment promises; confirm against the abort handler.
	 */
	GET_PCB(r2)
	ldr	r2, [r2]			/* r2 = PCB pointer */

	/* Arm the fault handler, keeping the old one in ip. */
	adr	r3, .Lbadaddr_read_2_done
	ldr	ip, [r2, #PCB_ONFAULT]
	str	r3, [r2, #PCB_ONFAULT]

	/*
	 * The probed load and the store of its result, each fenced by nops.
	 * NOTE(review): the nops presumably keep a late-reported abort
	 * attributable to this window; confirm before removing them.
	 */
	nop
	nop
	nop
	ldrh	r3, [r0]			/* halfword read from src */
	nop
	nop
	nop
	strh	r3, [r1]			/* halfword written to dest */
	mov	r0, #0				/* No fault */

.Lbadaddr_read_2_done:
	str	ip, [r2, #PCB_ONFAULT]		/* put back the old handler */
	RET
END(badaddr_read_2)
597
598/*
599 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
600 *
601 * Copies a single 32-bit value from src to dest, returning 0 on success,
602 * else EFAULT if a page fault occurred.
603 */
ENTRY(badaddr_read_4)
	/*
	 * In:   r0 = src, r1 = dest.
	 * Uses: r2 = PCB pointer, r3 = scratch, ip = previous pcb_onfault.
	 * Out:  r0 = 0 when both accesses complete.  On a fault, control
	 *       resumes at .Lbadaddr_read_4_done without r0 being written
	 *       here.
	 * NOTE(review): the fault handler presumably leaves EFAULT in r0,
	 * as the header comment promises; confirm against the abort handler.
	 */
	GET_PCB(r2)
	ldr	r2, [r2]			/* r2 = PCB pointer */

	/* Arm the fault handler, keeping the old one in ip. */
	adr	r3, .Lbadaddr_read_4_done
	ldr	ip, [r2, #PCB_ONFAULT]
	str	r3, [r2, #PCB_ONFAULT]

	/*
	 * The probed load and the store of its result, each fenced by nops.
	 * NOTE(review): the nops presumably keep a late-reported abort
	 * attributable to this window; confirm before removing them.
	 */
	nop
	nop
	nop
	ldr	r3, [r0]			/* word read from src */
	nop
	nop
	nop
	str	r3, [r1]			/* word written to dest */
	mov	r0, #0				/* No fault */

.Lbadaddr_read_4_done:
	str	ip, [r2, #PCB_ONFAULT]		/* put back the old handler */
	RET
END(badaddr_read_4)
623
624