/*	$NetBSD: copy.S,v 1.35 2022/12/18 07:53:30 skrll Exp $	*/

/*
 * Copyright (c) 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "assym.h"

#include <sys/errno.h>
#include <sys/syscall.h>

#include <machine/asm.h>
#include <machine/frameasm.h>

#define GET_CURPCB(reg)	\
	movq	CPUVAR(CURLWP),reg; \
	movq	L_PCB(reg),reg

/*
 * These are arranged so that the abnormal case is a forwards
 * conditional branch - which will be predicted not-taken by
 * both Intel and AMD processors.
 */
#define DEFERRED_SWITCH_CHECK \
	CHECK_DEFERRED_SWITCH			; \
	jnz	99f				; \
98:

#define DEFERRED_SWITCH_CALL \
99:						; \
	call	_C_LABEL(do_pmap_load)		; \
	jmp	98b

/*
 * The following primitives are to copy regions of memory.
 * Label must be before all copy functions.
 */
	.text

x86_copyfunc_start:	.globl	x86_copyfunc_start

/*
 * Handle deferred pmap switch.  We must re-enable preemption without
 * making a function call, so that the program counter is visible to
 * cpu_kpreempt_exit().  It can then know if it needs to restore the
 * pmap on returning, because a preemption occurred within one of the
 * copy functions.
 */
ENTRY(do_pmap_load)
	pushq	%rbp
	movq	%rsp,%rbp
	pushq	%rdi
	pushq	%rsi
	pushq	%rdx
	pushq	%rcx
	pushq	%rbx
	movq	CPUVAR(CURLWP),%rbx
1:
	incl	L_NOPREEMPT(%rbx)
	call	_C_LABEL(pmap_load)
	decl	L_NOPREEMPT(%rbx)
	jnz	2f
	cmpl	$0,L_DOPREEMPT(%rbx)
	jz	2f
	xorq	%rdi,%rdi
	call	_C_LABEL(kpreempt)
2:
	cmpl	$0,CPUVAR(WANT_PMAPLOAD)
	jnz	1b
	popq	%rbx
	popq	%rcx
	popq	%rdx
	popq	%rsi
	popq	%rdi
	leaveq
	ret
END(do_pmap_load)
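
/*
 * For reference, the loop above amounts to the following C-level sketch
 * (illustrative only; the lwp and cpu_info field names are the ones the
 * assembly reaches through assym.h, not a literal C counterpart of this
 * routine):
 *
 *	struct lwp *l = curlwp;
 *
 *	do {
 *		l->l_nopreempt++;		// block preemption
 *		pmap_load();			// do the deferred pmap switch
 *		if (--l->l_nopreempt == 0 && l->l_dopreempt)
 *			kpreempt(0);		// a preemption was requested
 *	} while (curcpu()->ci_want_pmapload);	// retry if it went stale
 */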

/*
 * Copy routines from and to userland, plus a few more. See the
 * section 9 manpages for info. Some cases could be optimized further.
 *
 * I wonder if it's worthwhile to make these use SSE2 registers?
 * (dsl) Not from the information I've read in the AMD guides.
 *
 * Also note that the setup time for 'rep movs' is horrid - especially on
 * P4 netburst - but on my AMD X2 it manages one copy (read+write) per
 * clock, which can also be achieved with a plain code loop but is probably
 * impossible to beat.  However the use of 'rep movsb' for the final bytes
 * should be killed.
 *
 * Newer Intel cpus have a much lower setup time, and may (someday)
 * be able to do cache-line sized copies....
 */
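
/*
 * All of the copy loops below use the same pattern: move the bulk of the
 * region as 8-byte words with 'rep movsq', then the remaining 0-7 bytes
 * with 'rep movsb'.  A minimal C sketch of that pattern (illustrative only,
 * not kernel code; assumes <stdint.h>/<stddef.h> types and the usual x86
 * tolerance for unaligned accesses):
 *
 *	void
 *	copy_words_then_tail(void *dst, const void *src, size_t len)
 *	{
 *		uint64_t *d8 = dst;
 *		const uint64_t *s8 = src;
 *		size_t n;
 *
 *		for (n = len >> 3; n != 0; n--)	// whole 8-byte words
 *			*d8++ = *s8++;
 *
 *		uint8_t *d1 = (uint8_t *)d8;
 *		const uint8_t *s1 = (const uint8_t *)s8;
 *		for (n = len & 7; n != 0; n--)	// 0-7 trailing bytes
 *			*d1++ = *s1++;
 *	}
 */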

/*
 * int kcopy(const void *from, void *to, size_t len);
 * Copy len bytes from and to kernel memory, and abort on fault.
 */
ENTRY(kcopy)
	xchgq	%rdi,%rsi
	movq	%rdx,%rcx
.Lkcopy_start:
	movq	%rdi,%rax
	subq	%rsi,%rax
	cmpq	%rcx,%rax		/* overlapping? */
	jb	1f
	/* nope, copy forward */
	shrq	$3,%rcx			/* copy by 64-bit words */
	rep
	movsq

	movq	%rdx,%rcx
	andl	$7,%ecx			/* any bytes left? */
	rep
	movsb

	xorq	%rax,%rax
	ret

/*
 * Using 'rep movs' to copy backwards is not as fast as for forwards copies
 * and ought not be done when the copy doesn't actually overlap.
 * However kcopy() isn't used anywhere that looks even vaguely performance
 * critical.  I'm also not sure it is ever asked to do overlapping copies!
 */

1:	addq	%rcx,%rdi		/* copy backward */
	addq	%rcx,%rsi
	std
	andq	$7,%rcx			/* any fractional bytes? */
	decq	%rdi
	decq	%rsi
	rep
	movsb
	movq	%rdx,%rcx		/* copy remainder by 64-bit words */
	shrq	$3,%rcx
	subq	$7,%rsi
	subq	$7,%rdi
	rep
	movsq
	cld
.Lkcopy_end:
	xorq	%rax,%rax
	ret
END(kcopy)
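
/*
 * kcopy() behaves like a memcpy() between two kernel addresses, except
 * that a fault on either address aborts the copy and returns EFAULT (see
 * kcopy(9)).  A hypothetical caller, sketched in C ('window' and the
 * function name are made up for illustration):
 *
 *	int
 *	read_mapped_window(void *dst, const void *window, size_t len)
 *	{
 *		int error;
 *
 *		error = kcopy(window, dst, len);
 *		if (error != 0)		// EFAULT: the mapping went away
 *			return error;
 *		return 0;
 *	}
 */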

ENTRY(copyout)
	DEFERRED_SWITCH_CHECK

	xchgq	%rdi,%rsi		/* kernel address to %rsi, user to %rdi */
	movq	%rdx,%rax		/* save transfer length (bytes) */

	addq	%rdi,%rdx		/* end address to %rdx */
	jc	_C_LABEL(copy_efault)	/* jump if wraps */
	movq	$VM_MAXUSER_ADDRESS,%r8
	cmpq	%r8,%rdx
	ja	_C_LABEL(copy_efault)	/* jump if end in kernel space */

	SMAP_DISABLE
.Lcopyout_start:
	movq	%rax,%rcx		/* length */
	shrq	$3,%rcx			/* count of 8-byte words */
	rep
	movsq				/* copy from %rsi to %rdi */
	movb	%al,%cl
	andb	$7,%cl			/* remaining number of bytes */
	rep
	movsb				/* copy remaining bytes */
.Lcopyout_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(copyout)

ENTRY(copyin)
	DEFERRED_SWITCH_CHECK

	xchgq	%rdi,%rsi
	movq	%rdx,%rax

	addq	%rsi,%rdx		/* check source address not wrapped */
	jc	_C_LABEL(copy_efault)
	movq	$VM_MAXUSER_ADDRESS,%r8
	cmpq	%r8,%rdx
	ja	_C_LABEL(copy_efault)	/* jump if end in kernel space */

	SMAP_DISABLE
.Lcopyin_start:
3:	/* bcopy(%rsi, %rdi, %rax); */
	movq	%rax,%rcx
	shrq	$3,%rcx
	rep
	movsq
	movb	%al,%cl
	andb	$7,%cl
	rep
	movsb
.Lcopyin_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(copyin)
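
/*
 * copyin() and copyout() are the copyin(9)/copyout(9) primitives:
 * copyin(uaddr, kaddr, len) brings data in from user space and
 * copyout(kaddr, uaddr, len) pushes it back out; both return 0 or EFAULT.
 * A minimal, hypothetical caller in C ('uargp', 'struct my_args' and
 * do_work() are made up for illustration):
 *
 *	struct my_args args;
 *	int error;
 *
 *	error = copyin(uargp, &args, sizeof(args));
 *	if (error != 0)
 *		return error;		// EFAULT: bad user address
 *	args.result = do_work(args.input);
 *	return copyout(&args, uargp, sizeof(args));
 */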

ENTRY(copy_efault)
	movq	$EFAULT,%rax
	ret
END(copy_efault)

ENTRY(kcopy_fault)
	cld
	ret
END(kcopy_fault)

ENTRY(copy_fault)
	SMAP_ENABLE
	ret
END(copy_fault)

ENTRY(copyoutstr)
	DEFERRED_SWITCH_CHECK
	xchgq	%rdi,%rsi
	movq	%rdx,%r8
	movq	%rcx,%r9

	/*
	 * Get min(%rdx, VM_MAXUSER_ADDRESS-%rdi).
	 */
	movq	$VM_MAXUSER_ADDRESS,%rax
	subq	%rdi,%rax
	jc	_C_LABEL(copystr_efault)
	cmpq	%rdx,%rax
	jae	1f
	movq	%rax,%rdx
	movq	%rax,%r8
1:	incq	%rdx

	SMAP_DISABLE
.Lcopyoutstr_start:
1:	decq	%rdx
	jz	2f
	lodsb
	stosb
	testb	%al,%al
	jnz	1b
.Lcopyoutstr_end:
	SMAP_ENABLE

	/* Success -- 0 byte reached. */
	decq	%rdx
	xorq	%rax,%rax
	jmp	copystr_return

2:	/* rdx is zero -- return EFAULT or ENAMETOOLONG. */
	SMAP_ENABLE
	movq	$VM_MAXUSER_ADDRESS,%r11
	cmpq	%r11,%rdi
	jae	_C_LABEL(copystr_efault)
	movq	$ENAMETOOLONG,%rax
	jmp	copystr_return
	DEFERRED_SWITCH_CALL
END(copyoutstr)

ENTRY(copyinstr)
	DEFERRED_SWITCH_CHECK
	xchgq	%rdi,%rsi
	movq	%rdx,%r8
	movq	%rcx,%r9

	/*
	 * Get min(%rdx, VM_MAXUSER_ADDRESS-%rsi).
	 */
	movq	$VM_MAXUSER_ADDRESS,%rax
	subq	%rsi,%rax
	jc	_C_LABEL(copystr_efault)
	cmpq	%rdx,%rax
	jae	1f
	movq	%rax,%rdx
	movq	%rax,%r8
1:	incq	%rdx

	SMAP_DISABLE
.Lcopyinstr_start:
1:	decq	%rdx
	jz	2f
	lodsb
	stosb
	testb	%al,%al
	jnz	1b
.Lcopyinstr_end:
	SMAP_ENABLE

	/* Success -- 0 byte reached. */
	decq	%rdx
	xorq	%rax,%rax
	jmp	copystr_return

2:	/* rdx is zero -- return EFAULT or ENAMETOOLONG. */
	SMAP_ENABLE
	movq	$VM_MAXUSER_ADDRESS,%r11
	cmpq	%r11,%rsi
	jae	_C_LABEL(copystr_efault)
	movq	$ENAMETOOLONG,%rax
	jmp	copystr_return
	DEFERRED_SWITCH_CALL
END(copyinstr)
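
/*
 * copyinstr() implements copyinstr(9): copy a NUL-terminated string from
 * user space, up to len bytes, and report the number of bytes copied
 * (including the NUL) through the optional 'done' pointer.  A typical,
 * hypothetical caller in C ('user_path' is made up for illustration):
 *
 *	char path[MAXPATHLEN];
 *	size_t done;
 *	int error;
 *
 *	error = copyinstr(user_path, path, sizeof(path), &done);
 *	if (error == ENAMETOOLONG)
 *		return error;		// string did not fit in 'path'
 *	if (error != 0)
 *		return error;		// EFAULT: bad user address
 *	// 'path' now holds a NUL-terminated copy, 'done' bytes long.
 */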

ENTRY(copystr_efault)
	movl	$EFAULT,%eax
	jmp	copystr_return
END(copystr_efault)

ENTRY(copystr_fault)
	SMAP_ENABLE
copystr_return:
	/* Set *lencopied and return %eax. */
	testq	%r9,%r9
	jz	8f
	subq	%rdx,%r8
	movq	%r8,(%r9)
8:	ret
END(copystr_fault)

/**************************************************************************/

#define	UFETCHSTORE_PROLOGUE(x)						\
	movq	$VM_MAXUSER_ADDRESS-x,%r11			;	\
	cmpq	%r11,%rdi					;	\
	ja	_C_LABEL(copy_efault)

/* LINTSTUB: int _ufetch_8(const uint8_t *uaddr, uint8_t *valp); */
ENTRY(_ufetch_8)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(1)

	SMAP_DISABLE
.L_ufetch_8_start:
	movb	(%rdi),%al
.L_ufetch_8_end:
	SMAP_ENABLE

	movb	%al,(%rsi)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_8)

/* LINTSTUB: int _ufetch_16(const uint16_t *uaddr, uint16_t *valp); */
ENTRY(_ufetch_16)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(2)

	SMAP_DISABLE
.L_ufetch_16_start:
	movw	(%rdi),%ax
.L_ufetch_16_end:
	SMAP_ENABLE

	movw	%ax,(%rsi)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_16)

/* LINTSTUB: int _ufetch_32(const uint32_t *uaddr, uint32_t *valp); */
ENTRY(_ufetch_32)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(4)

	SMAP_DISABLE
.L_ufetch_32_start:
	movl	(%rdi),%eax
.L_ufetch_32_end:
	SMAP_ENABLE

	movl	%eax,(%rsi)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_32)

/* LINTSTUB: int _ufetch_64(const uint64_t *uaddr, uint64_t *valp); */
ENTRY(_ufetch_64)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(8)

	SMAP_DISABLE
.L_ufetch_64_start:
	movq	(%rdi),%rax
.L_ufetch_64_end:
	SMAP_ENABLE

	movq	%rax,(%rsi)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_64)

/* LINTSTUB: int _ustore_8(uint8_t *uaddr, uint8_t val); */
ENTRY(_ustore_8)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(1)

	SMAP_DISABLE
.L_ustore_8_start:
	movb	%sil,(%rdi)
.L_ustore_8_end:
	SMAP_ENABLE

	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_8)

/* LINTSTUB: int _ustore_16(uint16_t *uaddr, uint16_t val); */
ENTRY(_ustore_16)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(2)

	SMAP_DISABLE
.L_ustore_16_start:
	movw	%si,(%rdi)
.L_ustore_16_end:
	SMAP_ENABLE

	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_16)

/* LINTSTUB: int _ustore_32(uint32_t *uaddr, uint32_t val); */
ENTRY(_ustore_32)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(4)

	SMAP_DISABLE
.L_ustore_32_start:
	movl	%esi,(%rdi)
.L_ustore_32_end:
	SMAP_ENABLE

	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_32)

/* LINTSTUB: int _ustore_64(uint64_t *uaddr, uint64_t val); */
ENTRY(_ustore_64)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(8)

	SMAP_DISABLE
.L_ustore_64_start:
	movq	%rsi,(%rdi)
.L_ustore_64_end:
	SMAP_ENABLE

	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_64)
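
/*
 * The _ufetch_*()/_ustore_*() routines transfer a single naturally sized
 * value to or from user space, returning 0 or EFAULT.  Callers normally
 * reach them through the MI ufetch(9)/ustore(9) wrappers; the wrapper
 * names below are used on that assumption, and 'uaddr' is hypothetical.
 *
 *	uint32_t val;
 *	int error;
 *
 *	error = ufetch_32(uaddr, &val);		// read one word from userland
 *	if (error != 0)
 *		return error;			// EFAULT
 *	error = ustore_32(uaddr, val + 1);	// write it back, incremented
 *	if (error != 0)
 *		return error;
 */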

/**************************************************************************/

/*
 * Compare-and-swap a 64-bit integer in user space.
 *
 * int	_ucas_64(volatile uint64_t *uptr, uint64_t old, uint64_t new,
 *		 uint64_t *ret);
 */
ENTRY(_ucas_64)
	DEFERRED_SWITCH_CHECK
	/* Fail if kernel-space */
	movq	$VM_MAXUSER_ADDRESS-8,%r8
	cmpq	%r8,%rdi
	ja	_C_LABEL(ucas_efault)
	movq	%rsi,%rax

	SMAP_DISABLE
.Lucas64_start:
	/* Perform the CAS */
	lock
	cmpxchgq %rdx,(%rdi)
.Lucas64_end:
	SMAP_ENABLE

	/*
	 * Note: %rax is "old" value.
	 * Set the return values.
	 */
	movq	%rax,(%rcx)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ucas_64)
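
/*
 * Usage sketch for the user-space CAS primitives: the routine returns 0
 * (or EFAULT on a bad address) and stores the value previously found at
 * *uptr through 'ret'; the swap succeeded if and only if that value
 * equals 'old'.  'uaddr', 'expected' and 'newval' are hypothetical.
 *
 *	uint64_t actual;
 *	int error;
 *
 *	error = _ucas_64(uaddr, expected, newval, &actual);
 *	if (error != 0)
 *		return error;		// EFAULT
 *	if (actual != expected) {
 *		// lost the race: someone else changed *uaddr first
 *	}
 */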

/*
 * int	_ucas_32(volatile uint32_t *uptr, uint32_t old, uint32_t new,
 *		 uint32_t *ret);
 */
ENTRY(_ucas_32)
	DEFERRED_SWITCH_CHECK
	/* Fail if kernel-space */
	movq	$VM_MAXUSER_ADDRESS-4,%r8
	cmpq	%r8,%rdi
	ja	_C_LABEL(ucas_efault)
	movl	%esi,%eax

	SMAP_DISABLE
.Lucas32_start:
	/* Perform the CAS */
	lock
	cmpxchgl %edx,(%rdi)
.Lucas32_end:
	SMAP_ENABLE

	/*
	 * Note: %eax is "old" value.
	 * Set the return values.
	 */
	movl	%eax,(%rcx)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ucas_32)

ENTRY(ucas_efault)
	movq	$EFAULT,%rax
	ret
END(ucas_efault)

ENTRY(ucas_fault)
	SMAP_ENABLE
	ret
END(ucas_fault)

/*
 * Label must be after all copy functions.
 */
x86_copyfunc_end:	.globl	x86_copyfunc_end

/*
 * Fault table of copy functions for trap().
 */
	.section ".rodata"
	.globl _C_LABEL(onfault_table)

_C_LABEL(onfault_table):
	.quad .Lcopyin_start
	.quad .Lcopyin_end
	.quad _C_LABEL(copy_fault)

	.quad .Lcopyout_start
	.quad .Lcopyout_end
	.quad _C_LABEL(copy_fault)

	.quad .Lkcopy_start
	.quad .Lkcopy_end
	.quad _C_LABEL(kcopy_fault)

	.quad .Lcopyoutstr_start
	.quad .Lcopyoutstr_end
	.quad _C_LABEL(copystr_fault)

	.quad .Lcopyinstr_start
	.quad .Lcopyinstr_end
	.quad _C_LABEL(copystr_fault)

	.quad .Lucas64_start
	.quad .Lucas64_end
	.quad _C_LABEL(ucas_fault)

	.quad .Lucas32_start
	.quad .Lucas32_end
	.quad _C_LABEL(ucas_fault)

	.quad .L_ufetch_8_start
	.quad .L_ufetch_8_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ufetch_16_start
	.quad .L_ufetch_16_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ufetch_32_start
	.quad .L_ufetch_32_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ufetch_64_start
	.quad .L_ufetch_64_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ustore_8_start
	.quad .L_ustore_8_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ustore_16_start
	.quad .L_ustore_16_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ustore_32_start
	.quad .L_ustore_32_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ustore_64_start
	.quad .L_ustore_64_end
	.quad _C_LABEL(copy_fault)

	.quad 0	/* terminate */
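
/*
 * Each table entry is a (start, end, handler) triple of code addresses,
 * and the table is terminated by a zero start address.  An illustrative
 * C sketch of how a trap handler could resolve a faulting PC through it
 * (the real lookup lives in the trap code, not here):
 *
 *	struct onfault_entry {		// hypothetical view of one entry
 *		uintptr_t start;
 *		uintptr_t end;
 *		uintptr_t handler;
 *	};
 *	extern const struct onfault_entry onfault_table[];
 *
 *	uintptr_t
 *	onfault_lookup(uintptr_t pc)
 *	{
 *		const struct onfault_entry *p;
 *
 *		for (p = onfault_table; p->start != 0; p++) {
 *			if (pc >= p->start && pc < p->end)
 *				return p->handler;	// resume here
 *		}
 *		return 0;			// genuine kernel fault
 *	}
 */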

	.text
