/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

	.macro ALTERNATIVE_JUMP feature,orig,alt
0:
	.byte 0xe9	/* 32bit jump */
	.long \orig-1f	/* by default jump to orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9	             /* near jump with 32bit immediate */
	.long \alt-1b /* offset */   /* or alternatively to alt */
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad  0b
	.quad  2b
	.byte  \feature		     /* when feature is set */
	.byte  5		     /* length of the original jmp */
	.byte  5		     /* length of the replacement jmp */
	.previous
	.endm
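
/*
 * For reference: each .altinstructions record emitted by this macro is what
 * apply_alternatives() walks at boot to patch in the replacement jump when
 * \feature is set.  A rough C sketch of that record (field names as in this
 * era's <asm/alternative.h>; padding omitted, so treat this as an
 * illustrative sketch, not a definition):
 *
 *	struct alt_instr {
 *		u8 *instr;		// 0b: the default 5-byte jmp
 *		u8 *replacement;	// 2b: the alternative 5-byte jmp
 *		u8  cpuid;		// feature bit that enables patching
 *		u8  instrlen;		// 5
 *		u8  replacementlen;	// 5
 *	};
 */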

/* Standard copy_to_user with segment limit checking */
ENTRY(copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc  bad_to_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae bad_to_user
	xorl %ecx,%ecx	/* clear zero flag (the flag lives in %ecx) */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
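
/*
 * Hedged C-level sketch of the limit check above (current_thread_info() and
 * addr_limit are the real <asm/thread_info.h>/<asm/uaccess.h> objects, but
 * the lines below are only an illustration, not the implementation):
 *
 *	if ((unsigned long)dst + len < (unsigned long)dst ||	// carry: wrap
 *	    (unsigned long)dst + len >= current_thread_info()->addr_limit.seg)
 *		return len;	// bad_to_user: report everything as uncopied
 */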

ENTRY(copy_user_generic)
	CFI_STARTPROC
	movl $1,%ecx	/* set zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

ENTRY(__copy_from_user_inatomic)
	CFI_STARTPROC
	xorl %ecx,%ecx	/* clear zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

/* Standard copy_from_user with segment limit checking */
ENTRY(copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc  bad_from_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae  bad_from_user
	movl $1,%ecx	/* set zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(copy_from_user)

	.section .fixup,"ax"
	/* must zero dest */
bad_from_user:
	CFI_STARTPROC
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl	%edx,%eax
	ret
	CFI_ENDPROC
END(bad_from_user)
	.previous
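
/*
 * Note on the fixup above: copy_from_user() guarantees that any bytes it
 * could not copy are zeroed in the kernel destination, so callers never see
 * stale kernel memory.  Hedged C sketch of what bad_from_user does (the
 * access_ok() call stands in for the limit check and is only illustrative):
 *
 *	if (!access_ok(VERIFY_READ, src, len)) {
 *		memset(dst, 0, len);	// the rep stosb with %ecx = len
 *		return len;		// everything counts as uncopied
 *	}
 */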


/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag -- if set, zero the destination on error
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
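
/*
 * Example of the calling convention (illustrative numbers only): a request
 * for rdx = 100 bytes that faults after 60 bytes were transferred returns
 * eax = 40; if the zero flag in ecx was set, those trailing 40 destination
 * bytes are also cleared before returning (subject to the 8-byte rounding
 * done by the fixup code further down).
 */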
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rcx, 0
	xorl %eax,%eax		/* zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz  .Lbad_alignment
.Lafter_bad_alignment:
#endif

	movq %rdx,%rcx

	movl $64,%ebx
	shrq $6,%rdx
	decq %rdx
	js   .Lhandle_tail

	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movq %r11,(%rdi)
.Ld2:	movq %r8,1*8(%rdi)
.Ld3:	movq %r9,2*8(%rdi)
.Ld4:	movq %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movq %r11,4*8(%rdi)
.Ld6:	movq %r8,5*8(%rdi)
.Ld7:	movq %r9,6*8(%rdi)
.Ld8:	movq %r10,7*8(%rdi)

	decq %rdx

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi

	jns  .Lloop

	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx
	andl $63,%ecx
	shrl $3,%ecx
	jz   .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movq %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz   .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

	CFI_REMEMBER_STATE
.Lende:
	popq %rcx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rcx
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
	CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d
	movl %r9d,%ecx
	cmpq %r9,%rdx
	jz   .Lhandle_7
	js   .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .Lalign_1
	subq %r9,%rdx
	jmp .Lafter_bad_alignment
#endif

	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e
	.quad .Ls2,.Ls2e
	.quad .Ls3,.Ls3e
	.quad .Ls4,.Ls4e
	.quad .Ld1,.Ls1e
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e
	.quad .Ls6,.Ls6e
	.quad .Ls7,.Ls7e
	.quad .Ls8,.Ls8e
	.quad .Ld5,.Ls5e
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Lzero_rest
	.quad .Ld11,.Lzero_rest
#endif
	.quad .Le5,.Le_zero
	.previous
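
/*
 * Each __ex_table entry above pairs the address of a potentially faulting
 * instruction with the address of its fixup; on a fault the page fault
 * handler finds the entry for the faulting rip via search_exception_tables()
 * and resumes execution at the fixup instead of oopsing.
 */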

	/* Compute the offset into the 64-byte main loop. Accurate to 8 bytes,
	   with the error on the pessimistic side. This is gross; it would be
	   better to fix the interface. */
	/* eax: zero, ebx: 64 */
.Ls1e: 	addl $8,%eax
.Ls2e: 	addl $8,%eax
.Ls3e: 	addl $8,%eax
.Ls4e: 	addl $8,%eax
.Ls5e: 	addl $8,%eax
.Ls6e: 	addl $8,%eax
.Ls7e: 	addl $8,%eax
.Ls8e: 	addl $8,%eax
	addq %rbx,%rdi	/* +64 */
	subq %rax,%rdi  /* correct destination with computed offset */

	shlq $6,%rdx	/* loop counter * 64 (stride length) */
	addq %rax,%rdx	/* add offset to loopcnt */
	andl $63,%ecx	/* remaining bytes */
	addq %rcx,%rdx	/* add them */
	jmp .Lzero_rest
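
/*
 * Worked example of the arithmetic above (illustrative numbers): a fault at
 * .Ls1 during the first iteration of a 200-byte copy falls through all eight
 * addl's, so eax = 64; rdi is corrected by +64-64 = 0 (nothing of this
 * iteration was stored), and rdx becomes 2*64 + 64 + (200 & 63) = 200, i.e.
 * every byte from the start of the faulting iteration onwards.  .Lzero_rest
 * then zeroes that many bytes if the saved zero flag was set and returns rdx
 * in eax.
 */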

	/* exception on quad word loop in tail handling */
	/* ecx:	loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
	cmpl $0,(%rsp)
	jz   .Le_zero
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest, just return */
.Le_zero:
	movq %rdx,%rax
	jmp .Lende
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)


	/* Some CPUs run faster using the string copy instructions.
	   This is also a lot simpler. Use them when possible.
	   Patch in jmps to this code instead of copying it fully
	   to avoid unwanted aliasing in the exception tables. */

 /* rdi	destination
  * rsi source
  * rdx count
  * ecx zero flag
  *
  * Output:
  * eax uncopied bytes or 0 if successful.
  *
  * Only 4GB of copy is supported. This shouldn't be a problem
  * because the kernel normally only writes from/to page-sized chunks
  * even if user space passed a longer buffer.
  * Copying more would also be dangerous because both Intel and AMD
  * have errata with rep movsq > 4GB. If someone feels the need to fix
  * this, please take those errata into account.
  */
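
/*
 * The split below moves rdx/8 quadwords with rep movsq and the remaining
 * rdx%8 bytes with rep movsb.  Worked example (illustrative numbers only):
 * rdx = 100 gives %ecx = 12 quadwords (96 bytes) followed by 4 single bytes.
 */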
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	movl %ecx,%r8d		/* save zero flag */
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
	jz   10f
1:	rep
	movsq
	movl %edx,%ecx
2:	rep
	movsb
9:	movl %ecx,%eax
	ret

	/* multiple of 8 bytes */
10:	rep
	movsq
	xor %eax,%eax
	ret

	/* exception handling */
3:      lea (%rdx,%rcx,8),%rax	/* exception on quad loop */
	jmp 6f
5:	movl %ecx,%eax		/* exception on byte loop */
	/* eax: leftover bytes */
6:	testl %r8d,%r8d		/* zero flag set? */
	jz 7f
	movl %eax,%ecx		/* initialize x86 loop counter */
	push %rax
	xorl %eax,%eax
8:	rep
	stosb 			/* zero the rest */
11:	pop %rax
7:	ret
	CFI_ENDPROC
END(copy_user_generic_string)

	.section __ex_table,"a"
	.quad 1b,3b
	.quad 2b,5b
	.quad 8b,11b
	.quad 10b,3b
	.previous
