1/*
2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3 * Public domain.
4 */
5
6#include <machine/asm.h>
7
8#if defined(LIBC_SCCS)
9	RCSID("$NetBSD: strcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $")
10#endif
11
12/*
 * This strcpy implementation copies a byte at a time until the
 * source pointer is aligned to a word boundary; it then copies by
 * words until it finds a word containing a zero byte, and finally
 * copies by bytes until the end of the string is reached.
17 *
18 * While this may result in unaligned stores if the source and
19 * destination pointers are unaligned with respect to each other,
20 * it is still faster than either byte copies or the overhead of
21 * an implementation suitable for machines with strict alignment
22 * requirements.
23 */
24
/*
 * char *strcpy(char *dst, const char *src)
 *
 * In:	%rdi = dst, %rsi = src
 * Out:	%rax = dst (as passed in)
 * Scratch: %rcx, %rdx, %r8, %r9, flags; %rsi and %rdi are advanced.
 *
 * Three phases:
 *   1. head  - single bytes until %rsi is 8-byte aligned
 *   2. words - 8 bytes per iteration while the word cannot hold a NUL
 *   3. tail  - the suspicious word is written out one byte at a time
 *
 * Stores to dst are not necessarily aligned; only the loads are.
 */
ENTRY(strcpy)
	movabsq	$0xfefefefefefefeff,%r8	/* -0x0101010101010101 */
	movabsq	$0x8080808080808080,%r9	/* bit 7 of every byte */
	movq	%rdi,%rax		/* return value */

	/*
	 * Phase 1: copy bytes until the source is 8-byte aligned.
	 * Returns right away if the terminator shows up first.
	 */
	_ALIGN_TEXT
.Lhead:
	testb	$7,%sil
	je	.Lfetch			/* aligned: switch to words */
	movzbl	(%rsi),%ecx
	movb	%cl,(%rdi)
	incq	%rsi
	incq	%rdi
	testl	%ecx,%ecx		/* was that the NUL? */
	jne	.Lhead
	ret

	/*
	 * Phase 2: word loop.  The store sits at the top so that the
	 * loop is entered at .Lfetch and a word is only written once
	 * it has passed the filter below.
	 */
	_ALIGN_TEXT
.Lstore_word:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lfetch:
	movq	(%rsi),%rdx
	addq	$8,%rsi
	/*
	 * (word - 0x01..01) & 0x80..80 is non-zero whenever some byte
	 * of the word is zero.  It can also be non-zero for a word
	 * without a zero byte (any byte >= 0x81 trips it), so a hit
	 * only means "look closer".
	 */
	leaq	(%rdx,%r8),%rcx		/* word - 0x0101010101010101 */
	testq	%r9,%rcx
	je	.Lstore_word

	/*
	 * Phase 3: emit the word in %rdx lowest byte first, stopping
	 * after the NUL.  If all eight bytes turn out to be non-zero
	 * the filter gave a false alarm and the word loop resumes;
	 * %rsi already points past this word.
	 */
	movl	$8,%ecx			/* bytes left in this word */
.Ltail:
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	je	.Lreturn		/* terminator written: finished */
	shrq	$8,%rdx			/* next byte into %dl */
	decl	%ecx
	jne	.Ltail
	jmp	.Lfetch			/* false alarm */

.Lreturn:
	ret
112