Deleted Added
full compact
stpcpy.S (145459) stpcpy.S (184547)
1/*
2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3 * Public domain.
4 */
5
6#include <machine/asm.h>
1/*
2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3 * Public domain.
4 */
5
6#include <machine/asm.h>
7__FBSDID("$FreeBSD: head/lib/libc/amd64/string/strcpy.S 145459 2005-04-23 18:45:36Z alc $");
7__FBSDID("$FreeBSD: head/lib/libc/amd64/string/strcpy.S 184547 2008-11-02 01:10:54Z peter $");
8
9#if 0
10 RCSID("$NetBSD: strcpy.S,v 1.3 2004/07/19 20:04:41 drochner Exp $")
11#endif
12
13/*
14 * This strcpy implementation copies a byte at a time until the
15 * source pointer is aligned to a word boundary; it then copies by
16 * words until it finds a word containing a zero byte, and finally
17 * copies by bytes until the end of the string is reached.
18 *
19 * While this may result in unaligned stores if the source and
20 * destination pointers are unaligned with respect to each other,
21 * it is still faster than either byte copies or the overhead of
22 * an implementation suitable for machines with strict alignment
23 * requirements.
24 */
25
/*
 * char *strcpy(char *dst, const char *src)
 *
 * ABI:      System V AMD64
 * In:       %rdi = dst, %rsi = src
 * Out:      %rax = dst (the destination pointer as passed in)
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 * Stack:    unused (leaf routine, no calls)
 *
 * %r8 and %r9 hold the per-byte constants 0x01 and 0x80 used by the
 * word loop to test eight source bytes at once for a possible NUL.
 */
ENTRY(strcpy)
	movq	%rdi,%rax		/* return value: original dst */
	movabsq	$0x0101010101010101,%r8	/* 0x01 in every byte lane */
	movabsq	$0x8080808080808080,%r9	/* 0x80 in every byte lane */

	/*
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 *
	 * Bytes are copied one at a time until the low three bits of the
	 * source pointer are clear; if the terminator is met on the way
	 * the copy is already complete.
	 */
.Lalign:
	testb	$7,%sil
	je	.Lword_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lalign
	ret

	/*
	 * Word loop.  The store of the previous word sits at the top so
	 * that a word is written only after it has been shown to contain
	 * no NUL byte; the loop is entered at .Lword_aligned.
	 */
	.p2align 4
.Lloop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lword_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx
	addq	$8,%rsi
	subq	%r8,%rcx		/* borrow sets bit 7 of any 0x00 byte */
	testq	%r9,%rcx		/* any lane with bit 7 set? */
	je	.Lloop			/* none: the word has no NUL byte */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * (The test also fires for source bytes of 0x81 and above, so the
	 * word is now written out byte by byte, lowest byte first, and
	 * each byte is checked individually.  %rsi has already been
	 * advanced past this word.)
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lword_aligned	/* false alarm: resume the word loop */

.Ldone:
	ret
/*
 * Close the symbol opened by ENTRY().  END() comes from <machine/asm.h>;
 * NOTE(review): it is expected to emit the ELF .size for strcpy -- confirm
 * against the header in the tree this file is built in.
 */
END(strcpy)
8
9#if 0
10 RCSID("$NetBSD: strcpy.S,v 1.3 2004/07/19 20:04:41 drochner Exp $")
11#endif
12
13/*
14 * This strcpy implementation copies a byte at a time until the
15 * source pointer is aligned to a word boundary; it then copies by
16 * words until it finds a word containing a zero byte, and finally
17 * copies by bytes until the end of the string is reached.
18 *
19 * While this may result in unaligned stores if the source and
20 * destination pointers are unaligned with respect to each other,
21 * it is still faster than either byte copies or the overhead of
22 * an implementation suitable for machines with strict alignment
23 * requirements.
24 */
25
/*
 * char *strcpy(char *dst, const char *src)
 *
 * ABI:      System V AMD64
 * In:       %rdi = dst, %rsi = src
 * Out:      %rax = dst (the destination pointer as passed in)
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 * Stack:    unused (leaf routine, no calls)
 */
ENTRY(strcpy)
	movq	%rdi,%rax			/* keep dst for the return */
	movabsq	$0x0101010101010101,%r8		/* every byte lane = 0x01 */
	movabsq	$0x8080808080808080,%r9		/* every byte lane = 0x80 */

	/*
	 * Phase 1: copy single bytes until %rsi is a multiple of 8.
	 * Meeting the terminator here finishes the whole copy.
	 */
.Lhead_bytes:
	testb	$7,%sil
	je	.Lnext_word
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lhead_bytes
	ret

	/*
	 * Phase 2: copy eight bytes per iteration.  Entry is at
	 * .Lnext_word; the store at the top of the loop commits the word
	 * examined by the previous iteration, so a word is written whole
	 * only once it is known to hold no zero byte.
	 */
	.p2align 4
.Lstore_word:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lnext_word:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx
	addq	$8,%rsi
	subq	%r8,%rcx			/* word - 0x0101...01 */
	testq	%r9,%rcx			/* bit 7 set in any lane? */
	je	.Lstore_word			/* no: nothing here is zero */

	/*
	 * Phase 3: the word in %rdx may hold the terminator.  The check
	 * above can also trigger without a zero byte being present, so
	 * emit the word one byte at a time, lowest byte first, stopping
	 * at the first zero.  Bytes 1-7 share one pattern, generated by
	 * the .rept below; byte 8 differs only in its branch.
	 */
	.rept	7
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* terminator written? */
	je	.Lreturn
	shrq	$8,%rdx				/* next byte into %dl */
	.endr

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* 8th byte == 0? */
	jne	.Lnext_word			/* no zero after all: back to phase 2 */

.Lreturn:
	ret
END(strcpy)