Deleted Added
sdiff udiff text old ( 144872 ) new ( 184547 )
full compact
1/*
2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3 * Public domain.
4 */
5
6#include <machine/asm.h>
7__FBSDID("$FreeBSD: head/lib/libc/amd64/string/strcat.S 144872 2005-04-10 18:58:49Z alc $");
8
9#if 0
10 RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $")
11#endif
12
/*
 * char *strcat(char *dst, const char *src)
 *
 * Append the NUL-terminated string at src (including its terminator)
 * to the end of the NUL-terminated string at dst.
 *
 * ABI:   System V AMD64, leaf function, no stack usage.
 * In:    %rdi = dst, %rsi = src
 * Out:   %rax = dst (the value %rdi had on entry)
 * Uses:  %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 *
 * Both phases (finding the end of dst, then copying src) work a byte
 * at a time until the pointer being read is 8-byte aligned, and then a
 * 64-bit word at a time.  A word x is suspected to contain a zero byte
 * when
 *
 *	(x - 0x0101010101010101) & 0x8080808080808080
 *
 * is non-zero.  This test never misses a zero byte, but it also fires
 * when a byte is 0x81 or above, so every hit is confirmed byte by byte
 * and the word loop is re-entered when no byte was actually zero.
 */
ENTRY(strcat)
	movq	%rdi,%rax		/* return value: original dst */
	movabsq	$0x0101010101010101,%r8	/* 0x01 in every byte lane */
	movabsq	$0x8080808080808080,%r9	/* 0x80 in every byte lane */

	/*
	 * Phase 1: locate the terminating NUL of dst.
	 *
	 * Align destination to word boundary.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil			/* low 3 address bits clear? */
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy			/* %rdi points at dst's NUL */
	incq	%rdi
	jmp	.Lscan_align

	.align	4
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx		/* aligned 8-byte load */
	addq	$8,%rdi			/* %rdi now one word past it */
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop		/* no byte can be zero: next word */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * %rdi is 8 past the word just examined, hence the negative
	 * offsets.  On a match %rdi is moved back onto the zero byte.
	 */

	cmpb	$0,-8(%rdi)		/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)		/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)		/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)		/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)		/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)		/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)		/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)		/* 8th byte == 0? */
	jne	.Lscan_loop		/* false positive: keep scanning */
	subq	$1,%rdi

	/*
	 * Phase 2: copy src to the end of dst.  Here %rdi points at
	 * dst's terminating NUL, which the first copied byte overwrites.
	 *
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil			/* low 3 address bits clear? */
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* was that the terminator? */
	jne	.Lcopy_align
	ret

	/*
	 * Word copy loop.  It is entered at .Lcopy_aligned; the store at
	 * the top writes the word that the previous iteration loaded and
	 * found to contain no zero byte.  Only the source is known to be
	 * aligned here, so the store to (%rdi) may be unaligned.
	 */
	.align	4
.Lcopy_loop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx		/* aligned 8-byte load */
	movq	%rdx,%rcx		/* test a copy; %rdx keeps the data */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop		/* no byte can be zero: store it */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * The suspect word is stored one byte at a time, lowest byte
	 * first, shifting the next byte into %dl after each store and
	 * stopping once the zero byte has been written.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 8th byte == 0? */
	jne	.Lcopy_aligned		/* false positive: all 8 stored */

.Ldone:
	ret

/*
 * Record the size of the function in the symbol table so debuggers,
 * profilers and similar tools can attribute addresses to strcat.
 * END() is used when <machine/asm.h> provides it; otherwise the ELF
 * size directive is emitted directly.
 */
#ifdef END
END(strcat)
#else
	.size	strcat, . - strcat
#endif