/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#if 0
	RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * Syntax: GAS / AT&T.  ABI: System V AMD64.
 *
 * In:      %rdi = dst, %rsi = src
 * Out:     %rax = dst (the value %rdi had on entry)
 * Clobber: %rcx, %rdx, %rdi, %rsi, %r8, %r9, flags
 * Stack:   not used
 *
 * The routine works in two phases:
 *   1. scan - advance %rdi to the NUL that terminates dst;
 *   2. copy - copy src, including its NUL, to that position.
 *
 * Both phases go byte-by-byte until the pointer being read is 8-byte
 * aligned and then switch to 8-byte loads.  Because those loads are
 * aligned they never leave the aligned word that holds the terminator,
 * even though they may read bytes located after it.
 *
 * Zero-byte test used on each word w:
 *
 *	(w - 0x0101010101010101) & 0x8080808080808080
 *
 * The result is non-zero whenever w contains a zero byte, but it can
 * also be non-zero for a word without one (for example when a byte is
 * 0x81 or above).  A hit is therefore always confirmed byte-by-byte,
 * and the word loop is re-entered when no zero byte is actually found.
 */
ENTRY(strcat)
	movq	%rdi,%rax			/* return value: dst */
	movabsq	$0x0101010101010101,%r8		/* subtrahend for the test */
	movabsq	$0x8080808080808080,%r9		/* mask for the test */

	/*
	 * Phase 1: find the end of dst.
	 * Check one byte at a time until %rdi is 8-byte aligned.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy			/* terminator found before alignment */
	incq	%rdi
	jmp	.Lscan_align

	/*
	 * NOTE(review): on x86 ELF targets GAS interprets ".align 4" as a
	 * 4-byte alignment, not 2^4.  If 16-byte alignment of the loop was
	 * intended this would need to be ".p2align 4" - confirm.
	 */
	.align	4
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx
	addq	$8,%rdi			/* %rdi now points past the word */
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop		/* certainly no zero byte in this word */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * %rdi has already been advanced by 8, so the bytes of the word
	 * just examined live at -8(%rdi) .. -1(%rdi).  On a match, %rdi is
	 * moved back so that it points at the zero byte itself.
	 */

	cmpb	$0,-8(%rdi)	/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)	/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)	/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)	/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)	/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)	/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)	/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)	/* 8th byte == 0? */
	jne	.Lscan_loop	/* false positive: keep scanning */
	subq	$1,%rdi

	/*
	 * Phase 2: copy src to the end of dst (%rdi points at dst's NUL).
	 * Copy one byte at a time until the source pointer %rsi is 8-byte
	 * aligned; return right away if the NUL is copied while doing so.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lcopy_align
	ret

	/*
	 * Word copy loop.  It is entered at .Lcopy_aligned: a word is
	 * loaded and tested first, and only stored (at .Lcopy_loop) once
	 * the test has shown that it contains no zero byte.  Loads from
	 * %rsi are aligned; the stores through %rdi need not be.
	 */
	.align	4
.Lcopy_loop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx		/* test a copy, keep the data in %rdx */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * The word in %rdx is stored one byte at a time, lowest byte
	 * first (%rdx is shifted right by 8 between bytes), stopping
	 * after the zero byte has been written.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lcopy_aligned	/* false positive: all 8 bytes copied, go on */

.Ldone:
	ret
END(strcat)

	.section .note.GNU-stack,"",%progbits
169