1/*
2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3 * Public domain.
4 */
5
6#include <machine/asm.h>
7
8#if defined(LIBC_SCCS)
9	RCSID("$NetBSD: strcpy.S,v 1.1 2005/12/20 19:28:49 christos Exp $")
10#endif
11
12/*
 * This strcpy implementation copies a byte at a time until the
 * source pointer is aligned to a word boundary.  It then copies by
 * words until it finds a word containing a zero byte, and finally
 * copies by bytes until the end of the string is reached.
17 *
18 * While this may result in unaligned stores if the source and
19 * destination pointers are unaligned with respect to each other,
20 * it is still faster than either byte copies or the overhead of
21 * an implementation suitable for machines with strict alignment
22 * requirements.
23 */
24
/*
 * char *strcpy(char *dst, const char *src)
 *
 * Both arguments are read from the stack (dst at 4(%esp), src at
 * 8(%esp) on entry) and dst is handed back in %eax.
 *
 * Register roles:
 *	%eax	read cursor into src
 *	%ecx	write cursor into dst
 *	%ebx	byte/word currently in flight (saved on entry,
 *		restored on exit)
 *	%edx	scratch for the zero-byte probe
 */
ENTRY(strcpy)
	movl	8(%esp),%eax		/* %eax = src */
	movl	4(%esp),%ecx		/* %ecx = dst */
	pushl	%ebx

	/*
	 * Head: copy single bytes until the read cursor sits on a
	 * 4-byte boundary.  If the terminator is copied first, the
	 * whole string is done and we leave immediately.
	 */
	_ALIGN_TEXT
.Lhead:
	testb	$3,%al			/* low two address bits clear? */
	jz	.Lprobe
	movb	(%eax),%bl
	movb	%bl,(%ecx)
	incl	%eax
	incl	%ecx
	testb	%bl,%bl
	jnz	.Lhead
	jmp	.Lexit

	/*
	 * Body: .Lprobe fetches the next source word and checks it;
	 * .Lcommit stores a word the probe has cleared and falls back
	 * into .Lprobe.
	 *
	 * The probe subtracts 1 from every byte of the word and looks
	 * at the top bit of each byte of the result.  A zero byte is
	 * never missed, but a byte of 0x81 or above trips the probe
	 * as well, so a hit still has to be confirmed byte by byte.
	 */
	_ALIGN_TEXT
.Lcommit:
	movl	%ebx,(%ecx)
	addl	$4,%ecx
.Lprobe:
	movl	(%eax),%ebx
	leal	-0x01010101(%ebx),%edx
	addl	$4,%eax
	testl	$0x80808080,%edx
	jz	.Lcommit

	/*
	 * Tail: the probe fired.  Write the word out one byte at a
	 * time, lowest byte first, and stop right after the
	 * terminator.  If all four bytes turn out to be non-zero the
	 * probe was a false alarm, so go back for the next word.
	 */
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	jz	.Lexit

	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	jz	.Lexit

	shrl	$16,%ebx		/* bytes 2 and 3 -> %bl and %bh */
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	jz	.Lexit

	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	jnz	.Lprobe

.Lexit:
	popl	%ebx
	movl	4(%esp),%eax		/* return the caller's dst */
	ret
87