1/* strlen -- Compute length of NUL terminated string.
2   Highly optimized version for ix86, x>=5.
3   Copyright (C) 1995, 1996, 1997, 2000, 2002 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5   Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public
18   License along with the GNU C Library; if not, write to the Free
19   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20   02111-1307 USA.  */
21
22#include <sysdep.h>
23#include "asm-syntax.h"
24#include "bp-sym.h"
25#include "bp-asm.h"
26
27/* This version is especially optimized for the i586 (and following?)
28   processors.  This is mainly done by using the two pipelines.  The
29   version optimized for i486 is weak in this aspect because to get
30   as much parallelism we have to execute some *more* instructions.
31
32   The code below is structured to reflect the pairing of the instructions
33   as *I think* it is.  I have no processor data book to verify this.
34   If you find something you think is incorrect let me know.  */
35
36
37/* The magic value which is used throughout in the whole code.  */
38#define magic 0xfefefeff
39
40#define PARMS	LINKAGE		/* no space for saved regs */
41#define STR	PARMS
42
43	.text
44ENTRY (BP_SYM (strlen))
45	ENTER
46
47	movl STR(%esp), %eax
48	CHECK_BOUNDS_LOW (%eax, STR(%esp))
49	movl $3, %edx		/* load mask (= 3) */
50
51	andl %eax, %edx		/* separate last two bits of address */
52
53	jz L(1)			/* aligned => start loop */
54	jp L(0)			/* exactly two bits set */
55
56	cmpb %dh, (%eax)	/* is byte NUL? */
57	je L(2)			/* yes => return */
58
59	incl %eax		/* increment pointer */
60	cmpb %dh, (%eax)	/* is byte NUL? */
61
62	je L(2)			/* yes => return */
63
64	incl %eax		/* increment pointer */
65	xorl $2, %edx
66
67	jz L(1)
68
69L(0):	cmpb %dh, (%eax)	/* is byte NUL? */
70	je L(2)			/* yes => return */
71
72	incl %eax		/* increment pointer */
73	xorl %edx, %edx		/* We need %edx == 0 for later */
74
75      /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
76	 change any of the hole bits of LONGWORD.
77
78	 1) Is this safe?  Will it catch all the zero bytes?
79	 Suppose there is a byte with all zeros.  Any carry bits
80	 propagating from its left will fall into the hole at its
81	 least significant bit and stop.  Since there will be no
82	 carry from its most significant bit, the LSB of the
83	 byte to the left will be unchanged, and the zero will be
84	 detected.
85
86	 2) Is this worthwhile?  Will it ignore everything except
87	 zero bytes?  Suppose every byte of LONGWORD has a bit set
88	 somewhere.  There will be a carry into bit 8.	If bit 8
89	 is set, this will carry into bit 16.  If bit 8 is clear,
90	 one of bits 9-15 must be set, so there will be a carry
91	 into bit 16.  Similarly, there will be a carry into bit
92	 24.  If one of bits 24-31 is set, there will be a carry
93	 into bit 32 (=carry flag), so all of the hole bits will
94	 be changed.
95
96	 Note: %edx == 0 in any case here.  */
97
98L(1):
99	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
100	addl $4, %eax		/* adjust pointer for *next* word */
101
102	subl %ecx, %edx		/* first step to negate word */
103	addl $magic, %ecx	/* add magic word */
104
105	decl %edx		/* complete negation of word */
106	jnc L(3)		/* previous addl caused overflow? */
107
108	xorl %ecx, %edx		/* (word+magic)^word */
109
110	andl $~magic, %edx	/* any of the carry flags set? */
111
112	jne L(3)		/* yes => determine byte */
113
114
115	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
116	addl $4, %eax		/* adjust pointer for *next* word */
117
118	subl %ecx, %edx		/* first step to negate word */
119	addl $magic, %ecx	/* add magic word */
120
121	decl %edx		/* complete negation of word */
122	jnc L(3)		/* previous addl caused overflow? */
123
124	xorl %ecx, %edx		/* (word+magic)^word */
125
126	andl $~magic, %edx	/* any of the carry flags set? */
127
128	jne L(3)		/* yes => determine byte */
129
130
131	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
132	addl $4, %eax		/* adjust pointer for *next* word */
133
134	subl %ecx, %edx		/* first step to negate word */
135	addl $magic, %ecx	/* add magic word */
136
137	decl %edx		/* complete negation of word */
138	jnc L(3)		/* previous addl caused overflow? */
139
140	xorl %ecx, %edx		/* (word+magic)^word */
141
142	andl $~magic, %edx	/* any of the carry flags set? */
143
144	jne L(3)		/* yes => determine byte */
145
146
147	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
148	addl $4, %eax		/* adjust pointer for *next* word */
149
150	subl %ecx, %edx		/* first step to negate word */
151	addl $magic, %ecx	/* add magic word */
152
153	decl %edx		/* complete negation of word */
154	jnc L(3)		/* previous addl caused overflow? */
155
156	xorl %ecx, %edx		/* (word+magic)^word */
157
158	andl $~magic, %edx	/* any of the carry flags set? */
159
160	je L(1)			/* no => start loop again */
161
162
163L(3):	subl $4, %eax		/* correct too early pointer increment */
164	subl $magic, %ecx
165
166	cmpb $0, %cl		/* lowest byte NUL? */
167	jz L(2)			/* yes => return */
168
169	inc %eax		/* increment pointer */
170	testb %ch, %ch		/* second byte NUL? */
171
172	jz L(2)			/* yes => return */
173
174	shrl $16, %ecx		/* make upper bytes accessible */
175	incl %eax		/* increment pointer */
176
177	cmpb $0, %cl		/* is third byte NUL? */
178	jz L(2)			/* yes => return */
179
180	incl %eax		/* increment pointer */
181
182L(2):	CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
183	subl STR(%esp), %eax	/* now compute the length as difference
184				   between start and terminating NUL
185				   character */
186	LEAVE
187	ret
188END (BP_SYM (strlen))
189