1/* strlen -- Compute length of NUL terminated string. 2 Highly optimized version for ix86, x>=5. 3 Copyright (C) 1995, 1996, 1997, 2000, 2002 Free Software Foundation, Inc. 4 This file is part of the GNU C Library. 5 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. 6 7 The GNU C Library is free software; you can redistribute it and/or 8 modify it under the terms of the GNU Lesser General Public 9 License as published by the Free Software Foundation; either 10 version 2.1 of the License, or (at your option) any later version. 11 12 The GNU C Library is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 Lesser General Public License for more details. 16 17 You should have received a copy of the GNU Lesser General Public 18 License along with the GNU C Library; if not, write to the Free 19 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 20 02111-1307 USA. */ 21 22#include <sysdep.h> 23#include "asm-syntax.h" 24#include "bp-sym.h" 25#include "bp-asm.h" 26 27/* This version is especially optimized for the i586 (and following?) 28 processors. This is mainly done by using the two pipelines. The 29 version optimized for i486 is weak in this aspect because to get 30 as much parallelism we have to execute some *more* instructions. 31 32 The code below is structured to reflect the pairing of the instructions 33 as *I think* it is. I have no processor data book to verify this. 34 If you find something you think is incorrect let me know. */ 35 36 37/* The magic value which is used throughout in the whole code. */ 38#define magic 0xfefefeff 39 40#define PARMS LINKAGE /* no space for saved regs */ 41#define STR PARMS 42 43 .text 44ENTRY (BP_SYM (strlen)) 45 ENTER 46 47 movl STR(%esp), %eax 48 CHECK_BOUNDS_LOW (%eax, STR(%esp)) 49 movl $3, %edx /* load mask (= 3) */ 50 51 andl %eax, %edx /* separate last two bits of address */ 52 53 jz L(1) /* aligned => start loop */ 54 jp L(0) /* exactly two bits set */ 55 56 cmpb %dh, (%eax) /* is byte NUL? */ 57 je L(2) /* yes => return */ 58 59 incl %eax /* increment pointer */ 60 cmpb %dh, (%eax) /* is byte NUL? */ 61 62 je L(2) /* yes => return */ 63 64 incl %eax /* increment pointer */ 65 xorl $2, %edx 66 67 jz L(1) 68 69L(0): cmpb %dh, (%eax) /* is byte NUL? */ 70 je L(2) /* yes => return */ 71 72 incl %eax /* increment pointer */ 73 xorl %edx, %edx /* We need %edx == 0 for later */ 74 75 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to 76 change any of the hole bits of LONGWORD. 77 78 1) Is this safe? Will it catch all the zero bytes? 79 Suppose there is a byte with all zeros. Any carry bits 80 propagating from its left will fall into the hole at its 81 least significant bit and stop. Since there will be no 82 carry from its most significant bit, the LSB of the 83 byte to the left will be unchanged, and the zero will be 84 detected. 85 86 2) Is this worthwhile? Will it ignore everything except 87 zero bytes? Suppose every byte of LONGWORD has a bit set 88 somewhere. There will be a carry into bit 8. If bit 8 89 is set, this will carry into bit 16. If bit 8 is clear, 90 one of bits 9-15 must be set, so there will be a carry 91 into bit 16. Similarly, there will be a carry into bit 92 24. If one of bits 24-31 is set, there will be a carry 93 into bit 32 (=carry flag), so all of the hole bits will 94 be changed. 95 96 Note: %edx == 0 in any case here. */ 97 98L(1): 99 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 100 addl $4, %eax /* adjust pointer for *next* word */ 101 102 subl %ecx, %edx /* first step to negate word */ 103 addl $magic, %ecx /* add magic word */ 104 105 decl %edx /* complete negation of word */ 106 jnc L(3) /* previous addl caused overflow? */ 107 108 xorl %ecx, %edx /* (word+magic)^word */ 109 110 andl $~magic, %edx /* any of the carry flags set? */ 111 112 jne L(3) /* yes => determine byte */ 113 114 115 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 116 addl $4, %eax /* adjust pointer for *next* word */ 117 118 subl %ecx, %edx /* first step to negate word */ 119 addl $magic, %ecx /* add magic word */ 120 121 decl %edx /* complete negation of word */ 122 jnc L(3) /* previous addl caused overflow? */ 123 124 xorl %ecx, %edx /* (word+magic)^word */ 125 126 andl $~magic, %edx /* any of the carry flags set? */ 127 128 jne L(3) /* yes => determine byte */ 129 130 131 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 132 addl $4, %eax /* adjust pointer for *next* word */ 133 134 subl %ecx, %edx /* first step to negate word */ 135 addl $magic, %ecx /* add magic word */ 136 137 decl %edx /* complete negation of word */ 138 jnc L(3) /* previous addl caused overflow? */ 139 140 xorl %ecx, %edx /* (word+magic)^word */ 141 142 andl $~magic, %edx /* any of the carry flags set? */ 143 144 jne L(3) /* yes => determine byte */ 145 146 147 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 148 addl $4, %eax /* adjust pointer for *next* word */ 149 150 subl %ecx, %edx /* first step to negate word */ 151 addl $magic, %ecx /* add magic word */ 152 153 decl %edx /* complete negation of word */ 154 jnc L(3) /* previous addl caused overflow? */ 155 156 xorl %ecx, %edx /* (word+magic)^word */ 157 158 andl $~magic, %edx /* any of the carry flags set? */ 159 160 je L(1) /* no => start loop again */ 161 162 163L(3): subl $4, %eax /* correct too early pointer increment */ 164 subl $magic, %ecx 165 166 cmpb $0, %cl /* lowest byte NUL? */ 167 jz L(2) /* yes => return */ 168 169 inc %eax /* increment pointer */ 170 testb %ch, %ch /* second byte NUL? */ 171 172 jz L(2) /* yes => return */ 173 174 shrl $16, %ecx /* make upper bytes accessible */ 175 incl %eax /* increment pointer */ 176 177 cmpb $0, %cl /* is third byte NUL? */ 178 jz L(2) /* yes => return */ 179 180 incl %eax /* increment pointer */ 181 182L(2): CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb) 183 subl STR(%esp), %eax /* now compute the length as difference 184 between start and terminating NUL 185 character */ 186 LEAVE 187 ret 188END (BP_SYM (strlen)) 189