11541Srgrimes/*-
2242506Sdelphij * Copyright (c) 2009, 2010 Xin LI <delphij@FreeBSD.org>
3242506Sdelphij * All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes *
14242506Sdelphij * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
151541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
161541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17242506Sdelphij * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
181541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
191541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
201541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
211541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
221541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
231541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
241541Srgrimes * SUCH DAMAGE.
251541Srgrimes */
261541Srgrimes
27116189Sobrien#include <sys/cdefs.h>
28116189Sobrien__FBSDID("$FreeBSD$");
29116189Sobrien
30102281Sjhb#include <sys/libkern.h>
31242506Sdelphij#include <sys/limits.h>
321541Srgrimes
33242506Sdelphij/*
34242506Sdelphij * Portable strlen() for 32-bit and 64-bit systems.
35242506Sdelphij *
36242506Sdelphij * Rationale: it is generally much more efficient to do word length
37242506Sdelphij * operations and avoid branches on modern computer systems, as
38242506Sdelphij * compared to byte-length operations with a lot of branches.
39242506Sdelphij *
40242506Sdelphij * The expression:
41242506Sdelphij *
42242506Sdelphij *	((x - 0x01....01) & ~x & 0x80....80)
43242506Sdelphij *
44242506Sdelphij * would evaluate to a non-zero value iff any of the bytes in the
45242506Sdelphij * original word is zero.
46242506Sdelphij *
47242506Sdelphij * On multi-issue processors, we can divide the above expression into:
48242506Sdelphij *	a)  (x - 0x01....01)
49242506Sdelphij *	b) (~x & 0x80....80)
50242506Sdelphij *	c) a & b
51242506Sdelphij *
52242506Sdelphij * Where, a) and b) can be partially computed in parallel.
53242506Sdelphij *
54242506Sdelphij * The algorithm above is found on "Hacker's Delight" by
55242506Sdelphij * Henry S. Warren, Jr.
56242506Sdelphij */
57242506Sdelphij
58242506Sdelphij/* Magic numbers for the algorithm */
59242506Sdelphij#if LONG_BIT == 32
60242506Sdelphijstatic const unsigned long mask01 = 0x01010101;
61242506Sdelphijstatic const unsigned long mask80 = 0x80808080;
62242506Sdelphij#elif LONG_BIT == 64
63242506Sdelphijstatic const unsigned long mask01 = 0x0101010101010101;
64242506Sdelphijstatic const unsigned long mask80 = 0x8080808080808080;
65242506Sdelphij#else
66242506Sdelphij#error Unsupported word size
67242506Sdelphij#endif
68242506Sdelphij
69242506Sdelphij#define	LONGPTR_MASK (sizeof(long) - 1)
70242506Sdelphij
71242506Sdelphij/*
72242506Sdelphij * Helper macro to return string length if we caught the zero
73242506Sdelphij * byte.
74242506Sdelphij */
75242506Sdelphij#define testbyte(x)				\
76242506Sdelphij	do {					\
77242506Sdelphij		if (p[x] == '\0')		\
78242506Sdelphij		    return (p - str + x);	\
79242506Sdelphij	} while (0)
80242506Sdelphij
811541Srgrimessize_t
82242506Sdelphijstrlen(const char *str)
831541Srgrimes{
84242506Sdelphij	const char *p;
85242506Sdelphij	const unsigned long *lp;
86242506Sdelphij	long va, vb;
871541Srgrimes
88242506Sdelphij	/*
89242506Sdelphij	 * Before trying the hard (unaligned byte-by-byte access) way
90242506Sdelphij	 * to figure out whether there is a nul character, try to see
91242506Sdelphij	 * if there is a nul character is within this accessible word
92242506Sdelphij	 * first.
93242506Sdelphij	 *
94242506Sdelphij	 * p and (p & ~LONGPTR_MASK) must be equally accessible since
95242506Sdelphij	 * they always fall in the same memory page, as long as page
96242506Sdelphij	 * boundaries is integral multiple of word size.
97242506Sdelphij	 */
98242506Sdelphij	lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
99242506Sdelphij	va = (*lp - mask01);
100242506Sdelphij	vb = ((~*lp) & mask80);
101242506Sdelphij	lp++;
102242506Sdelphij	if (va & vb)
103242506Sdelphij		/* Check if we have \0 in the first part */
104242506Sdelphij		for (p = str; p < (const char *)lp; p++)
105242506Sdelphij			if (*p == '\0')
106242506Sdelphij				return (p - str);
107242506Sdelphij
108242506Sdelphij	/* Scan the rest of the string using word sized operation */
109242506Sdelphij	for (; ; lp++) {
110242506Sdelphij		va = (*lp - mask01);
111242506Sdelphij		vb = ((~*lp) & mask80);
112242506Sdelphij		if (va & vb) {
113242506Sdelphij			p = (const char *)(lp);
114242506Sdelphij			testbyte(0);
115242506Sdelphij			testbyte(1);
116242506Sdelphij			testbyte(2);
117242506Sdelphij			testbyte(3);
118242506Sdelphij#if (LONG_BIT >= 64)
119242506Sdelphij			testbyte(4);
120242506Sdelphij			testbyte(5);
121242506Sdelphij			testbyte(6);
122242506Sdelphij			testbyte(7);
123242506Sdelphij#endif
124242506Sdelphij		}
125242506Sdelphij	}
126242506Sdelphij
127242506Sdelphij	/* NOTREACHED */
128242506Sdelphij	return (0);
1291541Srgrimes}
130