1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2009, 2010 Xin LI <delphij@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/limits.h>
32#include <sys/types.h>
33#include <string.h>
34
35/*
36 * Portable strlen() for 32-bit and 64-bit systems.
37 *
38 * Rationale: it is generally much more efficient to do word length
39 * operations and avoid branches on modern computer systems, as
40 * compared to byte-length operations with a lot of branches.
41 *
42 * The expression:
43 *
44 *	((x - 0x01....01) & ~x & 0x80....80)
45 *
46 * would evaluate to a non-zero value iff any of the bytes in the
47 * original word is zero.
48 *
49 * On multi-issue processors, we can divide the above expression into:
50 *	a)  (x - 0x01....01)
51 *	b) (~x & 0x80....80)
52 *	c) a & b
53 *
54 * Where, a) and b) can be partially computed in parallel.
55 *
56 * The algorithm above is found on "Hacker's Delight" by
57 * Henry S. Warren, Jr.
58 */
59
60/* Magic numbers for the algorithm */
61#if LONG_BIT == 32
62static const unsigned long mask01 = 0x01010101;
63static const unsigned long mask80 = 0x80808080;
64#elif LONG_BIT == 64
65static const unsigned long mask01 = 0x0101010101010101;
66static const unsigned long mask80 = 0x8080808080808080;
67#else
68#error Unsupported word size
69#endif
70
71#define	LONGPTR_MASK (sizeof(long) - 1)
72
73/*
74 * Helper macro to return string length if we caught the zero
75 * byte.
76 */
77#define testbyte(x)				\
78	do {					\
79		if (p[x] == '\0')		\
80		    return (p - str + x);	\
81	} while (0)
82
83size_t
84strlen(const char *str)
85{
86	const char *p;
87	const unsigned long *lp;
88	long va, vb;
89
90	/*
91	 * Before trying the hard (unaligned byte-by-byte access) way
92	 * to figure out whether there is a nul character, try to see
93	 * if there is a nul character is within this accessible word
94	 * first.
95	 *
96	 * p and (p & ~LONGPTR_MASK) must be equally accessible since
97	 * they always fall in the same memory page, as long as page
98	 * boundaries is integral multiple of word size.
99	 */
100	lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
101	va = (*lp - mask01);
102	vb = ((~*lp) & mask80);
103	lp++;
104	if (va & vb)
105		/* Check if we have \0 in the first part */
106		for (p = str; p < (const char *)lp; p++)
107			if (*p == '\0')
108				return (p - str);
109
110	/* Scan the rest of the string using word sized operation */
111	for (; ; lp++) {
112		va = (*lp - mask01);
113		vb = ((~*lp) & mask80);
114		if (va & vb) {
115			p = (const char *)(lp);
116			testbyte(0);
117			testbyte(1);
118			testbyte(2);
119			testbyte(3);
120#if (LONG_BIT >= 64)
121			testbyte(4);
122			testbyte(5);
123			testbyte(6);
124			testbyte(7);
125#endif
126		}
127	}
128
129	/* NOTREACHED */
130	return (0);
131}
132