1/*	$NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $	*/
2
3/*-
4 * Copyright (c) 2008 Joerg Sonnenberger
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__RCSID("$NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $");
30
31#if !defined(_KERNEL) && !defined(_STANDALONE)
32#include <assert.h>
33#include <inttypes.h>
34#include <limits.h>
35#include <string.h>
36#else
37#include <lib/libkern/libkern.h>
38#endif
39
40#if ULONG_MAX != 0xffffffffffffffffull
41
/*
 * strspn --
 *	Return the number of leading bytes of s that all occur in charset.
 *
 * Generic variant.  Membership is tracked in a 256-bit map held in 32
 * bytes: byte value c lives in map[c >> 3], bit (c & 7).
 */
size_t
strspn(const char *s, const char *charset)
{
	const unsigned char *accept = (const unsigned char *)charset;
	const unsigned char *start = (const unsigned char *)s;
	const unsigned char *p = start;
	uint8_t map[32];
	unsigned int c;

	/* An empty set accepts nothing, so the span is empty. */
	if (accept[0] == '\0')
		return 0;

	/* One accepted byte: compare directly instead of building the map. */
	if (accept[1] == '\0') {
		while (*p != '\0' && *p == accept[0])
			p++;
		return (size_t)(p - start);
	}

	(void)memset(map, 0, sizeof(map));

	/* Record every byte of charset in the map. */
	while ((c = *accept++) != '\0')
		map[c >> 3] |= (uint8_t)(1u << (c & 7));

	/* Advance until the terminator or the first byte not in the map. */
	for (;;) {
		c = *p;
		if (c == '\0' || (map[c >> 3] & (1u << (c & 7))) == 0)
			break;
		p++;
	}
	return (size_t)(p - start);
}
70
71#else
72
/* 64-bit system: use four 64-bit registers for the bitmask */
74
/*
 * Common worker behind strspn() and strcspn().
 *
 * A 256-bit membership set is built from charset_s and kept in four
 * unsigned long words, each covering 64 consecutive byte values.  Every
 * word is then XORed with 'invert', and the function returns how many
 * leading bytes of s_s have their bit set.  The scan always stops at the
 * terminating NUL of s_s.
 *
 * invert == 0 counts bytes that are in charset_s; invert == ~0ul counts
 * bytes that are not.
 */
static size_t
strspn_x(const char *s_s, const char *charset_s, unsigned long invert)
{
	const unsigned char *str = (const unsigned char *)s_s;
	const unsigned char *cs = (const unsigned char *)charset_s;
	/* One word per 64-value range: 0x00-0x3f, 0x40-0x7f, 0x80-0xbf, 0xc0-0xff */
	unsigned long set_00 = 0, set_40 = 0, set_80 = 0, set_c0 = 0;
	unsigned long mask, word;
	unsigned char c, ahead;
	size_t n = 0;

	/* Set the bit of every byte in charset, fetching one byte ahead. */
	c = *cs;
	while (c != 0) {
		ahead = *++cs;
		mask = 1ul << (c & 0x3f);
		if (__predict_true(c < 0x80)) {
			if (c >= 0x40)
				set_40 |= mask;
			else
				set_00 |= mask;
		} else {
			if (c >= 0xc0)
				set_c0 |= mask;
			else
				set_80 |= mask;
		}
		c = ahead;
	}

	/* Flipping all four words turns "in charset" into "not in charset". */
	set_00 ^= invert;
	set_40 ^= invert;
	set_80 ^= invert;
	set_c0 ^= invert;

	/*
	 * Clearing the least significant bit of set_00 would make the
	 * membership test itself stop at the end of the input string.
	 * However, prefetching the next byte is beneficial, and the byte
	 * after the \0 must never be read because it might fault.
	 * So the loop pays for an explicit compare against 0 instead.
	 */
	c = str[0];
	while (c != 0) {
		/* c is not the terminator, so str[n + 1] is still in the string. */
		ahead = str[n + 1];
		if (__predict_true(c < 0x80))
			word = (c < 0x40) ? set_00 : set_40;
		else
			word = (c < 0xc0) ? set_80 : set_c0;
		if (((word >> (c & 0x3f)) & 1) == 0)
			break;
		n++;
		c = ahead;
	}
	return n;
}
140
/*
 * strspn --
 *	Count the leading bytes of s that occur in charset.  The shared
 *	worker is called with a zero invert mask, i.e. the set is used as is.
 */
size_t
strspn(const char *s, const char *charset)
{
	const unsigned long no_invert = 0;

	return strspn_x(s, charset, no_invert);
}
146
/*
 * strcspn --
 *	Count the leading bytes of s that do not occur in charset.  The
 *	shared worker is called with an all-ones invert mask, which flips
 *	the membership set.
 */
size_t
strcspn(const char *s, const char *charset)
{
	const unsigned long invert_all = ~0ul;

	return strspn_x(s, charset, invert_all);
}
152#endif
153