1/*	$NetBSD: strspn.c,v 1.16 2008/07/30 16:13:59 joerg Exp $	*/
2
3/*-
4 * Copyright (c) 2008 Joerg Sonnenberger
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__RCSID("$NetBSD: strspn.c,v 1.16 2008/07/30 16:13:59 joerg Exp $");
30
31#include <assert.h>
32#include <inttypes.h>
33#include <limits.h>
34#include <string.h>
35
36#if ULONG_MAX != 0xffffffffffffffffull
37
/*
 * strspn --
 *	Return the number of leading bytes of `s' that all occur in
 *	`charset'.  Both arguments must be NUL-terminated strings; the
 *	terminating NUL is never counted and always ends the scan.
 *
 *	An empty charset yields 0.  A one-byte charset is handled by a
 *	plain comparison loop; anything longer goes through a 256-bit
 *	membership table kept on the stack.
 */
size_t
strspn(const char *s, const char *charset)
{
	/* Single-bit masks selected by the low three bits of a byte value. */
	static const uint8_t idx[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
	uint8_t set[32];	/* one bit per possible byte value */
	const char *t;
	unsigned int c;		/* current byte, widened without sign extension */

	_DIAGASSERT(s != NULL);
	_DIAGASSERT(charset != NULL);

	/* Nothing can match an empty set. */
	if (charset[0] == '\0')
		return 0;

	/* Exactly one byte in the set: count the run of that byte. */
	if (charset[1] == '\0') {
		for (t = s; *t != '\0'; ++t) {
			if (*t != *charset)
				break;
		}
		return (size_t)(t - s);
	}

	(void)memset(set, 0, sizeof(set));

	/* Mark every byte of charset: byte c lives in set[c / 8], bit c % 8. */
	for (; *charset != '\0'; ++charset) {
		c = (unsigned char)*charset;
		set[c >> 3] |= idx[c & 7];
	}

	/* Advance until the NUL or the first byte whose bit is clear. */
	for (t = s; *t != '\0'; ++t) {
		c = (unsigned char)*t;
		if ((set[c >> 3] & idx[c & 7]) == 0)
			break;
	}
	return (size_t)(t - s);
}
69
70#else
71
/* 64-bit system: use four 64-bit registers for the bitmask. */
73
/*
 * strspn_x --
 *	Common worker behind strspn() and strcspn().
 *
 *	Builds a 256-bit membership set from `charset_s', held in four
 *	unsigned long words (one bit per possible byte value), XORs each
 *	word with `invert', and returns how many leading bytes of `s_s'
 *	have their bit set in the result.
 *
 *	s_s		NUL-terminated string to scan.
 *	charset_s	NUL-terminated list of bytes forming the set.
 *	invert		0 keeps the set as built (strspn); ~0ul complements
 *			it (strcspn).
 *
 *	The scan always stops at the terminating NUL of `s_s'.
 */
static size_t
strspn_x(const char *s_s, const char *charset_s, unsigned long invert)
{
	const unsigned char *members = (const unsigned char *)charset_s;
	const unsigned char *text = (const unsigned char *)s_s;
	unsigned long set_00 = 0;	/* byte values 0x00 - 0x3f */
	unsigned long set_40 = 0;	/* byte values 0x40 - 0x7f */
	unsigned long set_80 = 0;	/* byte values 0x80 - 0xbf */
	unsigned long set_c0 = 0;	/* byte values 0xc0 - 0xff */
	unsigned long mask;
	unsigned long word;
	unsigned char cur, ahead;
	size_t matched = 0;

	/* Record each charset byte as a single bit in the word covering it. */
	cur = *members;
	while (cur != 0) {
		ahead = *++members;
		mask = 1ul << (cur & 0x3f);
		if (__predict_true(cur < 0x80)) {
			if (cur >= 0x40)
				set_40 |= mask;
			else
				set_00 |= mask;
		} else {
			if (cur >= 0xc0)
				set_c0 |= mask;
			else
				set_80 |= mask;
		}
		cur = ahead;
	}

	/* strcspn() passes all-ones here, which complements the whole set. */
	set_00 ^= invert;
	set_40 ^= invert;
	set_80 ^= invert;
	set_c0 ^= invert;

	/*
	 * Clearing bit 0 of the first word would make the NUL terminator
	 * fail the membership test and end the scan by itself.  However,
	 * the loop fetches the following byte before testing the current
	 * one (prefetching is beneficial), and it must never read the byte
	 * after the NUL since that access might fault.  So the explicit
	 * comparison against 0 is kept.
	 */
	cur = *text++;
	while (cur != 0) {
		/* text is already one ahead, so this reads at most the NUL. */
		ahead = text[matched];
		if (__predict_true(cur < 0x80))
			word = (cur >= 0x40) ? set_40 : set_00;
		else
			word = (cur >= 0xc0) ? set_c0 : set_80;
		if (((word >> (cur & 0x3f)) & 1) == 0)
			break;
		matched++;
		cur = ahead;
	}
	return matched;
}
139
/*
 * strspn --
 *	Count the leading bytes of `s' that all occur in `charset'.
 *	Delegates to strspn_x() with the membership set left as built.
 */
size_t
strspn(const char *s, const char *charset)
{
	const unsigned long keep_set = 0;

	return strspn_x(s, charset, keep_set);
}
145
/*
 * strcspn --
 *	Count the leading bytes of `s' that do not occur in `charset'.
 *	Delegates to strspn_x() with an all-ones mask so that the
 *	membership set is complemented.
 */
size_t
strcspn(const char *s, const char *charset)
{
	const unsigned long flip_set = ~0ul;

	return strspn_x(s, charset, flip_set);
}
151#endif
152