/* $NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $ */

/*-
 * Copyright (c) 2008 Joerg Sonnenberger
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $");

#if !defined(_KERNEL) && !defined(_STANDALONE)
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#else
#include <lib/libkern/libkern.h>
#endif

#if ULONG_MAX != 0xffffffffffffffffull

/*
 * strspn --
 *	Return the length of the leading part of `s' that consists only of
 *	bytes that occur in `charset'.
 *
 *	An empty `charset' yields 0.  A one-byte `charset' is handled with a
 *	plain comparison loop.  Otherwise a 256-bit membership bitmap (32
 *	bytes, one bit per byte value) is built from `charset' and `s' is
 *	scanned against it.  The terminating NUL of `s' always ends the scan.
 */
size_t
strspn(const char *s, const char *charset)
{
	/* idx[n] is the mask selecting bit n within one bitmap byte. */
	static const uint8_t idx[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
	uint8_t set[32];
	const char *t;
/* Map a (possibly signed) char to its byte value for use as an index. */
#define UC(a) ((unsigned int)(unsigned char)(a))

	if (charset[0] == '\0')
		return 0;
	if (charset[1] == '\0') {
		/* Single accepted byte: no bitmap needed. */
		for (t = s; *t != '\0'; ++t) {
			if (*t != *charset)
				break;
		}
		return t - s;
	}

	/* Only pay for clearing the bitmap once the fast paths are ruled out. */
	(void)memset(set, 0, sizeof(set));

	/* Byte value v lives in set[v / 8], bit (v % 8). */
	for (; *charset != '\0'; ++charset)
		set[UC(*charset) >> 3] |= idx[UC(*charset) & 7];

	for (t = s; *t != '\0'; ++t)
		if ((set[UC(*t) >> 3] & idx[UC(*t) & 7]) == 0)
			break;
	return t - s;
}
/* Keep the helper macro from leaking into the rest of the translation unit. */
#undef UC

#else

/* 64 bit system, use four 64 bits registers for bitmask */

/*
 * strspn_x --
 *	Common worker for strspn() and strcspn() on 64-bit systems.
 *
 *	Builds a 256-bit membership set from `charset_s', held in four
 *	unsigned long words (byte values 0x00-0x3f, 0x40-0x7f, 0x80-0xbf and
 *	0xc0-0xff), XORs every word with `invert', and returns the number of
 *	leading bytes of `s_s' whose bit is set in the resulting set.
 *
 *	`invert' is 0 to accept the bytes in `charset_s' (strspn) and ~0ul to
 *	accept every other byte (strcspn).  The scan always stops at the
 *	terminating NUL of `s_s', independent of the set contents.
 */
static size_t
strspn_x(const char *s_s, const char *charset_s, unsigned long invert)
{
	const unsigned char *s = (const unsigned char *)s_s;
	const unsigned char *charset = (const unsigned char *)charset_s;
	unsigned long m_0, m_4, m_8, m_c;
	unsigned char ch, next_ch;
	unsigned long bit;
	unsigned long check;
	size_t count;

	/* Four 64bit registers have one bit for each character value */
	m_0 = 0;
	m_4 = 0;
	m_8 = 0;
	m_c = 0;

	for (ch = *charset; ch != 0; ch = next_ch) {
		/* Fetch the following byte early; ch != 0 so it is readable. */
		next_ch = *++charset;
		/* Low six bits pick the bit, top two bits pick the word. */
		bit = 1ul << (ch & 0x3f);
		if (__predict_true(ch < 0x80)) {
			if (ch < 0x40)
				m_0 |= bit;
			else
				m_4 |= bit;
		} else {
			if (ch < 0xc0)
				m_8 |= bit;
			else
				m_c |= bit;
		}
	}

	/* For strcspn() we just invert the validity set */
	m_0 ^= invert;
	m_4 ^= invert;
	m_8 ^= invert;
	m_c ^= invert;

	/*
	 * We could remove the lsb from m_0 to terminate at the
	 * end of the input string.
	 * However prefetching the next char is beneficial and we must
	 * not read the byte after the \0 - as it might fault!
	 * So we take the 'hit' of the compare against 0.
	 */

	/*
	 * `s' is advanced past the first byte here, so inside the loop
	 * s[count] is the byte following the one currently being tested.
	 */
	ch = *s++;
	for (count = 0; ch != 0; ch = next_ch) {
		next_ch = s[count];
		if (__predict_true(ch < 0x80)) {
			check = m_0;
			if (ch >= 0x40)
				check = m_4;
		} else {
			check = m_8;
			if (ch >= 0xc0)
				check = m_c;
		}
		if (!((check >> (ch & 0x3f)) & 1))
			break;
		count++;
	}
	return count;
}

/*
 * strspn --
 *	Return the length of the leading part of `s' that consists only of
 *	bytes that occur in `charset'.
 */
size_t
strspn(const char *s, const char *charset)
{
	return strspn_x(s, charset, 0);
}

/*
 * strcspn --
 *	Return the length of the leading part of `s' that contains no byte
 *	that occurs in `charset'.
 */
size_t
strcspn(const char *s, const char *charset)
{
	return strspn_x(s, charset, ~0ul);
}
#endif