/*	$NetBSD: strspn.c,v 1.16 2008/07/30 16:13:59 joerg Exp $	*/

/*-
 * Copyright (c) 2008 Joerg Sonnenberger
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
/* Only emit the ident string where <sys/cdefs.h> provides the macro. */
#ifdef __RCSID
__RCSID("$NetBSD: strspn.c,v 1.16 2008/07/30 16:13:59 joerg Exp $");
#endif

#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>

/*
 * Fallbacks for the helper macros used below.  Each one is a no-op when
 * the system headers included above already define the macro.
 */
#ifndef _DIAGASSERT
#define _DIAGASSERT(e)		((void)0)
#endif
#ifndef __predict_true
#define __predict_true(exp)	((exp) != 0)
#endif

#if ULONG_MAX != 0xffffffffffffffffull

/*
 * strspn --
 *	Return the length of the initial segment of `s' that consists
 *	entirely of characters found in `charset'.
 *
 *	Variant used when unsigned long is not 64 bits wide: membership is
 *	tracked in a 256-bit set stored as 32 bytes on the stack.
 */
size_t
strspn(const char *s, const char *charset)
{
	/* idx[n] is the mask selecting bit n of a byte in set[]. */
	static const size_t idx[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
	uint8_t set[32];
	const char *t;
#define UC(a) ((unsigned int)(unsigned char)(a))

	_DIAGASSERT(s != NULL);
	_DIAGASSERT(charset != NULL);

	/* An empty set matches nothing. */
	if (charset[0] == '\0')
		return 0;

	/* A one character set needs no table, just a direct compare. */
	if (charset[1] == '\0') {
		for (t = s; *t != '\0'; ++t) {
			if (*t != *charset)
				break;
		}
		return t - s;
	}

	(void)memset(set, 0, sizeof(set));

	/* Byte value c is a member when bit (c & 7) of set[c >> 3] is set. */
	for (; *charset != '\0'; ++charset)
		set[UC(*charset) >> 3] |= idx[UC(*charset) & 7];

	/* Stop at the first byte that is not in the set, or at the NUL. */
	for (t = s; *t != '\0'; ++t)
		if ((set[UC(*t) >> 3] & idx[UC(*t) & 7]) == 0)
			break;
	return t - s;
}
#undef UC

#else

/* 64 bit system, use four 64 bits registers for bitmask */

/*
 * strspn_x --
 *	Common worker for strspn() and strcspn().
 *
 *	Builds a 256-bit membership set from `charset_s', XORs every word of
 *	it with `invert' (0 keeps the set, ~0ul complements it), and returns
 *	the number of leading bytes of `s_s' that are members of the
 *	resulting set.  Scanning always stops at the terminating NUL.
 */
static size_t
strspn_x(const char *s_s, const char *charset_s, unsigned long invert)
{
	const unsigned char *s = (const unsigned char *)s_s;
	const unsigned char *charset = (const unsigned char *)charset_s;
	unsigned long m_0, m_4, m_8, m_c;
	unsigned char ch, next_ch;
	unsigned long bit;
	unsigned long check;
	size_t count;

	/* Four 64bit registers have one bit for each character value */
	m_0 = 0;	/* 0x00 - 0x3f */
	m_4 = 0;	/* 0x40 - 0x7f */
	m_8 = 0;	/* 0x80 - 0xbf */
	m_c = 0;	/* 0xc0 - 0xff */

	for (ch = *charset; ch != 0; ch = next_ch) {
		/* ch is known to be non-NUL here, so the next byte exists. */
		next_ch = *++charset;
		bit = 1ul << (ch & 0x3f);
		if (__predict_true(ch < 0x80)) {
			if (ch < 0x40)
				m_0 |= bit;
			else
				m_4 |= bit;
		} else {
			if (ch < 0xc0)
				m_8 |= bit;
			else
				m_c |= bit;
		}
	}

	/* For strcspn() we just invert the validity set */
	m_0 ^= invert;
	m_4 ^= invert;
	m_8 ^= invert;
	m_c ^= invert;

	/*
	 * We could clear the lsb of m_0 so that the scan terminates at the
	 * end of the input string.
	 * However prefetching the next char is beneficial and we must
	 * not read the byte after the \0 - as it might fault!
	 * So we take the 'hit' of the compare against 0.
	 */

	ch = *s++;
	for (count = 0; ch != 0; ch = next_ch) {
		/*
		 * s was advanced past the first byte above, so s[count] is
		 * the byte following ch; ch is non-NUL, so the read is safe.
		 */
		next_ch = s[count];
		if (__predict_true(ch < 0x80)) {
			check = m_0;
			if (ch >= 0x40)
				check = m_4;
		} else {
			check = m_8;
			if (ch >= 0xc0)
				check = m_c;
		}
		if (!((check >> (ch & 0x3f)) & 1))
			break;
		count++;
	}
	return count;
}

/*
 * strspn --
 *	Return the length of the initial segment of `s' that consists
 *	entirely of characters found in `charset'.
 */
size_t
strspn(const char *s, const char *charset)
{
	return strspn_x(s, charset, 0);
}

/*
 * strcspn --
 *	Return the length of the initial segment of `s' that contains no
 *	character found in `charset'.
 */
size_t
strcspn(const char *s, const char *charset)
{
	return strspn_x(s, charset, ~0ul);
}
#endif