#include <string.h>
#include <stdint.h>

/*
 * Find the 2-byte needle n in h by sliding a 16-bit window holding the
 * two most recently read haystack bytes. The caller has already checked
 * that h[0] and h[1] are both non-zero.
 * Returns a pointer to the first match, or 0 if there is none.
 */
static char *twobyte_strstr(const unsigned char *h, const unsigned char *n)
{
	uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
	/* h always points at the last byte currently held in the window */
	for (h++; *h && hw != nw; hw = hw<<8 | *++h);
	return *h ? (char *)h-1 : 0;
}

/*
 * Find the 3-byte needle n in h. The window lives in the upper three
 * bytes of a 32-bit word; the low byte is always zero so that a new
 * haystack byte can be OR-ed in before the word is shifted up.
 * The caller has already checked that h[0..2] are non-zero.
 * Returns a pointer to the first match, or 0 if there is none.
 */
static char *threebyte_strstr(const unsigned char *h, const unsigned char *n)
{
	/* Cast before shifting by 24: the promoted type of an unsigned char
	 * is int, and shifting a value >= 0x80 into the sign bit of an int
	 * is undefined behaviour. */
	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8;
	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8;
	for (h+=2; *h && hw != nw; hw = (hw|*++h)<<8);
	return *h ? (char *)h-2 : 0;
}

/*
 * Find the 4-byte needle n in h by sliding a 32-bit window over the
 * haystack. The caller has already checked that h[0..3] are non-zero.
 * Returns a pointer to the first match, or 0 if there is none.
 */
static char *fourbyte_strstr(const unsigned char *h, const unsigned char *n)
{
	/* Same cast as in threebyte_strstr: avoid a signed shift into the
	 * sign bit for bytes >= 0x80. */
	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
	for (h+=3; *h && hw != nw; hw = hw<<8 | *++h);
	return *h ? (char *)h-3 : 0;
}

#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))

/* Apply `op` to bit number b of the bitmap stored in the array a */
#define BITOP(a,b,op) \
 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))

/*
 * General-purpose search used for needles longer than four bytes.
 *
 * The needle is split at a position derived from its maximal suffixes
 * (computed under both byte orderings); each candidate position is
 * verified right half first, then left half. A per-byte table of last
 * occurrences additionally lets the search skip ahead when the haystack
 * byte aligned with the end of the needle rules out a match.
 *
 * Returns a pointer to the first occurrence of n in h, or 0 if none.
 */
static char *twoway_strstr(const unsigned char *h, const unsigned char *n)
{
	const unsigned char *z;
	size_t l, ip, jp, k, p, ms, p0, mem, mem0;
	/* 256-bit set of the byte values that occur in the needle */
	size_t byteset[32 / sizeof(size_t)] = { 0 };
	/* shift[c] is 1 + the index of the last occurrence of c in the
	 * needle; only entries whose bit is set in byteset are valid */
	size_t shift[256];

	/* Computing length of needle and fill shift table */
	for (l=0; n[l] && h[l]; l++)
		BITOP(byteset, n[l], |=), shift[n[l]] = l+1;
	if (n[l]) return 0; /* hit the end of h */

	/* Compute maximal suffix */
	/* ip starts at (size_t)-1 so that ip+k wraps around to k-1 */
	ip = -1; jp = 0; k = p = 1;
	while (jp+k<l) {
		if (n[ip+k] == n[jp+k]) {
			if (k == p) {
				jp += p;
				k = 1;
			} else k++;
		} else if (n[ip+k] > n[jp+k]) {
			jp += k;
			k = 1;
			p = jp - ip;
		} else {
			ip = jp++;
			k = p = 1;
		}
	}
	ms = ip;
	p0 = p;

	/* And with the opposite comparison */
	ip = -1; jp = 0; k = p = 1;
	while (jp+k<l) {
		if (n[ip+k] == n[jp+k]) {
			if (k == p) {
				jp += p;
				k = 1;
			} else k++;
		} else if (n[ip+k] < n[jp+k]) {
			jp += k;
			k = 1;
			p = jp - ip;
		} else {
			ip = jp++;
			k = p = 1;
		}
	}
	/* Keep whichever of the two suffixes starts later; the +1 on both
	 * sides makes the comparison work when either value is (size_t)-1 */
	if (ip+1 > ms+1) ms = ip;
	else p = p0;

	/* Periodic needle? */
	if (memcmp(n, n+p, ms+1)) {
		mem0 = 0;
		p = MAX(ms, l-ms-1) + 1;
	} else mem0 = l-p;
	/* mem counts leading needle bytes already known to match at h */
	mem = 0;

	/* Initialize incremental end-of-haystack pointer */
	z = h;

	/* Search loop */
	for (;;) {
		/* Update incremental end-of-haystack pointer */
		if (z-h < l) {
			/* Fast estimate for MAX(l,63): l|63 is never
			 * smaller than either l or 63 */
			size_t grow = l | 63;
			const unsigned char *z2 = memchr(z, 0, grow);
			if (z2) {
				z = z2;
				/* fewer than l bytes remain: no match */
				if (z-h < l) return 0;
			} else z += grow;
		}

		/* Check last byte first; advance by shift on mismatch */
		if (BITOP(byteset, h[l-1], &)) {
			k = l-shift[h[l-1]];
			if (k) {
				/* The remembered prefix is discarded below,
				 * so advance at least that far; a longer
				 * table shift is kept as is. */
				if (k < mem) k = mem;
				h += k;
				mem = 0;
				continue;
			}
		} else {
			/* Byte does not occur in the needle at all */
			h += l;
			mem = 0;
			continue;
		}

		/* Compare right half */
		for (k=MAX(ms+1,mem); n[k] && n[k] == h[k]; k++);
		if (n[k]) {
			h += k-ms;
			mem = 0;
			continue;
		}
		/* Compare left half */
		for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
		if (k <= mem) return (char *)h;
		h += p;
		mem = mem0;
	}
}

/*
 * Locate the first occurrence of the string n within the string h.
 *
 * Returns h itself when n is empty, a pointer to the start of the first
 * match otherwise, or 0 when n does not occur in h.
 */
char *strstr(const char *h, const char *n)
{
	/* Return immediately on empty needle */
	if (!n[0]) return (char *)h;

	/* Use faster algorithms for short needles */
	/* Skip ahead to the first candidate start; each !h[i] test below
	 * bails out as soon as the haystack is shorter than the needle */
	h = strchr(h, *n);
	if (!h || !n[1]) return (char *)h;
	if (!h[1]) return 0;
	if (!n[2]) return twobyte_strstr((void *)h, (void *)n);
	if (!h[2]) return 0;
	if (!n[3]) return threebyte_strstr((void *)h, (void *)n);
	if (!h[3]) return 0;
	if (!n[4]) return fourbyte_strstr((void *)h, (void *)n);

	return twoway_strstr((void *)h, (void *)n);
}