• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500-V1.0.1.40_1.0.68/ap/gpl/timemachine/gettext-0.17/gettext-tools/gnulib-lib/
1/* c-strcasestr.c -- case insensitive substring search in C locale
2   Copyright (C) 2005-2007 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2005.
4
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20/* Specification.  */
21#include "c-strcasestr.h"
22
23#include <stdbool.h>
24#include <stddef.h>
25#include <string.h>
26
27#include "malloca.h"
28#include "c-ctype.h"
29
30/* Knuth-Morris-Pratt algorithm.
31   See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
32   Return a boolean indicating success.  */
33static bool
34knuth_morris_pratt (const char *haystack, const char *needle,
35		    const char **resultp)
36{
37  size_t m = strlen (needle);
38
39  /* Allocate the table.  */
40  size_t *table = (size_t *) malloca (m * sizeof (size_t));
41  if (table == NULL)
42    return false;
43  /* Fill the table.
44     For 0 < i < m:
45       0 < table[i] <= i is defined such that
46       rhaystack[0..i-1] == needle[0..i-1] and rhaystack[i] != needle[i]
47       implies
48       forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1],
49       and table[i] is as large as possible with this property.
50     table[0] remains uninitialized.  */
51  {
52    size_t i, j;
53
54    table[1] = 1;
55    j = 0;
56    for (i = 2; i < m; i++)
57      {
58	unsigned char b = c_tolower ((unsigned char) needle[i - 1]);
59
60	for (;;)
61	  {
62	    if (b == c_tolower ((unsigned char) needle[j]))
63	      {
64		table[i] = i - ++j;
65		break;
66	      }
67	    if (j == 0)
68	      {
69		table[i] = i;
70		break;
71	      }
72	    j = j - table[j];
73	  }
74      }
75  }
76
77  /* Search, using the table to accelerate the processing.  */
78  {
79    size_t j;
80    const char *rhaystack;
81    const char *phaystack;
82
83    *resultp = NULL;
84    j = 0;
85    rhaystack = haystack;
86    phaystack = haystack;
87    /* Invariant: phaystack = rhaystack + j.  */
88    while (*phaystack != '\0')
89      if (c_tolower ((unsigned char) needle[j])
90	  == c_tolower ((unsigned char) *phaystack))
91	{
92	  j++;
93	  phaystack++;
94	  if (j == m)
95	    {
96	      /* The entire needle has been found.  */
97	      *resultp = rhaystack;
98	      break;
99	    }
100	}
101      else if (j > 0)
102	{
103	  /* Found a match of needle[0..j-1], mismatch at needle[j].  */
104	  rhaystack += table[j];
105	  j -= table[j];
106	}
107      else
108	{
109	  /* Found a mismatch at needle[0] already.  */
110	  rhaystack++;
111	  phaystack++;
112	}
113  }
114
115  freea (table);
116  return true;
117}
118
119/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
120   comparison.
121   Note: This function may, in multibyte locales, return success even if
122   strlen (haystack) < strlen (needle) !  */
123char *
124c_strcasestr (const char *haystack, const char *needle)
125{
126  /* Be careful not to look at the entire extent of haystack or needle
127     until needed.  This is useful because of these two cases:
128       - haystack may be very long, and a match of needle found early,
129       - needle may be very long, and not even a short initial segment of
130         needle may be found in haystack.  */
131  if (*needle != '\0')
132    {
133      /* Minimizing the worst-case complexity:
134	 Let n = strlen(haystack), m = strlen(needle).
135	 The na��ve algorithm is O(n*m) worst-case.
136	 The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
137	 memory allocation.
138	 To achieve linear complexity and yet amortize the cost of the memory
139	 allocation, we activate the Knuth-Morris-Pratt algorithm only once
140	 the na��ve algorithm has already run for some time; more precisely,
141	 when
142	   - the outer loop count is >= 10,
143	   - the average number of comparisons per outer loop is >= 5,
144	   - the total number of comparisons is >= m.
145	 But we try it only once.  If the memory allocation attempt failed,
146	 we don't retry it.  */
147      bool try_kmp = true;
148      size_t outer_loop_count = 0;
149      size_t comparison_count = 0;
150      size_t last_ccount = 0;			/* last comparison count */
151      const char *needle_last_ccount = needle;	/* = needle + last_ccount */
152
153      /* Speed up the following searches of needle by caching its first
154	 character.  */
155      unsigned char b = c_tolower ((unsigned char) *needle);
156
157      needle++;
158      for (;; haystack++)
159	{
160	  if (*haystack == '\0')
161	    /* No match.  */
162	    return NULL;
163
164	  /* See whether it's advisable to use an asymptotically faster
165	     algorithm.  */
166	  if (try_kmp
167	      && outer_loop_count >= 10
168	      && comparison_count >= 5 * outer_loop_count)
169	    {
170	      /* See if needle + comparison_count now reaches the end of
171		 needle.  */
172	      if (needle_last_ccount != NULL)
173		{
174		  needle_last_ccount +=
175		    strnlen (needle_last_ccount, comparison_count - last_ccount);
176		  if (*needle_last_ccount == '\0')
177		    needle_last_ccount = NULL;
178		  last_ccount = comparison_count;
179		}
180	      if (needle_last_ccount == NULL)
181		{
182		  /* Try the Knuth-Morris-Pratt algorithm.  */
183		  const char *result;
184		  bool success =
185		    knuth_morris_pratt (haystack, needle - 1, &result);
186		  if (success)
187		    return (char *) result;
188		  try_kmp = false;
189		}
190	    }
191
192	  outer_loop_count++;
193	  comparison_count++;
194	  if (c_tolower ((unsigned char) *haystack) == b)
195	    /* The first character matches.  */
196	    {
197	      const char *rhaystack = haystack + 1;
198	      const char *rneedle = needle;
199
200	      for (;; rhaystack++, rneedle++)
201		{
202		  if (*rneedle == '\0')
203		    /* Found a match.  */
204		    return (char *) haystack;
205		  if (*rhaystack == '\0')
206		    /* No match.  */
207		    return NULL;
208		  comparison_count++;
209		  if (c_tolower ((unsigned char) *rhaystack)
210		      != c_tolower ((unsigned char) *rneedle))
211		    /* Nothing in this round.  */
212		    break;
213		}
214	    }
215	}
216    }
217  else
218    return (char *) haystack;
219}
220