• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/gettext-0.17/gettext-runtime/gnulib-lib/
1/* c-strstr.c -- substring search in C locale
2   Copyright (C) 2005-2007 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2005, 2007.
4
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20/* Specification.  */
21#include "c-strstr.h"
22
23#include <stdbool.h>
24#include <stdlib.h>
25#include <string.h>
26
27#include "malloca.h"
28
29/* Knuth-Morris-Pratt algorithm.
30   See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
31   Return a boolean indicating success.  */
32static bool
33knuth_morris_pratt (const char *haystack, const char *needle,
34		    const char **resultp)
35{
36  size_t m = strlen (needle);
37
38  /* Allocate the table.  */
39  size_t *table = (size_t *) malloca (m * sizeof (size_t));
40  if (table == NULL)
41    return false;
42  /* Fill the table.
43     For 0 < i < m:
44       0 < table[i] <= i is defined such that
45       rhaystack[0..i-1] == needle[0..i-1] and rhaystack[i] != needle[i]
46       implies
47       forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1],
48       and table[i] is as large as possible with this property.
49     table[0] remains uninitialized.  */
50  {
51    size_t i, j;
52
53    table[1] = 1;
54    j = 0;
55    for (i = 2; i < m; i++)
56      {
57	unsigned char b = (unsigned char) needle[i - 1];
58
59	for (;;)
60	  {
61	    if (b == (unsigned char) needle[j])
62	      {
63		table[i] = i - ++j;
64		break;
65	      }
66	    if (j == 0)
67	      {
68		table[i] = i;
69		break;
70	      }
71	    j = j - table[j];
72	  }
73      }
74  }
75
76  /* Search, using the table to accelerate the processing.  */
77  {
78    size_t j;
79    const char *rhaystack;
80    const char *phaystack;
81
82    *resultp = NULL;
83    j = 0;
84    rhaystack = haystack;
85    phaystack = haystack;
86    /* Invariant: phaystack = rhaystack + j.  */
87    while (*phaystack != '\0')
88      if ((unsigned char) needle[j] == (unsigned char) *phaystack)
89	{
90	  j++;
91	  phaystack++;
92	  if (j == m)
93	    {
94	      /* The entire needle has been found.  */
95	      *resultp = rhaystack;
96	      break;
97	    }
98	}
99      else if (j > 0)
100	{
101	  /* Found a match of needle[0..j-1], mismatch at needle[j].  */
102	  rhaystack += table[j];
103	  j -= table[j];
104	}
105      else
106	{
107	  /* Found a mismatch at needle[0] already.  */
108	  rhaystack++;
109	  phaystack++;
110	}
111  }
112
113  freea (table);
114  return true;
115}
116
117/* Find the first occurrence of NEEDLE in HAYSTACK.  */
118char *
119c_strstr (const char *haystack, const char *needle)
120{
121  /* Be careful not to look at the entire extent of haystack or needle
122     until needed.  This is useful because of these two cases:
123       - haystack may be very long, and a match of needle found early,
124       - needle may be very long, and not even a short initial segment of
125         needle may be found in haystack.  */
126  if (*needle != '\0')
127    {
128      /* Minimizing the worst-case complexity:
129	 Let n = strlen(haystack), m = strlen(needle).
130	 The na��ve algorithm is O(n*m) worst-case.
131	 The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
132	 memory allocation.
133	 To achieve linear complexity and yet amortize the cost of the memory
134	 allocation, we activate the Knuth-Morris-Pratt algorithm only once
135	 the na��ve algorithm has already run for some time; more precisely,
136	 when
137	   - the outer loop count is >= 10,
138	   - the average number of comparisons per outer loop is >= 5,
139	   - the total number of comparisons is >= m.
140	 But we try it only once.  If the memory allocation attempt failed,
141	 we don't retry it.  */
142      bool try_kmp = true;
143      size_t outer_loop_count = 0;
144      size_t comparison_count = 0;
145      size_t last_ccount = 0;			/* last comparison count */
146      const char *needle_last_ccount = needle;	/* = needle + last_ccount */
147
148      /* Speed up the following searches of needle by caching its first
149	 character.  */
150      unsigned char b = (unsigned char) *needle;
151
152      needle++;
153      for (;; haystack++)
154	{
155	  if (*haystack == '\0')
156	    /* No match.  */
157	    return NULL;
158
159	  /* See whether it's advisable to use an asymptotically faster
160	     algorithm.  */
161	  if (try_kmp
162	      && outer_loop_count >= 10
163	      && comparison_count >= 5 * outer_loop_count)
164	    {
165	      /* See if needle + comparison_count now reaches the end of
166		 needle.  */
167	      if (needle_last_ccount != NULL)
168		{
169		  needle_last_ccount +=
170		    strnlen (needle_last_ccount, comparison_count - last_ccount);
171		  if (*needle_last_ccount == '\0')
172		    needle_last_ccount = NULL;
173		  last_ccount = comparison_count;
174		}
175	      if (needle_last_ccount == NULL)
176		{
177		  /* Try the Knuth-Morris-Pratt algorithm.  */
178		  const char *result;
179		  bool success =
180		    knuth_morris_pratt (haystack, needle - 1, &result);
181		  if (success)
182		    return (char *) result;
183		  try_kmp = false;
184		}
185	    }
186
187	  outer_loop_count++;
188	  comparison_count++;
189	  if ((unsigned char) *haystack == b)
190	    /* The first character matches.  */
191	    {
192	      const char *rhaystack = haystack + 1;
193	      const char *rneedle = needle;
194
195	      for (;; rhaystack++, rneedle++)
196		{
197		  if (*rneedle == '\0')
198		    /* Found a match.  */
199		    return (char *) haystack;
200		  if (*rhaystack == '\0')
201		    /* No match.  */
202		    return NULL;
203		  comparison_count++;
204		  if ((unsigned char) *rhaystack != (unsigned char) *rneedle)
205		    /* Nothing in this round.  */
206		    break;
207		}
208	    }
209	}
210    }
211  else
212    return (char *) haystack;
213}
214