1/*
2 * "$Id: search.c 11093 2013-07-03 20:48:42Z msweet $"
3 *
4 *   Search routines for CUPS.
5 *
6 *   Copyright 2007-2012 by Apple Inc.
7 *   Copyright 1997-2006 by Easy Software Products.
8 *
9 *   These coded instructions, statements, and computer programs are the
10 *   property of Apple Inc. and are protected by Federal copyright
11 *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
 *   which should have been included with this file.  If this file is
 *   missing or damaged, see the license at "http://www.cups.org/".
14 *
15 * Contents:
16 *
17 *   cgiCompileSearch() - Compile a search string.
18 *   cgiDoSearch()      - Do a search of some text.
19 *   cgiFreeSearch()    - Free a compiled search context.
20 */
21
22/*
23 * Include necessary headers...
24 */
25
26#include "cgi-private.h"
27#include <regex.h>
28
29
/*
 * 'cgiCompileSearch()' - Compile a search string.
 *
 * The query is split into words that are either whitespace-delimited or
 * enclosed in matching single or double quotes.  Regular expression
 * metacharacters in each word are escaped, and the words are joined into a
 * single extended, case-insensitive regular expression.  Words are ORed
 * together by default; the keywords "AND" and "OR" (any case) select how the
 * following word is combined with the previous one.
 *
 * NULL is returned when the query is NULL, contains no search words, has an
 * unterminated quote, fails to compile, or when memory cannot be allocated.
 */

void *					/* O - Search context */
cgiCompileSearch(const char *query)	/* I - Query string */
{
  regex_t	*re;			/* Regular expression */
  char		*s,			/* Regular expression string */
		*sptr,			/* Pointer into RE string */
		*sword;			/* Pointer to start of word */
  size_t	slen;			/* Allocated size of RE string */
  const char	*qptr,			/* Pointer into query string */
		*qend;			/* End of current word */
  const char	*prefix;		/* Prefix to add to next word */
  size_t	plen;			/* Length of prefix */
  int		quoted;			/* Word is quoted */
  size_t	wlen;			/* Word length */
  size_t	need;			/* Bytes needed in RE string */
  char		*lword;			/* Last word in query */


  DEBUG_printf(("cgiCompileSearch(query=\"%s\")\n", query));

 /*
  * Range check input...
  */

  if (!query)
    return (NULL);

 /*
  * Allocate a regular expression storage structure...
  */

  if ((re = (regex_t *)calloc(1, sizeof(regex_t))) == NULL)
    return (NULL);

 /*
  * Allocate a buffer to hold the regular expression string, starting
  * at 1024 bytes or 3 times the length of the query string, whichever
  * is greater.  We'll expand the string as needed...
  */

  slen = strlen(query) * 3;
  if (slen < 1024)
    slen = 1024;

  if ((s = (char *)malloc(slen)) == NULL)
  {
    free(re);
    return (NULL);
  }

 /*
  * Copy the query string to the regular expression, handling basic
  * AND and OR logic...
  */

  prefix = ".*";
  qptr   = query;
  sptr   = s;
  lword  = NULL;

  while (*qptr)
  {
   /*
    * Skip leading whitespace...
    */

    while (isspace(*qptr & 255))
      qptr ++;

    if (!*qptr)
      break;

   /*
    * Find the end of the current word...
    */

    if (*qptr == '\"' || *qptr == '\'')
    {
     /*
      * Scan quoted string...
      */

      quoted = *qptr ++;
      for (qend = qptr; *qend && *qend != quoted; qend ++);

     /*
      * No closing quote, error out!
      */

      if (!*qend)
	goto error;
    }
    else
    {
     /*
      * Scan whitespace-delimited string; mask the character so bytes
      * with the high bit set are never passed to isspace() as negative
      * values...
      */

      quoted = 0;
      for (qend = qptr + 1; *qend && !isspace(*qend & 255); qend ++);
    }

    wlen = (size_t)(qend - qptr);

   /*
    * Look for logic words: AND, OR
    */

    if (wlen == 3 && !_cups_strncasecmp(qptr, "AND", 3))
    {
     /*
      * Logical AND with the following text...
      */

      if (sptr > s)
        prefix = ".*";

      qptr = qend;
    }
    else if (wlen == 2 && !_cups_strncasecmp(qptr, "OR", 2))
    {
     /*
      * Logical OR with the following text...
      */

      if (sptr > s)
        prefix = ".*|.*";

      qptr = qend;
    }
    else
    {
     /*
      * Add a search word, making sure we have enough room for the
      * string + RE overhead...
      */

      plen = strlen(prefix);
      need = (size_t)(sptr - s) + 2 * 4 * wlen + 2 * plen + 11;
      if (lword)
        need += strlen(lword);

      if (need > slen)
      {
       /*
        * Expand the RE string buffer, remembering the write offset
	* before the old buffer pointer becomes invalid...
	*/

        char	*temp;			/* Temporary string pointer */
	size_t	used = (size_t)(sptr - s);
					/* Bytes written so far */


	slen = need + 128;
	if ((temp = (char *)realloc(s, slen)) == NULL)
	  goto error;

	s    = temp;
	sptr = temp + used;
      }

     /*
      * Add the prefix string...
      */

      memcpy(sptr, prefix, plen + 1);
      sptr += plen;

     /*
      * Then quote the remaining word characters as needed for the
      * RE...
      */

      sword = sptr;

      while (qptr < qend)
      {
       /*
        * Quote: ^ . [ $ ( ) | * + ? { \
	*/

        if (strchr("^.[$()|*+?{\\", *qptr))
	  *sptr++ = '\\';

	*sptr++ = *qptr++;
      }

      *sptr = '\0';

     /*
      * For "word1 AND word2", add reciprocal "word2 AND word1"...
      */

      if (!strcmp(prefix, ".*") && lword)
      {
        char	*lword2;		/* New "last word" */
	size_t	len;			/* Length of appended word */


        if ((lword2 = strdup(sword)) == NULL)
	  goto error;

        memcpy(sptr, ".*|.*", 6);
	sptr += 5;

	len = strlen(lword2);
	memcpy(sptr, lword2, len + 1);
	sptr += len;

        memcpy(sptr, ".*", 3);
	sptr += 2;

	len = strlen(lword);
	memcpy(sptr, lword, len + 1);
	sptr += len;

        free(lword);
	lword = lword2;
      }
      else
      {
       /*
        * Remember this (escaped) word for a possible following AND...
	*/

	free(lword);

	if ((lword = strdup(sword)) == NULL)
	  goto error;
      }

      prefix = ".*|.*";
    }

   /*
    * Advance to the next string, skipping the closing quote...
    */

    if (quoted)
      qptr ++;
  }

 /*
  * No query data, return NULL...
  */

  if (sptr <= s)
    goto error;

  memcpy(sptr, ".*", 3);

 /*
  * Compile the regular expression...
  */

  DEBUG_printf(("    s=\"%s\"\n", s));

  if (regcomp(re, s, REG_EXTENDED | REG_ICASE))
    goto error;

 /*
  * Free the working strings and return the new regular expression we
  * compiled...
  */

  free(lword);
  free(s);

  return ((void *)re);

 /*
  * Common failure path: release everything allocated so far...
  */

  error:

  free(lword);
  free(s);
  free(re);

  return (NULL);
}
325
326
/*
 * 'cgiDoSearch()' - Do a search of some text.
 *
 * Runs the compiled expression against the text and reports how many
 * leading match slots have a non-negative start offset.  0 is returned when
 * either argument is NULL or the text does not match.
 */

int					/* O - Number of matches */
cgiDoSearch(void       *search,		/* I - Search context */
            const char *text)		/* I - Text to search */
{
  regmatch_t	found[100];		/* Match slots filled by regexec() */
  const int	max_found = (int)(sizeof(found) / sizeof(found[0]));
					/* Number of match slots */
  int		count = 0;		/* Number of slots in use */


 /*
  * Nothing to do without both a context and some text...
  */

  if (search == NULL || text == NULL)
    return (0);

 /*
  * A non-zero result from regexec() means the text did not match...
  */

  if (regexec((regex_t *)search, text, (size_t)max_found, found, 0) != 0)
    return (0);

 /*
  * Count slots up to the first one with a negative start offset...
  */

  while (count < max_found && found[count].rm_so >= 0)
    count ++;

  return (count);
}
366
367
/*
 * 'cgiFreeSearch()' - Free a compiled search context.
 *
 * Releases the compiled expression with regfree() and then frees the
 * regex_t storage itself, which cgiCompileSearch() allocates with calloc().
 * A NULL context (as returned by cgiCompileSearch() on failure) is ignored.
 */

void
cgiFreeSearch(void *search)		/* I - Search context */
{
  if (!search)
    return;

  regfree((regex_t *)search);
  free(search);
}
377
378
379/*
380 * End of "$Id: search.c 11093 2013-07-03 20:48:42Z msweet $".
381 */
382