1/*	$NetBSD: search.c,v 1.1.1.1 2016/01/14 00:11:29 christos Exp $	*/
2
3/* search.c -- searching large bodies of text.
4   Id: search.c,v 1.3 2004/04/11 17:56:46 karl Exp
5
6   Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, write to the Free Software
20   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21
22   Written by Brian Fox (bfox@ai.mit.edu). */
23
24#include "info.h"
25
26#include "search.h"
27#include "nodes.h"
28
29/* The search functions take two arguments:
30
31     1) a string to search for, and
32
33     2) a pointer to a SEARCH_BINDING which contains the buffer, start,
34        and end of the search.
35
36   They return a long, which is the offset from the start of the buffer
37   at which the match was found.  An offset of -1 indicates failure. */
38
39/* A function which makes a binding with buffer and bounds. */
40SEARCH_BINDING *
41make_binding (char *buffer, long int start, long int end)
42{
43  SEARCH_BINDING *binding;
44
45  binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING));
46  binding->buffer = buffer;
47  binding->start = start;
48  binding->end = end;
49  binding->flags = 0;
50
51  return (binding);
52}
53
54/* Make a copy of BINDING without duplicating the data. */
55SEARCH_BINDING *
56copy_binding (SEARCH_BINDING *binding)
57{
58  SEARCH_BINDING *copy;
59
60  copy = make_binding (binding->buffer, binding->start, binding->end);
61  copy->flags = binding->flags;
62  return (copy);
63}
64
65
66/* **************************************************************** */
67/*                                                                  */
68/*                 The Actual Searching Functions                   */
69/*                                                                  */
70/* **************************************************************** */
71
72/* Search forwards or backwards for the text delimited by BINDING.
73   The search is forwards if BINDING->start is greater than BINDING->end. */
74long
75search (char *string, SEARCH_BINDING *binding)
76{
77  long result;
78
79  /* If the search is backwards, then search backwards, otherwise forwards. */
80  if (binding->start > binding->end)
81    result = search_backward (string, binding);
82  else
83    result = search_forward (string, binding);
84
85  return (result);
86}
87
88/* Search forwards for STRING through the text delimited in BINDING. */
89long
90search_forward (char *string, SEARCH_BINDING *binding)
91{
92  register int c, i, len;
93  register char *buff, *end;
94  char *alternate = (char *)NULL;
95
96  len = strlen (string);
97
98  /* We match characters in the search buffer against STRING and ALTERNATE.
99     ALTERNATE is a case reversed version of STRING; this is cheaper than
100     case folding each character before comparison.   Alternate is only
101     used if the case folding bit is turned on in the passed BINDING. */
102
103  if (binding->flags & S_FoldCase)
104    {
105      alternate = xstrdup (string);
106
107      for (i = 0; i < len; i++)
108        {
109          if (islower (alternate[i]))
110            alternate[i] = toupper (alternate[i]);
111          else if (isupper (alternate[i]))
112            alternate[i] = tolower (alternate[i]);
113        }
114    }
115
116  buff = binding->buffer + binding->start;
117  end = binding->buffer + binding->end + 1;
118
119  while (buff < (end - len))
120    {
121      for (i = 0; i < len; i++)
122        {
123          c = buff[i];
124
125          if ((c != string[i]) && (!alternate || c != alternate[i]))
126            break;
127        }
128
129      if (!string[i])
130        {
131          if (alternate)
132            free (alternate);
133          if (binding->flags & S_SkipDest)
134            buff += len;
135          return ((long) (buff - binding->buffer));
136        }
137
138      buff++;
139    }
140
141  if (alternate)
142    free (alternate);
143
144  return ((long) -1);
145}
146
147/* Search for STRING backwards through the text delimited in BINDING. */
148long
149search_backward (char *input_string, SEARCH_BINDING *binding)
150{
151  register int c, i, len;
152  register char *buff, *end;
153  char *string;
154  char *alternate = (char *)NULL;
155
156  len = strlen (input_string);
157
158  /* Reverse the characters in the search string. */
159  string = (char *)xmalloc (1 + len);
160  for (c = 0, i = len - 1; input_string[c]; c++, i--)
161    string[i] = input_string[c];
162
163  string[c] = '\0';
164
165  /* We match characters in the search buffer against STRING and ALTERNATE.
166     ALTERNATE is a case reversed version of STRING; this is cheaper than
167     case folding each character before comparison.   ALTERNATE is only
168     used if the case folding bit is turned on in the passed BINDING. */
169
170  if (binding->flags & S_FoldCase)
171    {
172      alternate = xstrdup (string);
173
174      for (i = 0; i < len; i++)
175        {
176          if (islower (alternate[i]))
177            alternate[i] = toupper (alternate[i]);
178          else if (isupper (alternate[i]))
179            alternate[i] = tolower (alternate[i]);
180        }
181    }
182
183  buff = binding->buffer + binding->start - 1;
184  end = binding->buffer + binding->end;
185
186  while (buff > (end + len))
187    {
188      for (i = 0; i < len; i++)
189        {
190          c = *(buff - i);
191
192          if (c != string[i] && (!alternate || c != alternate[i]))
193            break;
194        }
195
196      if (!string[i])
197        {
198          free (string);
199          if (alternate)
200            free (alternate);
201
202          if (binding->flags & S_SkipDest)
203            buff -= len;
204          return ((long) (1 + (buff - binding->buffer)));
205        }
206
207      buff--;
208    }
209
210  free (string);
211  if (alternate)
212    free (alternate);
213
214  return ((long) -1);
215}
216
217/* Find STRING in LINE, returning the offset of the end of the string.
218   Return an offset of -1 if STRING does not appear in LINE.  The search
219   is bound by the end of the line (i.e., either NEWLINE or 0). */
220int
221string_in_line (char *string, char *line)
222{
223  register int end;
224  SEARCH_BINDING binding;
225
226  /* Find the end of the line. */
227  for (end = 0; line[end] && line[end] != '\n'; end++);
228
229  /* Search for STRING within these confines. */
230  binding.buffer = line;
231  binding.start = 0;
232  binding.end = end;
233  binding.flags = S_FoldCase | S_SkipDest;
234
235  return (search_forward (string, &binding));
236}
237
238/* Return non-zero if STRING is the first text to appear at BINDING. */
239int
240looking_at (char *string, SEARCH_BINDING *binding)
241{
242  long search_end;
243
244  search_end = search (string, binding);
245
246  /* If the string was not found, SEARCH_END is -1.  If the string was found,
247     but not right away, SEARCH_END is != binding->start.  Otherwise, the
248     string was found at binding->start. */
249  return (search_end == binding->start);
250}
251
252/* **************************************************************** */
253/*                                                                  */
254/*                    Small String Searches                         */
255/*                                                                  */
256/* **************************************************************** */
257
258/* Function names that start with "skip" are passed a string, and return
259   an offset from the start of that string.  Function names that start
260   with "find" are passed a SEARCH_BINDING, and return an absolute position
261   marker of the item being searched for.  "Find" functions return a value
262   of -1 if the item being looked for couldn't be found. */
263
264/* Return the index of the first non-whitespace character in STRING. */
265int
266skip_whitespace (char *string)
267{
268  register int i;
269
270  for (i = 0; string && whitespace (string[i]); i++);
271  return (i);
272}
273
274/* Return the index of the first non-whitespace or newline character in
275   STRING. */
276int
277skip_whitespace_and_newlines (char *string)
278{
279  register int i;
280
281  for (i = 0; string && whitespace_or_newline (string[i]); i++);
282  return (i);
283}
284
285/* Return the index of the first whitespace character in STRING. */
286int
287skip_non_whitespace (char *string)
288{
289  register int i;
290
291  for (i = 0; string && string[i] && !whitespace (string[i]); i++);
292  return (i);
293}
294
295/* Return the index of the first non-node character in STRING.  Note that
296   this function contains quite a bit of hair to ignore periods in some
297   special cases.  This is because we here at GNU ship some info files which
298   contain nodenames that contain periods.  No such nodename can start with
299   a period, or continue with whitespace, newline, or ')' immediately following
300   the period.  If second argument NEWLINES_OKAY is non-zero, newlines should
301   be skipped while parsing out the nodename specification. */
302int
303skip_node_characters (char *string, int newlines_okay)
304{
305  register int c, i = 0;
306  int paren_seen = 0;
307  int paren = 0;
308
309  /* Handle special case.  This is when another function has parsed out the
310     filename component of the node name, and we just want to parse out the
311     nodename proper.  In that case, a period at the start of the nodename
312     indicates an empty nodename. */
313  if (string && *string == '.')
314    return (0);
315
316  if (string && *string == '(')
317    {
318      paren++;
319      paren_seen++;
320      i++;
321    }
322
323  for (; string && (c = string[i]); i++)
324    {
325      if (paren)
326        {
327          if (c == '(')
328            paren++;
329          else if (c == ')')
330            paren--;
331
332          continue;
333        }
334
335      /* If the character following the close paren is a space or period,
336         then this node name has no more characters associated with it. */
337      if (c == '\t' ||
338          c == ','  ||
339          c == INFO_TAGSEP ||
340          ((!newlines_okay) && (c == '\n')) ||
341          ((paren_seen && string[i - 1] == ')') &&
342           (c == ' ' || c == '.')) ||
343          (c == '.' &&
344           (
345#if 0
346/* This test causes a node name ending in a period, like `This.', not to
347   be found.  The trailing . is stripped.  This occurs in the jargon
348   file (`I see no X here.' is a node name).  */
349           (!string[i + 1]) ||
350#endif
351            (whitespace_or_newline (string[i + 1])) ||
352            (string[i + 1] == ')'))))
353        break;
354    }
355  return (i);
356}
357
358
359/* **************************************************************** */
360/*                                                                  */
361/*                   Searching FILE_BUFFER's                        */
362/*                                                                  */
363/* **************************************************************** */
364
365/* Return the absolute position of the first occurence of a node separator in
366   BINDING-buffer.  The search starts at BINDING->start.  Return -1 if no node
367   separator was found. */
368long
369find_node_separator (SEARCH_BINDING *binding)
370{
371  register long i;
372  char *body;
373
374  body = binding->buffer;
375
376  /* A node is started by [^L]^_[^L]\n.  That is to say, the C-l's are
377     optional, but the DELETE and NEWLINE are not.  This separator holds
378     true for all separated elements in an Info file, including the tags
379     table (if present) and the indirect tags table (if present). */
380  for (i = binding->start; i < binding->end - 1; i++)
381    if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) &&
382         (body[i + 2] == '\n' ||
383          (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) ||
384        ((body[i] == INFO_COOKIE) &&
385         (body[i + 1] == '\n' ||
386          (body[i + 1] == INFO_FF && body[i + 2] == '\n'))))
387      return (i);
388  return (-1);
389}
390
391/* Return the length of the node separator characters that BODY is
392   currently pointing at. */
393int
394skip_node_separator (char *body)
395{
396  register int i;
397
398  i = 0;
399
400  if (body[i] == INFO_FF)
401    i++;
402
403  if (body[i++] != INFO_COOKIE)
404    return (0);
405
406  if (body[i] == INFO_FF)
407    i++;
408
409  if (body[i++] != '\n')
410    return (0);
411
412  return (i);
413}
414
415/* Return the number of characters from STRING to the start of
416   the next line. */
417int
418skip_line (char *string)
419{
420  register int i;
421
422  for (i = 0; string && string[i] && string[i] != '\n'; i++);
423
424  if (string[i] == '\n')
425    i++;
426
427  return (i);
428}
429
430/* Return the absolute position of the beginning of a tags table in this
431   binding starting the search at binding->start. */
432long
433find_tags_table (SEARCH_BINDING *binding)
434{
435  SEARCH_BINDING tmp_search;
436  long position;
437
438  tmp_search.buffer = binding->buffer;
439  tmp_search.start = binding->start;
440  tmp_search.end = binding->end;
441  tmp_search.flags = S_FoldCase;
442
443  while ((position = find_node_separator (&tmp_search)) != -1 )
444    {
445      tmp_search.start = position;
446      tmp_search.start += skip_node_separator (tmp_search.buffer
447          + tmp_search.start);
448
449      if (looking_at (TAGS_TABLE_BEG_LABEL, &tmp_search))
450        return (position);
451    }
452  return (-1);
453}
454
455/* Return the absolute position of the node named NODENAME in BINDING.
456   This is a brute force search, and we wish to avoid it when possible.
457   This function is called when a tag (indirect or otherwise) doesn't
458   really point to the right node.  It returns the absolute position of
459   the separator preceding the node. */
460long
461find_node_in_binding (char *nodename, SEARCH_BINDING *binding)
462{
463  long position;
464  int offset, namelen;
465  SEARCH_BINDING tmp_search;
466
467  namelen = strlen (nodename);
468
469  tmp_search.buffer = binding->buffer;
470  tmp_search.start = binding->start;
471  tmp_search.end = binding->end;
472  tmp_search.flags = 0;
473
474  while ((position = find_node_separator (&tmp_search)) != -1)
475    {
476      tmp_search.start = position;
477      tmp_search.start += skip_node_separator
478        (tmp_search.buffer + tmp_search.start);
479
480      offset = string_in_line
481        (INFO_NODE_LABEL, tmp_search.buffer + tmp_search.start);
482
483      if (offset == -1)
484        continue;
485
486      tmp_search.start += offset;
487      tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start);
488      offset = skip_node_characters
489        (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES);
490
491      /* Notice that this is an exact match.  You cannot grovel through
492         the buffer with this function looking for random nodes. */
493       if ((offset == namelen) &&
494           (tmp_search.buffer[tmp_search.start] == nodename[0]) &&
495           (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0))
496         return (position);
497    }
498  return (-1);
499}
500