1/* String search routines for GNU Emacs.
2   Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3                 2003, 2004, 2005, 2006, 2007  Free Software Foundation, Inc.
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING.  If not, write to
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA.  */
21
22
23#include <config.h>
24#include "lisp.h"
25#include "syntax.h"
26#include "category.h"
27#include "buffer.h"
28#include "charset.h"
29#include "region-cache.h"
30#include "commands.h"
31#include "blockinput.h"
32#include "intervals.h"
33
34#include <sys/types.h>
35#include "regex.h"
36
37#define REGEXP_CACHE_SIZE 20
38
39/* If the regexp is non-nil, then the buffer contains the compiled form
40   of that regexp, suitable for searching.  */
41struct regexp_cache
42{
43  struct regexp_cache *next;
44  Lisp_Object regexp, whitespace_regexp;
45  /* Syntax table for which the regexp applies.  We need this because
46     of character classes.  If this is t, then the compiled pattern is valid
47     for any syntax-table.  */
48  Lisp_Object syntax_table;
49  struct re_pattern_buffer buf;
50  char fastmap[0400];
51  /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
52  char posix;
53};
54
55/* The instances of that struct.  */
56struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
57
58/* The head of the linked list; points to the most recently used buffer.  */
59struct regexp_cache *searchbuf_head;
60
61
62/* Every call to re_match, etc., must pass &search_regs as the regs
63   argument unless you can show it is unnecessary (i.e., if re_match
64   is certainly going to be called again before region-around-match
65   can be called).
66
67   Since the registers are now dynamically allocated, we need to make
68   sure not to refer to the Nth register before checking that it has
69   been allocated by checking search_regs.num_regs.
70
71   The regex code keeps track of whether it has allocated the search
72   buffer using bits in the re_pattern_buffer.  This means that whenever
73   you compile a new pattern, it completely forgets whether it has
74   allocated any registers, and will allocate new registers the next
75   time you call a searching or matching function.  Therefore, we need
76   to call re_set_registers after compiling a new pattern or after
77   setting the match registers, so that the regex functions will be
78   able to free or re-allocate it properly.  */
79static struct re_registers search_regs;
80
81/* The buffer in which the last search was performed, or
82   Qt if the last search was done in a string;
83   Qnil if no searching has been done yet.  */
84static Lisp_Object last_thing_searched;
85
86/* error condition signaled when regexp compile_pattern fails */
87
88Lisp_Object Qinvalid_regexp;
89
90/* Error condition used for failing searches */
91Lisp_Object Qsearch_failed;
92
93Lisp_Object Vsearch_spaces_regexp;
94
95static void set_search_regs ();
96static void save_search_regs ();
97static int simple_search ();
98static int boyer_moore ();
99static int search_buffer ();
100static void matcher_overflow () NO_RETURN;
101
102static void
103matcher_overflow ()
104{
105  error ("Stack overflow in regexp matcher");
106}
107
108/* Compile a regexp and signal a Lisp error if anything goes wrong.
109   PATTERN is the pattern to compile.
110   CP is the place to put the result.
111   TRANSLATE is a translation table for ignoring case, or nil for none.
112   REGP is the structure that says where to store the "register"
113   values that will result from matching this pattern.
114   If it is 0, we should compile the pattern not to record any
115   subexpression bounds.
116   POSIX is nonzero if we want full backtracking (POSIX style)
117   for this pattern.  0 means backtrack only enough to get a valid match.
118   MULTIBYTE is nonzero if we want to handle multibyte characters in
119   PATTERN.  0 means all multibyte characters are recognized just as
120   sequences of binary data.
121
122   The behavior also depends on Vsearch_spaces_regexp.  */
123
124static void
125compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
126     struct regexp_cache *cp;
127     Lisp_Object pattern;
128     Lisp_Object translate;
129     struct re_registers *regp;
130     int posix;
131     int multibyte;
132{
133  unsigned char *raw_pattern;
134  int raw_pattern_size;
135  char *val;
136  reg_syntax_t old;
137
138  /* MULTIBYTE says whether the text to be searched is multibyte.
139     We must convert PATTERN to match that, or we will not really
140     find things right.  */
141
142  if (multibyte == STRING_MULTIBYTE (pattern))
143    {
144      raw_pattern = (unsigned char *) SDATA (pattern);
145      raw_pattern_size = SBYTES (pattern);
146    }
147  else if (multibyte)
148    {
149      raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
150						  SCHARS (pattern));
151      raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
152      copy_text (SDATA (pattern), raw_pattern,
153		 SCHARS (pattern), 0, 1);
154    }
155  else
156    {
157      /* Converting multibyte to single-byte.
158
159	 ??? Perhaps this conversion should be done in a special way
160	 by subtracting nonascii-insert-offset from each non-ASCII char,
161	 so that only the multibyte chars which really correspond to
162	 the chosen single-byte character set can possibly match.  */
163      raw_pattern_size = SCHARS (pattern);
164      raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
165      copy_text (SDATA (pattern), raw_pattern,
166		 SBYTES (pattern), 1, 0);
167    }
168
169  cp->regexp = Qnil;
170  cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
171  cp->posix = posix;
172  cp->buf.multibyte = multibyte;
173  cp->whitespace_regexp = Vsearch_spaces_regexp;
174  /* rms: I think BLOCK_INPUT is not needed here any more,
175     because regex.c defines malloc to call xmalloc.
176     Using BLOCK_INPUT here means the debugger won't run if an error occurs.
177     So let's turn it off.  */
178  /*  BLOCK_INPUT;  */
179  old = re_set_syntax (RE_SYNTAX_EMACS
180		       | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
181
182  re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
183			    : SDATA (Vsearch_spaces_regexp));
184
185  val = (char *) re_compile_pattern ((char *)raw_pattern,
186				     raw_pattern_size, &cp->buf);
187
188  /* If the compiled pattern hard codes some of the contents of the
189     syntax-table, it can only be reused with *this* syntax table.  */
190  cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
191
192  re_set_whitespace_regexp (NULL);
193
194  re_set_syntax (old);
195  /* UNBLOCK_INPUT;  */
196  if (val)
197    xsignal1 (Qinvalid_regexp, build_string (val));
198
199  cp->regexp = Fcopy_sequence (pattern);
200}
201
202/* Shrink each compiled regexp buffer in the cache
203   to the size actually used right now.
204   This is called from garbage collection.  */
205
206void
207shrink_regexp_cache ()
208{
209  struct regexp_cache *cp;
210
211  for (cp = searchbuf_head; cp != 0; cp = cp->next)
212    {
213      cp->buf.allocated = cp->buf.used;
214      cp->buf.buffer
215	= (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
216    }
217}
218
219/* Clear the regexp cache w.r.t. a particular syntax table,
220   because it was changed.
221   There is no danger of memory leak here because re_compile_pattern
222   automagically manages the memory in each re_pattern_buffer struct,
223   based on its `allocated' and `buffer' values.  */
224void
225clear_regexp_cache ()
226{
227  int i;
228
229  for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
230    /* It's tempting to compare with the syntax-table we've actually changd,
231       but it's not sufficient because char-table inheritance mewans that
232       modifying one syntax-table can change others at the same time.  */
233    if (!EQ (searchbufs[i].syntax_table, Qt))
234      searchbufs[i].regexp = Qnil;
235}
236
237/* Compile a regexp if necessary, but first check to see if there's one in
238   the cache.
239   PATTERN is the pattern to compile.
240   TRANSLATE is a translation table for ignoring case, or nil for none.
241   REGP is the structure that says where to store the "register"
242   values that will result from matching this pattern.
243   If it is 0, we should compile the pattern not to record any
244   subexpression bounds.
245   POSIX is nonzero if we want full backtracking (POSIX style)
246   for this pattern.  0 means backtrack only enough to get a valid match.  */
247
248struct re_pattern_buffer *
249compile_pattern (pattern, regp, translate, posix, multibyte)
250     Lisp_Object pattern;
251     struct re_registers *regp;
252     Lisp_Object translate;
253     int posix, multibyte;
254{
255  struct regexp_cache *cp, **cpp;
256
257  for (cpp = &searchbuf_head; ; cpp = &cp->next)
258    {
259      cp = *cpp;
260      /* Entries are initialized to nil, and may be set to nil by
261	 compile_pattern_1 if the pattern isn't valid.  Don't apply
262	 string accessors in those cases.  However, compile_pattern_1
263	 is only applied to the cache entry we pick here to reuse.  So
264	 nil should never appear before a non-nil entry.  */
265      if (NILP (cp->regexp))
266	goto compile_it;
267      if (SCHARS (cp->regexp) == SCHARS (pattern)
268	  && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
269	  && !NILP (Fstring_equal (cp->regexp, pattern))
270	  && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
271	  && cp->posix == posix
272	  && cp->buf.multibyte == multibyte
273	  && (EQ (cp->syntax_table, Qt)
274	      || EQ (cp->syntax_table, current_buffer->syntax_table))
275	  && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
276	break;
277
278      /* If we're at the end of the cache, compile into the nil cell
279	 we found, or the last (least recently used) cell with a
280	 string value.  */
281      if (cp->next == 0)
282	{
283	compile_it:
284	  compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
285	  break;
286	}
287    }
288
289  /* When we get here, cp (aka *cpp) contains the compiled pattern,
290     either because we found it in the cache or because we just compiled it.
291     Move it to the front of the queue to mark it as most recently used.  */
292  *cpp = cp->next;
293  cp->next = searchbuf_head;
294  searchbuf_head = cp;
295
296  /* Advise the searching functions about the space we have allocated
297     for register data.  */
298  if (regp)
299    re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
300
301  return &cp->buf;
302}
303
304
305static Lisp_Object
306looking_at_1 (string, posix)
307     Lisp_Object string;
308     int posix;
309{
310  Lisp_Object val;
311  unsigned char *p1, *p2;
312  int s1, s2;
313  register int i;
314  struct re_pattern_buffer *bufp;
315
316  if (running_asynch_code)
317    save_search_regs ();
318
319  /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
320  XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
321    = current_buffer->case_eqv_table;
322
323  CHECK_STRING (string);
324  bufp = compile_pattern (string, &search_regs,
325			  (!NILP (current_buffer->case_fold_search)
326			   ? current_buffer->case_canon_table : Qnil),
327			  posix,
328			  !NILP (current_buffer->enable_multibyte_characters));
329
330  immediate_quit = 1;
331  QUIT;			/* Do a pending quit right away, to avoid paradoxical behavior */
332
333  /* Get pointers and sizes of the two strings
334     that make up the visible portion of the buffer. */
335
336  p1 = BEGV_ADDR;
337  s1 = GPT_BYTE - BEGV_BYTE;
338  p2 = GAP_END_ADDR;
339  s2 = ZV_BYTE - GPT_BYTE;
340  if (s1 < 0)
341    {
342      p2 = p1;
343      s2 = ZV_BYTE - BEGV_BYTE;
344      s1 = 0;
345    }
346  if (s2 < 0)
347    {
348      s1 = ZV_BYTE - BEGV_BYTE;
349      s2 = 0;
350    }
351
352  re_match_object = Qnil;
353
354  i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
355		  PT_BYTE - BEGV_BYTE, &search_regs,
356		  ZV_BYTE - BEGV_BYTE);
357  immediate_quit = 0;
358
359  if (i == -2)
360    matcher_overflow ();
361
362  val = (0 <= i ? Qt : Qnil);
363  if (i >= 0)
364    for (i = 0; i < search_regs.num_regs; i++)
365      if (search_regs.start[i] >= 0)
366	{
367	  search_regs.start[i]
368	    = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
369	  search_regs.end[i]
370	    = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
371	}
372  XSETBUFFER (last_thing_searched, current_buffer);
373  return val;
374}
375
376DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
377       doc: /* Return t if text after point matches regular expression REGEXP.
378This function modifies the match data that `match-beginning',
379`match-end' and `match-data' access; save and restore the match
380data if you want to preserve them.  */)
381     (regexp)
382     Lisp_Object regexp;
383{
384  return looking_at_1 (regexp, 0);
385}
386
387DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
388       doc: /* Return t if text after point matches regular expression REGEXP.
389Find the longest match, in accord with Posix regular expression rules.
390This function modifies the match data that `match-beginning',
391`match-end' and `match-data' access; save and restore the match
392data if you want to preserve them.  */)
393     (regexp)
394     Lisp_Object regexp;
395{
396  return looking_at_1 (regexp, 1);
397}
398
399static Lisp_Object
400string_match_1 (regexp, string, start, posix)
401     Lisp_Object regexp, string, start;
402     int posix;
403{
404  int val;
405  struct re_pattern_buffer *bufp;
406  int pos, pos_byte;
407  int i;
408
409  if (running_asynch_code)
410    save_search_regs ();
411
412  CHECK_STRING (regexp);
413  CHECK_STRING (string);
414
415  if (NILP (start))
416    pos = 0, pos_byte = 0;
417  else
418    {
419      int len = SCHARS (string);
420
421      CHECK_NUMBER (start);
422      pos = XINT (start);
423      if (pos < 0 && -pos <= len)
424	pos = len + pos;
425      else if (0 > pos || pos > len)
426	args_out_of_range (string, start);
427      pos_byte = string_char_to_byte (string, pos);
428    }
429
430  /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
431  XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
432    = current_buffer->case_eqv_table;
433
434  bufp = compile_pattern (regexp, &search_regs,
435			  (!NILP (current_buffer->case_fold_search)
436			   ? current_buffer->case_canon_table : Qnil),
437			  posix,
438			  STRING_MULTIBYTE (string));
439  immediate_quit = 1;
440  re_match_object = string;
441
442  val = re_search (bufp, (char *) SDATA (string),
443		   SBYTES (string), pos_byte,
444		   SBYTES (string) - pos_byte,
445		   &search_regs);
446  immediate_quit = 0;
447  last_thing_searched = Qt;
448  if (val == -2)
449    matcher_overflow ();
450  if (val < 0) return Qnil;
451
452  for (i = 0; i < search_regs.num_regs; i++)
453    if (search_regs.start[i] >= 0)
454      {
455	search_regs.start[i]
456	  = string_byte_to_char (string, search_regs.start[i]);
457	search_regs.end[i]
458	  = string_byte_to_char (string, search_regs.end[i]);
459      }
460
461  return make_number (string_byte_to_char (string, val));
462}
463
464DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
465       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
466Matching ignores case if `case-fold-search' is non-nil.
467If third arg START is non-nil, start search at that index in STRING.
468For index of first char beyond the match, do (match-end 0).
469`match-end' and `match-beginning' also give indices of substrings
470matched by parenthesis constructs in the pattern.
471
472You can use the function `match-string' to extract the substrings
473matched by the parenthesis constructions in REGEXP. */)
474     (regexp, string, start)
475     Lisp_Object regexp, string, start;
476{
477  return string_match_1 (regexp, string, start, 0);
478}
479
480DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
481       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
482Find the longest match, in accord with Posix regular expression rules.
483Case is ignored if `case-fold-search' is non-nil in the current buffer.
484If third arg START is non-nil, start search at that index in STRING.
485For index of first char beyond the match, do (match-end 0).
486`match-end' and `match-beginning' also give indices of substrings
487matched by parenthesis constructs in the pattern.  */)
488     (regexp, string, start)
489     Lisp_Object regexp, string, start;
490{
491  return string_match_1 (regexp, string, start, 1);
492}
493
494/* Match REGEXP against STRING, searching all of STRING,
495   and return the index of the match, or negative on failure.
496   This does not clobber the match data.  */
497
498int
499fast_string_match (regexp, string)
500     Lisp_Object regexp, string;
501{
502  int val;
503  struct re_pattern_buffer *bufp;
504
505  bufp = compile_pattern (regexp, 0, Qnil,
506			  0, STRING_MULTIBYTE (string));
507  immediate_quit = 1;
508  re_match_object = string;
509
510  val = re_search (bufp, (char *) SDATA (string),
511		   SBYTES (string), 0,
512		   SBYTES (string), 0);
513  immediate_quit = 0;
514  return val;
515}
516
517/* Match REGEXP against STRING, searching all of STRING ignoring case,
518   and return the index of the match, or negative on failure.
519   This does not clobber the match data.
520   We assume that STRING contains single-byte characters.  */
521
522extern Lisp_Object Vascii_downcase_table;
523
524int
525fast_c_string_match_ignore_case (regexp, string)
526     Lisp_Object regexp;
527     const char *string;
528{
529  int val;
530  struct re_pattern_buffer *bufp;
531  int len = strlen (string);
532
533  regexp = string_make_unibyte (regexp);
534  re_match_object = Qt;
535  bufp = compile_pattern (regexp, 0,
536			  Vascii_canon_table, 0,
537			  0);
538  immediate_quit = 1;
539  val = re_search (bufp, string, len, 0, len, 0);
540  immediate_quit = 0;
541  return val;
542}
543
544/* Like fast_string_match but ignore case.  */
545
546int
547fast_string_match_ignore_case (regexp, string)
548     Lisp_Object regexp, string;
549{
550  int val;
551  struct re_pattern_buffer *bufp;
552
553  bufp = compile_pattern (regexp, 0, Vascii_canon_table,
554			  0, STRING_MULTIBYTE (string));
555  immediate_quit = 1;
556  re_match_object = string;
557
558  val = re_search (bufp, (char *) SDATA (string),
559		   SBYTES (string), 0,
560		   SBYTES (string), 0);
561  immediate_quit = 0;
562  return val;
563}
564
565/* The newline cache: remembering which sections of text have no newlines.  */
566
567/* If the user has requested newline caching, make sure it's on.
568   Otherwise, make sure it's off.
569   This is our cheezy way of associating an action with the change of
570   state of a buffer-local variable.  */
571static void
572newline_cache_on_off (buf)
573     struct buffer *buf;
574{
575  if (NILP (buf->cache_long_line_scans))
576    {
577      /* It should be off.  */
578      if (buf->newline_cache)
579        {
580          free_region_cache (buf->newline_cache);
581          buf->newline_cache = 0;
582        }
583    }
584  else
585    {
586      /* It should be on.  */
587      if (buf->newline_cache == 0)
588        buf->newline_cache = new_region_cache ();
589    }
590}
591
592
593/* Search for COUNT instances of the character TARGET between START and END.
594
595   If COUNT is positive, search forwards; END must be >= START.
596   If COUNT is negative, search backwards for the -COUNTth instance;
597      END must be <= START.
598   If COUNT is zero, do anything you please; run rogue, for all I care.
599
600   If END is zero, use BEGV or ZV instead, as appropriate for the
601   direction indicated by COUNT.
602
603   If we find COUNT instances, set *SHORTAGE to zero, and return the
604   position past the COUNTth match.  Note that for reverse motion
605   this is not the same as the usual convention for Emacs motion commands.
606
607   If we don't find COUNT instances before reaching END, set *SHORTAGE
608   to the number of TARGETs left unfound, and return END.
609
610   If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
611   except when inside redisplay.  */
612
613int
614scan_buffer (target, start, end, count, shortage, allow_quit)
615     register int target;
616     int start, end;
617     int count;
618     int *shortage;
619     int allow_quit;
620{
621  struct region_cache *newline_cache;
622  int direction;
623
624  if (count > 0)
625    {
626      direction = 1;
627      if (! end) end = ZV;
628    }
629  else
630    {
631      direction = -1;
632      if (! end) end = BEGV;
633    }
634
635  newline_cache_on_off (current_buffer);
636  newline_cache = current_buffer->newline_cache;
637
638  if (shortage != 0)
639    *shortage = 0;
640
641  immediate_quit = allow_quit;
642
643  if (count > 0)
644    while (start != end)
645      {
646        /* Our innermost scanning loop is very simple; it doesn't know
647           about gaps, buffer ends, or the newline cache.  ceiling is
648           the position of the last character before the next such
649           obstacle --- the last character the dumb search loop should
650           examine.  */
651	int ceiling_byte = CHAR_TO_BYTE (end) - 1;
652	int start_byte = CHAR_TO_BYTE (start);
653	int tem;
654
655        /* If we're looking for a newline, consult the newline cache
656           to see where we can avoid some scanning.  */
657        if (target == '\n' && newline_cache)
658          {
659            int next_change;
660            immediate_quit = 0;
661            while (region_cache_forward
662                   (current_buffer, newline_cache, start_byte, &next_change))
663              start_byte = next_change;
664            immediate_quit = allow_quit;
665
666            /* START should never be after END.  */
667            if (start_byte > ceiling_byte)
668              start_byte = ceiling_byte;
669
670            /* Now the text after start is an unknown region, and
671               next_change is the position of the next known region. */
672            ceiling_byte = min (next_change - 1, ceiling_byte);
673          }
674
675        /* The dumb loop can only scan text stored in contiguous
676           bytes. BUFFER_CEILING_OF returns the last character
677           position that is contiguous, so the ceiling is the
678           position after that.  */
679	tem = BUFFER_CEILING_OF (start_byte);
680	ceiling_byte = min (tem, ceiling_byte);
681
682        {
683          /* The termination address of the dumb loop.  */
684          register unsigned char *ceiling_addr
685	    = BYTE_POS_ADDR (ceiling_byte) + 1;
686          register unsigned char *cursor
687	    = BYTE_POS_ADDR (start_byte);
688          unsigned char *base = cursor;
689
690          while (cursor < ceiling_addr)
691            {
692              unsigned char *scan_start = cursor;
693
694              /* The dumb loop.  */
695              while (*cursor != target && ++cursor < ceiling_addr)
696                ;
697
698              /* If we're looking for newlines, cache the fact that
699                 the region from start to cursor is free of them. */
700              if (target == '\n' && newline_cache)
701                know_region_cache (current_buffer, newline_cache,
702                                   start_byte + scan_start - base,
703                                   start_byte + cursor - base);
704
705              /* Did we find the target character?  */
706              if (cursor < ceiling_addr)
707                {
708                  if (--count == 0)
709                    {
710                      immediate_quit = 0;
711                      return BYTE_TO_CHAR (start_byte + cursor - base + 1);
712                    }
713                  cursor++;
714                }
715            }
716
717          start = BYTE_TO_CHAR (start_byte + cursor - base);
718        }
719      }
720  else
721    while (start > end)
722      {
723        /* The last character to check before the next obstacle.  */
724	int ceiling_byte = CHAR_TO_BYTE (end);
725	int start_byte = CHAR_TO_BYTE (start);
726	int tem;
727
728        /* Consult the newline cache, if appropriate.  */
729        if (target == '\n' && newline_cache)
730          {
731            int next_change;
732            immediate_quit = 0;
733            while (region_cache_backward
734                   (current_buffer, newline_cache, start_byte, &next_change))
735              start_byte = next_change;
736            immediate_quit = allow_quit;
737
738            /* Start should never be at or before end.  */
739            if (start_byte <= ceiling_byte)
740              start_byte = ceiling_byte + 1;
741
742            /* Now the text before start is an unknown region, and
743               next_change is the position of the next known region. */
744            ceiling_byte = max (next_change, ceiling_byte);
745          }
746
747        /* Stop scanning before the gap.  */
748	tem = BUFFER_FLOOR_OF (start_byte - 1);
749	ceiling_byte = max (tem, ceiling_byte);
750
751        {
752          /* The termination address of the dumb loop.  */
753          register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
754          register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
755          unsigned char *base = cursor;
756
757          while (cursor >= ceiling_addr)
758            {
759              unsigned char *scan_start = cursor;
760
761              while (*cursor != target && --cursor >= ceiling_addr)
762                ;
763
764              /* If we're looking for newlines, cache the fact that
765                 the region from after the cursor to start is free of them.  */
766              if (target == '\n' && newline_cache)
767                know_region_cache (current_buffer, newline_cache,
768                                   start_byte + cursor - base,
769                                   start_byte + scan_start - base);
770
771              /* Did we find the target character?  */
772              if (cursor >= ceiling_addr)
773                {
774                  if (++count >= 0)
775                    {
776                      immediate_quit = 0;
777                      return BYTE_TO_CHAR (start_byte + cursor - base);
778                    }
779                  cursor--;
780                }
781            }
782
783	  start = BYTE_TO_CHAR (start_byte + cursor - base);
784        }
785      }
786
787  immediate_quit = 0;
788  if (shortage != 0)
789    *shortage = count * direction;
790  return start;
791}
792
793/* Search for COUNT instances of a line boundary, which means either a
794   newline or (if selective display enabled) a carriage return.
795   Start at START.  If COUNT is negative, search backwards.
796
797   We report the resulting position by calling TEMP_SET_PT_BOTH.
798
799   If we find COUNT instances. we position after (always after,
800   even if scanning backwards) the COUNTth match, and return 0.
801
802   If we don't find COUNT instances before reaching the end of the
803   buffer (or the beginning, if scanning backwards), we return
804   the number of line boundaries left unfound, and position at
805   the limit we bumped up against.
806
807   If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
808   except in special cases.  */
809
810int
811scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
812     int start, start_byte;
813     int limit, limit_byte;
814     register int count;
815     int allow_quit;
816{
817  int direction = ((count > 0) ? 1 : -1);
818
819  register unsigned char *cursor;
820  unsigned char *base;
821
822  register int ceiling;
823  register unsigned char *ceiling_addr;
824
825  int old_immediate_quit = immediate_quit;
826
827  /* The code that follows is like scan_buffer
828     but checks for either newline or carriage return.  */
829
830  if (allow_quit)
831    immediate_quit++;
832
833  start_byte = CHAR_TO_BYTE (start);
834
835  if (count > 0)
836    {
837      while (start_byte < limit_byte)
838	{
839	  ceiling =  BUFFER_CEILING_OF (start_byte);
840	  ceiling = min (limit_byte - 1, ceiling);
841	  ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
842	  base = (cursor = BYTE_POS_ADDR (start_byte));
843	  while (1)
844	    {
845	      while (*cursor != '\n' && ++cursor != ceiling_addr)
846		;
847
848	      if (cursor != ceiling_addr)
849		{
850		  if (--count == 0)
851		    {
852		      immediate_quit = old_immediate_quit;
853		      start_byte = start_byte + cursor - base + 1;
854		      start = BYTE_TO_CHAR (start_byte);
855		      TEMP_SET_PT_BOTH (start, start_byte);
856		      return 0;
857		    }
858		  else
859		    if (++cursor == ceiling_addr)
860		      break;
861		}
862	      else
863		break;
864	    }
865	  start_byte += cursor - base;
866	}
867    }
868  else
869    {
870      while (start_byte > limit_byte)
871	{
872	  ceiling = BUFFER_FLOOR_OF (start_byte - 1);
873	  ceiling = max (limit_byte, ceiling);
874	  ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
875	  base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
876	  while (1)
877	    {
878	      while (--cursor != ceiling_addr && *cursor != '\n')
879		;
880
881	      if (cursor != ceiling_addr)
882		{
883		  if (++count == 0)
884		    {
885		      immediate_quit = old_immediate_quit;
886		      /* Return the position AFTER the match we found.  */
887		      start_byte = start_byte + cursor - base + 1;
888		      start = BYTE_TO_CHAR (start_byte);
889		      TEMP_SET_PT_BOTH (start, start_byte);
890		      return 0;
891		    }
892		}
893	      else
894		break;
895	    }
896	  /* Here we add 1 to compensate for the last decrement
897	     of CURSOR, which took it past the valid range.  */
898	  start_byte += cursor - base + 1;
899	}
900    }
901
902  TEMP_SET_PT_BOTH (limit, limit_byte);
903  immediate_quit = old_immediate_quit;
904
905  return count * direction;
906}
907
908int
909find_next_newline_no_quit (from, cnt)
910     register int from, cnt;
911{
912  return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
913}
914
915/* Like find_next_newline, but returns position before the newline,
916   not after, and only search up to TO.  This isn't just
917   find_next_newline (...)-1, because you might hit TO.  */
918
919int
920find_before_next_newline (from, to, cnt)
921     int from, to, cnt;
922{
923  int shortage;
924  int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
925
926  if (shortage == 0)
927    pos--;
928
929  return pos;
930}
931
932/* Subroutines of Lisp buffer search functions. */
933
934static Lisp_Object
935search_command (string, bound, noerror, count, direction, RE, posix)
936     Lisp_Object string, bound, noerror, count;
937     int direction;
938     int RE;
939     int posix;
940{
941  register int np;
942  EMACS_INT lim, lim_byte;
943  EMACS_INT n = direction;
944
945  if (!NILP (count))
946    {
947      CHECK_NUMBER (count);
948      n *= XINT (count);
949    }
950
951  CHECK_STRING (string);
952  if (NILP (bound))
953    {
954      if (n > 0)
955	lim = ZV, lim_byte = ZV_BYTE;
956      else
957	lim = BEGV, lim_byte = BEGV_BYTE;
958    }
959  else
960    {
961      CHECK_NUMBER_COERCE_MARKER (bound);
962      lim = XINT (bound);
963      if (n > 0 ? lim < PT : lim > PT)
964	error ("Invalid search bound (wrong side of point)");
965      if (lim > ZV)
966	lim = ZV, lim_byte = ZV_BYTE;
967      else if (lim < BEGV)
968	lim = BEGV, lim_byte = BEGV_BYTE;
969      else
970	lim_byte = CHAR_TO_BYTE (lim);
971    }
972
973  /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
974  XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
975    = current_buffer->case_eqv_table;
976
977  np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
978		      (!NILP (current_buffer->case_fold_search)
979		       ? current_buffer->case_canon_table
980		       : Qnil),
981		      (!NILP (current_buffer->case_fold_search)
982		       ? current_buffer->case_eqv_table
983		       : Qnil),
984		      posix);
985  if (np <= 0)
986    {
987      if (NILP (noerror))
988	xsignal1 (Qsearch_failed, string);
989
990      if (!EQ (noerror, Qt))
991	{
992	  if (lim < BEGV || lim > ZV)
993	    abort ();
994	  SET_PT_BOTH (lim, lim_byte);
995	  return Qnil;
996#if 0 /* This would be clean, but maybe programs depend on
997	 a value of nil here.  */
998	  np = lim;
999#endif
1000	}
1001      else
1002	return Qnil;
1003    }
1004
1005  if (np < BEGV || np > ZV)
1006    abort ();
1007
1008  SET_PT (np);
1009
1010  return make_number (np);
1011}
1012
1013/* Return 1 if REGEXP it matches just one constant string.  */
1014
1015static int
1016trivial_regexp_p (regexp)
1017     Lisp_Object regexp;
1018{
1019  int len = SBYTES (regexp);
1020  unsigned char *s = SDATA (regexp);
1021  while (--len >= 0)
1022    {
1023      switch (*s++)
1024	{
1025	case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1026	  return 0;
1027	case '\\':
1028	  if (--len < 0)
1029	    return 0;
1030	  switch (*s++)
1031	    {
1032	    case '|': case '(': case ')': case '`': case '\'': case 'b':
1033	    case 'B': case '<': case '>': case 'w': case 'W': case 's':
1034	    case 'S': case '=': case '{': case '}': case '_':
1035	    case 'c': case 'C':	/* for categoryspec and notcategoryspec */
1036	    case '1': case '2': case '3': case '4': case '5':
1037	    case '6': case '7': case '8': case '9':
1038	      return 0;
1039	    }
1040	}
1041    }
1042  return 1;
1043}
1044
1045/* Search for the n'th occurrence of STRING in the current buffer,
1046   starting at position POS and stopping at position LIM,
1047   treating STRING as a literal string if RE is false or as
1048   a regular expression if RE is true.
1049
1050   If N is positive, searching is forward and LIM must be greater than POS.
1051   If N is negative, searching is backward and LIM must be less than POS.
1052
1053   Returns -x if x occurrences remain to be found (x > 0),
1054   or else the position at the beginning of the Nth occurrence
1055   (if searching backward) or the end (if searching forward).
1056
1057   POSIX is nonzero if we want full backtracking (POSIX style)
1058   for this pattern.  0 means backtrack only enough to get a valid match.  */
1059
1060#define TRANSLATE(out, trt, d)			\
1061do						\
1062  {						\
1063    if (! NILP (trt))				\
1064      {						\
1065	Lisp_Object temp;			\
1066	temp = Faref (trt, make_number (d));	\
1067	if (INTEGERP (temp))			\
1068	  out = XINT (temp);			\
1069	else					\
1070	  out = d;				\
1071      }						\
1072    else					\
1073      out = d;					\
1074  }						\
1075while (0)
1076
1077static int
1078search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1079	       RE, trt, inverse_trt, posix)
1080     Lisp_Object string;
1081     EMACS_INT pos;
1082     EMACS_INT pos_byte;
1083     EMACS_INT lim;
1084     EMACS_INT lim_byte;
1085     EMACS_INT n;
1086     int RE;
1087     Lisp_Object trt;
1088     Lisp_Object inverse_trt;
1089     int posix;
1090{
1091  EMACS_INT len = SCHARS (string);
1092  EMACS_INT len_byte = SBYTES (string);
1093  register int i;
1094
1095  if (running_asynch_code)
1096    save_search_regs ();
1097
1098  /* Searching 0 times means don't move.  */
1099  /* Null string is found at starting position.  */
1100  if (len == 0 || n == 0)
1101    {
1102      set_search_regs (pos_byte, 0);
1103      return pos;
1104    }
1105
1106  if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1107    {
1108      unsigned char *p1, *p2;
1109      int s1, s2;
1110      struct re_pattern_buffer *bufp;
1111
1112      bufp = compile_pattern (string, &search_regs, trt, posix,
1113			      !NILP (current_buffer->enable_multibyte_characters));
1114
1115      immediate_quit = 1;	/* Quit immediately if user types ^G,
1116				   because letting this function finish
1117				   can take too long. */
1118      QUIT;			/* Do a pending quit right away,
1119				   to avoid paradoxical behavior */
1120      /* Get pointers and sizes of the two strings
1121	 that make up the visible portion of the buffer. */
1122
1123      p1 = BEGV_ADDR;
1124      s1 = GPT_BYTE - BEGV_BYTE;
1125      p2 = GAP_END_ADDR;
1126      s2 = ZV_BYTE - GPT_BYTE;
1127      if (s1 < 0)
1128	{
1129	  p2 = p1;
1130	  s2 = ZV_BYTE - BEGV_BYTE;
1131	  s1 = 0;
1132	}
1133      if (s2 < 0)
1134	{
1135	  s1 = ZV_BYTE - BEGV_BYTE;
1136	  s2 = 0;
1137	}
1138      re_match_object = Qnil;
1139
1140      while (n < 0)
1141	{
1142	  int val;
1143	  val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1144			     pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1145			     &search_regs,
1146			     /* Don't allow match past current point */
1147			     pos_byte - BEGV_BYTE);
1148	  if (val == -2)
1149	    {
1150	      matcher_overflow ();
1151	    }
1152	  if (val >= 0)
1153	    {
1154	      pos_byte = search_regs.start[0] + BEGV_BYTE;
1155	      for (i = 0; i < search_regs.num_regs; i++)
1156		if (search_regs.start[i] >= 0)
1157		  {
1158		    search_regs.start[i]
1159		      = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1160		    search_regs.end[i]
1161		      = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1162		  }
1163	      XSETBUFFER (last_thing_searched, current_buffer);
1164	      /* Set pos to the new position. */
1165	      pos = search_regs.start[0];
1166	    }
1167	  else
1168	    {
1169	      immediate_quit = 0;
1170	      return (n);
1171	    }
1172	  n++;
1173	}
1174      while (n > 0)
1175	{
1176	  int val;
1177	  val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1178			     pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1179			     &search_regs,
1180			     lim_byte - BEGV_BYTE);
1181	  if (val == -2)
1182	    {
1183	      matcher_overflow ();
1184	    }
1185	  if (val >= 0)
1186	    {
1187	      pos_byte = search_regs.end[0] + BEGV_BYTE;
1188	      for (i = 0; i < search_regs.num_regs; i++)
1189		if (search_regs.start[i] >= 0)
1190		  {
1191		    search_regs.start[i]
1192		      = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1193		    search_regs.end[i]
1194		      = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1195		  }
1196	      XSETBUFFER (last_thing_searched, current_buffer);
1197	      pos = search_regs.end[0];
1198	    }
1199	  else
1200	    {
1201	      immediate_quit = 0;
1202	      return (0 - n);
1203	    }
1204	  n--;
1205	}
1206      immediate_quit = 0;
1207      return (pos);
1208    }
1209  else				/* non-RE case */
1210    {
1211      unsigned char *raw_pattern, *pat;
1212      int raw_pattern_size;
1213      int raw_pattern_size_byte;
1214      unsigned char *patbuf;
1215      int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1216      unsigned char *base_pat;
1217      /* Set to positive if we find a non-ASCII char that need
1218	 translation.  Otherwise set to zero later.  */
1219      int charset_base = -1;
1220      int boyer_moore_ok = 1;
1221
1222      /* MULTIBYTE says whether the text to be searched is multibyte.
1223	 We must convert PATTERN to match that, or we will not really
1224	 find things right.  */
1225
1226      if (multibyte == STRING_MULTIBYTE (string))
1227	{
1228	  raw_pattern = (unsigned char *) SDATA (string);
1229	  raw_pattern_size = SCHARS (string);
1230	  raw_pattern_size_byte = SBYTES (string);
1231	}
1232      else if (multibyte)
1233	{
1234	  raw_pattern_size = SCHARS (string);
1235	  raw_pattern_size_byte
1236	    = count_size_as_multibyte (SDATA (string),
1237				       raw_pattern_size);
1238	  raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1239	  copy_text (SDATA (string), raw_pattern,
1240		     SCHARS (string), 0, 1);
1241	}
1242      else
1243	{
1244	  /* Converting multibyte to single-byte.
1245
1246	     ??? Perhaps this conversion should be done in a special way
1247	     by subtracting nonascii-insert-offset from each non-ASCII char,
1248	     so that only the multibyte chars which really correspond to
1249	     the chosen single-byte character set can possibly match.  */
1250	  raw_pattern_size = SCHARS (string);
1251	  raw_pattern_size_byte = SCHARS (string);
1252	  raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1253	  copy_text (SDATA (string), raw_pattern,
1254		     SBYTES (string), 1, 0);
1255	}
1256
1257      /* Copy and optionally translate the pattern.  */
1258      len = raw_pattern_size;
1259      len_byte = raw_pattern_size_byte;
1260      patbuf = (unsigned char *) alloca (len_byte);
1261      pat = patbuf;
1262      base_pat = raw_pattern;
1263      if (multibyte)
1264	{
1265	  /* Fill patbuf by translated characters in STRING while
1266	     checking if we can use boyer-moore search.  If TRT is
1267	     non-nil, we can use boyer-moore search only if TRT can be
1268	     represented by the byte array of 256 elements.  For that,
1269	     all non-ASCII case-equivalents of all case-senstive
1270	     characters in STRING must belong to the same charset and
1271	     row.  */
1272
1273	  while (--len >= 0)
1274	    {
1275	      unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1276	      int c, translated, inverse;
1277	      int in_charlen, charlen;
1278
1279	      /* If we got here and the RE flag is set, it's because we're
1280		 dealing with a regexp known to be trivial, so the backslash
1281		 just quotes the next character.  */
1282	      if (RE && *base_pat == '\\')
1283		{
1284		  len--;
1285		  raw_pattern_size--;
1286		  len_byte--;
1287		  base_pat++;
1288		}
1289
1290	      c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1291
1292	      if (NILP (trt))
1293		{
1294		  str = base_pat;
1295		  charlen = in_charlen;
1296		}
1297	      else
1298		{
1299		  /* Translate the character.  */
1300		  TRANSLATE (translated, trt, c);
1301		  charlen = CHAR_STRING (translated, str_base);
1302		  str = str_base;
1303
1304		  /* Check if C has any other case-equivalents.  */
1305		  TRANSLATE (inverse, inverse_trt, c);
1306		  /* If so, check if we can use boyer-moore.  */
1307		  if (c != inverse && boyer_moore_ok)
1308		    {
1309		      /* Check if all equivalents belong to the same
1310			 charset & row.  Note that the check of C
1311			 itself is done by the last iteration.  Note
1312			 also that we don't have to check ASCII
1313			 characters because boyer-moore search can
1314			 always handle their translation.  */
1315		      while (1)
1316			{
1317			  if (ASCII_BYTE_P (inverse))
1318			    {
1319			      if (charset_base > 0)
1320				{
1321				  boyer_moore_ok = 0;
1322				  break;
1323				}
1324			      charset_base = 0;
1325			    }
1326			  else if (SINGLE_BYTE_CHAR_P (inverse))
1327			    {
1328			      /* Boyer-moore search can't handle a
1329				 translation of an eight-bit
1330				 character.  */
1331			      boyer_moore_ok = 0;
1332			      break;
1333			    }
1334			  else if (charset_base < 0)
1335			    charset_base = inverse & ~CHAR_FIELD3_MASK;
1336			  else if ((inverse & ~CHAR_FIELD3_MASK)
1337				   != charset_base)
1338			    {
1339			      boyer_moore_ok = 0;
1340			      break;
1341			    }
1342			  if (c == inverse)
1343			    break;
1344			  TRANSLATE (inverse, inverse_trt, inverse);
1345			}
1346		    }
1347		}
1348	      if (charset_base < 0)
1349		charset_base = 0;
1350
1351	      /* Store this character into the translated pattern.  */
1352	      bcopy (str, pat, charlen);
1353	      pat += charlen;
1354	      base_pat += in_charlen;
1355	      len_byte -= in_charlen;
1356	    }
1357	}
1358      else
1359	{
1360	  /* Unibyte buffer.  */
1361	  charset_base = 0;
1362	  while (--len >= 0)
1363	    {
1364	      int c, translated;
1365
1366	      /* If we got here and the RE flag is set, it's because we're
1367		 dealing with a regexp known to be trivial, so the backslash
1368		 just quotes the next character.  */
1369	      if (RE && *base_pat == '\\')
1370		{
1371		  len--;
1372		  raw_pattern_size--;
1373		  base_pat++;
1374		}
1375	      c = *base_pat++;
1376	      TRANSLATE (translated, trt, c);
1377	      *pat++ = translated;
1378	    }
1379	}
1380
1381      len_byte = pat - patbuf;
1382      len = raw_pattern_size;
1383      pat = base_pat = patbuf;
1384
1385      if (boyer_moore_ok)
1386	return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1387			    pos, pos_byte, lim, lim_byte,
1388			    charset_base);
1389      else
1390	return simple_search (n, pat, len, len_byte, trt,
1391			      pos, pos_byte, lim, lim_byte);
1392    }
1393}
1394
1395/* Do a simple string search N times for the string PAT,
1396   whose length is LEN/LEN_BYTE,
1397   from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1398   TRT is the translation table.
1399
1400   Return the character position where the match is found.
1401   Otherwise, if M matches remained to be found, return -M.
1402
1403   This kind of search works regardless of what is in PAT and
1404   regardless of what is in TRT.  It is used in cases where
1405   boyer_moore cannot work.  */
1406
1407static int
1408simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1409     EMACS_INT n;
1410     unsigned char *pat;
1411     EMACS_INT len, len_byte;
1412     Lisp_Object trt;
1413     EMACS_INT pos, pos_byte;
1414     EMACS_INT lim, lim_byte;
1415{
1416  int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1417  int forward = n > 0;
1418
1419  if (lim > pos && multibyte)
1420    while (n > 0)
1421      {
1422	while (1)
1423	  {
1424	    /* Try matching at position POS.  */
1425	    EMACS_INT this_pos = pos;
1426	    EMACS_INT this_pos_byte = pos_byte;
1427	    EMACS_INT this_len = len;
1428	    EMACS_INT this_len_byte = len_byte;
1429	    unsigned char *p = pat;
1430	    if (pos + len > lim)
1431	      goto stop;
1432
1433	    while (this_len > 0)
1434	      {
1435		int charlen, buf_charlen;
1436		int pat_ch, buf_ch;
1437
1438		pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1439		buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1440						 ZV_BYTE - this_pos_byte,
1441						 buf_charlen);
1442		TRANSLATE (buf_ch, trt, buf_ch);
1443
1444		if (buf_ch != pat_ch)
1445		  break;
1446
1447		this_len_byte -= charlen;
1448		this_len--;
1449		p += charlen;
1450
1451		this_pos_byte += buf_charlen;
1452		this_pos++;
1453	      }
1454
1455	    if (this_len == 0)
1456	      {
1457		pos += len;
1458		pos_byte += len_byte;
1459		break;
1460	      }
1461
1462	    INC_BOTH (pos, pos_byte);
1463	  }
1464
1465	n--;
1466      }
1467  else if (lim > pos)
1468    while (n > 0)
1469      {
1470	while (1)
1471	  {
1472	    /* Try matching at position POS.  */
1473	    EMACS_INT this_pos = pos;
1474	    EMACS_INT this_len = len;
1475	    unsigned char *p = pat;
1476
1477	    if (pos + len > lim)
1478	      goto stop;
1479
1480	    while (this_len > 0)
1481	      {
1482		int pat_ch = *p++;
1483		int buf_ch = FETCH_BYTE (this_pos);
1484		TRANSLATE (buf_ch, trt, buf_ch);
1485
1486		if (buf_ch != pat_ch)
1487		  break;
1488
1489		this_len--;
1490		this_pos++;
1491	      }
1492
1493	    if (this_len == 0)
1494	      {
1495		pos += len;
1496		break;
1497	      }
1498
1499	    pos++;
1500	  }
1501
1502	n--;
1503      }
1504  /* Backwards search.  */
1505  else if (lim < pos && multibyte)
1506    while (n < 0)
1507      {
1508	while (1)
1509	  {
1510	    /* Try matching at position POS.  */
1511	    EMACS_INT this_pos = pos - len;
1512	    EMACS_INT this_pos_byte = pos_byte - len_byte;
1513	    EMACS_INT this_len = len;
1514	    EMACS_INT this_len_byte = len_byte;
1515	    unsigned char *p = pat;
1516
1517	    if (this_pos < lim || this_pos_byte < lim_byte)
1518	      goto stop;
1519
1520	    while (this_len > 0)
1521	      {
1522		int charlen, buf_charlen;
1523		int pat_ch, buf_ch;
1524
1525		pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1526		buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1527						 ZV_BYTE - this_pos_byte,
1528						 buf_charlen);
1529		TRANSLATE (buf_ch, trt, buf_ch);
1530
1531		if (buf_ch != pat_ch)
1532		  break;
1533
1534		this_len_byte -= charlen;
1535		this_len--;
1536		p += charlen;
1537		this_pos_byte += buf_charlen;
1538		this_pos++;
1539	      }
1540
1541	    if (this_len == 0)
1542	      {
1543		pos -= len;
1544		pos_byte -= len_byte;
1545		break;
1546	      }
1547
1548	    DEC_BOTH (pos, pos_byte);
1549	  }
1550
1551	n++;
1552      }
1553  else if (lim < pos)
1554    while (n < 0)
1555      {
1556	while (1)
1557	  {
1558	    /* Try matching at position POS.  */
1559	    EMACS_INT this_pos = pos - len;
1560	    EMACS_INT this_len = len;
1561	    unsigned char *p = pat;
1562
1563	    if (pos - len < lim)
1564	      goto stop;
1565
1566	    while (this_len > 0)
1567	      {
1568		int pat_ch = *p++;
1569		int buf_ch = FETCH_BYTE (this_pos);
1570		TRANSLATE (buf_ch, trt, buf_ch);
1571
1572		if (buf_ch != pat_ch)
1573		  break;
1574		this_len--;
1575		this_pos++;
1576	      }
1577
1578	    if (this_len == 0)
1579	      {
1580		pos -= len;
1581		break;
1582	      }
1583
1584	    pos--;
1585	  }
1586
1587	n++;
1588      }
1589
1590 stop:
1591  if (n == 0)
1592    {
1593      if (forward)
1594	set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1595      else
1596	set_search_regs (multibyte ? pos_byte : pos, len_byte);
1597
1598      return pos;
1599    }
1600  else if (n > 0)
1601    return -n;
1602  else
1603    return n;
1604}
1605
1606/* Do Boyer-Moore search N times for the string BASE_PAT,
1607   whose length is LEN/LEN_BYTE,
1608   from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1609   DIRECTION says which direction we search in.
1610   TRT and INVERSE_TRT are translation tables.
1611   Characters in PAT are already translated by TRT.
1612
1613   This kind of search works if all the characters in BASE_PAT that
1614   have nontrivial translation are the same aside from the last byte.
1615   This makes it possible to translate just the last byte of a
1616   character, and do so after just a simple test of the context.
1617   CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1618
1619   If that criterion is not satisfied, do not call this function.  */
1620
1621static int
1622boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1623	     pos, pos_byte, lim, lim_byte, charset_base)
1624     EMACS_INT n;
1625     unsigned char *base_pat;
1626     EMACS_INT len, len_byte;
1627     Lisp_Object trt;
1628     Lisp_Object inverse_trt;
1629     EMACS_INT pos, pos_byte;
1630     EMACS_INT lim, lim_byte;
1631     int charset_base;
1632{
1633  EMACS_INT direction = ((n > 0) ? 1 : -1);
1634  EMACS_INT dirlen;
1635  EMACS_INT infinity, limit, stride_for_teases = 0;
1636  register unsigned char *cursor, *p_limit;
1637  EMACS_INT i, j;
1638  unsigned char *pat, *pat_end;
1639  int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1640
1641  unsigned char simple_translate[0400];
1642  /* These are set to the preceding bytes of a byte to be translated
1643     if charset_base is nonzero.  As the maximum byte length of a
1644     multibyte character is 4, we have to check at most three previous
1645     bytes.  */
1646  int translate_prev_byte1 = 0;
1647  int translate_prev_byte2 = 0;
1648  int translate_prev_byte3 = 0;
1649
1650  EMACS_INT BM_tab[0400];
1651
1652  /* The general approach is that we are going to maintain that we know */
1653  /* the first (closest to the present position, in whatever direction */
1654  /* we're searching) character that could possibly be the last */
1655  /* (furthest from present position) character of a valid match.  We */
1656  /* advance the state of our knowledge by looking at that character */
1657  /* and seeing whether it indeed matches the last character of the */
1658  /* pattern.  If it does, we take a closer look.  If it does not, we */
1659  /* move our pointer (to putative last characters) as far as is */
1660  /* logically possible.  This amount of movement, which I call a */
1661  /* stride, will be the length of the pattern if the actual character */
1662  /* appears nowhere in the pattern, otherwise it will be the distance */
1663  /* from the last occurrence of that character to the end of the */
1664  /* pattern. */
1665  /* As a coding trick, an enormous stride is coded into the table for */
1666  /* characters that match the last character.  This allows use of only */
1667  /* a single test, a test for having gone past the end of the */
1668  /* permissible match region, to test for both possible matches (when */
1669  /* the stride goes past the end immediately) and failure to */
1670  /* match (where you get nudged past the end one stride at a time). */
1671
1672  /* Here we make a "mickey mouse" BM table.  The stride of the search */
1673  /* is determined only by the last character of the putative match. */
1674  /* If that character does not match, we will stride the proper */
1675  /* distance to propose a match that superimposes it on the last */
1676  /* instance of a character that matches it (per trt), or misses */
1677  /* it entirely if there is none. */
1678
1679  dirlen = len_byte * direction;
1680  infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1681
1682  /* Record position after the end of the pattern.  */
1683  pat_end = base_pat + len_byte;
1684  /* BASE_PAT points to a character that we start scanning from.
1685     It is the first character in a forward search,
1686     the last character in a backward search.  */
1687  if (direction < 0)
1688    base_pat = pat_end - 1;
1689
1690  for (i = 0; i < 0400; i++) {
1691    BM_tab[i] = dirlen;
1692  }
1693
1694  /* We use this for translation, instead of TRT itself.
1695     We fill this in to handle the characters that actually
1696     occur in the pattern.  Others don't matter anyway!  */
1697  for (i = 0; i < 0400; i++)
1698    simple_translate[i] = i;
1699
1700  if (charset_base)
1701    {
1702      /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1703	 byte following them are the target of translation.  */
1704      int sample_char = charset_base | 0x20;
1705      unsigned char str[MAX_MULTIBYTE_LENGTH];
1706      int len = CHAR_STRING (sample_char, str);
1707
1708      translate_prev_byte1 = str[len - 2];
1709      if (len > 2)
1710	{
1711	  translate_prev_byte2 = str[len - 3];
1712	  if (len > 3)
1713	    translate_prev_byte3 = str[len - 4];
1714	}
1715    }
1716
1717  i = 0;
1718  while (i != infinity)
1719    {
1720      unsigned char *ptr = base_pat + i;
1721      i += direction;
1722      if (i == dirlen)
1723	i = infinity;
1724      if (! NILP (trt))
1725	{
1726	  /* If the byte currently looking at is the last of a
1727	     character to check case-equivalents, set CH to that
1728	     character.  An ASCII character and a non-ASCII character
1729	     matching with CHARSET_BASE are to be checked.  */
1730	  int ch = -1;
1731
1732	  if (ASCII_BYTE_P (*ptr) || ! multibyte)
1733	    ch = *ptr;
1734	  else if (charset_base
1735		   && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1736	    {
1737	      unsigned char *charstart = ptr - 1;
1738
1739	      while (! (CHAR_HEAD_P (*charstart)))
1740		charstart--;
1741	      ch = STRING_CHAR (charstart, ptr - charstart + 1);
1742	      if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1743		ch = -1;
1744	    }
1745
1746	  if (ch >= 0400)
1747	    j = ((unsigned char) ch) | 0200;
1748	  else
1749	    j = *ptr;
1750
1751	  if (i == infinity)
1752	    stride_for_teases = BM_tab[j];
1753
1754	  BM_tab[j] = dirlen - i;
1755	  /* A translation table is accompanied by its inverse -- see */
1756	  /* comment following downcase_table for details */
1757	  if (ch >= 0)
1758	    {
1759	      int starting_ch = ch;
1760	      int starting_j = j;
1761
1762	      while (1)
1763		{
1764		  TRANSLATE (ch, inverse_trt, ch);
1765		  if (ch >= 0400)
1766		    j = ((unsigned char) ch) | 0200;
1767		  else
1768		    j = (unsigned char) ch;
1769
1770		  /* For all the characters that map into CH,
1771		     set up simple_translate to map the last byte
1772		     into STARTING_J.  */
1773		  simple_translate[j] = starting_j;
1774		  if (ch == starting_ch)
1775		    break;
1776		  BM_tab[j] = dirlen - i;
1777		}
1778	    }
1779	}
1780      else
1781	{
1782	  j = *ptr;
1783
1784	  if (i == infinity)
1785	    stride_for_teases = BM_tab[j];
1786	  BM_tab[j] = dirlen - i;
1787	}
1788      /* stride_for_teases tells how much to stride if we get a */
1789      /* match on the far character but are subsequently */
1790      /* disappointed, by recording what the stride would have been */
1791      /* for that character if the last character had been */
1792      /* different. */
1793    }
1794  infinity = dirlen - infinity;
1795  pos_byte += dirlen - ((direction > 0) ? direction : 0);
1796  /* loop invariant - POS_BYTE points at where last char (first
1797     char if reverse) of pattern would align in a possible match.  */
1798  while (n != 0)
1799    {
1800      EMACS_INT tail_end;
1801      unsigned char *tail_end_ptr;
1802
1803      /* It's been reported that some (broken) compiler thinks that
1804	 Boolean expressions in an arithmetic context are unsigned.
1805	 Using an explicit ?1:0 prevents this.  */
1806      if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1807	  < 0)
1808	return (n * (0 - direction));
1809      /* First we do the part we can by pointers (maybe nothing) */
1810      QUIT;
1811      pat = base_pat;
1812      limit = pos_byte - dirlen + direction;
1813      if (direction > 0)
1814	{
1815	  limit = BUFFER_CEILING_OF (limit);
1816	  /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1817	     can take on without hitting edge of buffer or the gap.  */
1818	  limit = min (limit, pos_byte + 20000);
1819	  limit = min (limit, lim_byte - 1);
1820	}
1821      else
1822	{
1823	  limit = BUFFER_FLOOR_OF (limit);
1824	  /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1825	     can take on without hitting edge of buffer or the gap.  */
1826	  limit = max (limit, pos_byte - 20000);
1827	  limit = max (limit, lim_byte);
1828	}
1829      tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1830      tail_end_ptr = BYTE_POS_ADDR (tail_end);
1831
1832      if ((limit - pos_byte) * direction > 20)
1833	{
1834	  unsigned char *p2;
1835
1836	  p_limit = BYTE_POS_ADDR (limit);
1837	  p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1838	  /* In this loop, pos + cursor - p2 is the surrogate for pos */
1839	  while (1)		/* use one cursor setting as long as i can */
1840	    {
1841	      if (direction > 0) /* worth duplicating */
1842		{
1843		  /* Use signed comparison if appropriate
1844		     to make cursor+infinity sure to be > p_limit.
1845		     Assuming that the buffer lies in a range of addresses
1846		     that are all "positive" (as ints) or all "negative",
1847		     either kind of comparison will work as long
1848		     as we don't step by infinity.  So pick the kind
1849		     that works when we do step by infinity.  */
1850		  if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1851		    while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1852		      cursor += BM_tab[*cursor];
1853		  else
1854		    while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1855		      cursor += BM_tab[*cursor];
1856		}
1857	      else
1858		{
1859		  if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1860		    while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1861		      cursor += BM_tab[*cursor];
1862		  else
1863		    while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1864		      cursor += BM_tab[*cursor];
1865		}
1866/* If you are here, cursor is beyond the end of the searched region. */
1867/* This can happen if you match on the far character of the pattern, */
1868/* because the "stride" of that character is infinity, a number able */
1869/* to throw you well beyond the end of the search.  It can also */
1870/* happen if you fail to match within the permitted region and would */
1871/* otherwise try a character beyond that region */
1872	      if ((cursor - p_limit) * direction <= len_byte)
1873		break;	/* a small overrun is genuine */
1874	      cursor -= infinity; /* large overrun = hit */
1875	      i = dirlen - direction;
1876	      if (! NILP (trt))
1877		{
1878		  while ((i -= direction) + direction != 0)
1879		    {
1880		      int ch;
1881		      cursor -= direction;
1882		      /* Translate only the last byte of a character.  */
1883		      if (! multibyte
1884			  || ((cursor == tail_end_ptr
1885			       || CHAR_HEAD_P (cursor[1]))
1886			      && (CHAR_HEAD_P (cursor[0])
1887				  /* Check if this is the last byte of
1888				     a translable character.  */
1889				  || (translate_prev_byte1 == cursor[-1]
1890				      && (CHAR_HEAD_P (translate_prev_byte1)
1891					  || (translate_prev_byte2 == cursor[-2]
1892					      && (CHAR_HEAD_P (translate_prev_byte2)
1893						  || (translate_prev_byte3 == cursor[-3]))))))))
1894			ch = simple_translate[*cursor];
1895		      else
1896			ch = *cursor;
1897		      if (pat[i] != ch)
1898			break;
1899		    }
1900		}
1901	      else
1902		{
1903		  while ((i -= direction) + direction != 0)
1904		    {
1905		      cursor -= direction;
1906		      if (pat[i] != *cursor)
1907			break;
1908		    }
1909		}
1910	      cursor += dirlen - i - direction;	/* fix cursor */
1911	      if (i + direction == 0)
1912		{
1913		  EMACS_INT position;
1914
1915		  cursor -= direction;
1916
1917		  position = pos_byte + cursor - p2 + ((direction > 0)
1918						       ? 1 - len_byte : 0);
1919		  set_search_regs (position, len_byte);
1920
1921		  if ((n -= direction) != 0)
1922		    cursor += dirlen; /* to resume search */
1923		  else
1924		    return ((direction > 0)
1925			    ? search_regs.end[0] : search_regs.start[0]);
1926		}
1927	      else
1928		cursor += stride_for_teases; /* <sigh> we lose -  */
1929	    }
1930	  pos_byte += cursor - p2;
1931	}
1932      else
1933	/* Now we'll pick up a clump that has to be done the hard */
1934	/* way because it covers a discontinuity */
1935	{
1936	  limit = ((direction > 0)
1937		   ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1938		   : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1939	  limit = ((direction > 0)
1940		   ? min (limit + len_byte, lim_byte - 1)
1941		   : max (limit - len_byte, lim_byte));
1942	  /* LIMIT is now the last value POS_BYTE can have
1943	     and still be valid for a possible match.  */
1944	  while (1)
1945	    {
1946	      /* This loop can be coded for space rather than */
1947	      /* speed because it will usually run only once. */
1948	      /* (the reach is at most len + 21, and typically */
1949	      /* does not exceed len) */
1950	      while ((limit - pos_byte) * direction >= 0)
1951		pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1952	      /* now run the same tests to distinguish going off the */
1953	      /* end, a match or a phony match. */
1954	      if ((pos_byte - limit) * direction <= len_byte)
1955		break;	/* ran off the end */
1956	      /* Found what might be a match.
1957		 Set POS_BYTE back to last (first if reverse) pos.  */
1958	      pos_byte -= infinity;
1959	      i = dirlen - direction;
1960	      while ((i -= direction) + direction != 0)
1961		{
1962		  int ch;
1963		  unsigned char *ptr;
1964		  pos_byte -= direction;
1965		  ptr = BYTE_POS_ADDR (pos_byte);
1966		  /* Translate only the last byte of a character.  */
1967		  if (! multibyte
1968		      || ((ptr == tail_end_ptr
1969			   || CHAR_HEAD_P (ptr[1]))
1970			  && (CHAR_HEAD_P (ptr[0])
1971			      /* Check if this is the last byte of a
1972				 translable character.  */
1973			      || (translate_prev_byte1 == ptr[-1]
1974				  && (CHAR_HEAD_P (translate_prev_byte1)
1975				      || (translate_prev_byte2 == ptr[-2]
1976					  && (CHAR_HEAD_P (translate_prev_byte2)
1977					      || translate_prev_byte3 == ptr[-3])))))))
1978		    ch = simple_translate[*ptr];
1979		  else
1980		    ch = *ptr;
1981		  if (pat[i] != ch)
1982		    break;
1983		}
1984	      /* Above loop has moved POS_BYTE part or all the way
1985		 back to the first pos (last pos if reverse).
1986		 Set it once again at the last (first if reverse) char.  */
1987	      pos_byte += dirlen - i- direction;
1988	      if (i + direction == 0)
1989		{
1990		  EMACS_INT position;
1991		  pos_byte -= direction;
1992
1993		  position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1994
1995		  set_search_regs (position, len_byte);
1996
1997		  if ((n -= direction) != 0)
1998		    pos_byte += dirlen; /* to resume search */
1999		  else
2000		    return ((direction > 0)
2001			    ? search_regs.end[0] : search_regs.start[0]);
2002		}
2003	      else
2004		pos_byte += stride_for_teases;
2005	    }
2006	  }
2007      /* We have done one clump.  Can we continue? */
2008      if ((lim_byte - pos_byte) * direction < 0)
2009	return ((0 - n) * direction);
2010    }
2011  return BYTE_TO_CHAR (pos_byte);
2012}
2013
2014/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2015   for the overall match just found in the current buffer.
2016   Also clear out the match data for registers 1 and up.  */
2017
2018static void
2019set_search_regs (beg_byte, nbytes)
2020     int beg_byte, nbytes;
2021{
2022  int i;
2023
2024  /* Make sure we have registers in which to store
2025     the match position.  */
2026  if (search_regs.num_regs == 0)
2027    {
2028      search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2029      search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2030      search_regs.num_regs = 2;
2031    }
2032
2033  /* Clear out the other registers.  */
2034  for (i = 1; i < search_regs.num_regs; i++)
2035    {
2036      search_regs.start[i] = -1;
2037      search_regs.end[i] = -1;
2038    }
2039
2040  search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2041  search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2042  XSETBUFFER (last_thing_searched, current_buffer);
2043}
2044
2045/* Given a string of words separated by word delimiters,
2046  compute a regexp that matches those exact words
2047  separated by arbitrary punctuation.  */
2048
2049static Lisp_Object
2050wordify (string)
2051     Lisp_Object string;
2052{
2053  register unsigned char *p, *o;
2054  register int i, i_byte, len, punct_count = 0, word_count = 0;
2055  Lisp_Object val;
2056  int prev_c = 0;
2057  int adjust;
2058
2059  CHECK_STRING (string);
2060  p = SDATA (string);
2061  len = SCHARS (string);
2062
2063  for (i = 0, i_byte = 0; i < len; )
2064    {
2065      int c;
2066
2067      FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2068
2069      if (SYNTAX (c) != Sword)
2070	{
2071	  punct_count++;
2072	  if (i > 0 && SYNTAX (prev_c) == Sword)
2073	    word_count++;
2074	}
2075
2076      prev_c = c;
2077    }
2078
2079  if (SYNTAX (prev_c) == Sword)
2080    word_count++;
2081  if (!word_count)
2082    return empty_string;
2083
2084  adjust = - punct_count + 5 * (word_count - 1) + 4;
2085  if (STRING_MULTIBYTE (string))
2086    val = make_uninit_multibyte_string (len + adjust,
2087					SBYTES (string)
2088					+ adjust);
2089  else
2090    val = make_uninit_string (len + adjust);
2091
2092  o = SDATA (val);
2093  *o++ = '\\';
2094  *o++ = 'b';
2095  prev_c = 0;
2096
2097  for (i = 0, i_byte = 0; i < len; )
2098    {
2099      int c;
2100      int i_byte_orig = i_byte;
2101
2102      FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2103
2104      if (SYNTAX (c) == Sword)
2105	{
2106	  bcopy (SDATA (string) + i_byte_orig, o,
2107		 i_byte - i_byte_orig);
2108	  o += i_byte - i_byte_orig;
2109	}
2110      else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2111	{
2112	  *o++ = '\\';
2113	  *o++ = 'W';
2114	  *o++ = '\\';
2115	  *o++ = 'W';
2116	  *o++ = '*';
2117	}
2118
2119      prev_c = c;
2120    }
2121
2122  *o++ = '\\';
2123  *o++ = 'b';
2124
2125  return val;
2126}
2127
2128DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2129       "MSearch backward: ",
2130       doc: /* Search backward from point for STRING.
2131Set point to the beginning of the occurrence found, and return point.
2132An optional second argument bounds the search; it is a buffer position.
2133The match found must not extend before that position.
2134Optional third argument, if t, means if fail just return nil (no error).
2135 If not nil and not t, position at limit of search and return nil.
2136Optional fourth argument is repeat count--search for successive occurrences.
2137
2138Search case-sensitivity is determined by the value of the variable
2139`case-fold-search', which see.
2140
2141See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2142     (string, bound, noerror, count)
2143     Lisp_Object string, bound, noerror, count;
2144{
2145  return search_command (string, bound, noerror, count, -1, 0, 0);
2146}
2147
2148DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2149       doc: /* Search forward from point for STRING.
2150Set point to the end of the occurrence found, and return point.
2151An optional second argument bounds the search; it is a buffer position.
2152The match found must not extend after that position.  A value of nil is
2153  equivalent to (point-max).
2154Optional third argument, if t, means if fail just return nil (no error).
2155  If not nil and not t, move to limit of search and return nil.
2156Optional fourth argument is repeat count--search for successive occurrences.
2157
2158Search case-sensitivity is determined by the value of the variable
2159`case-fold-search', which see.
2160
2161See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2162     (string, bound, noerror, count)
2163     Lisp_Object string, bound, noerror, count;
2164{
2165  return search_command (string, bound, noerror, count, 1, 0, 0);
2166}
2167
2168DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2169       "sWord search backward: ",
2170       doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2171Set point to the beginning of the occurrence found, and return point.
2172An optional second argument bounds the search; it is a buffer position.
2173The match found must not extend before that position.
2174Optional third argument, if t, means if fail just return nil (no error).
2175  If not nil and not t, move to limit of search and return nil.
2176Optional fourth argument is repeat count--search for successive occurrences.  */)
2177     (string, bound, noerror, count)
2178     Lisp_Object string, bound, noerror, count;
2179{
2180  return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2181}
2182
2183DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2184       "sWord search: ",
2185       doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2186Set point to the end of the occurrence found, and return point.
2187An optional second argument bounds the search; it is a buffer position.
2188The match found must not extend after that position.
2189Optional third argument, if t, means if fail just return nil (no error).
2190  If not nil and not t, move to limit of search and return nil.
2191Optional fourth argument is repeat count--search for successive occurrences.  */)
2192     (string, bound, noerror, count)
2193     Lisp_Object string, bound, noerror, count;
2194{
2195  return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2196}
2197
2198DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2199       "sRE search backward: ",
2200       doc: /* Search backward from point for match for regular expression REGEXP.
2201Set point to the beginning of the match, and return point.
2202The match found is the one starting last in the buffer
2203and yet ending before the origin of the search.
2204An optional second argument bounds the search; it is a buffer position.
2205The match found must start at or after that position.
2206Optional third argument, if t, means if fail just return nil (no error).
2207  If not nil and not t, move to limit of search and return nil.
2208Optional fourth argument is repeat count--search for successive occurrences.
2209See also the functions `match-beginning', `match-end', `match-string',
2210and `replace-match'.  */)
2211     (regexp, bound, noerror, count)
2212     Lisp_Object regexp, bound, noerror, count;
2213{
2214  return search_command (regexp, bound, noerror, count, -1, 1, 0);
2215}
2216
2217DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2218       "sRE search: ",
2219       doc: /* Search forward from point for regular expression REGEXP.
2220Set point to the end of the occurrence found, and return point.
2221An optional second argument bounds the search; it is a buffer position.
2222The match found must not extend after that position.
2223Optional third argument, if t, means if fail just return nil (no error).
2224  If not nil and not t, move to limit of search and return nil.
2225Optional fourth argument is repeat count--search for successive occurrences.
2226See also the functions `match-beginning', `match-end', `match-string',
2227and `replace-match'.  */)
2228     (regexp, bound, noerror, count)
2229     Lisp_Object regexp, bound, noerror, count;
2230{
2231  return search_command (regexp, bound, noerror, count, 1, 1, 0);
2232}
2233
2234DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2235       "sPosix search backward: ",
2236       doc: /* Search backward from point for match for regular expression REGEXP.
2237Find the longest match in accord with Posix regular expression rules.
2238Set point to the beginning of the match, and return point.
2239The match found is the one starting last in the buffer
2240and yet ending before the origin of the search.
2241An optional second argument bounds the search; it is a buffer position.
2242The match found must start at or after that position.
2243Optional third argument, if t, means if fail just return nil (no error).
2244  If not nil and not t, move to limit of search and return nil.
2245Optional fourth argument is repeat count--search for successive occurrences.
2246See also the functions `match-beginning', `match-end', `match-string',
2247and `replace-match'.  */)
2248     (regexp, bound, noerror, count)
2249     Lisp_Object regexp, bound, noerror, count;
2250{
2251  return search_command (regexp, bound, noerror, count, -1, 1, 1);
2252}
2253
2254DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2255       "sPosix search: ",
2256       doc: /* Search forward from point for regular expression REGEXP.
2257Find the longest match in accord with Posix regular expression rules.
2258Set point to the end of the occurrence found, and return point.
2259An optional second argument bounds the search; it is a buffer position.
2260The match found must not extend after that position.
2261Optional third argument, if t, means if fail just return nil (no error).
2262  If not nil and not t, move to limit of search and return nil.
2263Optional fourth argument is repeat count--search for successive occurrences.
2264See also the functions `match-beginning', `match-end', `match-string',
2265and `replace-match'.  */)
2266     (regexp, bound, noerror, count)
2267     Lisp_Object regexp, bound, noerror, count;
2268{
2269  return search_command (regexp, bound, noerror, count, 1, 1, 1);
2270}
2271
2272DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2273       doc: /* Replace text matched by last search with NEWTEXT.
2274Leave point at the end of the replacement text.
2275
2276If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2277Otherwise maybe capitalize the whole text, or maybe just word initials,
2278based on the replaced text.
2279If the replaced text has only capital letters
2280and has at least one multiletter word, convert NEWTEXT to all caps.
2281Otherwise if all words are capitalized in the replaced text,
2282capitalize each word in NEWTEXT.
2283
2284If third arg LITERAL is non-nil, insert NEWTEXT literally.
2285Otherwise treat `\\' as special:
2286  `\\&' in NEWTEXT means substitute original matched text.
2287  `\\N' means substitute what matched the Nth `\\(...\\)'.
2288       If Nth parens didn't match, substitute nothing.
2289  `\\\\' means insert one `\\'.
2290Case conversion does not apply to these substitutions.
2291
2292FIXEDCASE and LITERAL are optional arguments.
2293
2294The optional fourth argument STRING can be a string to modify.
2295This is meaningful when the previous match was done against STRING,
2296using `string-match'.  When used this way, `replace-match'
2297creates and returns a new string made by copying STRING and replacing
2298the part of STRING that was matched.
2299
2300The optional fifth argument SUBEXP specifies a subexpression;
2301it says to replace just that subexpression with NEWTEXT,
2302rather than replacing the entire matched text.
2303This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2304`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2305NEWTEXT in place of subexp N.
2306This is useful only after a regular expression search or match,
2307since only regular expressions have distinguished subexpressions.  */)
2308     (newtext, fixedcase, literal, string, subexp)
2309     Lisp_Object newtext, fixedcase, literal, string, subexp;
2310{
2311  enum { nochange, all_caps, cap_initial } case_action;
2312  register int pos, pos_byte;
2313  int some_multiletter_word;
2314  int some_lowercase;
2315  int some_uppercase;
2316  int some_nonuppercase_initial;
2317  register int c, prevc;
2318  int sub;
2319  int opoint, newpoint;
2320
2321  CHECK_STRING (newtext);
2322
2323  if (! NILP (string))
2324    CHECK_STRING (string);
2325
2326  case_action = nochange;	/* We tried an initialization */
2327				/* but some C compilers blew it */
2328
2329  if (search_regs.num_regs <= 0)
2330    error ("`replace-match' called before any match found");
2331
2332  if (NILP (subexp))
2333    sub = 0;
2334  else
2335    {
2336      CHECK_NUMBER (subexp);
2337      sub = XINT (subexp);
2338      if (sub < 0 || sub >= search_regs.num_regs)
2339	args_out_of_range (subexp, make_number (search_regs.num_regs));
2340    }
2341
2342  if (NILP (string))
2343    {
2344      if (search_regs.start[sub] < BEGV
2345	  || search_regs.start[sub] > search_regs.end[sub]
2346	  || search_regs.end[sub] > ZV)
2347	args_out_of_range (make_number (search_regs.start[sub]),
2348			   make_number (search_regs.end[sub]));
2349    }
2350  else
2351    {
2352      if (search_regs.start[sub] < 0
2353	  || search_regs.start[sub] > search_regs.end[sub]
2354	  || search_regs.end[sub] > SCHARS (string))
2355	args_out_of_range (make_number (search_regs.start[sub]),
2356			   make_number (search_regs.end[sub]));
2357    }
2358
2359  if (NILP (fixedcase))
2360    {
2361      /* Decide how to casify by examining the matched text. */
2362      int last;
2363
2364      pos = search_regs.start[sub];
2365      last = search_regs.end[sub];
2366
2367      if (NILP (string))
2368	pos_byte = CHAR_TO_BYTE (pos);
2369      else
2370	pos_byte = string_char_to_byte (string, pos);
2371
2372      prevc = '\n';
2373      case_action = all_caps;
2374
2375      /* some_multiletter_word is set nonzero if any original word
2376	 is more than one letter long. */
2377      some_multiletter_word = 0;
2378      some_lowercase = 0;
2379      some_nonuppercase_initial = 0;
2380      some_uppercase = 0;
2381
2382      while (pos < last)
2383	{
2384	  if (NILP (string))
2385	    {
2386	      c = FETCH_CHAR (pos_byte);
2387	      INC_BOTH (pos, pos_byte);
2388	    }
2389	  else
2390	    FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2391
2392	  if (LOWERCASEP (c))
2393	    {
2394	      /* Cannot be all caps if any original char is lower case */
2395
2396	      some_lowercase = 1;
2397	      if (SYNTAX (prevc) != Sword)
2398		some_nonuppercase_initial = 1;
2399	      else
2400		some_multiletter_word = 1;
2401	    }
2402	  else if (UPPERCASEP (c))
2403	    {
2404	      some_uppercase = 1;
2405	      if (SYNTAX (prevc) != Sword)
2406		;
2407	      else
2408		some_multiletter_word = 1;
2409	    }
2410	  else
2411	    {
2412	      /* If the initial is a caseless word constituent,
2413		 treat that like a lowercase initial.  */
2414	      if (SYNTAX (prevc) != Sword)
2415		some_nonuppercase_initial = 1;
2416	    }
2417
2418	  prevc = c;
2419	}
2420
2421      /* Convert to all caps if the old text is all caps
2422	 and has at least one multiletter word.  */
2423      if (! some_lowercase && some_multiletter_word)
2424	case_action = all_caps;
2425      /* Capitalize each word, if the old text has all capitalized words.  */
2426      else if (!some_nonuppercase_initial && some_multiletter_word)
2427	case_action = cap_initial;
2428      else if (!some_nonuppercase_initial && some_uppercase)
2429	/* Should x -> yz, operating on X, give Yz or YZ?
2430	   We'll assume the latter.  */
2431	case_action = all_caps;
2432      else
2433	case_action = nochange;
2434    }
2435
2436  /* Do replacement in a string.  */
2437  if (!NILP (string))
2438    {
2439      Lisp_Object before, after;
2440
2441      before = Fsubstring (string, make_number (0),
2442			   make_number (search_regs.start[sub]));
2443      after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2444
2445      /* Substitute parts of the match into NEWTEXT
2446	 if desired.  */
2447      if (NILP (literal))
2448	{
2449	  int lastpos = 0;
2450	  int lastpos_byte = 0;
2451	  /* We build up the substituted string in ACCUM.  */
2452	  Lisp_Object accum;
2453	  Lisp_Object middle;
2454	  int length = SBYTES (newtext);
2455
2456	  accum = Qnil;
2457
2458	  for (pos_byte = 0, pos = 0; pos_byte < length;)
2459	    {
2460	      int substart = -1;
2461	      int subend = 0;
2462	      int delbackslash = 0;
2463
2464	      FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2465
2466	      if (c == '\\')
2467		{
2468		  FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2469
2470		  if (c == '&')
2471		    {
2472		      substart = search_regs.start[sub];
2473		      subend = search_regs.end[sub];
2474		    }
2475		  else if (c >= '1' && c <= '9')
2476		    {
2477		      if (search_regs.start[c - '0'] >= 0
2478			  && c <= search_regs.num_regs + '0')
2479			{
2480			  substart = search_regs.start[c - '0'];
2481			  subend = search_regs.end[c - '0'];
2482			}
2483		      else
2484			{
2485			  /* If that subexp did not match,
2486			     replace \\N with nothing.  */
2487			  substart = 0;
2488			  subend = 0;
2489			}
2490		    }
2491		  else if (c == '\\')
2492		    delbackslash = 1;
2493		  else
2494		    error ("Invalid use of `\\' in replacement text");
2495		}
2496	      if (substart >= 0)
2497		{
2498		  if (pos - 2 != lastpos)
2499		    middle = substring_both (newtext, lastpos,
2500					     lastpos_byte,
2501					     pos - 2, pos_byte - 2);
2502		  else
2503		    middle = Qnil;
2504		  accum = concat3 (accum, middle,
2505				   Fsubstring (string,
2506					       make_number (substart),
2507					       make_number (subend)));
2508		  lastpos = pos;
2509		  lastpos_byte = pos_byte;
2510		}
2511	      else if (delbackslash)
2512		{
2513		  middle = substring_both (newtext, lastpos,
2514					   lastpos_byte,
2515					   pos - 1, pos_byte - 1);
2516
2517		  accum = concat2 (accum, middle);
2518		  lastpos = pos;
2519		  lastpos_byte = pos_byte;
2520		}
2521	    }
2522
2523	  if (pos != lastpos)
2524	    middle = substring_both (newtext, lastpos,
2525				     lastpos_byte,
2526				     pos, pos_byte);
2527	  else
2528	    middle = Qnil;
2529
2530	  newtext = concat2 (accum, middle);
2531	}
2532
2533      /* Do case substitution in NEWTEXT if desired.  */
2534      if (case_action == all_caps)
2535	newtext = Fupcase (newtext);
2536      else if (case_action == cap_initial)
2537	newtext = Fupcase_initials (newtext);
2538
2539      return concat3 (before, newtext, after);
2540    }
2541
2542  /* Record point, then move (quietly) to the start of the match.  */
2543  if (PT >= search_regs.end[sub])
2544    opoint = PT - ZV;
2545  else if (PT > search_regs.start[sub])
2546    opoint = search_regs.end[sub] - ZV;
2547  else
2548    opoint = PT;
2549
2550  /* If we want non-literal replacement,
2551     perform substitution on the replacement string.  */
2552  if (NILP (literal))
2553    {
2554      int length = SBYTES (newtext);
2555      unsigned char *substed;
2556      int substed_alloc_size, substed_len;
2557      int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2558      int str_multibyte = STRING_MULTIBYTE (newtext);
2559      Lisp_Object rev_tbl;
2560      int really_changed = 0;
2561
2562      rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2563		? Fchar_table_extra_slot (Vnonascii_translation_table,
2564					  make_number (0))
2565		: Qnil);
2566
2567      substed_alloc_size = length * 2 + 100;
2568      substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2569      substed_len = 0;
2570
2571      /* Go thru NEWTEXT, producing the actual text to insert in
2572	 SUBSTED while adjusting multibyteness to that of the current
2573	 buffer.  */
2574
2575      for (pos_byte = 0, pos = 0; pos_byte < length;)
2576	{
2577	  unsigned char str[MAX_MULTIBYTE_LENGTH];
2578	  unsigned char *add_stuff = NULL;
2579	  int add_len = 0;
2580	  int idx = -1;
2581
2582	  if (str_multibyte)
2583	    {
2584	      FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2585	      if (!buf_multibyte)
2586		c = multibyte_char_to_unibyte (c, rev_tbl);
2587	    }
2588	  else
2589	    {
2590	      /* Note that we don't have to increment POS.  */
2591	      c = SREF (newtext, pos_byte++);
2592	      if (buf_multibyte)
2593		c = unibyte_char_to_multibyte (c);
2594	    }
2595
2596	  /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2597	     or set IDX to a match index, which means put that part
2598	     of the buffer text into SUBSTED.  */
2599
2600	  if (c == '\\')
2601	    {
2602	      really_changed = 1;
2603
2604	      if (str_multibyte)
2605		{
2606		  FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2607						      pos, pos_byte);
2608		  if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2609		    c = multibyte_char_to_unibyte (c, rev_tbl);
2610		}
2611	      else
2612		{
2613		  c = SREF (newtext, pos_byte++);
2614		  if (buf_multibyte)
2615		    c = unibyte_char_to_multibyte (c);
2616		}
2617
2618	      if (c == '&')
2619		idx = sub;
2620	      else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2621		{
2622		  if (search_regs.start[c - '0'] >= 1)
2623		    idx = c - '0';
2624		}
2625	      else if (c == '\\')
2626		add_len = 1, add_stuff = "\\";
2627	      else
2628		{
2629		  xfree (substed);
2630		  error ("Invalid use of `\\' in replacement text");
2631		}
2632	    }
2633	  else
2634	    {
2635	      add_len = CHAR_STRING (c, str);
2636	      add_stuff = str;
2637	    }
2638
2639	  /* If we want to copy part of a previous match,
2640	     set up ADD_STUFF and ADD_LEN to point to it.  */
2641	  if (idx >= 0)
2642	    {
2643	      int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2644	      add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2645	      if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2646		move_gap (search_regs.start[idx]);
2647	      add_stuff = BYTE_POS_ADDR (begbyte);
2648	    }
2649
2650	  /* Now the stuff we want to add to SUBSTED
2651	     is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2652
2653	  /* Make sure SUBSTED is big enough.  */
2654	  if (substed_len + add_len >= substed_alloc_size)
2655	    {
2656	      substed_alloc_size = substed_len + add_len + 500;
2657	      substed = (unsigned char *) xrealloc (substed,
2658						    substed_alloc_size + 1);
2659	    }
2660
2661	  /* Now add to the end of SUBSTED.  */
2662	  if (add_stuff)
2663	    {
2664	      bcopy (add_stuff, substed + substed_len, add_len);
2665	      substed_len += add_len;
2666	    }
2667	}
2668
2669      if (really_changed)
2670	{
2671	  if (buf_multibyte)
2672	    {
2673	      int nchars = multibyte_chars_in_text (substed, substed_len);
2674
2675	      newtext = make_multibyte_string (substed, nchars, substed_len);
2676	    }
2677	  else
2678	    newtext = make_unibyte_string (substed, substed_len);
2679	}
2680      xfree (substed);
2681    }
2682
2683  /* Replace the old text with the new in the cleanest possible way.  */
2684  replace_range (search_regs.start[sub], search_regs.end[sub],
2685		 newtext, 1, 0, 1);
2686  newpoint = search_regs.start[sub] + SCHARS (newtext);
2687
2688  if (case_action == all_caps)
2689    Fupcase_region (make_number (search_regs.start[sub]),
2690		    make_number (newpoint));
2691  else if (case_action == cap_initial)
2692    Fupcase_initials_region (make_number (search_regs.start[sub]),
2693			     make_number (newpoint));
2694
2695  /* Adjust search data for this change.  */
2696  {
2697    int oldend = search_regs.end[sub];
2698    int oldstart = search_regs.start[sub];
2699    int change = newpoint - search_regs.end[sub];
2700    int i;
2701
2702    for (i = 0; i < search_regs.num_regs; i++)
2703      {
2704	if (search_regs.start[i] >= oldend)
2705	  search_regs.start[i] += change;
2706	else if (search_regs.start[i] > oldstart)
2707	  search_regs.start[i] = oldstart;
2708	if (search_regs.end[i] >= oldend)
2709	  search_regs.end[i] += change;
2710	else if (search_regs.end[i] > oldstart)
2711	  search_regs.end[i] = oldstart;
2712      }
2713  }
2714
2715  /* Put point back where it was in the text.  */
2716  if (opoint <= 0)
2717    TEMP_SET_PT (opoint + ZV);
2718  else
2719    TEMP_SET_PT (opoint);
2720
2721  /* Now move point "officially" to the start of the inserted replacement.  */
2722  move_if_not_intangible (newpoint);
2723
2724  return Qnil;
2725}
2726
2727static Lisp_Object
2728match_limit (num, beginningp)
2729     Lisp_Object num;
2730     int beginningp;
2731{
2732  register int n;
2733
2734  CHECK_NUMBER (num);
2735  n = XINT (num);
2736  if (n < 0)
2737    args_out_of_range (num, make_number (0));
2738  if (search_regs.num_regs <= 0)
2739    error ("No match data, because no search succeeded");
2740  if (n >= search_regs.num_regs
2741      || search_regs.start[n] < 0)
2742    return Qnil;
2743  return (make_number ((beginningp) ? search_regs.start[n]
2744		                    : search_regs.end[n]));
2745}
2746
2747DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2748       doc: /* Return position of start of text matched by last search.
2749SUBEXP, a number, specifies which parenthesized expression in the last
2750  regexp.
2751Value is nil if SUBEXPth pair didn't match, or there were less than
2752  SUBEXP pairs.
2753Zero means the entire text matched by the whole regexp or whole string.  */)
2754     (subexp)
2755     Lisp_Object subexp;
2756{
2757  return match_limit (subexp, 1);
2758}
2759
2760DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2761       doc: /* Return position of end of text matched by last search.
2762SUBEXP, a number, specifies which parenthesized expression in the last
2763  regexp.
2764Value is nil if SUBEXPth pair didn't match, or there were less than
2765  SUBEXP pairs.
2766Zero means the entire text matched by the whole regexp or whole string.  */)
2767     (subexp)
2768     Lisp_Object subexp;
2769{
2770  return match_limit (subexp, 0);
2771}
2772
2773DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2774       doc: /* Return a list containing all info on what the last search matched.
2775Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2776All the elements are markers or nil (nil if the Nth pair didn't match)
2777if the last match was on a buffer; integers or nil if a string was matched.
2778Use `store-match-data' to reinstate the data in this list.
2779
2780If INTEGERS (the optional first argument) is non-nil, always use
2781integers \(rather than markers) to represent buffer positions.  In
2782this case, and if the last match was in a buffer, the buffer will get
2783stored as one additional element at the end of the list.
2784
2785If REUSE is a list, reuse it as part of the value.  If REUSE is long
2786enough to hold all the values, and if INTEGERS is non-nil, no consing
2787is done.
2788
2789If optional third arg RESEAT is non-nil, any previous markers on the
2790REUSE list will be modified to point to nowhere.
2791
2792Return value is undefined if the last search failed.  */)
2793  (integers, reuse, reseat)
2794     Lisp_Object integers, reuse, reseat;
2795{
2796  Lisp_Object tail, prev;
2797  Lisp_Object *data;
2798  int i, len;
2799
2800  if (!NILP (reseat))
2801    for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2802      if (MARKERP (XCAR (tail)))
2803	{
2804	  unchain_marker (XMARKER (XCAR (tail)));
2805	  XSETCAR (tail, Qnil);
2806	}
2807
2808  if (NILP (last_thing_searched))
2809    return Qnil;
2810
2811  prev = Qnil;
2812
2813  data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2814				 * sizeof (Lisp_Object));
2815
2816  len = 0;
2817  for (i = 0; i < search_regs.num_regs; i++)
2818    {
2819      int start = search_regs.start[i];
2820      if (start >= 0)
2821	{
2822	  if (EQ (last_thing_searched, Qt)
2823	      || ! NILP (integers))
2824	    {
2825	      XSETFASTINT (data[2 * i], start);
2826	      XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2827	    }
2828	  else if (BUFFERP (last_thing_searched))
2829	    {
2830	      data[2 * i] = Fmake_marker ();
2831	      Fset_marker (data[2 * i],
2832			   make_number (start),
2833			   last_thing_searched);
2834	      data[2 * i + 1] = Fmake_marker ();
2835	      Fset_marker (data[2 * i + 1],
2836			   make_number (search_regs.end[i]),
2837			   last_thing_searched);
2838	    }
2839	  else
2840	    /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2841	    abort ();
2842
2843	  len = 2 * i + 2;
2844	}
2845      else
2846	data[2 * i] = data[2 * i + 1] = Qnil;
2847    }
2848
2849  if (BUFFERP (last_thing_searched) && !NILP (integers))
2850    {
2851      data[len] = last_thing_searched;
2852      len++;
2853    }
2854
2855  /* If REUSE is not usable, cons up the values and return them.  */
2856  if (! CONSP (reuse))
2857    return Flist (len, data);
2858
2859  /* If REUSE is a list, store as many value elements as will fit
2860     into the elements of REUSE.  */
2861  for (i = 0, tail = reuse; CONSP (tail);
2862       i++, tail = XCDR (tail))
2863    {
2864      if (i < len)
2865	XSETCAR (tail, data[i]);
2866      else
2867	XSETCAR (tail, Qnil);
2868      prev = tail;
2869    }
2870
2871  /* If we couldn't fit all value elements into REUSE,
2872     cons up the rest of them and add them to the end of REUSE.  */
2873  if (i < len)
2874    XSETCDR (prev, Flist (len - i, data + i));
2875
2876  return reuse;
2877}
2878
2879/* Internal usage only:
2880   If RESEAT is `evaporate', put the markers back on the free list
2881   immediately.  No other references to the markers must exist in this case,
2882   so it is used only internally on the unwind stack and save-match-data from
2883   Lisp.  */
2884
2885DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2886       doc: /* Set internal data on last search match from elements of LIST.
2887LIST should have been created by calling `match-data' previously.
2888
2889If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2890    (list, reseat)
2891     register Lisp_Object list, reseat;
2892{
2893  register int i;
2894  register Lisp_Object marker;
2895
2896  if (running_asynch_code)
2897    save_search_regs ();
2898
2899  CHECK_LIST (list);
2900
2901  /* Unless we find a marker with a buffer or an explicit buffer
2902     in LIST, assume that this match data came from a string.  */
2903  last_thing_searched = Qt;
2904
2905  /* Allocate registers if they don't already exist.  */
2906  {
2907    int length = XFASTINT (Flength (list)) / 2;
2908
2909    if (length > search_regs.num_regs)
2910      {
2911	if (search_regs.num_regs == 0)
2912	  {
2913	    search_regs.start
2914	      = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2915	    search_regs.end
2916	      = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2917	  }
2918	else
2919	  {
2920	    search_regs.start
2921	      = (regoff_t *) xrealloc (search_regs.start,
2922				       length * sizeof (regoff_t));
2923	    search_regs.end
2924	      = (regoff_t *) xrealloc (search_regs.end,
2925				       length * sizeof (regoff_t));
2926	  }
2927
2928	for (i = search_regs.num_regs; i < length; i++)
2929	  search_regs.start[i] = -1;
2930
2931	search_regs.num_regs = length;
2932      }
2933
2934    for (i = 0; CONSP (list); i++)
2935      {
2936	marker = XCAR (list);
2937	if (BUFFERP (marker))
2938	  {
2939	    last_thing_searched = marker;
2940	    break;
2941	  }
2942	if (i >= length)
2943	  break;
2944	if (NILP (marker))
2945	  {
2946	    search_regs.start[i] = -1;
2947	    list = XCDR (list);
2948	  }
2949	else
2950	  {
2951	    int from;
2952	    Lisp_Object m;
2953
2954	    m = marker;
2955	    if (MARKERP (marker))
2956	      {
2957		if (XMARKER (marker)->buffer == 0)
2958		  XSETFASTINT (marker, 0);
2959		else
2960		  XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2961	      }
2962
2963	    CHECK_NUMBER_COERCE_MARKER (marker);
2964	    from = XINT (marker);
2965
2966	    if (!NILP (reseat) && MARKERP (m))
2967	      {
2968		if (EQ (reseat, Qevaporate))
2969		  free_marker (m);
2970		else
2971		  unchain_marker (XMARKER (m));
2972		XSETCAR (list, Qnil);
2973	      }
2974
2975	    if ((list = XCDR (list), !CONSP (list)))
2976	      break;
2977
2978	    m = marker = XCAR (list);
2979
2980	    if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2981	      XSETFASTINT (marker, 0);
2982
2983	    CHECK_NUMBER_COERCE_MARKER (marker);
2984	    search_regs.start[i] = from;
2985	    search_regs.end[i] = XINT (marker);
2986
2987	    if (!NILP (reseat) && MARKERP (m))
2988	      {
2989		if (EQ (reseat, Qevaporate))
2990		  free_marker (m);
2991		else
2992		  unchain_marker (XMARKER (m));
2993		XSETCAR (list, Qnil);
2994	      }
2995	  }
2996	list = XCDR (list);
2997      }
2998
2999    for (; i < search_regs.num_regs; i++)
3000      search_regs.start[i] = -1;
3001  }
3002
3003  return Qnil;
3004}
3005
3006/* If non-zero the match data have been saved in saved_search_regs
3007   during the execution of a sentinel or filter. */
3008static int search_regs_saved;
3009static struct re_registers saved_search_regs;
3010static Lisp_Object saved_last_thing_searched;
3011
3012/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3013   if asynchronous code (filter or sentinel) is running. */
3014static void
3015save_search_regs ()
3016{
3017  if (!search_regs_saved)
3018    {
3019      saved_search_regs.num_regs = search_regs.num_regs;
3020      saved_search_regs.start = search_regs.start;
3021      saved_search_regs.end = search_regs.end;
3022      saved_last_thing_searched = last_thing_searched;
3023      last_thing_searched = Qnil;
3024      search_regs.num_regs = 0;
3025      search_regs.start = 0;
3026      search_regs.end = 0;
3027
3028      search_regs_saved = 1;
3029    }
3030}
3031
3032/* Called upon exit from filters and sentinels. */
3033void
3034restore_search_regs ()
3035{
3036  if (search_regs_saved)
3037    {
3038      if (search_regs.num_regs > 0)
3039	{
3040	  xfree (search_regs.start);
3041	  xfree (search_regs.end);
3042	}
3043      search_regs.num_regs = saved_search_regs.num_regs;
3044      search_regs.start = saved_search_regs.start;
3045      search_regs.end = saved_search_regs.end;
3046      last_thing_searched = saved_last_thing_searched;
3047      saved_last_thing_searched = Qnil;
3048      search_regs_saved = 0;
3049    }
3050}
3051
3052static Lisp_Object
3053unwind_set_match_data (list)
3054     Lisp_Object list;
3055{
3056  /* It is safe to free (evaporate) the markers immediately.  */
3057  return Fset_match_data (list, Qevaporate);
3058}
3059
3060/* Called to unwind protect the match data.  */
3061void
3062record_unwind_save_match_data ()
3063{
3064  record_unwind_protect (unwind_set_match_data,
3065			 Fmatch_data (Qnil, Qnil, Qnil));
3066}
3067
3068/* Quote a string to inactivate reg-expr chars */
3069
3070DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3071       doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3072     (string)
3073     Lisp_Object string;
3074{
3075  register unsigned char *in, *out, *end;
3076  register unsigned char *temp;
3077  int backslashes_added = 0;
3078
3079  CHECK_STRING (string);
3080
3081  temp = (unsigned char *) alloca (SBYTES (string) * 2);
3082
3083  /* Now copy the data into the new string, inserting escapes. */
3084
3085  in = SDATA (string);
3086  end = in + SBYTES (string);
3087  out = temp;
3088
3089  for (; in != end; in++)
3090    {
3091      if (*in == '['
3092	  || *in == '*' || *in == '.' || *in == '\\'
3093	  || *in == '?' || *in == '+'
3094	  || *in == '^' || *in == '$')
3095	*out++ = '\\', backslashes_added++;
3096      *out++ = *in;
3097    }
3098
3099  return make_specified_string (temp,
3100				SCHARS (string) + backslashes_added,
3101				out - temp,
3102				STRING_MULTIBYTE (string));
3103}
3104
3105void
3106syms_of_search ()
3107{
3108  register int i;
3109
3110  for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3111    {
3112      searchbufs[i].buf.allocated = 100;
3113      searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3114      searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3115      searchbufs[i].regexp = Qnil;
3116      searchbufs[i].whitespace_regexp = Qnil;
3117      searchbufs[i].syntax_table = Qnil;
3118      staticpro (&searchbufs[i].regexp);
3119      staticpro (&searchbufs[i].whitespace_regexp);
3120      staticpro (&searchbufs[i].syntax_table);
3121      searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3122    }
3123  searchbuf_head = &searchbufs[0];
3124
3125  Qsearch_failed = intern ("search-failed");
3126  staticpro (&Qsearch_failed);
3127  Qinvalid_regexp = intern ("invalid-regexp");
3128  staticpro (&Qinvalid_regexp);
3129
3130  Fput (Qsearch_failed, Qerror_conditions,
3131	Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3132  Fput (Qsearch_failed, Qerror_message,
3133	build_string ("Search failed"));
3134
3135  Fput (Qinvalid_regexp, Qerror_conditions,
3136	Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3137  Fput (Qinvalid_regexp, Qerror_message,
3138	build_string ("Invalid regexp"));
3139
3140  last_thing_searched = Qnil;
3141  staticpro (&last_thing_searched);
3142
3143  saved_last_thing_searched = Qnil;
3144  staticpro (&saved_last_thing_searched);
3145
3146  DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3147      doc: /* Regexp to substitute for bunches of spaces in regexp search.
3148Some commands use this for user-specified regexps.
3149Spaces that occur inside character classes or repetition operators
3150or other such regexp constructs are not replaced with this.
3151A value of nil (which is the normal value) means treat spaces literally.  */);
3152  Vsearch_spaces_regexp = Qnil;
3153
3154  defsubr (&Slooking_at);
3155  defsubr (&Sposix_looking_at);
3156  defsubr (&Sstring_match);
3157  defsubr (&Sposix_string_match);
3158  defsubr (&Ssearch_forward);
3159  defsubr (&Ssearch_backward);
3160  defsubr (&Sword_search_forward);
3161  defsubr (&Sword_search_backward);
3162  defsubr (&Sre_search_forward);
3163  defsubr (&Sre_search_backward);
3164  defsubr (&Sposix_search_forward);
3165  defsubr (&Sposix_search_backward);
3166  defsubr (&Sreplace_match);
3167  defsubr (&Smatch_beginning);
3168  defsubr (&Smatch_end);
3169  defsubr (&Smatch_data);
3170  defsubr (&Sset_match_data);
3171  defsubr (&Sregexp_quote);
3172}
3173
3174/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3175   (do not change this comment) */
3176