1/* String search routines for GNU Emacs. 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002, 3 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. 4 5This file is part of GNU Emacs. 6 7GNU Emacs is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GNU Emacs is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GNU Emacs; see the file COPYING. If not, write to 19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 20Boston, MA 02110-1301, USA. */ 21 22 23#include <config.h> 24#include "lisp.h" 25#include "syntax.h" 26#include "category.h" 27#include "buffer.h" 28#include "charset.h" 29#include "region-cache.h" 30#include "commands.h" 31#include "blockinput.h" 32#include "intervals.h" 33 34#include <sys/types.h> 35#include "regex.h" 36 37#define REGEXP_CACHE_SIZE 20 38 39/* If the regexp is non-nil, then the buffer contains the compiled form 40 of that regexp, suitable for searching. */ 41struct regexp_cache 42{ 43 struct regexp_cache *next; 44 Lisp_Object regexp, whitespace_regexp; 45 /* Syntax table for which the regexp applies. We need this because 46 of character classes. If this is t, then the compiled pattern is valid 47 for any syntax-table. */ 48 Lisp_Object syntax_table; 49 struct re_pattern_buffer buf; 50 char fastmap[0400]; 51 /* Nonzero means regexp was compiled to do full POSIX backtracking. */ 52 char posix; 53}; 54 55/* The instances of that struct. */ 56struct regexp_cache searchbufs[REGEXP_CACHE_SIZE]; 57 58/* The head of the linked list; points to the most recently used buffer. */ 59struct regexp_cache *searchbuf_head; 60 61 62/* Every call to re_match, etc., must pass &search_regs as the regs 63 argument unless you can show it is unnecessary (i.e., if re_match 64 is certainly going to be called again before region-around-match 65 can be called). 66 67 Since the registers are now dynamically allocated, we need to make 68 sure not to refer to the Nth register before checking that it has 69 been allocated by checking search_regs.num_regs. 70 71 The regex code keeps track of whether it has allocated the search 72 buffer using bits in the re_pattern_buffer. This means that whenever 73 you compile a new pattern, it completely forgets whether it has 74 allocated any registers, and will allocate new registers the next 75 time you call a searching or matching function. Therefore, we need 76 to call re_set_registers after compiling a new pattern or after 77 setting the match registers, so that the regex functions will be 78 able to free or re-allocate it properly. */ 79static struct re_registers search_regs; 80 81/* The buffer in which the last search was performed, or 82 Qt if the last search was done in a string; 83 Qnil if no searching has been done yet. */ 84static Lisp_Object last_thing_searched; 85 86/* error condition signaled when regexp compile_pattern fails */ 87 88Lisp_Object Qinvalid_regexp; 89 90/* Error condition used for failing searches */ 91Lisp_Object Qsearch_failed; 92 93Lisp_Object Vsearch_spaces_regexp; 94 95static void set_search_regs (); 96static void save_search_regs (); 97static int simple_search (); 98static int boyer_moore (); 99static int search_buffer (); 100static void matcher_overflow () NO_RETURN; 101 102static void 103matcher_overflow () 104{ 105 error ("Stack overflow in regexp matcher"); 106} 107 108/* Compile a regexp and signal a Lisp error if anything goes wrong. 109 PATTERN is the pattern to compile. 110 CP is the place to put the result. 111 TRANSLATE is a translation table for ignoring case, or nil for none. 112 REGP is the structure that says where to store the "register" 113 values that will result from matching this pattern. 114 If it is 0, we should compile the pattern not to record any 115 subexpression bounds. 116 POSIX is nonzero if we want full backtracking (POSIX style) 117 for this pattern. 0 means backtrack only enough to get a valid match. 118 MULTIBYTE is nonzero if we want to handle multibyte characters in 119 PATTERN. 0 means all multibyte characters are recognized just as 120 sequences of binary data. 121 122 The behavior also depends on Vsearch_spaces_regexp. */ 123 124static void 125compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) 126 struct regexp_cache *cp; 127 Lisp_Object pattern; 128 Lisp_Object translate; 129 struct re_registers *regp; 130 int posix; 131 int multibyte; 132{ 133 unsigned char *raw_pattern; 134 int raw_pattern_size; 135 char *val; 136 reg_syntax_t old; 137 138 /* MULTIBYTE says whether the text to be searched is multibyte. 139 We must convert PATTERN to match that, or we will not really 140 find things right. */ 141 142 if (multibyte == STRING_MULTIBYTE (pattern)) 143 { 144 raw_pattern = (unsigned char *) SDATA (pattern); 145 raw_pattern_size = SBYTES (pattern); 146 } 147 else if (multibyte) 148 { 149 raw_pattern_size = count_size_as_multibyte (SDATA (pattern), 150 SCHARS (pattern)); 151 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); 152 copy_text (SDATA (pattern), raw_pattern, 153 SCHARS (pattern), 0, 1); 154 } 155 else 156 { 157 /* Converting multibyte to single-byte. 158 159 ??? Perhaps this conversion should be done in a special way 160 by subtracting nonascii-insert-offset from each non-ASCII char, 161 so that only the multibyte chars which really correspond to 162 the chosen single-byte character set can possibly match. */ 163 raw_pattern_size = SCHARS (pattern); 164 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); 165 copy_text (SDATA (pattern), raw_pattern, 166 SBYTES (pattern), 1, 0); 167 } 168 169 cp->regexp = Qnil; 170 cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); 171 cp->posix = posix; 172 cp->buf.multibyte = multibyte; 173 cp->whitespace_regexp = Vsearch_spaces_regexp; 174 /* rms: I think BLOCK_INPUT is not needed here any more, 175 because regex.c defines malloc to call xmalloc. 176 Using BLOCK_INPUT here means the debugger won't run if an error occurs. 177 So let's turn it off. */ 178 /* BLOCK_INPUT; */ 179 old = re_set_syntax (RE_SYNTAX_EMACS 180 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); 181 182 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL 183 : SDATA (Vsearch_spaces_regexp)); 184 185 val = (char *) re_compile_pattern ((char *)raw_pattern, 186 raw_pattern_size, &cp->buf); 187 188 /* If the compiled pattern hard codes some of the contents of the 189 syntax-table, it can only be reused with *this* syntax table. */ 190 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt; 191 192 re_set_whitespace_regexp (NULL); 193 194 re_set_syntax (old); 195 /* UNBLOCK_INPUT; */ 196 if (val) 197 xsignal1 (Qinvalid_regexp, build_string (val)); 198 199 cp->regexp = Fcopy_sequence (pattern); 200} 201 202/* Shrink each compiled regexp buffer in the cache 203 to the size actually used right now. 204 This is called from garbage collection. */ 205 206void 207shrink_regexp_cache () 208{ 209 struct regexp_cache *cp; 210 211 for (cp = searchbuf_head; cp != 0; cp = cp->next) 212 { 213 cp->buf.allocated = cp->buf.used; 214 cp->buf.buffer 215 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used); 216 } 217} 218 219/* Clear the regexp cache w.r.t. a particular syntax table, 220 because it was changed. 221 There is no danger of memory leak here because re_compile_pattern 222 automagically manages the memory in each re_pattern_buffer struct, 223 based on its `allocated' and `buffer' values. */ 224void 225clear_regexp_cache () 226{ 227 int i; 228 229 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) 230 /* It's tempting to compare with the syntax-table we've actually changd, 231 but it's not sufficient because char-table inheritance mewans that 232 modifying one syntax-table can change others at the same time. */ 233 if (!EQ (searchbufs[i].syntax_table, Qt)) 234 searchbufs[i].regexp = Qnil; 235} 236 237/* Compile a regexp if necessary, but first check to see if there's one in 238 the cache. 239 PATTERN is the pattern to compile. 240 TRANSLATE is a translation table for ignoring case, or nil for none. 241 REGP is the structure that says where to store the "register" 242 values that will result from matching this pattern. 243 If it is 0, we should compile the pattern not to record any 244 subexpression bounds. 245 POSIX is nonzero if we want full backtracking (POSIX style) 246 for this pattern. 0 means backtrack only enough to get a valid match. */ 247 248struct re_pattern_buffer * 249compile_pattern (pattern, regp, translate, posix, multibyte) 250 Lisp_Object pattern; 251 struct re_registers *regp; 252 Lisp_Object translate; 253 int posix, multibyte; 254{ 255 struct regexp_cache *cp, **cpp; 256 257 for (cpp = &searchbuf_head; ; cpp = &cp->next) 258 { 259 cp = *cpp; 260 /* Entries are initialized to nil, and may be set to nil by 261 compile_pattern_1 if the pattern isn't valid. Don't apply 262 string accessors in those cases. However, compile_pattern_1 263 is only applied to the cache entry we pick here to reuse. So 264 nil should never appear before a non-nil entry. */ 265 if (NILP (cp->regexp)) 266 goto compile_it; 267 if (SCHARS (cp->regexp) == SCHARS (pattern) 268 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern) 269 && !NILP (Fstring_equal (cp->regexp, pattern)) 270 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0))) 271 && cp->posix == posix 272 && cp->buf.multibyte == multibyte 273 && (EQ (cp->syntax_table, Qt) 274 || EQ (cp->syntax_table, current_buffer->syntax_table)) 275 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))) 276 break; 277 278 /* If we're at the end of the cache, compile into the nil cell 279 we found, or the last (least recently used) cell with a 280 string value. */ 281 if (cp->next == 0) 282 { 283 compile_it: 284 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte); 285 break; 286 } 287 } 288 289 /* When we get here, cp (aka *cpp) contains the compiled pattern, 290 either because we found it in the cache or because we just compiled it. 291 Move it to the front of the queue to mark it as most recently used. */ 292 *cpp = cp->next; 293 cp->next = searchbuf_head; 294 searchbuf_head = cp; 295 296 /* Advise the searching functions about the space we have allocated 297 for register data. */ 298 if (regp) 299 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end); 300 301 return &cp->buf; 302} 303 304 305static Lisp_Object 306looking_at_1 (string, posix) 307 Lisp_Object string; 308 int posix; 309{ 310 Lisp_Object val; 311 unsigned char *p1, *p2; 312 int s1, s2; 313 register int i; 314 struct re_pattern_buffer *bufp; 315 316 if (running_asynch_code) 317 save_search_regs (); 318 319 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */ 320 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2] 321 = current_buffer->case_eqv_table; 322 323 CHECK_STRING (string); 324 bufp = compile_pattern (string, &search_regs, 325 (!NILP (current_buffer->case_fold_search) 326 ? current_buffer->case_canon_table : Qnil), 327 posix, 328 !NILP (current_buffer->enable_multibyte_characters)); 329 330 immediate_quit = 1; 331 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */ 332 333 /* Get pointers and sizes of the two strings 334 that make up the visible portion of the buffer. */ 335 336 p1 = BEGV_ADDR; 337 s1 = GPT_BYTE - BEGV_BYTE; 338 p2 = GAP_END_ADDR; 339 s2 = ZV_BYTE - GPT_BYTE; 340 if (s1 < 0) 341 { 342 p2 = p1; 343 s2 = ZV_BYTE - BEGV_BYTE; 344 s1 = 0; 345 } 346 if (s2 < 0) 347 { 348 s1 = ZV_BYTE - BEGV_BYTE; 349 s2 = 0; 350 } 351 352 re_match_object = Qnil; 353 354 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2, 355 PT_BYTE - BEGV_BYTE, &search_regs, 356 ZV_BYTE - BEGV_BYTE); 357 immediate_quit = 0; 358 359 if (i == -2) 360 matcher_overflow (); 361 362 val = (0 <= i ? Qt : Qnil); 363 if (i >= 0) 364 for (i = 0; i < search_regs.num_regs; i++) 365 if (search_regs.start[i] >= 0) 366 { 367 search_regs.start[i] 368 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); 369 search_regs.end[i] 370 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); 371 } 372 XSETBUFFER (last_thing_searched, current_buffer); 373 return val; 374} 375 376DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0, 377 doc: /* Return t if text after point matches regular expression REGEXP. 378This function modifies the match data that `match-beginning', 379`match-end' and `match-data' access; save and restore the match 380data if you want to preserve them. */) 381 (regexp) 382 Lisp_Object regexp; 383{ 384 return looking_at_1 (regexp, 0); 385} 386 387DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0, 388 doc: /* Return t if text after point matches regular expression REGEXP. 389Find the longest match, in accord with Posix regular expression rules. 390This function modifies the match data that `match-beginning', 391`match-end' and `match-data' access; save and restore the match 392data if you want to preserve them. */) 393 (regexp) 394 Lisp_Object regexp; 395{ 396 return looking_at_1 (regexp, 1); 397} 398 399static Lisp_Object 400string_match_1 (regexp, string, start, posix) 401 Lisp_Object regexp, string, start; 402 int posix; 403{ 404 int val; 405 struct re_pattern_buffer *bufp; 406 int pos, pos_byte; 407 int i; 408 409 if (running_asynch_code) 410 save_search_regs (); 411 412 CHECK_STRING (regexp); 413 CHECK_STRING (string); 414 415 if (NILP (start)) 416 pos = 0, pos_byte = 0; 417 else 418 { 419 int len = SCHARS (string); 420 421 CHECK_NUMBER (start); 422 pos = XINT (start); 423 if (pos < 0 && -pos <= len) 424 pos = len + pos; 425 else if (0 > pos || pos > len) 426 args_out_of_range (string, start); 427 pos_byte = string_char_to_byte (string, pos); 428 } 429 430 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */ 431 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2] 432 = current_buffer->case_eqv_table; 433 434 bufp = compile_pattern (regexp, &search_regs, 435 (!NILP (current_buffer->case_fold_search) 436 ? current_buffer->case_canon_table : Qnil), 437 posix, 438 STRING_MULTIBYTE (string)); 439 immediate_quit = 1; 440 re_match_object = string; 441 442 val = re_search (bufp, (char *) SDATA (string), 443 SBYTES (string), pos_byte, 444 SBYTES (string) - pos_byte, 445 &search_regs); 446 immediate_quit = 0; 447 last_thing_searched = Qt; 448 if (val == -2) 449 matcher_overflow (); 450 if (val < 0) return Qnil; 451 452 for (i = 0; i < search_regs.num_regs; i++) 453 if (search_regs.start[i] >= 0) 454 { 455 search_regs.start[i] 456 = string_byte_to_char (string, search_regs.start[i]); 457 search_regs.end[i] 458 = string_byte_to_char (string, search_regs.end[i]); 459 } 460 461 return make_number (string_byte_to_char (string, val)); 462} 463 464DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0, 465 doc: /* Return index of start of first match for REGEXP in STRING, or nil. 466Matching ignores case if `case-fold-search' is non-nil. 467If third arg START is non-nil, start search at that index in STRING. 468For index of first char beyond the match, do (match-end 0). 469`match-end' and `match-beginning' also give indices of substrings 470matched by parenthesis constructs in the pattern. 471 472You can use the function `match-string' to extract the substrings 473matched by the parenthesis constructions in REGEXP. */) 474 (regexp, string, start) 475 Lisp_Object regexp, string, start; 476{ 477 return string_match_1 (regexp, string, start, 0); 478} 479 480DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0, 481 doc: /* Return index of start of first match for REGEXP in STRING, or nil. 482Find the longest match, in accord with Posix regular expression rules. 483Case is ignored if `case-fold-search' is non-nil in the current buffer. 484If third arg START is non-nil, start search at that index in STRING. 485For index of first char beyond the match, do (match-end 0). 486`match-end' and `match-beginning' also give indices of substrings 487matched by parenthesis constructs in the pattern. */) 488 (regexp, string, start) 489 Lisp_Object regexp, string, start; 490{ 491 return string_match_1 (regexp, string, start, 1); 492} 493 494/* Match REGEXP against STRING, searching all of STRING, 495 and return the index of the match, or negative on failure. 496 This does not clobber the match data. */ 497 498int 499fast_string_match (regexp, string) 500 Lisp_Object regexp, string; 501{ 502 int val; 503 struct re_pattern_buffer *bufp; 504 505 bufp = compile_pattern (regexp, 0, Qnil, 506 0, STRING_MULTIBYTE (string)); 507 immediate_quit = 1; 508 re_match_object = string; 509 510 val = re_search (bufp, (char *) SDATA (string), 511 SBYTES (string), 0, 512 SBYTES (string), 0); 513 immediate_quit = 0; 514 return val; 515} 516 517/* Match REGEXP against STRING, searching all of STRING ignoring case, 518 and return the index of the match, or negative on failure. 519 This does not clobber the match data. 520 We assume that STRING contains single-byte characters. */ 521 522extern Lisp_Object Vascii_downcase_table; 523 524int 525fast_c_string_match_ignore_case (regexp, string) 526 Lisp_Object regexp; 527 const char *string; 528{ 529 int val; 530 struct re_pattern_buffer *bufp; 531 int len = strlen (string); 532 533 regexp = string_make_unibyte (regexp); 534 re_match_object = Qt; 535 bufp = compile_pattern (regexp, 0, 536 Vascii_canon_table, 0, 537 0); 538 immediate_quit = 1; 539 val = re_search (bufp, string, len, 0, len, 0); 540 immediate_quit = 0; 541 return val; 542} 543 544/* Like fast_string_match but ignore case. */ 545 546int 547fast_string_match_ignore_case (regexp, string) 548 Lisp_Object regexp, string; 549{ 550 int val; 551 struct re_pattern_buffer *bufp; 552 553 bufp = compile_pattern (regexp, 0, Vascii_canon_table, 554 0, STRING_MULTIBYTE (string)); 555 immediate_quit = 1; 556 re_match_object = string; 557 558 val = re_search (bufp, (char *) SDATA (string), 559 SBYTES (string), 0, 560 SBYTES (string), 0); 561 immediate_quit = 0; 562 return val; 563} 564 565/* The newline cache: remembering which sections of text have no newlines. */ 566 567/* If the user has requested newline caching, make sure it's on. 568 Otherwise, make sure it's off. 569 This is our cheezy way of associating an action with the change of 570 state of a buffer-local variable. */ 571static void 572newline_cache_on_off (buf) 573 struct buffer *buf; 574{ 575 if (NILP (buf->cache_long_line_scans)) 576 { 577 /* It should be off. */ 578 if (buf->newline_cache) 579 { 580 free_region_cache (buf->newline_cache); 581 buf->newline_cache = 0; 582 } 583 } 584 else 585 { 586 /* It should be on. */ 587 if (buf->newline_cache == 0) 588 buf->newline_cache = new_region_cache (); 589 } 590} 591 592 593/* Search for COUNT instances of the character TARGET between START and END. 594 595 If COUNT is positive, search forwards; END must be >= START. 596 If COUNT is negative, search backwards for the -COUNTth instance; 597 END must be <= START. 598 If COUNT is zero, do anything you please; run rogue, for all I care. 599 600 If END is zero, use BEGV or ZV instead, as appropriate for the 601 direction indicated by COUNT. 602 603 If we find COUNT instances, set *SHORTAGE to zero, and return the 604 position past the COUNTth match. Note that for reverse motion 605 this is not the same as the usual convention for Emacs motion commands. 606 607 If we don't find COUNT instances before reaching END, set *SHORTAGE 608 to the number of TARGETs left unfound, and return END. 609 610 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do 611 except when inside redisplay. */ 612 613int 614scan_buffer (target, start, end, count, shortage, allow_quit) 615 register int target; 616 int start, end; 617 int count; 618 int *shortage; 619 int allow_quit; 620{ 621 struct region_cache *newline_cache; 622 int direction; 623 624 if (count > 0) 625 { 626 direction = 1; 627 if (! end) end = ZV; 628 } 629 else 630 { 631 direction = -1; 632 if (! end) end = BEGV; 633 } 634 635 newline_cache_on_off (current_buffer); 636 newline_cache = current_buffer->newline_cache; 637 638 if (shortage != 0) 639 *shortage = 0; 640 641 immediate_quit = allow_quit; 642 643 if (count > 0) 644 while (start != end) 645 { 646 /* Our innermost scanning loop is very simple; it doesn't know 647 about gaps, buffer ends, or the newline cache. ceiling is 648 the position of the last character before the next such 649 obstacle --- the last character the dumb search loop should 650 examine. */ 651 int ceiling_byte = CHAR_TO_BYTE (end) - 1; 652 int start_byte = CHAR_TO_BYTE (start); 653 int tem; 654 655 /* If we're looking for a newline, consult the newline cache 656 to see where we can avoid some scanning. */ 657 if (target == '\n' && newline_cache) 658 { 659 int next_change; 660 immediate_quit = 0; 661 while (region_cache_forward 662 (current_buffer, newline_cache, start_byte, &next_change)) 663 start_byte = next_change; 664 immediate_quit = allow_quit; 665 666 /* START should never be after END. */ 667 if (start_byte > ceiling_byte) 668 start_byte = ceiling_byte; 669 670 /* Now the text after start is an unknown region, and 671 next_change is the position of the next known region. */ 672 ceiling_byte = min (next_change - 1, ceiling_byte); 673 } 674 675 /* The dumb loop can only scan text stored in contiguous 676 bytes. BUFFER_CEILING_OF returns the last character 677 position that is contiguous, so the ceiling is the 678 position after that. */ 679 tem = BUFFER_CEILING_OF (start_byte); 680 ceiling_byte = min (tem, ceiling_byte); 681 682 { 683 /* The termination address of the dumb loop. */ 684 register unsigned char *ceiling_addr 685 = BYTE_POS_ADDR (ceiling_byte) + 1; 686 register unsigned char *cursor 687 = BYTE_POS_ADDR (start_byte); 688 unsigned char *base = cursor; 689 690 while (cursor < ceiling_addr) 691 { 692 unsigned char *scan_start = cursor; 693 694 /* The dumb loop. */ 695 while (*cursor != target && ++cursor < ceiling_addr) 696 ; 697 698 /* If we're looking for newlines, cache the fact that 699 the region from start to cursor is free of them. */ 700 if (target == '\n' && newline_cache) 701 know_region_cache (current_buffer, newline_cache, 702 start_byte + scan_start - base, 703 start_byte + cursor - base); 704 705 /* Did we find the target character? */ 706 if (cursor < ceiling_addr) 707 { 708 if (--count == 0) 709 { 710 immediate_quit = 0; 711 return BYTE_TO_CHAR (start_byte + cursor - base + 1); 712 } 713 cursor++; 714 } 715 } 716 717 start = BYTE_TO_CHAR (start_byte + cursor - base); 718 } 719 } 720 else 721 while (start > end) 722 { 723 /* The last character to check before the next obstacle. */ 724 int ceiling_byte = CHAR_TO_BYTE (end); 725 int start_byte = CHAR_TO_BYTE (start); 726 int tem; 727 728 /* Consult the newline cache, if appropriate. */ 729 if (target == '\n' && newline_cache) 730 { 731 int next_change; 732 immediate_quit = 0; 733 while (region_cache_backward 734 (current_buffer, newline_cache, start_byte, &next_change)) 735 start_byte = next_change; 736 immediate_quit = allow_quit; 737 738 /* Start should never be at or before end. */ 739 if (start_byte <= ceiling_byte) 740 start_byte = ceiling_byte + 1; 741 742 /* Now the text before start is an unknown region, and 743 next_change is the position of the next known region. */ 744 ceiling_byte = max (next_change, ceiling_byte); 745 } 746 747 /* Stop scanning before the gap. */ 748 tem = BUFFER_FLOOR_OF (start_byte - 1); 749 ceiling_byte = max (tem, ceiling_byte); 750 751 { 752 /* The termination address of the dumb loop. */ 753 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte); 754 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1); 755 unsigned char *base = cursor; 756 757 while (cursor >= ceiling_addr) 758 { 759 unsigned char *scan_start = cursor; 760 761 while (*cursor != target && --cursor >= ceiling_addr) 762 ; 763 764 /* If we're looking for newlines, cache the fact that 765 the region from after the cursor to start is free of them. */ 766 if (target == '\n' && newline_cache) 767 know_region_cache (current_buffer, newline_cache, 768 start_byte + cursor - base, 769 start_byte + scan_start - base); 770 771 /* Did we find the target character? */ 772 if (cursor >= ceiling_addr) 773 { 774 if (++count >= 0) 775 { 776 immediate_quit = 0; 777 return BYTE_TO_CHAR (start_byte + cursor - base); 778 } 779 cursor--; 780 } 781 } 782 783 start = BYTE_TO_CHAR (start_byte + cursor - base); 784 } 785 } 786 787 immediate_quit = 0; 788 if (shortage != 0) 789 *shortage = count * direction; 790 return start; 791} 792 793/* Search for COUNT instances of a line boundary, which means either a 794 newline or (if selective display enabled) a carriage return. 795 Start at START. If COUNT is negative, search backwards. 796 797 We report the resulting position by calling TEMP_SET_PT_BOTH. 798 799 If we find COUNT instances. we position after (always after, 800 even if scanning backwards) the COUNTth match, and return 0. 801 802 If we don't find COUNT instances before reaching the end of the 803 buffer (or the beginning, if scanning backwards), we return 804 the number of line boundaries left unfound, and position at 805 the limit we bumped up against. 806 807 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do 808 except in special cases. */ 809 810int 811scan_newline (start, start_byte, limit, limit_byte, count, allow_quit) 812 int start, start_byte; 813 int limit, limit_byte; 814 register int count; 815 int allow_quit; 816{ 817 int direction = ((count > 0) ? 1 : -1); 818 819 register unsigned char *cursor; 820 unsigned char *base; 821 822 register int ceiling; 823 register unsigned char *ceiling_addr; 824 825 int old_immediate_quit = immediate_quit; 826 827 /* The code that follows is like scan_buffer 828 but checks for either newline or carriage return. */ 829 830 if (allow_quit) 831 immediate_quit++; 832 833 start_byte = CHAR_TO_BYTE (start); 834 835 if (count > 0) 836 { 837 while (start_byte < limit_byte) 838 { 839 ceiling = BUFFER_CEILING_OF (start_byte); 840 ceiling = min (limit_byte - 1, ceiling); 841 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1; 842 base = (cursor = BYTE_POS_ADDR (start_byte)); 843 while (1) 844 { 845 while (*cursor != '\n' && ++cursor != ceiling_addr) 846 ; 847 848 if (cursor != ceiling_addr) 849 { 850 if (--count == 0) 851 { 852 immediate_quit = old_immediate_quit; 853 start_byte = start_byte + cursor - base + 1; 854 start = BYTE_TO_CHAR (start_byte); 855 TEMP_SET_PT_BOTH (start, start_byte); 856 return 0; 857 } 858 else 859 if (++cursor == ceiling_addr) 860 break; 861 } 862 else 863 break; 864 } 865 start_byte += cursor - base; 866 } 867 } 868 else 869 { 870 while (start_byte > limit_byte) 871 { 872 ceiling = BUFFER_FLOOR_OF (start_byte - 1); 873 ceiling = max (limit_byte, ceiling); 874 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1; 875 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1); 876 while (1) 877 { 878 while (--cursor != ceiling_addr && *cursor != '\n') 879 ; 880 881 if (cursor != ceiling_addr) 882 { 883 if (++count == 0) 884 { 885 immediate_quit = old_immediate_quit; 886 /* Return the position AFTER the match we found. */ 887 start_byte = start_byte + cursor - base + 1; 888 start = BYTE_TO_CHAR (start_byte); 889 TEMP_SET_PT_BOTH (start, start_byte); 890 return 0; 891 } 892 } 893 else 894 break; 895 } 896 /* Here we add 1 to compensate for the last decrement 897 of CURSOR, which took it past the valid range. */ 898 start_byte += cursor - base + 1; 899 } 900 } 901 902 TEMP_SET_PT_BOTH (limit, limit_byte); 903 immediate_quit = old_immediate_quit; 904 905 return count * direction; 906} 907 908int 909find_next_newline_no_quit (from, cnt) 910 register int from, cnt; 911{ 912 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0); 913} 914 915/* Like find_next_newline, but returns position before the newline, 916 not after, and only search up to TO. This isn't just 917 find_next_newline (...)-1, because you might hit TO. */ 918 919int 920find_before_next_newline (from, to, cnt) 921 int from, to, cnt; 922{ 923 int shortage; 924 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1); 925 926 if (shortage == 0) 927 pos--; 928 929 return pos; 930} 931 932/* Subroutines of Lisp buffer search functions. */ 933 934static Lisp_Object 935search_command (string, bound, noerror, count, direction, RE, posix) 936 Lisp_Object string, bound, noerror, count; 937 int direction; 938 int RE; 939 int posix; 940{ 941 register int np; 942 EMACS_INT lim, lim_byte; 943 EMACS_INT n = direction; 944 945 if (!NILP (count)) 946 { 947 CHECK_NUMBER (count); 948 n *= XINT (count); 949 } 950 951 CHECK_STRING (string); 952 if (NILP (bound)) 953 { 954 if (n > 0) 955 lim = ZV, lim_byte = ZV_BYTE; 956 else 957 lim = BEGV, lim_byte = BEGV_BYTE; 958 } 959 else 960 { 961 CHECK_NUMBER_COERCE_MARKER (bound); 962 lim = XINT (bound); 963 if (n > 0 ? lim < PT : lim > PT) 964 error ("Invalid search bound (wrong side of point)"); 965 if (lim > ZV) 966 lim = ZV, lim_byte = ZV_BYTE; 967 else if (lim < BEGV) 968 lim = BEGV, lim_byte = BEGV_BYTE; 969 else 970 lim_byte = CHAR_TO_BYTE (lim); 971 } 972 973 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */ 974 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2] 975 = current_buffer->case_eqv_table; 976 977 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE, 978 (!NILP (current_buffer->case_fold_search) 979 ? current_buffer->case_canon_table 980 : Qnil), 981 (!NILP (current_buffer->case_fold_search) 982 ? current_buffer->case_eqv_table 983 : Qnil), 984 posix); 985 if (np <= 0) 986 { 987 if (NILP (noerror)) 988 xsignal1 (Qsearch_failed, string); 989 990 if (!EQ (noerror, Qt)) 991 { 992 if (lim < BEGV || lim > ZV) 993 abort (); 994 SET_PT_BOTH (lim, lim_byte); 995 return Qnil; 996#if 0 /* This would be clean, but maybe programs depend on 997 a value of nil here. */ 998 np = lim; 999#endif 1000 } 1001 else 1002 return Qnil; 1003 } 1004 1005 if (np < BEGV || np > ZV) 1006 abort (); 1007 1008 SET_PT (np); 1009 1010 return make_number (np); 1011} 1012 1013/* Return 1 if REGEXP it matches just one constant string. */ 1014 1015static int 1016trivial_regexp_p (regexp) 1017 Lisp_Object regexp; 1018{ 1019 int len = SBYTES (regexp); 1020 unsigned char *s = SDATA (regexp); 1021 while (--len >= 0) 1022 { 1023 switch (*s++) 1024 { 1025 case '.': case '*': case '+': case '?': case '[': case '^': case '$': 1026 return 0; 1027 case '\\': 1028 if (--len < 0) 1029 return 0; 1030 switch (*s++) 1031 { 1032 case '|': case '(': case ')': case '`': case '\'': case 'b': 1033 case 'B': case '<': case '>': case 'w': case 'W': case 's': 1034 case 'S': case '=': case '{': case '}': case '_': 1035 case 'c': case 'C': /* for categoryspec and notcategoryspec */ 1036 case '1': case '2': case '3': case '4': case '5': 1037 case '6': case '7': case '8': case '9': 1038 return 0; 1039 } 1040 } 1041 } 1042 return 1; 1043} 1044 1045/* Search for the n'th occurrence of STRING in the current buffer, 1046 starting at position POS and stopping at position LIM, 1047 treating STRING as a literal string if RE is false or as 1048 a regular expression if RE is true. 1049 1050 If N is positive, searching is forward and LIM must be greater than POS. 1051 If N is negative, searching is backward and LIM must be less than POS. 1052 1053 Returns -x if x occurrences remain to be found (x > 0), 1054 or else the position at the beginning of the Nth occurrence 1055 (if searching backward) or the end (if searching forward). 1056 1057 POSIX is nonzero if we want full backtracking (POSIX style) 1058 for this pattern. 0 means backtrack only enough to get a valid match. */ 1059 1060#define TRANSLATE(out, trt, d) \ 1061do \ 1062 { \ 1063 if (! NILP (trt)) \ 1064 { \ 1065 Lisp_Object temp; \ 1066 temp = Faref (trt, make_number (d)); \ 1067 if (INTEGERP (temp)) \ 1068 out = XINT (temp); \ 1069 else \ 1070 out = d; \ 1071 } \ 1072 else \ 1073 out = d; \ 1074 } \ 1075while (0) 1076 1077static int 1078search_buffer (string, pos, pos_byte, lim, lim_byte, n, 1079 RE, trt, inverse_trt, posix) 1080 Lisp_Object string; 1081 EMACS_INT pos; 1082 EMACS_INT pos_byte; 1083 EMACS_INT lim; 1084 EMACS_INT lim_byte; 1085 EMACS_INT n; 1086 int RE; 1087 Lisp_Object trt; 1088 Lisp_Object inverse_trt; 1089 int posix; 1090{ 1091 EMACS_INT len = SCHARS (string); 1092 EMACS_INT len_byte = SBYTES (string); 1093 register int i; 1094 1095 if (running_asynch_code) 1096 save_search_regs (); 1097 1098 /* Searching 0 times means don't move. */ 1099 /* Null string is found at starting position. */ 1100 if (len == 0 || n == 0) 1101 { 1102 set_search_regs (pos_byte, 0); 1103 return pos; 1104 } 1105 1106 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) 1107 { 1108 unsigned char *p1, *p2; 1109 int s1, s2; 1110 struct re_pattern_buffer *bufp; 1111 1112 bufp = compile_pattern (string, &search_regs, trt, posix, 1113 !NILP (current_buffer->enable_multibyte_characters)); 1114 1115 immediate_quit = 1; /* Quit immediately if user types ^G, 1116 because letting this function finish 1117 can take too long. */ 1118 QUIT; /* Do a pending quit right away, 1119 to avoid paradoxical behavior */ 1120 /* Get pointers and sizes of the two strings 1121 that make up the visible portion of the buffer. */ 1122 1123 p1 = BEGV_ADDR; 1124 s1 = GPT_BYTE - BEGV_BYTE; 1125 p2 = GAP_END_ADDR; 1126 s2 = ZV_BYTE - GPT_BYTE; 1127 if (s1 < 0) 1128 { 1129 p2 = p1; 1130 s2 = ZV_BYTE - BEGV_BYTE; 1131 s1 = 0; 1132 } 1133 if (s2 < 0) 1134 { 1135 s1 = ZV_BYTE - BEGV_BYTE; 1136 s2 = 0; 1137 } 1138 re_match_object = Qnil; 1139 1140 while (n < 0) 1141 { 1142 int val; 1143 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, 1144 pos_byte - BEGV_BYTE, lim_byte - pos_byte, 1145 &search_regs, 1146 /* Don't allow match past current point */ 1147 pos_byte - BEGV_BYTE); 1148 if (val == -2) 1149 { 1150 matcher_overflow (); 1151 } 1152 if (val >= 0) 1153 { 1154 pos_byte = search_regs.start[0] + BEGV_BYTE; 1155 for (i = 0; i < search_regs.num_regs; i++) 1156 if (search_regs.start[i] >= 0) 1157 { 1158 search_regs.start[i] 1159 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); 1160 search_regs.end[i] 1161 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); 1162 } 1163 XSETBUFFER (last_thing_searched, current_buffer); 1164 /* Set pos to the new position. */ 1165 pos = search_regs.start[0]; 1166 } 1167 else 1168 { 1169 immediate_quit = 0; 1170 return (n); 1171 } 1172 n++; 1173 } 1174 while (n > 0) 1175 { 1176 int val; 1177 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, 1178 pos_byte - BEGV_BYTE, lim_byte - pos_byte, 1179 &search_regs, 1180 lim_byte - BEGV_BYTE); 1181 if (val == -2) 1182 { 1183 matcher_overflow (); 1184 } 1185 if (val >= 0) 1186 { 1187 pos_byte = search_regs.end[0] + BEGV_BYTE; 1188 for (i = 0; i < search_regs.num_regs; i++) 1189 if (search_regs.start[i] >= 0) 1190 { 1191 search_regs.start[i] 1192 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); 1193 search_regs.end[i] 1194 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); 1195 } 1196 XSETBUFFER (last_thing_searched, current_buffer); 1197 pos = search_regs.end[0]; 1198 } 1199 else 1200 { 1201 immediate_quit = 0; 1202 return (0 - n); 1203 } 1204 n--; 1205 } 1206 immediate_quit = 0; 1207 return (pos); 1208 } 1209 else /* non-RE case */ 1210 { 1211 unsigned char *raw_pattern, *pat; 1212 int raw_pattern_size; 1213 int raw_pattern_size_byte; 1214 unsigned char *patbuf; 1215 int multibyte = !NILP (current_buffer->enable_multibyte_characters); 1216 unsigned char *base_pat; 1217 /* Set to positive if we find a non-ASCII char that need 1218 translation. Otherwise set to zero later. */ 1219 int charset_base = -1; 1220 int boyer_moore_ok = 1; 1221 1222 /* MULTIBYTE says whether the text to be searched is multibyte. 1223 We must convert PATTERN to match that, or we will not really 1224 find things right. */ 1225 1226 if (multibyte == STRING_MULTIBYTE (string)) 1227 { 1228 raw_pattern = (unsigned char *) SDATA (string); 1229 raw_pattern_size = SCHARS (string); 1230 raw_pattern_size_byte = SBYTES (string); 1231 } 1232 else if (multibyte) 1233 { 1234 raw_pattern_size = SCHARS (string); 1235 raw_pattern_size_byte 1236 = count_size_as_multibyte (SDATA (string), 1237 raw_pattern_size); 1238 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1); 1239 copy_text (SDATA (string), raw_pattern, 1240 SCHARS (string), 0, 1); 1241 } 1242 else 1243 { 1244 /* Converting multibyte to single-byte. 1245 1246 ??? Perhaps this conversion should be done in a special way 1247 by subtracting nonascii-insert-offset from each non-ASCII char, 1248 so that only the multibyte chars which really correspond to 1249 the chosen single-byte character set can possibly match. */ 1250 raw_pattern_size = SCHARS (string); 1251 raw_pattern_size_byte = SCHARS (string); 1252 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); 1253 copy_text (SDATA (string), raw_pattern, 1254 SBYTES (string), 1, 0); 1255 } 1256 1257 /* Copy and optionally translate the pattern. */ 1258 len = raw_pattern_size; 1259 len_byte = raw_pattern_size_byte; 1260 patbuf = (unsigned char *) alloca (len_byte); 1261 pat = patbuf; 1262 base_pat = raw_pattern; 1263 if (multibyte) 1264 { 1265 /* Fill patbuf by translated characters in STRING while 1266 checking if we can use boyer-moore search. If TRT is 1267 non-nil, we can use boyer-moore search only if TRT can be 1268 represented by the byte array of 256 elements. For that, 1269 all non-ASCII case-equivalents of all case-senstive 1270 characters in STRING must belong to the same charset and 1271 row. */ 1272 1273 while (--len >= 0) 1274 { 1275 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str; 1276 int c, translated, inverse; 1277 int in_charlen, charlen; 1278 1279 /* If we got here and the RE flag is set, it's because we're 1280 dealing with a regexp known to be trivial, so the backslash 1281 just quotes the next character. */ 1282 if (RE && *base_pat == '\\') 1283 { 1284 len--; 1285 raw_pattern_size--; 1286 len_byte--; 1287 base_pat++; 1288 } 1289 1290 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); 1291 1292 if (NILP (trt)) 1293 { 1294 str = base_pat; 1295 charlen = in_charlen; 1296 } 1297 else 1298 { 1299 /* Translate the character. */ 1300 TRANSLATE (translated, trt, c); 1301 charlen = CHAR_STRING (translated, str_base); 1302 str = str_base; 1303 1304 /* Check if C has any other case-equivalents. */ 1305 TRANSLATE (inverse, inverse_trt, c); 1306 /* If so, check if we can use boyer-moore. */ 1307 if (c != inverse && boyer_moore_ok) 1308 { 1309 /* Check if all equivalents belong to the same 1310 charset & row. Note that the check of C 1311 itself is done by the last iteration. Note 1312 also that we don't have to check ASCII 1313 characters because boyer-moore search can 1314 always handle their translation. */ 1315 while (1) 1316 { 1317 if (ASCII_BYTE_P (inverse)) 1318 { 1319 if (charset_base > 0) 1320 { 1321 boyer_moore_ok = 0; 1322 break; 1323 } 1324 charset_base = 0; 1325 } 1326 else if (SINGLE_BYTE_CHAR_P (inverse)) 1327 { 1328 /* Boyer-moore search can't handle a 1329 translation of an eight-bit 1330 character. */ 1331 boyer_moore_ok = 0; 1332 break; 1333 } 1334 else if (charset_base < 0) 1335 charset_base = inverse & ~CHAR_FIELD3_MASK; 1336 else if ((inverse & ~CHAR_FIELD3_MASK) 1337 != charset_base) 1338 { 1339 boyer_moore_ok = 0; 1340 break; 1341 } 1342 if (c == inverse) 1343 break; 1344 TRANSLATE (inverse, inverse_trt, inverse); 1345 } 1346 } 1347 } 1348 if (charset_base < 0) 1349 charset_base = 0; 1350 1351 /* Store this character into the translated pattern. */ 1352 bcopy (str, pat, charlen); 1353 pat += charlen; 1354 base_pat += in_charlen; 1355 len_byte -= in_charlen; 1356 } 1357 } 1358 else 1359 { 1360 /* Unibyte buffer. */ 1361 charset_base = 0; 1362 while (--len >= 0) 1363 { 1364 int c, translated; 1365 1366 /* If we got here and the RE flag is set, it's because we're 1367 dealing with a regexp known to be trivial, so the backslash 1368 just quotes the next character. */ 1369 if (RE && *base_pat == '\\') 1370 { 1371 len--; 1372 raw_pattern_size--; 1373 base_pat++; 1374 } 1375 c = *base_pat++; 1376 TRANSLATE (translated, trt, c); 1377 *pat++ = translated; 1378 } 1379 } 1380 1381 len_byte = pat - patbuf; 1382 len = raw_pattern_size; 1383 pat = base_pat = patbuf; 1384 1385 if (boyer_moore_ok) 1386 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, 1387 pos, pos_byte, lim, lim_byte, 1388 charset_base); 1389 else 1390 return simple_search (n, pat, len, len_byte, trt, 1391 pos, pos_byte, lim, lim_byte); 1392 } 1393} 1394 1395/* Do a simple string search N times for the string PAT, 1396 whose length is LEN/LEN_BYTE, 1397 from buffer position POS/POS_BYTE until LIM/LIM_BYTE. 1398 TRT is the translation table. 1399 1400 Return the character position where the match is found. 1401 Otherwise, if M matches remained to be found, return -M. 1402 1403 This kind of search works regardless of what is in PAT and 1404 regardless of what is in TRT. It is used in cases where 1405 boyer_moore cannot work. */ 1406 1407static int 1408simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) 1409 EMACS_INT n; 1410 unsigned char *pat; 1411 EMACS_INT len, len_byte; 1412 Lisp_Object trt; 1413 EMACS_INT pos, pos_byte; 1414 EMACS_INT lim, lim_byte; 1415{ 1416 int multibyte = ! NILP (current_buffer->enable_multibyte_characters); 1417 int forward = n > 0; 1418 1419 if (lim > pos && multibyte) 1420 while (n > 0) 1421 { 1422 while (1) 1423 { 1424 /* Try matching at position POS. */ 1425 EMACS_INT this_pos = pos; 1426 EMACS_INT this_pos_byte = pos_byte; 1427 EMACS_INT this_len = len; 1428 EMACS_INT this_len_byte = len_byte; 1429 unsigned char *p = pat; 1430 if (pos + len > lim) 1431 goto stop; 1432 1433 while (this_len > 0) 1434 { 1435 int charlen, buf_charlen; 1436 int pat_ch, buf_ch; 1437 1438 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); 1439 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), 1440 ZV_BYTE - this_pos_byte, 1441 buf_charlen); 1442 TRANSLATE (buf_ch, trt, buf_ch); 1443 1444 if (buf_ch != pat_ch) 1445 break; 1446 1447 this_len_byte -= charlen; 1448 this_len--; 1449 p += charlen; 1450 1451 this_pos_byte += buf_charlen; 1452 this_pos++; 1453 } 1454 1455 if (this_len == 0) 1456 { 1457 pos += len; 1458 pos_byte += len_byte; 1459 break; 1460 } 1461 1462 INC_BOTH (pos, pos_byte); 1463 } 1464 1465 n--; 1466 } 1467 else if (lim > pos) 1468 while (n > 0) 1469 { 1470 while (1) 1471 { 1472 /* Try matching at position POS. */ 1473 EMACS_INT this_pos = pos; 1474 EMACS_INT this_len = len; 1475 unsigned char *p = pat; 1476 1477 if (pos + len > lim) 1478 goto stop; 1479 1480 while (this_len > 0) 1481 { 1482 int pat_ch = *p++; 1483 int buf_ch = FETCH_BYTE (this_pos); 1484 TRANSLATE (buf_ch, trt, buf_ch); 1485 1486 if (buf_ch != pat_ch) 1487 break; 1488 1489 this_len--; 1490 this_pos++; 1491 } 1492 1493 if (this_len == 0) 1494 { 1495 pos += len; 1496 break; 1497 } 1498 1499 pos++; 1500 } 1501 1502 n--; 1503 } 1504 /* Backwards search. */ 1505 else if (lim < pos && multibyte) 1506 while (n < 0) 1507 { 1508 while (1) 1509 { 1510 /* Try matching at position POS. */ 1511 EMACS_INT this_pos = pos - len; 1512 EMACS_INT this_pos_byte = pos_byte - len_byte; 1513 EMACS_INT this_len = len; 1514 EMACS_INT this_len_byte = len_byte; 1515 unsigned char *p = pat; 1516 1517 if (this_pos < lim || this_pos_byte < lim_byte) 1518 goto stop; 1519 1520 while (this_len > 0) 1521 { 1522 int charlen, buf_charlen; 1523 int pat_ch, buf_ch; 1524 1525 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); 1526 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), 1527 ZV_BYTE - this_pos_byte, 1528 buf_charlen); 1529 TRANSLATE (buf_ch, trt, buf_ch); 1530 1531 if (buf_ch != pat_ch) 1532 break; 1533 1534 this_len_byte -= charlen; 1535 this_len--; 1536 p += charlen; 1537 this_pos_byte += buf_charlen; 1538 this_pos++; 1539 } 1540 1541 if (this_len == 0) 1542 { 1543 pos -= len; 1544 pos_byte -= len_byte; 1545 break; 1546 } 1547 1548 DEC_BOTH (pos, pos_byte); 1549 } 1550 1551 n++; 1552 } 1553 else if (lim < pos) 1554 while (n < 0) 1555 { 1556 while (1) 1557 { 1558 /* Try matching at position POS. */ 1559 EMACS_INT this_pos = pos - len; 1560 EMACS_INT this_len = len; 1561 unsigned char *p = pat; 1562 1563 if (pos - len < lim) 1564 goto stop; 1565 1566 while (this_len > 0) 1567 { 1568 int pat_ch = *p++; 1569 int buf_ch = FETCH_BYTE (this_pos); 1570 TRANSLATE (buf_ch, trt, buf_ch); 1571 1572 if (buf_ch != pat_ch) 1573 break; 1574 this_len--; 1575 this_pos++; 1576 } 1577 1578 if (this_len == 0) 1579 { 1580 pos -= len; 1581 break; 1582 } 1583 1584 pos--; 1585 } 1586 1587 n++; 1588 } 1589 1590 stop: 1591 if (n == 0) 1592 { 1593 if (forward) 1594 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte); 1595 else 1596 set_search_regs (multibyte ? pos_byte : pos, len_byte); 1597 1598 return pos; 1599 } 1600 else if (n > 0) 1601 return -n; 1602 else 1603 return n; 1604} 1605 1606/* Do Boyer-Moore search N times for the string BASE_PAT, 1607 whose length is LEN/LEN_BYTE, 1608 from buffer position POS/POS_BYTE until LIM/LIM_BYTE. 1609 DIRECTION says which direction we search in. 1610 TRT and INVERSE_TRT are translation tables. 1611 Characters in PAT are already translated by TRT. 1612 1613 This kind of search works if all the characters in BASE_PAT that 1614 have nontrivial translation are the same aside from the last byte. 1615 This makes it possible to translate just the last byte of a 1616 character, and do so after just a simple test of the context. 1617 CHARSET_BASE is nonzero iff there is such a non-ASCII character. 1618 1619 If that criterion is not satisfied, do not call this function. */ 1620 1621static int 1622boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, 1623 pos, pos_byte, lim, lim_byte, charset_base) 1624 EMACS_INT n; 1625 unsigned char *base_pat; 1626 EMACS_INT len, len_byte; 1627 Lisp_Object trt; 1628 Lisp_Object inverse_trt; 1629 EMACS_INT pos, pos_byte; 1630 EMACS_INT lim, lim_byte; 1631 int charset_base; 1632{ 1633 EMACS_INT direction = ((n > 0) ? 1 : -1); 1634 EMACS_INT dirlen; 1635 EMACS_INT infinity, limit, stride_for_teases = 0; 1636 register unsigned char *cursor, *p_limit; 1637 EMACS_INT i, j; 1638 unsigned char *pat, *pat_end; 1639 int multibyte = ! NILP (current_buffer->enable_multibyte_characters); 1640 1641 unsigned char simple_translate[0400]; 1642 /* These are set to the preceding bytes of a byte to be translated 1643 if charset_base is nonzero. As the maximum byte length of a 1644 multibyte character is 4, we have to check at most three previous 1645 bytes. */ 1646 int translate_prev_byte1 = 0; 1647 int translate_prev_byte2 = 0; 1648 int translate_prev_byte3 = 0; 1649 1650 EMACS_INT BM_tab[0400]; 1651 1652 /* The general approach is that we are going to maintain that we know */ 1653 /* the first (closest to the present position, in whatever direction */ 1654 /* we're searching) character that could possibly be the last */ 1655 /* (furthest from present position) character of a valid match. We */ 1656 /* advance the state of our knowledge by looking at that character */ 1657 /* and seeing whether it indeed matches the last character of the */ 1658 /* pattern. If it does, we take a closer look. If it does not, we */ 1659 /* move our pointer (to putative last characters) as far as is */ 1660 /* logically possible. This amount of movement, which I call a */ 1661 /* stride, will be the length of the pattern if the actual character */ 1662 /* appears nowhere in the pattern, otherwise it will be the distance */ 1663 /* from the last occurrence of that character to the end of the */ 1664 /* pattern. */ 1665 /* As a coding trick, an enormous stride is coded into the table for */ 1666 /* characters that match the last character. This allows use of only */ 1667 /* a single test, a test for having gone past the end of the */ 1668 /* permissible match region, to test for both possible matches (when */ 1669 /* the stride goes past the end immediately) and failure to */ 1670 /* match (where you get nudged past the end one stride at a time). */ 1671 1672 /* Here we make a "mickey mouse" BM table. The stride of the search */ 1673 /* is determined only by the last character of the putative match. */ 1674 /* If that character does not match, we will stride the proper */ 1675 /* distance to propose a match that superimposes it on the last */ 1676 /* instance of a character that matches it (per trt), or misses */ 1677 /* it entirely if there is none. */ 1678 1679 dirlen = len_byte * direction; 1680 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction; 1681 1682 /* Record position after the end of the pattern. */ 1683 pat_end = base_pat + len_byte; 1684 /* BASE_PAT points to a character that we start scanning from. 1685 It is the first character in a forward search, 1686 the last character in a backward search. */ 1687 if (direction < 0) 1688 base_pat = pat_end - 1; 1689 1690 for (i = 0; i < 0400; i++) { 1691 BM_tab[i] = dirlen; 1692 } 1693 1694 /* We use this for translation, instead of TRT itself. 1695 We fill this in to handle the characters that actually 1696 occur in the pattern. Others don't matter anyway! */ 1697 for (i = 0; i < 0400; i++) 1698 simple_translate[i] = i; 1699 1700 if (charset_base) 1701 { 1702 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a 1703 byte following them are the target of translation. */ 1704 int sample_char = charset_base | 0x20; 1705 unsigned char str[MAX_MULTIBYTE_LENGTH]; 1706 int len = CHAR_STRING (sample_char, str); 1707 1708 translate_prev_byte1 = str[len - 2]; 1709 if (len > 2) 1710 { 1711 translate_prev_byte2 = str[len - 3]; 1712 if (len > 3) 1713 translate_prev_byte3 = str[len - 4]; 1714 } 1715 } 1716 1717 i = 0; 1718 while (i != infinity) 1719 { 1720 unsigned char *ptr = base_pat + i; 1721 i += direction; 1722 if (i == dirlen) 1723 i = infinity; 1724 if (! NILP (trt)) 1725 { 1726 /* If the byte currently looking at is the last of a 1727 character to check case-equivalents, set CH to that 1728 character. An ASCII character and a non-ASCII character 1729 matching with CHARSET_BASE are to be checked. */ 1730 int ch = -1; 1731 1732 if (ASCII_BYTE_P (*ptr) || ! multibyte) 1733 ch = *ptr; 1734 else if (charset_base 1735 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1]))) 1736 { 1737 unsigned char *charstart = ptr - 1; 1738 1739 while (! (CHAR_HEAD_P (*charstart))) 1740 charstart--; 1741 ch = STRING_CHAR (charstart, ptr - charstart + 1); 1742 if (charset_base != (ch & ~CHAR_FIELD3_MASK)) 1743 ch = -1; 1744 } 1745 1746 if (ch >= 0400) 1747 j = ((unsigned char) ch) | 0200; 1748 else 1749 j = *ptr; 1750 1751 if (i == infinity) 1752 stride_for_teases = BM_tab[j]; 1753 1754 BM_tab[j] = dirlen - i; 1755 /* A translation table is accompanied by its inverse -- see */ 1756 /* comment following downcase_table for details */ 1757 if (ch >= 0) 1758 { 1759 int starting_ch = ch; 1760 int starting_j = j; 1761 1762 while (1) 1763 { 1764 TRANSLATE (ch, inverse_trt, ch); 1765 if (ch >= 0400) 1766 j = ((unsigned char) ch) | 0200; 1767 else 1768 j = (unsigned char) ch; 1769 1770 /* For all the characters that map into CH, 1771 set up simple_translate to map the last byte 1772 into STARTING_J. */ 1773 simple_translate[j] = starting_j; 1774 if (ch == starting_ch) 1775 break; 1776 BM_tab[j] = dirlen - i; 1777 } 1778 } 1779 } 1780 else 1781 { 1782 j = *ptr; 1783 1784 if (i == infinity) 1785 stride_for_teases = BM_tab[j]; 1786 BM_tab[j] = dirlen - i; 1787 } 1788 /* stride_for_teases tells how much to stride if we get a */ 1789 /* match on the far character but are subsequently */ 1790 /* disappointed, by recording what the stride would have been */ 1791 /* for that character if the last character had been */ 1792 /* different. */ 1793 } 1794 infinity = dirlen - infinity; 1795 pos_byte += dirlen - ((direction > 0) ? direction : 0); 1796 /* loop invariant - POS_BYTE points at where last char (first 1797 char if reverse) of pattern would align in a possible match. */ 1798 while (n != 0) 1799 { 1800 EMACS_INT tail_end; 1801 unsigned char *tail_end_ptr; 1802 1803 /* It's been reported that some (broken) compiler thinks that 1804 Boolean expressions in an arithmetic context are unsigned. 1805 Using an explicit ?1:0 prevents this. */ 1806 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction 1807 < 0) 1808 return (n * (0 - direction)); 1809 /* First we do the part we can by pointers (maybe nothing) */ 1810 QUIT; 1811 pat = base_pat; 1812 limit = pos_byte - dirlen + direction; 1813 if (direction > 0) 1814 { 1815 limit = BUFFER_CEILING_OF (limit); 1816 /* LIMIT is now the last (not beyond-last!) value POS_BYTE 1817 can take on without hitting edge of buffer or the gap. */ 1818 limit = min (limit, pos_byte + 20000); 1819 limit = min (limit, lim_byte - 1); 1820 } 1821 else 1822 { 1823 limit = BUFFER_FLOOR_OF (limit); 1824 /* LIMIT is now the last (not beyond-last!) value POS_BYTE 1825 can take on without hitting edge of buffer or the gap. */ 1826 limit = max (limit, pos_byte - 20000); 1827 limit = max (limit, lim_byte); 1828 } 1829 tail_end = BUFFER_CEILING_OF (pos_byte) + 1; 1830 tail_end_ptr = BYTE_POS_ADDR (tail_end); 1831 1832 if ((limit - pos_byte) * direction > 20) 1833 { 1834 unsigned char *p2; 1835 1836 p_limit = BYTE_POS_ADDR (limit); 1837 p2 = (cursor = BYTE_POS_ADDR (pos_byte)); 1838 /* In this loop, pos + cursor - p2 is the surrogate for pos */ 1839 while (1) /* use one cursor setting as long as i can */ 1840 { 1841 if (direction > 0) /* worth duplicating */ 1842 { 1843 /* Use signed comparison if appropriate 1844 to make cursor+infinity sure to be > p_limit. 1845 Assuming that the buffer lies in a range of addresses 1846 that are all "positive" (as ints) or all "negative", 1847 either kind of comparison will work as long 1848 as we don't step by infinity. So pick the kind 1849 that works when we do step by infinity. */ 1850 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit) 1851 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit) 1852 cursor += BM_tab[*cursor]; 1853 else 1854 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit) 1855 cursor += BM_tab[*cursor]; 1856 } 1857 else 1858 { 1859 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit) 1860 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit) 1861 cursor += BM_tab[*cursor]; 1862 else 1863 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit) 1864 cursor += BM_tab[*cursor]; 1865 } 1866/* If you are here, cursor is beyond the end of the searched region. */ 1867/* This can happen if you match on the far character of the pattern, */ 1868/* because the "stride" of that character is infinity, a number able */ 1869/* to throw you well beyond the end of the search. It can also */ 1870/* happen if you fail to match within the permitted region and would */ 1871/* otherwise try a character beyond that region */ 1872 if ((cursor - p_limit) * direction <= len_byte) 1873 break; /* a small overrun is genuine */ 1874 cursor -= infinity; /* large overrun = hit */ 1875 i = dirlen - direction; 1876 if (! NILP (trt)) 1877 { 1878 while ((i -= direction) + direction != 0) 1879 { 1880 int ch; 1881 cursor -= direction; 1882 /* Translate only the last byte of a character. */ 1883 if (! multibyte 1884 || ((cursor == tail_end_ptr 1885 || CHAR_HEAD_P (cursor[1])) 1886 && (CHAR_HEAD_P (cursor[0]) 1887 /* Check if this is the last byte of 1888 a translable character. */ 1889 || (translate_prev_byte1 == cursor[-1] 1890 && (CHAR_HEAD_P (translate_prev_byte1) 1891 || (translate_prev_byte2 == cursor[-2] 1892 && (CHAR_HEAD_P (translate_prev_byte2) 1893 || (translate_prev_byte3 == cursor[-3])))))))) 1894 ch = simple_translate[*cursor]; 1895 else 1896 ch = *cursor; 1897 if (pat[i] != ch) 1898 break; 1899 } 1900 } 1901 else 1902 { 1903 while ((i -= direction) + direction != 0) 1904 { 1905 cursor -= direction; 1906 if (pat[i] != *cursor) 1907 break; 1908 } 1909 } 1910 cursor += dirlen - i - direction; /* fix cursor */ 1911 if (i + direction == 0) 1912 { 1913 EMACS_INT position; 1914 1915 cursor -= direction; 1916 1917 position = pos_byte + cursor - p2 + ((direction > 0) 1918 ? 1 - len_byte : 0); 1919 set_search_regs (position, len_byte); 1920 1921 if ((n -= direction) != 0) 1922 cursor += dirlen; /* to resume search */ 1923 else 1924 return ((direction > 0) 1925 ? search_regs.end[0] : search_regs.start[0]); 1926 } 1927 else 1928 cursor += stride_for_teases; /* <sigh> we lose - */ 1929 } 1930 pos_byte += cursor - p2; 1931 } 1932 else 1933 /* Now we'll pick up a clump that has to be done the hard */ 1934 /* way because it covers a discontinuity */ 1935 { 1936 limit = ((direction > 0) 1937 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1) 1938 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1)); 1939 limit = ((direction > 0) 1940 ? min (limit + len_byte, lim_byte - 1) 1941 : max (limit - len_byte, lim_byte)); 1942 /* LIMIT is now the last value POS_BYTE can have 1943 and still be valid for a possible match. */ 1944 while (1) 1945 { 1946 /* This loop can be coded for space rather than */ 1947 /* speed because it will usually run only once. */ 1948 /* (the reach is at most len + 21, and typically */ 1949 /* does not exceed len) */ 1950 while ((limit - pos_byte) * direction >= 0) 1951 pos_byte += BM_tab[FETCH_BYTE (pos_byte)]; 1952 /* now run the same tests to distinguish going off the */ 1953 /* end, a match or a phony match. */ 1954 if ((pos_byte - limit) * direction <= len_byte) 1955 break; /* ran off the end */ 1956 /* Found what might be a match. 1957 Set POS_BYTE back to last (first if reverse) pos. */ 1958 pos_byte -= infinity; 1959 i = dirlen - direction; 1960 while ((i -= direction) + direction != 0) 1961 { 1962 int ch; 1963 unsigned char *ptr; 1964 pos_byte -= direction; 1965 ptr = BYTE_POS_ADDR (pos_byte); 1966 /* Translate only the last byte of a character. */ 1967 if (! multibyte 1968 || ((ptr == tail_end_ptr 1969 || CHAR_HEAD_P (ptr[1])) 1970 && (CHAR_HEAD_P (ptr[0]) 1971 /* Check if this is the last byte of a 1972 translable character. */ 1973 || (translate_prev_byte1 == ptr[-1] 1974 && (CHAR_HEAD_P (translate_prev_byte1) 1975 || (translate_prev_byte2 == ptr[-2] 1976 && (CHAR_HEAD_P (translate_prev_byte2) 1977 || translate_prev_byte3 == ptr[-3]))))))) 1978 ch = simple_translate[*ptr]; 1979 else 1980 ch = *ptr; 1981 if (pat[i] != ch) 1982 break; 1983 } 1984 /* Above loop has moved POS_BYTE part or all the way 1985 back to the first pos (last pos if reverse). 1986 Set it once again at the last (first if reverse) char. */ 1987 pos_byte += dirlen - i- direction; 1988 if (i + direction == 0) 1989 { 1990 EMACS_INT position; 1991 pos_byte -= direction; 1992 1993 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0); 1994 1995 set_search_regs (position, len_byte); 1996 1997 if ((n -= direction) != 0) 1998 pos_byte += dirlen; /* to resume search */ 1999 else 2000 return ((direction > 0) 2001 ? search_regs.end[0] : search_regs.start[0]); 2002 } 2003 else 2004 pos_byte += stride_for_teases; 2005 } 2006 } 2007 /* We have done one clump. Can we continue? */ 2008 if ((lim_byte - pos_byte) * direction < 0) 2009 return ((0 - n) * direction); 2010 } 2011 return BYTE_TO_CHAR (pos_byte); 2012} 2013 2014/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES 2015 for the overall match just found in the current buffer. 2016 Also clear out the match data for registers 1 and up. */ 2017 2018static void 2019set_search_regs (beg_byte, nbytes) 2020 int beg_byte, nbytes; 2021{ 2022 int i; 2023 2024 /* Make sure we have registers in which to store 2025 the match position. */ 2026 if (search_regs.num_regs == 0) 2027 { 2028 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t)); 2029 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t)); 2030 search_regs.num_regs = 2; 2031 } 2032 2033 /* Clear out the other registers. */ 2034 for (i = 1; i < search_regs.num_regs; i++) 2035 { 2036 search_regs.start[i] = -1; 2037 search_regs.end[i] = -1; 2038 } 2039 2040 search_regs.start[0] = BYTE_TO_CHAR (beg_byte); 2041 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes); 2042 XSETBUFFER (last_thing_searched, current_buffer); 2043} 2044 2045/* Given a string of words separated by word delimiters, 2046 compute a regexp that matches those exact words 2047 separated by arbitrary punctuation. */ 2048 2049static Lisp_Object 2050wordify (string) 2051 Lisp_Object string; 2052{ 2053 register unsigned char *p, *o; 2054 register int i, i_byte, len, punct_count = 0, word_count = 0; 2055 Lisp_Object val; 2056 int prev_c = 0; 2057 int adjust; 2058 2059 CHECK_STRING (string); 2060 p = SDATA (string); 2061 len = SCHARS (string); 2062 2063 for (i = 0, i_byte = 0; i < len; ) 2064 { 2065 int c; 2066 2067 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); 2068 2069 if (SYNTAX (c) != Sword) 2070 { 2071 punct_count++; 2072 if (i > 0 && SYNTAX (prev_c) == Sword) 2073 word_count++; 2074 } 2075 2076 prev_c = c; 2077 } 2078 2079 if (SYNTAX (prev_c) == Sword) 2080 word_count++; 2081 if (!word_count) 2082 return empty_string; 2083 2084 adjust = - punct_count + 5 * (word_count - 1) + 4; 2085 if (STRING_MULTIBYTE (string)) 2086 val = make_uninit_multibyte_string (len + adjust, 2087 SBYTES (string) 2088 + adjust); 2089 else 2090 val = make_uninit_string (len + adjust); 2091 2092 o = SDATA (val); 2093 *o++ = '\\'; 2094 *o++ = 'b'; 2095 prev_c = 0; 2096 2097 for (i = 0, i_byte = 0; i < len; ) 2098 { 2099 int c; 2100 int i_byte_orig = i_byte; 2101 2102 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); 2103 2104 if (SYNTAX (c) == Sword) 2105 { 2106 bcopy (SDATA (string) + i_byte_orig, o, 2107 i_byte - i_byte_orig); 2108 o += i_byte - i_byte_orig; 2109 } 2110 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count) 2111 { 2112 *o++ = '\\'; 2113 *o++ = 'W'; 2114 *o++ = '\\'; 2115 *o++ = 'W'; 2116 *o++ = '*'; 2117 } 2118 2119 prev_c = c; 2120 } 2121 2122 *o++ = '\\'; 2123 *o++ = 'b'; 2124 2125 return val; 2126} 2127 2128DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4, 2129 "MSearch backward: ", 2130 doc: /* Search backward from point for STRING. 2131Set point to the beginning of the occurrence found, and return point. 2132An optional second argument bounds the search; it is a buffer position. 2133The match found must not extend before that position. 2134Optional third argument, if t, means if fail just return nil (no error). 2135 If not nil and not t, position at limit of search and return nil. 2136Optional fourth argument is repeat count--search for successive occurrences. 2137 2138Search case-sensitivity is determined by the value of the variable 2139`case-fold-search', which see. 2140 2141See also the functions `match-beginning', `match-end' and `replace-match'. */) 2142 (string, bound, noerror, count) 2143 Lisp_Object string, bound, noerror, count; 2144{ 2145 return search_command (string, bound, noerror, count, -1, 0, 0); 2146} 2147 2148DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ", 2149 doc: /* Search forward from point for STRING. 2150Set point to the end of the occurrence found, and return point. 2151An optional second argument bounds the search; it is a buffer position. 2152The match found must not extend after that position. A value of nil is 2153 equivalent to (point-max). 2154Optional third argument, if t, means if fail just return nil (no error). 2155 If not nil and not t, move to limit of search and return nil. 2156Optional fourth argument is repeat count--search for successive occurrences. 2157 2158Search case-sensitivity is determined by the value of the variable 2159`case-fold-search', which see. 2160 2161See also the functions `match-beginning', `match-end' and `replace-match'. */) 2162 (string, bound, noerror, count) 2163 Lisp_Object string, bound, noerror, count; 2164{ 2165 return search_command (string, bound, noerror, count, 1, 0, 0); 2166} 2167 2168DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4, 2169 "sWord search backward: ", 2170 doc: /* Search backward from point for STRING, ignoring differences in punctuation. 2171Set point to the beginning of the occurrence found, and return point. 2172An optional second argument bounds the search; it is a buffer position. 2173The match found must not extend before that position. 2174Optional third argument, if t, means if fail just return nil (no error). 2175 If not nil and not t, move to limit of search and return nil. 2176Optional fourth argument is repeat count--search for successive occurrences. */) 2177 (string, bound, noerror, count) 2178 Lisp_Object string, bound, noerror, count; 2179{ 2180 return search_command (wordify (string), bound, noerror, count, -1, 1, 0); 2181} 2182 2183DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4, 2184 "sWord search: ", 2185 doc: /* Search forward from point for STRING, ignoring differences in punctuation. 2186Set point to the end of the occurrence found, and return point. 2187An optional second argument bounds the search; it is a buffer position. 2188The match found must not extend after that position. 2189Optional third argument, if t, means if fail just return nil (no error). 2190 If not nil and not t, move to limit of search and return nil. 2191Optional fourth argument is repeat count--search for successive occurrences. */) 2192 (string, bound, noerror, count) 2193 Lisp_Object string, bound, noerror, count; 2194{ 2195 return search_command (wordify (string), bound, noerror, count, 1, 1, 0); 2196} 2197 2198DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4, 2199 "sRE search backward: ", 2200 doc: /* Search backward from point for match for regular expression REGEXP. 2201Set point to the beginning of the match, and return point. 2202The match found is the one starting last in the buffer 2203and yet ending before the origin of the search. 2204An optional second argument bounds the search; it is a buffer position. 2205The match found must start at or after that position. 2206Optional third argument, if t, means if fail just return nil (no error). 2207 If not nil and not t, move to limit of search and return nil. 2208Optional fourth argument is repeat count--search for successive occurrences. 2209See also the functions `match-beginning', `match-end', `match-string', 2210and `replace-match'. */) 2211 (regexp, bound, noerror, count) 2212 Lisp_Object regexp, bound, noerror, count; 2213{ 2214 return search_command (regexp, bound, noerror, count, -1, 1, 0); 2215} 2216 2217DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4, 2218 "sRE search: ", 2219 doc: /* Search forward from point for regular expression REGEXP. 2220Set point to the end of the occurrence found, and return point. 2221An optional second argument bounds the search; it is a buffer position. 2222The match found must not extend after that position. 2223Optional third argument, if t, means if fail just return nil (no error). 2224 If not nil and not t, move to limit of search and return nil. 2225Optional fourth argument is repeat count--search for successive occurrences. 2226See also the functions `match-beginning', `match-end', `match-string', 2227and `replace-match'. */) 2228 (regexp, bound, noerror, count) 2229 Lisp_Object regexp, bound, noerror, count; 2230{ 2231 return search_command (regexp, bound, noerror, count, 1, 1, 0); 2232} 2233 2234DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4, 2235 "sPosix search backward: ", 2236 doc: /* Search backward from point for match for regular expression REGEXP. 2237Find the longest match in accord with Posix regular expression rules. 2238Set point to the beginning of the match, and return point. 2239The match found is the one starting last in the buffer 2240and yet ending before the origin of the search. 2241An optional second argument bounds the search; it is a buffer position. 2242The match found must start at or after that position. 2243Optional third argument, if t, means if fail just return nil (no error). 2244 If not nil and not t, move to limit of search and return nil. 2245Optional fourth argument is repeat count--search for successive occurrences. 2246See also the functions `match-beginning', `match-end', `match-string', 2247and `replace-match'. */) 2248 (regexp, bound, noerror, count) 2249 Lisp_Object regexp, bound, noerror, count; 2250{ 2251 return search_command (regexp, bound, noerror, count, -1, 1, 1); 2252} 2253 2254DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4, 2255 "sPosix search: ", 2256 doc: /* Search forward from point for regular expression REGEXP. 2257Find the longest match in accord with Posix regular expression rules. 2258Set point to the end of the occurrence found, and return point. 2259An optional second argument bounds the search; it is a buffer position. 2260The match found must not extend after that position. 2261Optional third argument, if t, means if fail just return nil (no error). 2262 If not nil and not t, move to limit of search and return nil. 2263Optional fourth argument is repeat count--search for successive occurrences. 2264See also the functions `match-beginning', `match-end', `match-string', 2265and `replace-match'. */) 2266 (regexp, bound, noerror, count) 2267 Lisp_Object regexp, bound, noerror, count; 2268{ 2269 return search_command (regexp, bound, noerror, count, 1, 1, 1); 2270} 2271 2272DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0, 2273 doc: /* Replace text matched by last search with NEWTEXT. 2274Leave point at the end of the replacement text. 2275 2276If second arg FIXEDCASE is non-nil, do not alter case of replacement text. 2277Otherwise maybe capitalize the whole text, or maybe just word initials, 2278based on the replaced text. 2279If the replaced text has only capital letters 2280and has at least one multiletter word, convert NEWTEXT to all caps. 2281Otherwise if all words are capitalized in the replaced text, 2282capitalize each word in NEWTEXT. 2283 2284If third arg LITERAL is non-nil, insert NEWTEXT literally. 2285Otherwise treat `\\' as special: 2286 `\\&' in NEWTEXT means substitute original matched text. 2287 `\\N' means substitute what matched the Nth `\\(...\\)'. 2288 If Nth parens didn't match, substitute nothing. 2289 `\\\\' means insert one `\\'. 2290Case conversion does not apply to these substitutions. 2291 2292FIXEDCASE and LITERAL are optional arguments. 2293 2294The optional fourth argument STRING can be a string to modify. 2295This is meaningful when the previous match was done against STRING, 2296using `string-match'. When used this way, `replace-match' 2297creates and returns a new string made by copying STRING and replacing 2298the part of STRING that was matched. 2299 2300The optional fifth argument SUBEXP specifies a subexpression; 2301it says to replace just that subexpression with NEWTEXT, 2302rather than replacing the entire matched text. 2303This is, in a vague sense, the inverse of using `\\N' in NEWTEXT; 2304`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts 2305NEWTEXT in place of subexp N. 2306This is useful only after a regular expression search or match, 2307since only regular expressions have distinguished subexpressions. */) 2308 (newtext, fixedcase, literal, string, subexp) 2309 Lisp_Object newtext, fixedcase, literal, string, subexp; 2310{ 2311 enum { nochange, all_caps, cap_initial } case_action; 2312 register int pos, pos_byte; 2313 int some_multiletter_word; 2314 int some_lowercase; 2315 int some_uppercase; 2316 int some_nonuppercase_initial; 2317 register int c, prevc; 2318 int sub; 2319 int opoint, newpoint; 2320 2321 CHECK_STRING (newtext); 2322 2323 if (! NILP (string)) 2324 CHECK_STRING (string); 2325 2326 case_action = nochange; /* We tried an initialization */ 2327 /* but some C compilers blew it */ 2328 2329 if (search_regs.num_regs <= 0) 2330 error ("`replace-match' called before any match found"); 2331 2332 if (NILP (subexp)) 2333 sub = 0; 2334 else 2335 { 2336 CHECK_NUMBER (subexp); 2337 sub = XINT (subexp); 2338 if (sub < 0 || sub >= search_regs.num_regs) 2339 args_out_of_range (subexp, make_number (search_regs.num_regs)); 2340 } 2341 2342 if (NILP (string)) 2343 { 2344 if (search_regs.start[sub] < BEGV 2345 || search_regs.start[sub] > search_regs.end[sub] 2346 || search_regs.end[sub] > ZV) 2347 args_out_of_range (make_number (search_regs.start[sub]), 2348 make_number (search_regs.end[sub])); 2349 } 2350 else 2351 { 2352 if (search_regs.start[sub] < 0 2353 || search_regs.start[sub] > search_regs.end[sub] 2354 || search_regs.end[sub] > SCHARS (string)) 2355 args_out_of_range (make_number (search_regs.start[sub]), 2356 make_number (search_regs.end[sub])); 2357 } 2358 2359 if (NILP (fixedcase)) 2360 { 2361 /* Decide how to casify by examining the matched text. */ 2362 int last; 2363 2364 pos = search_regs.start[sub]; 2365 last = search_regs.end[sub]; 2366 2367 if (NILP (string)) 2368 pos_byte = CHAR_TO_BYTE (pos); 2369 else 2370 pos_byte = string_char_to_byte (string, pos); 2371 2372 prevc = '\n'; 2373 case_action = all_caps; 2374 2375 /* some_multiletter_word is set nonzero if any original word 2376 is more than one letter long. */ 2377 some_multiletter_word = 0; 2378 some_lowercase = 0; 2379 some_nonuppercase_initial = 0; 2380 some_uppercase = 0; 2381 2382 while (pos < last) 2383 { 2384 if (NILP (string)) 2385 { 2386 c = FETCH_CHAR (pos_byte); 2387 INC_BOTH (pos, pos_byte); 2388 } 2389 else 2390 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte); 2391 2392 if (LOWERCASEP (c)) 2393 { 2394 /* Cannot be all caps if any original char is lower case */ 2395 2396 some_lowercase = 1; 2397 if (SYNTAX (prevc) != Sword) 2398 some_nonuppercase_initial = 1; 2399 else 2400 some_multiletter_word = 1; 2401 } 2402 else if (UPPERCASEP (c)) 2403 { 2404 some_uppercase = 1; 2405 if (SYNTAX (prevc) != Sword) 2406 ; 2407 else 2408 some_multiletter_word = 1; 2409 } 2410 else 2411 { 2412 /* If the initial is a caseless word constituent, 2413 treat that like a lowercase initial. */ 2414 if (SYNTAX (prevc) != Sword) 2415 some_nonuppercase_initial = 1; 2416 } 2417 2418 prevc = c; 2419 } 2420 2421 /* Convert to all caps if the old text is all caps 2422 and has at least one multiletter word. */ 2423 if (! some_lowercase && some_multiletter_word) 2424 case_action = all_caps; 2425 /* Capitalize each word, if the old text has all capitalized words. */ 2426 else if (!some_nonuppercase_initial && some_multiletter_word) 2427 case_action = cap_initial; 2428 else if (!some_nonuppercase_initial && some_uppercase) 2429 /* Should x -> yz, operating on X, give Yz or YZ? 2430 We'll assume the latter. */ 2431 case_action = all_caps; 2432 else 2433 case_action = nochange; 2434 } 2435 2436 /* Do replacement in a string. */ 2437 if (!NILP (string)) 2438 { 2439 Lisp_Object before, after; 2440 2441 before = Fsubstring (string, make_number (0), 2442 make_number (search_regs.start[sub])); 2443 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil); 2444 2445 /* Substitute parts of the match into NEWTEXT 2446 if desired. */ 2447 if (NILP (literal)) 2448 { 2449 int lastpos = 0; 2450 int lastpos_byte = 0; 2451 /* We build up the substituted string in ACCUM. */ 2452 Lisp_Object accum; 2453 Lisp_Object middle; 2454 int length = SBYTES (newtext); 2455 2456 accum = Qnil; 2457 2458 for (pos_byte = 0, pos = 0; pos_byte < length;) 2459 { 2460 int substart = -1; 2461 int subend = 0; 2462 int delbackslash = 0; 2463 2464 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); 2465 2466 if (c == '\\') 2467 { 2468 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); 2469 2470 if (c == '&') 2471 { 2472 substart = search_regs.start[sub]; 2473 subend = search_regs.end[sub]; 2474 } 2475 else if (c >= '1' && c <= '9') 2476 { 2477 if (search_regs.start[c - '0'] >= 0 2478 && c <= search_regs.num_regs + '0') 2479 { 2480 substart = search_regs.start[c - '0']; 2481 subend = search_regs.end[c - '0']; 2482 } 2483 else 2484 { 2485 /* If that subexp did not match, 2486 replace \\N with nothing. */ 2487 substart = 0; 2488 subend = 0; 2489 } 2490 } 2491 else if (c == '\\') 2492 delbackslash = 1; 2493 else 2494 error ("Invalid use of `\\' in replacement text"); 2495 } 2496 if (substart >= 0) 2497 { 2498 if (pos - 2 != lastpos) 2499 middle = substring_both (newtext, lastpos, 2500 lastpos_byte, 2501 pos - 2, pos_byte - 2); 2502 else 2503 middle = Qnil; 2504 accum = concat3 (accum, middle, 2505 Fsubstring (string, 2506 make_number (substart), 2507 make_number (subend))); 2508 lastpos = pos; 2509 lastpos_byte = pos_byte; 2510 } 2511 else if (delbackslash) 2512 { 2513 middle = substring_both (newtext, lastpos, 2514 lastpos_byte, 2515 pos - 1, pos_byte - 1); 2516 2517 accum = concat2 (accum, middle); 2518 lastpos = pos; 2519 lastpos_byte = pos_byte; 2520 } 2521 } 2522 2523 if (pos != lastpos) 2524 middle = substring_both (newtext, lastpos, 2525 lastpos_byte, 2526 pos, pos_byte); 2527 else 2528 middle = Qnil; 2529 2530 newtext = concat2 (accum, middle); 2531 } 2532 2533 /* Do case substitution in NEWTEXT if desired. */ 2534 if (case_action == all_caps) 2535 newtext = Fupcase (newtext); 2536 else if (case_action == cap_initial) 2537 newtext = Fupcase_initials (newtext); 2538 2539 return concat3 (before, newtext, after); 2540 } 2541 2542 /* Record point, then move (quietly) to the start of the match. */ 2543 if (PT >= search_regs.end[sub]) 2544 opoint = PT - ZV; 2545 else if (PT > search_regs.start[sub]) 2546 opoint = search_regs.end[sub] - ZV; 2547 else 2548 opoint = PT; 2549 2550 /* If we want non-literal replacement, 2551 perform substitution on the replacement string. */ 2552 if (NILP (literal)) 2553 { 2554 int length = SBYTES (newtext); 2555 unsigned char *substed; 2556 int substed_alloc_size, substed_len; 2557 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters); 2558 int str_multibyte = STRING_MULTIBYTE (newtext); 2559 Lisp_Object rev_tbl; 2560 int really_changed = 0; 2561 2562 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table) 2563 ? Fchar_table_extra_slot (Vnonascii_translation_table, 2564 make_number (0)) 2565 : Qnil); 2566 2567 substed_alloc_size = length * 2 + 100; 2568 substed = (unsigned char *) xmalloc (substed_alloc_size + 1); 2569 substed_len = 0; 2570 2571 /* Go thru NEWTEXT, producing the actual text to insert in 2572 SUBSTED while adjusting multibyteness to that of the current 2573 buffer. */ 2574 2575 for (pos_byte = 0, pos = 0; pos_byte < length;) 2576 { 2577 unsigned char str[MAX_MULTIBYTE_LENGTH]; 2578 unsigned char *add_stuff = NULL; 2579 int add_len = 0; 2580 int idx = -1; 2581 2582 if (str_multibyte) 2583 { 2584 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte); 2585 if (!buf_multibyte) 2586 c = multibyte_char_to_unibyte (c, rev_tbl); 2587 } 2588 else 2589 { 2590 /* Note that we don't have to increment POS. */ 2591 c = SREF (newtext, pos_byte++); 2592 if (buf_multibyte) 2593 c = unibyte_char_to_multibyte (c); 2594 } 2595 2596 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED, 2597 or set IDX to a match index, which means put that part 2598 of the buffer text into SUBSTED. */ 2599 2600 if (c == '\\') 2601 { 2602 really_changed = 1; 2603 2604 if (str_multibyte) 2605 { 2606 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, 2607 pos, pos_byte); 2608 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c)) 2609 c = multibyte_char_to_unibyte (c, rev_tbl); 2610 } 2611 else 2612 { 2613 c = SREF (newtext, pos_byte++); 2614 if (buf_multibyte) 2615 c = unibyte_char_to_multibyte (c); 2616 } 2617 2618 if (c == '&') 2619 idx = sub; 2620 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0') 2621 { 2622 if (search_regs.start[c - '0'] >= 1) 2623 idx = c - '0'; 2624 } 2625 else if (c == '\\') 2626 add_len = 1, add_stuff = "\\"; 2627 else 2628 { 2629 xfree (substed); 2630 error ("Invalid use of `\\' in replacement text"); 2631 } 2632 } 2633 else 2634 { 2635 add_len = CHAR_STRING (c, str); 2636 add_stuff = str; 2637 } 2638 2639 /* If we want to copy part of a previous match, 2640 set up ADD_STUFF and ADD_LEN to point to it. */ 2641 if (idx >= 0) 2642 { 2643 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]); 2644 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte; 2645 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx]) 2646 move_gap (search_regs.start[idx]); 2647 add_stuff = BYTE_POS_ADDR (begbyte); 2648 } 2649 2650 /* Now the stuff we want to add to SUBSTED 2651 is invariably ADD_LEN bytes starting at ADD_STUFF. */ 2652 2653 /* Make sure SUBSTED is big enough. */ 2654 if (substed_len + add_len >= substed_alloc_size) 2655 { 2656 substed_alloc_size = substed_len + add_len + 500; 2657 substed = (unsigned char *) xrealloc (substed, 2658 substed_alloc_size + 1); 2659 } 2660 2661 /* Now add to the end of SUBSTED. */ 2662 if (add_stuff) 2663 { 2664 bcopy (add_stuff, substed + substed_len, add_len); 2665 substed_len += add_len; 2666 } 2667 } 2668 2669 if (really_changed) 2670 { 2671 if (buf_multibyte) 2672 { 2673 int nchars = multibyte_chars_in_text (substed, substed_len); 2674 2675 newtext = make_multibyte_string (substed, nchars, substed_len); 2676 } 2677 else 2678 newtext = make_unibyte_string (substed, substed_len); 2679 } 2680 xfree (substed); 2681 } 2682 2683 /* Replace the old text with the new in the cleanest possible way. */ 2684 replace_range (search_regs.start[sub], search_regs.end[sub], 2685 newtext, 1, 0, 1); 2686 newpoint = search_regs.start[sub] + SCHARS (newtext); 2687 2688 if (case_action == all_caps) 2689 Fupcase_region (make_number (search_regs.start[sub]), 2690 make_number (newpoint)); 2691 else if (case_action == cap_initial) 2692 Fupcase_initials_region (make_number (search_regs.start[sub]), 2693 make_number (newpoint)); 2694 2695 /* Adjust search data for this change. */ 2696 { 2697 int oldend = search_regs.end[sub]; 2698 int oldstart = search_regs.start[sub]; 2699 int change = newpoint - search_regs.end[sub]; 2700 int i; 2701 2702 for (i = 0; i < search_regs.num_regs; i++) 2703 { 2704 if (search_regs.start[i] >= oldend) 2705 search_regs.start[i] += change; 2706 else if (search_regs.start[i] > oldstart) 2707 search_regs.start[i] = oldstart; 2708 if (search_regs.end[i] >= oldend) 2709 search_regs.end[i] += change; 2710 else if (search_regs.end[i] > oldstart) 2711 search_regs.end[i] = oldstart; 2712 } 2713 } 2714 2715 /* Put point back where it was in the text. */ 2716 if (opoint <= 0) 2717 TEMP_SET_PT (opoint + ZV); 2718 else 2719 TEMP_SET_PT (opoint); 2720 2721 /* Now move point "officially" to the start of the inserted replacement. */ 2722 move_if_not_intangible (newpoint); 2723 2724 return Qnil; 2725} 2726 2727static Lisp_Object 2728match_limit (num, beginningp) 2729 Lisp_Object num; 2730 int beginningp; 2731{ 2732 register int n; 2733 2734 CHECK_NUMBER (num); 2735 n = XINT (num); 2736 if (n < 0) 2737 args_out_of_range (num, make_number (0)); 2738 if (search_regs.num_regs <= 0) 2739 error ("No match data, because no search succeeded"); 2740 if (n >= search_regs.num_regs 2741 || search_regs.start[n] < 0) 2742 return Qnil; 2743 return (make_number ((beginningp) ? search_regs.start[n] 2744 : search_regs.end[n])); 2745} 2746 2747DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0, 2748 doc: /* Return position of start of text matched by last search. 2749SUBEXP, a number, specifies which parenthesized expression in the last 2750 regexp. 2751Value is nil if SUBEXPth pair didn't match, or there were less than 2752 SUBEXP pairs. 2753Zero means the entire text matched by the whole regexp or whole string. */) 2754 (subexp) 2755 Lisp_Object subexp; 2756{ 2757 return match_limit (subexp, 1); 2758} 2759 2760DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0, 2761 doc: /* Return position of end of text matched by last search. 2762SUBEXP, a number, specifies which parenthesized expression in the last 2763 regexp. 2764Value is nil if SUBEXPth pair didn't match, or there were less than 2765 SUBEXP pairs. 2766Zero means the entire text matched by the whole regexp or whole string. */) 2767 (subexp) 2768 Lisp_Object subexp; 2769{ 2770 return match_limit (subexp, 0); 2771} 2772 2773DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0, 2774 doc: /* Return a list containing all info on what the last search matched. 2775Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'. 2776All the elements are markers or nil (nil if the Nth pair didn't match) 2777if the last match was on a buffer; integers or nil if a string was matched. 2778Use `store-match-data' to reinstate the data in this list. 2779 2780If INTEGERS (the optional first argument) is non-nil, always use 2781integers \(rather than markers) to represent buffer positions. In 2782this case, and if the last match was in a buffer, the buffer will get 2783stored as one additional element at the end of the list. 2784 2785If REUSE is a list, reuse it as part of the value. If REUSE is long 2786enough to hold all the values, and if INTEGERS is non-nil, no consing 2787is done. 2788 2789If optional third arg RESEAT is non-nil, any previous markers on the 2790REUSE list will be modified to point to nowhere. 2791 2792Return value is undefined if the last search failed. */) 2793 (integers, reuse, reseat) 2794 Lisp_Object integers, reuse, reseat; 2795{ 2796 Lisp_Object tail, prev; 2797 Lisp_Object *data; 2798 int i, len; 2799 2800 if (!NILP (reseat)) 2801 for (tail = reuse; CONSP (tail); tail = XCDR (tail)) 2802 if (MARKERP (XCAR (tail))) 2803 { 2804 unchain_marker (XMARKER (XCAR (tail))); 2805 XSETCAR (tail, Qnil); 2806 } 2807 2808 if (NILP (last_thing_searched)) 2809 return Qnil; 2810 2811 prev = Qnil; 2812 2813 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1) 2814 * sizeof (Lisp_Object)); 2815 2816 len = 0; 2817 for (i = 0; i < search_regs.num_regs; i++) 2818 { 2819 int start = search_regs.start[i]; 2820 if (start >= 0) 2821 { 2822 if (EQ (last_thing_searched, Qt) 2823 || ! NILP (integers)) 2824 { 2825 XSETFASTINT (data[2 * i], start); 2826 XSETFASTINT (data[2 * i + 1], search_regs.end[i]); 2827 } 2828 else if (BUFFERP (last_thing_searched)) 2829 { 2830 data[2 * i] = Fmake_marker (); 2831 Fset_marker (data[2 * i], 2832 make_number (start), 2833 last_thing_searched); 2834 data[2 * i + 1] = Fmake_marker (); 2835 Fset_marker (data[2 * i + 1], 2836 make_number (search_regs.end[i]), 2837 last_thing_searched); 2838 } 2839 else 2840 /* last_thing_searched must always be Qt, a buffer, or Qnil. */ 2841 abort (); 2842 2843 len = 2 * i + 2; 2844 } 2845 else 2846 data[2 * i] = data[2 * i + 1] = Qnil; 2847 } 2848 2849 if (BUFFERP (last_thing_searched) && !NILP (integers)) 2850 { 2851 data[len] = last_thing_searched; 2852 len++; 2853 } 2854 2855 /* If REUSE is not usable, cons up the values and return them. */ 2856 if (! CONSP (reuse)) 2857 return Flist (len, data); 2858 2859 /* If REUSE is a list, store as many value elements as will fit 2860 into the elements of REUSE. */ 2861 for (i = 0, tail = reuse; CONSP (tail); 2862 i++, tail = XCDR (tail)) 2863 { 2864 if (i < len) 2865 XSETCAR (tail, data[i]); 2866 else 2867 XSETCAR (tail, Qnil); 2868 prev = tail; 2869 } 2870 2871 /* If we couldn't fit all value elements into REUSE, 2872 cons up the rest of them and add them to the end of REUSE. */ 2873 if (i < len) 2874 XSETCDR (prev, Flist (len - i, data + i)); 2875 2876 return reuse; 2877} 2878 2879/* Internal usage only: 2880 If RESEAT is `evaporate', put the markers back on the free list 2881 immediately. No other references to the markers must exist in this case, 2882 so it is used only internally on the unwind stack and save-match-data from 2883 Lisp. */ 2884 2885DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0, 2886 doc: /* Set internal data on last search match from elements of LIST. 2887LIST should have been created by calling `match-data' previously. 2888 2889If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) 2890 (list, reseat) 2891 register Lisp_Object list, reseat; 2892{ 2893 register int i; 2894 register Lisp_Object marker; 2895 2896 if (running_asynch_code) 2897 save_search_regs (); 2898 2899 CHECK_LIST (list); 2900 2901 /* Unless we find a marker with a buffer or an explicit buffer 2902 in LIST, assume that this match data came from a string. */ 2903 last_thing_searched = Qt; 2904 2905 /* Allocate registers if they don't already exist. */ 2906 { 2907 int length = XFASTINT (Flength (list)) / 2; 2908 2909 if (length > search_regs.num_regs) 2910 { 2911 if (search_regs.num_regs == 0) 2912 { 2913 search_regs.start 2914 = (regoff_t *) xmalloc (length * sizeof (regoff_t)); 2915 search_regs.end 2916 = (regoff_t *) xmalloc (length * sizeof (regoff_t)); 2917 } 2918 else 2919 { 2920 search_regs.start 2921 = (regoff_t *) xrealloc (search_regs.start, 2922 length * sizeof (regoff_t)); 2923 search_regs.end 2924 = (regoff_t *) xrealloc (search_regs.end, 2925 length * sizeof (regoff_t)); 2926 } 2927 2928 for (i = search_regs.num_regs; i < length; i++) 2929 search_regs.start[i] = -1; 2930 2931 search_regs.num_regs = length; 2932 } 2933 2934 for (i = 0; CONSP (list); i++) 2935 { 2936 marker = XCAR (list); 2937 if (BUFFERP (marker)) 2938 { 2939 last_thing_searched = marker; 2940 break; 2941 } 2942 if (i >= length) 2943 break; 2944 if (NILP (marker)) 2945 { 2946 search_regs.start[i] = -1; 2947 list = XCDR (list); 2948 } 2949 else 2950 { 2951 int from; 2952 Lisp_Object m; 2953 2954 m = marker; 2955 if (MARKERP (marker)) 2956 { 2957 if (XMARKER (marker)->buffer == 0) 2958 XSETFASTINT (marker, 0); 2959 else 2960 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer); 2961 } 2962 2963 CHECK_NUMBER_COERCE_MARKER (marker); 2964 from = XINT (marker); 2965 2966 if (!NILP (reseat) && MARKERP (m)) 2967 { 2968 if (EQ (reseat, Qevaporate)) 2969 free_marker (m); 2970 else 2971 unchain_marker (XMARKER (m)); 2972 XSETCAR (list, Qnil); 2973 } 2974 2975 if ((list = XCDR (list), !CONSP (list))) 2976 break; 2977 2978 m = marker = XCAR (list); 2979 2980 if (MARKERP (marker) && XMARKER (marker)->buffer == 0) 2981 XSETFASTINT (marker, 0); 2982 2983 CHECK_NUMBER_COERCE_MARKER (marker); 2984 search_regs.start[i] = from; 2985 search_regs.end[i] = XINT (marker); 2986 2987 if (!NILP (reseat) && MARKERP (m)) 2988 { 2989 if (EQ (reseat, Qevaporate)) 2990 free_marker (m); 2991 else 2992 unchain_marker (XMARKER (m)); 2993 XSETCAR (list, Qnil); 2994 } 2995 } 2996 list = XCDR (list); 2997 } 2998 2999 for (; i < search_regs.num_regs; i++) 3000 search_regs.start[i] = -1; 3001 } 3002 3003 return Qnil; 3004} 3005 3006/* If non-zero the match data have been saved in saved_search_regs 3007 during the execution of a sentinel or filter. */ 3008static int search_regs_saved; 3009static struct re_registers saved_search_regs; 3010static Lisp_Object saved_last_thing_searched; 3011 3012/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data 3013 if asynchronous code (filter or sentinel) is running. */ 3014static void 3015save_search_regs () 3016{ 3017 if (!search_regs_saved) 3018 { 3019 saved_search_regs.num_regs = search_regs.num_regs; 3020 saved_search_regs.start = search_regs.start; 3021 saved_search_regs.end = search_regs.end; 3022 saved_last_thing_searched = last_thing_searched; 3023 last_thing_searched = Qnil; 3024 search_regs.num_regs = 0; 3025 search_regs.start = 0; 3026 search_regs.end = 0; 3027 3028 search_regs_saved = 1; 3029 } 3030} 3031 3032/* Called upon exit from filters and sentinels. */ 3033void 3034restore_search_regs () 3035{ 3036 if (search_regs_saved) 3037 { 3038 if (search_regs.num_regs > 0) 3039 { 3040 xfree (search_regs.start); 3041 xfree (search_regs.end); 3042 } 3043 search_regs.num_regs = saved_search_regs.num_regs; 3044 search_regs.start = saved_search_regs.start; 3045 search_regs.end = saved_search_regs.end; 3046 last_thing_searched = saved_last_thing_searched; 3047 saved_last_thing_searched = Qnil; 3048 search_regs_saved = 0; 3049 } 3050} 3051 3052static Lisp_Object 3053unwind_set_match_data (list) 3054 Lisp_Object list; 3055{ 3056 /* It is safe to free (evaporate) the markers immediately. */ 3057 return Fset_match_data (list, Qevaporate); 3058} 3059 3060/* Called to unwind protect the match data. */ 3061void 3062record_unwind_save_match_data () 3063{ 3064 record_unwind_protect (unwind_set_match_data, 3065 Fmatch_data (Qnil, Qnil, Qnil)); 3066} 3067 3068/* Quote a string to inactivate reg-expr chars */ 3069 3070DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0, 3071 doc: /* Return a regexp string which matches exactly STRING and nothing else. */) 3072 (string) 3073 Lisp_Object string; 3074{ 3075 register unsigned char *in, *out, *end; 3076 register unsigned char *temp; 3077 int backslashes_added = 0; 3078 3079 CHECK_STRING (string); 3080 3081 temp = (unsigned char *) alloca (SBYTES (string) * 2); 3082 3083 /* Now copy the data into the new string, inserting escapes. */ 3084 3085 in = SDATA (string); 3086 end = in + SBYTES (string); 3087 out = temp; 3088 3089 for (; in != end; in++) 3090 { 3091 if (*in == '[' 3092 || *in == '*' || *in == '.' || *in == '\\' 3093 || *in == '?' || *in == '+' 3094 || *in == '^' || *in == '$') 3095 *out++ = '\\', backslashes_added++; 3096 *out++ = *in; 3097 } 3098 3099 return make_specified_string (temp, 3100 SCHARS (string) + backslashes_added, 3101 out - temp, 3102 STRING_MULTIBYTE (string)); 3103} 3104 3105void 3106syms_of_search () 3107{ 3108 register int i; 3109 3110 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) 3111 { 3112 searchbufs[i].buf.allocated = 100; 3113 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100); 3114 searchbufs[i].buf.fastmap = searchbufs[i].fastmap; 3115 searchbufs[i].regexp = Qnil; 3116 searchbufs[i].whitespace_regexp = Qnil; 3117 searchbufs[i].syntax_table = Qnil; 3118 staticpro (&searchbufs[i].regexp); 3119 staticpro (&searchbufs[i].whitespace_regexp); 3120 staticpro (&searchbufs[i].syntax_table); 3121 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]); 3122 } 3123 searchbuf_head = &searchbufs[0]; 3124 3125 Qsearch_failed = intern ("search-failed"); 3126 staticpro (&Qsearch_failed); 3127 Qinvalid_regexp = intern ("invalid-regexp"); 3128 staticpro (&Qinvalid_regexp); 3129 3130 Fput (Qsearch_failed, Qerror_conditions, 3131 Fcons (Qsearch_failed, Fcons (Qerror, Qnil))); 3132 Fput (Qsearch_failed, Qerror_message, 3133 build_string ("Search failed")); 3134 3135 Fput (Qinvalid_regexp, Qerror_conditions, 3136 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil))); 3137 Fput (Qinvalid_regexp, Qerror_message, 3138 build_string ("Invalid regexp")); 3139 3140 last_thing_searched = Qnil; 3141 staticpro (&last_thing_searched); 3142 3143 saved_last_thing_searched = Qnil; 3144 staticpro (&saved_last_thing_searched); 3145 3146 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp, 3147 doc: /* Regexp to substitute for bunches of spaces in regexp search. 3148Some commands use this for user-specified regexps. 3149Spaces that occur inside character classes or repetition operators 3150or other such regexp constructs are not replaced with this. 3151A value of nil (which is the normal value) means treat spaces literally. */); 3152 Vsearch_spaces_regexp = Qnil; 3153 3154 defsubr (&Slooking_at); 3155 defsubr (&Sposix_looking_at); 3156 defsubr (&Sstring_match); 3157 defsubr (&Sposix_string_match); 3158 defsubr (&Ssearch_forward); 3159 defsubr (&Ssearch_backward); 3160 defsubr (&Sword_search_forward); 3161 defsubr (&Sword_search_backward); 3162 defsubr (&Sre_search_forward); 3163 defsubr (&Sre_search_backward); 3164 defsubr (&Sposix_search_forward); 3165 defsubr (&Sposix_search_backward); 3166 defsubr (&Sreplace_match); 3167 defsubr (&Smatch_beginning); 3168 defsubr (&Smatch_end); 3169 defsubr (&Smatch_data); 3170 defsubr (&Sset_match_data); 3171 defsubr (&Sregexp_quote); 3172} 3173 3174/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f 3175 (do not change this comment) */ 3176