1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2   Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
3                 2002, 2003, 2004, 2005, 2006, 2007
4                 Free Software Foundation, Inc.
5
6This file is part of GNU Emacs.
7
8GNU Emacs is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GNU Emacs; see the file COPYING.  If not, write to
20the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21Boston, MA 02110-1301, USA.  */
22
23
24#include <config.h>
25#include <ctype.h>
26#include "lisp.h"
27#include "commands.h"
28#include "buffer.h"
29#include "charset.h"
30#include "keymap.h"
31#include "regex.h"
32
33/* Make syntax table lookup grant data in gl_state.  */
34#define SYNTAX_ENTRY_VIA_PROPERTY
35
36#include "syntax.h"
37#include "intervals.h"
38
39/* We use these constants in place for comment-style and
40   string-ender-char to distinguish  comments/strings started by
41   comment_fence and string_fence codes.  */
42
43#define ST_COMMENT_STYLE (256 + 1)
44#define ST_STRING_STYLE (256 + 2)
45#include "category.h"
46
47Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
48
49int words_include_escapes;
50int parse_sexp_lookup_properties;
51
52/* Nonzero means `scan-sexps' treat all multibyte characters as symbol.  */
53int multibyte_syntax_as_symbol;
54
55/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
56   if not compiled with GCC.  No need to mark it, since it is used
57   only very temporarily.  */
58Lisp_Object syntax_temp;
59
60/* Non-zero means an open parenthesis in column 0 is always considered
61   to be the start of a defun.  Zero means an open parenthesis in
62   column 0 has no special meaning.  */
63
64int open_paren_in_column_0_is_defun_start;
65
66/* This is the internal form of the parse state used in parse-partial-sexp.  */
67
68struct lisp_parse_state
69  {
70    int depth;		/* Depth at end of parsing.  */
71    int instring;	/* -1 if not within string, else desired terminator.  */
72    int incomment;	/* -1 if in unnestable comment else comment nesting */
73    int comstyle;	/* comment style a=0, or b=1, or ST_COMMENT_STYLE.  */
74    int quoted;		/* Nonzero if just after an escape char at end of parsing */
75    int thislevelstart;	/* Char number of most recent start-of-expression at current level */
76    int prevlevelstart; /* Char number of start of containing expression */
77    int location;	/* Char number at which parsing stopped.  */
78    int mindepth;	/* Minimum depth seen while scanning.  */
79    int comstr_start;	/* Position just after last comment/string starter.  */
80    Lisp_Object levelstarts;	/* Char numbers of starts-of-expression
81				   of levels (starting from outermost).  */
82  };
83
84/* These variables are a cache for finding the start of a defun.
85   find_start_pos is the place for which the defun start was found.
86   find_start_value is the defun start position found for it.
87   find_start_value_byte is the corresponding byte position.
88   find_start_buffer is the buffer it was found in.
89   find_start_begv is the BEGV value when it was found.
90   find_start_modiff is the value of MODIFF when it was found.  */
91
92static int find_start_pos;
93static int find_start_value;
94static int find_start_value_byte;
95static struct buffer *find_start_buffer;
96static int find_start_begv;
97static int find_start_modiff;
98
99
100static int find_defun_start P_ ((int, int));
101static int back_comment P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int, int,
102			     EMACS_INT *, EMACS_INT *));
103static int char_quoted P_ ((int, int));
104static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object, int));
105static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int));
106static void scan_sexps_forward P_ ((struct lisp_parse_state *,
107				    int, int, int, int,
108				    int, Lisp_Object, int));
109static int in_classes P_ ((int, Lisp_Object));
110
111
112struct gl_state_s gl_state;		/* Global state of syntax parser.  */
113
114INTERVAL interval_of ();
115#define INTERVALS_AT_ONCE 10		/* 1 + max-number of intervals
116					   to scan to property-change.  */
117
118/* Update gl_state to an appropriate interval which contains CHARPOS.  The
119   sign of COUNT give the relative position of CHARPOS wrt the previously
120   valid interval.  If INIT, only [be]_property fields of gl_state are
121   valid at start, the rest is filled basing on OBJECT.
122
123   `gl_state.*_i' are the intervals, and CHARPOS is further in the search
124   direction than the intervals - or in an interval.  We update the
125   current syntax-table basing on the property of this interval, and
126   update the interval to start further than CHARPOS - or be
127   NULL_INTERVAL.  We also update lim_property to be the next value of
128   charpos to call this subroutine again - or be before/after the
129   start/end of OBJECT.  */
130
131void
132update_syntax_table (charpos, count, init, object)
133     int charpos, count, init;
134     Lisp_Object object;
135{
136  Lisp_Object tmp_table;
137  int cnt = 0, invalidate = 1;
138  INTERVAL i;
139
140  if (init)
141    {
142      gl_state.old_prop = Qnil;
143      gl_state.start = gl_state.b_property;
144      gl_state.stop = gl_state.e_property;
145      i = interval_of (charpos, object);
146      gl_state.backward_i = gl_state.forward_i = i;
147      invalidate = 0;
148      if (NULL_INTERVAL_P (i))
149	return;
150      /* interval_of updates only ->position of the return value, so
151	 update the parents manually to speed up update_interval.  */
152      while (!NULL_PARENT (i))
153	{
154	  if (AM_RIGHT_CHILD (i))
155	    INTERVAL_PARENT (i)->position = i->position
156	      - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
157	      - TOTAL_LENGTH (INTERVAL_PARENT (i))
158	      + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
159	  else
160	    INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
161	      + TOTAL_LENGTH (i);
162	  i = INTERVAL_PARENT (i);
163	}
164      i = gl_state.forward_i;
165      gl_state.b_property = i->position - gl_state.offset;
166      gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
167      goto update;
168    }
169  i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
170
171  /* We are guaranteed to be called with CHARPOS either in i,
172     or further off.  */
173  if (NULL_INTERVAL_P (i))
174    error ("Error in syntax_table logic for to-the-end intervals");
175  else if (charpos < i->position)		/* Move left.  */
176    {
177      if (count > 0)
178	error ("Error in syntax_table logic for intervals <-");
179      /* Update the interval.  */
180      i = update_interval (i, charpos);
181      if (INTERVAL_LAST_POS (i) != gl_state.b_property)
182	{
183	  invalidate = 0;
184	  gl_state.forward_i = i;
185	  gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
186	}
187    }
188  else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right.  */
189    {
190      if (count < 0)
191	error ("Error in syntax_table logic for intervals ->");
192      /* Update the interval.  */
193      i = update_interval (i, charpos);
194      if (i->position != gl_state.e_property)
195	{
196	  invalidate = 0;
197	  gl_state.backward_i = i;
198	  gl_state.b_property = i->position - gl_state.offset;
199	}
200    }
201
202  update:
203  tmp_table = textget (i->plist, Qsyntax_table);
204
205  if (invalidate)
206    invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
207
208  if (invalidate)		/* Did not get to adjacent interval.  */
209    {				/* with the same table => */
210				/* invalidate the old range.  */
211      if (count > 0)
212	{
213	  gl_state.backward_i = i;
214	  gl_state.b_property = i->position - gl_state.offset;
215	}
216      else
217	{
218	  gl_state.forward_i = i;
219	  gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
220	}
221    }
222
223  if (!EQ (tmp_table, gl_state.old_prop))
224    {
225      gl_state.current_syntax_table = tmp_table;
226      gl_state.old_prop = tmp_table;
227      if (EQ (Fsyntax_table_p (tmp_table), Qt))
228	{
229	  gl_state.use_global = 0;
230	}
231      else if (CONSP (tmp_table))
232	{
233	  gl_state.use_global = 1;
234	  gl_state.global_code = tmp_table;
235	}
236      else
237	{
238	  gl_state.use_global = 0;
239	  gl_state.current_syntax_table = current_buffer->syntax_table;
240	}
241    }
242
243  while (!NULL_INTERVAL_P (i))
244    {
245      if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
246	{
247	  if (count > 0)
248	    {
249	      gl_state.e_property = i->position - gl_state.offset;
250	      gl_state.forward_i = i;
251	    }
252	  else
253	    {
254	      gl_state.b_property
255		= i->position + LENGTH (i) - gl_state.offset;
256	      gl_state.backward_i = i;
257	    }
258	  return;
259	}
260      else if (cnt == INTERVALS_AT_ONCE)
261	{
262	  if (count > 0)
263	    {
264	      gl_state.e_property
265		= i->position + LENGTH (i) - gl_state.offset
266		/* e_property at EOB is not set to ZV but to ZV+1, so that
267		   we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
268		   having to check eob between the two.  */
269		+ (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
270	      gl_state.forward_i = i;
271	    }
272	  else
273	    {
274	      gl_state.b_property = i->position - gl_state.offset;
275	      gl_state.backward_i = i;
276	    }
277	  return;
278	}
279      cnt++;
280      i = count > 0 ? next_interval (i) : previous_interval (i);
281    }
282  eassert (NULL_INTERVAL_P (i)); /* This property goes to the end.  */
283  if (count > 0)
284    gl_state.e_property = gl_state.stop;
285  else
286    gl_state.b_property = gl_state.start;
287}
288
289/* Returns TRUE if char at CHARPOS is quoted.
290   Global syntax-table data should be set up already to be good at CHARPOS
291   or after.  On return global syntax data is good for lookup at CHARPOS. */
292
293static int
294char_quoted (charpos, bytepos)
295     register int charpos, bytepos;
296{
297  register enum syntaxcode code;
298  register int beg = BEGV;
299  register int quoted = 0;
300  int orig = charpos;
301
302  DEC_BOTH (charpos, bytepos);
303
304  while (charpos >= beg)
305    {
306      int c;
307
308      UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
309      c = FETCH_CHAR (bytepos);
310      code = SYNTAX (c);
311      if (! (code == Scharquote || code == Sescape))
312	break;
313
314      DEC_BOTH (charpos, bytepos);
315      quoted = !quoted;
316    }
317
318  UPDATE_SYNTAX_TABLE (orig);
319  return quoted;
320}
321
322/* Return the bytepos one character after BYTEPOS.
323   We assume that BYTEPOS is not at the end of the buffer.  */
324
325INLINE int
326inc_bytepos (bytepos)
327     int bytepos;
328{
329  if (NILP (current_buffer->enable_multibyte_characters))
330    return bytepos + 1;
331
332  INC_POS (bytepos);
333  return bytepos;
334}
335
336/* Return the bytepos one character before BYTEPOS.
337   We assume that BYTEPOS is not at the start of the buffer.  */
338
339INLINE int
340dec_bytepos (bytepos)
341     int bytepos;
342{
343  if (NILP (current_buffer->enable_multibyte_characters))
344    return bytepos - 1;
345
346  DEC_POS (bytepos);
347  return bytepos;
348}
349
350/* Return a defun-start position before before POS and not too far before.
351   It should be the last one before POS, or nearly the last.
352
353   When open_paren_in_column_0_is_defun_start is nonzero,
354   only the beginning of the buffer is treated as a defun-start.
355
356   We record the information about where the scan started
357   and what its result was, so that another call in the same area
358   can return the same value very quickly.
359
360   There is no promise at which position the global syntax data is
361   valid on return from the subroutine, so the caller should explicitly
362   update the global data.  */
363
364static int
365find_defun_start (pos, pos_byte)
366     int pos, pos_byte;
367{
368  int opoint = PT, opoint_byte = PT_BYTE;
369
370  if (!open_paren_in_column_0_is_defun_start)
371    {
372      find_start_value_byte = BEGV_BYTE;
373      return BEGV;
374    }
375
376  /* Use previous finding, if it's valid and applies to this inquiry.  */
377  if (current_buffer == find_start_buffer
378      /* Reuse the defun-start even if POS is a little farther on.
379	 POS might be in the next defun, but that's ok.
380	 Our value may not be the best possible, but will still be usable.  */
381      && pos <= find_start_pos + 1000
382      && pos >= find_start_value
383      && BEGV == find_start_begv
384      && MODIFF == find_start_modiff)
385    return find_start_value;
386
387  /* Back up to start of line.  */
388  scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
389
390  /* We optimize syntax-table lookup for rare updates.  Thus we accept
391     only those `^\s(' which are good in global _and_ text-property
392     syntax-tables.  */
393  gl_state.current_syntax_table = current_buffer->syntax_table;
394  gl_state.use_global = 0;
395  while (PT > BEGV)
396    {
397      int c;
398
399      /* Open-paren at start of line means we may have found our
400	 defun-start.  */
401      c = FETCH_CHAR (PT_BYTE);
402      if (SYNTAX (c) == Sopen)
403	{
404	  SETUP_SYNTAX_TABLE (PT + 1, -1);	/* Try again... */
405	  c = FETCH_CHAR (PT_BYTE);
406	  if (SYNTAX (c) == Sopen)
407	    break;
408	  /* Now fallback to the default value.  */
409	  gl_state.current_syntax_table = current_buffer->syntax_table;
410	  gl_state.use_global = 0;
411	}
412      /* Move to beg of previous line.  */
413      scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
414    }
415
416  /* Record what we found, for the next try.  */
417  find_start_value = PT;
418  find_start_value_byte = PT_BYTE;
419  find_start_buffer = current_buffer;
420  find_start_modiff = MODIFF;
421  find_start_begv = BEGV;
422  find_start_pos = pos;
423
424  TEMP_SET_PT_BOTH (opoint, opoint_byte);
425
426  return find_start_value;
427}
428
429/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE.  */
430
431static int
432prev_char_comend_first (pos, pos_byte)
433     int pos, pos_byte;
434{
435  int c, val;
436
437  DEC_BOTH (pos, pos_byte);
438  UPDATE_SYNTAX_TABLE_BACKWARD (pos);
439  c = FETCH_CHAR (pos_byte);
440  val = SYNTAX_COMEND_FIRST (c);
441  UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
442  return val;
443}
444
445/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE.  */
446
447/* static int
448 * prev_char_comstart_first (pos, pos_byte)
449 *      int pos, pos_byte;
450 * {
451 *   int c, val;
452 *
453 *   DEC_BOTH (pos, pos_byte);
454 *   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
455 *   c = FETCH_CHAR (pos_byte);
456 *   val = SYNTAX_COMSTART_FIRST (c);
457 *   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
458 *   return val;
459 * } */
460
461/* Checks whether charpos FROM is at the end of a comment.
462   FROM_BYTE is the bytepos corresponding to FROM.
463   Do not move back before STOP.
464
465   Return a positive value if we find a comment ending at FROM/FROM_BYTE;
466   return -1 otherwise.
467
468   If successful, store the charpos of the comment's beginning
469   into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
470
471   Global syntax data remains valid for backward search starting at
472   the returned value (or at FROM, if the search was not successful).  */
473
474static int
475back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
476     EMACS_INT from, from_byte, stop;
477     int comnested, comstyle;
478     EMACS_INT *charpos_ptr, *bytepos_ptr;
479{
480  /* Look back, counting the parity of string-quotes,
481     and recording the comment-starters seen.
482     When we reach a safe place, assume that's not in a string;
483     then step the main scan to the earliest comment-starter seen
484     an even number of string quotes away from the safe place.
485
486     OFROM[I] is position of the earliest comment-starter seen
487     which is I+2X quotes from the comment-end.
488     PARITY is current parity of quotes from the comment end.  */
489  int string_style = -1;	/* Presumed outside of any string. */
490  int string_lossage = 0;
491  /* Not a real lossage: indicates that we have passed a matching comment
492     starter plus a non-matching comment-ender, meaning that any matching
493     comment-starter we might see later could be a false positive (hidden
494     inside another comment).
495     Test case:  { a (* b } c (* d *) */
496  int comment_lossage = 0;
497  int comment_end = from;
498  int comment_end_byte = from_byte;
499  int comstart_pos = 0;
500  int comstart_byte;
501  /* Place where the containing defun starts,
502     or 0 if we didn't come across it yet.  */
503  int defun_start = 0;
504  int defun_start_byte = 0;
505  register enum syntaxcode code;
506  int nesting = 1;		/* current comment nesting */
507  int c;
508  int syntax = 0;
509
510  /* FIXME: A }} comment-ender style leads to incorrect behavior
511     in the case of {{ c }}} because we ignore the last two chars which are
512     assumed to be comment-enders although they aren't.  */
513
514  /* At beginning of range to scan, we're outside of strings;
515     that determines quote parity to the comment-end.  */
516  while (from != stop)
517    {
518      int temp_byte, prev_syntax;
519      int com2start, com2end;
520
521      /* Move back and examine a character.  */
522      DEC_BOTH (from, from_byte);
523      UPDATE_SYNTAX_TABLE_BACKWARD (from);
524
525      prev_syntax = syntax;
526      c = FETCH_CHAR (from_byte);
527      syntax = SYNTAX_WITH_FLAGS (c);
528      code = SYNTAX (c);
529
530      /* Check for 2-char comment markers.  */
531      com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
532		   && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
533		   && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
534		   && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
535		       || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
536      com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
537		 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
538
539      /* Nasty cases with overlapping 2-char comment markers:
540	 - snmp-mode: -- c -- foo -- c --
541	              --- c --
542		      ------ c --
543	 - c-mode:    *||*
544		      |* *|* *|
545		      |*| |* |*|
546		      ///   */
547
548      /* If a 2-char comment sequence partly overlaps with another,
549	 we don't try to be clever.  */
550      if (from > stop && (com2end || com2start))
551	{
552	  int next = from, next_byte = from_byte, next_c, next_syntax;
553	  DEC_BOTH (next, next_byte);
554	  UPDATE_SYNTAX_TABLE_BACKWARD (next);
555	  next_c = FETCH_CHAR (next_byte);
556	  next_syntax = SYNTAX_WITH_FLAGS (next_c);
557	  if (((com2start || comnested)
558	       && SYNTAX_FLAGS_COMEND_SECOND (syntax)
559	       && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
560	      || ((com2end || comnested)
561		  && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
562		  && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
563		  && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
564	    goto lossage;
565	  /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
566	}
567
568      if (com2start && comstart_pos == 0)
569	/* We're looking at a comment starter.  But it might be a comment
570	   ender as well (see snmp-mode).  The first time we see one, we
571	   need to consider it as a comment starter,
572	   and the subsequent times as a comment ender.  */
573	com2end = 0;
574
575      /* Turn a 2-char comment sequences into the appropriate syntax.  */
576      if (com2end)
577	code = Sendcomment;
578      else if (com2start)
579	code = Scomment;
580      /* Ignore comment starters of a different style.  */
581      else if (code == Scomment
582	       && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
583		   || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
584	continue;
585
586      /* Ignore escaped characters, except comment-enders.  */
587      if (code != Sendcomment && char_quoted (from, from_byte))
588	continue;
589
590      switch (code)
591	{
592	case Sstring_fence:
593	case Scomment_fence:
594	  c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
595	case Sstring:
596	  /* Track parity of quotes.  */
597	  if (string_style == -1)
598	    /* Entering a string.  */
599	    string_style = c;
600	  else if (string_style == c)
601	    /* Leaving the string.  */
602	    string_style = -1;
603	  else
604	    /* If we have two kinds of string delimiters.
605	       There's no way to grok this scanning backwards.  */
606	    string_lossage = 1;
607	  break;
608
609	case Scomment:
610	  /* We've already checked that it is the relevant comstyle.  */
611	  if (string_style != -1 || comment_lossage || string_lossage)
612	    /* There are odd string quotes involved, so let's be careful.
613	       Test case in Pascal: " { " a { " } */
614	    goto lossage;
615
616	  if (!comnested)
617	    {
618	      /* Record best comment-starter so far.  */
619	      comstart_pos = from;
620	      comstart_byte = from_byte;
621	    }
622	  else if (--nesting <= 0)
623	    /* nested comments have to be balanced, so we don't need to
624	       keep looking for earlier ones.  We use here the same (slightly
625	       incorrect) reasoning as below:  since it is followed by uniform
626	       paired string quotes, this comment-start has to be outside of
627	       strings, else the comment-end itself would be inside a string. */
628	    goto done;
629	  break;
630
631	case Sendcomment:
632	  if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
633	      && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
634		  || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
635	    /* This is the same style of comment ender as ours. */
636	    {
637	      if (comnested)
638		nesting++;
639	      else
640		/* Anything before that can't count because it would match
641		   this comment-ender rather than ours.  */
642		from = stop;	/* Break out of the loop.  */
643	    }
644	  else if (comstart_pos != 0 || c != '\n')
645	    /* We're mixing comment styles here, so we'd better be careful.
646	       The (comstart_pos != 0 || c != '\n') check is not quite correct
647	       (we should just always set comment_lossage), but removing it
648	       would imply that any multiline comment in C would go through
649	       lossage, which seems overkill.
650	       The failure should only happen in the rare cases such as
651	         { (* } *)   */
652	    comment_lossage = 1;
653	  break;
654
655	case Sopen:
656	  /* Assume a defun-start point is outside of strings.  */
657	  if (open_paren_in_column_0_is_defun_start
658	      && (from == stop
659		  || (temp_byte = dec_bytepos (from_byte),
660		      FETCH_CHAR (temp_byte) == '\n')))
661	    {
662	      defun_start = from;
663	      defun_start_byte = from_byte;
664	      from = stop;	/* Break out of the loop.  */
665	    }
666	  break;
667
668	default:
669	  break;
670	}
671    }
672
673  if (comstart_pos == 0)
674    {
675      from = comment_end;
676      from_byte = comment_end_byte;
677      UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
678    }
679  /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
680     or `done'), then we've found the beginning of the non-nested comment.  */
681  else if (1)	/* !comnested */
682    {
683      from = comstart_pos;
684      from_byte = comstart_byte;
685      UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
686    }
687  else
688    {
689      struct lisp_parse_state state;
690    lossage:
691      /* We had two kinds of string delimiters mixed up
692	 together.  Decode this going forwards.
693	 Scan fwd from a known safe place (beginning-of-defun)
694	 to the one in question; this records where we
695	 last passed a comment starter.  */
696      /* If we did not already find the defun start, find it now.  */
697      if (defun_start == 0)
698	{
699	  defun_start = find_defun_start (comment_end, comment_end_byte);
700	  defun_start_byte = find_start_value_byte;
701	}
702      do
703	{
704	  scan_sexps_forward (&state,
705			      defun_start, defun_start_byte,
706			      comment_end, -10000, 0, Qnil, 0);
707	  defun_start = comment_end;
708	  if (state.incomment == (comnested ? 1 : -1)
709	      && state.comstyle == comstyle)
710	    from = state.comstr_start;
711	  else
712	    {
713	      from = comment_end;
714	      if (state.incomment)
715		/* If comment_end is inside some other comment, maybe ours
716		   is nested, so we need to try again from within the
717		   surrounding comment.  Example: { a (* " *)  */
718		{
719		  /* FIXME: We should advance by one or two chars. */
720		  defun_start = state.comstr_start + 2;
721		  defun_start_byte = CHAR_TO_BYTE (defun_start);
722		}
723	    }
724	} while (defun_start < comment_end);
725
726      from_byte = CHAR_TO_BYTE (from);
727      UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
728    }
729
730 done:
731  *charpos_ptr = from;
732  *bytepos_ptr = from_byte;
733
734  return (from == comment_end) ? -1 : from;
735}
736
737DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
738       doc: /* Return t if OBJECT is a syntax table.
739Currently, any char-table counts as a syntax table.  */)
740     (object)
741     Lisp_Object object;
742{
743  if (CHAR_TABLE_P (object)
744      && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
745    return Qt;
746  return Qnil;
747}
748
749static void
750check_syntax_table (obj)
751     Lisp_Object obj;
752{
753  CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
754	      Qsyntax_table_p, obj);
755}
756
757DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
758       doc: /* Return the current syntax table.
759This is the one specified by the current buffer.  */)
760     ()
761{
762  return current_buffer->syntax_table;
763}
764
765DEFUN ("standard-syntax-table", Fstandard_syntax_table,
766   Sstandard_syntax_table, 0, 0, 0,
767       doc: /* Return the standard syntax table.
768This is the one used for new buffers.  */)
769     ()
770{
771  return Vstandard_syntax_table;
772}
773
774DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
775       doc: /* Construct a new syntax table and return it.
776It is a copy of the TABLE, which defaults to the standard syntax table.  */)
777     (table)
778     Lisp_Object table;
779{
780  Lisp_Object copy;
781
782  if (!NILP (table))
783    check_syntax_table (table);
784  else
785    table = Vstandard_syntax_table;
786
787  copy = Fcopy_sequence (table);
788
789  /* Only the standard syntax table should have a default element.
790     Other syntax tables should inherit from parents instead.  */
791  XCHAR_TABLE (copy)->defalt = Qnil;
792
793  /* Copied syntax tables should all have parents.
794     If we copied one with no parent, such as the standard syntax table,
795     use the standard syntax table as the copy's parent.  */
796  if (NILP (XCHAR_TABLE (copy)->parent))
797    Fset_char_table_parent (copy, Vstandard_syntax_table);
798  return copy;
799}
800
801DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
802       doc: /* Select a new syntax table for the current buffer.
803One argument, a syntax table.  */)
804     (table)
805     Lisp_Object table;
806{
807  int idx;
808  check_syntax_table (table);
809  current_buffer->syntax_table = table;
810  /* Indicate that this buffer now has a specified syntax table.  */
811  idx = PER_BUFFER_VAR_IDX (syntax_table);
812  SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
813  return table;
814}
815
816/* Convert a letter which signifies a syntax code
817 into the code it signifies.
818 This is used by modify-syntax-entry, and other things.  */
819
820unsigned char syntax_spec_code[0400] =
821  { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
822    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
823    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
824    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
825    (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
826        (char) Smath, 0377, 0377, (char) Squote,
827    (char) Sopen, (char) Sclose, 0377, 0377,
828	0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
829    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
830    0377, 0377, 0377, 0377,
831	(char) Scomment, 0377, (char) Sendcomment, 0377,
832    (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* @, A ... */
833    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
834    0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
835    0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
836    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* `, a, ... */
837    0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
838    0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
839    0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
840  };
841
842/* Indexed by syntax code, give the letter that describes it.  */
843
844char syntax_code_spec[16] =
845  {
846    ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
847    '!', '|'
848  };
849
850/* Indexed by syntax code, give the object (cons of syntax code and
851   nil) to be stored in syntax table.  Since these objects can be
852   shared among syntax tables, we generate them in advance.  By
853   sharing objects, the function `describe-syntax' can give a more
854   compact listing.  */
855static Lisp_Object Vsyntax_code_object;
856
857
858/* Look up the value for CHARACTER in syntax table TABLE's parent
859   and its parents.  SYNTAX_ENTRY calls this, when TABLE itself has nil
860   for CHARACTER.  It's actually used only when not compiled with GCC.  */
861
862Lisp_Object
863syntax_parent_lookup (table, character)
864     Lisp_Object table;
865     int character;
866{
867  Lisp_Object value;
868
869  while (1)
870    {
871      table = XCHAR_TABLE (table)->parent;
872      if (NILP (table))
873	return Qnil;
874
875      value = XCHAR_TABLE (table)->contents[character];
876      if (!NILP (value))
877	return value;
878    }
879}
880
881DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
882       doc: /* Return the syntax code of CHARACTER, described by a character.
883For example, if CHARACTER is a word constituent,
884the character `w' is returned.
885The characters that correspond to various syntax codes
886are listed in the documentation of `modify-syntax-entry'.  */)
887     (character)
888     Lisp_Object character;
889{
890  int char_int;
891  gl_state.current_syntax_table = current_buffer->syntax_table;
892
893  gl_state.use_global = 0;
894  CHECK_NUMBER (character);
895  char_int = XINT (character);
896  return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
897}
898
899DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
900       doc: /* Return the matching parenthesis of CHARACTER, or nil if none.  */)
901     (character)
902     Lisp_Object character;
903{
904  int char_int, code;
905  gl_state.current_syntax_table = current_buffer->syntax_table;
906  gl_state.use_global = 0;
907  CHECK_NUMBER (character);
908  char_int = XINT (character);
909  code = SYNTAX (char_int);
910  if (code == Sopen || code == Sclose)
911    return SYNTAX_MATCH (char_int);
912  return Qnil;
913}
914
915DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
916       doc: /* Convert a syntax specification STRING into syntax cell form.
917STRING should be a string as it is allowed as argument of
918`modify-syntax-entry'.  Value is the equivalent cons cell
919\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
920text property.  */)
921     (string)
922     Lisp_Object string;
923{
924  register const unsigned char *p;
925  register enum syntaxcode code;
926  int val;
927  Lisp_Object match;
928
929  CHECK_STRING (string);
930
931  p = SDATA (string);
932  code = (enum syntaxcode) syntax_spec_code[*p++];
933  if (((int) code & 0377) == 0377)
934    error ("Invalid syntax description letter: %c", p[-1]);
935
936  if (code == Sinherit)
937    return Qnil;
938
939  if (*p)
940    {
941      int len;
942      int character = (STRING_CHAR_AND_LENGTH
943		       (p, SBYTES (string) - 1, len));
944      XSETINT (match, character);
945      if (XFASTINT (match) == ' ')
946	match = Qnil;
947      p += len;
948    }
949  else
950    match = Qnil;
951
952  val = (int) code;
953  while (*p)
954    switch (*p++)
955      {
956      case '1':
957	val |= 1 << 16;
958	break;
959
960      case '2':
961	val |= 1 << 17;
962	break;
963
964      case '3':
965	val |= 1 << 18;
966	break;
967
968      case '4':
969	val |= 1 << 19;
970	break;
971
972      case 'p':
973	val |= 1 << 20;
974	break;
975
976      case 'b':
977	val |= 1 << 21;
978	break;
979
980      case 'n':
981	val |= 1 << 22;
982	break;
983      }
984
985  if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
986    return XVECTOR (Vsyntax_code_object)->contents[val];
987  else
988    /* Since we can't use a shared object, let's make a new one.  */
989    return Fcons (make_number (val), match);
990}
991
992/* I really don't know why this is interactive
993   help-form should at least be made useful whilst reading the second arg.  */
994DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
995  "cSet syntax for character: \nsSet syntax for %s to: ",
996       doc: /* Set syntax for character CHAR according to string NEWENTRY.
997The syntax is changed only for table SYNTAX-TABLE, which defaults to
998 the current buffer's syntax table.
999The first character of NEWENTRY should be one of the following:
1000  Space or -  whitespace syntax.    w   word constituent.
1001  _           symbol constituent.   .   punctuation.
1002  (           open-parenthesis.     )   close-parenthesis.
1003  "           string quote.         \\   escape.
1004  $           paired delimiter.     '   expression quote or prefix operator.
1005  <           comment starter.      >   comment ender.
1006  /           character-quote.      @   inherit from `standard-syntax-table'.
1007  |           generic string fence. !   generic comment fence.
1008
1009Only single-character comment start and end sequences are represented thus.
1010Two-character sequences are represented as described below.
1011The second character of NEWENTRY is the matching parenthesis,
1012 used only if the first character is `(' or `)'.
1013Any additional characters are flags.
1014Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1015 1 means CHAR is the start of a two-char comment start sequence.
1016 2 means CHAR is the second character of such a sequence.
1017 3 means CHAR is the start of a two-char comment end sequence.
1018 4 means CHAR is the second character of such a sequence.
1019
1020There can be up to two orthogonal comment sequences.  This is to support
1021language modes such as C++.  By default, all comment sequences are of style
1022a, but you can set the comment sequence style to b (on the second character
1023of a comment-start, or the first character of a comment-end sequence) using
1024this flag:
1025 b means CHAR is part of comment sequence b.
1026 n means CHAR is part of a nestable comment sequence.
1027
1028 p means CHAR is a prefix character for `backward-prefix-chars';
1029   such characters are treated as whitespace when they occur
1030   between expressions.
1031usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1032     (c, newentry, syntax_table)
1033     Lisp_Object c, newentry, syntax_table;
1034{
1035  CHECK_NUMBER (c);
1036
1037  if (NILP (syntax_table))
1038    syntax_table = current_buffer->syntax_table;
1039  else
1040    check_syntax_table (syntax_table);
1041
1042  SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
1043
1044  /* We clear the regexp cache, since character classes can now have
1045     different values from those in the compiled regexps.*/
1046  clear_regexp_cache ();
1047
1048  return Qnil;
1049}
1050
1051/* Dump syntax table to buffer in human-readable format */
1052
1053DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1054       Sinternal_describe_syntax_value, 1, 1, 0,
1055       doc: /* Insert a description of the internal syntax description SYNTAX at point.  */)
1056     (syntax)
1057     Lisp_Object syntax;
1058{
1059  register enum syntaxcode code;
1060  char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
1061  char str[2];
1062  Lisp_Object first, match_lisp, value = syntax;
1063
1064  if (NILP (value))
1065    {
1066      insert_string ("default");
1067      return syntax;
1068    }
1069
1070  if (CHAR_TABLE_P (value))
1071    {
1072      insert_string ("deeper char-table ...");
1073      return syntax;
1074    }
1075
1076  if (!CONSP (value))
1077    {
1078      insert_string ("invalid");
1079      return syntax;
1080    }
1081
1082  first = XCAR (value);
1083  match_lisp = XCDR (value);
1084
1085  if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1086    {
1087      insert_string ("invalid");
1088      return syntax;
1089    }
1090
1091  code = (enum syntaxcode) (XINT (first) & 0377);
1092  start1 = (XINT (first) >> 16) & 1;
1093  start2 = (XINT (first) >> 17) & 1;
1094  end1 = (XINT (first) >> 18) & 1;
1095  end2 = (XINT (first) >> 19) & 1;
1096  prefix = (XINT (first) >> 20) & 1;
1097  comstyle = (XINT (first) >> 21) & 1;
1098  comnested = (XINT (first) >> 22) & 1;
1099
1100  if ((int) code < 0 || (int) code >= (int) Smax)
1101    {
1102      insert_string ("invalid");
1103      return syntax;
1104    }
1105  desc = syntax_code_spec[(int) code];
1106
1107  str[0] = desc, str[1] = 0;
1108  insert (str, 1);
1109
1110  if (NILP (match_lisp))
1111    insert (" ", 1);
1112  else
1113    insert_char (XINT (match_lisp));
1114
1115  if (start1)
1116    insert ("1", 1);
1117  if (start2)
1118    insert ("2", 1);
1119
1120  if (end1)
1121    insert ("3", 1);
1122  if (end2)
1123    insert ("4", 1);
1124
1125  if (prefix)
1126    insert ("p", 1);
1127  if (comstyle)
1128    insert ("b", 1);
1129  if (comnested)
1130    insert ("n", 1);
1131
1132  insert_string ("\twhich means: ");
1133
1134  switch (SWITCH_ENUM_CAST (code))
1135    {
1136    case Swhitespace:
1137      insert_string ("whitespace"); break;
1138    case Spunct:
1139      insert_string ("punctuation"); break;
1140    case Sword:
1141      insert_string ("word"); break;
1142    case Ssymbol:
1143      insert_string ("symbol"); break;
1144    case Sopen:
1145      insert_string ("open"); break;
1146    case Sclose:
1147      insert_string ("close"); break;
1148    case Squote:
1149      insert_string ("prefix"); break;
1150    case Sstring:
1151      insert_string ("string"); break;
1152    case Smath:
1153      insert_string ("math"); break;
1154    case Sescape:
1155      insert_string ("escape"); break;
1156    case Scharquote:
1157      insert_string ("charquote"); break;
1158    case Scomment:
1159      insert_string ("comment"); break;
1160    case Sendcomment:
1161      insert_string ("endcomment"); break;
1162    case Sinherit:
1163      insert_string ("inherit"); break;
1164    case Scomment_fence:
1165      insert_string ("comment fence"); break;
1166    case Sstring_fence:
1167      insert_string ("string fence"); break;
1168    default:
1169      insert_string ("invalid");
1170      return syntax;
1171    }
1172
1173  if (!NILP (match_lisp))
1174    {
1175      insert_string (", matches ");
1176      insert_char (XINT (match_lisp));
1177    }
1178
1179  if (start1)
1180    insert_string (",\n\t  is the first character of a comment-start sequence");
1181  if (start2)
1182    insert_string (",\n\t  is the second character of a comment-start sequence");
1183
1184  if (end1)
1185    insert_string (",\n\t  is the first character of a comment-end sequence");
1186  if (end2)
1187    insert_string (",\n\t  is the second character of a comment-end sequence");
1188  if (comstyle)
1189    insert_string (" (comment style b)");
1190  if (comnested)
1191    insert_string (" (nestable)");
1192
1193  if (prefix)
1194    insert_string (",\n\t  is a prefix character for `backward-prefix-chars'");
1195
1196  return syntax;
1197}
1198
1199int parse_sexp_ignore_comments;
1200
1201/* Return the position across COUNT words from FROM.
1202   If that many words cannot be found before the end of the buffer, return 0.
1203   COUNT negative means scan backward and stop at word beginning.  */
1204
1205int
1206scan_words (from, count)
1207     register int from, count;
1208{
1209  register int beg = BEGV;
1210  register int end = ZV;
1211  register int from_byte = CHAR_TO_BYTE (from);
1212  register enum syntaxcode code;
1213  int ch0, ch1;
1214
1215  immediate_quit = 1;
1216  QUIT;
1217
1218  SETUP_SYNTAX_TABLE (from, count);
1219
1220  while (count > 0)
1221    {
1222      while (1)
1223	{
1224	  if (from == end)
1225	    {
1226	      immediate_quit = 0;
1227	      return 0;
1228	    }
1229	  UPDATE_SYNTAX_TABLE_FORWARD (from);
1230	  ch0 = FETCH_CHAR (from_byte);
1231	  code = SYNTAX (ch0);
1232	  INC_BOTH (from, from_byte);
1233	  if (words_include_escapes
1234	      && (code == Sescape || code == Scharquote))
1235	    break;
1236	  if (code == Sword)
1237	    break;
1238	}
1239      /* Now CH0 is a character which begins a word and FROM is the
1240         position of the next character.  */
1241      while (1)
1242	{
1243	  if (from == end) break;
1244	  UPDATE_SYNTAX_TABLE_FORWARD (from);
1245	  ch1 = FETCH_CHAR (from_byte);
1246	  code = SYNTAX (ch1);
1247	  if (!(words_include_escapes
1248		&& (code == Sescape || code == Scharquote)))
1249	    if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1250	      break;
1251	  INC_BOTH (from, from_byte);
1252	  ch0 = ch1;
1253	}
1254      count--;
1255    }
1256  while (count < 0)
1257    {
1258      while (1)
1259	{
1260	  if (from == beg)
1261	    {
1262	      immediate_quit = 0;
1263	      return 0;
1264	    }
1265	  DEC_BOTH (from, from_byte);
1266	  UPDATE_SYNTAX_TABLE_BACKWARD (from);
1267	  ch1 = FETCH_CHAR (from_byte);
1268	  code = SYNTAX (ch1);
1269	  if (words_include_escapes
1270	      && (code == Sescape || code == Scharquote))
1271	    break;
1272	  if (code == Sword)
1273	    break;
1274	}
1275      /* Now CH1 is a character which ends a word and FROM is the
1276         position of it.  */
1277      while (1)
1278	{
1279	  int temp_byte;
1280
1281	  if (from == beg)
1282	    break;
1283	  temp_byte = dec_bytepos (from_byte);
1284	  UPDATE_SYNTAX_TABLE_BACKWARD (from);
1285	  ch0 = FETCH_CHAR (temp_byte);
1286	  code = SYNTAX (ch0);
1287	  if (!(words_include_escapes
1288		&& (code == Sescape || code == Scharquote)))
1289	    if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1290	      break;
1291	  DEC_BOTH (from, from_byte);
1292	  ch1 = ch0;
1293	}
1294      count++;
1295    }
1296
1297  immediate_quit = 0;
1298
1299  return from;
1300}
1301
1302DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "p",
1303       doc: /* Move point forward ARG words (backward if ARG is negative).
1304Normally returns t.
1305If an edge of the buffer or a field boundary is reached, point is left there
1306and the function returns nil.  Field boundaries are not noticed if
1307`inhibit-field-text-motion' is non-nil.  */)
1308     (arg)
1309     Lisp_Object arg;
1310{
1311  Lisp_Object tmp;
1312  int orig_val, val;
1313
1314  if (NILP (arg))
1315    XSETFASTINT (arg, 1);
1316  else
1317    CHECK_NUMBER (arg);
1318
1319  val = orig_val = scan_words (PT, XINT (arg));
1320  if (! orig_val)
1321    val = XINT (arg) > 0 ? ZV : BEGV;
1322
1323  /* Avoid jumping out of an input field.  */
1324  tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1325			     Qt, Qnil, Qnil);
1326  val = XFASTINT (tmp);
1327
1328  SET_PT (val);
1329  return val == orig_val ? Qt : Qnil;
1330}
1331
1332Lisp_Object skip_chars ();
1333
1334DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1335       doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1336STRING is like the inside of a `[...]' in a regular expression
1337except that `]' is never special and `\\' quotes `^', `-' or `\\'
1338 (but not as the end of a range; quoting is never needed there).
1339Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1340With arg "^a-zA-Z", skips nonletters stopping before first letter.
1341Char classes, e.g. `[:alpha:]', are supported.
1342
1343Returns the distance traveled, either zero or positive.  */)
1344     (string, lim)
1345     Lisp_Object string, lim;
1346{
1347  return skip_chars (1, 0, string, lim, 1);
1348}
1349
1350DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1351       doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1352See `skip-chars-forward' for details.
1353Returns the distance traveled, either zero or negative.  */)
1354     (string, lim)
1355     Lisp_Object string, lim;
1356{
1357  return skip_chars (0, 0, string, lim, 1);
1358}
1359
1360DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1361       doc: /* Move point forward across chars in specified syntax classes.
1362SYNTAX is a string of syntax code characters.
1363Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1364If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1365This function returns the distance traveled, either zero or positive.  */)
1366     (syntax, lim)
1367     Lisp_Object syntax, lim;
1368{
1369  return skip_chars (1, 1, syntax, lim, 0);
1370}
1371
1372DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1373       doc: /* Move point backward across chars in specified syntax classes.
1374SYNTAX is a string of syntax code characters.
1375Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1376If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1377This function returns the distance traveled, either zero or negative.  */)
1378     (syntax, lim)
1379     Lisp_Object syntax, lim;
1380{
1381  return skip_chars (0, 1, syntax, lim, 0);
1382}
1383
1384static Lisp_Object
1385skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
1386     int forwardp, syntaxp;
1387     Lisp_Object string, lim;
1388     int handle_iso_classes;
1389{
1390  register unsigned int c;
1391  unsigned char fastmap[0400];
1392  /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1393     of which codes don't fit in FASTMAP.  In that case, set the
1394     ranges of characters in CHAR_RANGES.  */
1395  int *char_ranges;
1396  int n_char_ranges = 0;
1397  int negate = 0;
1398  register int i, i_byte;
1399  int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1400  int string_multibyte;
1401  int size_byte;
1402  const unsigned char *str;
1403  int len;
1404  Lisp_Object iso_classes;
1405
1406  CHECK_STRING (string);
1407  char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
1408  string_multibyte = STRING_MULTIBYTE (string);
1409  str = SDATA (string);
1410  size_byte = SBYTES (string);
1411  iso_classes = Qnil;
1412
1413  /* Adjust the multibyteness of the string to that of the buffer.  */
1414  if (multibyte != string_multibyte)
1415    {
1416      int nbytes;
1417
1418      if (multibyte)
1419	nbytes = count_size_as_multibyte (SDATA (string),
1420					  SCHARS (string));
1421      else
1422	nbytes = SCHARS (string);
1423      if (nbytes != size_byte)
1424	{
1425	  unsigned char *tmp = (unsigned char *) alloca (nbytes);
1426	  copy_text (SDATA (string), tmp, size_byte,
1427		     string_multibyte, multibyte);
1428	  size_byte = nbytes;
1429	  str = tmp;
1430	}
1431    }
1432
1433  if (NILP (lim))
1434    XSETINT (lim, forwardp ? ZV : BEGV);
1435  else
1436    CHECK_NUMBER_COERCE_MARKER (lim);
1437
1438  /* In any case, don't allow scan outside bounds of buffer.  */
1439  if (XINT (lim) > ZV)
1440    XSETFASTINT (lim, ZV);
1441  if (XINT (lim) < BEGV)
1442    XSETFASTINT (lim, BEGV);
1443
1444  bzero (fastmap, sizeof fastmap);
1445
1446  i_byte = 0;
1447
1448  if (i_byte < size_byte
1449      && SREF (string, 0) == '^')
1450    {
1451      negate = 1; i_byte++;
1452    }
1453
1454  /* Find the characters specified and set their elements of fastmap.
1455     If syntaxp, each character counts as itself.
1456     Otherwise, handle backslashes and ranges specially.  */
1457
1458  while (i_byte < size_byte)
1459    {
1460      c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1461      i_byte += len;
1462
1463      if (syntaxp)
1464	fastmap[syntax_spec_code[c & 0377]] = 1;
1465      else
1466	{
1467	  if (handle_iso_classes && c == '['
1468	      && i_byte < size_byte
1469	      && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
1470	    {
1471	      const unsigned char *class_beg = str + i_byte + 1;
1472	      const unsigned char *class_end = class_beg;
1473	      const unsigned char *class_limit = str + size_byte - 2;
1474	      /* Leave room for the null.  */
1475	      unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1476	      re_wctype_t cc;
1477
1478	      if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1479		class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1480
1481	      while (class_end < class_limit
1482		     && *class_end >= 'a' && *class_end <= 'z')
1483		class_end++;
1484
1485	      if (class_end == class_beg
1486		  || *class_end != ':' || class_end[1] != ']')
1487		goto not_a_class_name;
1488
1489	      bcopy (class_beg, class_name, class_end - class_beg);
1490	      class_name[class_end - class_beg] = 0;
1491
1492	      cc = re_wctype (class_name);
1493	      if (cc == 0)
1494		error ("Invalid ISO C character class");
1495
1496	      iso_classes = Fcons (make_number (cc), iso_classes);
1497
1498	      i_byte = class_end + 2 - str;
1499	      continue;
1500	    }
1501
1502	not_a_class_name:
1503	  if (c == '\\')
1504	    {
1505	      if (i_byte == size_byte)
1506		break;
1507
1508	      c = STRING_CHAR_AND_LENGTH (str + i_byte,
1509					  size_byte - i_byte, len);
1510	      i_byte += len;
1511	    }
1512	  /* Treat `-' as range character only if another character
1513	     follows.  */
1514	  if (i_byte + 1 < size_byte
1515	      && str[i_byte] == '-')
1516	    {
1517	      unsigned int c2;
1518
1519	      /* Skip over the dash.  */
1520	      i_byte++;
1521
1522	      /* Get the end of the range.  */
1523	      c2 = STRING_CHAR_AND_LENGTH (str + i_byte,
1524					   size_byte - i_byte, len);
1525	      i_byte += len;
1526
1527	      if (SINGLE_BYTE_CHAR_P (c))
1528		{
1529		  if (! SINGLE_BYTE_CHAR_P (c2))
1530		    {
1531		      /* Handle a range starting with a character of
1532			 less than 256, and ending with a character of
1533			 not less than 256.  Split that into two
1534			 ranges, the low one ending at 0377, and the
1535			 high one starting at the smallest character
1536			 in the charset of C2 and ending at C2.  */
1537		      int charset = CHAR_CHARSET (c2);
1538		      int c1 = MAKE_CHAR (charset, 0, 0);
1539
1540		      char_ranges[n_char_ranges++] = c1;
1541		      char_ranges[n_char_ranges++] = c2;
1542		      c2 = 0377;
1543		    }
1544		  while (c <= c2)
1545		    {
1546		      fastmap[c] = 1;
1547		      c++;
1548		    }
1549		}
1550	      else if (c <= c2)	/* Both C and C2 are multibyte char.  */
1551		{
1552		  char_ranges[n_char_ranges++] = c;
1553		  char_ranges[n_char_ranges++] = c2;
1554		}
1555	    }
1556	  else
1557	    {
1558	      if (SINGLE_BYTE_CHAR_P (c))
1559		fastmap[c] = 1;
1560	      else
1561		{
1562		  char_ranges[n_char_ranges++] = c;
1563		  char_ranges[n_char_ranges++] = c;
1564		}
1565	    }
1566	}
1567    }
1568
1569  /* If ^ was the first character, complement the fastmap.  */
1570  if (negate)
1571    for (i = 0; i < sizeof fastmap; i++)
1572      fastmap[i] ^= 1;
1573
1574  {
1575    int start_point = PT;
1576    int pos = PT;
1577    int pos_byte = PT_BYTE;
1578    unsigned char *p = PT_ADDR, *endp, *stop;
1579
1580    if (forwardp)
1581      {
1582	endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1583	stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1584      }
1585    else
1586      {
1587	endp = CHAR_POS_ADDR (XINT (lim));
1588	stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1589      }
1590
1591    immediate_quit = 1;
1592    if (syntaxp)
1593      {
1594        SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1595	if (forwardp)
1596	  {
1597	    if (multibyte)
1598	      while (1)
1599		{
1600		  int nbytes;
1601
1602		  if (p >= stop)
1603		    {
1604		      if (p >= endp)
1605			break;
1606		      p = GAP_END_ADDR;
1607		      stop = endp;
1608		    }
1609		  c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1610		  if (! fastmap[(int) SYNTAX (c)])
1611		    break;
1612		  p += nbytes, pos++, pos_byte += nbytes;
1613		  UPDATE_SYNTAX_TABLE_FORWARD (pos);
1614		}
1615	    else
1616	      while (1)
1617		{
1618		  if (p >= stop)
1619		    {
1620		      if (p >= endp)
1621			break;
1622		      p = GAP_END_ADDR;
1623		      stop = endp;
1624		    }
1625		  if (! fastmap[(int) SYNTAX (*p)])
1626		    break;
1627		  p++, pos++;
1628		  UPDATE_SYNTAX_TABLE_FORWARD (pos);
1629		}
1630	  }
1631	else
1632	  {
1633	    if (multibyte)
1634	      while (1)
1635		{
1636		  unsigned char *prev_p;
1637		  int nbytes;
1638
1639		  if (p <= stop)
1640		    {
1641		      if (p <= endp)
1642			break;
1643		      p = GPT_ADDR;
1644		      stop = endp;
1645		    }
1646		  prev_p = p;
1647		  while (--p >= stop && ! CHAR_HEAD_P (*p));
1648		  PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1649		  if (prev_p - p > nbytes)
1650		    p = prev_p - 1, c = *p, nbytes = 1;
1651		  else
1652		    c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1653		  pos--, pos_byte -= nbytes;
1654		  UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1655		  if (! fastmap[(int) SYNTAX (c)])
1656		    {
1657		      pos++;
1658		      pos_byte += nbytes;
1659		      break;
1660		    }
1661		}
1662	    else
1663	      while (1)
1664		{
1665		  if (p <= stop)
1666		    {
1667		      if (p <= endp)
1668			break;
1669		      p = GPT_ADDR;
1670		      stop = endp;
1671		    }
1672		  UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1673		  if (! fastmap[(int) SYNTAX (p[-1])])
1674		    break;
1675		  p--, pos--;
1676		}
1677	  }
1678      }
1679    else
1680      {
1681	if (forwardp)
1682	  {
1683	    if (multibyte)
1684	      while (1)
1685		{
1686		  int nbytes;
1687
1688		  if (p >= stop)
1689		    {
1690		      if (p >= endp)
1691			break;
1692		      p = GAP_END_ADDR;
1693		      stop = endp;
1694		    }
1695		  c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1696
1697		  if (! NILP (iso_classes) && in_classes (c, iso_classes))
1698		    {
1699		      if (negate)
1700			break;
1701		      else
1702			goto fwd_ok;
1703		    }
1704
1705		  if (SINGLE_BYTE_CHAR_P (c))
1706		    {
1707		      if (!fastmap[c])
1708			break;
1709		    }
1710		  else
1711		    {
1712		      /* If we are looking at a multibyte character,
1713			 we must look up the character in the table
1714			 CHAR_RANGES.  If there's no data in the
1715			 table, that character is not what we want to
1716			 skip.  */
1717
1718		      /* The following code do the right thing even if
1719			 n_char_ranges is zero (i.e. no data in
1720			 CHAR_RANGES).  */
1721		      for (i = 0; i < n_char_ranges; i += 2)
1722			if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1723			  break;
1724		      if (!(negate ^ (i < n_char_ranges)))
1725			break;
1726		    }
1727		fwd_ok:
1728		  p += nbytes, pos++, pos_byte += nbytes;
1729		}
1730	    else
1731	      while (1)
1732		{
1733		  if (p >= stop)
1734		    {
1735		      if (p >= endp)
1736			break;
1737		      p = GAP_END_ADDR;
1738		      stop = endp;
1739		    }
1740
1741		  if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1742		    {
1743		      if (negate)
1744			break;
1745		      else
1746			goto fwd_unibyte_ok;
1747		    }
1748
1749		  if (!fastmap[*p])
1750		    break;
1751		fwd_unibyte_ok:
1752		  p++, pos++;
1753		}
1754	  }
1755	else
1756	  {
1757	    if (multibyte)
1758	      while (1)
1759		{
1760		  unsigned char *prev_p;
1761		  int nbytes;
1762
1763		  if (p <= stop)
1764		    {
1765		      if (p <= endp)
1766			break;
1767		      p = GPT_ADDR;
1768		      stop = endp;
1769		    }
1770		  prev_p = p;
1771		  while (--p >= stop && ! CHAR_HEAD_P (*p));
1772		  PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1773		  if (prev_p - p > nbytes)
1774		    p = prev_p - 1, c = *p, nbytes = 1;
1775		  else
1776		    c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1777
1778		  if (! NILP (iso_classes) && in_classes (c, iso_classes))
1779		    {
1780		      if (negate)
1781			break;
1782		      else
1783			goto back_ok;
1784		    }
1785
1786		  if (SINGLE_BYTE_CHAR_P (c))
1787		    {
1788		      if (!fastmap[c])
1789			break;
1790		    }
1791		  else
1792		    {
1793		      /* See the comment in the previous similar code.  */
1794		      for (i = 0; i < n_char_ranges; i += 2)
1795			if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1796			  break;
1797		      if (!(negate ^ (i < n_char_ranges)))
1798			break;
1799		    }
1800		back_ok:
1801		  pos--, pos_byte -= nbytes;
1802		}
1803	    else
1804	      while (1)
1805		{
1806		  if (p <= stop)
1807		    {
1808		      if (p <= endp)
1809			break;
1810		      p = GPT_ADDR;
1811		      stop = endp;
1812		    }
1813
1814		  if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1815		    {
1816		      if (negate)
1817			break;
1818		      else
1819			goto back_unibyte_ok;
1820		    }
1821
1822		  if (!fastmap[p[-1]])
1823		    break;
1824		back_unibyte_ok:
1825		  p--, pos--;
1826		}
1827	  }
1828      }
1829
1830#if 0 /* Not needed now that a position in mid-character
1831	 cannot be specified in Lisp.  */
1832    if (multibyte
1833	/* INC_POS or DEC_POS might have moved POS over LIM.  */
1834	&& (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1835      pos = XINT (lim);
1836#endif
1837
1838    if (! multibyte)
1839      pos_byte = pos;
1840
1841    SET_PT_BOTH (pos, pos_byte);
1842    immediate_quit = 0;
1843
1844    return make_number (PT - start_point);
1845  }
1846}
1847
1848/* Return 1 if character C belongs to one of the ISO classes
1849   in the list ISO_CLASSES.  Each class is represented by an
1850   integer which is its type according to re_wctype.  */
1851
1852static int
1853in_classes (c, iso_classes)
1854     int c;
1855     Lisp_Object iso_classes;
1856{
1857  int fits_class = 0;
1858
1859  while (! NILP (iso_classes))
1860    {
1861      Lisp_Object elt;
1862      elt = XCAR (iso_classes);
1863      iso_classes = XCDR (iso_classes);
1864
1865      if (re_iswctype (c, XFASTINT (elt)))
1866	fits_class = 1;
1867    }
1868
1869  return fits_class;
1870}
1871
1872/* Jump over a comment, assuming we are at the beginning of one.
1873   FROM is the current position.
1874   FROM_BYTE is the bytepos corresponding to FROM.
1875   Do not move past STOP (a charpos).
1876   The comment over which we have to jump is of style STYLE
1877     (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1878   NESTING should be positive to indicate the nesting at the beginning
1879     for nested comments and should be zero or negative else.
1880     ST_COMMENT_STYLE cannot be nested.
1881   PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1882     (or 0 If the search cannot start in the middle of a two-character).
1883
1884   If successful, return 1 and store the charpos of the comment's end
1885   into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1886   Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1887   corresponding bytepos into *BYTEPOS_PTR and the current nesting
1888   (as defined for state.incomment) in *INCOMMENT_PTR.
1889
1890   The comment end is the last character of the comment rather than the
1891     character just after the comment.
1892
1893   Global syntax data is assumed to initially be valid for FROM and
1894   remains valid for forward search starting at the returned position. */
1895
1896static int
1897forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1898	      charpos_ptr, bytepos_ptr, incomment_ptr)
1899     EMACS_INT from, from_byte, stop;
1900     int nesting, style, prev_syntax;
1901     EMACS_INT *charpos_ptr, *bytepos_ptr;
1902     int *incomment_ptr;
1903{
1904  register int c, c1;
1905  register enum syntaxcode code;
1906  register int syntax;
1907
1908  if (nesting <= 0) nesting = -1;
1909
1910  /* Enter the loop in the middle so that we find
1911     a 2-char comment ender if we start in the middle of it.  */
1912  syntax = prev_syntax;
1913  if (syntax != 0) goto forw_incomment;
1914
1915  while (1)
1916    {
1917      if (from == stop)
1918	{
1919	  *incomment_ptr = nesting;
1920	  *charpos_ptr = from;
1921	  *bytepos_ptr = from_byte;
1922	  return 0;
1923	}
1924      c = FETCH_CHAR (from_byte);
1925      syntax = SYNTAX_WITH_FLAGS (c);
1926      code = syntax & 0xff;
1927      if (code == Sendcomment
1928	  && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1929	  && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1930	      (nesting > 0 && --nesting == 0) : nesting < 0))
1931	/* we have encountered a comment end of the same style
1932	   as the comment sequence which began this comment
1933	   section */
1934	break;
1935      if (code == Scomment_fence
1936	  && style == ST_COMMENT_STYLE)
1937	/* we have encountered a comment end of the same style
1938	   as the comment sequence which began this comment
1939	   section.  */
1940	break;
1941      if (nesting > 0
1942	  && code == Scomment
1943	  && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
1944	  && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1945	/* we have encountered a nested comment of the same style
1946	   as the comment sequence which began this comment section */
1947	nesting++;
1948      INC_BOTH (from, from_byte);
1949      UPDATE_SYNTAX_TABLE_FORWARD (from);
1950
1951    forw_incomment:
1952      if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1953	  && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1954	  && (c1 = FETCH_CHAR (from_byte),
1955	      SYNTAX_COMEND_SECOND (c1))
1956	  && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1957	       SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
1958	{
1959	  if (--nesting <= 0)
1960	    /* we have encountered a comment end of the same style
1961	       as the comment sequence which began this comment
1962	       section */
1963	    break;
1964	  else
1965	    {
1966	      INC_BOTH (from, from_byte);
1967	      UPDATE_SYNTAX_TABLE_FORWARD (from);
1968	    }
1969	}
1970      if (nesting > 0
1971	  && from < stop
1972	  && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1973	  && (c1 = FETCH_CHAR (from_byte),
1974	      SYNTAX_COMMENT_STYLE (c1) == style
1975	      && SYNTAX_COMSTART_SECOND (c1))
1976	  && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1977	      SYNTAX_COMMENT_NESTED (c1)))
1978	/* we have encountered a nested comment of the same style
1979	   as the comment sequence which began this comment
1980	   section */
1981	{
1982	  INC_BOTH (from, from_byte);
1983	  UPDATE_SYNTAX_TABLE_FORWARD (from);
1984	  nesting++;
1985	}
1986    }
1987  *charpos_ptr = from;
1988  *bytepos_ptr = from_byte;
1989  return 1;
1990}
1991
1992DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1993       doc: /*
1994Move forward across up to COUNT comments.  If COUNT is negative, move backward.
1995Stop scanning if we find something other than a comment or whitespace.
1996Set point to where scanning stops.
1997If COUNT comments are found as expected, with nothing except whitespace
1998between them, return t; otherwise return nil.  */)
1999     (count)
2000     Lisp_Object count;
2001{
2002  register EMACS_INT from;
2003  EMACS_INT from_byte;
2004  register EMACS_INT stop;
2005  register int c, c1;
2006  register enum syntaxcode code;
2007  int comstyle = 0;	    /* style of comment encountered */
2008  int comnested = 0;	    /* whether the comment is nestable or not */
2009  int found;
2010  EMACS_INT count1;
2011  EMACS_INT out_charpos, out_bytepos;
2012  int dummy;
2013
2014  CHECK_NUMBER (count);
2015  count1 = XINT (count);
2016  stop = count1 > 0 ? ZV : BEGV;
2017
2018  immediate_quit = 1;
2019  QUIT;
2020
2021  from = PT;
2022  from_byte = PT_BYTE;
2023
2024  SETUP_SYNTAX_TABLE (from, count1);
2025  while (count1 > 0)
2026    {
2027      do
2028	{
2029	  int comstart_first;
2030
2031	  if (from == stop)
2032	    {
2033	      SET_PT_BOTH (from, from_byte);
2034	      immediate_quit = 0;
2035	      return Qnil;
2036	    }
2037	  c = FETCH_CHAR (from_byte);
2038	  code = SYNTAX (c);
2039	  comstart_first = SYNTAX_COMSTART_FIRST (c);
2040	  comnested = SYNTAX_COMMENT_NESTED (c);
2041	  comstyle = SYNTAX_COMMENT_STYLE (c);
2042	  INC_BOTH (from, from_byte);
2043	  UPDATE_SYNTAX_TABLE_FORWARD (from);
2044	  if (from < stop && comstart_first
2045	      && (c1 = FETCH_CHAR (from_byte),
2046		  SYNTAX_COMSTART_SECOND (c1)))
2047	    {
2048	      /* We have encountered a comment start sequence and we
2049		 are ignoring all text inside comments.  We must record
2050		 the comment style this sequence begins so that later,
2051		 only a comment end of the same style actually ends
2052		 the comment section.  */
2053	      code = Scomment;
2054	      comstyle = SYNTAX_COMMENT_STYLE (c1);
2055	      comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2056	      INC_BOTH (from, from_byte);
2057	      UPDATE_SYNTAX_TABLE_FORWARD (from);
2058	    }
2059	}
2060      while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2061
2062      if (code == Scomment_fence)
2063	comstyle = ST_COMMENT_STYLE;
2064      else if (code != Scomment)
2065	{
2066	  immediate_quit = 0;
2067	  DEC_BOTH (from, from_byte);
2068	  SET_PT_BOTH (from, from_byte);
2069	  return Qnil;
2070	}
2071      /* We're at the start of a comment.  */
2072      found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2073			    &out_charpos, &out_bytepos, &dummy);
2074      from = out_charpos; from_byte = out_bytepos;
2075      if (!found)
2076	{
2077	  immediate_quit = 0;
2078	  SET_PT_BOTH (from, from_byte);
2079	  return Qnil;
2080	}
2081      INC_BOTH (from, from_byte);
2082      UPDATE_SYNTAX_TABLE_FORWARD (from);
2083      /* We have skipped one comment.  */
2084      count1--;
2085    }
2086
2087  while (count1 < 0)
2088    {
2089      while (1)
2090	{
2091	  int quoted;
2092
2093	  if (from <= stop)
2094	    {
2095	      SET_PT_BOTH (BEGV, BEGV_BYTE);
2096	      immediate_quit = 0;
2097	      return Qnil;
2098	    }
2099
2100	  DEC_BOTH (from, from_byte);
2101	  /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
2102	  quoted = char_quoted (from, from_byte);
2103	  c = FETCH_CHAR (from_byte);
2104	  code = SYNTAX (c);
2105	  comstyle = 0;
2106	  comnested = SYNTAX_COMMENT_NESTED (c);
2107	  if (code == Sendcomment)
2108	    comstyle = SYNTAX_COMMENT_STYLE (c);
2109	  if (from > stop && SYNTAX_COMEND_SECOND (c)
2110	      && prev_char_comend_first (from, from_byte)
2111	      && !char_quoted (from - 1, dec_bytepos (from_byte)))
2112	    {
2113	      /* We must record the comment style encountered so that
2114		 later, we can match only the proper comment begin
2115		 sequence of the same style.  */
2116	      DEC_BOTH (from, from_byte);
2117	      code = Sendcomment;
2118	      /* Calling char_quoted, above, set up global syntax position
2119		 at the new value of FROM.  */
2120	      c1 = FETCH_CHAR (from_byte);
2121	      comstyle = SYNTAX_COMMENT_STYLE (c1);
2122	      comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2123	    }
2124
2125	  if (code == Scomment_fence)
2126	    {
2127	      /* Skip until first preceding unquoted comment_fence.  */
2128	      int found = 0, ini = from, ini_byte = from_byte;
2129
2130	      while (1)
2131		{
2132		  DEC_BOTH (from, from_byte);
2133		  UPDATE_SYNTAX_TABLE_BACKWARD (from);
2134		  c = FETCH_CHAR (from_byte);
2135		  if (SYNTAX (c) == Scomment_fence
2136		      && !char_quoted (from, from_byte))
2137		    {
2138		      found = 1;
2139		      break;
2140		    }
2141		  else if (from == stop)
2142		    break;
2143		}
2144	      if (found == 0)
2145		{
2146		  from = ini;		/* Set point to ini + 1.  */
2147		  from_byte = ini_byte;
2148		  goto leave;
2149		}
2150 	      else
2151		/* We have skipped one comment.  */
2152		break;
2153	    }
2154	  else if (code == Sendcomment)
2155	    {
2156	      found = back_comment (from, from_byte, stop, comnested, comstyle,
2157				    &out_charpos, &out_bytepos);
2158	      if (found == -1)
2159		{
2160		  if (c == '\n')
2161		    /* This end-of-line is not an end-of-comment.
2162		       Treat it like a whitespace.
2163		       CC-mode (and maybe others) relies on this behavior.  */
2164		    ;
2165		  else
2166		    {
2167		      /* Failure: we should go back to the end of this
2168			 not-quite-endcomment.  */
2169		      if (SYNTAX(c) != code)
2170			/* It was a two-char Sendcomment.  */
2171			INC_BOTH (from, from_byte);
2172		      goto leave;
2173		    }
2174		}
2175	      else
2176		{
2177		  /* We have skipped one comment.  */
2178		  from = out_charpos, from_byte = out_bytepos;
2179		  break;
2180		}
2181	    }
2182	  else if (code != Swhitespace || quoted)
2183	    {
2184	    leave:
2185	      immediate_quit = 0;
2186	      INC_BOTH (from, from_byte);
2187	      SET_PT_BOTH (from, from_byte);
2188	      return Qnil;
2189	    }
2190	}
2191
2192      count1++;
2193    }
2194
2195  SET_PT_BOTH (from, from_byte);
2196  immediate_quit = 0;
2197  return Qt;
2198}
2199
2200/* Return syntax code of character C if C is a single byte character
2201   or `multibyte_symbol_p' is zero.  Otherwise, return Ssymbol.  */
2202
2203#define SYNTAX_WITH_MULTIBYTE_CHECK(c)			\
2204  ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p)	\
2205   ? SYNTAX (c) : Ssymbol)
2206
2207static Lisp_Object
2208scan_lists (from, count, depth, sexpflag)
2209     register EMACS_INT from;
2210     EMACS_INT count, depth;
2211     int sexpflag;
2212{
2213  Lisp_Object val;
2214  register EMACS_INT stop = count > 0 ? ZV : BEGV;
2215  register int c, c1;
2216  int stringterm;
2217  int quoted;
2218  int mathexit = 0;
2219  register enum syntaxcode code, temp_code;
2220  int min_depth = depth;    /* Err out if depth gets less than this.  */
2221  int comstyle = 0;	    /* style of comment encountered */
2222  int comnested = 0;	    /* whether the comment is nestable or not */
2223  EMACS_INT temp_pos;
2224  EMACS_INT last_good = from;
2225  int found;
2226  EMACS_INT from_byte;
2227  EMACS_INT out_bytepos, out_charpos;
2228  int temp, dummy;
2229  int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2230
2231  if (depth > 0) min_depth = 0;
2232
2233  if (from > ZV) from = ZV;
2234  if (from < BEGV) from = BEGV;
2235
2236  from_byte = CHAR_TO_BYTE (from);
2237
2238  immediate_quit = 1;
2239  QUIT;
2240
2241  SETUP_SYNTAX_TABLE (from, count);
2242  while (count > 0)
2243    {
2244      while (from < stop)
2245	{
2246	  int comstart_first, prefix;
2247	  UPDATE_SYNTAX_TABLE_FORWARD (from);
2248	  c = FETCH_CHAR (from_byte);
2249	  code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2250	  comstart_first = SYNTAX_COMSTART_FIRST (c);
2251	  comnested = SYNTAX_COMMENT_NESTED (c);
2252	  comstyle = SYNTAX_COMMENT_STYLE (c);
2253	  prefix = SYNTAX_PREFIX (c);
2254	  if (depth == min_depth)
2255	    last_good = from;
2256	  INC_BOTH (from, from_byte);
2257	  UPDATE_SYNTAX_TABLE_FORWARD (from);
2258	  if (from < stop && comstart_first
2259	      && (c = FETCH_CHAR (from_byte), SYNTAX_COMSTART_SECOND (c))
2260	      && parse_sexp_ignore_comments)
2261	    {
2262	      /* we have encountered a comment start sequence and we
2263		 are ignoring all text inside comments.  We must record
2264		 the comment style this sequence begins so that later,
2265		 only a comment end of the same style actually ends
2266		 the comment section */
2267	      code = Scomment;
2268	      c1 = FETCH_CHAR (from_byte);
2269	      comstyle = SYNTAX_COMMENT_STYLE (c1);
2270	      comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2271	      INC_BOTH (from, from_byte);
2272	      UPDATE_SYNTAX_TABLE_FORWARD (from);
2273	    }
2274
2275	  if (prefix)
2276	    continue;
2277
2278	  switch (SWITCH_ENUM_CAST (code))
2279	    {
2280	    case Sescape:
2281	    case Scharquote:
2282	      if (from == stop) goto lose;
2283	      INC_BOTH (from, from_byte);
2284	      /* treat following character as a word constituent */
2285	    case Sword:
2286	    case Ssymbol:
2287	      if (depth || !sexpflag) break;
2288	      /* This word counts as a sexp; return at end of it.  */
2289	      while (from < stop)
2290		{
2291		  UPDATE_SYNTAX_TABLE_FORWARD (from);
2292
2293		  /* Some compilers can't handle this inside the switch.  */
2294		  c = FETCH_CHAR (from_byte);
2295		  temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2296		  switch (temp)
2297		    {
2298		    case Scharquote:
2299		    case Sescape:
2300		      INC_BOTH (from, from_byte);
2301		      if (from == stop) goto lose;
2302		      break;
2303		    case Sword:
2304		    case Ssymbol:
2305		    case Squote:
2306		      break;
2307		    default:
2308		      goto done;
2309		    }
2310		  INC_BOTH (from, from_byte);
2311		}
2312	      goto done;
2313
2314	    case Scomment_fence:
2315	      comstyle = ST_COMMENT_STYLE;
2316	      /* FALLTHROUGH */
2317	    case Scomment:
2318	      if (!parse_sexp_ignore_comments) break;
2319	      UPDATE_SYNTAX_TABLE_FORWARD (from);
2320	      found = forw_comment (from, from_byte, stop,
2321				    comnested, comstyle, 0,
2322				    &out_charpos, &out_bytepos, &dummy);
2323	      from = out_charpos, from_byte = out_bytepos;
2324	      if (!found)
2325		{
2326		  if (depth == 0)
2327		    goto done;
2328		  goto lose;
2329		}
2330	      INC_BOTH (from, from_byte);
2331	      UPDATE_SYNTAX_TABLE_FORWARD (from);
2332	      break;
2333
2334	    case Smath:
2335	      if (!sexpflag)
2336		break;
2337	      if (from != stop && c == FETCH_CHAR (from_byte))
2338		{
2339		  INC_BOTH (from, from_byte);
2340		}
2341	      if (mathexit)
2342		{
2343		  mathexit = 0;
2344		  goto close1;
2345		}
2346	      mathexit = 1;
2347
2348	    case Sopen:
2349	      if (!++depth) goto done;
2350	      break;
2351
2352	    case Sclose:
2353	    close1:
2354	      if (!--depth) goto done;
2355	      if (depth < min_depth)
2356		xsignal3 (Qscan_error,
2357			  build_string ("Containing expression ends prematurely"),
2358			  make_number (last_good), make_number (from));
2359	      break;
2360
2361	    case Sstring:
2362	    case Sstring_fence:
2363	      temp_pos = dec_bytepos (from_byte);
2364	      stringterm = FETCH_CHAR (temp_pos);
2365	      while (1)
2366		{
2367		  if (from >= stop) goto lose;
2368		  UPDATE_SYNTAX_TABLE_FORWARD (from);
2369		  c = FETCH_CHAR (from_byte);
2370		  if (code == Sstring
2371		      ? (c == stringterm
2372			 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2373		      : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2374		    break;
2375
2376		  /* Some compilers can't handle this inside the switch.  */
2377		  temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2378		  switch (temp)
2379		    {
2380		    case Scharquote:
2381		    case Sescape:
2382		      INC_BOTH (from, from_byte);
2383		    }
2384		  INC_BOTH (from, from_byte);
2385		}
2386	      INC_BOTH (from, from_byte);
2387	      if (!depth && sexpflag) goto done;
2388	      break;
2389	    default:
2390	      /* Ignore whitespace, punctuation, quote, endcomment.  */
2391	      break;
2392	    }
2393	}
2394
2395      /* Reached end of buffer.  Error if within object, return nil if between */
2396      if (depth) goto lose;
2397
2398      immediate_quit = 0;
2399      return Qnil;
2400
2401      /* End of object reached */
2402    done:
2403      count--;
2404    }
2405
2406  while (count < 0)
2407    {
2408      while (from > stop)
2409	{
2410	  DEC_BOTH (from, from_byte);
2411	  UPDATE_SYNTAX_TABLE_BACKWARD (from);
2412	  c = FETCH_CHAR (from_byte);
2413	  code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2414	  if (depth == min_depth)
2415	    last_good = from;
2416	  comstyle = 0;
2417	  comnested = SYNTAX_COMMENT_NESTED (c);
2418	  if (code == Sendcomment)
2419	    comstyle = SYNTAX_COMMENT_STYLE (c);
2420	  if (from > stop && SYNTAX_COMEND_SECOND (c)
2421	      && prev_char_comend_first (from, from_byte)
2422	      && parse_sexp_ignore_comments)
2423	    {
2424	      /* We must record the comment style encountered so that
2425		 later, we can match only the proper comment begin
2426		 sequence of the same style.  */
2427	      DEC_BOTH (from, from_byte);
2428	      UPDATE_SYNTAX_TABLE_BACKWARD (from);
2429	      code = Sendcomment;
2430	      c1 = FETCH_CHAR (from_byte);
2431	      comstyle = SYNTAX_COMMENT_STYLE (c1);
2432	      comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2433	    }
2434
2435	  /* Quoting turns anything except a comment-ender
2436	     into a word character.  Note that this cannot be true
2437	     if we decremented FROM in the if-statement above.  */
2438	  if (code != Sendcomment && char_quoted (from, from_byte))
2439	    {
2440	      DEC_BOTH (from, from_byte);
2441	      code = Sword;
2442	    }
2443	  else if (SYNTAX_PREFIX (c))
2444	    continue;
2445
2446	  switch (SWITCH_ENUM_CAST (code))
2447	    {
2448	    case Sword:
2449	    case Ssymbol:
2450	    case Sescape:
2451	    case Scharquote:
2452	      if (depth || !sexpflag) break;
2453	      /* This word counts as a sexp; count object finished
2454		 after passing it.  */
2455	      while (from > stop)
2456		{
2457		  temp_pos = from_byte;
2458		  if (! NILP (current_buffer->enable_multibyte_characters))
2459		    DEC_POS (temp_pos);
2460		  else
2461		    temp_pos--;
2462		  UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2463		  c1 = FETCH_CHAR (temp_pos);
2464		  temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2465		  /* Don't allow comment-end to be quoted.  */
2466		  if (temp_code == Sendcomment)
2467		    goto done2;
2468		  quoted = char_quoted (from - 1, temp_pos);
2469		  if (quoted)
2470		    {
2471		      DEC_BOTH (from, from_byte);
2472		      temp_pos = dec_bytepos (temp_pos);
2473		      UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2474		    }
2475		  c1 = FETCH_CHAR (temp_pos);
2476		  temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2477		  if (! (quoted || temp_code == Sword
2478			 || temp_code == Ssymbol
2479			 || temp_code == Squote))
2480            	    goto done2;
2481		  DEC_BOTH (from, from_byte);
2482		}
2483	      goto done2;
2484
2485	    case Smath:
2486	      if (!sexpflag)
2487		break;
2488	      temp_pos = dec_bytepos (from_byte);
2489	      UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2490	      if (from != stop && c == FETCH_CHAR (temp_pos))
2491		DEC_BOTH (from, from_byte);
2492	      if (mathexit)
2493		{
2494		  mathexit = 0;
2495		  goto open2;
2496		}
2497	      mathexit = 1;
2498
2499	    case Sclose:
2500	      if (!++depth) goto done2;
2501	      break;
2502
2503	    case Sopen:
2504	    open2:
2505	      if (!--depth) goto done2;
2506	      if (depth < min_depth)
2507		xsignal3 (Qscan_error,
2508			  build_string ("Containing expression ends prematurely"),
2509			  make_number (last_good), make_number (from));
2510	      break;
2511
2512	    case Sendcomment:
2513	      if (!parse_sexp_ignore_comments)
2514		break;
2515	      found = back_comment (from, from_byte, stop, comnested, comstyle,
2516				    &out_charpos, &out_bytepos);
2517	      /* FIXME:  if found == -1, then it really wasn't a comment-end.
2518		 For single-char Sendcomment, we can't do much about it apart
2519		 from skipping the char.
2520		 For 2-char endcomments, we could try again, taking both
2521		 chars as separate entities, but it's a lot of trouble
2522		 for very little gain, so we don't bother either.  -sm */
2523	      if (found != -1)
2524		from = out_charpos, from_byte = out_bytepos;
2525	      break;
2526
2527	    case Scomment_fence:
2528	    case Sstring_fence:
2529	      while (1)
2530		{
2531		  if (from == stop) goto lose;
2532		  DEC_BOTH (from, from_byte);
2533		  UPDATE_SYNTAX_TABLE_BACKWARD (from);
2534		  if (!char_quoted (from, from_byte)
2535		      && (c = FETCH_CHAR (from_byte),
2536			  SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2537		    break;
2538		}
2539	      if (code == Sstring_fence && !depth && sexpflag) goto done2;
2540	      break;
2541
2542	    case Sstring:
2543	      stringterm = FETCH_CHAR (from_byte);
2544	      while (1)
2545		{
2546		  if (from == stop) goto lose;
2547		  DEC_BOTH (from, from_byte);
2548		  UPDATE_SYNTAX_TABLE_BACKWARD (from);
2549		  if (!char_quoted (from, from_byte)
2550		      && stringterm == (c = FETCH_CHAR (from_byte))
2551		      && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2552		    break;
2553		}
2554	      if (!depth && sexpflag) goto done2;
2555	      break;
2556	    default:
2557	      /* Ignore whitespace, punctuation, quote, endcomment.  */
2558	      break;
2559	    }
2560	}
2561
2562      /* Reached start of buffer.  Error if within object, return nil if between */
2563      if (depth) goto lose;
2564
2565      immediate_quit = 0;
2566      return Qnil;
2567
2568    done2:
2569      count++;
2570    }
2571
2572
2573  immediate_quit = 0;
2574  XSETFASTINT (val, from);
2575  return val;
2576
2577 lose:
2578  xsignal3 (Qscan_error,
2579	    build_string ("Unbalanced parentheses"),
2580	    make_number (last_good), make_number (from));
2581}
2582
2583DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2584       doc: /* Scan from character number FROM by COUNT lists.
2585Returns the character number of the position thus found.
2586
2587If DEPTH is nonzero, paren depth begins counting from that value,
2588only places where the depth in parentheses becomes zero
2589are candidates for stopping; COUNT such places are counted.
2590Thus, a positive value for DEPTH means go out levels.
2591
2592Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2593
2594If the beginning or end of (the accessible part of) the buffer is reached
2595and the depth is wrong, an error is signaled.
2596If the depth is right but the count is not used up, nil is returned.  */)
2597     (from, count, depth)
2598     Lisp_Object from, count, depth;
2599{
2600  CHECK_NUMBER (from);
2601  CHECK_NUMBER (count);
2602  CHECK_NUMBER (depth);
2603
2604  return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2605}
2606
2607DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2608       doc: /* Scan from character number FROM by COUNT balanced expressions.
2609If COUNT is negative, scan backwards.
2610Returns the character number of the position thus found.
2611
2612Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2613
2614If the beginning or end of (the accessible part of) the buffer is reached
2615in the middle of a parenthetical grouping, an error is signaled.
2616If the beginning or end is reached between groupings
2617but before count is used up, nil is returned.  */)
2618     (from, count)
2619     Lisp_Object from, count;
2620{
2621  CHECK_NUMBER (from);
2622  CHECK_NUMBER (count);
2623
2624  return scan_lists (XINT (from), XINT (count), 0, 1);
2625}
2626
2627DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2628       0, 0, 0,
2629       doc: /* Move point backward over any number of chars with prefix syntax.
2630This includes chars with "quote" or "prefix" syntax (' or p).  */)
2631     ()
2632{
2633  int beg = BEGV;
2634  int opoint = PT;
2635  int opoint_byte = PT_BYTE;
2636  int pos = PT;
2637  int pos_byte = PT_BYTE;
2638  int c;
2639
2640  if (pos <= beg)
2641    {
2642      SET_PT_BOTH (opoint, opoint_byte);
2643
2644      return Qnil;
2645    }
2646
2647  SETUP_SYNTAX_TABLE (pos, -1);
2648
2649  DEC_BOTH (pos, pos_byte);
2650
2651  while (!char_quoted (pos, pos_byte)
2652	 /* Previous statement updates syntax table.  */
2653	 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
2654	     || SYNTAX_PREFIX (c)))
2655    {
2656      opoint = pos;
2657      opoint_byte = pos_byte;
2658
2659      if (pos + 1 > beg)
2660	DEC_BOTH (pos, pos_byte);
2661    }
2662
2663  SET_PT_BOTH (opoint, opoint_byte);
2664
2665  return Qnil;
2666}
2667
2668/* Parse forward from FROM / FROM_BYTE to END,
2669   assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2670   and return a description of the state of the parse at END.
2671   If STOPBEFORE is nonzero, stop at the start of an atom.
2672   If COMMENTSTOP is 1, stop at the start of a comment.
2673   If COMMENTSTOP is -1, stop at the start or end of a comment,
2674   after the beginning of a string, or after the end of a string.  */
2675
2676static void
2677scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2678		    stopbefore, oldstate, commentstop)
2679     struct lisp_parse_state *stateptr;
2680     register int from;
2681     int end, targetdepth, stopbefore, from_byte;
2682     Lisp_Object oldstate;
2683     int commentstop;
2684{
2685  struct lisp_parse_state state;
2686
2687  register enum syntaxcode code;
2688  int c1;
2689  int comnested;
2690  struct level { int last, prev; };
2691  struct level levelstart[100];
2692  register struct level *curlevel = levelstart;
2693  struct level *endlevel = levelstart + 100;
2694  register int depth;	/* Paren depth of current scanning location.
2695			   level - levelstart equals this except
2696			   when the depth becomes negative.  */
2697  int mindepth;		/* Lowest DEPTH value seen.  */
2698  int start_quoted = 0;		/* Nonzero means starting after a char quote */
2699  Lisp_Object tem;
2700  int prev_from;		/* Keep one character before FROM.  */
2701  int prev_from_byte;
2702  int prev_from_syntax;
2703  int boundary_stop = commentstop == -1;
2704  int nofence;
2705  int found;
2706  EMACS_INT out_bytepos, out_charpos;
2707  int temp;
2708
2709  prev_from = from;
2710  prev_from_byte = from_byte;
2711  if (from != BEGV)
2712    DEC_BOTH (prev_from, prev_from_byte);
2713
2714  /* Use this macro instead of `from++'.  */
2715#define INC_FROM				\
2716do { prev_from = from;				\
2717     prev_from_byte = from_byte; 		\
2718     temp = FETCH_CHAR (prev_from_byte);	\
2719     prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
2720     INC_BOTH (from, from_byte);		\
2721     if (from < end)				\
2722       UPDATE_SYNTAX_TABLE_FORWARD (from);	\
2723  } while (0)
2724
2725  immediate_quit = 1;
2726  QUIT;
2727
2728  if (NILP (oldstate))
2729    {
2730      depth = 0;
2731      state.instring = -1;
2732      state.incomment = 0;
2733      state.comstyle = 0;	/* comment style a by default.  */
2734      state.comstr_start = -1;	/* no comment/string seen.  */
2735    }
2736  else
2737    {
2738      tem = Fcar (oldstate);
2739      if (!NILP (tem))
2740	depth = XINT (tem);
2741      else
2742	depth = 0;
2743
2744      oldstate = Fcdr (oldstate);
2745      oldstate = Fcdr (oldstate);
2746      oldstate = Fcdr (oldstate);
2747      tem = Fcar (oldstate);
2748      /* Check whether we are inside string_fence-style string: */
2749      state.instring = (!NILP (tem)
2750			? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2751			: -1);
2752
2753      oldstate = Fcdr (oldstate);
2754      tem = Fcar (oldstate);
2755      state.incomment = (!NILP (tem)
2756			 ? (INTEGERP (tem) ? XINT (tem) : -1)
2757			 : 0);
2758
2759      oldstate = Fcdr (oldstate);
2760      tem = Fcar (oldstate);
2761      start_quoted = !NILP (tem);
2762
2763      /* if the eighth element of the list is nil, we are in comment
2764	 style a.  If it is non-nil, we are in comment style b */
2765      oldstate = Fcdr (oldstate);
2766      oldstate = Fcdr (oldstate);
2767      tem = Fcar (oldstate);
2768      state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2769					 ? ST_COMMENT_STYLE : 1);
2770
2771      oldstate = Fcdr (oldstate);
2772      tem = Fcar (oldstate);
2773      state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2774      oldstate = Fcdr (oldstate);
2775      tem = Fcar (oldstate);
2776      while (!NILP (tem))		/* >= second enclosing sexps.  */
2777	{
2778	  /* curlevel++->last ran into compiler bug on Apollo */
2779	  curlevel->last = XINT (Fcar (tem));
2780	  if (++curlevel == endlevel)
2781	    curlevel--; /* error ("Nesting too deep for parser"); */
2782	  curlevel->prev = -1;
2783	  curlevel->last = -1;
2784	  tem = Fcdr (tem);
2785	}
2786    }
2787  state.quoted = 0;
2788  mindepth = depth;
2789
2790  curlevel->prev = -1;
2791  curlevel->last = -1;
2792
2793  SETUP_SYNTAX_TABLE (prev_from, 1);
2794  temp = FETCH_CHAR (prev_from_byte);
2795  prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
2796  UPDATE_SYNTAX_TABLE_FORWARD (from);
2797
2798  /* Enter the loop at a place appropriate for initial state.  */
2799
2800  if (state.incomment)
2801    goto startincomment;
2802  if (state.instring >= 0)
2803    {
2804      nofence = state.instring != ST_STRING_STYLE;
2805      if (start_quoted)
2806	goto startquotedinstring;
2807      goto startinstring;
2808    }
2809  else if (start_quoted)
2810    goto startquoted;
2811
2812  while (from < end)
2813    {
2814      INC_FROM;
2815      code = prev_from_syntax & 0xff;
2816
2817      if (from < end
2818	  && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
2819	  && (c1 = FETCH_CHAR (from_byte),
2820	      SYNTAX_COMSTART_SECOND (c1)))
2821	/* Duplicate code to avoid a complex if-expression
2822	   which causes trouble for the SGI compiler.  */
2823	{
2824	  /* Record the comment style we have entered so that only
2825	     the comment-end sequence of the same style actually
2826	     terminates the comment section.  */
2827	  state.comstyle = SYNTAX_COMMENT_STYLE (c1);
2828	  comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2829	  comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2830	  state.incomment = comnested ? 1 : -1;
2831	  state.comstr_start = prev_from;
2832	  INC_FROM;
2833	  code = Scomment;
2834	}
2835      else if (code == Scomment_fence)
2836	{
2837	  /* Record the comment style we have entered so that only
2838	     the comment-end sequence of the same style actually
2839	     terminates the comment section.  */
2840	  state.comstyle = ST_COMMENT_STYLE;
2841	  state.incomment = -1;
2842	  state.comstr_start = prev_from;
2843	  code = Scomment;
2844	}
2845      else if (code == Scomment)
2846	{
2847	  state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2848	  state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2849			     1 : -1);
2850	  state.comstr_start = prev_from;
2851	}
2852
2853      if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
2854	continue;
2855      switch (SWITCH_ENUM_CAST (code))
2856	{
2857	case Sescape:
2858	case Scharquote:
2859	  if (stopbefore) goto stop;  /* this arg means stop at sexp start */
2860	  curlevel->last = prev_from;
2861	startquoted:
2862	  if (from == end) goto endquoted;
2863	  INC_FROM;
2864	  goto symstarted;
2865	  /* treat following character as a word constituent */
2866	case Sword:
2867	case Ssymbol:
2868	  if (stopbefore) goto stop;  /* this arg means stop at sexp start */
2869	  curlevel->last = prev_from;
2870	symstarted:
2871	  while (from < end)
2872	    {
2873	      /* Some compilers can't handle this inside the switch.  */
2874	      temp = FETCH_CHAR (from_byte);
2875	      temp = SYNTAX (temp);
2876	      switch (temp)
2877		{
2878		case Scharquote:
2879		case Sescape:
2880		  INC_FROM;
2881		  if (from == end) goto endquoted;
2882		  break;
2883		case Sword:
2884		case Ssymbol:
2885		case Squote:
2886		  break;
2887		default:
2888		  goto symdone;
2889		}
2890	      INC_FROM;
2891	    }
2892	symdone:
2893	  curlevel->prev = curlevel->last;
2894	  break;
2895
2896	case Scomment_fence: /* Can't happen because it's handled above.  */
2897	case Scomment:
2898	  if (commentstop || boundary_stop) goto done;
2899	startincomment:
2900	  /* The (from == BEGV) test was to enter the loop in the middle so
2901	     that we find a 2-char comment ender even if we start in the
2902	     middle of it.  We don't want to do that if we're just at the
2903	     beginning of the comment (think of (*) ... (*)).  */
2904	  found = forw_comment (from, from_byte, end,
2905				state.incomment, state.comstyle,
2906				(from == BEGV || from < state.comstr_start + 3)
2907				? 0 : prev_from_syntax,
2908				&out_charpos, &out_bytepos, &state.incomment);
2909	  from = out_charpos; from_byte = out_bytepos;
2910	  /* Beware!  prev_from and friends are invalid now.
2911	     Luckily, the `done' doesn't use them and the INC_FROM
2912	     sets them to a sane value without looking at them. */
2913	  if (!found) goto done;
2914	  INC_FROM;
2915	  state.incomment = 0;
2916	  state.comstyle = 0;	/* reset the comment style */
2917	  if (boundary_stop) goto done;
2918	  break;
2919
2920	case Sopen:
2921	  if (stopbefore) goto stop;  /* this arg means stop at sexp start */
2922	  depth++;
2923	  /* curlevel++->last ran into compiler bug on Apollo */
2924	  curlevel->last = prev_from;
2925	  if (++curlevel == endlevel)
2926	    curlevel--; /* error ("Nesting too deep for parser"); */
2927	  curlevel->prev = -1;
2928	  curlevel->last = -1;
2929	  if (targetdepth == depth) goto done;
2930	  break;
2931
2932	case Sclose:
2933	  depth--;
2934	  if (depth < mindepth)
2935	    mindepth = depth;
2936	  if (curlevel != levelstart)
2937	    curlevel--;
2938	  curlevel->prev = curlevel->last;
2939	  if (targetdepth == depth) goto done;
2940	  break;
2941
2942	case Sstring:
2943	case Sstring_fence:
2944	  state.comstr_start = from - 1;
2945	  if (stopbefore) goto stop;  /* this arg means stop at sexp start */
2946	  curlevel->last = prev_from;
2947	  state.instring = (code == Sstring
2948			    ? (FETCH_CHAR (prev_from_byte))
2949			    : ST_STRING_STYLE);
2950	  if (boundary_stop) goto done;
2951	startinstring:
2952	  {
2953	    nofence = state.instring != ST_STRING_STYLE;
2954
2955	    while (1)
2956	      {
2957		int c;
2958
2959		if (from >= end) goto done;
2960		c = FETCH_CHAR (from_byte);
2961		/* Some compilers can't handle this inside the switch.  */
2962		temp = SYNTAX (c);
2963
2964		/* Check TEMP here so that if the char has
2965		   a syntax-table property which says it is NOT
2966		   a string character, it does not end the string.  */
2967		if (nofence && c == state.instring && temp == Sstring)
2968		  break;
2969
2970		switch (temp)
2971		  {
2972		  case Sstring_fence:
2973		    if (!nofence) goto string_end;
2974		    break;
2975		  case Scharquote:
2976		  case Sescape:
2977		    INC_FROM;
2978		  startquotedinstring:
2979		    if (from >= end) goto endquoted;
2980		  }
2981		INC_FROM;
2982	      }
2983	  }
2984	string_end:
2985	  state.instring = -1;
2986	  curlevel->prev = curlevel->last;
2987	  INC_FROM;
2988	  if (boundary_stop) goto done;
2989	  break;
2990
2991	case Smath:
2992	  /* FIXME: We should do something with it.  */
2993	  break;
2994	default:
2995	  /* Ignore whitespace, punctuation, quote, endcomment.  */
2996	  break;
2997	}
2998    }
2999  goto done;
3000
3001 stop:   /* Here if stopping before start of sexp. */
3002  from = prev_from;    /* We have just fetched the char that starts it; */
3003  goto done; /* but return the position before it. */
3004
3005 endquoted:
3006  state.quoted = 1;
3007 done:
3008  state.depth = depth;
3009  state.mindepth = mindepth;
3010  state.thislevelstart = curlevel->prev;
3011  state.prevlevelstart
3012    = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3013  state.location = from;
3014  state.levelstarts = Qnil;
3015  while (--curlevel >= levelstart)
3016      state.levelstarts = Fcons (make_number (curlevel->last),
3017				 state.levelstarts);
3018  immediate_quit = 0;
3019
3020  *stateptr = state;
3021}
3022
3023DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3024       doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3025Parsing stops at TO or when certain criteria are met;
3026 point is set to where parsing stops.
3027If fifth arg OLDSTATE is omitted or nil,
3028 parsing assumes that FROM is the beginning of a function.
3029Value is a list of elements describing final state of parsing:
3030 0. depth in parens.
3031 1. character address of start of innermost containing list; nil if none.
3032 2. character address of start of last complete sexp terminated.
3033 3. non-nil if inside a string.
3034    (it is the character that will terminate the string,
3035     or t if the string should be terminated by a generic string delimiter.)
3036 4. nil if outside a comment, t if inside a non-nestable comment,
3037    else an integer (the current comment nesting).
3038 5. t if following a quote character.
3039 6. the minimum paren-depth encountered during this scan.
3040 7. t if in a comment of style b; symbol `syntax-table' if the comment
3041    should be terminated by a generic comment delimiter.
3042 8. character address of start of comment or string; nil if not in one.
3043 9. Intermediate data for continuation of parsing (subject to change).
3044If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3045in parentheses becomes equal to TARGETDEPTH.
3046Fourth arg STOPBEFORE non-nil means stop when come to
3047 any character that starts a sexp.
3048Fifth arg OLDSTATE is a list like what this function returns.
3049 It is used to initialize the state of the parse.  Elements number 1, 2, 6
3050 and 8 are ignored.
3051Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3052 If it is symbol `syntax-table', stop after the start of a comment or a
3053 string, or after end of a comment or a string.  */)
3054     (from, to, targetdepth, stopbefore, oldstate, commentstop)
3055     Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
3056{
3057  struct lisp_parse_state state;
3058  int target;
3059
3060  if (!NILP (targetdepth))
3061    {
3062      CHECK_NUMBER (targetdepth);
3063      target = XINT (targetdepth);
3064    }
3065  else
3066    target = -100000;		/* We won't reach this depth */
3067
3068  validate_region (&from, &to);
3069  scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3070		      XINT (to),
3071		      target, !NILP (stopbefore), oldstate,
3072		      (NILP (commentstop)
3073		       ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3074
3075  SET_PT (state.location);
3076
3077  return Fcons (make_number (state.depth),
3078	   Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3079	     Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
3080	       Fcons (state.instring >= 0
3081		      ? (state.instring == ST_STRING_STYLE
3082			 ? Qt : make_number (state.instring)) : Qnil,
3083		 Fcons (state.incomment < 0 ? Qt :
3084			(state.incomment == 0 ? Qnil :
3085			 make_number (state.incomment)),
3086		   Fcons (state.quoted ? Qt : Qnil,
3087		     Fcons (make_number (state.mindepth),
3088		       Fcons ((state.comstyle
3089			       ? (state.comstyle == ST_COMMENT_STYLE
3090				  ? Qsyntax_table : Qt) :
3091			       Qnil),
3092			      Fcons (((state.incomment
3093				       || (state.instring >= 0))
3094				      ? make_number (state.comstr_start)
3095				      : Qnil),
3096				     Fcons (state.levelstarts, Qnil))))))))));
3097}
3098
3099void
3100init_syntax_once ()
3101{
3102  register int i, c;
3103  Lisp_Object temp;
3104
3105  /* This has to be done here, before we call Fmake_char_table.  */
3106  Qsyntax_table = intern ("syntax-table");
3107  staticpro (&Qsyntax_table);
3108
3109  /* Intern this now in case it isn't already done.
3110     Setting this variable twice is harmless.
3111     But don't staticpro it here--that is done in alloc.c.  */
3112  Qchar_table_extra_slots = intern ("char-table-extra-slots");
3113
3114  /* Create objects which can be shared among syntax tables.  */
3115  Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
3116  for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3117    XVECTOR (Vsyntax_code_object)->contents[i]
3118      = Fcons (make_number (i), Qnil);
3119
3120  /* Now we are ready to set up this property, so we can
3121     create syntax tables.  */
3122  Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3123
3124  temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3125
3126  Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3127
3128  /* Control characters should not be whitespace.  */
3129  temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3130  for (i = 0; i <= ' ' - 1; i++)
3131    SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3132  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3133
3134  /* Except that a few really are whitespace.  */
3135  temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3136  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3137  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3138  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3139  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3140  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3141
3142  temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3143  for (i = 'a'; i <= 'z'; i++)
3144    SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3145  for (i = 'A'; i <= 'Z'; i++)
3146    SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3147  for (i = '0'; i <= '9'; i++)
3148    SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3149
3150  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3151  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3152
3153  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3154			Fcons (make_number (Sopen), make_number (')')));
3155  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3156			Fcons (make_number (Sclose), make_number ('(')));
3157  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3158			Fcons (make_number (Sopen), make_number (']')));
3159  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3160			Fcons (make_number (Sclose), make_number ('[')));
3161  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3162			Fcons (make_number (Sopen), make_number ('}')));
3163  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3164			Fcons (make_number (Sclose), make_number ('{')));
3165  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3166			Fcons (make_number ((int) Sstring), Qnil));
3167  SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3168			Fcons (make_number ((int) Sescape), Qnil));
3169
3170  temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
3171  for (i = 0; i < 10; i++)
3172    {
3173      c = "_-+*/&|<>="[i];
3174      SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3175    }
3176
3177  temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3178  for (i = 0; i < 12; i++)
3179    {
3180      c = ".,;:?!#@~^'`"[i];
3181      SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3182    }
3183
3184  /* All multibyte characters have syntax `word' by default.  */
3185  temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3186  for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
3187    XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
3188}
3189
3190void
3191syms_of_syntax ()
3192{
3193  Qsyntax_table_p = intern ("syntax-table-p");
3194  staticpro (&Qsyntax_table_p);
3195
3196  staticpro (&Vsyntax_code_object);
3197
3198  staticpro (&gl_state.object);
3199  staticpro (&gl_state.global_code);
3200  staticpro (&gl_state.current_syntax_table);
3201  staticpro (&gl_state.old_prop);
3202
3203  /* Defined in regex.c */
3204  staticpro (&re_match_object);
3205
3206  Qscan_error = intern ("scan-error");
3207  staticpro (&Qscan_error);
3208  Fput (Qscan_error, Qerror_conditions,
3209	Fcons (Qscan_error, Fcons (Qerror, Qnil)));
3210  Fput (Qscan_error, Qerror_message,
3211	build_string ("Scan error"));
3212
3213  DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
3214	       doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace.  */);
3215
3216  DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
3217	       doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3218Otherwise, that text property is simply ignored.
3219See the info node `(elisp)Syntax Properties' for a description of the
3220`syntax-table' property.  */);
3221
3222  words_include_escapes = 0;
3223  DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
3224	       doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words.  */);
3225
3226  DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
3227	       doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol.  */);
3228  multibyte_syntax_as_symbol = 0;
3229
3230  DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3231	       &open_paren_in_column_0_is_defun_start,
3232	       doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun.  */);
3233  open_paren_in_column_0_is_defun_start = 1;
3234
3235  defsubr (&Ssyntax_table_p);
3236  defsubr (&Ssyntax_table);
3237  defsubr (&Sstandard_syntax_table);
3238  defsubr (&Scopy_syntax_table);
3239  defsubr (&Sset_syntax_table);
3240  defsubr (&Schar_syntax);
3241  defsubr (&Smatching_paren);
3242  defsubr (&Sstring_to_syntax);
3243  defsubr (&Smodify_syntax_entry);
3244  defsubr (&Sinternal_describe_syntax_value);
3245
3246  defsubr (&Sforward_word);
3247
3248  defsubr (&Sskip_chars_forward);
3249  defsubr (&Sskip_chars_backward);
3250  defsubr (&Sskip_syntax_forward);
3251  defsubr (&Sskip_syntax_backward);
3252
3253  defsubr (&Sforward_comment);
3254  defsubr (&Sscan_lists);
3255  defsubr (&Sscan_sexps);
3256  defsubr (&Sbackward_prefix_chars);
3257  defsubr (&Sparse_partial_sexp);
3258}
3259
3260/* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3261   (do not change this comment) */
3262