1/* Declarations having to do with GNU Emacs syntax tables.
2   Copyright (C) 1985, 1993, 1994, 1997, 1998, 2001, 2002, 2003, 2004,
3                 2005, 2006, 2007  Free Software Foundation, Inc.
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING.  If not, write to
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA.  */
21
22
/* Lisp object exported for use by other files.  NOTE(review): judging
   by its name this is presumably the symbol `syntax-table-p'; confirm
   against the file that defines it.  */
extern Lisp_Object Qsyntax_table_p;

/* Refresh the scanning state kept in gl_state.  The macros in this
   file call it as (POSITION, COUNT, INIT, OBJECT): POSITION already has
   gl_state.offset added, COUNT is positive for forward and negative for
   backward movement, INIT is 1 from the SETUP_... macros and 0 from the
   UPDATE_... macros, and OBJECT is the object being scanned.  */
extern void update_syntax_table P_ ((int, int, int, Lisp_Object));

/* The standard syntax table is stored where it will automatically
   be used in all new buffers.  */
#define Vstandard_syntax_table buffer_defaults.syntax_table
29
/* A syntax table is a char-table.  Each of its elements is a cons cell
   (CODE+FLAGS . MATCHING-CHAR); MATCHING-CHAR may be nil when the
   character is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS hold one of the codes listed below.
   The numeric values are spelled out because they are also used as
   indices (see syntax_code_spec).  */

enum syntaxcode
  {
    Swhitespace    = 0,   /* whitespace character */
    Spunct         = 1,   /* random punctuation character */
    Sword          = 2,   /* word constituent */
    Ssymbol        = 3,   /* symbol constituent that is not a word
                             constituent */
    Sopen          = 4,   /* beginning delimiter */
    Sclose         = 5,   /* ending delimiter */
    Squote         = 6,   /* prefix character, like ' in Lisp */
    Sstring        = 7,   /* string-grouping character, like " in Lisp */
    Smath          = 8,   /* paired delimiter, like $ in TeX */
    Sescape        = 9,   /* character that begins a C-style escape */
    Scharquote     = 10,  /* character that quotes the character
                             following it */
    Scomment       = 11,  /* comment-starting character */
    Sendcomment    = 12,  /* comment-ending character */
    Sinherit       = 13,  /* use the standard syntax table for this
                             character */
    Scomment_fence = 14,  /* starts or ends a comment whose other end is
                             any character with this same code */
    Sstring_fence  = 15,  /* starts or ends a string whose other end is
                             any character with this same code */
    Smax           = 16   /* upper bound on the meaningful codes */
  };
58
/* Store VAL as the raw syntax entry of character C in syntax table
   TABLE.  When C fits in 8 bits the entry is written straight into the
   char-table's contents vector; any other C is stored through Faset.
   The value of the macro is the value of whichever store was done.
   C is evaluated more than once.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val)                             \
  (((c) != ((c) & 0xFF))                                                \
   ? Faset ((table), make_number (c), (val))                            \
   : (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)))
65
/* Fetch the syntax entry for char C from syntax table TABLE, doing
   inheritance: while the entry found is nil, retry in the table's
   parent, stopping when a non-nil entry is found or when there is no
   parent left (in which case the value is nil).
   This macro is called only when C is less than
   CHAR_TABLE_ORDINARY_SLOTS, since C indexes the contents vector
   directly.  */

#ifdef __GNUC__
/* GCC version: a statement expression with private temporaries, so
   TABLE is evaluated exactly once.  */
#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c)                            \
  ({ Lisp_Object _syntax_tbl = (table);                                 \
     Lisp_Object _syntax_temp;                                          \
     for (;;)                                                           \
       {                                                                \
         _syntax_temp = XCHAR_TABLE (_syntax_tbl)->contents[(c)];       \
         if (!NILP (_syntax_temp))                                      \
           break;                                                       \
         _syntax_tbl = XCHAR_TABLE (_syntax_tbl)->parent;               \
         if (NILP (_syntax_tbl))                                        \
           break;                                                       \
       }                                                                \
     _syntax_temp; })
#else
/* Portable version: the first lookup goes through the shared temporary
   syntax_temp, and a nil result is handed to syntax_parent_lookup.  */
extern Lisp_Object syntax_temp;
extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));

#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c)                            \
  (syntax_temp = XCHAR_TABLE (table)->contents[(c)],                    \
   (!NILP (syntax_temp)                                                 \
    ? syntax_temp                                                       \
    : syntax_parent_lookup (table, (c))))
#endif
92
/* CURRENT_SYNTAX_TABLE is the syntax table valid for the current
   position: either the buffer's syntax table or a syntax table found
   in text properties (kept in gl_state).

   SYNTAX_ENTRY (C) fetches the entry for character C, with
   inheritance.  Without SYNTAX_ENTRY_VIA_PROPERTY it is simply
   SYNTAX_ENTRY_INT; with it, gl_state.global_code is used instead
   whenever gl_state.use_global is nonzero.  */

#ifndef SYNTAX_ENTRY_VIA_PROPERTY
#  define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#  define SYNTAX_ENTRY SYNTAX_ENTRY_INT
#else
#  define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#  define SYNTAX_ENTRY(c)                                             \
    (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
#endif
108
/* Look up the raw syntax entry of character C in CURRENT_SYNTAX_TABLE.
   A C that fits in 8 bits is looked up with parent inheritance via
   SYNTAX_ENTRY_FOLLOW_PARENT; any other C goes through Faref.
   C is evaluated more than once.  */

#define SYNTAX_ENTRY_INT(c)                                             \
  (((c) != ((c) & 0xFF))                                                \
   ? Faref (CURRENT_SYNTAX_TABLE,                                       \
            make_number (c))                                            \
   : SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE,                  \
                                 (unsigned char) (c)))
115
/* Extract information from the entry for character C in the current
   syntax table (as fetched by SYNTAX_ENTRY):

     SYNTAX (C)             the syntax code, i.e. the low 8 bits of
                            the entry's car, as an enum syntaxcode;
     SYNTAX_WITH_FLAGS (C)  the whole integer in the entry's car;
     SYNTAX_MATCH (C)       the entry's cdr.

   When the entry is not a cons, the results are Swhitespace,
   (int) Swhitespace and Qnil respectively.  */

#ifdef __GNUC__
/* GCC versions: each keeps the entry in a private temporary.  */
#define SYNTAX(c)                                                       \
  ({ Lisp_Object _syntax_temp = SYNTAX_ENTRY (c);                       \
     (!CONSP (_syntax_temp)                                             \
      ? Swhitespace                                                     \
      : (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff)); })

#define SYNTAX_WITH_FLAGS(c)                                            \
  ({ Lisp_Object _syntax_temp = SYNTAX_ENTRY (c);                       \
     (!CONSP (_syntax_temp)                                             \
      ? (int) Swhitespace                                               \
      : XINT (XCAR (_syntax_temp))); })

#define SYNTAX_MATCH(c)                                                 \
  ({ Lisp_Object _syntax_temp = SYNTAX_ENTRY (c);                       \
     (!CONSP (_syntax_temp)                                             \
      ? Qnil                                                            \
      : XCDR (_syntax_temp)); })
#else
/* Portable versions: each keeps the entry in the shared temporary
   syntax_temp.  */
#define SYNTAX(c)                                                       \
  (syntax_temp = SYNTAX_ENTRY ((c)),                                    \
   (!CONSP (syntax_temp)                                                \
    ? Swhitespace                                                       \
    : (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff)))

#define SYNTAX_WITH_FLAGS(c)                                            \
  (syntax_temp = SYNTAX_ENTRY ((c)),                                    \
   (!CONSP (syntax_temp)                                                \
    ? (int) Swhitespace                                                 \
    : XINT (XCAR (syntax_temp))))

#define SYNTAX_MATCH(c)                                                 \
  (syntax_temp = SYNTAX_ENTRY ((c)),                                    \
   (!CONSP (syntax_temp)                                                \
    ? Qnil                                                              \
    : XCDR (syntax_temp)))
#endif
159
160/* Then there are seven single-bit flags that have the following meanings:
161  1. This character is the first of a two-character comment-start sequence.
162  2. This character is the second of a two-character comment-start sequence.
163  3. This character is the first of a two-character comment-end sequence.
164  4. This character is the second of a two-character comment-end sequence.
165  5. This character is a prefix, for backward-prefix-chars.
166  6. see below
167  7. This character is part of a nestable comment sequence.
168  Note that any two-character sequence whose first character has flag 1
169  and whose second character has flag 2 will be interpreted as a comment start.
170
  Flag 6 discriminates between two different comment styles.
  Languages such as C++ allow two orthogonal comment start/end pairs,
  and flag 6 determines whether a comment-end sequence or an
  Sendcomment character ends style a or style b.  Comment-start
  sequences can start either style.  Style a is always the default.
176  */
177
/* These macros extract a particular flag for a given character C:
   each yields 1 if the flag is set in SYNTAX_WITH_FLAGS (C) and 0
   otherwise.  The flags occupy bits 16 through 22 of that value.  */

/* Private helper: bit number BIT of the flags word of character C.  */
#define SYNTAX_CHAR_FLAG_BIT(c, bit) ((SYNTAX_WITH_FLAGS (c) >> (bit)) & 1)

#define SYNTAX_COMSTART_FIRST(c)  SYNTAX_CHAR_FLAG_BIT (c, 16)

#define SYNTAX_COMSTART_SECOND(c) SYNTAX_CHAR_FLAG_BIT (c, 17)

#define SYNTAX_COMEND_FIRST(c)    SYNTAX_CHAR_FLAG_BIT (c, 18)

#define SYNTAX_COMEND_SECOND(c)   SYNTAX_CHAR_FLAG_BIT (c, 19)

#define SYNTAX_PREFIX(c)          SYNTAX_CHAR_FLAG_BIT (c, 20)

#define SYNTAX_COMMENT_STYLE(c)   SYNTAX_CHAR_FLAG_BIT (c, 21)

#define SYNTAX_COMMENT_NESTED(c)  SYNTAX_CHAR_FLAG_BIT (c, 22)
193
/* These macros extract specific flags from an integer FLAGS that
   holds the syntax code and the flags: each yields 1 if the flag is
   set and 0 otherwise.  The flags occupy bits 16 through 22.  */

/* Private helper: bit number BIT of the integer FLAGS.  */
#define SYNTAX_FLAGS_BIT(flags, bit) (((flags) >> (bit)) & 1)

#define SYNTAX_FLAGS_COMSTART_FIRST(flags)  SYNTAX_FLAGS_BIT (flags, 16)

#define SYNTAX_FLAGS_COMSTART_SECOND(flags) SYNTAX_FLAGS_BIT (flags, 17)

#define SYNTAX_FLAGS_COMEND_FIRST(flags)    SYNTAX_FLAGS_BIT (flags, 18)

#define SYNTAX_FLAGS_COMEND_SECOND(flags)   SYNTAX_FLAGS_BIT (flags, 19)

#define SYNTAX_FLAGS_PREFIX(flags)          SYNTAX_FLAGS_BIT (flags, 20)

#define SYNTAX_FLAGS_COMMENT_STYLE(flags)   SYNTAX_FLAGS_BIT (flags, 21)

#define SYNTAX_FLAGS_COMMENT_NESTED(flags)  SYNTAX_FLAGS_BIT (flags, 22)
210
/* Table indexed by a character: the syntax code (stored as a char)
   which that character designates.  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.
   It has one slot for each of the 256 byte values.  */

extern unsigned char syntax_spec_code[256];

/* Table indexed by syntax code: the letter that describes that code.  */

extern char syntax_code_spec[16];
220
/* Convert the byte offset BYTEPOS into a character position, for the
   object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT:

     - a string: converted with string_byte_to_char;
     - a buffer: converted relative to that buffer's BEGV;
     - nil: converted relative to the current buffer's BEGV;
     - anything else: BYTEPOS is returned unchanged.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   Those macros do nothing when parse_sexp_lookup_properties is 0,
   so in that case the value is simply 0, for speed.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos)                              \
  (parse_sexp_lookup_properties                                         \
   ? (STRINGP (gl_state.object)                                         \
      ? string_byte_to_char (gl_state.object, (bytepos))                \
      : (BUFFERP (gl_state.object)                                      \
         ? (buf_bytepos_to_charpos                                      \
              (XBUFFER (gl_state.object),                               \
               ((bytepos)                                               \
                + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1))       \
            - BUF_BEGV (XBUFFER (gl_state.object)) + 1)                 \
         : (NILP (gl_state.object)                                      \
            ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1       \
            : (bytepos))))                                              \
   : 0)
239
/* Make the syntax table state (gl_state) good for CHARPOS, assuming
   it is currently good for a position before CHARPOS.
   update_syntax_table is called, moving forward, only when
   parse_sexp_lookup_properties is nonzero and CHARPOS has reached
   gl_state.e_property.  The value is 1 if the call was made, else 0.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos)                    \
  (parse_sexp_lookup_properties                                 \
   && (charpos) >= gl_state.e_property                          \
   && (update_syntax_table ((charpos) + gl_state.offset, 1, 0,  \
                            gl_state.object),                   \
       1))
250
/* Make the syntax table state (gl_state) good for CHARPOS, assuming
   it is currently good for a position after CHARPOS.
   update_syntax_table is called, moving backward, only when
   parse_sexp_lookup_properties is nonzero and CHARPOS lies before
   gl_state.b_property.  The value is 1 if the call was made, else 0.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos)                   \
  (parse_sexp_lookup_properties                                 \
   && (charpos) < gl_state.b_property                           \
   && (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                            gl_state.object),                   \
       1))
261
/* Make the syntax table state (gl_state) good for CHARPOS, whichever
   side of the currently valid range it lies on.  When
   parse_sexp_lookup_properties is nonzero, update_syntax_table is
   called backward if CHARPOS is before gl_state.b_property, or forward
   if CHARPOS has reached gl_state.e_property.  The value is 1 if a
   call was made, else 0.  */

#define UPDATE_SYNTAX_TABLE(charpos)                                    \
  (parse_sexp_lookup_properties                                         \
   && (((charpos) < gl_state.b_property                                 \
        && (update_syntax_table ((charpos) + gl_state.offset, -1, 0,    \
                                 gl_state.object),                      \
            1))                                                         \
       || ((charpos) >= gl_state.e_property                             \
           && (update_syntax_table ((charpos) + gl_state.offset, 1, 0,  \
                                    gl_state.object),                   \
               1))))
276
/* Initialize gl_state for scanning the current buffer.

   This macro should be called with FROM at the start of a forward
   search, or after the last position of a backward search.  It makes
   sure that the first char is picked up with the correct table, so one
   does not need to call UPDATE_SYNTAX_TABLE immediately after the
   call.  The sign of COUNT gives the direction of the search.

   The state is first reset to cover the whole accessible region
   (BEGV up to ZV) with the buffer's own syntax table.  Then, if
   parse_sexp_lookup_properties is nonzero, update_syntax_table is
   called for the first character to be examined: FROM when moving
   forward, FROM - 1 when moving backward.  Nothing is looked up when
   moving backward from BEGV, since there is no character before it.

   FROM and COUNT may be evaluated more than once.  The expansion is a
   single `do ... while (0)' statement, so it must be followed by a
   semicolon and is safe as the unbraced body of an `if'.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT)                                 \
do                                                                      \
  {                                                                     \
    gl_state.b_property = BEGV;                                         \
    gl_state.e_property = ZV + 1;                                       \
    gl_state.object = Qnil;                                             \
    gl_state.use_global = 0;                                            \
    gl_state.offset = 0;                                                \
    gl_state.current_syntax_table = current_buffer->syntax_table;       \
    if (parse_sexp_lookup_properties                                    \
        && ((COUNT) > 0 || (FROM) > BEGV))                              \
      update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT),  \
                           1, Qnil);                                    \
  }                                                                     \
while (0)
300
/* Initialize gl_state for scanning OBJECT, starting at FROM; the sign
   of COUNT gives the direction of the search.  If OBJECT is nil, use
   the current buffer.  If it is t, ignore properties altogether.  Any
   other non-buffer OBJECT is measured with SCHARS, i.e. treated as a
   string.

   This is meant for regex.c to use.  For buffers, regex.c passes
   arguments to the UPDATE_SYNTAX_TABLE macros which are relative to
   BEGV.  So for a buffer (or nil) the offset field is set to BEGV - 1
   and the valid range starts at 1; for t and for strings the offset is
   0 and the range starts at 0.

   If parse_sexp_lookup_properties is nonzero, update_syntax_table is
   then called for the first character to be examined: FROM (plus the
   offset) when COUNT is positive, one position earlier otherwise.

   FROM and COUNT may be evaluated more than once; every use is
   parenthesized so that operator expressions are safe as arguments.
   The expansion is a single `do ... while (0)' statement, so it must
   be followed by a semicolon.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT)              \
do                                                                      \
  {                                                                     \
    gl_state.object = (OBJECT);                                         \
    if (BUFFERP (gl_state.object))                                      \
      {                                                                 \
        struct buffer *buf = XBUFFER (gl_state.object);                 \
        gl_state.b_property = 1;                                        \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;        \
        gl_state.offset = BUF_BEGV (buf) - 1;                           \
      }                                                                 \
    else if (NILP (gl_state.object))                                    \
      {                                                                 \
        gl_state.b_property = 1;                                        \
        gl_state.e_property = ZV - BEGV + 1;                            \
        gl_state.offset = BEGV - 1;                                     \
      }                                                                 \
    else if (EQ (gl_state.object, Qt))                                  \
      {                                                                 \
        /* No properties to consult: make the range effectively        \
           unbounded so the UPDATE_... macros never fire.  */           \
        gl_state.b_property = 0;                                        \
        gl_state.e_property = 1500000000;                               \
        gl_state.offset = 0;                                            \
      }                                                                 \
    else                                                                \
      {                                                                 \
        gl_state.b_property = 0;                                        \
        gl_state.e_property = 1 + SCHARS (gl_state.object);             \
        gl_state.offset = 0;                                            \
      }                                                                 \
    gl_state.use_global = 0;                                            \
    gl_state.current_syntax_table = current_buffer->syntax_table;       \
    if (parse_sexp_lookup_properties)                                   \
      update_syntax_table (((FROM) + gl_state.offset                    \
                            + ((COUNT) > 0 ? 0 : -1)),                  \
                           (COUNT), 1, gl_state.object);                \
  }                                                                     \
while (0)
345
/* Scanning state shared by the SYNTAX_ENTRY, UPDATE_SYNTAX_TABLE...
   and SETUP_SYNTAX_TABLE... macros in this file.  In the comments
   below "c_s_t" presumably abbreviates current_syntax_table.  */
struct gl_state_s
{
  Lisp_Object object;                   /* The object we are scanning. */
  int start;                            /* NOTE(review): the original
                                           comment read "Where to stop.",
                                           identical to `stop' below;
                                           presumably this is where to
                                           start -- confirm.  */
  int stop;                             /* Where to stop. */
  int use_global;                       /* Whether to use global_code
                                           or c_s_t. */
  Lisp_Object global_code;              /* Syntax code of current char. */
  Lisp_Object current_syntax_table;     /* Syntax table for current pos. */
  Lisp_Object old_prop;                 /* Syntax-table prop at prev pos. */
  int b_property;                       /* First index where c_s_t is valid.
                                           Positions before it trigger a
                                           backward update.  */
  int e_property;                       /* First index where c_s_t is
                                           not valid.  Positions at or
                                           after it trigger a forward
                                           update.  */
  INTERVAL forward_i;                   /* Where to start lookup on forward */
  INTERVAL backward_i;                  /* or backward movement.  The
                                           data in c_s_t is valid
                                           between these intervals,
                                           and possibly at the
                                           intervals too.
                                           NOTE(review): the original
                                           comment ended "depending on:"
                                           with nothing after it; the
                                           condition is not stated
                                           here.  */
  /* Offset for positions specified to UPDATE_SYNTAX_TABLE: the macros
     add it to CHARPOS before calling update_syntax_table.  */
  int offset;
};
369
/* The scanning state object that the macros in this file read and
   write.  */
extern struct gl_state_s gl_state;
/* When zero, the UPDATE_SYNTAX_TABLE... macros do nothing and the
   SETUP_SYNTAX_TABLE... macros skip their update_syntax_table call.  */
extern int parse_sexp_lookup_properties;
/* NOTE(review): not used in this file; presumably finds the interval
   holding a given position in a given object -- confirm against the
   definition.  */
extern INTERVAL interval_of P_ ((int, Lisp_Object));

/* NOTE(review): not used in this file; the meaning of the two integer
   arguments cannot be told from here -- see the definition.  */
extern int scan_words P_ ((int, int));
375
376/* arch-tag: 28833cca-cd73-4741-8c85-a3111166a0e0
377   (do not change this comment) */
378