1/**********************************************************************
2  regsyntax.c -  Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2012  K.Takata  <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
33const OnigSyntaxType OnigSyntaxASIS = {
34    0
35  , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
36  , 0
37  , ONIG_OPTION_NONE
38  ,
39  {
40      (OnigCodePoint )'\\'                       /* esc */
41    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
42    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
43    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
44    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
45    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
46  }
47};
48
49const OnigSyntaxType OnigSyntaxPosixBasic = {
50  ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
51    ONIG_SYN_OP_ESC_BRACE_INTERVAL )
52  , 0
53  , 0
54  , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
55  ,
56  {
57      (OnigCodePoint )'\\'                       /* esc */
58    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
59    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
60    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
61    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
62    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
63  }
64};
65
66const OnigSyntaxType OnigSyntaxPosixExtended = {
67  ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
68    ONIG_SYN_OP_BRACE_INTERVAL |
69    ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
70  , 0
71  , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
72      ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
73      ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
74      ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
75  , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
76  ,
77  {
78      (OnigCodePoint )'\\'                       /* esc */
79    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
80    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
81    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
82    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
83    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
84  }
85};
86
87const OnigSyntaxType OnigSyntaxEmacs = {
88  ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
89    ONIG_SYN_OP_ESC_BRACE_INTERVAL |
90    ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
91    ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
92    ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
93    ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
94  , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
95  , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
96  , ONIG_OPTION_NONE
97  ,
98  {
99      (OnigCodePoint )'\\'                       /* esc */
100    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
101    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
102    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
103    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
104    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
105  }
106};
107
108const OnigSyntaxType OnigSyntaxGrep = {
109  ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
110    ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
111    ONIG_SYN_OP_ESC_VBAR_ALT |
112    ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
113    ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
114    ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
115    ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
116  , 0
117  , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
118  , ONIG_OPTION_NONE
119  ,
120  {
121      (OnigCodePoint )'\\'                       /* esc */
122    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
123    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
124    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
125    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
126    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
127  }
128};
129
130const OnigSyntaxType OnigSyntaxGnuRegex = {
131  SYN_GNU_REGEX_OP
132  , 0
133  , SYN_GNU_REGEX_BV
134  , ONIG_OPTION_NONE
135  ,
136  {
137      (OnigCodePoint )'\\'                       /* esc */
138    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
139    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
140    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
141    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
142    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
143  }
144};
145
146const OnigSyntaxType OnigSyntaxJava = {
147  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
148     ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
149     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
150   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
151  , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
152      ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
153      ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
154      ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
155      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
156  , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
157  , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_ASCII_RANGE |
158      ONIG_OPTION_WORD_BOUND_ALL_RANGE )
159  ,
160  {
161      (OnigCodePoint )'\\'                       /* esc */
162    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
163    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
164    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
165    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
166    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
167  }
168};
169
170/* Perl 5.8 */
171const OnigSyntaxType OnigSyntaxPerl58 = {
172  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
173     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
174     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
175     ONIG_SYN_OP_ESC_C_CONTROL )
176   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
177  , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
178      ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
179      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
180      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
181      ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
182      ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)
183  , SYN_GNU_REGEX_BV
184  , ONIG_OPTION_SINGLELINE
185  ,
186  {
187      (OnigCodePoint )'\\'                       /* esc */
188    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
189    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
190    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
191    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
192    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
193  }
194};
195
196/* Perl 5.8 + named group */
197const OnigSyntaxType OnigSyntaxPerl58_NG = {
198  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
199     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
200     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
201     ONIG_SYN_OP_ESC_C_CONTROL )
202   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
203  , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
204      ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
205      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
206      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
207      ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
208      ONIG_SYN_OP2_QMARK_LPAREN_CONDITION     |
209      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       |
210      ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        |
211      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
212  , ( SYN_GNU_REGEX_BV |
213      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
214      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
215  , ONIG_OPTION_SINGLELINE
216  ,
217  {
218      (OnigCodePoint )'\\'                       /* esc */
219    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
220    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
221    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
222    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
223    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
224  }
225};
226
227/* Perl 5.10+ */
228const OnigSyntaxType OnigSyntaxPerl = {
229  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
230     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
231     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
232     ONIG_SYN_OP_ESC_C_CONTROL )
233   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
234  , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
235      ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
236      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
237      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
238      ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
239      ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
240      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
241      ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL |
242      ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK |
243      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
244      ONIG_SYN_OP2_QMARK_SUBEXP_CALL |
245      ONIG_SYN_OP2_ESC_G_BRACE_BACKREF |
246      ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP |
247      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
248      ONIG_SYN_OP2_ESC_K_NAMED_BACKREF )
249  , ( SYN_GNU_REGEX_BV |
250      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
251      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL )
252  , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_CAPTURE_GROUP )
253  ,
254  {
255      (OnigCodePoint )'\\'                       /* esc */
256    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
257    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
258    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
259    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
260    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
261  }
262};
263
264const OnigSyntaxType OnigSyntaxPython = {
265  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
266     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
267     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
268     ONIG_SYN_OP_ESC_C_CONTROL )
269   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
270  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
271      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
272      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
273      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
274      ONIG_SYN_OP2_ESC_V_VTAB |
275      ONIG_SYN_OP2_ESC_U_HEX4 |
276      ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
277      ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP )
278  , ( SYN_GNU_REGEX_BV |
279      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV )
280  , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_ASCII_RANGE )
281  ,
282  {
283      (OnigCodePoint )'\\'                       /* esc */
284    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
285    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
286    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
287    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
288    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
289  }
290};
291
292
293
294extern int
295onig_set_default_syntax(const OnigSyntaxType* syntax)
296{
297  if (IS_NULL(syntax))
298    syntax = ONIG_SYNTAX_RUBY;
299
300  OnigDefaultSyntax = syntax;
301  return 0;
302}
303
304extern void
305onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from)
306{
307  *to = *from;
308}
309
310extern void
311onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
312{
313  syntax->op = op;
314}
315
316extern void
317onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
318{
319  syntax->op2 = op2;
320}
321
322extern void
323onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
324{
325  syntax->behavior = behavior;
326}
327
328extern void
329onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
330{
331  syntax->options = options;
332}
333
334extern unsigned int
335onig_get_syntax_op(OnigSyntaxType* syntax)
336{
337  return syntax->op;
338}
339
340extern unsigned int
341onig_get_syntax_op2(OnigSyntaxType* syntax)
342{
343  return syntax->op2;
344}
345
346extern unsigned int
347onig_get_syntax_behavior(OnigSyntaxType* syntax)
348{
349  return syntax->behavior;
350}
351
352extern OnigOptionType
353onig_get_syntax_options(OnigSyntaxType* syntax)
354{
355  return syntax->options;
356}
357
358#ifdef USE_VARIABLE_META_CHARS
359extern int onig_set_meta_char(OnigSyntaxType* enc,
360                              unsigned int what, OnigCodePoint code)
361{
362  switch (what) {
363  case ONIG_META_CHAR_ESCAPE:
364    enc->meta_char_table.esc = code;
365    break;
366  case ONIG_META_CHAR_ANYCHAR:
367    enc->meta_char_table.anychar = code;
368    break;
369  case ONIG_META_CHAR_ANYTIME:
370    enc->meta_char_table.anytime = code;
371    break;
372  case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
373    enc->meta_char_table.zero_or_one_time = code;
374    break;
375  case ONIG_META_CHAR_ONE_OR_MORE_TIME:
376    enc->meta_char_table.one_or_more_time = code;
377    break;
378  case ONIG_META_CHAR_ANYCHAR_ANYTIME:
379    enc->meta_char_table.anychar_anytime = code;
380    break;
381  default:
382    return ONIGERR_INVALID_ARGUMENT;
383    break;
384  }
385  return 0;
386}
387#endif /* USE_VARIABLE_META_CHARS */
388