1#ifndef ONIGURUMA_REGPARSE_H
2#define ONIGURUMA_REGPARSE_H
3/**********************************************************************
4  regparse.h -  Onigmo (Oniguruma-mod) (regular expression library)
5**********************************************************************/
6/*-
7 * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
8 * Copyright (c) 2011       K.Takata  <kentkt AT csc DOT jp>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include "regint.h"
34
35#if defined __GNUC__ && __GNUC__ >= 4
36#pragma GCC visibility push(default)
37#endif
38
39/* node type */
40#define NT_STR         0
41#define NT_CCLASS      1
42#define NT_CTYPE       2
43#define NT_CANY        3
44#define NT_BREF        4
45#define NT_QTFR        5
46#define NT_ENCLOSE     6
47#define NT_ANCHOR      7
48#define NT_LIST        8
49#define NT_ALT         9
50#define NT_CALL       10
51
52/* node type bit */
53#define NTYPE2BIT(type)      (1<<(type))
54
55#define BIT_NT_STR        NTYPE2BIT(NT_STR)
56#define BIT_NT_CCLASS     NTYPE2BIT(NT_CCLASS)
57#define BIT_NT_CTYPE      NTYPE2BIT(NT_CTYPE)
58#define BIT_NT_CANY       NTYPE2BIT(NT_CANY)
59#define BIT_NT_BREF       NTYPE2BIT(NT_BREF)
60#define BIT_NT_QTFR       NTYPE2BIT(NT_QTFR)
61#define BIT_NT_ENCLOSE    NTYPE2BIT(NT_ENCLOSE)
62#define BIT_NT_ANCHOR     NTYPE2BIT(NT_ANCHOR)
63#define BIT_NT_LIST       NTYPE2BIT(NT_LIST)
64#define BIT_NT_ALT        NTYPE2BIT(NT_ALT)
65#define BIT_NT_CALL       NTYPE2BIT(NT_CALL)
66
67#define IS_NODE_TYPE_SIMPLE(type) \
68  ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
69                       BIT_NT_CANY | BIT_NT_BREF)) != 0)
70
71#define NTYPE(node)             ((node)->u.base.type)
72#define SET_NTYPE(node, ntype)   (node)->u.base.type = (ntype)
73
74#define NSTR(node)         (&((node)->u.str))
75#define NCCLASS(node)      (&((node)->u.cclass))
76#define NCTYPE(node)       (&((node)->u.ctype))
77#define NBREF(node)        (&((node)->u.bref))
78#define NQTFR(node)        (&((node)->u.qtfr))
79#define NENCLOSE(node)     (&((node)->u.enclose))
80#define NANCHOR(node)      (&((node)->u.anchor))
81#define NCONS(node)        (&((node)->u.cons))
82#define NCALL(node)        (&((node)->u.call))
83
84#define NCAR(node)         (NCONS(node)->car)
85#define NCDR(node)         (NCONS(node)->cdr)
86
87
88
89#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
90#define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
91
92#define ENCLOSE_MEMORY           (1<<0)
93#define ENCLOSE_OPTION           (1<<1)
94#define ENCLOSE_STOP_BACKTRACK   (1<<2)
95#define ENCLOSE_CONDITION        (1<<3)
96
97#define NODE_STR_MARGIN         16
98#define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */
99#define NODE_BACKREFS_SIZE       6
100
101#define NSTR_RAW                (1<<0) /* by backslashed number */
102#define NSTR_AMBIG              (1<<1)
103#define NSTR_DONT_GET_OPT_INFO  (1<<2)
104
105#define NSTRING_LEN(node) (OnigDistance )((node)->u.str.end - (node)->u.str.s)
106#define NSTRING_SET_RAW(node)          (node)->u.str.flag |= NSTR_RAW
107#define NSTRING_CLEAR_RAW(node)        (node)->u.str.flag &= ~NSTR_RAW
108#define NSTRING_SET_AMBIG(node)        (node)->u.str.flag |= NSTR_AMBIG
109#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
110  (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
111#define NSTRING_IS_RAW(node)          (((node)->u.str.flag & NSTR_RAW)   != 0)
112#define NSTRING_IS_AMBIG(node)        (((node)->u.str.flag & NSTR_AMBIG) != 0)
113#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
114  (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
115
116#define BACKREFS_P(br) \
117  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
118
119#define NQ_TARGET_ISNOT_EMPTY     0
120#define NQ_TARGET_IS_EMPTY        1
121#define NQ_TARGET_IS_EMPTY_MEM    2
122#define NQ_TARGET_IS_EMPTY_REC    3
123
124/* status bits */
125#define NST_MIN_FIXED             (1<<0)
126#define NST_MAX_FIXED             (1<<1)
127#define NST_CLEN_FIXED            (1<<2)
128#define NST_MARK1                 (1<<3)
129#define NST_MARK2                 (1<<4)
130#define NST_MEM_BACKREFED         (1<<5)
131#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
132#define NST_RECURSION             (1<<7)
133#define NST_CALLED                (1<<8)
134#define NST_ADDR_FIXED            (1<<9)
135#define NST_NAMED_GROUP           (1<<10)
136#define NST_NAME_REF              (1<<11)
137#define NST_IN_REPEAT             (1<<12) /* STK_REPEAT is nested in stack. */
138#define NST_NEST_LEVEL            (1<<13)
139#define NST_BY_NUMBER             (1<<14) /* {n,m} */
140
141#define SET_ENCLOSE_STATUS(node,f)      (node)->u.enclose.state |=  (f)
142#define CLEAR_ENCLOSE_STATUS(node,f)    (node)->u.enclose.state &= ~(f)
143
144#define IS_ENCLOSE_CALLED(en)          (((en)->state & NST_CALLED)        != 0)
145#define IS_ENCLOSE_ADDR_FIXED(en)      (((en)->state & NST_ADDR_FIXED)    != 0)
146#define IS_ENCLOSE_RECURSION(en)       (((en)->state & NST_RECURSION)     != 0)
147#define IS_ENCLOSE_MARK1(en)           (((en)->state & NST_MARK1)         != 0)
148#define IS_ENCLOSE_MARK2(en)           (((en)->state & NST_MARK2)         != 0)
149#define IS_ENCLOSE_MIN_FIXED(en)       (((en)->state & NST_MIN_FIXED)     != 0)
150#define IS_ENCLOSE_MAX_FIXED(en)       (((en)->state & NST_MAX_FIXED)     != 0)
151#define IS_ENCLOSE_CLEN_FIXED(en)      (((en)->state & NST_CLEN_FIXED)    != 0)
152#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
153    (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
154#define IS_ENCLOSE_NAMED_GROUP(en)     (((en)->state & NST_NAMED_GROUP)   != 0)
155#define IS_ENCLOSE_NAME_REF(en)        (((en)->state & NST_NAME_REF)      != 0)
156
157#define SET_CALL_RECURSION(node)       (node)->u.call.state |= NST_RECURSION
158#define IS_CALL_RECURSION(cn)          (((cn)->state & NST_RECURSION)  != 0)
159#define IS_CALL_NAME_REF(cn)           (((cn)->state & NST_NAME_REF)   != 0)
160#define IS_BACKREF_NAME_REF(bn)        (((bn)->state & NST_NAME_REF)   != 0)
161#define IS_BACKREF_NEST_LEVEL(bn)      (((bn)->state & NST_NEST_LEVEL) != 0)
162#define IS_QUANTIFIER_IN_REPEAT(qn)    (((qn)->state & NST_IN_REPEAT)  != 0)
163#define IS_QUANTIFIER_BY_NUMBER(qn)    (((qn)->state & NST_BY_NUMBER)  != 0)
164
165#define CALLNODE_REFNUM_UNDEF  -1
166
167typedef struct {
168  NodeBase base;
169  UChar* s;
170  UChar* end;
171  unsigned int flag;
172  int    capa;    /* (allocated size - 1) or 0: use buf[] */
173  UChar  buf[NODE_STR_BUF_SIZE];
174} StrNode;
175
176typedef struct {
177  NodeBase base;
178  int state;
179  struct _Node* target;
180  int lower;
181  int upper;
182  int greedy;
183  int target_empty_info;
184  struct _Node* head_exact;
185  struct _Node* next_head_exact;
186  int is_refered;     /* include called node. don't eliminate even if {0} */
187#ifdef USE_COMBINATION_EXPLOSION_CHECK
188  int comb_exp_check_num;  /* 1,2,3...: check,  0: no check  */
189#endif
190} QtfrNode;
191
192typedef struct {
193  NodeBase base;
194  int state;
195  int type;
196  int regnum;
197  OnigOptionType option;
198  struct _Node*  target;
199  AbsAddrType    call_addr;
200  /* for multiple call reference */
201  OnigDistance min_len; /* min length (byte) */
202  OnigDistance max_len; /* max length (byte) */
203  int char_len;         /* character length  */
204  int opt_count;        /* referenced count in optimize_node_left() */
205} EncloseNode;
206
207#ifdef USE_SUBEXP_CALL
208
209typedef struct {
210  int           offset;
211  struct _Node* target;
212} UnsetAddr;
213
214typedef struct {
215  int        num;
216  int        alloc;
217  UnsetAddr* us;
218} UnsetAddrList;
219
220typedef struct {
221  NodeBase base;
222  int     state;
223  int     group_num;
224  UChar*  name;
225  UChar*  name_end;
226  struct _Node*  target;  /* EncloseNode : ENCLOSE_MEMORY */
227  UnsetAddrList* unset_addr_list;
228} CallNode;
229
230#endif
231
232typedef struct {
233  NodeBase base;
234  int  state;
235  int  back_num;
236  int  back_static[NODE_BACKREFS_SIZE];
237  int* back_dynamic;
238  int  nest_level;
239} BRefNode;
240
241typedef struct {
242  NodeBase base;
243  int type;
244  struct _Node* target;
245  int char_len;
246  int ascii_range;
247} AnchorNode;
248
249typedef struct {
250  NodeBase base;
251  struct _Node* car;
252  struct _Node* cdr;
253} ConsAltNode;
254
255typedef struct {
256  NodeBase base;
257  int ctype;
258  int not;
259  int ascii_range;
260} CtypeNode;
261
262typedef struct _Node {
263  union {
264    NodeBase     base;
265    StrNode      str;
266    CClassNode   cclass;
267    QtfrNode     qtfr;
268    EncloseNode  enclose;
269    BRefNode     bref;
270    AnchorNode   anchor;
271    ConsAltNode  cons;
272    CtypeNode    ctype;
273#ifdef USE_SUBEXP_CALL
274    CallNode     call;
275#endif
276  } u;
277} Node;
278
279
280#define NULL_NODE  ((Node* )0)
281
282#define SCANENV_MEMNODES_SIZE               8
283#define SCANENV_MEM_NODES(senv)   \
284 (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
285    (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
286
287typedef struct {
288  OnigOptionType   option;
289  OnigCaseFoldType case_fold_flag;
290  OnigEncoding     enc;
291  const OnigSyntaxType* syntax;
292  BitStatusType    capture_history;
293  BitStatusType    bt_mem_start;
294  BitStatusType    bt_mem_end;
295  BitStatusType    backrefed_mem;
296  UChar*           pattern;
297  UChar*           pattern_end;
298  UChar*           error;
299  UChar*           error_end;
300  regex_t*         reg;       /* for reg->names only */
301  int              num_call;
302#ifdef USE_SUBEXP_CALL
303  UnsetAddrList*   unset_addr_list;
304#endif
305  int              num_mem;
306#ifdef USE_NAMED_GROUP
307  int              num_named;
308#endif
309  int              mem_alloc;
310  Node*            mem_nodes_static[SCANENV_MEMNODES_SIZE];
311  Node**           mem_nodes_dynamic;
312#ifdef USE_COMBINATION_EXPLOSION_CHECK
313  int num_comb_exp_check;
314  int comb_exp_max_regnum;
315  int curr_max_regnum;
316  int has_recursion;
317#endif
318  int warnings_flag;
319  const char* sourcefile;
320  int sourceline;
321} ScanEnv;
322
323
324#define IS_SYNTAX_OP(syn, opm)    (((syn)->op  & (opm)) != 0)
325#define IS_SYNTAX_OP2(syn, opm)   (((syn)->op2 & (opm)) != 0)
326#define IS_SYNTAX_BV(syn, bvm)    (((syn)->behavior & (bvm)) != 0)
327
328#ifdef USE_NAMED_GROUP
329typedef struct {
330  int new_val;
331} GroupNumRemap;
332
333extern int    onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
334#endif
335
336extern int    onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
337extern void   onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
338extern void   onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
339extern int    onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
340extern void   onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
341extern void   onig_node_conv_to_str_node P_((Node* node, int raw));
342extern int    onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
343extern int    onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
344extern void   onig_node_free P_((Node* node));
345extern Node*  onig_node_new_enclose P_((int type));
346extern Node*  onig_node_new_anchor P_((int type));
347extern Node*  onig_node_new_str P_((const UChar* s, const UChar* end));
348extern Node*  onig_node_new_list P_((Node* left, Node* right));
349extern Node*  onig_node_list_add P_((Node* list, Node* x));
350extern Node*  onig_node_new_alt P_((Node* left, Node* right));
351extern void   onig_node_str_clear P_((Node* node));
352extern int    onig_free_node_list P_((void));
353extern int    onig_names_free P_((regex_t* reg));
354extern int    onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
355extern int    onig_free_shared_cclass_table P_((void));
356
357#ifdef ONIG_DEBUG
358#ifdef USE_NAMED_GROUP
359extern int onig_print_names(FILE*, regex_t*);
360#endif
361#endif
362
363#if defined __GNUC__ && __GNUC__ >= 4
364#pragma GCC visibility pop
365#endif
366
367#endif /* ONIGURUMA_REGPARSE_H */
368