1#ifndef ONIGURUMA_REGENC_H
2#define ONIGURUMA_REGENC_H
3/**********************************************************************
4  regenc.h -  Onigmo (Oniguruma-mod) (regular expression library)
5**********************************************************************/
6/*-
7 * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
8 * Copyright (c) 2011       K.Takata  <kentkt AT csc DOT jp>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32#ifndef REGINT_H
33#ifndef RUBY_EXTERN
34#include "ruby/config.h"
35#include "ruby/defines.h"
36#endif
37#ifdef ONIG_ESCAPE_UCHAR_COLLISION
38#undef ONIG_ESCAPE_UCHAR_COLLISION
39#endif
40#endif
41#include "ruby/oniguruma.h"
42
43#if defined __GNUC__ && __GNUC__ >= 4
44#pragma GCC visibility push(default)
45#endif
46
47typedef struct {
48  OnigCodePoint from;
49  OnigCodePoint to;
50} OnigPairCaseFoldCodes;
51
52
/*
 * Fallback definitions: each one is supplied only when the including
 * translation unit has not already defined it.
 */
#if !defined(NULL)
#  define NULL   ((void *)0)
#endif

#if !defined(TRUE)
#  define TRUE   1
#endif

#if !defined(FALSE)
#  define FALSE  0
#endif

/*
 * ARG_UNUSED expands to GCC's "unused" attribute when __GNUC__ is defined
 * and to nothing on other compilers.
 */
#if !defined(ARG_UNUSED)
#  ifdef __GNUC__
#    define ARG_UNUSED  __attribute__ ((unused))
#  else
#    define ARG_UNUSED
#  endif
#endif
72
/* Null-pointer predicates: compare p, converted to void*, with a null pointer. */
#define ONIG_IS_NULL(p)                    (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)

/*
 * Early-return helpers: leave the enclosing function when p is null,
 * returning NULL or (val) respectively.
 *
 * Each expands to a single do { ... } while (0) statement, so the macro can
 * be the unbraced body of an if/else without its hidden `if` capturing the
 * caller's `else`.  Use it as a statement, terminated by a semicolon.
 */
#define ONIG_CHECK_NULL_RETURN(p) \
  do { if (ONIG_IS_NULL(p)) return NULL; } while (0)
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) \
  do { if (ONIG_IS_NULL(p)) return (val); } while (0)
77
/*
 * Length of the character at p for encoding enc.
 * When the encoding's max_enc_len equals its min_enc_len, that fixed length
 * is used directly; otherwise the result comes from
 * ONIGENC_MBC_ENC_LEN(enc,p,e).
 * `enc` is parenthesized so any pointer expression (e.g. &obj) is accepted;
 * note that it is still evaluated more than once.
 */
#define enclen(enc,p,e) \
  (((enc)->max_enc_len == (enc)->min_enc_len) ? (enc)->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e))
79
/*
 * Character type bit flags.
 * CTYPE_TO_BIT turns a ctype number into its single-bit mask; every
 * BIT_CTYPE_* constant is that mask for the matching ONIGENC_CTYPE_* value.
 */
#define CTYPE_TO_BIT(ctype)  (1<<(ctype))

#define BIT_CTYPE_NEWLINE  CTYPE_TO_BIT(ONIGENC_CTYPE_NEWLINE)
#define BIT_CTYPE_ALPHA    CTYPE_TO_BIT(ONIGENC_CTYPE_ALPHA)
#define BIT_CTYPE_BLANK    CTYPE_TO_BIT(ONIGENC_CTYPE_BLANK)
#define BIT_CTYPE_CNTRL    CTYPE_TO_BIT(ONIGENC_CTYPE_CNTRL)
#define BIT_CTYPE_DIGIT    CTYPE_TO_BIT(ONIGENC_CTYPE_DIGIT)
#define BIT_CTYPE_GRAPH    CTYPE_TO_BIT(ONIGENC_CTYPE_GRAPH)
#define BIT_CTYPE_LOWER    CTYPE_TO_BIT(ONIGENC_CTYPE_LOWER)
#define BIT_CTYPE_PRINT    CTYPE_TO_BIT(ONIGENC_CTYPE_PRINT)
#define BIT_CTYPE_PUNCT    CTYPE_TO_BIT(ONIGENC_CTYPE_PUNCT)
#define BIT_CTYPE_SPACE    CTYPE_TO_BIT(ONIGENC_CTYPE_SPACE)
#define BIT_CTYPE_UPPER    CTYPE_TO_BIT(ONIGENC_CTYPE_UPPER)
#define BIT_CTYPE_XDIGIT   CTYPE_TO_BIT(ONIGENC_CTYPE_XDIGIT)
#define BIT_CTYPE_WORD     CTYPE_TO_BIT(ONIGENC_CTYPE_WORD)
#define BIT_CTYPE_ALNUM    CTYPE_TO_BIT(ONIGENC_CTYPE_ALNUM)
#define BIT_CTYPE_ASCII    CTYPE_TO_BIT(ONIGENC_CTYPE_ASCII)

/* Non-zero when ctype is one of ONIGENC_CTYPE_WORD, _GRAPH or _PRINT. */
#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
  ((ctype) == ONIGENC_CTYPE_WORD  || \
   (ctype) == ONIGENC_CTYPE_GRAPH || \
   (ctype) == ONIGENC_CTYPE_PRINT)
101
102
103typedef struct {
104  const UChar *name;
105  int       ctype;
106  short int len;
107} PosixBracketEntryType;
108
/*
 * Brace initializer for a PosixBracketEntryType: {name, ctype, len}.
 * `name` must be a string literal or character array, because len is
 * computed as sizeof(name) - 1 (the array size without the terminating NUL);
 * a plain pointer would yield the pointer size instead.
 * Both arguments are parenthesized so that arbitrary expressions expand
 * with the intended precedence.
 */
#define PosixBracketEntryInit(name, ctype) \
  {(const UChar *)(name), (ctype), (short int)(sizeof(name) - 1)}
110
/*
 * Compile-time feature switches.
 * The first two are enabled; the commented-out entries are options that
 * are left disabled in this build.
 */
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */  /* see Unicode.org UTS #18 */


/* Initial default encoding: ONIG_ENCODING_ASCII. */
#define ONIG_ENCODING_INIT_DEFAULT  ONIG_ENCODING_ASCII
118
119/* for encoding system implementation (internal) */
120ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc));
121ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc));
122ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
123ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
124ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc));
125ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc));
126
127
128/* methods for single byte encoding */
129ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc));
130ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p, const UChar* e, OnigEncoding enc));
131ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end, OnigEncoding enc));
132ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc));
133ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf, OnigEncoding enc));
134ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc));
135ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc));
136ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc));
137ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
138
139/* methods for multi byte encoding */
140ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
141ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
142ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc));
143ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
144ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
145ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
146ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
147ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc));
148ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
149ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
150
151
152/* in enc/unicode.c */
153ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
154ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc));
155ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
156ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
157ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
158ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc));
159
160
/*
 * Surrogate tests on a single byte value c: the low two bits are masked off
 * and the rest compared, so FIRST matches 0xd8-0xdb and SECOND matches
 * 0xdc-0xdf.
 */
#define UTF16_IS_SURROGATE_FIRST(c)    (0xd8 == ((c) & 0xfc))
#define UTF16_IS_SURROGATE_SECOND(c)   (0xdc == ((c) & 0xfc))
163
164#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
165  OnigEncISO_8859_1_ToLowerCaseTable[c]
166#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
167  OnigEncISO_8859_1_ToUpperCaseTable[c]
168
169ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
170ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
171
172ONIG_EXTERN int
173onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
174ONIG_EXTERN int
175onigenc_with_ascii_strnicmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
176ONIG_EXTERN UChar*
177onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
178
179/* defined in regexec.c, but used in enc/xxx.c */
180extern int  onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
181
182ONIG_EXTERN OnigEncoding  OnigEncDefaultCharEncoding;
183ONIG_EXTERN const UChar  OnigEncAsciiToLowerCaseTable[];
184ONIG_EXTERN const UChar  OnigEncAsciiToUpperCaseTable[];
185ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
186
/* ASCII helpers: range test, case-table lookups and ctype-table tests. */
#define ONIGENC_IS_ASCII_CODE(code)  ((code) < 0x80)
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)  OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)  OnigEncAsciiToUpperCaseTable[c]

/* Non-zero when the ctype bit for `ctype` is set in the table entry of `code`. */
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
  (0 != (OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)))

/* Non-zero when `code` has the UPPER or the LOWER ctype bit set. */
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
  (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) || \
   ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))

/*
 * Check if the code is in the range (from <= code && code <= to), using a
 * single comparison of the two offsets from `from` after conversion to
 * OnigCodePoint.
 * NOTE(review): this relies on OnigCodePoint wrapping around for
 * code < from, i.e. presumably an unsigned type -- confirm in oniguruma.h.
 */
#define ONIGENC_IS_IN_RANGE(code, from, to) \
  ((OnigCodePoint)((code) - (from)) <= (OnigCodePoint)((to) - (from)))
199
200
/*
 * Helpers for naming, declaring and defining an encoding object.
 *
 * With ONIG_ENC_REGISTER defined:
 *   - the object is named encoding_<n> and declared static;
 *   - OnigEncodingDefine(f,n) forward-declares it, emits Init_<f>(), which
 *     passes the object's `name` member and its address to
 *     ONIG_ENC_REGISTER, and ends with the declaration again so the caller
 *     can append the initializer.
 * Otherwise the object is named OnigEncoding<n>, is not static, and
 * OnigEncodingDefine(f,n) is just its declaration.
 */
#ifdef ONIG_ENC_REGISTER
extern int ONIG_ENC_REGISTER(const char *, OnigEncodingType*);
#  define OnigEncodingName(n)     encoding_##n
#  define OnigEncodingDeclare(n)  static OnigEncodingType OnigEncodingName(n)
#  define OnigEncodingDefine(f,n) \
     OnigEncodingDeclare(n); \
     void Init_##f(void) { \
       ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
                         &OnigEncodingName(n)); \
     } \
     OnigEncodingDeclare(n)
#else
#  define OnigEncodingName(n)     OnigEncoding##n
#  define OnigEncodingDeclare(n)  OnigEncodingType OnigEncodingName(n)
#  define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
#endif
217
/*
 * Macros for defining replica encodings, encoding aliases and dummy
 * encodings.  In this header all three expand to nothing.
 */
#define ENC_REPLICATE(name, orig)
#define ENC_ALIAS(name, orig)
#define ENC_DUMMY(name)
222
223#if defined __GNUC__ && __GNUC__ >= 4
224#pragma GCC visibility pop
225#endif
226
227#endif /* ONIGURUMA_REGENC_H */
228